faiss 0.5.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +1 -1
- data/ext/faiss/ext.cpp +1 -1
- data/ext/faiss/extconf.rb +5 -6
- data/ext/faiss/index_binary.cpp +76 -17
- data/ext/faiss/{index.cpp → index_rb.cpp} +108 -35
- data/ext/faiss/kmeans.cpp +12 -9
- data/ext/faiss/numo.hpp +11 -9
- data/ext/faiss/pca_matrix.cpp +10 -8
- data/ext/faiss/product_quantizer.cpp +14 -12
- data/ext/faiss/{utils.cpp → utils_rb.cpp} +10 -3
- data/ext/faiss/{utils.h → utils_rb.h} +6 -0
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +130 -11
- data/vendor/faiss/faiss/AutoTune.h +14 -1
- data/vendor/faiss/faiss/Clustering.cpp +59 -10
- data/vendor/faiss/faiss/Clustering.h +12 -0
- data/vendor/faiss/faiss/IVFlib.cpp +31 -28
- data/vendor/faiss/faiss/Index.cpp +20 -8
- data/vendor/faiss/faiss/Index.h +25 -3
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +19 -24
- data/vendor/faiss/faiss/IndexBinary.cpp +1 -0
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +9 -4
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +45 -11
- data/vendor/faiss/faiss/IndexFastScan.cpp +35 -22
- data/vendor/faiss/faiss/IndexFastScan.h +10 -1
- data/vendor/faiss/faiss/IndexFlat.cpp +193 -136
- data/vendor/faiss/faiss/IndexFlat.h +16 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +46 -22
- data/vendor/faiss/faiss/IndexFlatCodes.h +7 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +24 -50
- data/vendor/faiss/faiss/IndexHNSW.h +14 -12
- data/vendor/faiss/faiss/IndexIDMap.cpp +1 -1
- data/vendor/faiss/faiss/IndexIVF.cpp +76 -49
- data/vendor/faiss/faiss/IndexIVF.h +14 -4
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +11 -8
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +2 -2
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +25 -14
- data/vendor/faiss/faiss/IndexIVFFastScan.h +26 -22
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +10 -61
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +39 -111
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +89 -147
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +37 -5
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +42 -30
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -2
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +246 -97
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +32 -29
- data/vendor/faiss/faiss/IndexLSH.cpp +8 -6
- data/vendor/faiss/faiss/IndexLattice.cpp +29 -24
- data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +2 -1
- data/vendor/faiss/faiss/IndexNSG.h +0 -2
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +1 -1
- data/vendor/faiss/faiss/IndexPQ.cpp +19 -10
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +26 -13
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -2
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +132 -78
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +14 -12
- data/vendor/faiss/faiss/IndexRefine.cpp +0 -30
- data/vendor/faiss/faiss/IndexShards.cpp +3 -4
- data/vendor/faiss/faiss/MetricType.h +16 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +120 -0
- data/vendor/faiss/faiss/VectorTransform.h +23 -0
- data/vendor/faiss/faiss/clone_index.cpp +7 -4
- data/vendor/faiss/faiss/{cppcontrib/factory_tools.cpp → factory_tools.cpp} +1 -1
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +37 -11
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -28
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +367 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.h +107 -0
- data/vendor/faiss/faiss/impl/CodePacker.cpp +4 -0
- data/vendor/faiss/faiss/impl/CodePacker.h +11 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +83 -0
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.h +47 -0
- data/vendor/faiss/faiss/impl/FaissAssert.h +60 -2
- data/vendor/faiss/faiss/impl/HNSW.cpp +25 -34
- data/vendor/faiss/faiss/impl/HNSW.h +8 -6
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +34 -27
- data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -1
- data/vendor/faiss/faiss/impl/NSG.cpp +6 -5
- data/vendor/faiss/faiss/impl/NSG.h +17 -7
- data/vendor/faiss/faiss/impl/Panorama.cpp +53 -46
- data/vendor/faiss/faiss/impl/Panorama.h +22 -6
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +16 -5
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +70 -58
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +92 -0
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +93 -31
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +12 -28
- data/vendor/faiss/faiss/impl/RaBitQuantizer.h +3 -10
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +15 -41
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +0 -4
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +14 -9
- data/vendor/faiss/faiss/impl/ResultHandler.h +131 -50
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +67 -2358
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +0 -2
- data/vendor/faiss/faiss/impl/VisitedTable.cpp +42 -0
- data/vendor/faiss/faiss/impl/VisitedTable.h +69 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +158 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +829 -471
- data/vendor/faiss/faiss/impl/index_read_utils.h +0 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +17 -8
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +47 -20
- data/vendor/faiss/faiss/impl/mapped_io.cpp +9 -2
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +7 -2
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +11 -3
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +19 -13
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +29 -21
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx2.h → pq_code_distance/pq_code_distance-avx2.cpp} +42 -215
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx512.h → pq_code_distance/pq_code_distance-avx512.cpp} +68 -107
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +141 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +23 -0
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-sve.h → pq_code_distance/pq_code_distance-sve.cpp} +57 -144
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +9 -6
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +121 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +136 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +280 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +164 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +94 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +455 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +430 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +329 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +467 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +203 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +42 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +139 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +18 -18
- data/vendor/faiss/faiss/index_factory.cpp +35 -16
- data/vendor/faiss/faiss/index_io.h +29 -3
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +7 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +9 -19
- data/vendor/faiss/faiss/svs/IndexSVSFlat.h +2 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +2 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +9 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +9 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +46 -0
- data/vendor/faiss/faiss/utils/Heap.h +21 -0
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +10 -7
- data/vendor/faiss/faiss/utils/distances.cpp +141 -23
- data/vendor/faiss/faiss/utils/distances.h +98 -0
- data/vendor/faiss/faiss/utils/distances_dispatch.h +170 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +74 -3511
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +164 -157
- data/vendor/faiss/faiss/utils/extra_distances.cpp +52 -95
- data/vendor/faiss/faiss/utils/extra_distances.h +47 -1
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -1
- data/vendor/faiss/faiss/utils/partitioning.cpp +1 -1
- data/vendor/faiss/faiss/utils/pq_code_distance.h +251 -0
- data/vendor/faiss/faiss/utils/rabitq_simd.h +260 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +150 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +568 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +153 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +1185 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +1092 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +391 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +322 -0
- data/vendor/faiss/faiss/utils/simd_levels.h +91 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +12 -1
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +69 -0
- data/vendor/faiss/faiss/utils/simdlib_neon.h +6 -0
- data/vendor/faiss/faiss/utils/sorting.cpp +4 -4
- data/vendor/faiss/faiss/utils/utils.cpp +16 -9
- metadata +47 -18
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +0 -81
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +0 -186
- /data/vendor/faiss/faiss/{cppcontrib/factory_tools.h → factory_tools.h} +0 -0
|
@@ -5,64 +5,34 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
#
|
|
9
|
-
|
|
10
|
-
#ifdef __ARM_FEATURE_SVE
|
|
8
|
+
#ifdef COMPILE_SIMD_ARM_SVE
|
|
11
9
|
|
|
12
10
|
#include <arm_sve.h>
|
|
13
11
|
|
|
14
|
-
#include <
|
|
15
|
-
#include <type_traits>
|
|
16
|
-
|
|
17
|
-
#include <faiss/impl/ProductQuantizer.h>
|
|
18
|
-
#include <faiss/impl/code_distance/code_distance-generic.h>
|
|
19
|
-
|
|
20
|
-
namespace faiss {
|
|
12
|
+
#include <faiss/impl/pq_code_distance/pq_code_distance-inl.h>
|
|
21
13
|
|
|
22
|
-
|
|
23
|
-
std::enable_if_t<!std::is_same_v<PQDecoderT, PQDecoder8>, float> inline distance_single_code_sve(
|
|
24
|
-
// the product quantizer
|
|
25
|
-
const size_t M,
|
|
26
|
-
// number of bits per quantization index
|
|
27
|
-
const size_t nbits,
|
|
28
|
-
// precomputed distances, layout (M, ksub)
|
|
29
|
-
const float* sim_table,
|
|
30
|
-
const uint8_t* code) {
|
|
31
|
-
// default implementation
|
|
32
|
-
return distance_single_code_generic<PQDecoderT>(M, nbits, sim_table, code);
|
|
33
|
-
}
|
|
14
|
+
namespace {
|
|
34
15
|
|
|
35
|
-
|
|
16
|
+
inline void distance_codes_kernel(
|
|
36
17
|
svbool_t pg,
|
|
37
18
|
svuint32_t idx1,
|
|
38
19
|
svuint32_t offsets_0,
|
|
39
20
|
const float* tab,
|
|
40
21
|
svfloat32_t& partialSum) {
|
|
41
|
-
// add offset
|
|
42
22
|
const auto indices_to_read_from = svadd_u32_x(pg, idx1, offsets_0);
|
|
43
|
-
|
|
44
|
-
// gather values, similar to some operations of tab[index]
|
|
45
23
|
const auto collected =
|
|
46
24
|
svld1_gather_u32index_f32(pg, tab, indices_to_read_from);
|
|
47
|
-
|
|
48
|
-
// collect partial sum
|
|
49
25
|
partialSum = svadd_f32_m(pg, partialSum, collected);
|
|
50
26
|
}
|
|
51
27
|
|
|
52
|
-
|
|
53
|
-
// the product quantizer
|
|
28
|
+
inline float distance_single_code_sve_for_small_m(
|
|
54
29
|
const size_t M,
|
|
55
|
-
// precomputed distances, layout (M, ksub)
|
|
56
30
|
const float* sim_table,
|
|
57
|
-
// codes
|
|
58
31
|
const uint8_t* __restrict code) {
|
|
59
32
|
constexpr size_t nbits = 8u;
|
|
60
|
-
|
|
61
33
|
const size_t ksub = 1 << nbits;
|
|
62
34
|
|
|
63
35
|
const auto offsets_0 = svindex_u32(0, static_cast<uint32_t>(ksub));
|
|
64
|
-
|
|
65
|
-
// loop
|
|
66
36
|
const auto pg = svwhilelt_b32_u64(0, M);
|
|
67
37
|
|
|
68
38
|
auto mm1 = svld1ub_u32(pg, code);
|
|
@@ -71,44 +41,75 @@ static inline float distance_single_code_sve_for_small_m(
|
|
|
71
41
|
return svaddv_f32(pg, collected0);
|
|
72
42
|
}
|
|
73
43
|
|
|
74
|
-
|
|
75
|
-
std::enable_if_t<std::is_same_v<PQDecoderT, PQDecoder8>, float> inline distance_single_code_sve(
|
|
76
|
-
// the product quantizer
|
|
44
|
+
inline void distance_four_codes_sve_for_small_m(
|
|
77
45
|
const size_t M,
|
|
78
|
-
|
|
79
|
-
const
|
|
80
|
-
|
|
46
|
+
const float* sim_table,
|
|
47
|
+
const uint8_t* __restrict code0,
|
|
48
|
+
const uint8_t* __restrict code1,
|
|
49
|
+
const uint8_t* __restrict code2,
|
|
50
|
+
const uint8_t* __restrict code3,
|
|
51
|
+
float& result0,
|
|
52
|
+
float& result1,
|
|
53
|
+
float& result2,
|
|
54
|
+
float& result3) {
|
|
55
|
+
constexpr size_t nbits = 8u;
|
|
56
|
+
const size_t ksub = 1 << nbits;
|
|
57
|
+
|
|
58
|
+
const auto offsets_0 = svindex_u32(0, static_cast<uint32_t>(ksub));
|
|
59
|
+
const auto pg = svwhilelt_b32_u64(0, M);
|
|
60
|
+
|
|
61
|
+
auto mm10 = svld1ub_u32(pg, code0);
|
|
62
|
+
auto mm11 = svld1ub_u32(pg, code1);
|
|
63
|
+
auto mm12 = svld1ub_u32(pg, code2);
|
|
64
|
+
auto mm13 = svld1ub_u32(pg, code3);
|
|
65
|
+
mm10 = svadd_u32_x(pg, mm10, offsets_0);
|
|
66
|
+
mm11 = svadd_u32_x(pg, mm11, offsets_0);
|
|
67
|
+
mm12 = svadd_u32_x(pg, mm12, offsets_0);
|
|
68
|
+
mm13 = svadd_u32_x(pg, mm13, offsets_0);
|
|
69
|
+
const auto collected0 = svld1_gather_u32index_f32(pg, sim_table, mm10);
|
|
70
|
+
const auto collected1 = svld1_gather_u32index_f32(pg, sim_table, mm11);
|
|
71
|
+
const auto collected2 = svld1_gather_u32index_f32(pg, sim_table, mm12);
|
|
72
|
+
const auto collected3 = svld1_gather_u32index_f32(pg, sim_table, mm13);
|
|
73
|
+
result0 = svaddv_f32(pg, collected0);
|
|
74
|
+
result1 = svaddv_f32(pg, collected1);
|
|
75
|
+
result2 = svaddv_f32(pg, collected2);
|
|
76
|
+
result3 = svaddv_f32(pg, collected3);
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
} // namespace
|
|
80
|
+
|
|
81
|
+
namespace faiss {
|
|
82
|
+
namespace pq_code_distance {
|
|
83
|
+
|
|
84
|
+
// NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
|
|
85
|
+
template <>
|
|
86
|
+
float pq_code_distance_single_impl<SIMDLevel::ARM_SVE>(
|
|
87
|
+
size_t M,
|
|
88
|
+
size_t nbits,
|
|
81
89
|
const float* sim_table,
|
|
82
90
|
const uint8_t* code) {
|
|
83
91
|
if (M <= svcntw())
|
|
84
92
|
return distance_single_code_sve_for_small_m(M, sim_table, code);
|
|
85
93
|
|
|
86
94
|
const float* tab = sim_table;
|
|
87
|
-
|
|
88
95
|
const size_t ksub = 1 << nbits;
|
|
89
96
|
|
|
90
97
|
const auto offsets_0 = svindex_u32(0, static_cast<uint32_t>(ksub));
|
|
91
|
-
|
|
92
|
-
// accumulators of partial sums
|
|
93
98
|
auto partialSum = svdup_n_f32(0.f);
|
|
94
99
|
|
|
95
100
|
const auto lanes = svcntb();
|
|
96
101
|
const auto quad_lanes = lanes / 4;
|
|
97
102
|
|
|
98
|
-
// loop
|
|
99
103
|
for (std::size_t m = 0; m < M;) {
|
|
100
104
|
const auto pg = svwhilelt_b8_u64(m, M);
|
|
101
|
-
|
|
102
105
|
const auto mm1 = svld1_u8(pg, code + m);
|
|
103
106
|
{
|
|
104
107
|
const auto mm1lo = svunpklo_u16(mm1);
|
|
105
108
|
const auto pglo = svunpklo_b(pg);
|
|
106
109
|
|
|
107
110
|
{
|
|
108
|
-
// convert uint8 values to uint32 values
|
|
109
111
|
const auto idx1 = svunpklo_u32(mm1lo);
|
|
110
112
|
const auto pglolo = svunpklo_b(pglo);
|
|
111
|
-
|
|
112
113
|
distance_codes_kernel(pglolo, idx1, offsets_0, tab, partialSum);
|
|
113
114
|
tab += ksub * quad_lanes;
|
|
114
115
|
}
|
|
@@ -118,10 +119,8 @@ std::enable_if_t<std::is_same_v<PQDecoderT, PQDecoder8>, float> inline distance_
|
|
|
118
119
|
break;
|
|
119
120
|
|
|
120
121
|
{
|
|
121
|
-
// convert uint8 values to uint32 values
|
|
122
122
|
const auto idx1 = svunpkhi_u32(mm1lo);
|
|
123
123
|
const auto pglohi = svunpkhi_b(pglo);
|
|
124
|
-
|
|
125
124
|
distance_codes_kernel(pglohi, idx1, offsets_0, tab, partialSum);
|
|
126
125
|
tab += ksub * quad_lanes;
|
|
127
126
|
}
|
|
@@ -136,10 +135,8 @@ std::enable_if_t<std::is_same_v<PQDecoderT, PQDecoder8>, float> inline distance_
|
|
|
136
135
|
const auto pghi = svunpkhi_b(pg);
|
|
137
136
|
|
|
138
137
|
{
|
|
139
|
-
// convert uint8 values to uint32 values
|
|
140
138
|
const auto idx1 = svunpklo_u32(mm1hi);
|
|
141
139
|
const auto pghilo = svunpklo_b(pghi);
|
|
142
|
-
|
|
143
140
|
distance_codes_kernel(pghilo, idx1, offsets_0, tab, partialSum);
|
|
144
141
|
tab += ksub * quad_lanes;
|
|
145
142
|
}
|
|
@@ -149,10 +146,8 @@ std::enable_if_t<std::is_same_v<PQDecoderT, PQDecoder8>, float> inline distance_
|
|
|
149
146
|
break;
|
|
150
147
|
|
|
151
148
|
{
|
|
152
|
-
// convert uint8 values to uint32 values
|
|
153
149
|
const auto idx1 = svunpkhi_u32(mm1hi);
|
|
154
150
|
const auto pghihi = svunpkhi_b(pghi);
|
|
155
|
-
|
|
156
151
|
distance_codes_kernel(pghihi, idx1, offsets_0, tab, partialSum);
|
|
157
152
|
tab += ksub * quad_lanes;
|
|
158
153
|
}
|
|
@@ -164,97 +159,17 @@ std::enable_if_t<std::is_same_v<PQDecoderT, PQDecoder8>, float> inline distance_
|
|
|
164
159
|
return svaddv_f32(svptrue_b32(), partialSum);
|
|
165
160
|
}
|
|
166
161
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
const size_t nbits,
|
|
174
|
-
// precomputed distances, layout (M, ksub)
|
|
175
|
-
const float* sim_table,
|
|
176
|
-
// codes
|
|
177
|
-
const uint8_t* __restrict code0,
|
|
178
|
-
const uint8_t* __restrict code1,
|
|
179
|
-
const uint8_t* __restrict code2,
|
|
180
|
-
const uint8_t* __restrict code3,
|
|
181
|
-
// computed distances
|
|
182
|
-
float& result0,
|
|
183
|
-
float& result1,
|
|
184
|
-
float& result2,
|
|
185
|
-
float& result3) {
|
|
186
|
-
distance_four_codes_generic<PQDecoderT>(
|
|
187
|
-
M,
|
|
188
|
-
nbits,
|
|
189
|
-
sim_table,
|
|
190
|
-
code0,
|
|
191
|
-
code1,
|
|
192
|
-
code2,
|
|
193
|
-
code3,
|
|
194
|
-
result0,
|
|
195
|
-
result1,
|
|
196
|
-
result2,
|
|
197
|
-
result3);
|
|
198
|
-
}
|
|
199
|
-
|
|
200
|
-
static inline void distance_four_codes_sve_for_small_m(
|
|
201
|
-
// the product quantizer
|
|
202
|
-
const size_t M,
|
|
203
|
-
// precomputed distances, layout (M, ksub)
|
|
204
|
-
const float* sim_table,
|
|
205
|
-
// codes
|
|
206
|
-
const uint8_t* __restrict code0,
|
|
207
|
-
const uint8_t* __restrict code1,
|
|
208
|
-
const uint8_t* __restrict code2,
|
|
209
|
-
const uint8_t* __restrict code3,
|
|
210
|
-
// computed distances
|
|
211
|
-
float& result0,
|
|
212
|
-
float& result1,
|
|
213
|
-
float& result2,
|
|
214
|
-
float& result3) {
|
|
215
|
-
constexpr size_t nbits = 8u;
|
|
216
|
-
|
|
217
|
-
const size_t ksub = 1 << nbits;
|
|
218
|
-
|
|
219
|
-
const auto offsets_0 = svindex_u32(0, static_cast<uint32_t>(ksub));
|
|
220
|
-
|
|
221
|
-
// loop
|
|
222
|
-
const auto pg = svwhilelt_b32_u64(0, M);
|
|
223
|
-
|
|
224
|
-
auto mm10 = svld1ub_u32(pg, code0);
|
|
225
|
-
auto mm11 = svld1ub_u32(pg, code1);
|
|
226
|
-
auto mm12 = svld1ub_u32(pg, code2);
|
|
227
|
-
auto mm13 = svld1ub_u32(pg, code3);
|
|
228
|
-
mm10 = svadd_u32_x(pg, mm10, offsets_0);
|
|
229
|
-
mm11 = svadd_u32_x(pg, mm11, offsets_0);
|
|
230
|
-
mm12 = svadd_u32_x(pg, mm12, offsets_0);
|
|
231
|
-
mm13 = svadd_u32_x(pg, mm13, offsets_0);
|
|
232
|
-
const auto collected0 = svld1_gather_u32index_f32(pg, sim_table, mm10);
|
|
233
|
-
const auto collected1 = svld1_gather_u32index_f32(pg, sim_table, mm11);
|
|
234
|
-
const auto collected2 = svld1_gather_u32index_f32(pg, sim_table, mm12);
|
|
235
|
-
const auto collected3 = svld1_gather_u32index_f32(pg, sim_table, mm13);
|
|
236
|
-
result0 = svaddv_f32(pg, collected0);
|
|
237
|
-
result1 = svaddv_f32(pg, collected1);
|
|
238
|
-
result2 = svaddv_f32(pg, collected2);
|
|
239
|
-
result3 = svaddv_f32(pg, collected3);
|
|
240
|
-
}
|
|
241
|
-
|
|
242
|
-
// Combines 4 operations of distance_single_code()
|
|
243
|
-
template <typename PQDecoderT>
|
|
244
|
-
std::enable_if_t<std::is_same_v<PQDecoderT, PQDecoder8>, void>
|
|
245
|
-
distance_four_codes_sve(
|
|
246
|
-
// the product quantizer
|
|
247
|
-
const size_t M,
|
|
248
|
-
// number of bits per quantization index
|
|
249
|
-
const size_t nbits,
|
|
250
|
-
// precomputed distances, layout (M, ksub)
|
|
162
|
+
// Combines 4 operations of pq_code_distance_single_impl().
|
|
163
|
+
// NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
|
|
164
|
+
template <>
|
|
165
|
+
void pq_code_distance_four_impl<SIMDLevel::ARM_SVE>(
|
|
166
|
+
size_t M,
|
|
167
|
+
size_t nbits,
|
|
251
168
|
const float* sim_table,
|
|
252
|
-
// codes
|
|
253
169
|
const uint8_t* __restrict code0,
|
|
254
170
|
const uint8_t* __restrict code1,
|
|
255
171
|
const uint8_t* __restrict code2,
|
|
256
172
|
const uint8_t* __restrict code3,
|
|
257
|
-
// computed distances
|
|
258
173
|
float& result0,
|
|
259
174
|
float& result1,
|
|
260
175
|
float& result2,
|
|
@@ -275,12 +190,10 @@ distance_four_codes_sve(
|
|
|
275
190
|
}
|
|
276
191
|
|
|
277
192
|
const float* tab = sim_table;
|
|
278
|
-
|
|
279
193
|
const size_t ksub = 1 << nbits;
|
|
280
194
|
|
|
281
195
|
const auto offsets_0 = svindex_u32(0, static_cast<uint32_t>(ksub));
|
|
282
196
|
|
|
283
|
-
// accumulators of partial sums
|
|
284
197
|
auto partialSum0 = svdup_n_f32(0.f);
|
|
285
198
|
auto partialSum1 = svdup_n_f32(0.f);
|
|
286
199
|
auto partialSum2 = svdup_n_f32(0.f);
|
|
@@ -289,7 +202,6 @@ distance_four_codes_sve(
|
|
|
289
202
|
const auto lanes = svcntb();
|
|
290
203
|
const auto quad_lanes = lanes / 4;
|
|
291
204
|
|
|
292
|
-
// loop
|
|
293
205
|
for (std::size_t m = 0; m < M;) {
|
|
294
206
|
const auto pg = svwhilelt_b8_u64(m, M);
|
|
295
207
|
|
|
@@ -434,6 +346,7 @@ distance_four_codes_sve(
|
|
|
434
346
|
result3 = svaddv_f32(svptrue_b32(), partialSum3);
|
|
435
347
|
}
|
|
436
348
|
|
|
349
|
+
} // namespace pq_code_distance
|
|
437
350
|
} // namespace faiss
|
|
438
351
|
|
|
439
|
-
#endif
|
|
352
|
+
#endif // COMPILE_SIMD_ARM_SVE
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
#include <faiss/impl/AuxIndexStructures.h>
|
|
11
11
|
#include <faiss/impl/FaissAssert.h>
|
|
12
12
|
#include <faiss/impl/ResidualQuantizer.h>
|
|
13
|
+
#include <faiss/impl/simd_dispatch.h>
|
|
13
14
|
#include <faiss/utils/Heap.h>
|
|
14
15
|
#include <faiss/utils/distances.h>
|
|
15
16
|
#include <faiss/utils/simdlib.h>
|
|
@@ -877,12 +878,14 @@ void compute_codes_add_centroids_mp_lut0(
|
|
|
877
878
|
pool.norms.resize(n);
|
|
878
879
|
// recover the norms of reconstruction as
|
|
879
880
|
// || original_vector - residual ||^2
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
881
|
+
with_simd_level([&]<SIMDLevel SL>() {
|
|
882
|
+
for (size_t i = 0; i < n; i++) {
|
|
883
|
+
pool.norms[i] = fvec_L2sqr<SL>(
|
|
884
|
+
x + i * rq.d,
|
|
885
|
+
pool.residuals.data() + i * rq.max_beam_size * rq.d,
|
|
886
|
+
rq.d);
|
|
887
|
+
}
|
|
888
|
+
});
|
|
886
889
|
}
|
|
887
890
|
|
|
888
891
|
// pack only the first code of the beam
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
#include <faiss/impl/ScalarQuantizer.h>
|
|
11
|
+
#include <faiss/utils/simd_levels.h>
|
|
12
|
+
#include <faiss/utils/simdlib.h>
|
|
13
|
+
|
|
14
|
+
namespace faiss {
|
|
15
|
+
|
|
16
|
+
namespace scalar_quantizer {
|
|
17
|
+
|
|
18
|
+
/*******************************************************************
|
|
19
|
+
* Codec: converts between values in [0, 1] and an index in a code
|
|
20
|
+
* array. The "i" parameter is the vector component index (not byte
|
|
21
|
+
* index).
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
template <SIMDLevel SL>
|
|
25
|
+
struct Codec8bit {};
|
|
26
|
+
|
|
27
|
+
template <>
|
|
28
|
+
struct Codec8bit<SIMDLevel::NONE> {
|
|
29
|
+
static FAISS_ALWAYS_INLINE void encode_component(
|
|
30
|
+
float x,
|
|
31
|
+
uint8_t* code,
|
|
32
|
+
size_t i) {
|
|
33
|
+
code[i] = (int)(255 * x);
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
static FAISS_ALWAYS_INLINE float decode_component(
|
|
37
|
+
const uint8_t* code,
|
|
38
|
+
size_t i) {
|
|
39
|
+
return (code[i] + 0.5f) / 255.0f;
|
|
40
|
+
}
|
|
41
|
+
};
|
|
42
|
+
|
|
43
|
+
template <SIMDLevel SL>
|
|
44
|
+
struct Codec4bit {};
|
|
45
|
+
|
|
46
|
+
template <>
|
|
47
|
+
struct Codec4bit<SIMDLevel::NONE> {
|
|
48
|
+
static FAISS_ALWAYS_INLINE void encode_component(
|
|
49
|
+
float x,
|
|
50
|
+
uint8_t* code,
|
|
51
|
+
size_t i) {
|
|
52
|
+
code[i / 2] |= (int)(x * 15.0) << ((i & 1) << 2);
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
static FAISS_ALWAYS_INLINE float decode_component(
|
|
56
|
+
const uint8_t* code,
|
|
57
|
+
size_t i) {
|
|
58
|
+
return (((code[i / 2] >> ((i & 1) << 2)) & 0xf) + 0.5f) / 15.0f;
|
|
59
|
+
}
|
|
60
|
+
};
|
|
61
|
+
|
|
62
|
+
template <SIMDLevel SL>
|
|
63
|
+
struct Codec6bit {};
|
|
64
|
+
|
|
65
|
+
template <>
|
|
66
|
+
struct Codec6bit<SIMDLevel::NONE> {
|
|
67
|
+
static FAISS_ALWAYS_INLINE void encode_component(
|
|
68
|
+
float x,
|
|
69
|
+
uint8_t* code,
|
|
70
|
+
size_t i) {
|
|
71
|
+
int bits = (int)(x * 63.0);
|
|
72
|
+
code += (i >> 2) * 3;
|
|
73
|
+
switch (i & 3) {
|
|
74
|
+
case 0:
|
|
75
|
+
code[0] |= bits;
|
|
76
|
+
break;
|
|
77
|
+
case 1:
|
|
78
|
+
code[0] |= bits << 6;
|
|
79
|
+
code[1] |= bits >> 2;
|
|
80
|
+
break;
|
|
81
|
+
case 2:
|
|
82
|
+
code[1] |= bits << 4;
|
|
83
|
+
code[2] |= bits >> 4;
|
|
84
|
+
break;
|
|
85
|
+
case 3:
|
|
86
|
+
code[2] |= bits << 2;
|
|
87
|
+
break;
|
|
88
|
+
default:
|
|
89
|
+
break;
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
static FAISS_ALWAYS_INLINE float decode_component(
|
|
94
|
+
const uint8_t* code,
|
|
95
|
+
size_t i) {
|
|
96
|
+
uint8_t bits = 0;
|
|
97
|
+
code += (i >> 2) * 3;
|
|
98
|
+
switch (i & 3) {
|
|
99
|
+
case 0:
|
|
100
|
+
bits = code[0] & 0x3f;
|
|
101
|
+
break;
|
|
102
|
+
case 1:
|
|
103
|
+
bits = code[0] >> 6;
|
|
104
|
+
bits |= (code[1] & 0xf) << 2;
|
|
105
|
+
break;
|
|
106
|
+
case 2:
|
|
107
|
+
bits = code[1] >> 4;
|
|
108
|
+
bits |= (code[2] & 3) << 4;
|
|
109
|
+
break;
|
|
110
|
+
case 3:
|
|
111
|
+
bits = code[2] >> 2;
|
|
112
|
+
break;
|
|
113
|
+
default:
|
|
114
|
+
break;
|
|
115
|
+
}
|
|
116
|
+
return (bits + 0.5f) / 63.0f;
|
|
117
|
+
}
|
|
118
|
+
};
|
|
119
|
+
|
|
120
|
+
} // namespace scalar_quantizer
|
|
121
|
+
} // namespace faiss
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
#include <faiss/impl/ScalarQuantizer.h>
|
|
11
|
+
#include <faiss/utils/simd_levels.h>
|
|
12
|
+
#include <faiss/utils/simdlib.h>
|
|
13
|
+
|
|
14
|
+
namespace faiss {
|
|
15
|
+
|
|
16
|
+
namespace scalar_quantizer {
|
|
17
|
+
|
|
18
|
+
using SQDistanceComputer = ScalarQuantizer::SQDistanceComputer;
|
|
19
|
+
|
|
20
|
+
/*******************************************************************
|
|
21
|
+
* DistanceComputer: combines a similarity and a quantizer to do
|
|
22
|
+
* code-to-vector or code-to-code comparisons
|
|
23
|
+
*******************************************************************/
|
|
24
|
+
|
|
25
|
+
template <class Quantizer, class Similarity, SIMDLevel SL>
|
|
26
|
+
struct DCTemplate : SQDistanceComputer {};
|
|
27
|
+
|
|
28
|
+
template <class Quantizer, class Similarity>
|
|
29
|
+
struct DCTemplate<Quantizer, Similarity, SIMDLevel::NONE> : SQDistanceComputer {
|
|
30
|
+
using Sim = Similarity;
|
|
31
|
+
|
|
32
|
+
Quantizer quant;
|
|
33
|
+
|
|
34
|
+
DCTemplate(size_t d, const std::vector<float>& trained)
|
|
35
|
+
: quant(d, trained) {}
|
|
36
|
+
|
|
37
|
+
float compute_distance(const float* x, const uint8_t* code) const {
|
|
38
|
+
Similarity sim(x);
|
|
39
|
+
sim.begin();
|
|
40
|
+
for (size_t i = 0; i < quant.d; i++) {
|
|
41
|
+
float xi = quant.reconstruct_component(code, i);
|
|
42
|
+
sim.add_component(xi);
|
|
43
|
+
}
|
|
44
|
+
return sim.result();
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
float compute_code_distance(const uint8_t* code1, const uint8_t* code2)
|
|
48
|
+
const {
|
|
49
|
+
Similarity sim(nullptr);
|
|
50
|
+
sim.begin();
|
|
51
|
+
for (size_t i = 0; i < quant.d; i++) {
|
|
52
|
+
float x1 = quant.reconstruct_component(code1, i);
|
|
53
|
+
float x2 = quant.reconstruct_component(code2, i);
|
|
54
|
+
sim.add_component_2(x1, x2);
|
|
55
|
+
}
|
|
56
|
+
return sim.result();
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
void set_query(const float* x) final {
|
|
60
|
+
q = x;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
float symmetric_dis(idx_t i, idx_t j) override {
|
|
64
|
+
return compute_code_distance(
|
|
65
|
+
codes + i * code_size, codes + j * code_size);
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
float query_to_code(const uint8_t* code) const final {
|
|
69
|
+
return compute_distance(q, code);
|
|
70
|
+
}
|
|
71
|
+
};
|
|
72
|
+
|
|
73
|
+
/*******************************************************************
|
|
74
|
+
* DistanceComputerByte: computes distances in the integer domain
|
|
75
|
+
*******************************************************************/
|
|
76
|
+
|
|
77
|
+
template <class Similarity, SIMDLevel SL>
|
|
78
|
+
struct DistanceComputerByte : SQDistanceComputer {};
|
|
79
|
+
|
|
80
|
+
template <class Similarity>
|
|
81
|
+
struct DistanceComputerByte<Similarity, SIMDLevel::NONE> : SQDistanceComputer {
|
|
82
|
+
using Sim = Similarity;
|
|
83
|
+
|
|
84
|
+
int d;
|
|
85
|
+
std::vector<uint8_t> tmp;
|
|
86
|
+
|
|
87
|
+
DistanceComputerByte(int d, const std::vector<float>&) : d(d), tmp(d) {}
|
|
88
|
+
|
|
89
|
+
int compute_code_distance(const uint8_t* code1, const uint8_t* code2)
|
|
90
|
+
const {
|
|
91
|
+
int accu = 0;
|
|
92
|
+
for (int i = 0; i < d; i++) {
|
|
93
|
+
if (Sim::metric_type == METRIC_INNER_PRODUCT) {
|
|
94
|
+
accu += int(code1[i]) * code2[i];
|
|
95
|
+
} else {
|
|
96
|
+
int diff = int(code1[i]) - code2[i];
|
|
97
|
+
accu += diff * diff;
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
return accu;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
void set_query(const float* x) final {
|
|
104
|
+
for (int i = 0; i < d; i++) {
|
|
105
|
+
tmp[i] = int(x[i]);
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
int compute_distance(const float* x, const uint8_t* code) {
|
|
110
|
+
set_query(x);
|
|
111
|
+
return compute_code_distance(tmp.data(), code);
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
float symmetric_dis(idx_t i, idx_t j) override {
|
|
115
|
+
return compute_code_distance(
|
|
116
|
+
codes + i * code_size, codes + j * code_size);
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
float query_to_code(const uint8_t* code) const final {
|
|
120
|
+
return compute_code_distance(tmp.data(), code);
|
|
121
|
+
}
|
|
122
|
+
};
|
|
123
|
+
|
|
124
|
+
/*******************************************************************
|
|
125
|
+
* Selection function
|
|
126
|
+
*******************************************************************/
|
|
127
|
+
|
|
128
|
+
template <SIMDLevel SL>
|
|
129
|
+
SQDistanceComputer* sq_select_distance_computer(
|
|
130
|
+
MetricType metric,
|
|
131
|
+
ScalarQuantizer::QuantizerType qtype,
|
|
132
|
+
size_t d,
|
|
133
|
+
const std::vector<float>& trained);
|
|
134
|
+
|
|
135
|
+
} // namespace scalar_quantizer
|
|
136
|
+
} // namespace faiss
|