@img/sharp-libvips-dev 1.0.2 → 1.0.3
This diff shows the content changes between publicly released versions of this package, as published to the supported public registries, and is provided for informational purposes only.
- package/README.md +1 -2
- package/include/aom/aom_decoder.h +1 -1
- package/include/aom/aom_encoder.h +7 -1
- package/include/aom/aom_image.h +24 -12
- package/include/aom/aom_integer.h +3 -3
- package/include/aom/aomcx.h +15 -0
- package/include/aom/aomdx.h +5 -2
- package/include/archive.h +7 -5
- package/include/archive_entry.h +5 -3
- package/include/cgif.h +3 -0
- package/include/freetype2/freetype/config/ftoption.h +1 -1
- package/include/fribidi/fribidi-config.h +2 -2
- package/include/fribidi/fribidi-unicode-version.h +3 -3
- package/include/glib-2.0/gio/gappinfo.h +40 -25
- package/include/glib-2.0/gio/gasyncresult.h +1 -1
- package/include/glib-2.0/gio/gconverter.h +5 -0
- package/include/glib-2.0/gio/gdbusintrospection.h +1 -1
- package/include/glib-2.0/gio/gfile.h +16 -0
- package/include/glib-2.0/gio/gio-visibility.h +34 -0
- package/include/glib-2.0/gio/gsettings.h +8 -0
- package/include/glib-2.0/gio/gvfs.h +2 -2
- package/include/glib-2.0/girepository/gi-visibility.h +34 -0
- package/include/glib-2.0/glib/gbookmarkfile.h +1 -1
- package/include/glib-2.0/glib/giochannel.h +2 -2
- package/include/glib-2.0/glib/glib-visibility.h +34 -0
- package/include/glib-2.0/glib/gmacros.h +12 -5
- package/include/glib-2.0/glib/gmain.h +93 -7
- package/include/glib-2.0/glib/gqsort.h +8 -1
- package/include/glib-2.0/glib/gstrfuncs.h +0 -12
- package/include/glib-2.0/glib/gstrvbuilder.h +3 -0
- package/include/glib-2.0/glib/gunicode.h +1 -1
- package/include/glib-2.0/glib/gversionmacros.h +9 -0
- package/include/glib-2.0/gmodule/gmodule-visibility.h +34 -0
- package/include/glib-2.0/gobject/gobject-visibility.h +34 -0
- package/include/glib-2.0/gobject/gtype.h +6 -6
- package/include/harfbuzz/hb-buffer.h +6 -0
- package/include/harfbuzz/hb-common.h +6 -9
- package/include/harfbuzz/hb-cplusplus.hh +8 -11
- package/include/harfbuzz/hb-subset.h +17 -4
- package/include/harfbuzz/hb-version.h +3 -3
- package/include/hwy/abort.h +28 -0
- package/include/hwy/aligned_allocator.h +48 -1
- package/include/hwy/base.h +235 -34
- package/include/hwy/detect_compiler_arch.h +84 -10
- package/include/hwy/detect_targets.h +95 -29
- package/include/hwy/foreach_target.h +12 -1
- package/include/hwy/highway.h +205 -50
- package/include/hwy/ops/arm_neon-inl.h +841 -99
- package/include/hwy/ops/arm_sve-inl.h +413 -141
- package/include/hwy/ops/emu128-inl.h +373 -360
- package/include/hwy/ops/generic_ops-inl.h +804 -401
- package/include/hwy/ops/inside-inl.h +691 -0
- package/include/hwy/ops/ppc_vsx-inl.h +456 -166
- package/include/hwy/ops/rvv-inl.h +537 -249
- package/include/hwy/ops/scalar-inl.h +169 -79
- package/include/hwy/ops/set_macros-inl.h +106 -18
- package/include/hwy/ops/shared-inl.h +23 -0
- package/include/hwy/ops/wasm_128-inl.h +130 -108
- package/include/hwy/ops/x86_128-inl.h +1892 -577
- package/include/hwy/ops/x86_256-inl.h +625 -184
- package/include/hwy/ops/x86_512-inl.h +733 -131
- package/include/hwy/targets.h +22 -21
- package/include/hwy/timer-inl.h +3 -3
- package/include/hwy/timer.h +5 -1
- package/include/libheif/heif.h +170 -15
- package/include/libheif/heif_items.h +237 -0
- package/include/libheif/heif_properties.h +38 -2
- package/include/libheif/heif_regions.h +1 -1
- package/include/libheif/heif_version.h +2 -2
- package/include/libpng16/pnglibconf.h +1 -1
- package/include/librsvg-2.0/librsvg/rsvg-cairo.h +1 -1
- package/include/librsvg-2.0/librsvg/rsvg-features.h +3 -4
- package/include/librsvg-2.0/librsvg/rsvg-pixbuf.h +235 -0
- package/include/librsvg-2.0/librsvg/rsvg-version.h +3 -3
- package/include/librsvg-2.0/librsvg/rsvg.h +55 -176
- package/include/libxml2/libxml/HTMLparser.h +12 -19
- package/include/libxml2/libxml/c14n.h +1 -12
- package/include/libxml2/libxml/debugXML.h +1 -1
- package/include/libxml2/libxml/encoding.h +9 -0
- package/include/libxml2/libxml/entities.h +12 -1
- package/include/libxml2/libxml/hash.h +19 -0
- package/include/libxml2/libxml/list.h +2 -2
- package/include/libxml2/libxml/nanohttp.h +17 -0
- package/include/libxml2/libxml/parser.h +61 -55
- package/include/libxml2/libxml/parserInternals.h +9 -1
- package/include/libxml2/libxml/pattern.h +6 -0
- package/include/libxml2/libxml/tree.h +32 -12
- package/include/libxml2/libxml/uri.h +11 -0
- package/include/libxml2/libxml/valid.h +29 -2
- package/include/libxml2/libxml/xinclude.h +7 -0
- package/include/libxml2/libxml/xmlIO.h +21 -4
- package/include/libxml2/libxml/xmlerror.h +14 -0
- package/include/libxml2/libxml/xmlexports.h +111 -15
- package/include/libxml2/libxml/xmlmemory.h +8 -45
- package/include/libxml2/libxml/xmlreader.h +2 -0
- package/include/libxml2/libxml/xmlsave.h +5 -0
- package/include/libxml2/libxml/xmlunicode.h +165 -1
- package/include/libxml2/libxml/xmlversion.h +15 -179
- package/include/libxml2/libxml/xmlwriter.h +1 -0
- package/include/libxml2/libxml/xpath.h +4 -0
- package/include/pango-1.0/pango/pango-features.h +3 -3
- package/include/pango-1.0/pango/pango-item.h +4 -2
- package/include/pango-1.0/pango/pango-version-macros.h +25 -0
- package/include/pango-1.0/pango/pangofc-font.h +2 -1
- package/include/pnglibconf.h +1 -1
- package/include/vips/util.h +1 -2
- package/include/vips/version.h +4 -4
- package/include/webp/decode.h +58 -56
- package/include/webp/demux.h +25 -21
- package/include/webp/encode.h +44 -39
- package/include/webp/mux.h +76 -15
- package/include/webp/mux_types.h +2 -1
- package/include/webp/sharpyuv/sharpyuv.h +77 -8
- package/include/webp/types.h +29 -8
- package/include/zconf.h +1 -1
- package/include/zlib.h +12 -12
- package/package.json +1 -1
- package/versions.json +14 -15
package/include/hwy/ops/inside-inl.h
@@ -0,0 +1,691 @@
+// Copyright 2023 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Must be included inside an existing include guard, with the following ops
+// already defined: BitCast, And, Set, ShiftLeft, ShiftRight, PromoteLowerTo,
+// ConcatEven, ConcatOdd, plus the optional detail::PromoteEvenTo and
+// detail::PromoteOddTo (if implemented in the target-specific header).
+
+// This is normally set by set_macros-inl.h before this header is included;
+// if not, we are viewing this header standalone. Reduce IDE errors by:
+#if !defined(HWY_NAMESPACE)
+// 1) Defining HWY_IDE so we get syntax highlighting rather than all-gray text.
+#include "hwy/ops/shared-inl.h"
+// 2) Entering the HWY_NAMESPACE to make definitions from shared-inl.h visible.
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+#define HWY_INSIDE_END_NAMESPACE
+// 3) Providing a dummy VFromD (usually done by the target-specific header).
+template <class D>
+using VFromD = int;
+template <class D>
+using TFromV = int;
+template <class D>
+struct DFromV {};
+#endif
+
+// ------------------------------ Vec/Create/Get/Set2..4
+
+// On SVE and RVV, Vec2..4 are aliases to built-in types. Also exclude the
+// fixed-size SVE targets.
+#if HWY_IDE || (!HWY_HAVE_SCALABLE && !HWY_TARGET_IS_SVE)
+
+// NOTE: these are used inside arm_neon-inl.h, hence they cannot be defined in
+// generic_ops-inl.h, which is included after that.
+template <class D>
+struct Vec2 {
+  VFromD<D> v0;
+  VFromD<D> v1;
+};
+
+template <class D>
+struct Vec3 {
+  VFromD<D> v0;
+  VFromD<D> v1;
+  VFromD<D> v2;
+};
+
+template <class D>
+struct Vec4 {
+  VFromD<D> v0;
+  VFromD<D> v1;
+  VFromD<D> v2;
+  VFromD<D> v3;
+};
+
+// D arg is unused but allows deducing D.
+template <class D>
+HWY_API Vec2<D> Create2(D /* tag */, VFromD<D> v0, VFromD<D> v1) {
+  return Vec2<D>{v0, v1};
+}
+
+template <class D>
+HWY_API Vec3<D> Create3(D /* tag */, VFromD<D> v0, VFromD<D> v1, VFromD<D> v2) {
+  return Vec3<D>{v0, v1, v2};
+}
+
+template <class D>
+HWY_API Vec4<D> Create4(D /* tag */, VFromD<D> v0, VFromD<D> v1, VFromD<D> v2,
+                        VFromD<D> v3) {
+  return Vec4<D>{v0, v1, v2, v3};
+}
+
+template <size_t kIndex, class D>
+HWY_API VFromD<D> Get2(Vec2<D> tuple) {
+  static_assert(kIndex < 2, "Tuple index out of bounds");
+  return kIndex == 0 ? tuple.v0 : tuple.v1;
+}
+
+template <size_t kIndex, class D>
+HWY_API VFromD<D> Get3(Vec3<D> tuple) {
+  static_assert(kIndex < 3, "Tuple index out of bounds");
+  return kIndex == 0 ? tuple.v0 : kIndex == 1 ? tuple.v1 : tuple.v2;
+}
+
+template <size_t kIndex, class D>
+HWY_API VFromD<D> Get4(Vec4<D> tuple) {
+  static_assert(kIndex < 4, "Tuple index out of bounds");
+  return kIndex == 0   ? tuple.v0
+         : kIndex == 1 ? tuple.v1
+         : kIndex == 2 ? tuple.v2
+                       : tuple.v3;
+}
+
+template <size_t kIndex, class D>
+HWY_API Vec2<D> Set2(Vec2<D> tuple, VFromD<D> val) {
+  static_assert(kIndex < 2, "Tuple index out of bounds");
+  if (kIndex == 0) {
+    tuple.v0 = val;
+  } else {
+    tuple.v1 = val;
+  }
+  return tuple;
+}
+
+template <size_t kIndex, class D>
+HWY_API Vec3<D> Set3(Vec3<D> tuple, VFromD<D> val) {
+  static_assert(kIndex < 3, "Tuple index out of bounds");
+  if (kIndex == 0) {
+    tuple.v0 = val;
+  } else if (kIndex == 1) {
+    tuple.v1 = val;
+  } else {
+    tuple.v2 = val;
+  }
+  return tuple;
+}
+
+template <size_t kIndex, class D>
+HWY_API Vec4<D> Set4(Vec4<D> tuple, VFromD<D> val) {
+  static_assert(kIndex < 4, "Tuple index out of bounds");
+  if (kIndex == 0) {
+    tuple.v0 = val;
+  } else if (kIndex == 1) {
+    tuple.v1 = val;
+  } else if (kIndex == 2) {
+    tuple.v2 = val;
+  } else {
+    tuple.v3 = val;
+  }
+  return tuple;
+}
+
+#endif  // !HWY_HAVE_SCALABLE || HWY_IDE
+
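For orientation, here is a minimal usage sketch for the tuple helpers above (Create3, Get3, Set3). It is illustrative only and not part of the diff; the function and variable names are hypothetical, and Zero is a core Highway op defined elsewhere in the target headers.

    // Pack three planes into a Vec3, read back the middle one, then clear it.
    template <class D, class V = VFromD<D>>
    V ExtractAndClear(D d, V r, V g, V b, Vec3<D>& rgb) {
      rgb = Create3(d, r, g, b);     // bundle three vectors
      const V green = Get3<1>(rgb);  // compile-time index 1 selects g
      rgb = Set3<1>(rgb, Zero(d));   // replace g with zeros
      return green;
    }
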
+// ------------------------------ Rol/Ror (And, Or, Neg, Shl, Shr)
+#if (defined(HWY_NATIVE_ROL_ROR_8) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_ROL_ROR_8
+#undef HWY_NATIVE_ROL_ROR_8
+#else
+#define HWY_NATIVE_ROL_ROR_8
+#endif
+
+template <class V, HWY_IF_UI8(TFromV<V>)>
+HWY_API V Rol(V a, V b) {
+  const DFromV<decltype(a)> d;
+  const RebindToSigned<decltype(d)> di;
+  const RebindToUnsigned<decltype(d)> du;
+
+  const auto shift_amt_mask = Set(du, uint8_t{7});
+  const auto shl_amt = And(BitCast(du, b), shift_amt_mask);
+  const auto shr_amt = And(BitCast(du, Neg(BitCast(di, b))), shift_amt_mask);
+
+  const auto vu = BitCast(du, a);
+  return BitCast(d, Or(Shl(vu, shl_amt), Shr(vu, shr_amt)));
+}
+
+template <class V, HWY_IF_UI8(TFromV<V>)>
+HWY_API V Ror(V a, V b) {
+  const DFromV<decltype(a)> d;
+  const RebindToSigned<decltype(d)> di;
+  const RebindToUnsigned<decltype(d)> du;
+
+  const auto shift_amt_mask = Set(du, uint8_t{7});
+  const auto shr_amt = And(BitCast(du, b), shift_amt_mask);
+  const auto shl_amt = And(BitCast(du, Neg(BitCast(di, b))), shift_amt_mask);
+
+  const auto vu = BitCast(du, a);
+  return BitCast(d, Or(Shl(vu, shl_amt), Shr(vu, shr_amt)));
+}
+
+#endif  // HWY_NATIVE_ROL_ROR_8
+
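The Rol/Ror bodies above implement the usual two-shift rotate: mask the amount to the lane width, negate-and-mask it for the opposite shift, then OR the two shifted copies; masking the negated amount also makes a rotate by 0 safe, since both shift counts become 0. For reference only (not part of the diff), a scalar sketch of the same per-lane computation:

    #include <stdint.h>

    // Mirrors the 8-bit Rol above, one lane at a time.
    static inline uint8_t ScalarRol8(uint8_t x, uint8_t amt) {
      const unsigned shl = amt & 7u;         // And(BitCast(du, b), shift_amt_mask)
      const unsigned shr = (0u - amt) & 7u;  // And(BitCast(du, Neg(...)), mask)
      return (uint8_t)(((unsigned)x << shl) | ((unsigned)x >> shr));
    }
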
+#if (defined(HWY_NATIVE_ROL_ROR_16) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_ROL_ROR_16
+#undef HWY_NATIVE_ROL_ROR_16
+#else
+#define HWY_NATIVE_ROL_ROR_16
+#endif
+
+template <class V, HWY_IF_UI16(TFromV<V>)>
+HWY_API V Rol(V a, V b) {
+  const DFromV<decltype(a)> d;
+  const RebindToSigned<decltype(d)> di;
+  const RebindToUnsigned<decltype(d)> du;
+
+  const auto shift_amt_mask = Set(du, uint16_t{15});
+  const auto shl_amt = And(BitCast(du, b), shift_amt_mask);
+  const auto shr_amt = And(BitCast(du, Neg(BitCast(di, b))), shift_amt_mask);
+
+  const auto vu = BitCast(du, a);
+  return BitCast(d, Or(Shl(vu, shl_amt), Shr(vu, shr_amt)));
+}
+
+template <class V, HWY_IF_UI16(TFromV<V>)>
+HWY_API V Ror(V a, V b) {
+  const DFromV<decltype(a)> d;
+  const RebindToSigned<decltype(d)> di;
+  const RebindToUnsigned<decltype(d)> du;
+
+  const auto shift_amt_mask = Set(du, uint16_t{15});
+  const auto shr_amt = And(BitCast(du, b), shift_amt_mask);
+  const auto shl_amt = And(BitCast(du, Neg(BitCast(di, b))), shift_amt_mask);
+
+  const auto vu = BitCast(du, a);
+  return BitCast(d, Or(Shl(vu, shl_amt), Shr(vu, shr_amt)));
+}
+
+#endif  // HWY_NATIVE_ROL_ROR_16
+
+#if (defined(HWY_NATIVE_ROL_ROR_32_64) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_ROL_ROR_32_64
+#undef HWY_NATIVE_ROL_ROR_32_64
+#else
+#define HWY_NATIVE_ROL_ROR_32_64
+#endif
+
+template <class V, HWY_IF_UI32(TFromV<V>)>
+HWY_API V Rol(V a, V b) {
+  const DFromV<decltype(a)> d;
+  const RebindToSigned<decltype(d)> di;
+  const RebindToUnsigned<decltype(d)> du;
+
+  const auto shift_amt_mask = Set(du, uint32_t{31});
+  const auto shl_amt = And(BitCast(du, b), shift_amt_mask);
+  const auto shr_amt = And(BitCast(du, Neg(BitCast(di, b))), shift_amt_mask);
+
+  const auto vu = BitCast(du, a);
+  return BitCast(d, Or(Shl(vu, shl_amt), Shr(vu, shr_amt)));
+}
+
+template <class V, HWY_IF_UI32(TFromV<V>)>
+HWY_API V Ror(V a, V b) {
+  const DFromV<decltype(a)> d;
+  const RebindToSigned<decltype(d)> di;
+  const RebindToUnsigned<decltype(d)> du;
+
+  const auto shift_amt_mask = Set(du, uint32_t{31});
+  const auto shr_amt = And(BitCast(du, b), shift_amt_mask);
+  const auto shl_amt = And(BitCast(du, Neg(BitCast(di, b))), shift_amt_mask);
+
+  const auto vu = BitCast(du, a);
+  return BitCast(d, Or(Shl(vu, shl_amt), Shr(vu, shr_amt)));
+}
+
+#if HWY_HAVE_INTEGER64
+template <class V, HWY_IF_UI64(TFromV<V>)>
+HWY_API V Rol(V a, V b) {
+  const DFromV<decltype(a)> d;
+  const RebindToSigned<decltype(d)> di;
+  const RebindToUnsigned<decltype(d)> du;
+
+  const auto shift_amt_mask = Set(du, uint64_t{63});
+  const auto shl_amt = And(BitCast(du, b), shift_amt_mask);
+  const auto shr_amt = And(BitCast(du, Neg(BitCast(di, b))), shift_amt_mask);
+
+  const auto vu = BitCast(du, a);
+  return BitCast(d, Or(Shl(vu, shl_amt), Shr(vu, shr_amt)));
+}
+
+template <class V, HWY_IF_UI64(TFromV<V>)>
+HWY_API V Ror(V a, V b) {
+  const DFromV<decltype(a)> d;
+  const RebindToSigned<decltype(d)> di;
+  const RebindToUnsigned<decltype(d)> du;
+
+  const auto shift_amt_mask = Set(du, uint64_t{63});
+  const auto shr_amt = And(BitCast(du, b), shift_amt_mask);
+  const auto shl_amt = And(BitCast(du, Neg(BitCast(di, b))), shift_amt_mask);
+
+  const auto vu = BitCast(du, a);
+  return BitCast(d, Or(Shl(vu, shl_amt), Shr(vu, shr_amt)));
+}
+#endif  // HWY_HAVE_INTEGER64
+
+#endif  // HWY_NATIVE_ROL_ROR_32_64
+
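A note on the recurring #if (defined(HWY_NATIVE_...) == defined(HWY_TARGET_TOGGLE)) guards, which are easy to misread: Highway recompiles this code once per enabled target (via foreach_target.h), and HWY_TARGET_TOGGLE flips between defined and undefined on each pass. The #define/#undef pair keeps each HWY_NATIVE_* macro in step with that toggle, so the generic fallback above is emitted once per target unless a target-specific header has already flipped the macro to advertise a native implementation. As a sketch of that opt-out (illustrative, not part of this diff), a target header providing native 8-bit rotates would contain the same dance before its own definitions:

    #if (defined(HWY_NATIVE_ROL_ROR_8) == defined(HWY_TARGET_TOGGLE))
    #ifdef HWY_NATIVE_ROL_ROR_8
    #undef HWY_NATIVE_ROL_ROR_8
    #else
    #define HWY_NATIVE_ROL_ROR_8
    #endif
    // ... native per-lane Rol/Ror for this target would go here ...
    #endif  // HWY_NATIVE_ROL_ROR_8
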
+// ------------------------------ RotateLeftSame/RotateRightSame
+
+#if (defined(HWY_NATIVE_ROL_ROR_SAME_8) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_ROL_ROR_SAME_8
+#undef HWY_NATIVE_ROL_ROR_SAME_8
+#else
+#define HWY_NATIVE_ROL_ROR_SAME_8
+#endif
+
+template <class V, HWY_IF_UI8(TFromV<V>)>
+HWY_API V RotateLeftSame(V v, int bits) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+
+  const int shl_amt = bits & 7;
+  const int shr_amt = static_cast<int>((0u - static_cast<unsigned>(bits)) & 7u);
+
+  const auto vu = BitCast(du, v);
+  return BitCast(d,
+                 Or(ShiftLeftSame(vu, shl_amt), ShiftRightSame(vu, shr_amt)));
+}
+
+template <class V, HWY_IF_UI8(TFromV<V>)>
+HWY_API V RotateRightSame(V v, int bits) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+
+  const int shr_amt = bits & 7;
+  const int shl_amt = static_cast<int>((0u - static_cast<unsigned>(bits)) & 7u);
+
+  const auto vu = BitCast(du, v);
+  return BitCast(d,
+                 Or(ShiftLeftSame(vu, shl_amt), ShiftRightSame(vu, shr_amt)));
+}
+
+#endif  // HWY_NATIVE_ROL_ROR_SAME_8
+
+#if (defined(HWY_NATIVE_ROL_ROR_SAME_16) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_ROL_ROR_SAME_16
+#undef HWY_NATIVE_ROL_ROR_SAME_16
+#else
+#define HWY_NATIVE_ROL_ROR_SAME_16
+#endif
+
+template <class V, HWY_IF_UI16(TFromV<V>)>
+HWY_API V RotateLeftSame(V v, int bits) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+
+  const int shl_amt = bits & 15;
+  const int shr_amt =
+      static_cast<int>((0u - static_cast<unsigned>(bits)) & 15u);
+
+  const auto vu = BitCast(du, v);
+  return BitCast(d,
+                 Or(ShiftLeftSame(vu, shl_amt), ShiftRightSame(vu, shr_amt)));
+}
+
+template <class V, HWY_IF_UI16(TFromV<V>)>
+HWY_API V RotateRightSame(V v, int bits) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+
+  const int shr_amt = bits & 15;
+  const int shl_amt =
+      static_cast<int>((0u - static_cast<unsigned>(bits)) & 15u);
+
+  const auto vu = BitCast(du, v);
+  return BitCast(d,
+                 Or(ShiftLeftSame(vu, shl_amt), ShiftRightSame(vu, shr_amt)));
+}
+#endif  // HWY_NATIVE_ROL_ROR_SAME_16
+
+#if (defined(HWY_NATIVE_ROL_ROR_SAME_32_64) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_ROL_ROR_SAME_32_64
+#undef HWY_NATIVE_ROL_ROR_SAME_32_64
+#else
+#define HWY_NATIVE_ROL_ROR_SAME_32_64
+#endif
+
+template <class V, HWY_IF_UI32(TFromV<V>)>
+HWY_API V RotateLeftSame(V v, int bits) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+
+  const int shl_amt = bits & 31;
+  const int shr_amt =
+      static_cast<int>((0u - static_cast<unsigned>(bits)) & 31u);
+
+  const auto vu = BitCast(du, v);
+  return BitCast(d,
+                 Or(ShiftLeftSame(vu, shl_amt), ShiftRightSame(vu, shr_amt)));
+}
+
+template <class V, HWY_IF_UI32(TFromV<V>)>
+HWY_API V RotateRightSame(V v, int bits) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+
+  const int shr_amt = bits & 31;
+  const int shl_amt =
+      static_cast<int>((0u - static_cast<unsigned>(bits)) & 31u);
+
+  const auto vu = BitCast(du, v);
+  return BitCast(d,
+                 Or(ShiftLeftSame(vu, shl_amt), ShiftRightSame(vu, shr_amt)));
+}
+
+#if HWY_HAVE_INTEGER64
+template <class V, HWY_IF_UI64(TFromV<V>)>
+HWY_API V RotateLeftSame(V v, int bits) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+
+  const int shl_amt = bits & 63;
+  const int shr_amt =
+      static_cast<int>((0u - static_cast<unsigned>(bits)) & 63u);
+
+  const auto vu = BitCast(du, v);
+  return BitCast(d,
+                 Or(ShiftLeftSame(vu, shl_amt), ShiftRightSame(vu, shr_amt)));
+}
+
+template <class V, HWY_IF_UI64(TFromV<V>)>
+HWY_API V RotateRightSame(V v, int bits) {
+  const DFromV<decltype(v)> d;
+  const RebindToUnsigned<decltype(d)> du;
+
+  const int shr_amt = bits & 63;
+  const int shl_amt =
+      static_cast<int>((0u - static_cast<unsigned>(bits)) & 63u);
+
+  const auto vu = BitCast(du, v);
+  return BitCast(d,
+                 Or(ShiftLeftSame(vu, shl_amt), ShiftRightSame(vu, shr_amt)));
+}
+#endif  // HWY_HAVE_INTEGER64
+
+#endif  // HWY_NATIVE_ROL_ROR_SAME_32_64
+
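Rol/Ror take the rotate amount from a second vector, one amount per lane, whereas RotateLeftSame/RotateRightSame above apply a single scalar count to every lane. A brief usage sketch (illustrative only, not part of the diff; Iota and Xor are core Highway ops defined in the target headers, and the function name is hypothetical):

    // Contrast per-lane and uniform rotates on 32-bit lanes.
    template <class D, class V = VFromD<D>>
    V RotateDemo(D d, V v) {
      const V amounts = Iota(d, 0);            // lane i is rotated by i & 31
      const V per_lane = Rol(v, amounts);      // variable amount per lane
      const V uniform = RotateLeftSame(v, 8);  // same 8-bit rotate in every lane
      return Xor(per_lane, uniform);
    }
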
+// ------------------------------ PromoteEvenTo/PromoteOddTo
+
+// These are used by target-specific headers for ReorderWidenMulAccumulate etc.
+
+#if HWY_TARGET != HWY_SCALAR || HWY_IDE
+namespace detail {
+
+// Tag dispatch is used in detail::PromoteEvenTo and detail::PromoteOddTo as
+// there are target-specific specializations for some of the
+// detail::PromoteEvenTo and detail::PromoteOddTo cases on
+// SVE/PPC/SSE2/SSSE3/SSE4/AVX2.
+
+// All targets except HWY_SCALAR use the implementations of
+// detail::PromoteEvenTo and detail::PromoteOddTo in generic_ops-inl.h for at
+// least some of the PromoteEvenTo and PromoteOddTo cases.
+
+// Signed to signed PromoteEvenTo/PromoteOddTo
+template <size_t kToLaneSize, class D, class V>
+HWY_INLINE VFromD<D> PromoteEvenTo(
+    hwy::SignedTag /*to_type_tag*/,
+    hwy::SizeTag<kToLaneSize> /*to_lane_size_tag*/,
+    hwy::SignedTag /*from_type_tag*/, D d_to, V v) {
+#if HWY_TARGET_IS_SVE
+  // The intrinsic expects the wide lane type.
+  return NativePromoteEvenTo(BitCast(d_to, v));
+#else
+#if HWY_IS_LITTLE_ENDIAN
+  // On little-endian targets, need to shift each lane of the bitcasted
+  // vector left by kToLaneSize * 4 bits to get the bits of the even
+  // source lanes into the upper kToLaneSize * 4 bits of even_in_hi.
+  const auto even_in_hi = ShiftLeft<kToLaneSize * 4>(BitCast(d_to, v));
+#else
+  // On big-endian targets, the bits of the even source lanes are already
+  // in the upper kToLaneSize * 4 bits of the lanes of the bitcasted
+  // vector.
+  const auto even_in_hi = BitCast(d_to, v);
+#endif
+
+  // Right-shift even_in_hi by kToLaneSize * 4 bits
+  return ShiftRight<kToLaneSize * 4>(even_in_hi);
+#endif  // HWY_TARGET_IS_SVE
+}
+
+// Unsigned to unsigned PromoteEvenTo/PromoteOddTo
+template <size_t kToLaneSize, class D, class V>
+HWY_INLINE VFromD<D> PromoteEvenTo(
+    hwy::UnsignedTag /*to_type_tag*/,
+    hwy::SizeTag<kToLaneSize> /*to_lane_size_tag*/,
+    hwy::UnsignedTag /*from_type_tag*/, D d_to, V v) {
+#if HWY_TARGET_IS_SVE
+  // The intrinsic expects the wide lane type.
+  return NativePromoteEvenTo(BitCast(d_to, v));
+#else
+#if HWY_IS_LITTLE_ENDIAN
+  // On little-endian targets, the bits of the even source lanes are already
+  // in the lower kToLaneSize * 4 bits of the lanes of the bitcasted vector.
+
+  // Simply need to zero out the upper bits of each lane of the bitcasted
+  // vector.
+  return And(BitCast(d_to, v),
+             Set(d_to, static_cast<TFromD<D>>(LimitsMax<TFromV<V>>())));
+#else
+  // On big-endian targets, need to shift each lane of the bitcasted vector
+  // right by kToLaneSize * 4 bits to get the bits of the even source lanes into
+  // the lower kToLaneSize * 4 bits of the result.
+
+  // The right shift below will zero out the upper kToLaneSize * 4 bits of the
+  // result.
+  return ShiftRight<kToLaneSize * 4>(BitCast(d_to, v));
+#endif
+#endif  // HWY_TARGET_IS_SVE
+}
+
+template <size_t kToLaneSize, class D, class V>
+HWY_INLINE VFromD<D> PromoteOddTo(
+    hwy::SignedTag /*to_type_tag*/,
+    hwy::SizeTag<kToLaneSize> /*to_lane_size_tag*/,
+    hwy::SignedTag /*from_type_tag*/, D d_to, V v) {
+#if HWY_IS_LITTLE_ENDIAN
+  // On little-endian targets, the bits of the odd source lanes are already in
+  // the upper kToLaneSize * 4 bits of the lanes of the bitcasted vector.
+  const auto odd_in_hi = BitCast(d_to, v);
+#else
+  // On big-endian targets, need to shift each lane of the bitcasted vector
+  // left by kToLaneSize * 4 bits to get the bits of the odd source lanes into
+  // the upper kToLaneSize * 4 bits of odd_in_hi.
+  const auto odd_in_hi = ShiftLeft<kToLaneSize * 4>(BitCast(d_to, v));
+#endif
+
+  // Right-shift odd_in_hi by kToLaneSize * 4 bits
+  return ShiftRight<kToLaneSize * 4>(odd_in_hi);
+}
+
+template <size_t kToLaneSize, class D, class V>
+HWY_INLINE VFromD<D> PromoteOddTo(
+    hwy::UnsignedTag /*to_type_tag*/,
+    hwy::SizeTag<kToLaneSize> /*to_lane_size_tag*/,
+    hwy::UnsignedTag /*from_type_tag*/, D d_to, V v) {
+#if HWY_IS_LITTLE_ENDIAN
+  // On little-endian targets, need to shift each lane of the bitcasted vector
+  // right by kToLaneSize * 4 bits to get the bits of the odd source lanes into
+  // the lower kToLaneSize * 4 bits of the result.
+
+  // The right shift below will zero out the upper kToLaneSize * 4 bits of the
+  // result.
+  return ShiftRight<kToLaneSize * 4>(BitCast(d_to, v));
+#else
+  // On big-endian targets, the bits of the even source lanes are already
+  // in the lower kToLaneSize * 4 bits of the lanes of the bitcasted vector.
+
+  // Simply need to zero out the upper bits of each lane of the bitcasted
+  // vector.
+  return And(BitCast(d_to, v),
+             Set(d_to, static_cast<TFromD<D>>(LimitsMax<TFromV<V>>())));
+#endif
+}
+
+// Unsigned to signed: Same as unsigned->unsigned PromoteEvenTo/PromoteOddTo
+// followed by BitCast to signed
+template <size_t kToLaneSize, class D, class V>
+HWY_INLINE VFromD<D> PromoteEvenTo(
+    hwy::SignedTag /*to_type_tag*/,
+    hwy::SizeTag<kToLaneSize> /*to_lane_size_tag*/,
+    hwy::UnsignedTag /*from_type_tag*/, D d_to, V v) {
+  const RebindToUnsigned<decltype(d_to)> du_to;
+  return BitCast(d_to,
+                 PromoteEvenTo(hwy::UnsignedTag(), hwy::SizeTag<kToLaneSize>(),
+                               hwy::UnsignedTag(), du_to, v));
+}
+
+template <size_t kToLaneSize, class D, class V>
+HWY_INLINE VFromD<D> PromoteOddTo(
+    hwy::SignedTag /*to_type_tag*/,
+    hwy::SizeTag<kToLaneSize> /*to_lane_size_tag*/,
+    hwy::UnsignedTag /*from_type_tag*/, D d_to, V v) {
+  const RebindToUnsigned<decltype(d_to)> du_to;
+  return BitCast(d_to,
+                 PromoteOddTo(hwy::UnsignedTag(), hwy::SizeTag<kToLaneSize>(),
+                              hwy::UnsignedTag(), du_to, v));
+}
+
+// BF16->F32 PromoteEvenTo
+
+// NOTE: It is possible for FromTypeTag to be hwy::SignedTag or hwy::UnsignedTag
+// instead of hwy::FloatTag on targets that use scalable vectors.
+
+// VBF16 is considered to be a bfloat16_t vector if TFromV<VBF16> is the same
+// type as TFromV<VFromD<Repartition<bfloat16_t, DF32>>>
+
+// The BF16->F32 PromoteEvenTo overload is only enabled if VBF16 is considered
+// to be a bfloat16_t vector.
+template <class FromTypeTag, class DF32, class VBF16,
+          class VBF16_2 = VFromD<Repartition<bfloat16_t, DF32>>,
+          hwy::EnableIf<IsSame<TFromV<VBF16>, TFromV<VBF16_2>>()>* = nullptr>
+HWY_INLINE VFromD<DF32> PromoteEvenTo(hwy::FloatTag /*to_type_tag*/,
+                                      hwy::SizeTag<4> /*to_lane_size_tag*/,
+                                      FromTypeTag /*from_type_tag*/, DF32 d_to,
+                                      VBF16 v) {
+  const RebindToUnsigned<decltype(d_to)> du_to;
+#if HWY_IS_LITTLE_ENDIAN
+  // On little-endian platforms, need to shift left each lane of the bitcasted
+  // vector by 16 bits.
+  return BitCast(d_to, ShiftLeft<16>(BitCast(du_to, v)));
+#else
+  // On big-endian platforms, the even lanes of the source vector are already
+  // in the upper 16 bits of the lanes of the bitcasted vector.
+
+  // Need to simply zero out the lower 16 bits of each lane of the bitcasted
+  // vector.
+  return BitCast(d_to,
+                 And(BitCast(du_to, v), Set(du_to, uint32_t{0xFFFF0000u})));
+#endif
+}
+
+// BF16->F32 PromoteOddTo
+
+// NOTE: It is possible for FromTypeTag to be hwy::SignedTag or hwy::UnsignedTag
+// instead of hwy::FloatTag on targets that use scalable vectors.
+
+// VBF16 is considered to be a bfloat16_t vector if TFromV<VBF16> is the same
+// type as TFromV<VFromD<Repartition<bfloat16_t, DF32>>>
+
+// The BF16->F32 PromoteEvenTo overload is only enabled if VBF16 is considered
+// to be a bfloat16_t vector.
+template <class FromTypeTag, class DF32, class VBF16,
+          class VBF16_2 = VFromD<Repartition<bfloat16_t, DF32>>,
+          hwy::EnableIf<IsSame<TFromV<VBF16>, TFromV<VBF16_2>>()>* = nullptr>
+HWY_INLINE VFromD<DF32> PromoteOddTo(hwy::FloatTag /*to_type_tag*/,
+                                     hwy::SizeTag<4> /*to_lane_size_tag*/,
+                                     FromTypeTag /*from_type_tag*/, DF32 d_to,
+                                     VBF16 v) {
+  const RebindToUnsigned<decltype(d_to)> du_to;
+#if HWY_IS_LITTLE_ENDIAN
+  // On little-endian platforms, the odd lanes of the source vector are already
+  // in the upper 16 bits of the lanes of the bitcasted vector.
+
+  // Need to simply zero out the lower 16 bits of each lane of the bitcasted
+  // vector.
+  return BitCast(d_to,
+                 And(BitCast(du_to, v), Set(du_to, uint32_t{0xFFFF0000u})));
+#else
+  // On big-endian platforms, need to shift left each lane of the bitcasted
+  // vector by 16 bits.
+  return BitCast(d_to, ShiftLeft<16>(BitCast(du_to, v)));
+#endif
+}
+
+// Default PromoteEvenTo/PromoteOddTo implementations
+template <class ToTypeTag, size_t kToLaneSize, class FromTypeTag, class D,
+          class V, HWY_IF_LANES_D(D, 1)>
+HWY_INLINE VFromD<D> PromoteEvenTo(
+    ToTypeTag /*to_type_tag*/, hwy::SizeTag<kToLaneSize> /*to_lane_size_tag*/,
+    FromTypeTag /*from_type_tag*/, D d_to, V v) {
+  return PromoteLowerTo(d_to, v);
+}
+
+template <class ToTypeTag, size_t kToLaneSize, class FromTypeTag, class D,
+          class V, HWY_IF_LANES_GT_D(D, 1)>
+HWY_INLINE VFromD<D> PromoteEvenTo(
+    ToTypeTag /*to_type_tag*/, hwy::SizeTag<kToLaneSize> /*to_lane_size_tag*/,
+    FromTypeTag /*from_type_tag*/, D d_to, V v) {
+  const DFromV<decltype(v)> d;
+  return PromoteLowerTo(d_to, ConcatEven(d, v, v));
+}
+
+template <class ToTypeTag, size_t kToLaneSize, class FromTypeTag, class D,
+          class V>
+HWY_INLINE VFromD<D> PromoteOddTo(
+    ToTypeTag /*to_type_tag*/, hwy::SizeTag<kToLaneSize> /*to_lane_size_tag*/,
+    FromTypeTag /*from_type_tag*/, D d_to, V v) {
+  const DFromV<decltype(v)> d;
+  return PromoteLowerTo(d_to, ConcatOdd(d, v, v));
+}
+
+}  // namespace detail
+
+template <class D, class V, HWY_IF_T_SIZE_D(D, 2 * sizeof(TFromV<V>)),
+          class V2 = VFromD<Repartition<TFromV<V>, D>>,
+          HWY_IF_LANES_D(DFromV<V>, HWY_MAX_LANES_V(V2))>
+HWY_API VFromD<D> PromoteEvenTo(D d, V v) {
+  return detail::PromoteEvenTo(hwy::TypeTag<TFromD<D>>(),
+                               hwy::SizeTag<sizeof(TFromD<D>)>(),
+                               hwy::TypeTag<TFromV<V>>(), d, v);
+}
+
+template <class D, class V, HWY_IF_T_SIZE_D(D, 2 * sizeof(TFromV<V>)),
+          class V2 = VFromD<Repartition<TFromV<V>, D>>,
+          HWY_IF_LANES_D(DFromV<V>, HWY_MAX_LANES_V(V2))>
+HWY_API VFromD<D> PromoteOddTo(D d, V v) {
+  return detail::PromoteOddTo(hwy::TypeTag<TFromD<D>>(),
+                              hwy::SizeTag<sizeof(TFromD<D>)>(),
+                              hwy::TypeTag<TFromV<V>>(), d, v);
+}
+#endif  // HWY_TARGET != HWY_SCALAR
+
+#ifdef HWY_INSIDE_END_NAMESPACE
+#undef HWY_INSIDE_END_NAMESPACE
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+#endif
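The public PromoteEvenTo/PromoteOddTo wrappers near the end of this file dispatch to the detail:: overloads via TypeTag and SizeTag, widening the even- or odd-indexed lanes to a lane type twice as wide. A closing usage sketch (illustrative only, not part of the diff; Add is a core Highway op, the function name is hypothetical, and d16/bytes are assumed to satisfy the Repartition relationship required by the HWY_IF_LANES_D constraint):

    // Sum adjacent uint8_t pairs into uint16_t lanes.
    template <class D16, class V8>
    VFromD<D16> SumAdjacentBytes(D16 d16, V8 bytes) {
      const VFromD<D16> even = PromoteEvenTo(d16, bytes);  // lanes 0, 2, 4, ...
      const VFromD<D16> odd = PromoteOddTo(d16, bytes);    // lanes 1, 3, 5, ...
      return Add(even, odd);  // one pairwise sum per u16 lane
    }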