@img/sharp-libvips-dev 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/cplusplus/VConnection.cpp +54 -54
- package/cplusplus/VError.cpp +20 -18
- package/cplusplus/VImage.cpp +636 -589
- package/cplusplus/VInterpolate.cpp +22 -22
- package/cplusplus/VRegion.cpp +4 -4
- package/cplusplus/vips-operators.cpp +2326 -2301
- package/include/aom/aom_codec.h +10 -6
- package/include/aom/aom_decoder.h +1 -1
- package/include/aom/aom_encoder.h +9 -2
- package/include/aom/aomcx.h +72 -3
- package/include/cairo/cairo-ft.h +1 -1
- package/include/cairo/cairo-gobject.h +8 -0
- package/include/cairo/cairo-svg.h +3 -3
- package/include/cairo/cairo-version.h +2 -2
- package/include/cairo/cairo.h +91 -24
- package/include/glib-2.0/glib/gmacros.h +1 -1
- package/include/glib-2.0/glib/gtestutils.h +1 -1
- package/include/glib-2.0/gobject/gtype.h +7 -7
- package/include/harfbuzz/hb-version.h +2 -2
- package/include/hwy/aligned_allocator.h +211 -0
- package/include/hwy/base.h +1517 -0
- package/include/hwy/cache_control.h +108 -0
- package/include/hwy/detect_compiler_arch.h +281 -0
- package/include/hwy/detect_targets.h +644 -0
- package/include/hwy/foreach_target.h +340 -0
- package/include/hwy/highway.h +435 -0
- package/include/hwy/highway_export.h +74 -0
- package/include/hwy/nanobenchmark.h +171 -0
- package/include/hwy/ops/arm_neon-inl.h +8913 -0
- package/include/hwy/ops/arm_sve-inl.h +5105 -0
- package/include/hwy/ops/emu128-inl.h +2811 -0
- package/include/hwy/ops/generic_ops-inl.h +4745 -0
- package/include/hwy/ops/ppc_vsx-inl.h +5716 -0
- package/include/hwy/ops/rvv-inl.h +5070 -0
- package/include/hwy/ops/scalar-inl.h +1995 -0
- package/include/hwy/ops/set_macros-inl.h +578 -0
- package/include/hwy/ops/shared-inl.h +539 -0
- package/include/hwy/ops/tuple-inl.h +125 -0
- package/include/hwy/ops/wasm_128-inl.h +5917 -0
- package/include/hwy/ops/x86_128-inl.h +11173 -0
- package/include/hwy/ops/x86_256-inl.h +7529 -0
- package/include/hwy/ops/x86_512-inl.h +6849 -0
- package/include/hwy/per_target.h +44 -0
- package/include/hwy/print-inl.h +62 -0
- package/include/hwy/print.h +75 -0
- package/include/hwy/robust_statistics.h +148 -0
- package/include/hwy/targets.h +338 -0
- package/include/hwy/timer-inl.h +200 -0
- package/include/hwy/timer.h +55 -0
- package/include/jconfig.h +2 -2
- package/include/jpeglib.h +3 -2
- package/include/libheif/heif.h +461 -384
- package/include/libheif/heif_cxx.h +4 -1
- package/include/libheif/heif_plugin.h +1 -1
- package/include/libheif/heif_properties.h +138 -0
- package/include/libheif/heif_regions.h +866 -0
- package/include/libheif/heif_version.h +3 -3
- package/include/libpng16/pnglibconf.h +1 -1
- package/include/pnglibconf.h +1 -1
- package/include/vips/VConnection8.h +43 -49
- package/include/vips/VError8.h +27 -24
- package/include/vips/VImage8.h +4861 -4597
- package/include/vips/VInterpolate8.h +24 -27
- package/include/vips/VRegion8.h +32 -33
- package/include/vips/arithmetic.h +169 -169
- package/include/vips/basic.h +33 -33
- package/include/vips/buf.h +56 -54
- package/include/vips/colour.h +95 -95
- package/include/vips/connection.h +190 -193
- package/include/vips/conversion.h +91 -91
- package/include/vips/convolution.h +36 -30
- package/include/vips/create.h +63 -63
- package/include/vips/dbuf.h +35 -37
- package/include/vips/debug.h +65 -33
- package/include/vips/draw.h +41 -41
- package/include/vips/enumtypes.h +54 -51
- package/include/vips/error.h +63 -63
- package/include/vips/foreign.h +263 -223
- package/include/vips/format.h +48 -48
- package/include/vips/freqfilt.h +22 -22
- package/include/vips/gate.h +55 -47
- package/include/vips/generate.h +34 -34
- package/include/vips/header.h +111 -101
- package/include/vips/histogram.h +28 -28
- package/include/vips/image.h +213 -213
- package/include/vips/interpolate.h +40 -41
- package/include/vips/memory.h +61 -52
- package/include/vips/morphology.h +24 -24
- package/include/vips/mosaicing.h +32 -33
- package/include/vips/object.h +371 -357
- package/include/vips/operation.h +68 -67
- package/include/vips/private.h +76 -76
- package/include/vips/rect.h +26 -26
- package/include/vips/region.h +92 -92
- package/include/vips/resample.h +38 -38
- package/include/vips/sbuf.h +53 -54
- package/include/vips/semaphore.h +24 -24
- package/include/vips/thread.h +30 -27
- package/include/vips/threadpool.h +48 -49
- package/include/vips/transform.h +39 -39
- package/include/vips/type.h +90 -85
- package/include/vips/util.h +274 -229
- package/include/vips/vector.h +24 -144
- package/include/vips/version.h +9 -9
- package/include/vips/vips.h +41 -40
- package/include/zlib.h +23 -19
- package/package.json +1 -1
- package/versions.json +9 -9
|
@@ -0,0 +1,539 @@
|
|
|
1
|
+
// Copyright 2020 Google LLC
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
//
|
|
4
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
// you may not use this file except in compliance with the License.
|
|
6
|
+
// You may obtain a copy of the License at
|
|
7
|
+
//
|
|
8
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
//
|
|
10
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
// See the License for the specific language governing permissions and
|
|
14
|
+
// limitations under the License.
|
|
15
|
+
|
|
16
|
+
// Per-target definitions shared by ops/*.h and user code.
|
|
17
|
+
|
|
18
|
+
// IWYU pragma: begin_exports
|
|
19
|
+
// Export does not seem to be recursive, so re-export these (also in base.h)
|
|
20
|
+
#include <stddef.h>
|
|
21
|
+
|
|
22
|
+
#include "hwy/base.h"
|
|
23
|
+
// "IWYU pragma: keep" does not work for this include, so hide it from the IDE.
|
|
24
|
+
#if !HWY_IDE
|
|
25
|
+
#include <stdint.h>
|
|
26
|
+
#endif
|
|
27
|
+
|
|
28
|
+
#include "hwy/detect_compiler_arch.h"
|
|
29
|
+
|
|
30
|
+
// Separate header because foreach_target.h re-enables its include guard.
|
|
31
|
+
#include "hwy/ops/set_macros-inl.h"
|
|
32
|
+
|
|
33
|
+
// IWYU pragma: end_exports
|
|
34
|
+
|
|
35
|
+
#if HWY_IS_MSAN
|
|
36
|
+
#include <sanitizer/msan_interface.h>
|
|
37
|
+
#endif
|
|
38
|
+
|
|
39
|
+
// We are covered by the highway.h include guard, but generic_ops-inl.h
|
|
40
|
+
// includes this again #if HWY_IDE.
|
|
41
|
+
#if defined(HIGHWAY_HWY_OPS_SHARED_TOGGLE) == defined(HWY_TARGET_TOGGLE)
|
|
42
|
+
#ifdef HIGHWAY_HWY_OPS_SHARED_TOGGLE
|
|
43
|
+
#undef HIGHWAY_HWY_OPS_SHARED_TOGGLE
|
|
44
|
+
#else
|
|
45
|
+
#define HIGHWAY_HWY_OPS_SHARED_TOGGLE
|
|
46
|
+
#endif
|
|
47
|
+
|
|
48
|
+
HWY_BEFORE_NAMESPACE();
|
|
49
|
+
namespace hwy {
|
|
50
|
+
namespace HWY_NAMESPACE {
|
|
51
|
+
|
|
52
|
+
// NOTE: GCC generates incorrect code for vector arguments to non-inlined
|
|
53
|
+
// functions in two situations:
|
|
54
|
+
// - on Windows and GCC 10.3, passing by value crashes due to unaligned loads:
|
|
55
|
+
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412.
|
|
56
|
+
// - on aarch64 and GCC 9.3.0 or 11.2.1, passing by value causes many (but not
|
|
57
|
+
// all) tests to fail.
|
|
58
|
+
//
|
|
59
|
+
// We therefore pass by const& only on GCC and (Windows or aarch64). This alias
|
|
60
|
+
// must be used for all vector/mask parameters of functions marked HWY_NOINLINE,
|
|
61
|
+
// and possibly also other functions that are not inlined.
|
|
62
|
+
// Parameter type for vector/mask arguments of functions that are not inlined.
// Only GCC targeting Windows or aarch64 needs the const-reference form (see
// the GCC issues described above); all other configurations pass by value.
#if !HWY_COMPILER_GCC_ACTUAL || !(HWY_OS_WIN || HWY_ARCH_ARM_A64)
template <class V>
using VecArg = V;
#else  // GCC and (Windows or aarch64)
template <class V>
using VecArg = const V&;
#endif
|
|
69
|
+
|
|
70
|
+
namespace detail {
|
|
71
|
+
|
|
72
|
+
// Primary template: default is no change for all but f16.
|
|
73
|
+
template <typename T>
|
|
74
|
+
struct NativeLaneTypeT {
|
|
75
|
+
using type = T;
|
|
76
|
+
};
|
|
77
|
+
|
|
78
|
+
template <>
|
|
79
|
+
struct NativeLaneTypeT<hwy::float16_t> {
|
|
80
|
+
using type = hwy::float16_t::Raw;
|
|
81
|
+
};
|
|
82
|
+
|
|
83
|
+
template <>
|
|
84
|
+
struct NativeLaneTypeT<hwy::bfloat16_t> {
|
|
85
|
+
using type = hwy::bfloat16_t::Raw;
|
|
86
|
+
};
|
|
87
|
+
|
|
88
|
+
// Evaluates to the type expected by intrinsics given the Highway lane type T.
|
|
89
|
+
// This is usually the same, but differs for our wrapper types [b]float16_t.
|
|
90
|
+
template <typename T>
|
|
91
|
+
using NativeLaneType = typename NativeLaneTypeT<T>::type;
|
|
92
|
+
|
|
93
|
+
// Returns N * 2^pow2: N shifted left by pow2 if pow2 is non-negative, otherwise
// shifted right by -pow2 (rounding toward zero). N is the number of lanes in a
// full vector and pow2 the desired fraction or multiple of it, see Simd<>.
// `pow2` is most often in [-3, 3] but can also be lower for user-specified
// fractions.
//
// A right shift by at least the bit width of size_t is undefined behavior (and
// ill-formed in a constant expression), so such requests return 0, which is
// the exact result of the truncating division. The comparison is made before
// negating pow2, which also avoids signed overflow for INT_MIN. Simd<> still
// rejects too-small kPow2 via its own static_assert.
//
// Kept as a single return statement so this remains a valid C++11 constexpr
// function.
constexpr size_t ScaleByPower(size_t N, int pow2) {
  return pow2 >= 0 ? (N << pow2)
         : (pow2 <= -static_cast<int>(sizeof(size_t) * 8))
             ? size_t{0}
             : (N >> (-pow2));
}
|
|
99
|
+
|
|
100
|
+
template <typename T>
|
|
101
|
+
HWY_INLINE void MaybeUnpoison(T* HWY_RESTRICT unaligned, size_t count) {
|
|
102
|
+
// Workaround for MSAN not marking compressstore as initialized (b/233326619)
|
|
103
|
+
#if HWY_IS_MSAN
|
|
104
|
+
__msan_unpoison(unaligned, count * sizeof(T));
|
|
105
|
+
#else
|
|
106
|
+
(void)unaligned;
|
|
107
|
+
(void)count;
|
|
108
|
+
#endif
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
} // namespace detail
|
|
112
|
+
|
|
113
|
+
// Highway operations are implemented as overloaded functions selected using a
|
|
114
|
+
// zero-sized tag type D := Simd<T, N, kPow2>. T denotes the lane type.
|
|
115
|
+
//
|
|
116
|
+
// N defines how many lanes are in a 'full' vector, typically equal to
|
|
117
|
+
// HWY_LANES(T) (which is the actual count on targets with vectors of known
|
|
118
|
+
// size, and an upper bound in case of scalable vectors), otherwise a
|
|
119
|
+
// user-specified limit at most that large.
|
|
120
|
+
//
|
|
121
|
+
// 2^kPow2 is a _subsequently_ applied scaling factor that indicates the
|
|
122
|
+
// desired fraction of a 'full' vector: 0 means full, -1 means half; 1,2,3
|
|
123
|
+
// means two/four/eight full vectors ganged together. The largest supported
|
|
124
|
+
// kPow2 is `HWY_MAX_POW2` and the aliases below take care of clamping
|
|
125
|
+
// user-specified values to that. Note that `Simd<T, 1, 0>` and `Simd<T, 2, -1>`
|
|
126
|
+
// have the same `MaxLanes` and `Lanes`.
|
|
127
|
+
//
|
|
128
|
+
// We can theoretically keep halving Lanes(), but recursive instantiations of
|
|
129
|
+
// kPow2 - 1 will eventually fail e.g. because -64 is not a valid shift count.
|
|
130
|
+
// Users must terminate such compile-time recursions at or above HWY_MIN_POW2.
|
|
131
|
+
//
|
|
132
|
+
// WARNING: do not use N directly because it may be a special representation of
|
|
133
|
+
// a fractional MaxLanes. This arises when we Rebind Simd<uint8_t, 1, 0> to
|
|
134
|
+
// Simd<uint32_t, ??, 2>. RVV requires that the last argument (kPow2) be two,
|
|
135
|
+
// but we want MaxLanes to be the same in both cases. Hence ?? is a
|
|
136
|
+
// fixed-point encoding of 1/4.
|
|
137
|
+
//
|
|
138
|
+
// Instead of referring to Simd<> directly, users create D via aliases:
|
|
139
|
+
// - ScalableTag<T> for a full vector;
|
|
140
|
+
// - ScalableTag<T, kPow2>() for a fraction/group, where `kPow2` is
|
|
141
|
+
// interpreted as `HWY_MIN(kPow2, HWY_MAX_POW2)`;
|
|
142
|
+
// - CappedTag<T, kLimit> for a vector with up to kLimit lanes; or
|
|
143
|
+
// - FixedTag<T, kNumLanes> for a vector with exactly kNumLanes lanes.
|
|
144
|
+
//
|
|
145
|
+
// Instead of N, use Lanes(D()) for the actual number of lanes at runtime and
|
|
146
|
+
// D().MaxLanes() for a constexpr upper bound. Both are powers of two.
|
|
147
|
+
template <typename Lane, size_t N, int kPow2>
|
|
148
|
+
struct Simd {
|
|
149
|
+
constexpr Simd() = default;
|
|
150
|
+
using T = Lane;
|
|
151
|
+
|
|
152
|
+
private:
|
|
153
|
+
static_assert(sizeof(Lane) <= 8, "Lanes are up to 64-bit");
|
|
154
|
+
// 20 bits are sufficient for any HWY_MAX_BYTES. This is the 'normal' value of
|
|
155
|
+
// N when kFrac == 0, otherwise it is one (see FracN).
|
|
156
|
+
static constexpr size_t kWhole = N & 0xFFFFF;
|
|
157
|
+
// Fractional part is in the bits above kWhole.
|
|
158
|
+
static constexpr int kFrac = static_cast<int>(N >> 20);
|
|
159
|
+
// Can be 8x larger because kPow2 may be as low as -3 (Rebind of a larger
|
|
160
|
+
// type to u8 results in fractions).
|
|
161
|
+
static_assert(kWhole <= 8 * HWY_MAX_N && kFrac <= 3, "Out of range");
|
|
162
|
+
static_assert(kFrac == 0 || kWhole == 1, "If frac, whole must be 1");
|
|
163
|
+
static_assert((kWhole & (kWhole - 1)) == 0 && kWhole != 0, "Not 2^x");
|
|
164
|
+
// Important to check this here because kPow2 <= -64 causes confusing
|
|
165
|
+
// compile errors (invalid shift count).
|
|
166
|
+
static_assert(kPow2 >= HWY_MIN_POW2, "Forgot kPow2 recursion terminator?");
|
|
167
|
+
// However, do NOT verify kPow2 <= HWY_MAX_POW2 - users should be able to
|
|
168
|
+
// Rebind<uint64_t, ScalableTag<uint8_t, 3>> in order to discover that its
|
|
169
|
+
// kPow2 is out of bounds.
|
|
170
|
+
|
|
171
|
+
public:
|
|
172
|
+
// Upper bound on the number of lanes (tight if !HWY_HAVE_SCALABLE). In the
|
|
173
|
+
// common case, N == kWhole, but if kFrac is nonzero, we deduct it from kPow2.
|
|
174
|
+
// E.g. Rebind<uint32_t, Simd<uint8_t, 1, 0>> is Simd<uint32_t, 0x200001, 2>.
|
|
175
|
+
// The resulting number of lanes is still 1 because this N represents 1/4
|
|
176
|
+
// (the ratio of the sizes). Note that RVV requires kPow2 to be the ratio of
|
|
177
|
+
// the sizes so that the correct LMUL overloads are chosen, even if N is
|
|
178
|
+
// small enough that it would fit in an LMUL=1 vector.
|
|
179
|
+
//
|
|
180
|
+
// Cannot be an enum because GCC warns when using enums and non-enums in the
|
|
181
|
+
// same expression. Cannot be a static constexpr function (MSVC limitation).
|
|
182
|
+
// Rounded up to one so this is a valid array length.
|
|
183
|
+
//
|
|
184
|
+
// Do not use this directly - only 'public' so it is visible from the accessor
|
|
185
|
+
// macro required by MSVC.
|
|
186
|
+
static constexpr size_t kPrivateLanes =
|
|
187
|
+
HWY_MAX(size_t{1}, detail::ScaleByPower(kWhole, kPow2 - kFrac));
|
|
188
|
+
|
|
189
|
+
constexpr size_t MaxLanes() const { return kPrivateLanes; }
|
|
190
|
+
constexpr size_t MaxBytes() const { return kPrivateLanes * sizeof(Lane); }
|
|
191
|
+
constexpr size_t MaxBlocks() const { return (MaxBytes() + 15) / 16; }
|
|
192
|
+
// For SFINAE on RVV.
|
|
193
|
+
constexpr int Pow2() const { return kPow2; }
|
|
194
|
+
|
|
195
|
+
// ------------------------------ Changing lane type or count
|
|
196
|
+
// Do not use any of these directly. Anything used from member typedefs cannot
|
|
197
|
+
// be made private, but functions only used within other functions can.
|
|
198
|
+
|
|
199
|
+
// Returns number of NewT lanes that fit within MaxBytes().
|
|
200
|
+
template <typename NewT>
|
|
201
|
+
static constexpr size_t RepartitionLanes() {
|
|
202
|
+
// Round up to correctly handle larger NewT.
|
|
203
|
+
return (kPrivateLanes * sizeof(T) + sizeof(NewT) - 1) / sizeof(NewT);
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
// Returns the new kPow2 required for lanes of type NewT.
|
|
207
|
+
template <typename NewT>
|
|
208
|
+
static constexpr int RebindPow2() {
|
|
209
|
+
return kPow2 +
|
|
210
|
+
((sizeof(NewT) >= sizeof(T))
|
|
211
|
+
? static_cast<int>(CeilLog2(sizeof(NewT) / sizeof(T)))
|
|
212
|
+
: -static_cast<int>(CeilLog2(sizeof(T) / sizeof(NewT))));
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
private:
|
|
216
|
+
// Returns 0 or whole NewN such that kNewMaxLanes = NewN * 2^kNewPow2.
|
|
217
|
+
template <int kNewPow2, size_t kNewMaxLanes>
|
|
218
|
+
static constexpr size_t WholeN() {
|
|
219
|
+
return detail::ScaleByPower(kNewMaxLanes, -kNewPow2);
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
// Returns fractional NewN such that kNewMaxLanes = NewN * 2^kNewPow2.
|
|
223
|
+
template <int kNewPow2, size_t kNewMaxLanes>
|
|
224
|
+
static constexpr size_t FracN() {
|
|
225
|
+
// Only reached if kNewPow2 > CeilLog2(kNewMaxLanes) >= 0 (else WholeN
|
|
226
|
+
// would not have been zero), but clamp to zero to avoid warnings. kFrac is
|
|
227
|
+
// the difference, stored in the upper bits of N, and we also set kWhole =
|
|
228
|
+
// 1 so that the new kPrivateLanes = kNewMaxLanes.
|
|
229
|
+
static_assert(HWY_MAX_N <= (size_t{1} << 20), "Change bit shift");
|
|
230
|
+
return static_cast<size_t>(
|
|
231
|
+
1 + (HWY_MAX(0, kNewPow2 - static_cast<int>(CeilLog2(kNewMaxLanes)))
|
|
232
|
+
<< 20));
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
public:
|
|
236
|
+
// Returns (whole or fractional) NewN, see above.
|
|
237
|
+
template <int kNewPow2, size_t kNewMaxLanes>
|
|
238
|
+
static constexpr size_t NewN() {
|
|
239
|
+
// We require a fraction if inverting kNewPow2 results in 0.
|
|
240
|
+
return WholeN<kNewPow2, kNewMaxLanes>() == 0
|
|
241
|
+
? FracN<kNewPow2, kNewMaxLanes>()
|
|
242
|
+
: WholeN<kNewPow2, kNewMaxLanes>();
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
// PromoteTo/DemoteTo() with another lane type, but same number of lanes.
|
|
246
|
+
template <typename NewT>
|
|
247
|
+
using Rebind =
|
|
248
|
+
Simd<NewT, NewN<RebindPow2<NewT>(), kPrivateLanes>(), RebindPow2<NewT>()>;
|
|
249
|
+
|
|
250
|
+
// Change lane type while keeping the same vector size, e.g. for MulEven.
|
|
251
|
+
template <typename NewT>
|
|
252
|
+
using Repartition =
|
|
253
|
+
Simd<NewT, NewN<kPow2, RepartitionLanes<NewT>()>(), kPow2>;
|
|
254
|
+
|
|
255
|
+
// Half the lanes while keeping the same lane type, e.g. for LowerHalf.
|
|
256
|
+
using Half = Simd<T, N, kPow2 - 1>;
|
|
257
|
+
|
|
258
|
+
// Twice the lanes while keeping the same lane type, e.g. for Combine.
|
|
259
|
+
using Twice = Simd<T, N, kPow2 + 1>;
|
|
260
|
+
};
|
|
261
|
+
|
|
262
|
+
namespace detail {
|
|
263
|
+
|
|
264
|
+
template <typename T, size_t N, int kPow2>
|
|
265
|
+
constexpr bool IsFull(Simd<T, N, kPow2> /* d */) {
|
|
266
|
+
return N == HWY_LANES(T) && kPow2 == 0;
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
// Struct wrappers enable validation of arguments via static_assert.
|
|
270
|
+
template <typename T, size_t N, int kPow2>
|
|
271
|
+
struct ClampNAndPow2 {
|
|
272
|
+
using type = Simd<T, HWY_MIN(N, HWY_MAX_N), HWY_MIN(kPow2, HWY_MAX_POW2)>;
|
|
273
|
+
};
|
|
274
|
+
|
|
275
|
+
template <typename T, int kPow2>
|
|
276
|
+
struct ScalableTagChecker {
|
|
277
|
+
using type = typename ClampNAndPow2<T, HWY_LANES(T), kPow2>::type;
|
|
278
|
+
};
|
|
279
|
+
|
|
280
|
+
template <typename T, size_t kLimit, int kPow2>
|
|
281
|
+
struct CappedTagChecker {
|
|
282
|
+
static_assert(kLimit != 0, "Does not make sense to have zero lanes");
|
|
283
|
+
// Safely handle non-power-of-two inputs by rounding down, which is allowed by
|
|
284
|
+
// CappedTag. Otherwise, Simd<T, 3, 0> would static_assert.
|
|
285
|
+
static constexpr size_t kLimitPow2 = size_t{1} << hwy::FloorLog2(kLimit);
|
|
286
|
+
static constexpr size_t N = HWY_MIN(kLimitPow2, HWY_LANES(T));
|
|
287
|
+
using type = typename ClampNAndPow2<T, N, kPow2>::type;
|
|
288
|
+
};
|
|
289
|
+
|
|
290
|
+
template <typename T, size_t kNumLanes>
|
|
291
|
+
struct FixedTagChecker {
|
|
292
|
+
static_assert(kNumLanes != 0, "Does not make sense to have zero lanes");
|
|
293
|
+
static_assert(kNumLanes <= HWY_LANES(T), "Too many lanes");
|
|
294
|
+
using type = Simd<T, kNumLanes, 0>;
|
|
295
|
+
};
|
|
296
|
+
|
|
297
|
+
} // namespace detail
|
|
298
|
+
|
|
299
|
+
// ------------------------------ Aliases for Simd<>
|
|
300
|
+
|
|
301
|
+
// Tag describing a full vector (kPow2 == 0: the most common usage, e.g. 1D
|
|
302
|
+
// loops where the application does not care about the vector size) or a
|
|
303
|
+
// fraction/multiple of one. Fractions (kPow2 < 0) are useful for arguments or
|
|
304
|
+
// return values of type promotion and demotion. User-specified kPow2 is
|
|
305
|
+
// interpreted as `HWY_MIN(kPow2, HWY_MAX_POW2)`.
|
|
306
|
+
template <typename T, int kPow2 = 0>
|
|
307
|
+
using ScalableTag = typename detail::ScalableTagChecker<T, kPow2>::type;
|
|
308
|
+
|
|
309
|
+
// Tag describing a vector with *up to* kLimit active lanes, even on targets
|
|
310
|
+
// with scalable vectors and HWY_SCALAR. The runtime lane count `Lanes(tag)` may
|
|
311
|
+
// be less than kLimit, and is 1 on HWY_SCALAR. This alias is typically used for
|
|
312
|
+
// 1D loops with a relatively low application-defined upper bound, e.g. for 8x8
|
|
313
|
+
// DCTs. However, it is better if data structures are designed to be
|
|
314
|
+
// vector-length-agnostic (e.g. a hybrid SoA where there are chunks of `M >=
|
|
315
|
+
// MaxLanes(d)` DC components followed by M AC1, .., and M AC63; this would
|
|
316
|
+
// enable vector-length-agnostic loops using ScalableTag). User-specified kPow2
|
|
317
|
+
// is interpreted as `HWY_MIN(kPow2, HWY_MAX_POW2)`.
|
|
318
|
+
template <typename T, size_t kLimit, int kPow2 = 0>
|
|
319
|
+
using CappedTag = typename detail::CappedTagChecker<T, kLimit, kPow2>::type;
|
|
320
|
+
|
|
321
|
+
#if !HWY_HAVE_SCALABLE
|
|
322
|
+
// If the vector size is known, and the app knows it does not want more than
|
|
323
|
+
// kLimit lanes, then capping can be beneficial. For example, AVX-512 has lower
|
|
324
|
+
// IPC and potentially higher costs for unaligned load/store vs. 256-bit AVX2.
|
|
325
|
+
template <typename T, size_t kLimit, int kPow2 = 0>
|
|
326
|
+
using CappedTagIfFixed = CappedTag<T, kLimit, kPow2>;
|
|
327
|
+
#else // HWY_HAVE_SCALABLE
|
|
328
|
+
// .. whereas on RVV/SVE, the cost of clamping Lanes() may exceed the benefit.
|
|
329
|
+
template <typename T, size_t kLimit, int kPow2 = 0>
|
|
330
|
+
using CappedTagIfFixed = ScalableTag<T, kPow2>;
|
|
331
|
+
#endif
|
|
332
|
+
|
|
333
|
+
// Alias for a tag describing a vector with *exactly* kNumLanes active lanes,
|
|
334
|
+
// even on targets with scalable vectors. Requires `kNumLanes` to be a power of
|
|
335
|
+
// two not exceeding `HWY_LANES(T)`.
|
|
336
|
+
//
|
|
337
|
+
// NOTE: if the application does not need to support HWY_SCALAR (+), use this
|
|
338
|
+
// instead of CappedTag to emphasize that there will be exactly kNumLanes lanes.
|
|
339
|
+
// This is useful for data structures that rely on exactly 128-bit SIMD, but
|
|
340
|
+
// these are discouraged because they cannot benefit from wider vectors.
|
|
341
|
+
// Instead, applications would ideally define a larger problem size and loop
|
|
342
|
+
// over it with the (unknown size) vectors from ScalableTag.
|
|
343
|
+
//
|
|
344
|
+
// + e.g. if the baseline is known to support SIMD, or the application requires
|
|
345
|
+
// ops such as TableLookupBytes not supported by HWY_SCALAR.
|
|
346
|
+
template <typename T, size_t kNumLanes>
|
|
347
|
+
using FixedTag = typename detail::FixedTagChecker<T, kNumLanes>::type;
|
|
348
|
+
|
|
349
|
+
// Convenience form for fixed sizes.
|
|
350
|
+
template <typename T>
|
|
351
|
+
using Full16 = Simd<T, 2 / sizeof(T), 0>;
|
|
352
|
+
|
|
353
|
+
template <typename T>
|
|
354
|
+
using Full32 = Simd<T, 4 / sizeof(T), 0>;
|
|
355
|
+
|
|
356
|
+
template <typename T>
|
|
357
|
+
using Full64 = Simd<T, 8 / sizeof(T), 0>;
|
|
358
|
+
|
|
359
|
+
template <typename T>
|
|
360
|
+
using Full128 = Simd<T, 16 / sizeof(T), 0>;
|
|
361
|
+
|
|
362
|
+
// ------------------------------ Accessors for Simd<>
|
|
363
|
+
|
|
364
|
+
// Lane type.
|
|
365
|
+
template <class D>
|
|
366
|
+
using TFromD = typename D::T;
|
|
367
|
+
|
|
368
|
+
// Upper bound on the number of lanes, typically used for SFINAE conditions and
|
|
369
|
+
// to allocate storage for targets with known vector sizes. Note: this may be a
|
|
370
|
+
// loose bound, instead use Lanes() as the actual size for AllocateAligned.
|
|
371
|
+
// MSVC workaround: use static constant directly instead of a function.
|
|
372
|
+
#define HWY_MAX_LANES_D(D) D::kPrivateLanes
|
|
373
|
+
|
|
374
|
+
// Non-macro form of HWY_MAX_LANES_D in case that is preferable. WARNING: the
|
|
375
|
+
// macro form may be required for MSVC, which has limitations on deducing
|
|
376
|
+
// arguments.
|
|
377
|
+
template <class D>
|
|
378
|
+
HWY_INLINE HWY_MAYBE_UNUSED constexpr size_t MaxLanes(D) {
|
|
379
|
+
return HWY_MAX_LANES_D(D);
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
#if !HWY_HAVE_SCALABLE
|
|
383
|
+
|
|
384
|
+
// If non-scalable, this is constexpr; otherwise the target's header defines a
|
|
385
|
+
// non-constexpr version of this function. This is the actual vector length,
|
|
386
|
+
// used when advancing loop counters.
|
|
387
|
+
template <class D>
|
|
388
|
+
HWY_INLINE HWY_MAYBE_UNUSED constexpr size_t Lanes(D) {
|
|
389
|
+
return HWY_MAX_LANES_D(D);
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
#endif // !HWY_HAVE_SCALABLE
|
|
393
|
+
|
|
394
|
+
// Tag for the same number of lanes as D, but with the LaneType T.
|
|
395
|
+
template <class T, class D>
|
|
396
|
+
using Rebind = typename D::template Rebind<T>;
|
|
397
|
+
|
|
398
|
+
template <class D>
|
|
399
|
+
using RebindToSigned = Rebind<MakeSigned<TFromD<D>>, D>;
|
|
400
|
+
template <class D>
|
|
401
|
+
using RebindToUnsigned = Rebind<MakeUnsigned<TFromD<D>>, D>;
|
|
402
|
+
template <class D>
|
|
403
|
+
using RebindToFloat = Rebind<MakeFloat<TFromD<D>>, D>;
|
|
404
|
+
|
|
405
|
+
// Tag for the same total size as D, but with the LaneType T.
|
|
406
|
+
template <class T, class D>
|
|
407
|
+
using Repartition = typename D::template Repartition<T>;
|
|
408
|
+
|
|
409
|
+
template <class D>
|
|
410
|
+
using RepartitionToWide = Repartition<MakeWide<TFromD<D>>, D>;
|
|
411
|
+
template <class D>
|
|
412
|
+
using RepartitionToNarrow = Repartition<MakeNarrow<TFromD<D>>, D>;
|
|
413
|
+
|
|
414
|
+
// Tag for the same lane type as D, but half the lanes.
|
|
415
|
+
template <class D>
|
|
416
|
+
using Half = typename D::Half;
|
|
417
|
+
|
|
418
|
+
// Tag for the same lane type as D, but twice the lanes.
|
|
419
|
+
template <class D>
|
|
420
|
+
using Twice = typename D::Twice;
|
|
421
|
+
|
|
422
|
+
// Tag for a 16-byte block with the same lane type as D
|
|
423
|
+
#if HWY_HAVE_SCALABLE
|
|
424
|
+
namespace detail {
|
|
425
|
+
|
|
426
|
+
template <class D>
|
|
427
|
+
class BlockDFromD_t {};
|
|
428
|
+
|
|
429
|
+
template <typename T, size_t N, int kPow2>
|
|
430
|
+
class BlockDFromD_t<Simd<T, N, kPow2>> {
|
|
431
|
+
using D = Simd<T, N, kPow2>;
|
|
432
|
+
static constexpr int kNewPow2 = HWY_MIN(kPow2, 0);
|
|
433
|
+
static constexpr size_t kMaxLpb = HWY_MIN(16 / sizeof(T), HWY_MAX_LANES_D(D));
|
|
434
|
+
static constexpr size_t kNewN = D::template NewN<kNewPow2, kMaxLpb>();
|
|
435
|
+
|
|
436
|
+
public:
|
|
437
|
+
using type = Simd<T, kNewN, kNewPow2>;
|
|
438
|
+
};
|
|
439
|
+
|
|
440
|
+
} // namespace detail
|
|
441
|
+
|
|
442
|
+
template <class D>
|
|
443
|
+
using BlockDFromD = typename detail::BlockDFromD_t<RemoveConst<D>>::type;
|
|
444
|
+
#else
|
|
445
|
+
template <class D>
|
|
446
|
+
using BlockDFromD =
|
|
447
|
+
Simd<TFromD<D>, HWY_MIN(16 / sizeof(TFromD<D>), HWY_MAX_LANES_D(D)), 0>;
|
|
448
|
+
#endif
|
|
449
|
+
|
|
450
|
+
// ------------------------------ Choosing overloads (SFINAE)
|
|
451
|
+
|
|
452
|
+
// Same as base.h macros but with a Simd<T, N, kPow2> argument instead of T.
|
|
453
|
+
#define HWY_IF_UNSIGNED_D(D) HWY_IF_UNSIGNED(TFromD<D>)
|
|
454
|
+
#define HWY_IF_SIGNED_D(D) HWY_IF_SIGNED(TFromD<D>)
|
|
455
|
+
#define HWY_IF_FLOAT_D(D) HWY_IF_FLOAT(TFromD<D>)
|
|
456
|
+
#define HWY_IF_NOT_FLOAT_D(D) HWY_IF_NOT_FLOAT(TFromD<D>)
|
|
457
|
+
#define HWY_IF_FLOAT3264_D(D) HWY_IF_FLOAT3264(TFromD<D>)
|
|
458
|
+
#define HWY_IF_NOT_FLOAT3264_D(D) HWY_IF_NOT_FLOAT3264(TFromD<D>)
|
|
459
|
+
#define HWY_IF_SPECIAL_FLOAT_D(D) HWY_IF_SPECIAL_FLOAT(TFromD<D>)
|
|
460
|
+
#define HWY_IF_NOT_SPECIAL_FLOAT_D(D) HWY_IF_NOT_SPECIAL_FLOAT(TFromD<D>)
|
|
461
|
+
#define HWY_IF_FLOAT_OR_SPECIAL_D(D) HWY_IF_FLOAT_OR_SPECIAL(TFromD<D>)
|
|
462
|
+
#define HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D) \
|
|
463
|
+
HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromD<D>)
|
|
464
|
+
|
|
465
|
+
#define HWY_IF_T_SIZE_D(D, bytes) HWY_IF_T_SIZE(TFromD<D>, bytes)
|
|
466
|
+
#define HWY_IF_NOT_T_SIZE_D(D, bytes) HWY_IF_NOT_T_SIZE(TFromD<D>, bytes)
|
|
467
|
+
#define HWY_IF_T_SIZE_ONE_OF_D(D, bit_array) \
|
|
468
|
+
HWY_IF_T_SIZE_ONE_OF(TFromD<D>, bit_array)
|
|
469
|
+
|
|
470
|
+
#define HWY_IF_LANES_D(D, lanes) HWY_IF_LANES(HWY_MAX_LANES_D(D), lanes)
|
|
471
|
+
#define HWY_IF_LANES_LE_D(D, lanes) HWY_IF_LANES_LE(HWY_MAX_LANES_D(D), lanes)
|
|
472
|
+
#define HWY_IF_LANES_GT_D(D, lanes) HWY_IF_LANES_GT(HWY_MAX_LANES_D(D), lanes)
|
|
473
|
+
#define HWY_IF_LANES_PER_BLOCK_D(D, lanes) \
|
|
474
|
+
HWY_IF_LANES_PER_BLOCK( \
|
|
475
|
+
TFromD<D>, HWY_MIN(HWY_MAX_LANES_D(D), 16 / sizeof(TFromD<D>)), lanes)
|
|
476
|
+
|
|
477
|
+
#define HWY_IF_POW2_LE_D(D, pow2) hwy::EnableIf<D().Pow2() <= pow2>* = nullptr
|
|
478
|
+
#define HWY_IF_POW2_GT_D(D, pow2) hwy::EnableIf<(D().Pow2() > pow2)>* = nullptr
|
|
479
|
+
|
|
480
|
+
#define HWY_IF_U8_D(D) hwy::EnableIf<IsSame<TFromD<D>, uint8_t>()>* = nullptr
|
|
481
|
+
#define HWY_IF_U16_D(D) hwy::EnableIf<IsSame<TFromD<D>, uint16_t>()>* = nullptr
|
|
482
|
+
#define HWY_IF_U32_D(D) hwy::EnableIf<IsSame<TFromD<D>, uint32_t>()>* = nullptr
|
|
483
|
+
#define HWY_IF_U64_D(D) hwy::EnableIf<IsSame<TFromD<D>, uint64_t>()>* = nullptr
|
|
484
|
+
|
|
485
|
+
#define HWY_IF_I8_D(D) hwy::EnableIf<IsSame<TFromD<D>, int8_t>()>* = nullptr
|
|
486
|
+
#define HWY_IF_I16_D(D) hwy::EnableIf<IsSame<TFromD<D>, int16_t>()>* = nullptr
|
|
487
|
+
#define HWY_IF_I32_D(D) hwy::EnableIf<IsSame<TFromD<D>, int32_t>()>* = nullptr
|
|
488
|
+
#define HWY_IF_I64_D(D) hwy::EnableIf<IsSame<TFromD<D>, int64_t>()>* = nullptr
|
|
489
|
+
|
|
490
|
+
// Use instead of HWY_IF_T_SIZE_D to avoid ambiguity with float16_t/float/double
|
|
491
|
+
// overloads.
|
|
492
|
+
#define HWY_IF_UI16_D(D) HWY_IF_UI16(TFromD<D>)
|
|
493
|
+
#define HWY_IF_UI32_D(D) HWY_IF_UI32(TFromD<D>)
|
|
494
|
+
#define HWY_IF_UI64_D(D) HWY_IF_UI64(TFromD<D>)
|
|
495
|
+
|
|
496
|
+
#define HWY_IF_BF16_D(D) HWY_IF_BF16(TFromD<D>)
|
|
497
|
+
#define HWY_IF_F16_D(D) HWY_IF_F16(TFromD<D>)
|
|
498
|
+
#define HWY_IF_F32_D(D) hwy::EnableIf<IsSame<TFromD<D>, float>()>* = nullptr
|
|
499
|
+
#define HWY_IF_F64_D(D) hwy::EnableIf<IsSame<TFromD<D>, double>()>* = nullptr
|
|
500
|
+
|
|
501
|
+
#define HWY_IF_V_SIZE_D(D, bytes) \
|
|
502
|
+
HWY_IF_V_SIZE(TFromD<D>, HWY_MAX_LANES_D(D), bytes)
|
|
503
|
+
#define HWY_IF_V_SIZE_LE_D(D, bytes) \
|
|
504
|
+
HWY_IF_V_SIZE_LE(TFromD<D>, HWY_MAX_LANES_D(D), bytes)
|
|
505
|
+
#define HWY_IF_V_SIZE_GT_D(D, bytes) \
|
|
506
|
+
HWY_IF_V_SIZE_GT(TFromD<D>, HWY_MAX_LANES_D(D), bytes)
|
|
507
|
+
|
|
508
|
+
// Same, but with a vector argument. ops/*-inl.h define their own TFromV.
|
|
509
|
+
#define HWY_IF_UNSIGNED_V(V) HWY_IF_UNSIGNED(TFromV<V>)
|
|
510
|
+
#define HWY_IF_SIGNED_V(V) HWY_IF_SIGNED(TFromV<V>)
|
|
511
|
+
#define HWY_IF_FLOAT_V(V) HWY_IF_FLOAT(TFromV<V>)
|
|
512
|
+
#define HWY_IF_NOT_FLOAT_V(V) HWY_IF_NOT_FLOAT(TFromV<V>)
|
|
513
|
+
#define HWY_IF_SPECIAL_FLOAT_V(V) HWY_IF_SPECIAL_FLOAT(TFromV<V>)
|
|
514
|
+
#define HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V) \
|
|
515
|
+
HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromV<V>)
|
|
516
|
+
|
|
517
|
+
// Lane-size selectors keyed on a vector type V. Each forwards TFromV<V>,
// together with the caller's `bytes` or `bit_array` argument unchanged, to the
// type-based HWY_IF_T_SIZE, HWY_IF_NOT_T_SIZE or HWY_IF_T_SIZE_ONE_OF macro
// (all defined outside this chunk).
#define HWY_IF_T_SIZE_V(V, bytes) HWY_IF_T_SIZE(TFromV<V>, bytes)
|
|
518
|
+
#define HWY_IF_NOT_T_SIZE_V(V, bytes) HWY_IF_NOT_T_SIZE(TFromV<V>, bytes)
|
|
519
|
+
#define HWY_IF_T_SIZE_ONE_OF_V(V, bit_array) \
|
|
520
|
+
HWY_IF_T_SIZE_ONE_OF(TFromV<V>, bit_array)
|
|
521
|
+
|
|
522
|
+
// Vector-size helpers keyed on a vector type V.
// HWY_MAX_LANES_V(V) is HWY_MAX_LANES_D applied to DFromV<V>. The three
// HWY_IF_V_SIZE*_V macros forward TFromV<V>, HWY_MAX_LANES_V(V) and `bytes` to
// HWY_IF_V_SIZE, HWY_IF_V_SIZE_LE and HWY_IF_V_SIZE_GT respectively; DFromV,
// TFromV and those target macros are defined outside this chunk.
#define HWY_MAX_LANES_V(V) HWY_MAX_LANES_D(DFromV<V>)
|
|
523
|
+
#define HWY_IF_V_SIZE_V(V, bytes) \
|
|
524
|
+
HWY_IF_V_SIZE(TFromV<V>, HWY_MAX_LANES_V(V), bytes)
|
|
525
|
+
#define HWY_IF_V_SIZE_LE_V(V, bytes) \
|
|
526
|
+
HWY_IF_V_SIZE_LE(TFromV<V>, HWY_MAX_LANES_V(V), bytes)
|
|
527
|
+
#define HWY_IF_V_SIZE_GT_V(V, bytes) \
|
|
528
|
+
HWY_IF_V_SIZE_GT(TFromV<V>, HWY_MAX_LANES_V(V), bytes)
|
|
529
|
+
|
|
530
|
+
// Old names (deprecated)
// Each alias forwards its (D, bytes) arguments unchanged to the macro that
// replaces it: HWY_IF_T_SIZE_D or HWY_IF_NOT_T_SIZE_D (defined outside this
// chunk).
|
|
531
|
+
#define HWY_IF_LANE_SIZE_D(D, bytes) HWY_IF_T_SIZE_D(D, bytes)
|
|
532
|
+
#define HWY_IF_NOT_LANE_SIZE_D(D, bytes) HWY_IF_NOT_T_SIZE_D(D, bytes)
|
|
533
|
+
|
|
534
|
+
// NOLINTNEXTLINE(google-readability-namespace-comments)
|
|
535
|
+
} // namespace HWY_NAMESPACE
|
|
536
|
+
} // namespace hwy
|
|
537
|
+
HWY_AFTER_NAMESPACE();
|
|
538
|
+
|
|
539
|
+
#endif // HIGHWAY_HWY_OPS_SHARED_TOGGLE
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
// Copyright 2023 Google LLC
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
//
|
|
4
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
// you may not use this file except in compliance with the License.
|
|
6
|
+
// You may obtain a copy of the License at
|
|
7
|
+
//
|
|
8
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
//
|
|
10
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
// See the License for the specific language governing permissions and
|
|
14
|
+
// limitations under the License.
|
|
15
|
+
|
|
16
|
+
// Tuple support. Included by those ops/* that lack native tuple types, after
|
|
17
|
+
// they define VFromD and before they use the tuples e.g. for LoadInterleaved2.
|
|
18
|
+
// Assumes we are already in the HWY_NAMESPACE and under an include guard.
|
|
19
|
+
|
|
20
|
+
// Standalone-viewing fallback. HWY_NAMESPACE is normally set by
// set_macros-inl.h before this header is included; when it is not, pull in
// base.h and stub out VFromD so that IDEs do not warn about the declarations
// that follow.
#ifndef HWY_NAMESPACE
#include "hwy/base.h"
template <typename D>
using VFromD = int;
#endif  // HWY_NAMESPACE
|
|
27
|
+
|
|
28
|
+
// On SVE, Vec2..4 are aliases to built-in types.
|
|
29
|
+
template <class D>
|
|
30
|
+
struct Vec2 {
|
|
31
|
+
VFromD<D> v0;
|
|
32
|
+
VFromD<D> v1;
|
|
33
|
+
};
|
|
34
|
+
|
|
35
|
+
template <class D>
|
|
36
|
+
struct Vec3 {
|
|
37
|
+
VFromD<D> v0;
|
|
38
|
+
VFromD<D> v1;
|
|
39
|
+
VFromD<D> v2;
|
|
40
|
+
};
|
|
41
|
+
|
|
42
|
+
template <class D>
|
|
43
|
+
struct Vec4 {
|
|
44
|
+
VFromD<D> v0;
|
|
45
|
+
VFromD<D> v1;
|
|
46
|
+
VFromD<D> v2;
|
|
47
|
+
VFromD<D> v3;
|
|
48
|
+
};
|
|
49
|
+
|
|
50
|
+
// D arg is unused but allows deducing D.
|
|
51
|
+
template <class D>
|
|
52
|
+
HWY_API Vec2<D> Create2(D /* tag */, VFromD<D> v0, VFromD<D> v1) {
|
|
53
|
+
return Vec2<D>{v0, v1};
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
template <class D>
|
|
57
|
+
HWY_API Vec3<D> Create3(D /* tag */, VFromD<D> v0, VFromD<D> v1, VFromD<D> v2) {
|
|
58
|
+
return Vec3<D>{v0, v1, v2};
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
template <class D>
|
|
62
|
+
HWY_API Vec4<D> Create4(D /* tag */, VFromD<D> v0, VFromD<D> v1, VFromD<D> v2,
|
|
63
|
+
VFromD<D> v3) {
|
|
64
|
+
return Vec4<D>{v0, v1, v2, v3};
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
template <size_t kIndex, class D>
|
|
68
|
+
HWY_API VFromD<D> Get2(Vec2<D> tuple) {
|
|
69
|
+
static_assert(kIndex < 2, "Tuple index out of bounds");
|
|
70
|
+
return kIndex == 0 ? tuple.v0 : tuple.v1;
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
template <size_t kIndex, class D>
|
|
74
|
+
HWY_API VFromD<D> Get3(Vec3<D> tuple) {
|
|
75
|
+
static_assert(kIndex < 3, "Tuple index out of bounds");
|
|
76
|
+
return kIndex == 0 ? tuple.v0 : kIndex == 1 ? tuple.v1 : tuple.v2;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
template <size_t kIndex, class D>
|
|
80
|
+
HWY_API VFromD<D> Get4(Vec4<D> tuple) {
|
|
81
|
+
static_assert(kIndex < 4, "Tuple index out of bounds");
|
|
82
|
+
return kIndex == 0 ? tuple.v0
|
|
83
|
+
: kIndex == 1 ? tuple.v1
|
|
84
|
+
: kIndex == 2 ? tuple.v2
|
|
85
|
+
: tuple.v3;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
template <size_t kIndex, class D>
|
|
89
|
+
HWY_API Vec2<D> Set2(Vec2<D> tuple, VFromD<D> val) {
|
|
90
|
+
static_assert(kIndex < 2, "Tuple index out of bounds");
|
|
91
|
+
if (kIndex == 0) {
|
|
92
|
+
tuple.v0 = val;
|
|
93
|
+
} else {
|
|
94
|
+
tuple.v1 = val;
|
|
95
|
+
}
|
|
96
|
+
return tuple;
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
template <size_t kIndex, class D>
|
|
100
|
+
HWY_API Vec3<D> Set3(Vec3<D> tuple, VFromD<D> val) {
|
|
101
|
+
static_assert(kIndex < 3, "Tuple index out of bounds");
|
|
102
|
+
if (kIndex == 0) {
|
|
103
|
+
tuple.v0 = val;
|
|
104
|
+
} else if (kIndex == 1) {
|
|
105
|
+
tuple.v1 = val;
|
|
106
|
+
} else {
|
|
107
|
+
tuple.v2 = val;
|
|
108
|
+
}
|
|
109
|
+
return tuple;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
template <size_t kIndex, class D>
|
|
113
|
+
HWY_API Vec4<D> Set4(Vec4<D> tuple, VFromD<D> val) {
|
|
114
|
+
static_assert(kIndex < 4, "Tuple index out of bounds");
|
|
115
|
+
if (kIndex == 0) {
|
|
116
|
+
tuple.v0 = val;
|
|
117
|
+
} else if (kIndex == 1) {
|
|
118
|
+
tuple.v1 = val;
|
|
119
|
+
} else if (kIndex == 2) {
|
|
120
|
+
tuple.v2 = val;
|
|
121
|
+
} else {
|
|
122
|
+
tuple.v3 = val;
|
|
123
|
+
}
|
|
124
|
+
return tuple;
|
|
125
|
+
}
|