@img/sharp-libvips-dev 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -2
- package/include/aom/aom_decoder.h +1 -1
- package/include/aom/aom_encoder.h +7 -1
- package/include/aom/aom_image.h +24 -12
- package/include/aom/aom_integer.h +3 -3
- package/include/aom/aomcx.h +15 -0
- package/include/aom/aomdx.h +5 -2
- package/include/archive.h +7 -5
- package/include/archive_entry.h +5 -3
- package/include/cgif.h +3 -0
- package/include/expat.h +21 -10
- package/include/expat_config.h +11 -5
- package/include/ffi.h +12 -25
- package/include/freetype2/freetype/config/ftoption.h +2 -2
- package/include/fribidi/fribidi-config.h +2 -2
- package/include/fribidi/fribidi-unicode-version.h +3 -3
- package/include/gio-unix-2.0/gio/gfiledescriptorbased.h +3 -2
- package/include/glib-2.0/gio/gappinfo.h +40 -25
- package/include/glib-2.0/gio/gapplication.h +6 -0
- package/include/glib-2.0/gio/gasyncresult.h +1 -1
- package/include/glib-2.0/gio/gconverter.h +5 -0
- package/include/glib-2.0/gio/gdbusintrospection.h +1 -1
- package/include/glib-2.0/gio/gfile.h +16 -0
- package/include/glib-2.0/gio/gio-visibility.h +34 -0
- package/include/glib-2.0/gio/giotypes.h +0 -1
- package/include/glib-2.0/gio/gsettings.h +8 -0
- package/include/glib-2.0/gio/gvfs.h +2 -2
- package/include/glib-2.0/girepository/gi-visibility.h +34 -0
- package/include/glib-2.0/girepository/giarginfo.h +23 -6
- package/include/glib-2.0/girepository/gibaseinfo.h +44 -18
- package/include/glib-2.0/girepository/gicallableinfo.h +26 -16
- package/include/glib-2.0/girepository/gicallbackinfo.h +17 -2
- package/include/glib-2.0/girepository/giconstantinfo.h +19 -4
- package/include/glib-2.0/girepository/gienuminfo.h +20 -21
- package/include/glib-2.0/girepository/gifieldinfo.h +22 -7
- package/include/glib-2.0/girepository/giflagsinfo.h +60 -0
- package/include/glib-2.0/girepository/gifunctioninfo.h +22 -7
- package/include/glib-2.0/girepository/giinterfaceinfo.h +33 -18
- package/include/glib-2.0/girepository/giobjectinfo.h +41 -26
- package/include/glib-2.0/girepository/gipropertyinfo.h +18 -3
- package/include/glib-2.0/girepository/giregisteredtypeinfo.h +22 -11
- package/include/glib-2.0/girepository/girepository-autocleanups.h +56 -0
- package/include/glib-2.0/girepository/girepository.h +53 -62
- package/include/glib-2.0/girepository/girffi.h +8 -7
- package/include/glib-2.0/girepository/gisignalinfo.h +18 -3
- package/include/glib-2.0/girepository/gistructinfo.h +26 -11
- package/include/glib-2.0/girepository/gitypeinfo.h +29 -16
- package/include/glib-2.0/girepository/gitypelib.h +9 -13
- package/include/glib-2.0/girepository/gitypes.h +52 -104
- package/include/glib-2.0/girepository/giunioninfo.h +28 -12
- package/include/glib-2.0/girepository/giunresolvedinfo.h +17 -2
- package/include/glib-2.0/girepository/givalueinfo.h +65 -0
- package/include/glib-2.0/girepository/givfuncinfo.h +23 -8
- package/include/glib-2.0/glib/deprecated/gthread.h +9 -5
- package/include/glib-2.0/glib/gbitlock.h +31 -0
- package/include/glib-2.0/glib/gbookmarkfile.h +1 -1
- package/include/glib-2.0/glib/giochannel.h +2 -2
- package/include/glib-2.0/glib/glib-visibility.h +34 -0
- package/include/glib-2.0/glib/gmacros.h +12 -5
- package/include/glib-2.0/glib/gmain.h +93 -7
- package/include/glib-2.0/glib/gmessages.h +8 -0
- package/include/glib-2.0/glib/gqsort.h +8 -1
- package/include/glib-2.0/glib/gslice.h +2 -0
- package/include/glib-2.0/glib/gstrfuncs.h +24 -30
- package/include/glib-2.0/glib/gstrvbuilder.h +3 -0
- package/include/glib-2.0/glib/gthread.h +191 -3
- package/include/glib-2.0/glib/gunicode.h +1 -1
- package/include/glib-2.0/glib/gversionmacros.h +9 -0
- package/include/glib-2.0/glib-unix.h +7 -1
- package/include/glib-2.0/gmodule/gmodule-visibility.h +34 -0
- package/include/glib-2.0/gobject/genums.h +6 -6
- package/include/glib-2.0/gobject/glib-types.h +11 -0
- package/include/glib-2.0/gobject/gobject-visibility.h +34 -0
- package/include/glib-2.0/gobject/gsignal.h +16 -6
- package/include/glib-2.0/gobject/gtype.h +6 -6
- package/include/harfbuzz/hb-buffer.h +6 -0
- package/include/harfbuzz/hb-common.h +6 -9
- package/include/harfbuzz/hb-cplusplus.hh +8 -11
- package/include/harfbuzz/hb-subset.h +17 -4
- package/include/harfbuzz/hb-version.h +3 -3
- package/include/hwy/abort.h +28 -0
- package/include/hwy/aligned_allocator.h +218 -6
- package/include/hwy/base.h +1935 -512
- package/include/hwy/cache_control.h +24 -6
- package/include/hwy/detect_compiler_arch.h +105 -10
- package/include/hwy/detect_targets.h +146 -37
- package/include/hwy/foreach_target.h +36 -1
- package/include/hwy/highway.h +222 -50
- package/include/hwy/ops/arm_neon-inl.h +2055 -894
- package/include/hwy/ops/arm_sve-inl.h +1476 -348
- package/include/hwy/ops/emu128-inl.h +711 -623
- package/include/hwy/ops/generic_ops-inl.h +4431 -2157
- package/include/hwy/ops/inside-inl.h +691 -0
- package/include/hwy/ops/ppc_vsx-inl.h +2186 -673
- package/include/hwy/ops/rvv-inl.h +1556 -536
- package/include/hwy/ops/scalar-inl.h +353 -233
- package/include/hwy/ops/set_macros-inl.h +171 -23
- package/include/hwy/ops/shared-inl.h +198 -56
- package/include/hwy/ops/wasm_128-inl.h +283 -244
- package/include/hwy/ops/x86_128-inl.h +3673 -1357
- package/include/hwy/ops/x86_256-inl.h +1737 -663
- package/include/hwy/ops/x86_512-inl.h +1697 -500
- package/include/hwy/per_target.h +4 -0
- package/include/hwy/profiler.h +648 -0
- package/include/hwy/robust_statistics.h +2 -2
- package/include/hwy/targets.h +40 -32
- package/include/hwy/timer-inl.h +3 -3
- package/include/hwy/timer.h +16 -1
- package/include/libheif/heif.h +170 -15
- package/include/libheif/heif_items.h +237 -0
- package/include/libheif/heif_properties.h +38 -2
- package/include/libheif/heif_regions.h +1 -1
- package/include/libheif/heif_version.h +2 -2
- package/include/libpng16/png.h +32 -29
- package/include/libpng16/pngconf.h +2 -2
- package/include/libpng16/pnglibconf.h +8 -3
- package/include/librsvg-2.0/librsvg/rsvg-cairo.h +1 -1
- package/include/librsvg-2.0/librsvg/rsvg-features.h +3 -4
- package/include/librsvg-2.0/librsvg/rsvg-pixbuf.h +235 -0
- package/include/librsvg-2.0/librsvg/rsvg-version.h +3 -3
- package/include/librsvg-2.0/librsvg/rsvg.h +55 -176
- package/include/libxml2/libxml/HTMLparser.h +12 -19
- package/include/libxml2/libxml/c14n.h +1 -12
- package/include/libxml2/libxml/debugXML.h +1 -1
- package/include/libxml2/libxml/encoding.h +9 -0
- package/include/libxml2/libxml/entities.h +12 -1
- package/include/libxml2/libxml/hash.h +19 -0
- package/include/libxml2/libxml/list.h +2 -2
- package/include/libxml2/libxml/nanohttp.h +17 -0
- package/include/libxml2/libxml/parser.h +73 -58
- package/include/libxml2/libxml/parserInternals.h +9 -1
- package/include/libxml2/libxml/pattern.h +6 -0
- package/include/libxml2/libxml/tree.h +32 -12
- package/include/libxml2/libxml/uri.h +11 -0
- package/include/libxml2/libxml/valid.h +29 -2
- package/include/libxml2/libxml/xinclude.h +7 -0
- package/include/libxml2/libxml/xmlIO.h +21 -5
- package/include/libxml2/libxml/xmlerror.h +14 -0
- package/include/libxml2/libxml/xmlexports.h +111 -15
- package/include/libxml2/libxml/xmlmemory.h +8 -45
- package/include/libxml2/libxml/xmlreader.h +2 -0
- package/include/libxml2/libxml/xmlsave.h +5 -0
- package/include/libxml2/libxml/xmlunicode.h +165 -1
- package/include/libxml2/libxml/xmlversion.h +15 -179
- package/include/libxml2/libxml/xmlwriter.h +1 -0
- package/include/libxml2/libxml/xpath.h +4 -0
- package/include/pango-1.0/pango/pango-features.h +2 -2
- package/include/pango-1.0/pango/pango-fontmap.h +7 -0
- package/include/pango-1.0/pango/pango-item.h +4 -2
- package/include/pango-1.0/pango/pango-version-macros.h +25 -0
- package/include/pango-1.0/pango/pangofc-font.h +2 -1
- package/include/pixman-1/pixman-version.h +2 -2
- package/include/png.h +32 -29
- package/include/pngconf.h +2 -2
- package/include/pnglibconf.h +8 -3
- package/include/vips/connection.h +9 -3
- package/include/vips/util.h +1 -11
- package/include/vips/version.h +4 -4
- package/include/webp/decode.h +58 -56
- package/include/webp/demux.h +25 -21
- package/include/webp/encode.h +44 -39
- package/include/webp/mux.h +76 -15
- package/include/webp/mux_types.h +2 -1
- package/include/webp/sharpyuv/sharpyuv.h +77 -8
- package/include/webp/types.h +29 -8
- package/include/zconf.h +1 -1
- package/include/zlib.h +12 -12
- package/package.json +1 -1
- package/versions.json +18 -19
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
// Single-element vectors and operations.
|
|
17
17
|
// External include guard in highway.h - see comment there.
|
|
18
18
|
|
|
19
|
+
#include <stdint.h>
|
|
19
20
|
#ifndef HWY_NO_LIBCXX
|
|
20
21
|
#include <math.h> // sqrtf
|
|
21
22
|
#endif
|
|
@@ -53,6 +54,9 @@ struct Vec1 {
|
|
|
53
54
|
HWY_INLINE Vec1& operator-=(const Vec1 other) {
|
|
54
55
|
return *this = (*this - other);
|
|
55
56
|
}
|
|
57
|
+
HWY_INLINE Vec1& operator%=(const Vec1 other) {
|
|
58
|
+
return *this = (*this % other);
|
|
59
|
+
}
|
|
56
60
|
HWY_INLINE Vec1& operator&=(const Vec1 other) {
|
|
57
61
|
return *this = (*this & other);
|
|
58
62
|
}
|
|
@@ -101,17 +105,12 @@ HWY_API Vec1<TTo> BitCast(DTo /* tag */, Vec1<TFrom> v) {
|
|
|
101
105
|
|
|
102
106
|
template <class D, HWY_IF_LANES_D(D, 1), typename T = TFromD<D>>
|
|
103
107
|
HWY_API Vec1<T> Zero(D /* tag */) {
|
|
104
|
-
Vec1<T>
|
|
105
|
-
ZeroBytes<sizeof(v.raw)>(&v.raw);
|
|
106
|
-
return v;
|
|
108
|
+
return Vec1<T>(ConvertScalarTo<T>(0));
|
|
107
109
|
}
|
|
108
110
|
|
|
109
111
|
template <class D>
|
|
110
112
|
using VFromD = decltype(Zero(D()));
|
|
111
113
|
|
|
112
|
-
// ------------------------------ Tuple (VFromD)
|
|
113
|
-
#include "hwy/ops/tuple-inl.h"
|
|
114
|
-
|
|
115
114
|
// ------------------------------ Set
|
|
116
115
|
template <class D, HWY_IF_LANES_D(D, 1), typename T = TFromD<D>, typename T2>
|
|
117
116
|
HWY_API Vec1<T> Set(D /* tag */, const T2 t) {
|
|
@@ -137,7 +136,7 @@ HWY_API VFromD<D> ResizeBitCast(D /* tag */, FromV v) {
|
|
|
137
136
|
using TFrom = TFromV<FromV>;
|
|
138
137
|
using TTo = TFromD<D>;
|
|
139
138
|
constexpr size_t kCopyLen = HWY_MIN(sizeof(TFrom), sizeof(TTo));
|
|
140
|
-
TTo to
|
|
139
|
+
TTo to{};
|
|
141
140
|
CopyBytes<kCopyLen>(&v.raw, &to);
|
|
142
141
|
return VFromD<D>(to);
|
|
143
142
|
}
|
|
@@ -156,6 +155,39 @@ HWY_INLINE VFromD<DTo> ZeroExtendResizeBitCast(FromSizeTag /* from_size_tag */,
|
|
|
156
155
|
|
|
157
156
|
} // namespace detail
|
|
158
157
|
|
|
158
|
+
// ------------------------------ Dup128VecFromValues
|
|
159
|
+
|
|
160
|
+
template <class D, HWY_IF_T_SIZE_D(D, 1)>
|
|
161
|
+
HWY_API VFromD<D> Dup128VecFromValues(D /*d*/, TFromD<D> t0, TFromD<D> /*t1*/,
|
|
162
|
+
TFromD<D> /*t2*/, TFromD<D> /*t3*/,
|
|
163
|
+
TFromD<D> /*t4*/, TFromD<D> /*t5*/,
|
|
164
|
+
TFromD<D> /*t6*/, TFromD<D> /*t7*/,
|
|
165
|
+
TFromD<D> /*t8*/, TFromD<D> /*t9*/,
|
|
166
|
+
TFromD<D> /*t10*/, TFromD<D> /*t11*/,
|
|
167
|
+
TFromD<D> /*t12*/, TFromD<D> /*t13*/,
|
|
168
|
+
TFromD<D> /*t14*/, TFromD<D> /*t15*/) {
|
|
169
|
+
return VFromD<D>(t0);
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
template <class D, HWY_IF_T_SIZE_D(D, 2)>
|
|
173
|
+
HWY_API VFromD<D> Dup128VecFromValues(D /*d*/, TFromD<D> t0, TFromD<D> /*t1*/,
|
|
174
|
+
TFromD<D> /*t2*/, TFromD<D> /*t3*/,
|
|
175
|
+
TFromD<D> /*t4*/, TFromD<D> /*t5*/,
|
|
176
|
+
TFromD<D> /*t6*/, TFromD<D> /*t7*/) {
|
|
177
|
+
return VFromD<D>(t0);
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
template <class D, HWY_IF_T_SIZE_D(D, 4)>
|
|
181
|
+
HWY_API VFromD<D> Dup128VecFromValues(D /*d*/, TFromD<D> t0, TFromD<D> /*t1*/,
|
|
182
|
+
TFromD<D> /*t2*/, TFromD<D> /*t3*/) {
|
|
183
|
+
return VFromD<D>(t0);
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
template <class D, HWY_IF_T_SIZE_D(D, 8)>
|
|
187
|
+
HWY_API VFromD<D> Dup128VecFromValues(D /*d*/, TFromD<D> t0, TFromD<D> /*t1*/) {
|
|
188
|
+
return VFromD<D>(t0);
|
|
189
|
+
}
|
|
190
|
+
|
|
159
191
|
// ================================================== LOGICAL
|
|
160
192
|
|
|
161
193
|
// ------------------------------ Not
|
|
@@ -300,8 +332,7 @@ HWY_API Vec1<T> CopySignToAbs(const Vec1<T> abs, const Vec1<T> sign) {
|
|
|
300
332
|
// ------------------------------ BroadcastSignBit
|
|
301
333
|
template <typename T>
|
|
302
334
|
HWY_API Vec1<T> BroadcastSignBit(const Vec1<T> v) {
|
|
303
|
-
|
|
304
|
-
return v.raw < 0 ? Vec1<T>(T(-1)) : Vec1<T>(0);
|
|
335
|
+
return Vec1<T>(ScalarShr(v.raw, sizeof(T) * 8 - 1));
|
|
305
336
|
}
|
|
306
337
|
|
|
307
338
|
// ------------------------------ PopulationCount
|
|
@@ -328,12 +359,12 @@ HWY_API Vec1<T> IfThenElse(const Mask1<T> mask, const Vec1<T> yes,
|
|
|
328
359
|
|
|
329
360
|
template <typename T>
|
|
330
361
|
HWY_API Vec1<T> IfThenElseZero(const Mask1<T> mask, const Vec1<T> yes) {
|
|
331
|
-
return mask.bits ? yes : Vec1<T>(0);
|
|
362
|
+
return mask.bits ? yes : Vec1<T>(ConvertScalarTo<T>(0));
|
|
332
363
|
}
|
|
333
364
|
|
|
334
365
|
template <typename T>
|
|
335
366
|
HWY_API Vec1<T> IfThenZeroElse(const Mask1<T> mask, const Vec1<T> no) {
|
|
336
|
-
return mask.bits ? Vec1<T>(0) : no;
|
|
367
|
+
return mask.bits ? Vec1<T>(ConvertScalarTo<T>(0)) : no;
|
|
337
368
|
}
|
|
338
369
|
|
|
339
370
|
template <typename T>
|
|
@@ -345,11 +376,6 @@ HWY_API Vec1<T> IfNegativeThenElse(Vec1<T> v, Vec1<T> yes, Vec1<T> no) {
|
|
|
345
376
|
return vi.raw < 0 ? yes : no;
|
|
346
377
|
}
|
|
347
378
|
|
|
348
|
-
template <typename T>
|
|
349
|
-
HWY_API Vec1<T> ZeroIfNegative(const Vec1<T> v) {
|
|
350
|
-
return v.raw < 0 ? Vec1<T>(0) : v;
|
|
351
|
-
}
|
|
352
|
-
|
|
353
379
|
// ------------------------------ Mask logical
|
|
354
380
|
|
|
355
381
|
template <typename T>
|
|
@@ -407,6 +433,19 @@ HWY_API Mask1<T> SetAtOrBeforeFirst(Mask1<T> /*mask*/) {
|
|
|
407
433
|
return Mask1<T>::FromBool(true);
|
|
408
434
|
}
|
|
409
435
|
|
|
436
|
+
// ------------------------------ LowerHalfOfMask
|
|
437
|
+
|
|
438
|
+
#ifdef HWY_NATIVE_LOWER_HALF_OF_MASK
|
|
439
|
+
#undef HWY_NATIVE_LOWER_HALF_OF_MASK
|
|
440
|
+
#else
|
|
441
|
+
#define HWY_NATIVE_LOWER_HALF_OF_MASK
|
|
442
|
+
#endif
|
|
443
|
+
|
|
444
|
+
template <class D>
|
|
445
|
+
HWY_API MFromD<D> LowerHalfOfMask(D /*d*/, MFromD<D> m) {
|
|
446
|
+
return m;
|
|
447
|
+
}
|
|
448
|
+
|
|
410
449
|
// ================================================== SHIFTS
|
|
411
450
|
|
|
412
451
|
// ------------------------------ ShiftLeft/ShiftRight (BroadcastSignBit)
|
|
@@ -421,35 +460,20 @@ HWY_API Vec1<T> ShiftLeft(const Vec1<T> v) {
|
|
|
421
460
|
template <int kBits, typename T>
|
|
422
461
|
HWY_API Vec1<T> ShiftRight(const Vec1<T> v) {
|
|
423
462
|
static_assert(0 <= kBits && kBits < sizeof(T) * 8, "Invalid shift");
|
|
424
|
-
|
|
425
|
-
// Signed right shift is now guaranteed to be arithmetic (rounding toward
|
|
426
|
-
// negative infinity, i.e. shifting in the sign bit).
|
|
427
|
-
return Vec1<T>(static_cast<T>(v.raw >> kBits));
|
|
428
|
-
#else
|
|
429
|
-
if (IsSigned<T>()) {
|
|
430
|
-
// Emulate arithmetic shift using only logical (unsigned) shifts, because
|
|
431
|
-
// signed shifts are still implementation-defined.
|
|
432
|
-
using TU = hwy::MakeUnsigned<T>;
|
|
433
|
-
const Sisd<TU> du;
|
|
434
|
-
const TU shifted = static_cast<TU>(BitCast(du, v).raw >> kBits);
|
|
435
|
-
const TU sign = BitCast(du, BroadcastSignBit(v)).raw;
|
|
436
|
-
const size_t sign_shift =
|
|
437
|
-
static_cast<size_t>(static_cast<int>(sizeof(TU)) * 8 - 1 - kBits);
|
|
438
|
-
const TU upper = static_cast<TU>(sign << sign_shift);
|
|
439
|
-
return BitCast(Sisd<T>(), Vec1<TU>(shifted | upper));
|
|
440
|
-
} else { // T is unsigned
|
|
441
|
-
return Vec1<T>(static_cast<T>(v.raw >> kBits));
|
|
442
|
-
}
|
|
443
|
-
#endif
|
|
463
|
+
return Vec1<T>(ScalarShr(v.raw, kBits));
|
|
444
464
|
}
|
|
445
465
|
|
|
446
466
|
// ------------------------------ RotateRight (ShiftRight)
|
|
447
|
-
template <int kBits, typename T>
|
|
467
|
+
template <int kBits, typename T, HWY_IF_NOT_FLOAT_NOR_SPECIAL(T)>
|
|
448
468
|
HWY_API Vec1<T> RotateRight(const Vec1<T> v) {
|
|
469
|
+
const DFromV<decltype(v)> d;
|
|
470
|
+
const RebindToUnsigned<decltype(d)> du;
|
|
471
|
+
|
|
449
472
|
constexpr size_t kSizeInBits = sizeof(T) * 8;
|
|
450
|
-
static_assert(0 <= kBits && kBits < kSizeInBits, "Invalid shift");
|
|
473
|
+
static_assert(0 <= kBits && kBits < kSizeInBits, "Invalid shift count");
|
|
451
474
|
if (kBits == 0) return v;
|
|
452
|
-
|
|
475
|
+
|
|
476
|
+
return Or(BitCast(d, ShiftRight<kBits>(BitCast(du, v))),
|
|
453
477
|
ShiftLeft<HWY_MIN(kSizeInBits - 1, kSizeInBits - kBits)>(v));
|
|
454
478
|
}
|
|
455
479
|
|
|
@@ -463,26 +487,7 @@ HWY_API Vec1<T> ShiftLeftSame(const Vec1<T> v, int bits) {
|
|
|
463
487
|
|
|
464
488
|
template <typename T>
|
|
465
489
|
HWY_API Vec1<T> ShiftRightSame(const Vec1<T> v, int bits) {
|
|
466
|
-
|
|
467
|
-
// Signed right shift is now guaranteed to be arithmetic (rounding toward
|
|
468
|
-
// negative infinity, i.e. shifting in the sign bit).
|
|
469
|
-
return Vec1<T>(static_cast<T>(v.raw >> bits));
|
|
470
|
-
#else
|
|
471
|
-
if (IsSigned<T>()) {
|
|
472
|
-
// Emulate arithmetic shift using only logical (unsigned) shifts, because
|
|
473
|
-
// signed shifts are still implementation-defined.
|
|
474
|
-
using TU = hwy::MakeUnsigned<T>;
|
|
475
|
-
const Sisd<TU> du;
|
|
476
|
-
const TU shifted = static_cast<TU>(BitCast(du, v).raw >> bits);
|
|
477
|
-
const TU sign = BitCast(du, BroadcastSignBit(v)).raw;
|
|
478
|
-
const size_t sign_shift =
|
|
479
|
-
static_cast<size_t>(static_cast<int>(sizeof(TU)) * 8 - 1 - bits);
|
|
480
|
-
const TU upper = static_cast<TU>(sign << sign_shift);
|
|
481
|
-
return BitCast(Sisd<T>(), Vec1<TU>(shifted | upper));
|
|
482
|
-
} else { // T is unsigned
|
|
483
|
-
return Vec1<T>(static_cast<T>(v.raw >> bits));
|
|
484
|
-
}
|
|
485
|
-
#endif
|
|
490
|
+
return Vec1<T>(ScalarShr(v.raw, bits));
|
|
486
491
|
}
|
|
487
492
|
|
|
488
493
|
// ------------------------------ Shl
|
|
@@ -528,10 +533,22 @@ HWY_API Vec1<double> operator-(const Vec1<double> a, const Vec1<double> b) {
|
|
|
528
533
|
|
|
529
534
|
// ------------------------------ SumsOf8
|
|
530
535
|
|
|
536
|
+
HWY_API Vec1<int64_t> SumsOf8(const Vec1<int8_t> v) {
|
|
537
|
+
return Vec1<int64_t>(v.raw);
|
|
538
|
+
}
|
|
531
539
|
HWY_API Vec1<uint64_t> SumsOf8(const Vec1<uint8_t> v) {
|
|
532
540
|
return Vec1<uint64_t>(v.raw);
|
|
533
541
|
}
|
|
534
542
|
|
|
543
|
+
// ------------------------------ SumsOf2
|
|
544
|
+
|
|
545
|
+
template <class T>
|
|
546
|
+
HWY_API Vec1<MakeWide<T>> SumsOf2(const Vec1<T> v) {
|
|
547
|
+
const DFromV<decltype(v)> d;
|
|
548
|
+
const Rebind<MakeWide<T>, decltype(d)> dw;
|
|
549
|
+
return PromoteTo(dw, v);
|
|
550
|
+
}
|
|
551
|
+
|
|
535
552
|
// ------------------------------ SaturatedAdd
|
|
536
553
|
|
|
537
554
|
// Returns a + b clamped to the destination range.
|
|
@@ -603,57 +620,12 @@ HWY_API Vec1<uint16_t> AverageRound(const Vec1<uint16_t> a,
|
|
|
603
620
|
|
|
604
621
|
template <typename T>
|
|
605
622
|
HWY_API Vec1<T> Abs(const Vec1<T> a) {
|
|
606
|
-
|
|
607
|
-
if (i >= 0 || i == hwy::LimitsMin<T>()) return a;
|
|
608
|
-
return Vec1<T>(static_cast<T>(-i & T{-1}));
|
|
609
|
-
}
|
|
610
|
-
HWY_API Vec1<float> Abs(Vec1<float> a) {
|
|
611
|
-
int32_t i;
|
|
612
|
-
CopyBytes<sizeof(i)>(&a.raw, &i);
|
|
613
|
-
i &= 0x7FFFFFFF;
|
|
614
|
-
CopyBytes<sizeof(i)>(&i, &a.raw);
|
|
615
|
-
return a;
|
|
616
|
-
}
|
|
617
|
-
HWY_API Vec1<double> Abs(Vec1<double> a) {
|
|
618
|
-
int64_t i;
|
|
619
|
-
CopyBytes<sizeof(i)>(&a.raw, &i);
|
|
620
|
-
i &= 0x7FFFFFFFFFFFFFFFL;
|
|
621
|
-
CopyBytes<sizeof(i)>(&i, &a.raw);
|
|
622
|
-
return a;
|
|
623
|
+
return Vec1<T>(ScalarAbs(a.raw));
|
|
623
624
|
}
|
|
624
625
|
|
|
625
626
|
// ------------------------------ Min/Max
|
|
626
627
|
|
|
627
628
|
// <cmath> may be unavailable, so implement our own.
|
|
628
|
-
namespace detail {
|
|
629
|
-
|
|
630
|
-
static inline float Abs(float f) {
|
|
631
|
-
uint32_t i;
|
|
632
|
-
CopyBytes<4>(&f, &i);
|
|
633
|
-
i &= 0x7FFFFFFFu;
|
|
634
|
-
CopyBytes<4>(&i, &f);
|
|
635
|
-
return f;
|
|
636
|
-
}
|
|
637
|
-
static inline double Abs(double f) {
|
|
638
|
-
uint64_t i;
|
|
639
|
-
CopyBytes<8>(&f, &i);
|
|
640
|
-
i &= 0x7FFFFFFFFFFFFFFFull;
|
|
641
|
-
CopyBytes<8>(&i, &f);
|
|
642
|
-
return f;
|
|
643
|
-
}
|
|
644
|
-
|
|
645
|
-
static inline bool SignBit(float f) {
|
|
646
|
-
uint32_t i;
|
|
647
|
-
CopyBytes<4>(&f, &i);
|
|
648
|
-
return (i >> 31) != 0;
|
|
649
|
-
}
|
|
650
|
-
static inline bool SignBit(double f) {
|
|
651
|
-
uint64_t i;
|
|
652
|
-
CopyBytes<8>(&f, &i);
|
|
653
|
-
return (i >> 63) != 0;
|
|
654
|
-
}
|
|
655
|
-
|
|
656
|
-
} // namespace detail
|
|
657
629
|
|
|
658
630
|
template <typename T, HWY_IF_NOT_FLOAT(T)>
|
|
659
631
|
HWY_API Vec1<T> Min(const Vec1<T> a, const Vec1<T> b) {
|
|
@@ -662,8 +634,8 @@ HWY_API Vec1<T> Min(const Vec1<T> a, const Vec1<T> b) {
|
|
|
662
634
|
|
|
663
635
|
template <typename T, HWY_IF_FLOAT(T)>
|
|
664
636
|
HWY_API Vec1<T> Min(const Vec1<T> a, const Vec1<T> b) {
|
|
665
|
-
if (
|
|
666
|
-
if (
|
|
637
|
+
if (ScalarIsNaN(a.raw)) return b;
|
|
638
|
+
if (ScalarIsNaN(b.raw)) return a;
|
|
667
639
|
return Vec1<T>(HWY_MIN(a.raw, b.raw));
|
|
668
640
|
}
|
|
669
641
|
|
|
@@ -674,8 +646,8 @@ HWY_API Vec1<T> Max(const Vec1<T> a, const Vec1<T> b) {
|
|
|
674
646
|
|
|
675
647
|
template <typename T, HWY_IF_FLOAT(T)>
|
|
676
648
|
HWY_API Vec1<T> Max(const Vec1<T> a, const Vec1<T> b) {
|
|
677
|
-
if (
|
|
678
|
-
if (
|
|
649
|
+
if (ScalarIsNaN(a.raw)) return b;
|
|
650
|
+
if (ScalarIsNaN(b.raw)) return a;
|
|
679
651
|
return Vec1<T>(HWY_MAX(a.raw, b.raw));
|
|
680
652
|
}
|
|
681
653
|
|
|
@@ -716,21 +688,24 @@ HWY_API Vec1<T> operator*(const Vec1<T> a, const Vec1<T> b) {
|
|
|
716
688
|
static_cast<uint64_t>(b.raw)));
|
|
717
689
|
}
|
|
718
690
|
|
|
719
|
-
template <typename T>
|
|
691
|
+
template <typename T, HWY_IF_FLOAT(T)>
|
|
720
692
|
HWY_API Vec1<T> operator/(const Vec1<T> a, const Vec1<T> b) {
|
|
721
693
|
return Vec1<T>(a.raw / b.raw);
|
|
722
694
|
}
|
|
723
695
|
|
|
724
|
-
// Returns the upper
|
|
725
|
-
|
|
726
|
-
|
|
696
|
+
// Returns the upper sizeof(T)*8 bits of a * b in each lane.
|
|
697
|
+
template <class T, HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2) | (1 << 4)),
|
|
698
|
+
HWY_IF_NOT_FLOAT_NOR_SPECIAL(T)>
|
|
699
|
+
HWY_API Vec1<T> MulHigh(const Vec1<T> a, const Vec1<T> b) {
|
|
700
|
+
using TW = MakeWide<T>;
|
|
701
|
+
return Vec1<T>(static_cast<T>(
|
|
702
|
+
(static_cast<TW>(a.raw) * static_cast<TW>(b.raw)) >> (sizeof(T) * 8)));
|
|
727
703
|
}
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
return Vec1<
|
|
733
|
-
(static_cast<uint32_t>(a.raw) * static_cast<uint32_t>(b.raw)) >> 16));
|
|
704
|
+
template <class T, HWY_IF_UI64(T)>
|
|
705
|
+
HWY_API Vec1<T> MulHigh(const Vec1<T> a, const Vec1<T> b) {
|
|
706
|
+
T hi;
|
|
707
|
+
Mul128(a.raw, b.raw, &hi);
|
|
708
|
+
return Vec1<T>(hi);
|
|
734
709
|
}
|
|
735
710
|
|
|
736
711
|
HWY_API Vec1<int16_t> MulFixedPoint15(Vec1<int16_t> a, Vec1<int16_t> b) {
|
|
@@ -763,23 +738,23 @@ HWY_API Vec1<T> AbsDiff(const Vec1<T> a, const Vec1<T> b) {
|
|
|
763
738
|
|
|
764
739
|
// ------------------------------ Floating-point multiply-add variants
|
|
765
740
|
|
|
766
|
-
template <typename T>
|
|
741
|
+
template <typename T, HWY_IF_FLOAT(T)>
|
|
767
742
|
HWY_API Vec1<T> MulAdd(const Vec1<T> mul, const Vec1<T> x, const Vec1<T> add) {
|
|
768
743
|
return mul * x + add;
|
|
769
744
|
}
|
|
770
745
|
|
|
771
|
-
template <typename T>
|
|
746
|
+
template <typename T, HWY_IF_FLOAT(T)>
|
|
772
747
|
HWY_API Vec1<T> NegMulAdd(const Vec1<T> mul, const Vec1<T> x,
|
|
773
748
|
const Vec1<T> add) {
|
|
774
749
|
return add - mul * x;
|
|
775
750
|
}
|
|
776
751
|
|
|
777
|
-
template <typename T>
|
|
752
|
+
template <typename T, HWY_IF_FLOAT(T)>
|
|
778
753
|
HWY_API Vec1<T> MulSub(const Vec1<T> mul, const Vec1<T> x, const Vec1<T> sub) {
|
|
779
754
|
return mul * x - sub;
|
|
780
755
|
}
|
|
781
756
|
|
|
782
|
-
template <typename T>
|
|
757
|
+
template <typename T, HWY_IF_FLOAT(T)>
|
|
783
758
|
HWY_API Vec1<T> NegMulSub(const Vec1<T> mul, const Vec1<T> x,
|
|
784
759
|
const Vec1<T> sub) {
|
|
785
760
|
return Neg(mul) * x - sub;
|
|
@@ -842,14 +817,17 @@ HWY_API Vec1<T> Round(const Vec1<T> v) {
|
|
|
842
817
|
if (!(Abs(v).raw < MantissaEnd<T>())) { // Huge or NaN
|
|
843
818
|
return v;
|
|
844
819
|
}
|
|
845
|
-
const T
|
|
846
|
-
const
|
|
847
|
-
|
|
820
|
+
const T k0 = ConvertScalarTo<T>(0);
|
|
821
|
+
const T bias = ConvertScalarTo<T>(v.raw < k0 ? -0.5 : 0.5);
|
|
822
|
+
const TI rounded = ConvertScalarTo<TI>(v.raw + bias);
|
|
823
|
+
if (rounded == 0) return CopySignToAbs(Vec1<T>(k0), v);
|
|
824
|
+
TI offset = 0;
|
|
848
825
|
// Round to even
|
|
849
|
-
if ((rounded & 1) &&
|
|
850
|
-
|
|
826
|
+
if ((rounded & 1) && ScalarAbs(ConvertScalarTo<T>(rounded) - v.raw) ==
|
|
827
|
+
ConvertScalarTo<T>(0.5)) {
|
|
828
|
+
offset = v.raw < k0 ? -1 : 1;
|
|
851
829
|
}
|
|
852
|
-
return Vec1<T>(
|
|
830
|
+
return Vec1<T>(ConvertScalarTo<T>(rounded - offset));
|
|
853
831
|
}
|
|
854
832
|
|
|
855
833
|
// Round-to-nearest even.
|
|
@@ -858,23 +836,26 @@ HWY_API Vec1<int32_t> NearestInt(const Vec1<float> v) {
|
|
|
858
836
|
using TI = int32_t;
|
|
859
837
|
|
|
860
838
|
const T abs = Abs(v).raw;
|
|
861
|
-
const bool is_sign =
|
|
839
|
+
const bool is_sign = ScalarSignBit(v.raw);
|
|
862
840
|
|
|
863
841
|
if (!(abs < MantissaEnd<T>())) { // Huge or NaN
|
|
864
842
|
// Check if too large to cast or NaN
|
|
865
|
-
if (!(abs <=
|
|
843
|
+
if (!(abs <= ConvertScalarTo<T>(LimitsMax<TI>()))) {
|
|
866
844
|
return Vec1<TI>(is_sign ? LimitsMin<TI>() : LimitsMax<TI>());
|
|
867
845
|
}
|
|
868
|
-
return Vec1<int32_t>(
|
|
846
|
+
return Vec1<int32_t>(ConvertScalarTo<TI>(v.raw));
|
|
869
847
|
}
|
|
870
|
-
const T bias =
|
|
871
|
-
|
|
848
|
+
const T bias =
|
|
849
|
+
ConvertScalarTo<T>(v.raw < ConvertScalarTo<T>(0.0) ? -0.5 : 0.5);
|
|
850
|
+
const TI rounded = ConvertScalarTo<TI>(v.raw + bias);
|
|
872
851
|
if (rounded == 0) return Vec1<int32_t>(0);
|
|
852
|
+
TI offset = 0;
|
|
873
853
|
// Round to even
|
|
874
|
-
if ((rounded & 1) &&
|
|
875
|
-
|
|
854
|
+
if ((rounded & 1) && ScalarAbs(ConvertScalarTo<T>(rounded) - v.raw) ==
|
|
855
|
+
ConvertScalarTo<T>(0.5)) {
|
|
856
|
+
offset = is_sign ? -1 : 1;
|
|
876
857
|
}
|
|
877
|
-
return Vec1<TI>(rounded);
|
|
858
|
+
return Vec1<TI>(rounded - offset);
|
|
878
859
|
}
|
|
879
860
|
|
|
880
861
|
template <typename T>
|
|
@@ -883,9 +864,9 @@ HWY_API Vec1<T> Trunc(const Vec1<T> v) {
|
|
|
883
864
|
if (!(Abs(v).raw <= MantissaEnd<T>())) { // Huge or NaN
|
|
884
865
|
return v;
|
|
885
866
|
}
|
|
886
|
-
const TI truncated =
|
|
867
|
+
const TI truncated = ConvertScalarTo<TI>(v.raw);
|
|
887
868
|
if (truncated == 0) return CopySignToAbs(Vec1<T>(0), v);
|
|
888
|
-
return Vec1<T>(
|
|
869
|
+
return Vec1<T>(ConvertScalarTo<T>(truncated));
|
|
889
870
|
}
|
|
890
871
|
|
|
891
872
|
template <typename Float, typename Bits, int kMantissaBits, int kExponentBits,
|
|
@@ -1009,14 +990,16 @@ HWY_API Mask1<T> operator>=(const Vec1<T> a, const Vec1<T> b) {
|
|
|
1009
990
|
template <typename T>
|
|
1010
991
|
HWY_API Mask1<T> IsNaN(const Vec1<T> v) {
|
|
1011
992
|
// std::isnan returns false for 0x7F..FF in clang AVX3 builds, so DIY.
|
|
1012
|
-
|
|
1013
|
-
CopySameSize(&v, &bits);
|
|
1014
|
-
bits += bits;
|
|
1015
|
-
bits >>= 1; // clear sign bit
|
|
1016
|
-
// NaN if all exponent bits are set and the mantissa is not zero.
|
|
1017
|
-
return Mask1<T>::FromBool(bits > ExponentMask<T>());
|
|
993
|
+
return Mask1<T>::FromBool(ScalarIsNaN(v.raw));
|
|
1018
994
|
}
|
|
1019
995
|
|
|
996
|
+
// Per-target flag to prevent generic_ops-inl.h from defining IsInf / IsFinite.
|
|
997
|
+
#ifdef HWY_NATIVE_ISINF
|
|
998
|
+
#undef HWY_NATIVE_ISINF
|
|
999
|
+
#else
|
|
1000
|
+
#define HWY_NATIVE_ISINF
|
|
1001
|
+
#endif
|
|
1002
|
+
|
|
1020
1003
|
HWY_API Mask1<float> IsInf(const Vec1<float> v) {
|
|
1021
1004
|
const Sisd<float> d;
|
|
1022
1005
|
const RebindToUnsigned<decltype(d)> du;
|
|
@@ -1126,6 +1109,9 @@ HWY_API void StoreN(VFromD<D> v, D d, T* HWY_RESTRICT p,
|
|
|
1126
1109
|
}
|
|
1127
1110
|
}
|
|
1128
1111
|
|
|
1112
|
+
// ------------------------------ Tuples
|
|
1113
|
+
#include "hwy/ops/inside-inl.h"
|
|
1114
|
+
|
|
1129
1115
|
// ------------------------------ LoadInterleaved2/3/4
|
|
1130
1116
|
|
|
1131
1117
|
// Per-target flag to prevent generic_ops-inl.h from defining StoreInterleaved2.
|
|
@@ -1205,8 +1191,9 @@ HWY_API void Stream(const Vec1<T> v, D d, T* HWY_RESTRICT aligned) {
|
|
|
1205
1191
|
template <class D, typename T = TFromD<D>, typename TI>
|
|
1206
1192
|
HWY_API void ScatterOffset(Vec1<T> v, D d, T* base, Vec1<TI> offset) {
|
|
1207
1193
|
static_assert(sizeof(T) == sizeof(TI), "Index/lane size must match");
|
|
1208
|
-
|
|
1209
|
-
|
|
1194
|
+
const intptr_t addr =
|
|
1195
|
+
reinterpret_cast<intptr_t>(base) + static_cast<intptr_t>(offset.raw);
|
|
1196
|
+
Store(v, d, reinterpret_cast<T*>(addr));
|
|
1210
1197
|
}
|
|
1211
1198
|
|
|
1212
1199
|
template <class D, typename T = TFromD<D>, typename TI>
|
|
@@ -1231,27 +1218,36 @@ HWY_API void MaskedScatterIndex(Vec1<T> v, Mask1<T> m, D d,
|
|
|
1231
1218
|
#define HWY_NATIVE_GATHER
|
|
1232
1219
|
#endif
|
|
1233
1220
|
|
|
1234
|
-
template <class D, typename T = TFromD<D
|
|
1235
|
-
HWY_API Vec1<T> GatherOffset(D d, const T* base, Vec1<
|
|
1236
|
-
|
|
1221
|
+
template <class D, typename T = TFromD<D>>
|
|
1222
|
+
HWY_API Vec1<T> GatherOffset(D d, const T* base, Vec1<MakeSigned<T>> offset) {
|
|
1223
|
+
HWY_DASSERT(offset.raw >= 0);
|
|
1237
1224
|
const intptr_t addr =
|
|
1238
1225
|
reinterpret_cast<intptr_t>(base) + static_cast<intptr_t>(offset.raw);
|
|
1239
1226
|
return Load(d, reinterpret_cast<const T*>(addr));
|
|
1240
1227
|
}
|
|
1241
1228
|
|
|
1242
|
-
template <class D, typename T = TFromD<D
|
|
1243
|
-
HWY_API Vec1<T> GatherIndex(D d, const T* HWY_RESTRICT base,
|
|
1244
|
-
|
|
1229
|
+
template <class D, typename T = TFromD<D>>
|
|
1230
|
+
HWY_API Vec1<T> GatherIndex(D d, const T* HWY_RESTRICT base,
|
|
1231
|
+
Vec1<MakeSigned<T>> index) {
|
|
1232
|
+
HWY_DASSERT(index.raw >= 0);
|
|
1245
1233
|
return Load(d, base + index.raw);
|
|
1246
1234
|
}
|
|
1247
1235
|
|
|
1248
|
-
template <class D, typename T = TFromD<D
|
|
1236
|
+
template <class D, typename T = TFromD<D>>
|
|
1249
1237
|
HWY_API Vec1<T> MaskedGatherIndex(Mask1<T> m, D d, const T* HWY_RESTRICT base,
|
|
1250
|
-
Vec1<
|
|
1251
|
-
|
|
1238
|
+
Vec1<MakeSigned<T>> index) {
|
|
1239
|
+
HWY_DASSERT(index.raw >= 0);
|
|
1252
1240
|
return MaskedLoad(m, d, base + index.raw);
|
|
1253
1241
|
}
|
|
1254
1242
|
|
|
1243
|
+
template <class D, typename T = TFromD<D>>
|
|
1244
|
+
HWY_API Vec1<T> MaskedGatherIndexOr(Vec1<T> no, Mask1<T> m, D d,
|
|
1245
|
+
const T* HWY_RESTRICT base,
|
|
1246
|
+
Vec1<MakeSigned<T>> index) {
|
|
1247
|
+
HWY_DASSERT(index.raw >= 0);
|
|
1248
|
+
return MaskedLoadOr(no, m, d, base + index.raw);
|
|
1249
|
+
}
|
|
1250
|
+
|
|
1255
1251
|
// ================================================== CONVERT
|
|
1256
1252
|
|
|
1257
1253
|
// ConvertTo and DemoteTo with floating-point input and integer output truncate
|
|
@@ -1260,73 +1256,111 @@ HWY_API Vec1<T> MaskedGatherIndex(Mask1<T> m, D d, const T* HWY_RESTRICT base,
|
|
|
1260
1256
|
namespace detail {
|
|
1261
1257
|
|
|
1262
1258
|
template <class ToT, class FromT>
|
|
1263
|
-
HWY_INLINE ToT CastValueForF2IConv(
|
|
1264
|
-
FromT val) {
|
|
1259
|
+
HWY_INLINE ToT CastValueForF2IConv(FromT val) {
|
|
1265
1260
|
// Prevent ubsan errors when converting float to narrower integer
|
|
1266
1261
|
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
constexpr
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
//
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
constexpr FromT kSmallestOutOfToTRangePosVal =
|
|
1301
|
-
(sizeof(ToT) * 8 <= static_cast<size_t>(MantissaBits<FromT>()) + 2)
|
|
1302
|
-
? static_cast<FromT>(LimitsMax<ToT>())
|
|
1303
|
-
: static_cast<FromT>(-static_cast<FromT>(LimitsMin<ToT>()));
|
|
1304
|
-
|
|
1305
|
-
if (IsInf(Vec1<FromT>(val)).bits ||
|
|
1306
|
-
detail::Abs(val) >= kSmallestOutOfToTRangePosVal) {
|
|
1307
|
-
return detail::SignBit(val) ? LimitsMin<ToT>() : LimitsMax<ToT>();
|
|
1308
|
-
} else {
|
|
1309
|
-
return static_cast<ToT>(val);
|
|
1310
|
-
}
|
|
1262
|
+
using FromTU = MakeUnsigned<FromT>;
|
|
1263
|
+
using ToTU = MakeUnsigned<ToT>;
|
|
1264
|
+
|
|
1265
|
+
constexpr unsigned kMaxExpField =
|
|
1266
|
+
static_cast<unsigned>(MaxExponentField<FromT>());
|
|
1267
|
+
constexpr unsigned kExpBias = kMaxExpField >> 1;
|
|
1268
|
+
constexpr unsigned kMinOutOfRangeExpField = static_cast<unsigned>(HWY_MIN(
|
|
1269
|
+
kExpBias + sizeof(ToT) * 8 - static_cast<unsigned>(IsSigned<ToT>()),
|
|
1270
|
+
kMaxExpField));
|
|
1271
|
+
|
|
1272
|
+
// If ToT is signed, compare only the exponent bits of val against
|
|
1273
|
+
// kMinOutOfRangeExpField.
|
|
1274
|
+
//
|
|
1275
|
+
// Otherwise, if ToT is unsigned, compare the sign bit plus exponent bits of
|
|
1276
|
+
// val against kMinOutOfRangeExpField as a negative value is outside of the
|
|
1277
|
+
// range of an unsigned integer type.
|
|
1278
|
+
const FromT val_to_compare =
|
|
1279
|
+
static_cast<FromT>(IsSigned<ToT>() ? ScalarAbs(val) : val);
|
|
1280
|
+
|
|
1281
|
+
// val is within the range of ToT if
|
|
1282
|
+
// (BitCastScalar<FromTU>(val_to_compare) >> MantissaBits<FromT>()) is less
|
|
1283
|
+
// than kMinOutOfRangeExpField
|
|
1284
|
+
//
|
|
1285
|
+
// Otherwise, val is either outside of the range of ToT or equal to
|
|
1286
|
+
// LimitsMin<ToT>() if
|
|
1287
|
+
// (BitCastScalar<FromTU>(val_to_compare) >> MantissaBits<FromT>()) is greater
|
|
1288
|
+
// than or equal to kMinOutOfRangeExpField.
|
|
1289
|
+
|
|
1290
|
+
return (static_cast<unsigned>(BitCastScalar<FromTU>(val_to_compare) >>
|
|
1291
|
+
MantissaBits<FromT>()) < kMinOutOfRangeExpField)
|
|
1292
|
+
? static_cast<ToT>(val)
|
|
1293
|
+
: static_cast<ToT>(static_cast<ToTU>(LimitsMax<ToT>()) +
|
|
1294
|
+
static_cast<ToTU>(ScalarSignBit(val)));
|
|
1311
1295
|
}
|
|
1312
1296
|
|
|
1313
1297
|
template <class ToT, class ToTypeTag, class FromT>
|
|
1314
1298
|
HWY_INLINE ToT CastValueForPromoteTo(ToTypeTag /* to_type_tag */, FromT val) {
|
|
1315
|
-
return
|
|
1299
|
+
return ConvertScalarTo<ToT>(val);
|
|
1316
1300
|
}
|
|
1317
1301
|
|
|
1318
1302
|
template <class ToT>
|
|
1319
|
-
HWY_INLINE ToT CastValueForPromoteTo(hwy::SignedTag to_type_tag
|
|
1320
|
-
|
|
1303
|
+
HWY_INLINE ToT CastValueForPromoteTo(hwy::SignedTag /*to_type_tag*/,
|
|
1304
|
+
float val) {
|
|
1305
|
+
return CastValueForF2IConv<ToT>(val);
|
|
1321
1306
|
}
|
|
1322
1307
|
|
|
1323
1308
|
template <class ToT>
|
|
1324
|
-
HWY_INLINE ToT CastValueForPromoteTo(hwy::UnsignedTag to_type_tag
|
|
1325
|
-
|
|
1309
|
+
HWY_INLINE ToT CastValueForPromoteTo(hwy::UnsignedTag /*to_type_tag*/,
|
|
1310
|
+
float val) {
|
|
1311
|
+
return CastValueForF2IConv<ToT>(val);
|
|
1312
|
+
}
|
|
1313
|
+
|
|
1314
|
+
// If val is within the range of ToT, CastValueForInRangeF2IConv<ToT>(val)
|
|
1315
|
+
// returns static_cast<ToT>(val)
|
|
1316
|
+
//
|
|
1317
|
+
// Otherwise, CastValueForInRangeF2IConv<ToT>(val) returns an
|
|
1318
|
+
// implementation-defined result if val is not within the range of ToT.
|
|
1319
|
+
template <class ToT, class FromT>
|
|
1320
|
+
HWY_INLINE ToT CastValueForInRangeF2IConv(FromT val) {
|
|
1321
|
+
// Prevent ubsan errors when converting float to narrower integer
|
|
1322
|
+
|
|
1323
|
+
using FromTU = MakeUnsigned<FromT>;
|
|
1324
|
+
|
|
1325
|
+
constexpr unsigned kMaxExpField =
|
|
1326
|
+
static_cast<unsigned>(MaxExponentField<FromT>());
|
|
1327
|
+
constexpr unsigned kExpBias = kMaxExpField >> 1;
|
|
1328
|
+
constexpr unsigned kMinOutOfRangeExpField = static_cast<unsigned>(HWY_MIN(
|
|
1329
|
+
kExpBias + sizeof(ToT) * 8 - static_cast<unsigned>(IsSigned<ToT>()),
|
|
1330
|
+
kMaxExpField));
|
|
1331
|
+
|
|
1332
|
+
// If ToT is signed, compare only the exponent bits of val against
|
|
1333
|
+
// kMinOutOfRangeExpField.
|
|
1334
|
+
//
|
|
1335
|
+
// Otherwise, if ToT is unsigned, compare the sign bit plus exponent bits of
|
|
1336
|
+
// val against kMinOutOfRangeExpField as a negative value is outside of the
|
|
1337
|
+
// range of an unsigned integer type.
|
|
1338
|
+
const FromT val_to_compare =
|
|
1339
|
+
static_cast<FromT>(IsSigned<ToT>() ? ScalarAbs(val) : val);
|
|
1340
|
+
|
|
1341
|
+
// val is within the range of ToT if
|
|
1342
|
+
// (BitCastScalar<FromTU>(val_to_compare) >> MantissaBits<FromT>()) is less
|
|
1343
|
+
// than kMinOutOfRangeExpField
|
|
1344
|
+
//
|
|
1345
|
+
// Otherwise, val is either outside of the range of ToT or equal to
|
|
1346
|
+
// LimitsMin<ToT>() if
|
|
1347
|
+
// (BitCastScalar<FromTU>(val_to_compare) >> MantissaBits<FromT>()) is greater
|
|
1348
|
+
// than or equal to kMinOutOfRangeExpField.
|
|
1349
|
+
|
|
1350
|
+
return (static_cast<unsigned>(BitCastScalar<FromTU>(val_to_compare) >>
|
|
1351
|
+
MantissaBits<FromT>()) < kMinOutOfRangeExpField)
|
|
1352
|
+
? static_cast<ToT>(val)
|
|
1353
|
+
: static_cast<ToT>(LimitsMin<ToT>());
|
|
1326
1354
|
}
|
|
1327
1355
|
|
|
1328
1356
|
} // namespace detail
|
|
1329
1357
|
|
|
1358
|
+
#ifdef HWY_NATIVE_PROMOTE_F16_TO_F64
|
|
1359
|
+
#undef HWY_NATIVE_PROMOTE_F16_TO_F64
|
|
1360
|
+
#else
|
|
1361
|
+
#define HWY_NATIVE_PROMOTE_F16_TO_F64
|
|
1362
|
+
#endif
|
|
1363
|
+
|
|
1330
1364
|
template <class DTo, typename TTo = TFromD<DTo>, typename TFrom>
|
|
1331
1365
|
HWY_API Vec1<TTo> PromoteTo(DTo /* tag */, Vec1<TFrom> from) {
|
|
1332
1366
|
static_assert(sizeof(TTo) > sizeof(TFrom), "Not promoting");
|
|
@@ -1335,6 +1369,18 @@ HWY_API Vec1<TTo> PromoteTo(DTo /* tag */, Vec1<TFrom> from) {
|
|
|
1335
1369
|
detail::CastValueForPromoteTo<TTo>(hwy::TypeTag<TTo>(), from.raw));
|
|
1336
1370
|
}
|
|
1337
1371
|
|
|
1372
|
+
#ifdef HWY_NATIVE_F32_TO_UI64_PROMOTE_IN_RANGE_TO
|
|
1373
|
+
#undef HWY_NATIVE_F32_TO_UI64_PROMOTE_IN_RANGE_TO
|
|
1374
|
+
#else
|
|
1375
|
+
#define HWY_NATIVE_F32_TO_UI64_PROMOTE_IN_RANGE_TO
|
|
1376
|
+
#endif
|
|
1377
|
+
|
|
1378
|
+
template <class DTo, HWY_IF_UI64_D(DTo)>
|
|
1379
|
+
HWY_API VFromD<DTo> PromoteInRangeTo(DTo /* tag */, Vec1<float> from) {
|
|
1380
|
+
using TTo = TFromD<DTo>;
|
|
1381
|
+
return Vec1<TTo>(detail::CastValueForInRangeF2IConv<TTo>(from.raw));
|
|
1382
|
+
}
|
|
1383
|
+
|
|
1338
1384
|
// MSVC 19.10 cannot deduce the argument type if HWY_IF_FLOAT(TFrom) is here,
|
|
1339
1385
|
// so we overload for TFrom=double and TTo={float,int32_t}.
|
|
1340
1386
|
template <class D, HWY_IF_F32_D(D)>
|
|
@@ -1342,16 +1388,15 @@ HWY_API Vec1<float> DemoteTo(D /* tag */, Vec1<double> from) {
|
|
|
1342
1388
|
// Prevent ubsan errors when converting float to narrower integer/float
|
|
1343
1389
|
if (IsInf(from).bits ||
|
|
1344
1390
|
Abs(from).raw > static_cast<double>(HighestValue<float>())) {
|
|
1345
|
-
return Vec1<float>(
|
|
1346
|
-
|
|
1391
|
+
return Vec1<float>(ScalarSignBit(from.raw) ? LowestValue<float>()
|
|
1392
|
+
: HighestValue<float>());
|
|
1347
1393
|
}
|
|
1348
1394
|
return Vec1<float>(static_cast<float>(from.raw));
|
|
1349
1395
|
}
|
|
1350
1396
|
template <class D, HWY_IF_UI32_D(D)>
|
|
1351
1397
|
HWY_API VFromD<D> DemoteTo(D /* tag */, Vec1<double> from) {
|
|
1352
1398
|
// Prevent ubsan errors when converting int32_t to narrower integer/int32_t
|
|
1353
|
-
return Vec1<TFromD<D>>(detail::CastValueForF2IConv<TFromD<D>>(
|
|
1354
|
-
hwy::TypeTag<TFromD<D>>(), from.raw));
|
|
1399
|
+
return Vec1<TFromD<D>>(detail::CastValueForF2IConv<TFromD<D>>(from.raw));
|
|
1355
1400
|
}
|
|
1356
1401
|
|
|
1357
1402
|
template <class DTo, typename TTo = TFromD<DTo>, typename TFrom,
|
|
@@ -1365,15 +1410,30 @@ HWY_API Vec1<TTo> DemoteTo(DTo /* tag */, Vec1<TFrom> from) {
|
|
|
1365
1410
|
return Vec1<TTo>(static_cast<TTo>(from.raw));
|
|
1366
1411
|
}
|
|
1367
1412
|
|
|
1413
|
+
// Disable the default unsigned to signed DemoteTo implementation in
|
|
1414
|
+
// generic_ops-inl.h on SCALAR as the SCALAR target has a target-specific
|
|
1415
|
+
// implementation of the unsigned to signed DemoteTo op and as ReorderDemote2To
|
|
1416
|
+
// is not supported on the SCALAR target
|
|
1417
|
+
|
|
1418
|
+
// NOTE: hwy::EnableIf<!hwy::IsSame<V, V>()>* = nullptr is used instead of
|
|
1419
|
+
// hwy::EnableIf<false>* = nullptr to avoid compiler errors since
|
|
1420
|
+
// !hwy::IsSame<V, V>() is always false and as !hwy::IsSame<V, V>() will cause
|
|
1421
|
+
// SFINAE to occur instead of a hard error due to a dependency on the V template
|
|
1422
|
+
// argument
|
|
1423
|
+
#undef HWY_IF_U2I_DEMOTE_FROM_LANE_SIZE_V
|
|
1424
|
+
#define HWY_IF_U2I_DEMOTE_FROM_LANE_SIZE_V(V) \
|
|
1425
|
+
hwy::EnableIf<!hwy::IsSame<V, V>()>* = nullptr
|
|
1426
|
+
|
|
1368
1427
|
template <class DTo, typename TTo = TFromD<DTo>, typename TFrom,
|
|
1369
|
-
HWY_IF_UNSIGNED(TFrom),
|
|
1428
|
+
HWY_IF_UNSIGNED(TFrom), HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(DTo)>
|
|
1370
1429
|
HWY_API Vec1<TTo> DemoteTo(DTo /* tag */, Vec1<TFrom> from) {
|
|
1371
1430
|
static_assert(!IsFloat<TFrom>(), "TFrom=double are handled above");
|
|
1372
1431
|
static_assert(sizeof(TTo) < sizeof(TFrom), "Not demoting");
|
|
1373
1432
|
|
|
1433
|
+
const auto max = static_cast<MakeUnsigned<TTo>>(LimitsMax<TTo>());
|
|
1434
|
+
|
|
1374
1435
|
// Int to int: choose closest value in TTo to `from` (avoids UB)
|
|
1375
|
-
|
|
1376
|
-
return Vec1<TTo>(static_cast<TTo>(from.raw));
|
|
1436
|
+
return Vec1<TTo>(static_cast<TTo>(HWY_MIN(from.raw, max)));
|
|
1377
1437
|
}
|
|
1378
1438
|
|
|
1379
1439
|
template <class DTo, typename TTo = TFromD<DTo>, typename TFrom,
|
|
@@ -1383,6 +1443,19 @@ HWY_API Vec1<TTo> DemoteTo(DTo /* tag */, Vec1<TFrom> from) {
|
|
|
1383
1443
|
return Vec1<TTo>(static_cast<TTo>(from.raw));
|
|
1384
1444
|
}
|
|
1385
1445
|
|
|
1446
|
+
#ifdef HWY_NATIVE_F64_TO_UI32_DEMOTE_IN_RANGE_TO
|
|
1447
|
+
#undef HWY_NATIVE_F64_TO_UI32_DEMOTE_IN_RANGE_TO
|
|
1448
|
+
#else
|
|
1449
|
+
#define HWY_NATIVE_F64_TO_UI32_DEMOTE_IN_RANGE_TO
|
|
1450
|
+
#endif
|
|
1451
|
+
|
|
1452
|
+
template <class D32, HWY_IF_UI32_D(D32)>
|
|
1453
|
+
HWY_API VFromD<D32> DemoteInRangeTo(D32 /*d32*/,
|
|
1454
|
+
VFromD<Rebind<double, D32>> v) {
|
|
1455
|
+
using TTo = TFromD<D32>;
|
|
1456
|
+
return Vec1<TTo>(detail::CastValueForInRangeF2IConv<TTo>(v.raw));
|
|
1457
|
+
}
|
|
1458
|
+
|
|
1386
1459
|
// Per-target flag to prevent generic_ops-inl.h from defining f16 conversions;
|
|
1387
1460
|
// use this scalar version to verify the vector implementation.
|
|
1388
1461
|
#ifdef HWY_NATIVE_F16C
|
|
@@ -1401,11 +1474,22 @@ HWY_API Vec1<float> PromoteTo(D d, const Vec1<bfloat16_t> v) {
|
|
|
1401
1474
|
return Set(d, F32FromBF16(v.raw));
|
|
1402
1475
|
}
|
|
1403
1476
|
|
|
1477
|
+
template <class DTo, typename TFrom>
|
|
1478
|
+
HWY_API VFromD<DTo> PromoteEvenTo(DTo d_to, Vec1<TFrom> v) {
|
|
1479
|
+
return PromoteTo(d_to, v);
|
|
1480
|
+
}
|
|
1481
|
+
|
|
1404
1482
|
template <class D, HWY_IF_F16_D(D)>
|
|
1405
1483
|
HWY_API Vec1<float16_t> DemoteTo(D /* tag */, const Vec1<float> v) {
|
|
1406
1484
|
return Vec1<float16_t>(F16FromF32(v.raw));
|
|
1407
1485
|
}
|
|
1408
1486
|
|
|
1487
|
+
#ifdef HWY_NATIVE_DEMOTE_F32_TO_BF16
|
|
1488
|
+
#undef HWY_NATIVE_DEMOTE_F32_TO_BF16
|
|
1489
|
+
#else
|
|
1490
|
+
#define HWY_NATIVE_DEMOTE_F32_TO_BF16
|
|
1491
|
+
#endif
|
|
1492
|
+
|
|
1409
1493
|
template <class D, HWY_IF_BF16_D(D)>
|
|
1410
1494
|
HWY_API Vec1<bfloat16_t> DemoteTo(D d, const Vec1<float> v) {
|
|
1411
1495
|
return Set(d, BF16FromF32(v.raw));
|
|
@@ -1416,8 +1500,7 @@ template <class DTo, typename TTo = TFromD<DTo>, typename TFrom,
|
|
|
1416
1500
|
HWY_API Vec1<TTo> ConvertTo(DTo /* tag */, Vec1<TFrom> from) {
|
|
1417
1501
|
static_assert(sizeof(TTo) == sizeof(TFrom), "Should have same size");
|
|
1418
1502
|
// float## -> int##: return closest representable value.
|
|
1419
|
-
return Vec1<TTo>(
|
|
1420
|
-
detail::CastValueForF2IConv<TTo>(hwy::TypeTag<TTo>(), from.raw));
|
|
1503
|
+
return Vec1<TTo>(detail::CastValueForF2IConv<TTo>(from.raw));
|
|
1421
1504
|
}
|
|
1422
1505
|
|
|
1423
1506
|
template <class DTo, typename TTo = TFromD<DTo>, typename TFrom,
|
|
@@ -1428,6 +1511,19 @@ HWY_API Vec1<TTo> ConvertTo(DTo /* tag */, Vec1<TFrom> from) {
|
|
|
1428
1511
|
return Vec1<TTo>(static_cast<TTo>(from.raw));
|
|
1429
1512
|
}
|
|
1430
1513
|
|
|
1514
|
+
#ifdef HWY_NATIVE_F2I_CONVERT_IN_RANGE_TO
|
|
1515
|
+
#undef HWY_NATIVE_F2I_CONVERT_IN_RANGE_TO
|
|
1516
|
+
#else
|
|
1517
|
+
#define HWY_NATIVE_F2I_CONVERT_IN_RANGE_TO
|
|
1518
|
+
#endif
|
|
1519
|
+
|
|
1520
|
+
template <class DI, HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(DI),
|
|
1521
|
+
HWY_IF_T_SIZE_ONE_OF_D(DI, (1 << 4) | (1 << 8))>
|
|
1522
|
+
HWY_API VFromD<DI> ConvertInRangeTo(DI /*di*/, VFromD<RebindToFloat<DI>> v) {
|
|
1523
|
+
using TTo = TFromD<DI>;
|
|
1524
|
+
return VFromD<DI>(detail::CastValueForInRangeF2IConv<TTo>(v.raw));
|
|
1525
|
+
}
|
|
1526
|
+
|
|
1431
1527
|
HWY_API Vec1<uint8_t> U8FromU32(const Vec1<uint32_t> v) {
|
|
1432
1528
|
return DemoteTo(Sisd<uint8_t>(), v);
|
|
1433
1529
|
}
|
|
@@ -1792,6 +1888,11 @@ HWY_API Mask1<T> LoadMaskBits(D /* tag */, const uint8_t* HWY_RESTRICT bits) {
|
|
|
1792
1888
|
return Mask1<T>::FromBool((bits[0] & 1) != 0);
|
|
1793
1889
|
}
|
|
1794
1890
|
|
|
1891
|
+
template <class D, HWY_IF_LANES_D(D, 1)>
|
|
1892
|
+
HWY_API MFromD<D> Dup128MaskFromMaskBits(D /*d*/, unsigned mask_bits) {
|
|
1893
|
+
return MFromD<D>::FromBool((mask_bits & 1) != 0);
|
|
1894
|
+
}
|
|
1895
|
+
|
|
1795
1896
|
// `p` points to at least 8 writable bytes.
|
|
1796
1897
|
template <class D, typename T = TFromD<D>>
|
|
1797
1898
|
HWY_API size_t StoreMaskBits(D d, const Mask1<T> mask, uint8_t* bits) {
|
|
@@ -1910,6 +2011,35 @@ HWY_API Vec1<int32_t> WidenMulPairwiseAdd(D32 /* tag */, Vec1<int16_t> a,
|
|
|
1910
2011
|
return Vec1<int32_t>(a.raw * b.raw);
|
|
1911
2012
|
}
|
|
1912
2013
|
|
|
2014
|
+
// ------------------------------ SatWidenMulAccumFixedPoint
|
|
2015
|
+
#ifdef HWY_NATIVE_I16_SATWIDENMULACCUMFIXEDPOINT
|
|
2016
|
+
#undef HWY_NATIVE_I16_SATWIDENMULACCUMFIXEDPOINT
|
|
2017
|
+
#else
|
|
2018
|
+
#define HWY_NATIVE_I16_SATWIDENMULACCUMFIXEDPOINT
|
|
2019
|
+
#endif
|
|
2020
|
+
|
|
2021
|
+
template <class DI32, HWY_IF_I32_D(DI32)>
|
|
2022
|
+
HWY_API VFromD<DI32> SatWidenMulAccumFixedPoint(DI32 di32,
|
|
2023
|
+
VFromD<Rebind<int16_t, DI32>> a,
|
|
2024
|
+
VFromD<Rebind<int16_t, DI32>> b,
|
|
2025
|
+
VFromD<DI32> sum) {
|
|
2026
|
+
// Multiplying static_cast<int32_t>(a.raw) by static_cast<int32_t>(b.raw)
|
|
2027
|
+
// followed by an addition of the product is okay as
|
|
2028
|
+
// (a.raw * b.raw * 2) is between -2147418112 and 2147483648 and as
|
|
2029
|
+
// a.raw * b.raw * 2 can only overflow an int32_t if both a.raw and b.raw are
|
|
2030
|
+
// equal to -32768.
|
|
2031
|
+
|
|
2032
|
+
const VFromD<DI32> product(static_cast<int32_t>(a.raw) *
|
|
2033
|
+
static_cast<int32_t>(b.raw));
|
|
2034
|
+
const VFromD<DI32> product2 = Add(product, product);
|
|
2035
|
+
|
|
2036
|
+
const auto mul_overflow =
|
|
2037
|
+
VecFromMask(di32, Eq(product2, Set(di32, LimitsMin<int32_t>())));
|
|
2038
|
+
|
|
2039
|
+
return SaturatedAdd(Sub(sum, And(BroadcastSignBit(sum), mul_overflow)),
|
|
2040
|
+
Add(product2, mul_overflow));
|
|
2041
|
+
}
|
|
2042
|
+
|
|
1913
2043
|
// ------------------------------ SatWidenMulPairwiseAdd
|
|
1914
2044
|
|
|
1915
2045
|
#ifdef HWY_NATIVE_U8_I8_SATWIDENMULPAIRWISEADD
|
|
@@ -1937,6 +2067,12 @@ HWY_API Vec1<int16_t> SatWidenMulPairwiseAdd(DI16 /* tag */, Vec1<uint8_t> a,
|
|
|
1937
2067
|
|
|
1938
2068
|
// ------------------------------ ReorderWidenMulAccumulate (MulAdd, ZipLower)
|
|
1939
2069
|
|
|
2070
|
+
#ifdef HWY_NATIVE_REORDER_WIDEN_MUL_ACC_BF16
|
|
2071
|
+
#undef HWY_NATIVE_REORDER_WIDEN_MUL_ACC_BF16
|
|
2072
|
+
#else
|
|
2073
|
+
#define HWY_NATIVE_REORDER_WIDEN_MUL_ACC_BF16
|
|
2074
|
+
#endif
|
|
2075
|
+
|
|
1940
2076
|
template <class D32, HWY_IF_F32_D(D32)>
|
|
1941
2077
|
HWY_API Vec1<float> ReorderWidenMulAccumulate(D32 /* tag */, Vec1<bfloat16_t> a,
|
|
1942
2078
|
Vec1<bfloat16_t> b,
|
|
@@ -1971,23 +2107,7 @@ HWY_API Vec1<TW> RearrangeToOddPlusEven(Vec1<TW> sum0, Vec1<TW> /* sum1 */) {
|
|
|
1971
2107
|
|
|
1972
2108
|
// ================================================== REDUCTIONS
|
|
1973
2109
|
|
|
1974
|
-
//
|
|
1975
|
-
template <class D, typename T = TFromD<D>>
|
|
1976
|
-
HWY_API Vec1<T> SumOfLanes(D /* tag */, const Vec1<T> v) {
|
|
1977
|
-
return v;
|
|
1978
|
-
}
|
|
1979
|
-
template <class D, typename T = TFromD<D>>
|
|
1980
|
-
HWY_API T ReduceSum(D /* tag */, const Vec1<T> v) {
|
|
1981
|
-
return GetLane(v);
|
|
1982
|
-
}
|
|
1983
|
-
template <class D, typename T = TFromD<D>>
|
|
1984
|
-
HWY_API Vec1<T> MinOfLanes(D /* tag */, const Vec1<T> v) {
|
|
1985
|
-
return v;
|
|
1986
|
-
}
|
|
1987
|
-
template <class D, typename T = TFromD<D>>
|
|
1988
|
-
HWY_API Vec1<T> MaxOfLanes(D /* tag */, const Vec1<T> v) {
|
|
1989
|
-
return v;
|
|
1990
|
-
}
|
|
2110
|
+
// Nothing native, generic_ops-inl defines SumOfLanes and ReduceSum.
|
|
1991
2111
|
|
|
1992
2112
|
// NOLINTNEXTLINE(google-readability-namespace-comments)
|
|
1993
2113
|
} // namespace HWY_NAMESPACE
|