@img/sharp-libvips-dev 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/include/aom/aom_encoder.h +3 -3
- package/include/aom/aomcx.h +17 -8
- package/include/expat.h +21 -10
- package/include/expat_config.h +11 -5
- package/include/ffi.h +12 -25
- package/include/fontconfig/fontconfig.h +5 -3
- package/include/freetype2/freetype/config/ftoption.h +1 -1
- package/include/gio-unix-2.0/gio/gfiledescriptorbased.h +3 -7
- package/include/gio-unix-2.0/gio/gunixinputstream.h +0 -5
- package/include/gio-unix-2.0/gio/gunixoutputstream.h +0 -5
- package/include/glib-2.0/gio/gappinfo.h +0 -7
- package/include/glib-2.0/gio/gapplication.h +6 -0
- package/include/glib-2.0/gio/gapplicationcommandline.h +12 -1
- package/include/glib-2.0/gio/gasyncinitable.h +0 -7
- package/include/glib-2.0/gio/gasyncresult.h +0 -6
- package/include/glib-2.0/gio/gbufferedinputstream.h +0 -5
- package/include/glib-2.0/gio/gbufferedoutputstream.h +0 -5
- package/include/glib-2.0/gio/gbytesicon.h +0 -5
- package/include/glib-2.0/gio/gcancellable.h +0 -5
- package/include/glib-2.0/gio/gconverter.h +0 -7
- package/include/glib-2.0/gio/gconverterinputstream.h +0 -6
- package/include/glib-2.0/gio/gconverteroutputstream.h +0 -6
- package/include/glib-2.0/gio/gdatagrambased.h +0 -7
- package/include/glib-2.0/gio/gdatainputstream.h +0 -6
- package/include/glib-2.0/gio/gdataoutputstream.h +0 -6
- package/include/glib-2.0/gio/gdbusinterface.h +0 -8
- package/include/glib-2.0/gio/gdbusinterfaceskeleton.h +0 -8
- package/include/glib-2.0/gio/gdbusmessage.h +2 -1
- package/include/glib-2.0/gio/gdbusobjectmanagerclient.h +0 -8
- package/include/glib-2.0/gio/gdbusobjectmanagerserver.h +0 -8
- package/include/glib-2.0/gio/gdbusobjectproxy.h +0 -8
- package/include/glib-2.0/gio/gdbusobjectskeleton.h +0 -8
- package/include/glib-2.0/gio/gdbusproxy.h +0 -8
- package/include/glib-2.0/gio/gdebugcontroller.h +0 -8
- package/include/glib-2.0/gio/gdebugcontrollerdbus.h +0 -7
- package/include/glib-2.0/gio/gdtlsserverconnection.h +0 -8
- package/include/glib-2.0/gio/gemblem.h +0 -5
- package/include/glib-2.0/gio/gemblemedicon.h +0 -5
- package/include/glib-2.0/gio/gfile.h +0 -10
- package/include/glib-2.0/gio/gfileenumerator.h +0 -5
- package/include/glib-2.0/gio/gfileicon.h +0 -5
- package/include/glib-2.0/gio/gfileinfo.h +0 -5
- package/include/glib-2.0/gio/gfileinputstream.h +0 -8
- package/include/glib-2.0/gio/gfileiostream.h +0 -8
- package/include/glib-2.0/gio/gfilemonitor.h +0 -5
- package/include/glib-2.0/gio/gfilenamecompleter.h +0 -5
- package/include/glib-2.0/gio/gfileoutputstream.h +0 -8
- package/include/glib-2.0/gio/gfilterinputstream.h +0 -5
- package/include/glib-2.0/gio/gfilteroutputstream.h +0 -5
- package/include/glib-2.0/gio/gicon.h +0 -5
- package/include/glib-2.0/gio/ginitable.h +0 -7
- package/include/glib-2.0/gio/ginputstream.h +0 -5
- package/include/glib-2.0/gio/gio-autocleanups.h +4 -0
- package/include/glib-2.0/gio/gio-visibility.h +34 -0
- package/include/glib-2.0/gio/gioenums.h +6 -1
- package/include/glib-2.0/gio/giomodule.h +0 -5
- package/include/glib-2.0/gio/giostream.h +0 -5
- package/include/glib-2.0/gio/giotypes.h +5 -108
- package/include/glib-2.0/gio/gloadableicon.h +0 -6
- package/include/glib-2.0/gio/gmemoryinputstream.h +0 -5
- package/include/glib-2.0/gio/gmemoryoutputstream.h +0 -5
- package/include/glib-2.0/gio/gmountoperation.h +0 -6
- package/include/glib-2.0/gio/gnetworking.h +4 -0
- package/include/glib-2.0/gio/goutputstream.h +0 -9
- package/include/glib-2.0/gio/gpollableinputstream.h +0 -7
- package/include/glib-2.0/gio/gpollableoutputstream.h +0 -7
- package/include/glib-2.0/gio/gproxy.h +0 -7
- package/include/glib-2.0/gio/gproxyaddressenumerator.h +0 -8
- package/include/glib-2.0/gio/gseekable.h +0 -5
- package/include/glib-2.0/gio/gsettingsbackend.h +0 -5
- package/include/glib-2.0/gio/gsimpleactiongroup.h +0 -7
- package/include/glib-2.0/gio/gsimpleasyncresult.h +0 -5
- package/include/glib-2.0/gio/gsimpleproxyresolver.h +0 -5
- package/include/glib-2.0/gio/gsocket.h +13 -0
- package/include/glib-2.0/gio/gsocketaddressenumerator.h +0 -6
- package/include/glib-2.0/gio/gsocketconnectable.h +0 -5
- package/include/glib-2.0/gio/gtask.h +12 -0
- package/include/glib-2.0/gio/gthemedicon.h +0 -5
- package/include/glib-2.0/gio/gtlsserverconnection.h +0 -8
- package/include/glib-2.0/gio/gunixcredentialsmessage.h +0 -8
- package/include/glib-2.0/gio/gvfs.h +0 -5
- package/include/glib-2.0/gio/gvolume.h +2 -2
- package/include/glib-2.0/gio/gvolumemonitor.h +0 -5
- package/include/glib-2.0/girepository/gi-visibility.h +986 -0
- package/include/glib-2.0/girepository/giarginfo.h +100 -0
- package/include/glib-2.0/girepository/gibaseinfo.h +129 -0
- package/include/glib-2.0/girepository/gicallableinfo.h +119 -0
- package/include/glib-2.0/girepository/gicallbackinfo.h +60 -0
- package/include/glib-2.0/girepository/giconstantinfo.h +72 -0
- package/include/glib-2.0/girepository/gienuminfo.h +82 -0
- package/include/glib-2.0/girepository/gifieldinfo.h +84 -0
- package/include/glib-2.0/girepository/giflagsinfo.h +60 -0
- package/include/glib-2.0/girepository/gifunctioninfo.h +117 -0
- package/include/glib-2.0/girepository/giinterfaceinfo.h +120 -0
- package/include/glib-2.0/girepository/giobjectinfo.h +230 -0
- package/include/glib-2.0/girepository/gipropertyinfo.h +77 -0
- package/include/glib-2.0/girepository/giregisteredtypeinfo.h +75 -0
- package/include/glib-2.0/girepository/girepository-autocleanups.h +56 -0
- package/include/glib-2.0/girepository/girepository.h +247 -0
- package/include/glib-2.0/girepository/girffi.h +129 -0
- package/include/glib-2.0/girepository/gisignalinfo.h +72 -0
- package/include/glib-2.0/girepository/gistructinfo.h +102 -0
- package/include/glib-2.0/girepository/gitypeinfo.h +144 -0
- package/include/glib-2.0/girepository/gitypelib.h +61 -0
- package/include/glib-2.0/girepository/gitypes.h +421 -0
- package/include/glib-2.0/girepository/giunioninfo.h +105 -0
- package/include/glib-2.0/girepository/giunresolvedinfo.h +60 -0
- package/include/glib-2.0/girepository/givalueinfo.h +65 -0
- package/include/glib-2.0/girepository/givfuncinfo.h +88 -0
- package/include/glib-2.0/glib/deprecated/gcompletion.h +1 -1
- package/include/glib-2.0/glib/deprecated/grel.h +0 -23
- package/include/glib-2.0/glib/deprecated/gthread.h +10 -6
- package/include/glib-2.0/glib/gatomic.h +20 -20
- package/include/glib-2.0/glib/gbitlock.h +31 -0
- package/include/glib-2.0/glib/gbookmarkfile.h +39 -1
- package/include/glib-2.0/glib/gchecksum.h +0 -10
- package/include/glib-2.0/glib/gdate.h +0 -9
- package/include/glib-2.0/glib/gdatetime.h +33 -1
- package/include/glib-2.0/glib/gdir.h +5 -0
- package/include/glib-2.0/glib/ghmac.h +0 -9
- package/include/glib-2.0/glib/glib-autocleanups.h +4 -0
- package/include/glib-2.0/glib/glib-visibility.h +34 -0
- package/include/glib-2.0/glib/gmacros.h +1 -0
- package/include/glib-2.0/glib/gmessages.h +11 -0
- package/include/glib-2.0/glib/gpathbuf.h +0 -7
- package/include/glib-2.0/glib/gslice.h +2 -0
- package/include/glib-2.0/glib/gstdio.h +1 -1
- package/include/glib-2.0/glib/gstrfuncs.h +24 -18
- package/include/glib-2.0/glib/gstrvbuilder.h +4 -8
- package/include/glib-2.0/glib/gtestutils.h +5 -0
- package/include/glib-2.0/glib/gthread.h +216 -3
- package/include/glib-2.0/glib/gunicode.h +12 -2
- package/include/glib-2.0/glib/gvarianttype.h +1 -10
- package/include/glib-2.0/glib/gversionmacros.h +9 -0
- package/include/glib-2.0/glib/gwin32.h +4 -4
- package/include/glib-2.0/glib-unix.h +214 -0
- package/include/glib-2.0/gmodule/gmodule-visibility.h +34 -0
- package/include/glib-2.0/gobject/gbinding.h +0 -8
- package/include/glib-2.0/gobject/gbindinggroup.h +0 -8
- package/include/glib-2.0/gobject/gclosure.h +1 -9
- package/include/glib-2.0/gobject/genums.h +6 -6
- package/include/glib-2.0/gobject/glib-types.h +44 -0
- package/include/glib-2.0/gobject/gobject-autocleanups.h +4 -0
- package/include/glib-2.0/gobject/gobject-visibility.h +34 -0
- package/include/glib-2.0/gobject/gobject.h +1 -16
- package/include/glib-2.0/gobject/gparam.h +3 -12
- package/include/glib-2.0/gobject/gsignal.h +16 -6
- package/include/glib-2.0/gobject/gsignalgroup.h +0 -8
- package/include/glib-2.0/gobject/gtype.h +53 -20
- package/include/glib-2.0/gobject/gtypemodule.h +0 -7
- package/include/glib-2.0/gobject/gtypeplugin.h +0 -6
- package/include/glib-2.0/gobject/gvaluearray.h +0 -7
- package/include/glib-2.0/gobject/gvaluecollector.h +1 -11
- package/include/glib-2.0/gobject/gvaluetypes.h +2 -0
- package/include/hwy/aligned_allocator.h +171 -6
- package/include/hwy/base.h +1765 -543
- package/include/hwy/cache_control.h +24 -6
- package/include/hwy/detect_compiler_arch.h +23 -2
- package/include/hwy/detect_targets.h +56 -13
- package/include/hwy/foreach_target.h +24 -0
- package/include/hwy/highway.h +20 -3
- package/include/hwy/ops/arm_neon-inl.h +1086 -667
- package/include/hwy/ops/arm_sve-inl.h +1091 -235
- package/include/hwy/ops/emu128-inl.h +271 -196
- package/include/hwy/ops/generic_ops-inl.h +2270 -399
- package/include/hwy/ops/ppc_vsx-inl.h +1786 -563
- package/include/hwy/ops/rvv-inl.h +1043 -311
- package/include/hwy/ops/scalar-inl.h +189 -159
- package/include/hwy/ops/set_macros-inl.h +66 -6
- package/include/hwy/ops/shared-inl.h +175 -56
- package/include/hwy/ops/wasm_128-inl.h +153 -136
- package/include/hwy/ops/x86_128-inl.h +1647 -646
- package/include/hwy/ops/x86_256-inl.h +1003 -370
- package/include/hwy/ops/x86_512-inl.h +948 -353
- package/include/hwy/per_target.h +4 -0
- package/include/hwy/profiler.h +648 -0
- package/include/hwy/robust_statistics.h +2 -2
- package/include/hwy/targets.h +18 -11
- package/include/hwy/timer.h +11 -0
- package/include/lcms2.h +46 -7
- package/include/lcms2_plugin.h +4 -4
- package/include/libheif/heif_version.h +2 -2
- package/include/libpng16/png.h +32 -29
- package/include/libpng16/pngconf.h +2 -2
- package/include/libpng16/pnglibconf.h +7 -2
- package/include/librsvg-2.0/librsvg/rsvg-version.h +2 -2
- package/include/libxml2/libxml/HTMLparser.h +23 -0
- package/include/libxml2/libxml/SAX.h +0 -2
- package/include/libxml2/libxml/SAX2.h +0 -2
- package/include/libxml2/libxml/c14n.h +0 -2
- package/include/libxml2/libxml/dict.h +1 -0
- package/include/libxml2/libxml/encoding.h +16 -14
- package/include/libxml2/libxml/entities.h +4 -0
- package/include/libxml2/libxml/globals.h +15 -503
- package/include/libxml2/libxml/hash.h +57 -61
- package/include/libxml2/libxml/nanoftp.h +2 -2
- package/include/libxml2/libxml/parser.h +137 -18
- package/include/libxml2/libxml/parserInternals.h +1 -0
- package/include/libxml2/libxml/relaxng.h +2 -1
- package/include/libxml2/libxml/schemasInternals.h +1 -0
- package/include/libxml2/libxml/schematron.h +1 -0
- package/include/libxml2/libxml/threads.h +4 -11
- package/include/libxml2/libxml/tree.h +68 -20
- package/include/libxml2/libxml/uri.h +2 -1
- package/include/libxml2/libxml/valid.h +2 -0
- package/include/libxml2/libxml/xmlIO.h +65 -13
- package/include/libxml2/libxml/xmlerror.h +37 -8
- package/include/libxml2/libxml/xmlmemory.h +37 -40
- package/include/libxml2/libxml/xmlreader.h +6 -0
- package/include/libxml2/libxml/xmlregexp.h +2 -9
- package/include/libxml2/libxml/xmlsave.h +9 -0
- package/include/libxml2/libxml/xmlschemas.h +3 -0
- package/include/libxml2/libxml/xmlversion.h +28 -43
- package/include/libxml2/libxml/xpath.h +1 -1
- package/include/libxml2/libxml/xpathInternals.h +2 -1
- package/include/libxml2/libxml/xpointer.h +5 -4
- package/include/pango-1.0/pango/pango-features.h +3 -3
- package/include/pango-1.0/pango/pango-fontmap.h +7 -0
- package/include/pixman-1/pixman-version.h +3 -3
- package/include/pixman-1/pixman.h +9 -2
- package/include/png.h +32 -29
- package/include/pngconf.h +2 -2
- package/include/pnglibconf.h +7 -2
- package/include/vips/connection.h +9 -3
- package/include/vips/util.h +0 -9
- package/include/vips/version.h +4 -4
- package/include/zconf.h +3 -0
- package/include/zlib.h +3 -3
- package/package.json +1 -1
- package/versions.json +15 -15
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
// Single-element vectors and operations.
|
|
17
17
|
// External include guard in highway.h - see comment there.
|
|
18
18
|
|
|
19
|
+
#include <stdint.h>
|
|
19
20
|
#ifndef HWY_NO_LIBCXX
|
|
20
21
|
#include <math.h> // sqrtf
|
|
21
22
|
#endif
|
|
@@ -53,6 +54,9 @@ struct Vec1 {
|
|
|
53
54
|
HWY_INLINE Vec1& operator-=(const Vec1 other) {
|
|
54
55
|
return *this = (*this - other);
|
|
55
56
|
}
|
|
57
|
+
HWY_INLINE Vec1& operator%=(const Vec1 other) {
|
|
58
|
+
return *this = (*this % other);
|
|
59
|
+
}
|
|
56
60
|
HWY_INLINE Vec1& operator&=(const Vec1 other) {
|
|
57
61
|
return *this = (*this & other);
|
|
58
62
|
}
|
|
@@ -101,9 +105,7 @@ HWY_API Vec1<TTo> BitCast(DTo /* tag */, Vec1<TFrom> v) {
|
|
|
101
105
|
|
|
102
106
|
template <class D, HWY_IF_LANES_D(D, 1), typename T = TFromD<D>>
|
|
103
107
|
HWY_API Vec1<T> Zero(D /* tag */) {
|
|
104
|
-
Vec1<T>
|
|
105
|
-
ZeroBytes<sizeof(v.raw)>(&v.raw);
|
|
106
|
-
return v;
|
|
108
|
+
return Vec1<T>(ConvertScalarTo<T>(0));
|
|
107
109
|
}
|
|
108
110
|
|
|
109
111
|
template <class D>
|
|
@@ -137,7 +139,7 @@ HWY_API VFromD<D> ResizeBitCast(D /* tag */, FromV v) {
|
|
|
137
139
|
using TFrom = TFromV<FromV>;
|
|
138
140
|
using TTo = TFromD<D>;
|
|
139
141
|
constexpr size_t kCopyLen = HWY_MIN(sizeof(TFrom), sizeof(TTo));
|
|
140
|
-
TTo to
|
|
142
|
+
TTo to{};
|
|
141
143
|
CopyBytes<kCopyLen>(&v.raw, &to);
|
|
142
144
|
return VFromD<D>(to);
|
|
143
145
|
}
|
|
@@ -156,6 +158,39 @@ HWY_INLINE VFromD<DTo> ZeroExtendResizeBitCast(FromSizeTag /* from_size_tag */,
|
|
|
156
158
|
|
|
157
159
|
} // namespace detail
|
|
158
160
|
|
|
161
|
+
// ------------------------------ Dup128VecFromValues
|
|
162
|
+
|
|
163
|
+
template <class D, HWY_IF_T_SIZE_D(D, 1)>
|
|
164
|
+
HWY_API VFromD<D> Dup128VecFromValues(D /*d*/, TFromD<D> t0, TFromD<D> /*t1*/,
|
|
165
|
+
TFromD<D> /*t2*/, TFromD<D> /*t3*/,
|
|
166
|
+
TFromD<D> /*t4*/, TFromD<D> /*t5*/,
|
|
167
|
+
TFromD<D> /*t6*/, TFromD<D> /*t7*/,
|
|
168
|
+
TFromD<D> /*t8*/, TFromD<D> /*t9*/,
|
|
169
|
+
TFromD<D> /*t10*/, TFromD<D> /*t11*/,
|
|
170
|
+
TFromD<D> /*t12*/, TFromD<D> /*t13*/,
|
|
171
|
+
TFromD<D> /*t14*/, TFromD<D> /*t15*/) {
|
|
172
|
+
return VFromD<D>(t0);
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
template <class D, HWY_IF_T_SIZE_D(D, 2)>
|
|
176
|
+
HWY_API VFromD<D> Dup128VecFromValues(D /*d*/, TFromD<D> t0, TFromD<D> /*t1*/,
|
|
177
|
+
TFromD<D> /*t2*/, TFromD<D> /*t3*/,
|
|
178
|
+
TFromD<D> /*t4*/, TFromD<D> /*t5*/,
|
|
179
|
+
TFromD<D> /*t6*/, TFromD<D> /*t7*/) {
|
|
180
|
+
return VFromD<D>(t0);
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
template <class D, HWY_IF_T_SIZE_D(D, 4)>
|
|
184
|
+
HWY_API VFromD<D> Dup128VecFromValues(D /*d*/, TFromD<D> t0, TFromD<D> /*t1*/,
|
|
185
|
+
TFromD<D> /*t2*/, TFromD<D> /*t3*/) {
|
|
186
|
+
return VFromD<D>(t0);
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
template <class D, HWY_IF_T_SIZE_D(D, 8)>
|
|
190
|
+
HWY_API VFromD<D> Dup128VecFromValues(D /*d*/, TFromD<D> t0, TFromD<D> /*t1*/) {
|
|
191
|
+
return VFromD<D>(t0);
|
|
192
|
+
}
|
|
193
|
+
|
|
159
194
|
// ================================================== LOGICAL
|
|
160
195
|
|
|
161
196
|
// ------------------------------ Not
|
|
@@ -328,12 +363,12 @@ HWY_API Vec1<T> IfThenElse(const Mask1<T> mask, const Vec1<T> yes,
|
|
|
328
363
|
|
|
329
364
|
template <typename T>
|
|
330
365
|
HWY_API Vec1<T> IfThenElseZero(const Mask1<T> mask, const Vec1<T> yes) {
|
|
331
|
-
return mask.bits ? yes : Vec1<T>(0);
|
|
366
|
+
return mask.bits ? yes : Vec1<T>(ConvertScalarTo<T>(0));
|
|
332
367
|
}
|
|
333
368
|
|
|
334
369
|
template <typename T>
|
|
335
370
|
HWY_API Vec1<T> IfThenZeroElse(const Mask1<T> mask, const Vec1<T> no) {
|
|
336
|
-
return mask.bits ? Vec1<T>(0) : no;
|
|
371
|
+
return mask.bits ? Vec1<T>(ConvertScalarTo<T>(0)) : no;
|
|
337
372
|
}
|
|
338
373
|
|
|
339
374
|
template <typename T>
|
|
@@ -347,7 +382,11 @@ HWY_API Vec1<T> IfNegativeThenElse(Vec1<T> v, Vec1<T> yes, Vec1<T> no) {
|
|
|
347
382
|
|
|
348
383
|
template <typename T>
|
|
349
384
|
HWY_API Vec1<T> ZeroIfNegative(const Vec1<T> v) {
|
|
350
|
-
|
|
385
|
+
const DFromV<decltype(v)> d;
|
|
386
|
+
const RebindToSigned<decltype(d)> di;
|
|
387
|
+
const auto vi = BitCast(di, v);
|
|
388
|
+
|
|
389
|
+
return vi.raw < 0 ? Vec1<T>(ConvertScalarTo<T>(0)) : v;
|
|
351
390
|
}
|
|
352
391
|
|
|
353
392
|
// ------------------------------ Mask logical
|
|
@@ -407,6 +446,19 @@ HWY_API Mask1<T> SetAtOrBeforeFirst(Mask1<T> /*mask*/) {
|
|
|
407
446
|
return Mask1<T>::FromBool(true);
|
|
408
447
|
}
|
|
409
448
|
|
|
449
|
+
// ------------------------------ LowerHalfOfMask
|
|
450
|
+
|
|
451
|
+
#ifdef HWY_NATIVE_LOWER_HALF_OF_MASK
|
|
452
|
+
#undef HWY_NATIVE_LOWER_HALF_OF_MASK
|
|
453
|
+
#else
|
|
454
|
+
#define HWY_NATIVE_LOWER_HALF_OF_MASK
|
|
455
|
+
#endif
|
|
456
|
+
|
|
457
|
+
template <class D>
|
|
458
|
+
HWY_API MFromD<D> LowerHalfOfMask(D /*d*/, MFromD<D> m) {
|
|
459
|
+
return m;
|
|
460
|
+
}
|
|
461
|
+
|
|
410
462
|
// ================================================== SHIFTS
|
|
411
463
|
|
|
412
464
|
// ------------------------------ ShiftLeft/ShiftRight (BroadcastSignBit)
|
|
@@ -528,10 +580,22 @@ HWY_API Vec1<double> operator-(const Vec1<double> a, const Vec1<double> b) {
|
|
|
528
580
|
|
|
529
581
|
// ------------------------------ SumsOf8
|
|
530
582
|
|
|
583
|
+
HWY_API Vec1<int64_t> SumsOf8(const Vec1<int8_t> v) {
|
|
584
|
+
return Vec1<int64_t>(v.raw);
|
|
585
|
+
}
|
|
531
586
|
HWY_API Vec1<uint64_t> SumsOf8(const Vec1<uint8_t> v) {
|
|
532
587
|
return Vec1<uint64_t>(v.raw);
|
|
533
588
|
}
|
|
534
589
|
|
|
590
|
+
// ------------------------------ SumsOf2
|
|
591
|
+
|
|
592
|
+
template <class T>
|
|
593
|
+
HWY_API Vec1<MakeWide<T>> SumsOf2(const Vec1<T> v) {
|
|
594
|
+
const DFromV<decltype(v)> d;
|
|
595
|
+
const Rebind<MakeWide<T>, decltype(d)> dw;
|
|
596
|
+
return PromoteTo(dw, v);
|
|
597
|
+
}
|
|
598
|
+
|
|
535
599
|
// ------------------------------ SaturatedAdd
|
|
536
600
|
|
|
537
601
|
// Returns a + b clamped to the destination range.
|
|
@@ -603,57 +667,12 @@ HWY_API Vec1<uint16_t> AverageRound(const Vec1<uint16_t> a,
|
|
|
603
667
|
|
|
604
668
|
template <typename T>
|
|
605
669
|
HWY_API Vec1<T> Abs(const Vec1<T> a) {
|
|
606
|
-
|
|
607
|
-
if (i >= 0 || i == hwy::LimitsMin<T>()) return a;
|
|
608
|
-
return Vec1<T>(static_cast<T>(-i & T{-1}));
|
|
609
|
-
}
|
|
610
|
-
HWY_API Vec1<float> Abs(Vec1<float> a) {
|
|
611
|
-
int32_t i;
|
|
612
|
-
CopyBytes<sizeof(i)>(&a.raw, &i);
|
|
613
|
-
i &= 0x7FFFFFFF;
|
|
614
|
-
CopyBytes<sizeof(i)>(&i, &a.raw);
|
|
615
|
-
return a;
|
|
616
|
-
}
|
|
617
|
-
HWY_API Vec1<double> Abs(Vec1<double> a) {
|
|
618
|
-
int64_t i;
|
|
619
|
-
CopyBytes<sizeof(i)>(&a.raw, &i);
|
|
620
|
-
i &= 0x7FFFFFFFFFFFFFFFL;
|
|
621
|
-
CopyBytes<sizeof(i)>(&i, &a.raw);
|
|
622
|
-
return a;
|
|
670
|
+
return Vec1<T>(ScalarAbs(a.raw));
|
|
623
671
|
}
|
|
624
672
|
|
|
625
673
|
// ------------------------------ Min/Max
|
|
626
674
|
|
|
627
675
|
// <cmath> may be unavailable, so implement our own.
|
|
628
|
-
namespace detail {
|
|
629
|
-
|
|
630
|
-
static inline float Abs(float f) {
|
|
631
|
-
uint32_t i;
|
|
632
|
-
CopyBytes<4>(&f, &i);
|
|
633
|
-
i &= 0x7FFFFFFFu;
|
|
634
|
-
CopyBytes<4>(&i, &f);
|
|
635
|
-
return f;
|
|
636
|
-
}
|
|
637
|
-
static inline double Abs(double f) {
|
|
638
|
-
uint64_t i;
|
|
639
|
-
CopyBytes<8>(&f, &i);
|
|
640
|
-
i &= 0x7FFFFFFFFFFFFFFFull;
|
|
641
|
-
CopyBytes<8>(&i, &f);
|
|
642
|
-
return f;
|
|
643
|
-
}
|
|
644
|
-
|
|
645
|
-
static inline bool SignBit(float f) {
|
|
646
|
-
uint32_t i;
|
|
647
|
-
CopyBytes<4>(&f, &i);
|
|
648
|
-
return (i >> 31) != 0;
|
|
649
|
-
}
|
|
650
|
-
static inline bool SignBit(double f) {
|
|
651
|
-
uint64_t i;
|
|
652
|
-
CopyBytes<8>(&f, &i);
|
|
653
|
-
return (i >> 63) != 0;
|
|
654
|
-
}
|
|
655
|
-
|
|
656
|
-
} // namespace detail
|
|
657
676
|
|
|
658
677
|
template <typename T, HWY_IF_NOT_FLOAT(T)>
|
|
659
678
|
HWY_API Vec1<T> Min(const Vec1<T> a, const Vec1<T> b) {
|
|
@@ -716,7 +735,7 @@ HWY_API Vec1<T> operator*(const Vec1<T> a, const Vec1<T> b) {
|
|
|
716
735
|
static_cast<uint64_t>(b.raw)));
|
|
717
736
|
}
|
|
718
737
|
|
|
719
|
-
template <typename T>
|
|
738
|
+
template <typename T, HWY_IF_FLOAT(T)>
|
|
720
739
|
HWY_API Vec1<T> operator/(const Vec1<T> a, const Vec1<T> b) {
|
|
721
740
|
return Vec1<T>(a.raw / b.raw);
|
|
722
741
|
}
|
|
@@ -763,23 +782,23 @@ HWY_API Vec1<T> AbsDiff(const Vec1<T> a, const Vec1<T> b) {
|
|
|
763
782
|
|
|
764
783
|
// ------------------------------ Floating-point multiply-add variants
|
|
765
784
|
|
|
766
|
-
template <typename T>
|
|
785
|
+
template <typename T, HWY_IF_FLOAT(T)>
|
|
767
786
|
HWY_API Vec1<T> MulAdd(const Vec1<T> mul, const Vec1<T> x, const Vec1<T> add) {
|
|
768
787
|
return mul * x + add;
|
|
769
788
|
}
|
|
770
789
|
|
|
771
|
-
template <typename T>
|
|
790
|
+
template <typename T, HWY_IF_FLOAT(T)>
|
|
772
791
|
HWY_API Vec1<T> NegMulAdd(const Vec1<T> mul, const Vec1<T> x,
|
|
773
792
|
const Vec1<T> add) {
|
|
774
793
|
return add - mul * x;
|
|
775
794
|
}
|
|
776
795
|
|
|
777
|
-
template <typename T>
|
|
796
|
+
template <typename T, HWY_IF_FLOAT(T)>
|
|
778
797
|
HWY_API Vec1<T> MulSub(const Vec1<T> mul, const Vec1<T> x, const Vec1<T> sub) {
|
|
779
798
|
return mul * x - sub;
|
|
780
799
|
}
|
|
781
800
|
|
|
782
|
-
template <typename T>
|
|
801
|
+
template <typename T, HWY_IF_FLOAT(T)>
|
|
783
802
|
HWY_API Vec1<T> NegMulSub(const Vec1<T> mul, const Vec1<T> x,
|
|
784
803
|
const Vec1<T> sub) {
|
|
785
804
|
return Neg(mul) * x - sub;
|
|
@@ -842,14 +861,17 @@ HWY_API Vec1<T> Round(const Vec1<T> v) {
|
|
|
842
861
|
if (!(Abs(v).raw < MantissaEnd<T>())) { // Huge or NaN
|
|
843
862
|
return v;
|
|
844
863
|
}
|
|
845
|
-
const T
|
|
846
|
-
const
|
|
847
|
-
|
|
864
|
+
const T k0 = ConvertScalarTo<T>(0);
|
|
865
|
+
const T bias = ConvertScalarTo<T>(v.raw < k0 ? -0.5 : 0.5);
|
|
866
|
+
const TI rounded = ConvertScalarTo<TI>(v.raw + bias);
|
|
867
|
+
if (rounded == 0) return CopySignToAbs(Vec1<T>(k0), v);
|
|
868
|
+
TI offset = 0;
|
|
848
869
|
// Round to even
|
|
849
|
-
if ((rounded & 1) &&
|
|
850
|
-
|
|
870
|
+
if ((rounded & 1) && ScalarAbs(ConvertScalarTo<T>(rounded) - v.raw) ==
|
|
871
|
+
ConvertScalarTo<T>(0.5)) {
|
|
872
|
+
offset = v.raw < k0 ? -1 : 1;
|
|
851
873
|
}
|
|
852
|
-
return Vec1<T>(
|
|
874
|
+
return Vec1<T>(ConvertScalarTo<T>(rounded - offset));
|
|
853
875
|
}
|
|
854
876
|
|
|
855
877
|
// Round-to-nearest even.
|
|
@@ -858,23 +880,26 @@ HWY_API Vec1<int32_t> NearestInt(const Vec1<float> v) {
|
|
|
858
880
|
using TI = int32_t;
|
|
859
881
|
|
|
860
882
|
const T abs = Abs(v).raw;
|
|
861
|
-
const bool is_sign =
|
|
883
|
+
const bool is_sign = ScalarSignBit(v.raw);
|
|
862
884
|
|
|
863
885
|
if (!(abs < MantissaEnd<T>())) { // Huge or NaN
|
|
864
886
|
// Check if too large to cast or NaN
|
|
865
|
-
if (!(abs <=
|
|
887
|
+
if (!(abs <= ConvertScalarTo<T>(LimitsMax<TI>()))) {
|
|
866
888
|
return Vec1<TI>(is_sign ? LimitsMin<TI>() : LimitsMax<TI>());
|
|
867
889
|
}
|
|
868
|
-
return Vec1<int32_t>(
|
|
890
|
+
return Vec1<int32_t>(ConvertScalarTo<TI>(v.raw));
|
|
869
891
|
}
|
|
870
|
-
const T bias =
|
|
871
|
-
|
|
892
|
+
const T bias =
|
|
893
|
+
ConvertScalarTo<T>(v.raw < ConvertScalarTo<T>(0.0) ? -0.5 : 0.5);
|
|
894
|
+
const TI rounded = ConvertScalarTo<TI>(v.raw + bias);
|
|
872
895
|
if (rounded == 0) return Vec1<int32_t>(0);
|
|
896
|
+
TI offset = 0;
|
|
873
897
|
// Round to even
|
|
874
|
-
if ((rounded & 1) &&
|
|
875
|
-
|
|
898
|
+
if ((rounded & 1) && ScalarAbs(ConvertScalarTo<T>(rounded) - v.raw) ==
|
|
899
|
+
ConvertScalarTo<T>(0.5)) {
|
|
900
|
+
offset = is_sign ? -1 : 1;
|
|
876
901
|
}
|
|
877
|
-
return Vec1<TI>(rounded);
|
|
902
|
+
return Vec1<TI>(rounded - offset);
|
|
878
903
|
}
|
|
879
904
|
|
|
880
905
|
template <typename T>
|
|
@@ -883,9 +908,9 @@ HWY_API Vec1<T> Trunc(const Vec1<T> v) {
|
|
|
883
908
|
if (!(Abs(v).raw <= MantissaEnd<T>())) { // Huge or NaN
|
|
884
909
|
return v;
|
|
885
910
|
}
|
|
886
|
-
const TI truncated =
|
|
911
|
+
const TI truncated = ConvertScalarTo<TI>(v.raw);
|
|
887
912
|
if (truncated == 0) return CopySignToAbs(Vec1<T>(0), v);
|
|
888
|
-
return Vec1<T>(
|
|
913
|
+
return Vec1<T>(ConvertScalarTo<T>(truncated));
|
|
889
914
|
}
|
|
890
915
|
|
|
891
916
|
template <typename Float, typename Bits, int kMantissaBits, int kExponentBits,
|
|
@@ -1017,6 +1042,13 @@ HWY_API Mask1<T> IsNaN(const Vec1<T> v) {
|
|
|
1017
1042
|
return Mask1<T>::FromBool(bits > ExponentMask<T>());
|
|
1018
1043
|
}
|
|
1019
1044
|
|
|
1045
|
+
// Per-target flag to prevent generic_ops-inl.h from defining IsInf / IsFinite.
|
|
1046
|
+
#ifdef HWY_NATIVE_ISINF
|
|
1047
|
+
#undef HWY_NATIVE_ISINF
|
|
1048
|
+
#else
|
|
1049
|
+
#define HWY_NATIVE_ISINF
|
|
1050
|
+
#endif
|
|
1051
|
+
|
|
1020
1052
|
HWY_API Mask1<float> IsInf(const Vec1<float> v) {
|
|
1021
1053
|
const Sisd<float> d;
|
|
1022
1054
|
const RebindToUnsigned<decltype(d)> du;
|
|
@@ -1205,8 +1237,9 @@ HWY_API void Stream(const Vec1<T> v, D d, T* HWY_RESTRICT aligned) {
|
|
|
1205
1237
|
template <class D, typename T = TFromD<D>, typename TI>
|
|
1206
1238
|
HWY_API void ScatterOffset(Vec1<T> v, D d, T* base, Vec1<TI> offset) {
|
|
1207
1239
|
static_assert(sizeof(T) == sizeof(TI), "Index/lane size must match");
|
|
1208
|
-
|
|
1209
|
-
|
|
1240
|
+
const intptr_t addr =
|
|
1241
|
+
reinterpret_cast<intptr_t>(base) + static_cast<intptr_t>(offset.raw);
|
|
1242
|
+
Store(v, d, reinterpret_cast<T*>(addr));
|
|
1210
1243
|
}
|
|
1211
1244
|
|
|
1212
1245
|
template <class D, typename T = TFromD<D>, typename TI>
|
|
@@ -1231,27 +1264,36 @@ HWY_API void MaskedScatterIndex(Vec1<T> v, Mask1<T> m, D d,
|
|
|
1231
1264
|
#define HWY_NATIVE_GATHER
|
|
1232
1265
|
#endif
|
|
1233
1266
|
|
|
1234
|
-
template <class D, typename T = TFromD<D
|
|
1235
|
-
HWY_API Vec1<T> GatherOffset(D d, const T* base, Vec1<
|
|
1236
|
-
|
|
1267
|
+
template <class D, typename T = TFromD<D>>
|
|
1268
|
+
HWY_API Vec1<T> GatherOffset(D d, const T* base, Vec1<MakeSigned<T>> offset) {
|
|
1269
|
+
HWY_DASSERT(offset.raw >= 0);
|
|
1237
1270
|
const intptr_t addr =
|
|
1238
1271
|
reinterpret_cast<intptr_t>(base) + static_cast<intptr_t>(offset.raw);
|
|
1239
1272
|
return Load(d, reinterpret_cast<const T*>(addr));
|
|
1240
1273
|
}
|
|
1241
1274
|
|
|
1242
|
-
template <class D, typename T = TFromD<D
|
|
1243
|
-
HWY_API Vec1<T> GatherIndex(D d, const T* HWY_RESTRICT base,
|
|
1244
|
-
|
|
1275
|
+
template <class D, typename T = TFromD<D>>
|
|
1276
|
+
HWY_API Vec1<T> GatherIndex(D d, const T* HWY_RESTRICT base,
|
|
1277
|
+
Vec1<MakeSigned<T>> index) {
|
|
1278
|
+
HWY_DASSERT(index.raw >= 0);
|
|
1245
1279
|
return Load(d, base + index.raw);
|
|
1246
1280
|
}
|
|
1247
1281
|
|
|
1248
|
-
template <class D, typename T = TFromD<D
|
|
1282
|
+
template <class D, typename T = TFromD<D>>
|
|
1249
1283
|
HWY_API Vec1<T> MaskedGatherIndex(Mask1<T> m, D d, const T* HWY_RESTRICT base,
|
|
1250
|
-
Vec1<
|
|
1251
|
-
|
|
1284
|
+
Vec1<MakeSigned<T>> index) {
|
|
1285
|
+
HWY_DASSERT(index.raw >= 0);
|
|
1252
1286
|
return MaskedLoad(m, d, base + index.raw);
|
|
1253
1287
|
}
|
|
1254
1288
|
|
|
1289
|
+
template <class D, typename T = TFromD<D>>
|
|
1290
|
+
HWY_API Vec1<T> MaskedGatherIndexOr(Vec1<T> no, Mask1<T> m, D d,
|
|
1291
|
+
const T* HWY_RESTRICT base,
|
|
1292
|
+
Vec1<MakeSigned<T>> index) {
|
|
1293
|
+
HWY_DASSERT(index.raw >= 0);
|
|
1294
|
+
return MaskedLoadOr(no, m, d, base + index.raw);
|
|
1295
|
+
}
|
|
1296
|
+
|
|
1255
1297
|
// ================================================== CONVERT
|
|
1256
1298
|
|
|
1257
1299
|
// ConvertTo and DemoteTo with floating-point input and integer output truncate
|
|
@@ -1260,73 +1302,69 @@ HWY_API Vec1<T> MaskedGatherIndex(Mask1<T> m, D d, const T* HWY_RESTRICT base,
|
|
|
1260
1302
|
namespace detail {
|
|
1261
1303
|
|
|
1262
1304
|
template <class ToT, class FromT>
|
|
1263
|
-
HWY_INLINE ToT CastValueForF2IConv(
|
|
1264
|
-
FromT val) {
|
|
1265
|
-
// Prevent ubsan errors when converting float to narrower integer
|
|
1266
|
-
|
|
1267
|
-
// If LimitsMax<ToT>() can be exactly represented in FromT,
|
|
1268
|
-
// kSmallestOutOfToTRangePosVal is equal to LimitsMax<ToT>().
|
|
1269
|
-
|
|
1270
|
-
// Otherwise, if LimitsMax<ToT>() cannot be exactly represented in FromT,
|
|
1271
|
-
// kSmallestOutOfToTRangePosVal is equal to LimitsMax<ToT>() + 1, which can
|
|
1272
|
-
// be exactly represented in FromT.
|
|
1273
|
-
constexpr FromT kSmallestOutOfToTRangePosVal =
|
|
1274
|
-
(sizeof(ToT) * 8 <= static_cast<size_t>(MantissaBits<FromT>()) + 1)
|
|
1275
|
-
? static_cast<FromT>(LimitsMax<ToT>())
|
|
1276
|
-
: static_cast<FromT>(
|
|
1277
|
-
static_cast<FromT>(ToT{1} << (sizeof(ToT) * 8 - 1)) * FromT(2));
|
|
1278
|
-
|
|
1279
|
-
if (detail::SignBit(val)) {
|
|
1280
|
-
return ToT{0};
|
|
1281
|
-
} else if (IsInf(Vec1<FromT>(val)).bits ||
|
|
1282
|
-
val >= kSmallestOutOfToTRangePosVal) {
|
|
1283
|
-
return LimitsMax<ToT>();
|
|
1284
|
-
} else {
|
|
1285
|
-
return static_cast<ToT>(val);
|
|
1286
|
-
}
|
|
1287
|
-
}
|
|
1288
|
-
|
|
1289
|
-
template <class ToT, class FromT>
|
|
1290
|
-
HWY_INLINE ToT CastValueForF2IConv(hwy::SignedTag /* to_type_tag */,
|
|
1291
|
-
FromT val) {
|
|
1305
|
+
HWY_INLINE ToT CastValueForF2IConv(FromT val) {
|
|
1292
1306
|
// Prevent ubsan errors when converting float to narrower integer
|
|
1293
1307
|
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
constexpr
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1308
|
+
using FromTU = MakeUnsigned<FromT>;
|
|
1309
|
+
using ToTU = MakeUnsigned<ToT>;
|
|
1310
|
+
|
|
1311
|
+
constexpr unsigned kMaxExpField =
|
|
1312
|
+
static_cast<unsigned>(MaxExponentField<FromT>());
|
|
1313
|
+
constexpr unsigned kExpBias = kMaxExpField >> 1;
|
|
1314
|
+
constexpr unsigned kMinOutOfRangeExpField = static_cast<unsigned>(HWY_MIN(
|
|
1315
|
+
kExpBias + sizeof(ToT) * 8 - static_cast<unsigned>(IsSigned<ToT>()),
|
|
1316
|
+
kMaxExpField));
|
|
1317
|
+
|
|
1318
|
+
// If ToT is signed, compare only the exponent bits of val against
|
|
1319
|
+
// kMinOutOfRangeExpField.
|
|
1320
|
+
//
|
|
1321
|
+
// Otherwise, if ToT is unsigned, compare the sign bit plus exponent bits of
|
|
1322
|
+
// val against kMinOutOfRangeExpField as a negative value is outside of the
|
|
1323
|
+
// range of an unsigned integer type.
|
|
1324
|
+
const FromT val_to_compare =
|
|
1325
|
+
static_cast<FromT>(IsSigned<ToT>() ? ScalarAbs(val) : val);
|
|
1326
|
+
|
|
1327
|
+
// val is within the range of ToT if
|
|
1328
|
+
// (BitCastScalar<FromTU>(val_to_compare) >> MantissaBits<FromT>()) is less
|
|
1329
|
+
// than kMinOutOfRangeExpField
|
|
1330
|
+
//
|
|
1331
|
+
// Otherwise, val is either outside of the range of ToT or equal to
|
|
1332
|
+
// LimitsMin<ToT>() if
|
|
1333
|
+
// (BitCastScalar<FromTU>(val_to_compare) >> MantissaBits<FromT>()) is greater
|
|
1334
|
+
// than or equal to kMinOutOfRangeExpField.
|
|
1335
|
+
|
|
1336
|
+
return (static_cast<unsigned>(BitCastScalar<FromTU>(val_to_compare) >>
|
|
1337
|
+
MantissaBits<FromT>()) < kMinOutOfRangeExpField)
|
|
1338
|
+
? static_cast<ToT>(val)
|
|
1339
|
+
: static_cast<ToT>(static_cast<ToTU>(LimitsMax<ToT>()) +
|
|
1340
|
+
static_cast<ToTU>(ScalarSignBit(val)));
|
|
1311
1341
|
}
|
|
1312
1342
|
|
|
1313
1343
|
template <class ToT, class ToTypeTag, class FromT>
|
|
1314
1344
|
HWY_INLINE ToT CastValueForPromoteTo(ToTypeTag /* to_type_tag */, FromT val) {
|
|
1315
|
-
return
|
|
1345
|
+
return ConvertScalarTo<ToT>(val);
|
|
1316
1346
|
}
|
|
1317
1347
|
|
|
1318
1348
|
template <class ToT>
|
|
1319
|
-
HWY_INLINE ToT CastValueForPromoteTo(hwy::SignedTag to_type_tag
|
|
1320
|
-
|
|
1349
|
+
HWY_INLINE ToT CastValueForPromoteTo(hwy::SignedTag /*to_type_tag*/,
|
|
1350
|
+
float val) {
|
|
1351
|
+
return CastValueForF2IConv<ToT>(val);
|
|
1321
1352
|
}
|
|
1322
1353
|
|
|
1323
1354
|
template <class ToT>
|
|
1324
|
-
HWY_INLINE ToT CastValueForPromoteTo(hwy::UnsignedTag to_type_tag
|
|
1325
|
-
|
|
1355
|
+
HWY_INLINE ToT CastValueForPromoteTo(hwy::UnsignedTag /*to_type_tag*/,
|
|
1356
|
+
float val) {
|
|
1357
|
+
return CastValueForF2IConv<ToT>(val);
|
|
1326
1358
|
}
|
|
1327
1359
|
|
|
1328
1360
|
} // namespace detail
|
|
1329
1361
|
|
|
1362
|
+
#ifdef HWY_NATIVE_PROMOTE_F16_TO_F64
|
|
1363
|
+
#undef HWY_NATIVE_PROMOTE_F16_TO_F64
|
|
1364
|
+
#else
|
|
1365
|
+
#define HWY_NATIVE_PROMOTE_F16_TO_F64
|
|
1366
|
+
#endif
|
|
1367
|
+
|
|
1330
1368
|
template <class DTo, typename TTo = TFromD<DTo>, typename TFrom>
|
|
1331
1369
|
HWY_API Vec1<TTo> PromoteTo(DTo /* tag */, Vec1<TFrom> from) {
|
|
1332
1370
|
static_assert(sizeof(TTo) > sizeof(TFrom), "Not promoting");
|
|
@@ -1342,16 +1380,15 @@ HWY_API Vec1<float> DemoteTo(D /* tag */, Vec1<double> from) {
|
|
|
1342
1380
|
// Prevent ubsan errors when converting float to narrower integer/float
|
|
1343
1381
|
if (IsInf(from).bits ||
|
|
1344
1382
|
Abs(from).raw > static_cast<double>(HighestValue<float>())) {
|
|
1345
|
-
return Vec1<float>(
|
|
1346
|
-
|
|
1383
|
+
return Vec1<float>(ScalarSignBit(from.raw) ? LowestValue<float>()
|
|
1384
|
+
: HighestValue<float>());
|
|
1347
1385
|
}
|
|
1348
1386
|
return Vec1<float>(static_cast<float>(from.raw));
|
|
1349
1387
|
}
|
|
1350
1388
|
template <class D, HWY_IF_UI32_D(D)>
|
|
1351
1389
|
HWY_API VFromD<D> DemoteTo(D /* tag */, Vec1<double> from) {
|
|
1352
1390
|
// Prevent ubsan errors when converting int32_t to narrower integer/int32_t
|
|
1353
|
-
return Vec1<TFromD<D>>(detail::CastValueForF2IConv<TFromD<D>>(
|
|
1354
|
-
hwy::TypeTag<TFromD<D>>(), from.raw));
|
|
1391
|
+
return Vec1<TFromD<D>>(detail::CastValueForF2IConv<TFromD<D>>(from.raw));
|
|
1355
1392
|
}
|
|
1356
1393
|
|
|
1357
1394
|
template <class DTo, typename TTo = TFromD<DTo>, typename TFrom,
|
|
@@ -1401,6 +1438,11 @@ HWY_API Vec1<float> PromoteTo(D d, const Vec1<bfloat16_t> v) {
|
|
|
1401
1438
|
return Set(d, F32FromBF16(v.raw));
|
|
1402
1439
|
}
|
|
1403
1440
|
|
|
1441
|
+
template <class DTo, typename TFrom>
|
|
1442
|
+
HWY_API VFromD<DTo> PromoteEvenTo(DTo d_to, Vec1<TFrom> v) {
|
|
1443
|
+
return PromoteTo(d_to, v);
|
|
1444
|
+
}
|
|
1445
|
+
|
|
1404
1446
|
template <class D, HWY_IF_F16_D(D)>
|
|
1405
1447
|
HWY_API Vec1<float16_t> DemoteTo(D /* tag */, const Vec1<float> v) {
|
|
1406
1448
|
return Vec1<float16_t>(F16FromF32(v.raw));
|
|
@@ -1416,8 +1458,7 @@ template <class DTo, typename TTo = TFromD<DTo>, typename TFrom,
|
|
|
1416
1458
|
HWY_API Vec1<TTo> ConvertTo(DTo /* tag */, Vec1<TFrom> from) {
|
|
1417
1459
|
static_assert(sizeof(TTo) == sizeof(TFrom), "Should have same size");
|
|
1418
1460
|
// float## -> int##: return closest representable value.
|
|
1419
|
-
return Vec1<TTo>(
|
|
1420
|
-
detail::CastValueForF2IConv<TTo>(hwy::TypeTag<TTo>(), from.raw));
|
|
1461
|
+
return Vec1<TTo>(detail::CastValueForF2IConv<TTo>(from.raw));
|
|
1421
1462
|
}
|
|
1422
1463
|
|
|
1423
1464
|
template <class DTo, typename TTo = TFromD<DTo>, typename TFrom,
|
|
@@ -1792,6 +1833,11 @@ HWY_API Mask1<T> LoadMaskBits(D /* tag */, const uint8_t* HWY_RESTRICT bits) {
|
|
|
1792
1833
|
return Mask1<T>::FromBool((bits[0] & 1) != 0);
|
|
1793
1834
|
}
|
|
1794
1835
|
|
|
1836
|
+
template <class D, HWY_IF_LANES_D(D, 1)>
|
|
1837
|
+
HWY_API MFromD<D> Dup128MaskFromMaskBits(D /*d*/, unsigned mask_bits) {
|
|
1838
|
+
return MFromD<D>::FromBool((mask_bits & 1) != 0);
|
|
1839
|
+
}
|
|
1840
|
+
|
|
1795
1841
|
// `p` points to at least 8 writable bytes.
|
|
1796
1842
|
template <class D, typename T = TFromD<D>>
|
|
1797
1843
|
HWY_API size_t StoreMaskBits(D d, const Mask1<T> mask, uint8_t* bits) {
|
|
@@ -1971,23 +2017,7 @@ HWY_API Vec1<TW> RearrangeToOddPlusEven(Vec1<TW> sum0, Vec1<TW> /* sum1 */) {
|
|
|
1971
2017
|
|
|
1972
2018
|
// ================================================== REDUCTIONS
|
|
1973
2019
|
|
|
1974
|
-
//
|
|
1975
|
-
template <class D, typename T = TFromD<D>>
|
|
1976
|
-
HWY_API Vec1<T> SumOfLanes(D /* tag */, const Vec1<T> v) {
|
|
1977
|
-
return v;
|
|
1978
|
-
}
|
|
1979
|
-
template <class D, typename T = TFromD<D>>
|
|
1980
|
-
HWY_API T ReduceSum(D /* tag */, const Vec1<T> v) {
|
|
1981
|
-
return GetLane(v);
|
|
1982
|
-
}
|
|
1983
|
-
template <class D, typename T = TFromD<D>>
|
|
1984
|
-
HWY_API Vec1<T> MinOfLanes(D /* tag */, const Vec1<T> v) {
|
|
1985
|
-
return v;
|
|
1986
|
-
}
|
|
1987
|
-
template <class D, typename T = TFromD<D>>
|
|
1988
|
-
HWY_API Vec1<T> MaxOfLanes(D /* tag */, const Vec1<T> v) {
|
|
1989
|
-
return v;
|
|
1990
|
-
}
|
|
2020
|
+
// Nothing native, generic_ops-inl defines SumOfLanes and ReduceSum.
|
|
1991
2021
|
|
|
1992
2022
|
// NOLINTNEXTLINE(google-readability-namespace-comments)
|
|
1993
2023
|
} // namespace HWY_NAMESPACE
|