@img/sharp-libvips-dev 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/include/aom/aom_encoder.h +3 -3
- package/include/aom/aomcx.h +17 -8
- package/include/expat.h +21 -10
- package/include/expat_config.h +11 -5
- package/include/ffi.h +12 -25
- package/include/fontconfig/fontconfig.h +5 -3
- package/include/freetype2/freetype/config/ftoption.h +1 -1
- package/include/gio-unix-2.0/gio/gfiledescriptorbased.h +3 -7
- package/include/gio-unix-2.0/gio/gunixinputstream.h +0 -5
- package/include/gio-unix-2.0/gio/gunixoutputstream.h +0 -5
- package/include/glib-2.0/gio/gappinfo.h +0 -7
- package/include/glib-2.0/gio/gapplication.h +6 -0
- package/include/glib-2.0/gio/gapplicationcommandline.h +12 -1
- package/include/glib-2.0/gio/gasyncinitable.h +0 -7
- package/include/glib-2.0/gio/gasyncresult.h +0 -6
- package/include/glib-2.0/gio/gbufferedinputstream.h +0 -5
- package/include/glib-2.0/gio/gbufferedoutputstream.h +0 -5
- package/include/glib-2.0/gio/gbytesicon.h +0 -5
- package/include/glib-2.0/gio/gcancellable.h +0 -5
- package/include/glib-2.0/gio/gconverter.h +0 -7
- package/include/glib-2.0/gio/gconverterinputstream.h +0 -6
- package/include/glib-2.0/gio/gconverteroutputstream.h +0 -6
- package/include/glib-2.0/gio/gdatagrambased.h +0 -7
- package/include/glib-2.0/gio/gdatainputstream.h +0 -6
- package/include/glib-2.0/gio/gdataoutputstream.h +0 -6
- package/include/glib-2.0/gio/gdbusinterface.h +0 -8
- package/include/glib-2.0/gio/gdbusinterfaceskeleton.h +0 -8
- package/include/glib-2.0/gio/gdbusmessage.h +2 -1
- package/include/glib-2.0/gio/gdbusobjectmanagerclient.h +0 -8
- package/include/glib-2.0/gio/gdbusobjectmanagerserver.h +0 -8
- package/include/glib-2.0/gio/gdbusobjectproxy.h +0 -8
- package/include/glib-2.0/gio/gdbusobjectskeleton.h +0 -8
- package/include/glib-2.0/gio/gdbusproxy.h +0 -8
- package/include/glib-2.0/gio/gdebugcontroller.h +0 -8
- package/include/glib-2.0/gio/gdebugcontrollerdbus.h +0 -7
- package/include/glib-2.0/gio/gdtlsserverconnection.h +0 -8
- package/include/glib-2.0/gio/gemblem.h +0 -5
- package/include/glib-2.0/gio/gemblemedicon.h +0 -5
- package/include/glib-2.0/gio/gfile.h +0 -10
- package/include/glib-2.0/gio/gfileenumerator.h +0 -5
- package/include/glib-2.0/gio/gfileicon.h +0 -5
- package/include/glib-2.0/gio/gfileinfo.h +0 -5
- package/include/glib-2.0/gio/gfileinputstream.h +0 -8
- package/include/glib-2.0/gio/gfileiostream.h +0 -8
- package/include/glib-2.0/gio/gfilemonitor.h +0 -5
- package/include/glib-2.0/gio/gfilenamecompleter.h +0 -5
- package/include/glib-2.0/gio/gfileoutputstream.h +0 -8
- package/include/glib-2.0/gio/gfilterinputstream.h +0 -5
- package/include/glib-2.0/gio/gfilteroutputstream.h +0 -5
- package/include/glib-2.0/gio/gicon.h +0 -5
- package/include/glib-2.0/gio/ginitable.h +0 -7
- package/include/glib-2.0/gio/ginputstream.h +0 -5
- package/include/glib-2.0/gio/gio-autocleanups.h +4 -0
- package/include/glib-2.0/gio/gio-visibility.h +34 -0
- package/include/glib-2.0/gio/gioenums.h +6 -1
- package/include/glib-2.0/gio/giomodule.h +0 -5
- package/include/glib-2.0/gio/giostream.h +0 -5
- package/include/glib-2.0/gio/giotypes.h +5 -108
- package/include/glib-2.0/gio/gloadableicon.h +0 -6
- package/include/glib-2.0/gio/gmemoryinputstream.h +0 -5
- package/include/glib-2.0/gio/gmemoryoutputstream.h +0 -5
- package/include/glib-2.0/gio/gmountoperation.h +0 -6
- package/include/glib-2.0/gio/gnetworking.h +4 -0
- package/include/glib-2.0/gio/goutputstream.h +0 -9
- package/include/glib-2.0/gio/gpollableinputstream.h +0 -7
- package/include/glib-2.0/gio/gpollableoutputstream.h +0 -7
- package/include/glib-2.0/gio/gproxy.h +0 -7
- package/include/glib-2.0/gio/gproxyaddressenumerator.h +0 -8
- package/include/glib-2.0/gio/gseekable.h +0 -5
- package/include/glib-2.0/gio/gsettingsbackend.h +0 -5
- package/include/glib-2.0/gio/gsimpleactiongroup.h +0 -7
- package/include/glib-2.0/gio/gsimpleasyncresult.h +0 -5
- package/include/glib-2.0/gio/gsimpleproxyresolver.h +0 -5
- package/include/glib-2.0/gio/gsocket.h +13 -0
- package/include/glib-2.0/gio/gsocketaddressenumerator.h +0 -6
- package/include/glib-2.0/gio/gsocketconnectable.h +0 -5
- package/include/glib-2.0/gio/gtask.h +12 -0
- package/include/glib-2.0/gio/gthemedicon.h +0 -5
- package/include/glib-2.0/gio/gtlsserverconnection.h +0 -8
- package/include/glib-2.0/gio/gunixcredentialsmessage.h +0 -8
- package/include/glib-2.0/gio/gvfs.h +0 -5
- package/include/glib-2.0/gio/gvolume.h +2 -2
- package/include/glib-2.0/gio/gvolumemonitor.h +0 -5
- package/include/glib-2.0/girepository/gi-visibility.h +986 -0
- package/include/glib-2.0/girepository/giarginfo.h +100 -0
- package/include/glib-2.0/girepository/gibaseinfo.h +129 -0
- package/include/glib-2.0/girepository/gicallableinfo.h +119 -0
- package/include/glib-2.0/girepository/gicallbackinfo.h +60 -0
- package/include/glib-2.0/girepository/giconstantinfo.h +72 -0
- package/include/glib-2.0/girepository/gienuminfo.h +82 -0
- package/include/glib-2.0/girepository/gifieldinfo.h +84 -0
- package/include/glib-2.0/girepository/giflagsinfo.h +60 -0
- package/include/glib-2.0/girepository/gifunctioninfo.h +117 -0
- package/include/glib-2.0/girepository/giinterfaceinfo.h +120 -0
- package/include/glib-2.0/girepository/giobjectinfo.h +230 -0
- package/include/glib-2.0/girepository/gipropertyinfo.h +77 -0
- package/include/glib-2.0/girepository/giregisteredtypeinfo.h +75 -0
- package/include/glib-2.0/girepository/girepository-autocleanups.h +56 -0
- package/include/glib-2.0/girepository/girepository.h +247 -0
- package/include/glib-2.0/girepository/girffi.h +129 -0
- package/include/glib-2.0/girepository/gisignalinfo.h +72 -0
- package/include/glib-2.0/girepository/gistructinfo.h +102 -0
- package/include/glib-2.0/girepository/gitypeinfo.h +144 -0
- package/include/glib-2.0/girepository/gitypelib.h +61 -0
- package/include/glib-2.0/girepository/gitypes.h +421 -0
- package/include/glib-2.0/girepository/giunioninfo.h +105 -0
- package/include/glib-2.0/girepository/giunresolvedinfo.h +60 -0
- package/include/glib-2.0/girepository/givalueinfo.h +65 -0
- package/include/glib-2.0/girepository/givfuncinfo.h +88 -0
- package/include/glib-2.0/glib/deprecated/gcompletion.h +1 -1
- package/include/glib-2.0/glib/deprecated/grel.h +0 -23
- package/include/glib-2.0/glib/deprecated/gthread.h +10 -6
- package/include/glib-2.0/glib/gatomic.h +20 -20
- package/include/glib-2.0/glib/gbitlock.h +31 -0
- package/include/glib-2.0/glib/gbookmarkfile.h +39 -1
- package/include/glib-2.0/glib/gchecksum.h +0 -10
- package/include/glib-2.0/glib/gdate.h +0 -9
- package/include/glib-2.0/glib/gdatetime.h +33 -1
- package/include/glib-2.0/glib/gdir.h +5 -0
- package/include/glib-2.0/glib/ghmac.h +0 -9
- package/include/glib-2.0/glib/glib-autocleanups.h +4 -0
- package/include/glib-2.0/glib/glib-visibility.h +34 -0
- package/include/glib-2.0/glib/gmacros.h +1 -0
- package/include/glib-2.0/glib/gmessages.h +11 -0
- package/include/glib-2.0/glib/gpathbuf.h +0 -7
- package/include/glib-2.0/glib/gslice.h +2 -0
- package/include/glib-2.0/glib/gstdio.h +1 -1
- package/include/glib-2.0/glib/gstrfuncs.h +24 -18
- package/include/glib-2.0/glib/gstrvbuilder.h +4 -8
- package/include/glib-2.0/glib/gtestutils.h +5 -0
- package/include/glib-2.0/glib/gthread.h +216 -3
- package/include/glib-2.0/glib/gunicode.h +12 -2
- package/include/glib-2.0/glib/gvarianttype.h +1 -10
- package/include/glib-2.0/glib/gversionmacros.h +9 -0
- package/include/glib-2.0/glib/gwin32.h +4 -4
- package/include/glib-2.0/glib-unix.h +214 -0
- package/include/glib-2.0/gmodule/gmodule-visibility.h +34 -0
- package/include/glib-2.0/gobject/gbinding.h +0 -8
- package/include/glib-2.0/gobject/gbindinggroup.h +0 -8
- package/include/glib-2.0/gobject/gclosure.h +1 -9
- package/include/glib-2.0/gobject/genums.h +6 -6
- package/include/glib-2.0/gobject/glib-types.h +44 -0
- package/include/glib-2.0/gobject/gobject-autocleanups.h +4 -0
- package/include/glib-2.0/gobject/gobject-visibility.h +34 -0
- package/include/glib-2.0/gobject/gobject.h +1 -16
- package/include/glib-2.0/gobject/gparam.h +3 -12
- package/include/glib-2.0/gobject/gsignal.h +16 -6
- package/include/glib-2.0/gobject/gsignalgroup.h +0 -8
- package/include/glib-2.0/gobject/gtype.h +53 -20
- package/include/glib-2.0/gobject/gtypemodule.h +0 -7
- package/include/glib-2.0/gobject/gtypeplugin.h +0 -6
- package/include/glib-2.0/gobject/gvaluearray.h +0 -7
- package/include/glib-2.0/gobject/gvaluecollector.h +1 -11
- package/include/glib-2.0/gobject/gvaluetypes.h +2 -0
- package/include/hwy/aligned_allocator.h +171 -6
- package/include/hwy/base.h +1765 -543
- package/include/hwy/cache_control.h +24 -6
- package/include/hwy/detect_compiler_arch.h +23 -2
- package/include/hwy/detect_targets.h +56 -13
- package/include/hwy/foreach_target.h +24 -0
- package/include/hwy/highway.h +20 -3
- package/include/hwy/ops/arm_neon-inl.h +1086 -667
- package/include/hwy/ops/arm_sve-inl.h +1091 -235
- package/include/hwy/ops/emu128-inl.h +271 -196
- package/include/hwy/ops/generic_ops-inl.h +2270 -399
- package/include/hwy/ops/ppc_vsx-inl.h +1786 -563
- package/include/hwy/ops/rvv-inl.h +1043 -311
- package/include/hwy/ops/scalar-inl.h +189 -159
- package/include/hwy/ops/set_macros-inl.h +66 -6
- package/include/hwy/ops/shared-inl.h +175 -56
- package/include/hwy/ops/wasm_128-inl.h +153 -136
- package/include/hwy/ops/x86_128-inl.h +1647 -646
- package/include/hwy/ops/x86_256-inl.h +1003 -370
- package/include/hwy/ops/x86_512-inl.h +948 -353
- package/include/hwy/per_target.h +4 -0
- package/include/hwy/profiler.h +648 -0
- package/include/hwy/robust_statistics.h +2 -2
- package/include/hwy/targets.h +18 -11
- package/include/hwy/timer.h +11 -0
- package/include/lcms2.h +46 -7
- package/include/lcms2_plugin.h +4 -4
- package/include/libheif/heif_version.h +2 -2
- package/include/libpng16/png.h +32 -29
- package/include/libpng16/pngconf.h +2 -2
- package/include/libpng16/pnglibconf.h +7 -2
- package/include/librsvg-2.0/librsvg/rsvg-version.h +2 -2
- package/include/libxml2/libxml/HTMLparser.h +23 -0
- package/include/libxml2/libxml/SAX.h +0 -2
- package/include/libxml2/libxml/SAX2.h +0 -2
- package/include/libxml2/libxml/c14n.h +0 -2
- package/include/libxml2/libxml/dict.h +1 -0
- package/include/libxml2/libxml/encoding.h +16 -14
- package/include/libxml2/libxml/entities.h +4 -0
- package/include/libxml2/libxml/globals.h +15 -503
- package/include/libxml2/libxml/hash.h +57 -61
- package/include/libxml2/libxml/nanoftp.h +2 -2
- package/include/libxml2/libxml/parser.h +137 -18
- package/include/libxml2/libxml/parserInternals.h +1 -0
- package/include/libxml2/libxml/relaxng.h +2 -1
- package/include/libxml2/libxml/schemasInternals.h +1 -0
- package/include/libxml2/libxml/schematron.h +1 -0
- package/include/libxml2/libxml/threads.h +4 -11
- package/include/libxml2/libxml/tree.h +68 -20
- package/include/libxml2/libxml/uri.h +2 -1
- package/include/libxml2/libxml/valid.h +2 -0
- package/include/libxml2/libxml/xmlIO.h +65 -13
- package/include/libxml2/libxml/xmlerror.h +37 -8
- package/include/libxml2/libxml/xmlmemory.h +37 -40
- package/include/libxml2/libxml/xmlreader.h +6 -0
- package/include/libxml2/libxml/xmlregexp.h +2 -9
- package/include/libxml2/libxml/xmlsave.h +9 -0
- package/include/libxml2/libxml/xmlschemas.h +3 -0
- package/include/libxml2/libxml/xmlversion.h +28 -43
- package/include/libxml2/libxml/xpath.h +1 -1
- package/include/libxml2/libxml/xpathInternals.h +2 -1
- package/include/libxml2/libxml/xpointer.h +5 -4
- package/include/pango-1.0/pango/pango-features.h +3 -3
- package/include/pango-1.0/pango/pango-fontmap.h +7 -0
- package/include/pixman-1/pixman-version.h +3 -3
- package/include/pixman-1/pixman.h +9 -2
- package/include/png.h +32 -29
- package/include/pngconf.h +2 -2
- package/include/pnglibconf.h +7 -2
- package/include/vips/connection.h +9 -3
- package/include/vips/util.h +0 -9
- package/include/vips/version.h +4 -4
- package/include/zconf.h +3 -0
- package/include/zlib.h +3 -3
- package/package.json +1 -1
- package/versions.json +15 -15
package/include/hwy/cache_control.h
CHANGED
|
@@ -25,11 +25,15 @@
|
|
|
25
25
|
#define HWY_DISABLE_CACHE_CONTROL
|
|
26
26
|
#endif
|
|
27
27
|
|
|
28
|
+
#ifndef HWY_DISABLE_CACHE_CONTROL
|
|
28
29
|
// intrin.h is sufficient on MSVC and already included by base.h.
|
|
29
|
-
#if HWY_ARCH_X86 && !
|
|
30
|
+
#if HWY_ARCH_X86 && !HWY_COMPILER_MSVC
|
|
30
31
|
#include <emmintrin.h> // SSE2
|
|
31
32
|
#include <xmmintrin.h> // _mm_prefetch
|
|
33
|
+
#elif HWY_ARCH_ARM_A64
|
|
34
|
+
#include <arm_acle.h>
|
|
32
35
|
#endif
|
|
36
|
+
#endif // HWY_DISABLE_CACHE_CONTROL
|
|
33
37
|
|
|
34
38
|
namespace hwy {
|
|
35
39
|
|
|
@@ -76,15 +80,16 @@ HWY_INLINE HWY_ATTR_CACHE void FlushStream() {
|
|
|
76
80
|
// subsequent actual loads.
|
|
77
81
|
template <typename T>
|
|
78
82
|
HWY_INLINE HWY_ATTR_CACHE void Prefetch(const T* p) {
|
|
79
|
-
|
|
83
|
+
(void)p;
|
|
84
|
+
#ifndef HWY_DISABLE_CACHE_CONTROL
|
|
85
|
+
#if HWY_ARCH_X86
|
|
80
86
|
_mm_prefetch(reinterpret_cast<const char*>(p), _MM_HINT_T0);
|
|
81
87
|
#elif HWY_COMPILER_GCC // includes clang
|
|
82
88
|
// Hint=0 (NTA) behavior differs, but skipping outer caches is probably not
|
|
83
89
|
// desirable, so use the default 3 (keep in caches).
|
|
84
90
|
__builtin_prefetch(p, /*write=*/0, /*hint=*/3);
|
|
85
|
-
#else
|
|
86
|
-
(void)p;
|
|
87
91
|
#endif
|
|
92
|
+
#endif // HWY_DISABLE_CACHE_CONTROL
|
|
88
93
|
}
|
|
89
94
|
|
|
90
95
|
// Invalidates and flushes the cache line containing "p", if possible.
|
|
@@ -96,11 +101,24 @@ HWY_INLINE HWY_ATTR_CACHE void FlushCacheline(const void* p) {
|
|
|
96
101
|
#endif
|
|
97
102
|
}
|
|
98
103
|
|
|
99
|
-
//
|
|
104
|
+
// Hints that we are inside a spin loop and potentially reduces power
|
|
105
|
+
// consumption and coherency traffic. For example, x86 avoids multiple
|
|
106
|
+
// outstanding load requests, which reduces the memory order violation penalty
|
|
107
|
+
// when exiting the loop.
|
|
100
108
|
HWY_INLINE HWY_ATTR_CACHE void Pause() {
|
|
101
|
-
#
|
|
109
|
+
#ifndef HWY_DISABLE_CACHE_CONTROL
|
|
110
|
+
#if HWY_ARCH_X86
|
|
102
111
|
_mm_pause();
|
|
112
|
+
#elif HWY_ARCH_ARM_A64 && HWY_COMPILER_CLANG
|
|
113
|
+
// This is documented in ACLE and the YIELD instruction is also available in
|
|
114
|
+
// Armv7, but the intrinsic is broken for Armv7 clang, hence A64 only.
|
|
115
|
+
__yield();
|
|
116
|
+
#elif HWY_ARCH_ARM && HWY_COMPILER_GCC // includes clang
|
|
117
|
+
__asm__ volatile("yield" ::: "memory");
|
|
118
|
+
#elif HWY_ARCH_PPC && HWY_COMPILER_GCC // includes clang
|
|
119
|
+
__asm__ volatile("or 27,27,27" ::: "memory");
|
|
103
120
|
#endif
|
|
121
|
+
#endif // HWY_DISABLE_CACHE_CONTROL
|
|
104
122
|
}
|
|
105
123
|
|
|
106
124
|
} // namespace hwy
|
package/include/hwy/detect_compiler_arch.h
CHANGED
|
@@ -73,7 +73,9 @@
|
|
|
73
73
|
// https://github.com/simd-everywhere/simde/blob/47d6e603de9d04ee05cdfbc57cf282a02be1bf2a/simde/simde-detect-clang.h#L59.
|
|
74
74
|
// Please send updates below to them as well, thanks!
|
|
75
75
|
#if defined(__apple_build_version__) || __clang_major__ >= 999
|
|
76
|
-
#if __has_attribute(
|
|
76
|
+
#if __has_attribute(unsafe_buffer_usage) // no new warnings in 17.0
|
|
77
|
+
#define HWY_COMPILER_CLANG 1700
|
|
78
|
+
#elif __has_attribute(nouwtable) // no new warnings in 16.0
|
|
77
79
|
#define HWY_COMPILER_CLANG 1600
|
|
78
80
|
#elif __has_warning("-Warray-parameter")
|
|
79
81
|
#define HWY_COMPILER_CLANG 1500
|
|
@@ -187,6 +189,12 @@
|
|
|
187
189
|
#define HWY_ARCH_PPC 0
|
|
188
190
|
#endif
|
|
189
191
|
|
|
192
|
+
#if defined(__powerpc64__) || (HWY_ARCH_PPC && defined(__64BIT__))
|
|
193
|
+
#define HWY_ARCH_PPC_64 1
|
|
194
|
+
#else
|
|
195
|
+
#define HWY_ARCH_PPC_64 0
|
|
196
|
+
#endif
|
|
197
|
+
|
|
190
198
|
// aarch32 is currently not supported; please raise an issue if you want it.
|
|
191
199
|
#if defined(__ARM_ARCH_ISA_A64) || defined(__aarch64__) || defined(_M_ARM64)
|
|
192
200
|
#define HWY_ARCH_ARM_A64 1
|
|
@@ -230,10 +238,16 @@
|
|
|
230
238
|
#define HWY_ARCH_RVV 0
|
|
231
239
|
#endif
|
|
232
240
|
|
|
241
|
+
#if defined(__s390x__)
|
|
242
|
+
#define HWY_ARCH_S390X 1
|
|
243
|
+
#else
|
|
244
|
+
#define HWY_ARCH_S390X 0
|
|
245
|
+
#endif
|
|
246
|
+
|
|
233
247
|
// It is an error to detect multiple architectures at the same time, but OK to
|
|
234
248
|
// detect none of the above.
|
|
235
249
|
#if (HWY_ARCH_X86 + HWY_ARCH_PPC + HWY_ARCH_ARM + HWY_ARCH_ARM_OLD + \
|
|
236
|
-
HWY_ARCH_WASM + HWY_ARCH_RVV) > 1
|
|
250
|
+
HWY_ARCH_WASM + HWY_ARCH_RVV + HWY_ARCH_S390X) > 1
|
|
237
251
|
#error "Must not detect more than one architecture"
|
|
238
252
|
#endif
|
|
239
253
|
|
|
@@ -249,6 +263,13 @@
|
|
|
249
263
|
#define HWY_OS_LINUX 0
|
|
250
264
|
#endif
|
|
251
265
|
|
|
266
|
+
// iOS or Mac
|
|
267
|
+
#if defined(__APPLE__)
|
|
268
|
+
#define HWY_OS_APPLE 1
|
|
269
|
+
#else
|
|
270
|
+
#define HWY_OS_APPLE 0
|
|
271
|
+
#endif
|
|
272
|
+
|
|
252
273
|
//------------------------------------------------------------------------------
|
|
253
274
|
// Endianness
|
|
254
275
|
|
package/include/hwy/detect_targets.h
CHANGED
|
@@ -102,12 +102,13 @@
|
|
|
102
102
|
// --------------------------- Future expansion: 4 targets
|
|
103
103
|
// Bits 39..42 reserved
|
|
104
104
|
|
|
105
|
-
// --------------------------- IBM Power: 9 targets (+ one fallback)
|
|
105
|
+
// --------------------------- IBM Power/ZSeries: 9 targets (+ one fallback)
|
|
106
106
|
// Bits 43..46 reserved (4 targets)
|
|
107
107
|
#define HWY_PPC10 (1LL << 47) // v3.1
|
|
108
108
|
#define HWY_PPC9 (1LL << 48) // v3.0
|
|
109
109
|
#define HWY_PPC8 (1LL << 49) // v2.07
|
|
110
|
-
|
|
110
|
+
#define HWY_Z15 (1LL << 50) // Z15
|
|
111
|
+
#define HWY_Z14 (1LL << 51) // Z14
|
|
111
112
|
#define HWY_HIGHEST_TARGET_BIT_PPC 51
|
|
112
113
|
|
|
113
114
|
// --------------------------- WebAssembly: 9 targets (+ one fallback)
|
|
@@ -316,6 +317,18 @@
|
|
|
316
317
|
#define HWY_BASELINE_PPC10 0
|
|
317
318
|
#endif
|
|
318
319
|
|
|
320
|
+
#if HWY_ARCH_S390X && defined(__VEC__) && defined(__ARCH__) && __ARCH__ >= 12
|
|
321
|
+
#define HWY_BASELINE_Z14 HWY_Z14
|
|
322
|
+
#else
|
|
323
|
+
#define HWY_BASELINE_Z14 0
|
|
324
|
+
#endif
|
|
325
|
+
|
|
326
|
+
#if HWY_BASELINE_Z14 && __ARCH__ >= 13
|
|
327
|
+
#define HWY_BASELINE_Z15 HWY_Z15
|
|
328
|
+
#else
|
|
329
|
+
#define HWY_BASELINE_Z15 0
|
|
330
|
+
#endif
|
|
331
|
+
|
|
319
332
|
#define HWY_BASELINE_SVE2 0
|
|
320
333
|
#define HWY_BASELINE_SVE 0
|
|
321
334
|
#define HWY_BASELINE_NEON 0
|
|
@@ -498,13 +511,14 @@
|
|
|
498
511
|
|
|
499
512
|
// Allow the user to override this without any guarantee of success.
|
|
500
513
|
#ifndef HWY_BASELINE_TARGETS
|
|
501
|
-
#define HWY_BASELINE_TARGETS
|
|
502
|
-
(HWY_BASELINE_SCALAR | HWY_BASELINE_WASM | HWY_BASELINE_PPC8 |
|
|
503
|
-
HWY_BASELINE_PPC9 | HWY_BASELINE_PPC10 |
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
HWY_BASELINE_AVX3_SPR |
|
|
514
|
+
#define HWY_BASELINE_TARGETS \
|
|
515
|
+
(HWY_BASELINE_SCALAR | HWY_BASELINE_WASM | HWY_BASELINE_PPC8 | \
|
|
516
|
+
HWY_BASELINE_PPC9 | HWY_BASELINE_PPC10 | HWY_BASELINE_Z14 | \
|
|
517
|
+
HWY_BASELINE_Z15 | HWY_BASELINE_SVE2 | HWY_BASELINE_SVE | \
|
|
518
|
+
HWY_BASELINE_NEON | HWY_BASELINE_SSE2 | HWY_BASELINE_SSSE3 | \
|
|
519
|
+
HWY_BASELINE_SSE4 | HWY_BASELINE_AVX2 | HWY_BASELINE_AVX3 | \
|
|
520
|
+
HWY_BASELINE_AVX3_DL | HWY_BASELINE_AVX3_ZEN4 | HWY_BASELINE_AVX3_SPR | \
|
|
521
|
+
HWY_BASELINE_RVV)
|
|
508
522
|
#endif // HWY_BASELINE_TARGETS
|
|
509
523
|
|
|
510
524
|
//------------------------------------------------------------------------------
|
|
@@ -537,9 +551,11 @@
|
|
|
537
551
|
// Clang, GCC and MSVC allow runtime dispatch on x86.
|
|
538
552
|
#if HWY_ARCH_X86
|
|
539
553
|
#define HWY_HAVE_RUNTIME_DISPATCH 1
|
|
540
|
-
// On Arm/PPC,
|
|
541
|
-
// capabilities.
|
|
542
|
-
#elif (HWY_ARCH_ARM || HWY_ARCH_PPC
|
|
554
|
+
// On Arm/PPC, GCC and Clang 16+ do, and we require Linux to detect CPU
|
|
555
|
+
// capabilities. Currently require opt-in for Clang because it is experimental.
|
|
556
|
+
#elif (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X) && \
|
|
557
|
+
(HWY_COMPILER_GCC_ACTUAL || (HWY_COMPILER_CLANG >= 1600 && \
|
|
558
|
+
defined(HWY_ENABLE_CLANG_ARM_DISPATCH))) && \
|
|
543
559
|
HWY_OS_LINUX && !defined(TOOLCHAIN_MISS_SYS_AUXV_H)
|
|
544
560
|
#define HWY_HAVE_RUNTIME_DISPATCH 1
|
|
545
561
|
#else
|
|
@@ -579,18 +595,41 @@
|
|
|
579
595
|
|
|
580
596
|
#if HWY_ARCH_PPC && defined(__ALTIVEC__) && \
|
|
581
597
|
(!HWY_COMPILER_CLANG || HWY_BASELINE_PPC8 != 0)
|
|
598
|
+
|
|
599
|
+
#if (HWY_BASELINE_PPC9 | HWY_BASELINE_PPC10) && \
|
|
600
|
+
!defined(HWY_SKIP_NON_BEST_BASELINE)
|
|
601
|
+
// On POWER with -m flags, we get compile errors (#1707) for targets older than
|
|
602
|
+
// the baseline specified via -m, so only generate the static target and better.
|
|
603
|
+
// Note that some Linux distros actually do set POWER9 as the baseline.
|
|
604
|
+
// This works by skipping case 3 below, so case 4 is reached.
|
|
605
|
+
#define HWY_SKIP_NON_BEST_BASELINE
|
|
606
|
+
#endif
|
|
607
|
+
|
|
582
608
|
#define HWY_ATTAINABLE_PPC (HWY_PPC8 | HWY_PPC9 | HWY_PPC10)
|
|
609
|
+
|
|
583
610
|
#else
|
|
584
611
|
#define HWY_ATTAINABLE_PPC 0
|
|
585
612
|
#endif
|
|
586
613
|
|
|
614
|
+
#if HWY_ARCH_S390X && HWY_BASELINE_Z14 != 0
|
|
615
|
+
#define HWY_ATTAINABLE_S390X (HWY_Z14 | HWY_Z15)
|
|
616
|
+
#else
|
|
617
|
+
#define HWY_ATTAINABLE_S390X 0
|
|
618
|
+
#endif
|
|
619
|
+
|
|
587
620
|
// Attainable means enabled and the compiler allows intrinsics (even when not
|
|
588
621
|
// allowed to autovectorize). Used in 3 and 4.
|
|
589
622
|
#if HWY_ARCH_X86
|
|
623
|
+
#if HWY_COMPILER_MSVC
|
|
624
|
+
// Fewer targets for faster builds.
|
|
625
|
+
#define HWY_ATTAINABLE_TARGETS \
|
|
626
|
+
HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_STATIC_TARGET | HWY_AVX2)
|
|
627
|
+
#else // !HWY_COMPILER_MSVC
|
|
590
628
|
#define HWY_ATTAINABLE_TARGETS \
|
|
591
629
|
HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_SSE2 | HWY_SSSE3 | HWY_SSE4 | \
|
|
592
630
|
HWY_AVX2 | HWY_AVX3 | HWY_ATTAINABLE_AVX3_DL | HWY_AVX3_ZEN4 | \
|
|
593
631
|
HWY_AVX3_SPR)
|
|
632
|
+
#endif // !HWY_COMPILER_MSVC
|
|
594
633
|
#elif HWY_ARCH_ARM
|
|
595
634
|
#define HWY_ATTAINABLE_TARGETS \
|
|
596
635
|
HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_NEON | HWY_ATTAINABLE_SVE | \
|
|
@@ -598,6 +637,9 @@
|
|
|
598
637
|
#elif HWY_ARCH_PPC
|
|
599
638
|
#define HWY_ATTAINABLE_TARGETS \
|
|
600
639
|
HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_PPC)
|
|
640
|
+
#elif HWY_ARCH_S390X
|
|
641
|
+
#define HWY_ATTAINABLE_TARGETS \
|
|
642
|
+
HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_S390X)
|
|
601
643
|
#else
|
|
602
644
|
#define HWY_ATTAINABLE_TARGETS (HWY_ENABLED_BASELINE)
|
|
603
645
|
#endif // HWY_ARCH_*
|
|
@@ -621,7 +663,8 @@
|
|
|
621
663
|
#define HWY_TARGETS HWY_STATIC_TARGET
|
|
622
664
|
|
|
623
665
|
// 3) For tests: include all attainable targets (in particular: scalar)
|
|
624
|
-
#elif defined(HWY_COMPILE_ALL_ATTAINABLE) || defined(HWY_IS_TEST)
|
|
666
|
+
#elif (defined(HWY_COMPILE_ALL_ATTAINABLE) || defined(HWY_IS_TEST)) && \
|
|
667
|
+
!defined(HWY_SKIP_NON_BEST_BASELINE)
|
|
625
668
|
#define HWY_TARGETS HWY_ATTAINABLE_TARGETS
|
|
626
669
|
|
|
627
670
|
// 4) Default: attainable WITHOUT non-best baseline. This reduces code size by
|
package/include/hwy/foreach_target.h
CHANGED
|
@@ -271,6 +271,30 @@
|
|
|
271
271
|
#endif
|
|
272
272
|
#endif
|
|
273
273
|
|
|
274
|
+
// ------------------------------ HWY_ARCH_S390X
|
|
275
|
+
|
|
276
|
+
#if (HWY_TARGETS & HWY_Z14) && (HWY_STATIC_TARGET != HWY_Z14)
|
|
277
|
+
#undef HWY_TARGET
|
|
278
|
+
#define HWY_TARGET HWY_Z14
|
|
279
|
+
#include HWY_TARGET_INCLUDE
|
|
280
|
+
#ifdef HWY_TARGET_TOGGLE
|
|
281
|
+
#undef HWY_TARGET_TOGGLE
|
|
282
|
+
#else
|
|
283
|
+
#define HWY_TARGET_TOGGLE
|
|
284
|
+
#endif
|
|
285
|
+
#endif
|
|
286
|
+
|
|
287
|
+
#if (HWY_TARGETS & HWY_Z15) && (HWY_STATIC_TARGET != HWY_Z15)
|
|
288
|
+
#undef HWY_TARGET
|
|
289
|
+
#define HWY_TARGET HWY_Z15
|
|
290
|
+
#include HWY_TARGET_INCLUDE
|
|
291
|
+
#ifdef HWY_TARGET_TOGGLE
|
|
292
|
+
#undef HWY_TARGET_TOGGLE
|
|
293
|
+
#else
|
|
294
|
+
#define HWY_TARGET_TOGGLE
|
|
295
|
+
#endif
|
|
296
|
+
#endif
|
|
297
|
+
|
|
274
298
|
// ------------------------------ HWY_ARCH_RVV
|
|
275
299
|
|
|
276
300
|
#if (HWY_TARGETS & HWY_RVV) && (HWY_STATIC_TARGET != HWY_RVV)
|
package/include/hwy/highway.h
CHANGED
|
@@ -32,8 +32,8 @@ namespace hwy {
|
|
|
32
32
|
|
|
33
33
|
// API version (https://semver.org/); keep in sync with CMakeLists.txt.
|
|
34
34
|
#define HWY_MAJOR 1
|
|
35
|
-
#define HWY_MINOR
|
|
36
|
-
#define HWY_PATCH
|
|
35
|
+
#define HWY_MINOR 1
|
|
36
|
+
#define HWY_PATCH 0
|
|
37
37
|
|
|
38
38
|
//------------------------------------------------------------------------------
|
|
39
39
|
// Shorthand for tags (defined in shared-inl.h) used to select overloads.
|
|
@@ -98,6 +98,10 @@ namespace hwy {
|
|
|
98
98
|
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_PPC9::FUNC_NAME
|
|
99
99
|
#elif HWY_STATIC_TARGET == HWY_PPC10
|
|
100
100
|
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_PPC10::FUNC_NAME
|
|
101
|
+
#elif HWY_STATIC_TARGET == HWY_Z14
|
|
102
|
+
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_Z14::FUNC_NAME
|
|
103
|
+
#elif HWY_STATIC_TARGET == HWY_Z15
|
|
104
|
+
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_Z15::FUNC_NAME
|
|
101
105
|
#elif HWY_STATIC_TARGET == HWY_SSE2
|
|
102
106
|
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SSE2::FUNC_NAME
|
|
103
107
|
#elif HWY_STATIC_TARGET == HWY_SSSE3
|
|
@@ -200,6 +204,18 @@ namespace hwy {
|
|
|
200
204
|
#define HWY_CHOOSE_PPC10(FUNC_NAME) nullptr
|
|
201
205
|
#endif
|
|
202
206
|
|
|
207
|
+
#if HWY_TARGETS & HWY_Z14
|
|
208
|
+
#define HWY_CHOOSE_Z14(FUNC_NAME) &N_Z14::FUNC_NAME
|
|
209
|
+
#else
|
|
210
|
+
#define HWY_CHOOSE_Z14(FUNC_NAME) nullptr
|
|
211
|
+
#endif
|
|
212
|
+
|
|
213
|
+
#if HWY_TARGETS & HWY_Z15
|
|
214
|
+
#define HWY_CHOOSE_Z15(FUNC_NAME) &N_Z15::FUNC_NAME
|
|
215
|
+
#else
|
|
216
|
+
#define HWY_CHOOSE_Z15(FUNC_NAME) nullptr
|
|
217
|
+
#endif
|
|
218
|
+
|
|
203
219
|
#if HWY_TARGETS & HWY_SSE2
|
|
204
220
|
#define HWY_CHOOSE_SSE2(FUNC_NAME) &N_SSE2::FUNC_NAME
|
|
205
221
|
#else
|
|
@@ -408,7 +424,8 @@ FunctionCache<RetType, Args...> DeduceFunctionCache(RetType (*)(Args...)) {
|
|
|
408
424
|
#elif HWY_TARGET == HWY_AVX3 || HWY_TARGET == HWY_AVX3_DL || \
|
|
409
425
|
HWY_TARGET == HWY_AVX3_ZEN4 || HWY_TARGET == HWY_AVX3_SPR
|
|
410
426
|
#include "hwy/ops/x86_512-inl.h"
|
|
411
|
-
#elif HWY_TARGET ==
|
|
427
|
+
#elif HWY_TARGET == HWY_Z14 || HWY_TARGET == HWY_Z15 || \
|
|
428
|
+
HWY_TARGET == HWY_PPC8 || HWY_TARGET == HWY_PPC9 || \
|
|
412
429
|
HWY_TARGET == HWY_PPC10
|
|
413
430
|
#include "hwy/ops/ppc_vsx-inl.h"
|
|
414
431
|
#elif HWY_TARGET == HWY_NEON || HWY_TARGET == HWY_NEON_WITHOUT_AES
|