@img/sharp-libvips-dev 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -2
- package/include/aom/aom_decoder.h +1 -1
- package/include/aom/aom_encoder.h +7 -1
- package/include/aom/aom_image.h +24 -12
- package/include/aom/aom_integer.h +3 -3
- package/include/aom/aomcx.h +15 -0
- package/include/aom/aomdx.h +5 -2
- package/include/archive.h +7 -5
- package/include/archive_entry.h +5 -3
- package/include/cgif.h +3 -0
- package/include/expat.h +21 -10
- package/include/expat_config.h +11 -5
- package/include/ffi.h +12 -25
- package/include/freetype2/freetype/config/ftoption.h +2 -2
- package/include/fribidi/fribidi-config.h +2 -2
- package/include/fribidi/fribidi-unicode-version.h +3 -3
- package/include/gio-unix-2.0/gio/gfiledescriptorbased.h +3 -2
- package/include/glib-2.0/gio/gappinfo.h +40 -25
- package/include/glib-2.0/gio/gapplication.h +6 -0
- package/include/glib-2.0/gio/gasyncresult.h +1 -1
- package/include/glib-2.0/gio/gconverter.h +5 -0
- package/include/glib-2.0/gio/gdbusintrospection.h +1 -1
- package/include/glib-2.0/gio/gfile.h +16 -0
- package/include/glib-2.0/gio/gio-visibility.h +34 -0
- package/include/glib-2.0/gio/giotypes.h +0 -1
- package/include/glib-2.0/gio/gsettings.h +8 -0
- package/include/glib-2.0/gio/gvfs.h +2 -2
- package/include/glib-2.0/girepository/gi-visibility.h +34 -0
- package/include/glib-2.0/girepository/giarginfo.h +23 -6
- package/include/glib-2.0/girepository/gibaseinfo.h +44 -18
- package/include/glib-2.0/girepository/gicallableinfo.h +26 -16
- package/include/glib-2.0/girepository/gicallbackinfo.h +17 -2
- package/include/glib-2.0/girepository/giconstantinfo.h +19 -4
- package/include/glib-2.0/girepository/gienuminfo.h +20 -21
- package/include/glib-2.0/girepository/gifieldinfo.h +22 -7
- package/include/glib-2.0/girepository/giflagsinfo.h +60 -0
- package/include/glib-2.0/girepository/gifunctioninfo.h +22 -7
- package/include/glib-2.0/girepository/giinterfaceinfo.h +33 -18
- package/include/glib-2.0/girepository/giobjectinfo.h +41 -26
- package/include/glib-2.0/girepository/gipropertyinfo.h +18 -3
- package/include/glib-2.0/girepository/giregisteredtypeinfo.h +22 -11
- package/include/glib-2.0/girepository/girepository-autocleanups.h +56 -0
- package/include/glib-2.0/girepository/girepository.h +53 -62
- package/include/glib-2.0/girepository/girffi.h +8 -7
- package/include/glib-2.0/girepository/gisignalinfo.h +18 -3
- package/include/glib-2.0/girepository/gistructinfo.h +26 -11
- package/include/glib-2.0/girepository/gitypeinfo.h +29 -16
- package/include/glib-2.0/girepository/gitypelib.h +9 -13
- package/include/glib-2.0/girepository/gitypes.h +52 -104
- package/include/glib-2.0/girepository/giunioninfo.h +28 -12
- package/include/glib-2.0/girepository/giunresolvedinfo.h +17 -2
- package/include/glib-2.0/girepository/givalueinfo.h +65 -0
- package/include/glib-2.0/girepository/givfuncinfo.h +23 -8
- package/include/glib-2.0/glib/deprecated/gthread.h +9 -5
- package/include/glib-2.0/glib/gbitlock.h +31 -0
- package/include/glib-2.0/glib/gbookmarkfile.h +1 -1
- package/include/glib-2.0/glib/giochannel.h +2 -2
- package/include/glib-2.0/glib/glib-visibility.h +34 -0
- package/include/glib-2.0/glib/gmacros.h +12 -5
- package/include/glib-2.0/glib/gmain.h +93 -7
- package/include/glib-2.0/glib/gmessages.h +8 -0
- package/include/glib-2.0/glib/gqsort.h +8 -1
- package/include/glib-2.0/glib/gslice.h +2 -0
- package/include/glib-2.0/glib/gstrfuncs.h +24 -30
- package/include/glib-2.0/glib/gstrvbuilder.h +3 -0
- package/include/glib-2.0/glib/gthread.h +191 -3
- package/include/glib-2.0/glib/gunicode.h +1 -1
- package/include/glib-2.0/glib/gversionmacros.h +9 -0
- package/include/glib-2.0/glib-unix.h +7 -1
- package/include/glib-2.0/gmodule/gmodule-visibility.h +34 -0
- package/include/glib-2.0/gobject/genums.h +6 -6
- package/include/glib-2.0/gobject/glib-types.h +11 -0
- package/include/glib-2.0/gobject/gobject-visibility.h +34 -0
- package/include/glib-2.0/gobject/gsignal.h +16 -6
- package/include/glib-2.0/gobject/gtype.h +6 -6
- package/include/harfbuzz/hb-buffer.h +6 -0
- package/include/harfbuzz/hb-common.h +6 -9
- package/include/harfbuzz/hb-cplusplus.hh +8 -11
- package/include/harfbuzz/hb-subset.h +17 -4
- package/include/harfbuzz/hb-version.h +3 -3
- package/include/hwy/abort.h +28 -0
- package/include/hwy/aligned_allocator.h +218 -6
- package/include/hwy/base.h +1935 -512
- package/include/hwy/cache_control.h +24 -6
- package/include/hwy/detect_compiler_arch.h +105 -10
- package/include/hwy/detect_targets.h +146 -37
- package/include/hwy/foreach_target.h +36 -1
- package/include/hwy/highway.h +222 -50
- package/include/hwy/ops/arm_neon-inl.h +2055 -894
- package/include/hwy/ops/arm_sve-inl.h +1476 -348
- package/include/hwy/ops/emu128-inl.h +711 -623
- package/include/hwy/ops/generic_ops-inl.h +4431 -2157
- package/include/hwy/ops/inside-inl.h +691 -0
- package/include/hwy/ops/ppc_vsx-inl.h +2186 -673
- package/include/hwy/ops/rvv-inl.h +1556 -536
- package/include/hwy/ops/scalar-inl.h +353 -233
- package/include/hwy/ops/set_macros-inl.h +171 -23
- package/include/hwy/ops/shared-inl.h +198 -56
- package/include/hwy/ops/wasm_128-inl.h +283 -244
- package/include/hwy/ops/x86_128-inl.h +3673 -1357
- package/include/hwy/ops/x86_256-inl.h +1737 -663
- package/include/hwy/ops/x86_512-inl.h +1697 -500
- package/include/hwy/per_target.h +4 -0
- package/include/hwy/profiler.h +648 -0
- package/include/hwy/robust_statistics.h +2 -2
- package/include/hwy/targets.h +40 -32
- package/include/hwy/timer-inl.h +3 -3
- package/include/hwy/timer.h +16 -1
- package/include/libheif/heif.h +170 -15
- package/include/libheif/heif_items.h +237 -0
- package/include/libheif/heif_properties.h +38 -2
- package/include/libheif/heif_regions.h +1 -1
- package/include/libheif/heif_version.h +2 -2
- package/include/libpng16/png.h +32 -29
- package/include/libpng16/pngconf.h +2 -2
- package/include/libpng16/pnglibconf.h +8 -3
- package/include/librsvg-2.0/librsvg/rsvg-cairo.h +1 -1
- package/include/librsvg-2.0/librsvg/rsvg-features.h +3 -4
- package/include/librsvg-2.0/librsvg/rsvg-pixbuf.h +235 -0
- package/include/librsvg-2.0/librsvg/rsvg-version.h +3 -3
- package/include/librsvg-2.0/librsvg/rsvg.h +55 -176
- package/include/libxml2/libxml/HTMLparser.h +12 -19
- package/include/libxml2/libxml/c14n.h +1 -12
- package/include/libxml2/libxml/debugXML.h +1 -1
- package/include/libxml2/libxml/encoding.h +9 -0
- package/include/libxml2/libxml/entities.h +12 -1
- package/include/libxml2/libxml/hash.h +19 -0
- package/include/libxml2/libxml/list.h +2 -2
- package/include/libxml2/libxml/nanohttp.h +17 -0
- package/include/libxml2/libxml/parser.h +73 -58
- package/include/libxml2/libxml/parserInternals.h +9 -1
- package/include/libxml2/libxml/pattern.h +6 -0
- package/include/libxml2/libxml/tree.h +32 -12
- package/include/libxml2/libxml/uri.h +11 -0
- package/include/libxml2/libxml/valid.h +29 -2
- package/include/libxml2/libxml/xinclude.h +7 -0
- package/include/libxml2/libxml/xmlIO.h +21 -5
- package/include/libxml2/libxml/xmlerror.h +14 -0
- package/include/libxml2/libxml/xmlexports.h +111 -15
- package/include/libxml2/libxml/xmlmemory.h +8 -45
- package/include/libxml2/libxml/xmlreader.h +2 -0
- package/include/libxml2/libxml/xmlsave.h +5 -0
- package/include/libxml2/libxml/xmlunicode.h +165 -1
- package/include/libxml2/libxml/xmlversion.h +15 -179
- package/include/libxml2/libxml/xmlwriter.h +1 -0
- package/include/libxml2/libxml/xpath.h +4 -0
- package/include/pango-1.0/pango/pango-features.h +2 -2
- package/include/pango-1.0/pango/pango-fontmap.h +7 -0
- package/include/pango-1.0/pango/pango-item.h +4 -2
- package/include/pango-1.0/pango/pango-version-macros.h +25 -0
- package/include/pango-1.0/pango/pangofc-font.h +2 -1
- package/include/pixman-1/pixman-version.h +2 -2
- package/include/png.h +32 -29
- package/include/pngconf.h +2 -2
- package/include/pnglibconf.h +8 -3
- package/include/vips/connection.h +9 -3
- package/include/vips/util.h +1 -11
- package/include/vips/version.h +4 -4
- package/include/webp/decode.h +58 -56
- package/include/webp/demux.h +25 -21
- package/include/webp/encode.h +44 -39
- package/include/webp/mux.h +76 -15
- package/include/webp/mux_types.h +2 -1
- package/include/webp/sharpyuv/sharpyuv.h +77 -8
- package/include/webp/types.h +29 -8
- package/include/zconf.h +1 -1
- package/include/zlib.h +12 -12
- package/package.json +1 -1
- package/versions.json +18 -19
|
@@ -25,11 +25,15 @@
|
|
|
25
25
|
#define HWY_DISABLE_CACHE_CONTROL
|
|
26
26
|
#endif
|
|
27
27
|
|
|
28
|
+
#ifndef HWY_DISABLE_CACHE_CONTROL
|
|
28
29
|
// intrin.h is sufficient on MSVC and already included by base.h.
|
|
29
|
-
#if HWY_ARCH_X86 && !
|
|
30
|
+
#if HWY_ARCH_X86 && !HWY_COMPILER_MSVC
|
|
30
31
|
#include <emmintrin.h> // SSE2
|
|
31
32
|
#include <xmmintrin.h> // _mm_prefetch
|
|
33
|
+
#elif HWY_ARCH_ARM_A64
|
|
34
|
+
#include <arm_acle.h>
|
|
32
35
|
#endif
|
|
36
|
+
#endif // HWY_DISABLE_CACHE_CONTROL
|
|
33
37
|
|
|
34
38
|
namespace hwy {
|
|
35
39
|
|
|
@@ -76,15 +80,16 @@ HWY_INLINE HWY_ATTR_CACHE void FlushStream() {
|
|
|
76
80
|
// subsequent actual loads.
|
|
77
81
|
template <typename T>
|
|
78
82
|
HWY_INLINE HWY_ATTR_CACHE void Prefetch(const T* p) {
|
|
79
|
-
|
|
83
|
+
(void)p;
|
|
84
|
+
#ifndef HWY_DISABLE_CACHE_CONTROL
|
|
85
|
+
#if HWY_ARCH_X86
|
|
80
86
|
_mm_prefetch(reinterpret_cast<const char*>(p), _MM_HINT_T0);
|
|
81
87
|
#elif HWY_COMPILER_GCC // includes clang
|
|
82
88
|
// Hint=0 (NTA) behavior differs, but skipping outer caches is probably not
|
|
83
89
|
// desirable, so use the default 3 (keep in caches).
|
|
84
90
|
__builtin_prefetch(p, /*write=*/0, /*hint=*/3);
|
|
85
|
-
#else
|
|
86
|
-
(void)p;
|
|
87
91
|
#endif
|
|
92
|
+
#endif // HWY_DISABLE_CACHE_CONTROL
|
|
88
93
|
}
|
|
89
94
|
|
|
90
95
|
// Invalidates and flushes the cache line containing "p", if possible.
|
|
@@ -96,11 +101,24 @@ HWY_INLINE HWY_ATTR_CACHE void FlushCacheline(const void* p) {
|
|
|
96
101
|
#endif
|
|
97
102
|
}
|
|
98
103
|
|
|
99
|
-
//
|
|
104
|
+
// Hints that we are inside a spin loop and potentially reduces power
|
|
105
|
+
// consumption and coherency traffic. For example, x86 avoids multiple
|
|
106
|
+
// outstanding load requests, which reduces the memory order violation penalty
|
|
107
|
+
// when exiting the loop.
|
|
100
108
|
HWY_INLINE HWY_ATTR_CACHE void Pause() {
|
|
101
|
-
#
|
|
109
|
+
#ifndef HWY_DISABLE_CACHE_CONTROL
|
|
110
|
+
#if HWY_ARCH_X86
|
|
102
111
|
_mm_pause();
|
|
112
|
+
#elif HWY_ARCH_ARM_A64 && HWY_COMPILER_CLANG
|
|
113
|
+
// This is documented in ACLE and the YIELD instruction is also available in
|
|
114
|
+
// Armv7, but the intrinsic is broken for Armv7 clang, hence A64 only.
|
|
115
|
+
__yield();
|
|
116
|
+
#elif HWY_ARCH_ARM && HWY_COMPILER_GCC // includes clang
|
|
117
|
+
__asm__ volatile("yield" ::: "memory");
|
|
118
|
+
#elif HWY_ARCH_PPC && HWY_COMPILER_GCC // includes clang
|
|
119
|
+
__asm__ volatile("or 27,27,27" ::: "memory");
|
|
103
120
|
#endif
|
|
121
|
+
#endif // HWY_DISABLE_CACHE_CONTROL
|
|
104
122
|
}
|
|
105
123
|
|
|
106
124
|
} // namespace hwy
|
|
@@ -73,7 +73,13 @@
|
|
|
73
73
|
// https://github.com/simd-everywhere/simde/blob/47d6e603de9d04ee05cdfbc57cf282a02be1bf2a/simde/simde-detect-clang.h#L59.
|
|
74
74
|
// Please send updates below to them as well, thanks!
|
|
75
75
|
#if defined(__apple_build_version__) || __clang_major__ >= 999
|
|
76
|
-
#if
|
|
76
|
+
#if __has_warning("-Woverriding-option")
|
|
77
|
+
#define HWY_COMPILER_CLANG 1801
|
|
78
|
+
// No new warnings in 17.0, and Apple LLVM 15.3, which should be 1600, already
|
|
79
|
+
// has the unsafe_buffer_usage attribute, so we instead check for new builtins.
|
|
80
|
+
#elif __has_builtin(__builtin_nondeterministic_value)
|
|
81
|
+
#define HWY_COMPILER_CLANG 1700
|
|
82
|
+
#elif __has_attribute(nouwtable) // no new warnings in 16.0
|
|
77
83
|
#define HWY_COMPILER_CLANG 1600
|
|
78
84
|
#elif __has_warning("-Warray-parameter")
|
|
79
85
|
#define HWY_COMPILER_CLANG 1500
|
|
@@ -113,7 +119,8 @@
|
|
|
113
119
|
#define HWY_COMPILER3_CLANG 0
|
|
114
120
|
#endif
|
|
115
121
|
|
|
116
|
-
#if HWY_COMPILER_GCC && !HWY_COMPILER_CLANG && !HWY_COMPILER_ICC
|
|
122
|
+
#if HWY_COMPILER_GCC && !HWY_COMPILER_CLANG && !HWY_COMPILER_ICC && \
|
|
123
|
+
!HWY_COMPILER_ICX
|
|
117
124
|
#define HWY_COMPILER_GCC_ACTUAL HWY_COMPILER_GCC
|
|
118
125
|
#else
|
|
119
126
|
#define HWY_COMPILER_GCC_ACTUAL 0
|
|
@@ -121,17 +128,20 @@
|
|
|
121
128
|
|
|
122
129
|
// More than one may be nonzero, but we want at least one.
|
|
123
130
|
#if 0 == (HWY_COMPILER_MSVC + HWY_COMPILER_CLANGCL + HWY_COMPILER_ICC + \
|
|
124
|
-
HWY_COMPILER_GCC + HWY_COMPILER_CLANG)
|
|
131
|
+
HWY_COMPILER_ICX + HWY_COMPILER_GCC + HWY_COMPILER_CLANG)
|
|
125
132
|
#error "Unsupported compiler"
|
|
126
133
|
#endif
|
|
127
134
|
|
|
128
|
-
// We should only detect one of these (only clang/clangcl overlap)
|
|
129
|
-
#if 1 <
|
|
130
|
-
|
|
131
|
-
|
|
135
|
+
// We should only detect one of these (only clang/clangcl/icx overlap)
|
|
136
|
+
#if 1 < (!!HWY_COMPILER_MSVC + (!!HWY_COMPILER_ICC & !HWY_COMPILER_ICX) + \
|
|
137
|
+
!!HWY_COMPILER_GCC_ACTUAL + \
|
|
138
|
+
!!(HWY_COMPILER_ICX | HWY_COMPILER_CLANGCL | HWY_COMPILER_CLANG))
|
|
132
139
|
#error "Detected multiple compilers"
|
|
133
140
|
#endif
|
|
134
141
|
|
|
142
|
+
//------------------------------------------------------------------------------
|
|
143
|
+
// Compiler features and C++ version
|
|
144
|
+
|
|
135
145
|
#ifdef __has_builtin
|
|
136
146
|
#define HWY_HAS_BUILTIN(name) __has_builtin(name)
|
|
137
147
|
#else
|
|
@@ -156,6 +166,32 @@
|
|
|
156
166
|
#define HWY_HAS_FEATURE(name) 0
|
|
157
167
|
#endif
|
|
158
168
|
|
|
169
|
+
// NOTE: clang ~17 does not correctly handle wrapping __has_include in a macro.
|
|
170
|
+
|
|
171
|
+
#if HWY_COMPILER_MSVC && defined(_MSVC_LANG) && _MSVC_LANG > __cplusplus
|
|
172
|
+
#define HWY_CXX_LANG _MSVC_LANG
|
|
173
|
+
#else
|
|
174
|
+
#define HWY_CXX_LANG __cplusplus
|
|
175
|
+
#endif
|
|
176
|
+
|
|
177
|
+
#if defined(__cpp_constexpr) && __cpp_constexpr >= 201603L
|
|
178
|
+
#define HWY_CXX17_CONSTEXPR constexpr
|
|
179
|
+
#else
|
|
180
|
+
#define HWY_CXX17_CONSTEXPR
|
|
181
|
+
#endif
|
|
182
|
+
|
|
183
|
+
#if defined(__cpp_constexpr) && __cpp_constexpr >= 201304L
|
|
184
|
+
#define HWY_CXX14_CONSTEXPR constexpr
|
|
185
|
+
#else
|
|
186
|
+
#define HWY_CXX14_CONSTEXPR
|
|
187
|
+
#endif
|
|
188
|
+
|
|
189
|
+
#if HWY_CXX_LANG >= 201703L
|
|
190
|
+
#define HWY_IF_CONSTEXPR if constexpr
|
|
191
|
+
#else
|
|
192
|
+
#define HWY_IF_CONSTEXPR if
|
|
193
|
+
#endif
|
|
194
|
+
|
|
159
195
|
//------------------------------------------------------------------------------
|
|
160
196
|
// Architecture
|
|
161
197
|
|
|
@@ -187,6 +223,12 @@
|
|
|
187
223
|
#define HWY_ARCH_PPC 0
|
|
188
224
|
#endif
|
|
189
225
|
|
|
226
|
+
#if defined(__powerpc64__) || (HWY_ARCH_PPC && defined(__64BIT__))
|
|
227
|
+
#define HWY_ARCH_PPC_64 1
|
|
228
|
+
#else
|
|
229
|
+
#define HWY_ARCH_PPC_64 0
|
|
230
|
+
#endif
|
|
231
|
+
|
|
190
232
|
// aarch32 is currently not supported; please raise an issue if you want it.
|
|
191
233
|
#if defined(__ARM_ARCH_ISA_A64) || defined(__aarch64__) || defined(_M_ARM64)
|
|
192
234
|
#define HWY_ARCH_ARM_A64 1
|
|
@@ -225,18 +267,52 @@
|
|
|
225
267
|
#endif
|
|
226
268
|
|
|
227
269
|
#ifdef __riscv
|
|
228
|
-
#define
|
|
270
|
+
#define HWY_ARCH_RISCV 1
|
|
271
|
+
#else
|
|
272
|
+
#define HWY_ARCH_RISCV 0
|
|
273
|
+
#endif
|
|
274
|
+
// DEPRECATED names; please use HWY_ARCH_RISCV instead.
|
|
275
|
+
#define HWY_ARCH_RVV HWY_ARCH_RISCV
|
|
276
|
+
|
|
277
|
+
#if HWY_ARCH_RISCV && defined(__riscv_xlen)
|
|
278
|
+
|
|
279
|
+
#if __riscv_xlen == 32
|
|
280
|
+
#define HWY_ARCH_RISCV_32 1
|
|
281
|
+
#else
|
|
282
|
+
#define HWY_ARCH_RISCV_32 0
|
|
283
|
+
#endif
|
|
284
|
+
|
|
285
|
+
#if __riscv_xlen == 64
|
|
286
|
+
#define HWY_ARCH_RISCV_64 1
|
|
229
287
|
#else
|
|
230
|
-
#define
|
|
288
|
+
#define HWY_ARCH_RISCV_64 0
|
|
289
|
+
#endif
|
|
290
|
+
|
|
291
|
+
#else // !HWY_ARCH_RISCV || !defined(__riscv_xlen)
|
|
292
|
+
#define HWY_ARCH_RISCV_32 0
|
|
293
|
+
#define HWY_ARCH_RISCV_64 0
|
|
294
|
+
#endif // HWY_ARCH_RISCV && defined(__riscv_xlen)
|
|
295
|
+
|
|
296
|
+
#if HWY_ARCH_RISCV_32 && HWY_ARCH_RISCV_64
|
|
297
|
+
#error "Cannot have both RISCV_32 and RISCV_64"
|
|
298
|
+
#endif
|
|
299
|
+
|
|
300
|
+
#if defined(__s390x__)
|
|
301
|
+
#define HWY_ARCH_S390X 1
|
|
302
|
+
#else
|
|
303
|
+
#define HWY_ARCH_S390X 0
|
|
231
304
|
#endif
|
|
232
305
|
|
|
233
306
|
// It is an error to detect multiple architectures at the same time, but OK to
|
|
234
307
|
// detect none of the above.
|
|
235
308
|
#if (HWY_ARCH_X86 + HWY_ARCH_PPC + HWY_ARCH_ARM + HWY_ARCH_ARM_OLD + \
|
|
236
|
-
HWY_ARCH_WASM +
|
|
309
|
+
HWY_ARCH_WASM + HWY_ARCH_RISCV + HWY_ARCH_S390X) > 1
|
|
237
310
|
#error "Must not detect more than one architecture"
|
|
238
311
|
#endif
|
|
239
312
|
|
|
313
|
+
//------------------------------------------------------------------------------
|
|
314
|
+
// Operating system
|
|
315
|
+
|
|
240
316
|
#if defined(_WIN32) || defined(_WIN64)
|
|
241
317
|
#define HWY_OS_WIN 1
|
|
242
318
|
#else
|
|
@@ -249,6 +325,25 @@
|
|
|
249
325
|
#define HWY_OS_LINUX 0
|
|
250
326
|
#endif
|
|
251
327
|
|
|
328
|
+
// iOS or Mac
|
|
329
|
+
#if defined(__APPLE__)
|
|
330
|
+
#define HWY_OS_APPLE 1
|
|
331
|
+
#else
|
|
332
|
+
#define HWY_OS_APPLE 0
|
|
333
|
+
#endif
|
|
334
|
+
|
|
335
|
+
#if defined(__FreeBSD__)
|
|
336
|
+
#define HWY_OS_FREEBSD 1
|
|
337
|
+
#else
|
|
338
|
+
#define HWY_OS_FREEBSD 0
|
|
339
|
+
#endif
|
|
340
|
+
|
|
341
|
+
// It is an error to detect multiple OSes at the same time, but OK to
|
|
342
|
+
// detect none of the above.
|
|
343
|
+
#if (HWY_OS_WIN + HWY_OS_LINUX + HWY_OS_APPLE + HWY_OS_FREEBSD) > 1
|
|
344
|
+
#error "Must not detect more than one OS"
|
|
345
|
+
#endif
|
|
346
|
+
|
|
252
347
|
//------------------------------------------------------------------------------
|
|
253
348
|
// Endianness
|
|
254
349
|
|
|
@@ -62,7 +62,8 @@
|
|
|
62
62
|
// Bits 0..3 reserved (4 targets)
|
|
63
63
|
#define HWY_AVX3_SPR (1LL << 4)
|
|
64
64
|
// Bit 5 reserved (likely AVX10.2 with 256-bit vectors)
|
|
65
|
-
// Currently HWY_AVX3_DL plus a special case for CompressStore
|
|
65
|
+
// Currently HWY_AVX3_DL plus AVX512BF16 and a special case for CompressStore
|
|
66
|
+
// (10x as fast).
|
|
66
67
|
// We may later also use VPCONFLICT.
|
|
67
68
|
#define HWY_AVX3_ZEN4 (1LL << 6) // see HWY_WANT_AVX3_ZEN4 below
|
|
68
69
|
|
|
@@ -84,15 +85,22 @@
|
|
|
84
85
|
#define HWY_HIGHEST_TARGET_BIT_X86 14
|
|
85
86
|
|
|
86
87
|
// --------------------------- Arm: 15 targets (+ one fallback)
|
|
87
|
-
// Bits 15..
|
|
88
|
-
#define HWY_SVE2_128 (1LL <<
|
|
89
|
-
#define HWY_SVE_256 (1LL <<
|
|
90
|
-
|
|
91
|
-
#define
|
|
88
|
+
// Bits 15..17 reserved (3 targets)
|
|
89
|
+
#define HWY_SVE2_128 (1LL << 18) // specialized (e.g. Neoverse V2/N2/N3)
|
|
90
|
+
#define HWY_SVE_256 (1LL << 19) // specialized (Neoverse V1)
|
|
91
|
+
// Bits 20-22 reserved for later SVE (3 targets)
|
|
92
|
+
#define HWY_SVE2 (1LL << 23)
|
|
93
|
+
#define HWY_SVE (1LL << 24)
|
|
94
|
+
// Bit 25 reserved for NEON
|
|
95
|
+
#define HWY_NEON_BF16 (1LL << 26) // fp16/dot/bf16 (e.g. Neoverse V2/N2/N3)
|
|
96
|
+
// Bit 27 reserved for NEON
|
|
92
97
|
#define HWY_NEON (1LL << 28) // Implies support for AES
|
|
93
98
|
#define HWY_NEON_WITHOUT_AES (1LL << 29)
|
|
94
99
|
#define HWY_HIGHEST_TARGET_BIT_ARM 29
|
|
95
100
|
|
|
101
|
+
#define HWY_ALL_NEON (HWY_NEON_WITHOUT_AES | HWY_NEON | HWY_NEON_BF16)
|
|
102
|
+
#define HWY_ALL_SVE (HWY_SVE | HWY_SVE2 | HWY_SVE_256 | HWY_SVE2_128)
|
|
103
|
+
|
|
96
104
|
// --------------------------- RISC-V: 9 targets (+ one fallback)
|
|
97
105
|
// Bits 30..36 reserved (7 targets)
|
|
98
106
|
#define HWY_RVV (1LL << 37)
|
|
@@ -102,14 +110,17 @@
|
|
|
102
110
|
// --------------------------- Future expansion: 4 targets
|
|
103
111
|
// Bits 39..42 reserved
|
|
104
112
|
|
|
105
|
-
// --------------------------- IBM Power: 9 targets (+ one fallback)
|
|
113
|
+
// --------------------------- IBM Power/ZSeries: 9 targets (+ one fallback)
|
|
106
114
|
// Bits 43..46 reserved (4 targets)
|
|
107
115
|
#define HWY_PPC10 (1LL << 47) // v3.1
|
|
108
116
|
#define HWY_PPC9 (1LL << 48) // v3.0
|
|
109
117
|
#define HWY_PPC8 (1LL << 49) // v2.07
|
|
110
|
-
|
|
118
|
+
#define HWY_Z15 (1LL << 50) // Z15
|
|
119
|
+
#define HWY_Z14 (1LL << 51) // Z14
|
|
111
120
|
#define HWY_HIGHEST_TARGET_BIT_PPC 51
|
|
112
121
|
|
|
122
|
+
#define HWY_ALL_PPC (HWY_PPC8 | HWY_PPC9 | HWY_PPC10)
|
|
123
|
+
|
|
113
124
|
// --------------------------- WebAssembly: 9 targets (+ one fallback)
|
|
114
125
|
// Bits 52..57 reserved (6 targets)
|
|
115
126
|
#define HWY_WASM_EMU256 (1LL << 58) // Experimental
|
|
@@ -187,7 +198,7 @@
|
|
|
187
198
|
|
|
188
199
|
// armv7be has not been tested and is not yet supported.
|
|
189
200
|
#if HWY_ARCH_ARM_V7 && HWY_IS_BIG_ENDIAN
|
|
190
|
-
#define HWY_BROKEN_ARM7_BIG_ENDIAN
|
|
201
|
+
#define HWY_BROKEN_ARM7_BIG_ENDIAN HWY_ALL_NEON
|
|
191
202
|
#else
|
|
192
203
|
#define HWY_BROKEN_ARM7_BIG_ENDIAN 0
|
|
193
204
|
#endif
|
|
@@ -198,11 +209,19 @@
|
|
|
198
209
|
#if HWY_ARCH_ARM_V7 && (__ARM_ARCH_PROFILE == 'A') && \
|
|
199
210
|
!defined(__ARM_VFPV4__) && \
|
|
200
211
|
!((__ARM_NEON_FP & 0x2 /* half-float */) && (__ARM_FEATURE_FMA == 1))
|
|
201
|
-
#define HWY_BROKEN_ARM7_WITHOUT_VFP4
|
|
212
|
+
#define HWY_BROKEN_ARM7_WITHOUT_VFP4 HWY_ALL_NEON
|
|
202
213
|
#else
|
|
203
214
|
#define HWY_BROKEN_ARM7_WITHOUT_VFP4 0
|
|
204
215
|
#endif
|
|
205
216
|
|
|
217
|
+
// HWY_NEON_BF16 requires recent compilers.
|
|
218
|
+
#if (HWY_COMPILER_CLANG != 0 && HWY_COMPILER_CLANG < 1700) || \
|
|
219
|
+
(HWY_COMPILER_GCC_ACTUAL != 0 && HWY_COMPILER_GCC_ACTUAL < 1302)
|
|
220
|
+
#define HWY_BROKEN_NEON_BF16 (HWY_NEON_BF16)
|
|
221
|
+
#else
|
|
222
|
+
#define HWY_BROKEN_NEON_BF16 0
|
|
223
|
+
#endif
|
|
224
|
+
|
|
206
225
|
// SVE[2] require recent clang or gcc versions.
|
|
207
226
|
#if (HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1100) || \
|
|
208
227
|
(HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1000)
|
|
@@ -246,7 +265,7 @@
|
|
|
246
265
|
(HWY_BROKEN_CLANG6 | HWY_BROKEN_32BIT | HWY_BROKEN_MSVC | \
|
|
247
266
|
HWY_BROKEN_AVX3_DL_ZEN4 | HWY_BROKEN_AVX3_SPR | \
|
|
248
267
|
HWY_BROKEN_ARM7_BIG_ENDIAN | HWY_BROKEN_ARM7_WITHOUT_VFP4 | \
|
|
249
|
-
HWY_BROKEN_SVE | HWY_BROKEN_PPC10)
|
|
268
|
+
HWY_BROKEN_NEON_BF16 | HWY_BROKEN_SVE | HWY_BROKEN_PPC10)
|
|
250
269
|
|
|
251
270
|
#endif // HWY_BROKEN_TARGETS
|
|
252
271
|
|
|
@@ -316,13 +335,28 @@
|
|
|
316
335
|
#define HWY_BASELINE_PPC10 0
|
|
317
336
|
#endif
|
|
318
337
|
|
|
338
|
+
#if HWY_ARCH_S390X && defined(__VEC__) && defined(__ARCH__) && __ARCH__ >= 12
|
|
339
|
+
#define HWY_BASELINE_Z14 HWY_Z14
|
|
340
|
+
#else
|
|
341
|
+
#define HWY_BASELINE_Z14 0
|
|
342
|
+
#endif
|
|
343
|
+
|
|
344
|
+
#if HWY_BASELINE_Z14 && __ARCH__ >= 13
|
|
345
|
+
#define HWY_BASELINE_Z15 HWY_Z15
|
|
346
|
+
#else
|
|
347
|
+
#define HWY_BASELINE_Z15 0
|
|
348
|
+
#endif
|
|
349
|
+
|
|
319
350
|
#define HWY_BASELINE_SVE2 0
|
|
320
351
|
#define HWY_BASELINE_SVE 0
|
|
321
352
|
#define HWY_BASELINE_NEON 0
|
|
322
353
|
|
|
323
354
|
#if HWY_ARCH_ARM
|
|
324
355
|
|
|
325
|
-
|
|
356
|
+
// Also check compiler version as done for HWY_ATTAINABLE_SVE2 because the
|
|
357
|
+
// static target (influenced here) must be one of the attainable targets.
|
|
358
|
+
#if defined(__ARM_FEATURE_SVE2) && \
|
|
359
|
+
(HWY_COMPILER_CLANG >= 1400 || HWY_COMPILER_GCC_ACTUAL >= 1200)
|
|
326
360
|
#undef HWY_BASELINE_SVE2 // was 0, will be re-defined
|
|
327
361
|
// If user specified -msve-vector-bits=128, they assert the vector length is
|
|
328
362
|
// 128 bits and we should use the HWY_SVE2_128 (more efficient for some ops).
|
|
@@ -337,7 +371,8 @@
|
|
|
337
371
|
#endif // __ARM_FEATURE_SVE_BITS
|
|
338
372
|
#endif // __ARM_FEATURE_SVE2
|
|
339
373
|
|
|
340
|
-
#if defined(__ARM_FEATURE_SVE)
|
|
374
|
+
#if defined(__ARM_FEATURE_SVE) && \
|
|
375
|
+
(HWY_COMPILER_CLANG >= 900 || HWY_COMPILER_GCC_ACTUAL >= 800)
|
|
341
376
|
#undef HWY_BASELINE_SVE // was 0, will be re-defined
|
|
342
377
|
// See above. If user-specified vector length matches our optimization, use it.
|
|
343
378
|
#if defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS == 256
|
|
@@ -350,12 +385,17 @@
|
|
|
350
385
|
// GCC 4.5.4 only defines __ARM_NEON__; 5.4 defines both.
|
|
351
386
|
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
|
|
352
387
|
#undef HWY_BASELINE_NEON
|
|
353
|
-
#if defined(__ARM_FEATURE_AES)
|
|
354
|
-
|
|
388
|
+
#if defined(__ARM_FEATURE_AES) && \
|
|
389
|
+
defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && \
|
|
390
|
+
defined(__ARM_FEATURE_DOTPROD) && \
|
|
391
|
+
defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC)
|
|
392
|
+
#define HWY_BASELINE_NEON HWY_ALL_NEON
|
|
393
|
+
#elif defined(__ARM_FEATURE_AES)
|
|
394
|
+
#define HWY_BASELINE_NEON (HWY_NEON_WITHOUT_AES | HWY_NEON)
|
|
355
395
|
#else
|
|
356
396
|
#define HWY_BASELINE_NEON (HWY_NEON_WITHOUT_AES)
|
|
357
|
-
#endif
|
|
358
|
-
#endif
|
|
397
|
+
#endif // __ARM_FEATURE*
|
|
398
|
+
#endif // __ARM_NEON
|
|
359
399
|
|
|
360
400
|
#endif // HWY_ARCH_ARM
|
|
361
401
|
|
|
@@ -483,14 +523,16 @@
|
|
|
483
523
|
#define HWY_BASELINE_AVX3_ZEN4 0
|
|
484
524
|
#endif
|
|
485
525
|
|
|
486
|
-
#if HWY_BASELINE_AVX3_DL != 0 && defined(
|
|
526
|
+
#if HWY_BASELINE_AVX3_DL != 0 && defined(__AVX512BF16__) && \
|
|
527
|
+
defined(__AVX512FP16__)
|
|
487
528
|
#define HWY_BASELINE_AVX3_SPR HWY_AVX3_SPR
|
|
488
529
|
#else
|
|
489
530
|
#define HWY_BASELINE_AVX3_SPR 0
|
|
490
531
|
#endif
|
|
491
532
|
|
|
492
533
|
// RVV requires intrinsics 0.11 or later, see #1156.
|
|
493
|
-
#if
|
|
534
|
+
#if HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && \
|
|
535
|
+
__riscv_v_intrinsic >= 11000
|
|
494
536
|
#define HWY_BASELINE_RVV HWY_RVV
|
|
495
537
|
#else
|
|
496
538
|
#define HWY_BASELINE_RVV 0
|
|
@@ -498,13 +540,14 @@
|
|
|
498
540
|
|
|
499
541
|
// Allow the user to override this without any guarantee of success.
|
|
500
542
|
#ifndef HWY_BASELINE_TARGETS
|
|
501
|
-
#define HWY_BASELINE_TARGETS
|
|
502
|
-
(HWY_BASELINE_SCALAR | HWY_BASELINE_WASM | HWY_BASELINE_PPC8 |
|
|
503
|
-
HWY_BASELINE_PPC9 | HWY_BASELINE_PPC10 |
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
HWY_BASELINE_AVX3_SPR |
|
|
543
|
+
#define HWY_BASELINE_TARGETS \
|
|
544
|
+
(HWY_BASELINE_SCALAR | HWY_BASELINE_WASM | HWY_BASELINE_PPC8 | \
|
|
545
|
+
HWY_BASELINE_PPC9 | HWY_BASELINE_PPC10 | HWY_BASELINE_Z14 | \
|
|
546
|
+
HWY_BASELINE_Z15 | HWY_BASELINE_SVE2 | HWY_BASELINE_SVE | \
|
|
547
|
+
HWY_BASELINE_NEON | HWY_BASELINE_SSE2 | HWY_BASELINE_SSSE3 | \
|
|
548
|
+
HWY_BASELINE_SSE4 | HWY_BASELINE_AVX2 | HWY_BASELINE_AVX3 | \
|
|
549
|
+
HWY_BASELINE_AVX3_DL | HWY_BASELINE_AVX3_ZEN4 | HWY_BASELINE_AVX3_SPR | \
|
|
550
|
+
HWY_BASELINE_RVV)
|
|
508
551
|
#endif // HWY_BASELINE_TARGETS
|
|
509
552
|
|
|
510
553
|
//------------------------------------------------------------------------------
|
|
@@ -534,17 +577,43 @@
|
|
|
534
577
|
#endif
|
|
535
578
|
// Defining one of HWY_COMPILE_ONLY_* will trump HWY_COMPILE_ALL_ATTAINABLE.
|
|
536
579
|
|
|
580
|
+
#ifndef HWY_HAVE_AUXV // allow override
|
|
581
|
+
#ifdef TOOLCHAIN_MISS_SYS_AUXV_H
|
|
582
|
+
#define HWY_HAVE_AUXV 0 // CMake failed to find the header
|
|
583
|
+
// glibc 2.16 added auxv, but checking for that requires features.h, and we do
|
|
584
|
+
// not want to include system headers here. Instead check for the header
|
|
585
|
+
// directly, which has been supported at least since GCC 5.4 and Clang 3.
|
|
586
|
+
#elif defined(__has_include) // note: wrapper macro fails on Clang ~17
|
|
587
|
+
// clang-format off
|
|
588
|
+
#if __has_include(<sys/auxv.h>)
|
|
589
|
+
// clang-format on
|
|
590
|
+
#define HWY_HAVE_AUXV 1 // header present
|
|
591
|
+
#else
|
|
592
|
+
#define HWY_HAVE_AUXV 0 // header not present
|
|
593
|
+
#endif // __has_include
|
|
594
|
+
#else // compiler lacks __has_include
|
|
595
|
+
#define HWY_HAVE_AUXV 0
|
|
596
|
+
#endif
|
|
597
|
+
#endif // HWY_HAVE_AUXV
|
|
598
|
+
|
|
599
|
+
// Allow opting out, and without a guarantee of success, opting-in.
|
|
600
|
+
#ifndef HWY_HAVE_RUNTIME_DISPATCH
|
|
537
601
|
// Clang, GCC and MSVC allow runtime dispatch on x86.
|
|
538
602
|
#if HWY_ARCH_X86
|
|
539
603
|
#define HWY_HAVE_RUNTIME_DISPATCH 1
|
|
540
|
-
// On Arm
|
|
541
|
-
// capabilities.
|
|
542
|
-
#elif (HWY_ARCH_ARM || HWY_ARCH_PPC
|
|
543
|
-
HWY_OS_LINUX &&
|
|
604
|
+
// On Arm, PPC, S390X, and RISC-V: GCC and Clang 17+ do, and we require Linux
|
|
605
|
+
// to detect CPU capabilities.
|
|
606
|
+
#elif (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X || HWY_ARCH_RISCV) && \
|
|
607
|
+
(HWY_COMPILER_GCC_ACTUAL || HWY_COMPILER_CLANG >= 1700) && HWY_OS_LINUX && \
|
|
608
|
+
HWY_HAVE_AUXV
|
|
609
|
+
#define HWY_HAVE_RUNTIME_DISPATCH 1
|
|
610
|
+
#elif HWY_ARCH_ARM_A64 && HWY_OS_APPLE && \
|
|
611
|
+
(HWY_COMPILER_GCC_ACTUAL || HWY_COMPILER_CLANG >= 1700)
|
|
544
612
|
#define HWY_HAVE_RUNTIME_DISPATCH 1
|
|
545
613
|
#else
|
|
546
614
|
#define HWY_HAVE_RUNTIME_DISPATCH 0
|
|
547
|
-
#endif
|
|
615
|
+
#endif // HWY_ARCH_*
|
|
616
|
+
#endif // HWY_HAVE_RUNTIME_DISPATCH
|
|
548
617
|
|
|
549
618
|
// AVX3_DL is not widely available yet. To reduce code size and compile time,
|
|
550
619
|
// only include it in the set of attainable targets (for dynamic dispatch) if
|
|
@@ -556,22 +625,26 @@
|
|
|
556
625
|
#endif
|
|
557
626
|
|
|
558
627
|
#if HWY_ARCH_ARM_A64 && HWY_HAVE_RUNTIME_DISPATCH
|
|
559
|
-
#define HWY_ATTAINABLE_NEON
|
|
628
|
+
#define HWY_ATTAINABLE_NEON HWY_ALL_NEON
|
|
560
629
|
#elif HWY_ARCH_ARM // static dispatch, or HWY_ARCH_ARM_V7
|
|
561
630
|
#define HWY_ATTAINABLE_NEON (HWY_BASELINE_NEON)
|
|
562
631
|
#else
|
|
563
632
|
#define HWY_ATTAINABLE_NEON 0
|
|
564
633
|
#endif
|
|
565
634
|
|
|
566
|
-
#if HWY_ARCH_ARM_A64 &&
|
|
567
|
-
|
|
635
|
+
#if HWY_ARCH_ARM_A64 && \
|
|
636
|
+
(HWY_COMPILER_CLANG >= 900 || HWY_COMPILER_GCC_ACTUAL >= 800) && \
|
|
637
|
+
(HWY_HAVE_RUNTIME_DISPATCH || \
|
|
638
|
+
(HWY_ENABLED_BASELINE & (HWY_SVE | HWY_SVE_256)))
|
|
568
639
|
#define HWY_ATTAINABLE_SVE (HWY_SVE | HWY_SVE_256)
|
|
569
640
|
#else
|
|
570
641
|
#define HWY_ATTAINABLE_SVE 0
|
|
571
642
|
#endif
|
|
572
643
|
|
|
573
|
-
#if HWY_ARCH_ARM_A64 &&
|
|
574
|
-
|
|
644
|
+
#if HWY_ARCH_ARM_A64 && \
|
|
645
|
+
(HWY_COMPILER_CLANG >= 1400 || HWY_COMPILER_GCC_ACTUAL >= 1200) && \
|
|
646
|
+
(HWY_HAVE_RUNTIME_DISPATCH || \
|
|
647
|
+
(HWY_ENABLED_BASELINE & (HWY_SVE2 | HWY_SVE2_128)))
|
|
575
648
|
#define HWY_ATTAINABLE_SVE2 (HWY_SVE2 | HWY_SVE2_128)
|
|
576
649
|
#else
|
|
577
650
|
#define HWY_ATTAINABLE_SVE2 0
|
|
@@ -579,18 +652,47 @@
|
|
|
579
652
|
|
|
580
653
|
#if HWY_ARCH_PPC && defined(__ALTIVEC__) && \
|
|
581
654
|
(!HWY_COMPILER_CLANG || HWY_BASELINE_PPC8 != 0)
|
|
655
|
+
|
|
656
|
+
#if (HWY_BASELINE_PPC9 | HWY_BASELINE_PPC10) && \
|
|
657
|
+
!defined(HWY_SKIP_NON_BEST_BASELINE)
|
|
658
|
+
// On POWER with -m flags, we get compile errors (#1707) for targets older than
|
|
659
|
+
// the baseline specified via -m, so only generate the static target and better.
|
|
660
|
+
// Note that some Linux distros actually do set POWER9 as the baseline.
|
|
661
|
+
// This works by skipping case 3 below, so case 4 is reached.
|
|
662
|
+
#define HWY_SKIP_NON_BEST_BASELINE
|
|
663
|
+
#endif
|
|
664
|
+
|
|
582
665
|
#define HWY_ATTAINABLE_PPC (HWY_PPC8 | HWY_PPC9 | HWY_PPC10)
|
|
666
|
+
|
|
583
667
|
#else
|
|
584
668
|
#define HWY_ATTAINABLE_PPC 0
|
|
585
669
|
#endif
|
|
586
670
|
|
|
671
|
+
#if HWY_ARCH_S390X && HWY_BASELINE_Z14 != 0
|
|
672
|
+
#define HWY_ATTAINABLE_S390X (HWY_Z14 | HWY_Z15)
|
|
673
|
+
#else
|
|
674
|
+
#define HWY_ATTAINABLE_S390X 0
|
|
675
|
+
#endif
|
|
676
|
+
|
|
677
|
+
#if HWY_ARCH_RISCV && HWY_HAVE_RUNTIME_DISPATCH
|
|
678
|
+
#define HWY_ATTAINABLE_RISCV (HWY_RVV)
|
|
679
|
+
#else
|
|
680
|
+
#define HWY_ATTAINABLE_RISCV 0
|
|
681
|
+
#endif
|
|
682
|
+
|
|
587
683
|
// Attainable means enabled and the compiler allows intrinsics (even when not
|
|
588
684
|
// allowed to autovectorize). Used in 3 and 4.
|
|
589
685
|
#if HWY_ARCH_X86
|
|
686
|
+
#if HWY_COMPILER_MSVC
|
|
687
|
+
// Fewer targets for faster builds.
|
|
688
|
+
#define HWY_ATTAINABLE_TARGETS \
|
|
689
|
+
HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_STATIC_TARGET | HWY_AVX2)
|
|
690
|
+
#else // !HWY_COMPILER_MSVC
|
|
590
691
|
#define HWY_ATTAINABLE_TARGETS \
|
|
591
692
|
HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_SSE2 | HWY_SSSE3 | HWY_SSE4 | \
|
|
592
693
|
HWY_AVX2 | HWY_AVX3 | HWY_ATTAINABLE_AVX3_DL | HWY_AVX3_ZEN4 | \
|
|
593
694
|
HWY_AVX3_SPR)
|
|
695
|
+
#endif // !HWY_COMPILER_MSVC
|
|
594
696
|
#elif HWY_ARCH_ARM
|
|
595
697
|
#define HWY_ATTAINABLE_TARGETS \
|
|
596
698
|
HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_NEON | HWY_ATTAINABLE_SVE | \
|
|
@@ -598,6 +700,12 @@
|
|
|
598
700
|
#elif HWY_ARCH_PPC
|
|
599
701
|
#define HWY_ATTAINABLE_TARGETS \
|
|
600
702
|
HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_PPC)
|
|
703
|
+
#elif HWY_ARCH_S390X
|
|
704
|
+
#define HWY_ATTAINABLE_TARGETS \
|
|
705
|
+
HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_S390X)
|
|
706
|
+
#elif HWY_ARCH_RVV
|
|
707
|
+
#define HWY_ATTAINABLE_TARGETS \
|
|
708
|
+
HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_RISCV)
|
|
601
709
|
#else
|
|
602
710
|
#define HWY_ATTAINABLE_TARGETS (HWY_ENABLED_BASELINE)
|
|
603
711
|
#endif // HWY_ARCH_*
|
|
@@ -621,7 +729,8 @@
|
|
|
621
729
|
#define HWY_TARGETS HWY_STATIC_TARGET
|
|
622
730
|
|
|
623
731
|
// 3) For tests: include all attainable targets (in particular: scalar)
|
|
624
|
-
#elif defined(HWY_COMPILE_ALL_ATTAINABLE) || defined(HWY_IS_TEST)
|
|
732
|
+
#elif (defined(HWY_COMPILE_ALL_ATTAINABLE) || defined(HWY_IS_TEST)) && \
|
|
733
|
+
!defined(HWY_SKIP_NON_BEST_BASELINE)
|
|
625
734
|
#define HWY_TARGETS HWY_ATTAINABLE_TARGETS
|
|
626
735
|
|
|
627
736
|
// 4) Default: attainable WITHOUT non-best baseline. This reduces code size by
|
|
@@ -168,6 +168,17 @@
|
|
|
168
168
|
#endif
|
|
169
169
|
#endif
|
|
170
170
|
|
|
171
|
+
#if (HWY_TARGETS & HWY_NEON_BF16) && (HWY_STATIC_TARGET != HWY_NEON_BF16)
|
|
172
|
+
#undef HWY_TARGET
|
|
173
|
+
#define HWY_TARGET HWY_NEON_BF16
|
|
174
|
+
#include HWY_TARGET_INCLUDE
|
|
175
|
+
#ifdef HWY_TARGET_TOGGLE
|
|
176
|
+
#undef HWY_TARGET_TOGGLE
|
|
177
|
+
#else
|
|
178
|
+
#define HWY_TARGET_TOGGLE
|
|
179
|
+
#endif
|
|
180
|
+
#endif
|
|
181
|
+
|
|
171
182
|
#if (HWY_TARGETS & HWY_SVE) && (HWY_STATIC_TARGET != HWY_SVE)
|
|
172
183
|
#undef HWY_TARGET
|
|
173
184
|
#define HWY_TARGET HWY_SVE
|
|
@@ -271,7 +282,31 @@
|
|
|
271
282
|
#endif
|
|
272
283
|
#endif
|
|
273
284
|
|
|
274
|
-
// ------------------------------
|
|
285
|
+
// ------------------------------ HWY_ARCH_S390X
|
|
286
|
+
|
|
287
|
+
#if (HWY_TARGETS & HWY_Z14) && (HWY_STATIC_TARGET != HWY_Z14)
|
|
288
|
+
#undef HWY_TARGET
|
|
289
|
+
#define HWY_TARGET HWY_Z14
|
|
290
|
+
#include HWY_TARGET_INCLUDE
|
|
291
|
+
#ifdef HWY_TARGET_TOGGLE
|
|
292
|
+
#undef HWY_TARGET_TOGGLE
|
|
293
|
+
#else
|
|
294
|
+
#define HWY_TARGET_TOGGLE
|
|
295
|
+
#endif
|
|
296
|
+
#endif
|
|
297
|
+
|
|
298
|
+
#if (HWY_TARGETS & HWY_Z15) && (HWY_STATIC_TARGET != HWY_Z15)
|
|
299
|
+
#undef HWY_TARGET
|
|
300
|
+
#define HWY_TARGET HWY_Z15
|
|
301
|
+
#include HWY_TARGET_INCLUDE
|
|
302
|
+
#ifdef HWY_TARGET_TOGGLE
|
|
303
|
+
#undef HWY_TARGET_TOGGLE
|
|
304
|
+
#else
|
|
305
|
+
#define HWY_TARGET_TOGGLE
|
|
306
|
+
#endif
|
|
307
|
+
#endif
|
|
308
|
+
|
|
309
|
+
// ------------------------------ HWY_ARCH_RISCV
|
|
275
310
|
|
|
276
311
|
#if (HWY_TARGETS & HWY_RVV) && (HWY_STATIC_TARGET != HWY_RVV)
|
|
277
312
|
#undef HWY_TARGET
|