@img/sharp-libvips-dev 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -2
- package/include/aom/aom_decoder.h +1 -1
- package/include/aom/aom_encoder.h +7 -1
- package/include/aom/aom_image.h +24 -12
- package/include/aom/aom_integer.h +3 -3
- package/include/aom/aomcx.h +15 -0
- package/include/aom/aomdx.h +5 -2
- package/include/archive.h +7 -5
- package/include/archive_entry.h +5 -3
- package/include/cgif.h +3 -0
- package/include/expat.h +21 -10
- package/include/expat_config.h +11 -5
- package/include/ffi.h +12 -25
- package/include/freetype2/freetype/config/ftoption.h +2 -2
- package/include/fribidi/fribidi-config.h +2 -2
- package/include/fribidi/fribidi-unicode-version.h +3 -3
- package/include/gio-unix-2.0/gio/gfiledescriptorbased.h +3 -2
- package/include/glib-2.0/gio/gappinfo.h +40 -25
- package/include/glib-2.0/gio/gapplication.h +6 -0
- package/include/glib-2.0/gio/gasyncresult.h +1 -1
- package/include/glib-2.0/gio/gconverter.h +5 -0
- package/include/glib-2.0/gio/gdbusintrospection.h +1 -1
- package/include/glib-2.0/gio/gfile.h +16 -0
- package/include/glib-2.0/gio/gio-visibility.h +34 -0
- package/include/glib-2.0/gio/giotypes.h +0 -1
- package/include/glib-2.0/gio/gsettings.h +8 -0
- package/include/glib-2.0/gio/gvfs.h +2 -2
- package/include/glib-2.0/girepository/gi-visibility.h +34 -0
- package/include/glib-2.0/girepository/giarginfo.h +23 -6
- package/include/glib-2.0/girepository/gibaseinfo.h +44 -18
- package/include/glib-2.0/girepository/gicallableinfo.h +26 -16
- package/include/glib-2.0/girepository/gicallbackinfo.h +17 -2
- package/include/glib-2.0/girepository/giconstantinfo.h +19 -4
- package/include/glib-2.0/girepository/gienuminfo.h +20 -21
- package/include/glib-2.0/girepository/gifieldinfo.h +22 -7
- package/include/glib-2.0/girepository/giflagsinfo.h +60 -0
- package/include/glib-2.0/girepository/gifunctioninfo.h +22 -7
- package/include/glib-2.0/girepository/giinterfaceinfo.h +33 -18
- package/include/glib-2.0/girepository/giobjectinfo.h +41 -26
- package/include/glib-2.0/girepository/gipropertyinfo.h +18 -3
- package/include/glib-2.0/girepository/giregisteredtypeinfo.h +22 -11
- package/include/glib-2.0/girepository/girepository-autocleanups.h +56 -0
- package/include/glib-2.0/girepository/girepository.h +53 -62
- package/include/glib-2.0/girepository/girffi.h +8 -7
- package/include/glib-2.0/girepository/gisignalinfo.h +18 -3
- package/include/glib-2.0/girepository/gistructinfo.h +26 -11
- package/include/glib-2.0/girepository/gitypeinfo.h +29 -16
- package/include/glib-2.0/girepository/gitypelib.h +9 -13
- package/include/glib-2.0/girepository/gitypes.h +52 -104
- package/include/glib-2.0/girepository/giunioninfo.h +28 -12
- package/include/glib-2.0/girepository/giunresolvedinfo.h +17 -2
- package/include/glib-2.0/girepository/givalueinfo.h +65 -0
- package/include/glib-2.0/girepository/givfuncinfo.h +23 -8
- package/include/glib-2.0/glib/deprecated/gthread.h +9 -5
- package/include/glib-2.0/glib/gbitlock.h +31 -0
- package/include/glib-2.0/glib/gbookmarkfile.h +1 -1
- package/include/glib-2.0/glib/giochannel.h +2 -2
- package/include/glib-2.0/glib/glib-visibility.h +34 -0
- package/include/glib-2.0/glib/gmacros.h +12 -5
- package/include/glib-2.0/glib/gmain.h +93 -7
- package/include/glib-2.0/glib/gmessages.h +8 -0
- package/include/glib-2.0/glib/gqsort.h +8 -1
- package/include/glib-2.0/glib/gslice.h +2 -0
- package/include/glib-2.0/glib/gstrfuncs.h +24 -30
- package/include/glib-2.0/glib/gstrvbuilder.h +3 -0
- package/include/glib-2.0/glib/gthread.h +191 -3
- package/include/glib-2.0/glib/gunicode.h +1 -1
- package/include/glib-2.0/glib/gversionmacros.h +9 -0
- package/include/glib-2.0/glib-unix.h +7 -1
- package/include/glib-2.0/gmodule/gmodule-visibility.h +34 -0
- package/include/glib-2.0/gobject/genums.h +6 -6
- package/include/glib-2.0/gobject/glib-types.h +11 -0
- package/include/glib-2.0/gobject/gobject-visibility.h +34 -0
- package/include/glib-2.0/gobject/gsignal.h +16 -6
- package/include/glib-2.0/gobject/gtype.h +6 -6
- package/include/harfbuzz/hb-buffer.h +6 -0
- package/include/harfbuzz/hb-common.h +6 -9
- package/include/harfbuzz/hb-cplusplus.hh +8 -11
- package/include/harfbuzz/hb-subset.h +17 -4
- package/include/harfbuzz/hb-version.h +3 -3
- package/include/hwy/abort.h +28 -0
- package/include/hwy/aligned_allocator.h +218 -6
- package/include/hwy/base.h +1935 -512
- package/include/hwy/cache_control.h +24 -6
- package/include/hwy/detect_compiler_arch.h +105 -10
- package/include/hwy/detect_targets.h +146 -37
- package/include/hwy/foreach_target.h +36 -1
- package/include/hwy/highway.h +222 -50
- package/include/hwy/ops/arm_neon-inl.h +2055 -894
- package/include/hwy/ops/arm_sve-inl.h +1476 -348
- package/include/hwy/ops/emu128-inl.h +711 -623
- package/include/hwy/ops/generic_ops-inl.h +4431 -2157
- package/include/hwy/ops/inside-inl.h +691 -0
- package/include/hwy/ops/ppc_vsx-inl.h +2186 -673
- package/include/hwy/ops/rvv-inl.h +1556 -536
- package/include/hwy/ops/scalar-inl.h +353 -233
- package/include/hwy/ops/set_macros-inl.h +171 -23
- package/include/hwy/ops/shared-inl.h +198 -56
- package/include/hwy/ops/wasm_128-inl.h +283 -244
- package/include/hwy/ops/x86_128-inl.h +3673 -1357
- package/include/hwy/ops/x86_256-inl.h +1737 -663
- package/include/hwy/ops/x86_512-inl.h +1697 -500
- package/include/hwy/per_target.h +4 -0
- package/include/hwy/profiler.h +648 -0
- package/include/hwy/robust_statistics.h +2 -2
- package/include/hwy/targets.h +40 -32
- package/include/hwy/timer-inl.h +3 -3
- package/include/hwy/timer.h +16 -1
- package/include/libheif/heif.h +170 -15
- package/include/libheif/heif_items.h +237 -0
- package/include/libheif/heif_properties.h +38 -2
- package/include/libheif/heif_regions.h +1 -1
- package/include/libheif/heif_version.h +2 -2
- package/include/libpng16/png.h +32 -29
- package/include/libpng16/pngconf.h +2 -2
- package/include/libpng16/pnglibconf.h +8 -3
- package/include/librsvg-2.0/librsvg/rsvg-cairo.h +1 -1
- package/include/librsvg-2.0/librsvg/rsvg-features.h +3 -4
- package/include/librsvg-2.0/librsvg/rsvg-pixbuf.h +235 -0
- package/include/librsvg-2.0/librsvg/rsvg-version.h +3 -3
- package/include/librsvg-2.0/librsvg/rsvg.h +55 -176
- package/include/libxml2/libxml/HTMLparser.h +12 -19
- package/include/libxml2/libxml/c14n.h +1 -12
- package/include/libxml2/libxml/debugXML.h +1 -1
- package/include/libxml2/libxml/encoding.h +9 -0
- package/include/libxml2/libxml/entities.h +12 -1
- package/include/libxml2/libxml/hash.h +19 -0
- package/include/libxml2/libxml/list.h +2 -2
- package/include/libxml2/libxml/nanohttp.h +17 -0
- package/include/libxml2/libxml/parser.h +73 -58
- package/include/libxml2/libxml/parserInternals.h +9 -1
- package/include/libxml2/libxml/pattern.h +6 -0
- package/include/libxml2/libxml/tree.h +32 -12
- package/include/libxml2/libxml/uri.h +11 -0
- package/include/libxml2/libxml/valid.h +29 -2
- package/include/libxml2/libxml/xinclude.h +7 -0
- package/include/libxml2/libxml/xmlIO.h +21 -5
- package/include/libxml2/libxml/xmlerror.h +14 -0
- package/include/libxml2/libxml/xmlexports.h +111 -15
- package/include/libxml2/libxml/xmlmemory.h +8 -45
- package/include/libxml2/libxml/xmlreader.h +2 -0
- package/include/libxml2/libxml/xmlsave.h +5 -0
- package/include/libxml2/libxml/xmlunicode.h +165 -1
- package/include/libxml2/libxml/xmlversion.h +15 -179
- package/include/libxml2/libxml/xmlwriter.h +1 -0
- package/include/libxml2/libxml/xpath.h +4 -0
- package/include/pango-1.0/pango/pango-features.h +2 -2
- package/include/pango-1.0/pango/pango-fontmap.h +7 -0
- package/include/pango-1.0/pango/pango-item.h +4 -2
- package/include/pango-1.0/pango/pango-version-macros.h +25 -0
- package/include/pango-1.0/pango/pangofc-font.h +2 -1
- package/include/pixman-1/pixman-version.h +2 -2
- package/include/png.h +32 -29
- package/include/pngconf.h +2 -2
- package/include/pnglibconf.h +8 -3
- package/include/vips/connection.h +9 -3
- package/include/vips/util.h +1 -11
- package/include/vips/version.h +4 -4
- package/include/webp/decode.h +58 -56
- package/include/webp/demux.h +25 -21
- package/include/webp/encode.h +44 -39
- package/include/webp/mux.h +76 -15
- package/include/webp/mux_types.h +2 -1
- package/include/webp/sharpyuv/sharpyuv.h +77 -8
- package/include/webp/types.h +29 -8
- package/include/zconf.h +1 -1
- package/include/zlib.h +12 -12
- package/package.json +1 -1
- package/versions.json +18 -19
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
// Copyright 2020 Google LLC
|
|
2
|
+
// Copyright 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
|
|
2
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
// SPDX-License-Identifier: BSD-3-Clause
|
|
3
5
|
//
|
|
4
6
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
7
|
// you may not use this file except in compliance with the License.
|
|
@@ -41,9 +43,31 @@
|
|
|
41
43
|
#undef HWY_HAVE_FLOAT64
|
|
42
44
|
#undef HWY_MEM_OPS_MIGHT_FAULT
|
|
43
45
|
#undef HWY_NATIVE_FMA
|
|
46
|
+
#undef HWY_NATIVE_DOT_BF16
|
|
44
47
|
#undef HWY_CAP_GE256
|
|
45
48
|
#undef HWY_CAP_GE512
|
|
46
49
|
|
|
50
|
+
#undef HWY_TARGET_IS_SVE
|
|
51
|
+
#if HWY_TARGET & HWY_ALL_SVE
|
|
52
|
+
#define HWY_TARGET_IS_SVE 1
|
|
53
|
+
#else
|
|
54
|
+
#define HWY_TARGET_IS_SVE 0
|
|
55
|
+
#endif
|
|
56
|
+
|
|
57
|
+
#undef HWY_TARGET_IS_NEON
|
|
58
|
+
#if HWY_TARGET & HWY_ALL_NEON
|
|
59
|
+
#define HWY_TARGET_IS_NEON 1
|
|
60
|
+
#else
|
|
61
|
+
#define HWY_TARGET_IS_NEON 0
|
|
62
|
+
#endif
|
|
63
|
+
|
|
64
|
+
#undef HWY_TARGET_IS_PPC
|
|
65
|
+
#if HWY_TARGET & HWY_ALL_PPC
|
|
66
|
+
#define HWY_TARGET_IS_PPC 1
|
|
67
|
+
#else
|
|
68
|
+
#define HWY_TARGET_IS_PPC 0
|
|
69
|
+
#endif
|
|
70
|
+
|
|
47
71
|
// Supported on all targets except RVV (requires GCC 14 or upcoming Clang)
|
|
48
72
|
#if HWY_TARGET == HWY_RVV && \
|
|
49
73
|
((HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1400) || \
|
|
@@ -116,7 +140,21 @@
|
|
|
116
140
|
",vpclmulqdq,avx512vbmi,avx512vbmi2,vaes,avx512vnni,avx512bitalg," \
|
|
117
141
|
"avx512vpopcntdq,gfni"
|
|
118
142
|
|
|
119
|
-
|
|
143
|
+
// Force-disable for compilers that do not properly support avx512bf16.
|
|
144
|
+
#if !defined(HWY_AVX3_DISABLE_AVX512BF16) && \
|
|
145
|
+
(HWY_COMPILER_CLANGCL || \
|
|
146
|
+
(HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1000) || \
|
|
147
|
+
(HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 900))
|
|
148
|
+
#define HWY_AVX3_DISABLE_AVX512BF16
|
|
149
|
+
#endif
|
|
150
|
+
|
|
151
|
+
#if !defined(HWY_AVX3_DISABLE_AVX512BF16)
|
|
152
|
+
#define HWY_TARGET_STR_AVX3_ZEN4 HWY_TARGET_STR_AVX3_DL ",avx512bf16"
|
|
153
|
+
#else
|
|
154
|
+
#define HWY_TARGET_STR_AVX3_ZEN4 HWY_TARGET_STR_AVX3_DL
|
|
155
|
+
#endif
|
|
156
|
+
|
|
157
|
+
#define HWY_TARGET_STR_AVX3_SPR HWY_TARGET_STR_AVX3_ZEN4 ",avx512fp16"
|
|
120
158
|
|
|
121
159
|
#if defined(HWY_DISABLE_PPC8_CRYPTO)
|
|
122
160
|
#define HWY_TARGET_STR_PPC8_CRYPTO ""
|
|
@@ -131,9 +169,21 @@
|
|
|
131
169
|
#if HWY_COMPILER_CLANG
|
|
132
170
|
#define HWY_TARGET_STR_PPC10 HWY_TARGET_STR_PPC9 ",power10-vector"
|
|
133
171
|
#else
|
|
134
|
-
#
|
|
172
|
+
// See #1707 and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102059#c35.
|
|
173
|
+
// When the baseline is PPC 8 or 9, inlining functions such as PreventElision
|
|
174
|
+
// into PPC10 code fails because PPC10 defaults to no-htm and is thus worse than
|
|
175
|
+
// the baseline, which has htm. We cannot have pragma target on functions
|
|
176
|
+
// outside HWY_NAMESPACE such as those in base.h. It would be possible for users
|
|
177
|
+
// to set -mno-htm globally, but we can also work around this at the library
|
|
178
|
+
// level by claiming that PPC10 still has HTM, thus avoiding the mismatch. This
|
|
179
|
+
// seems to be safe because HTM uses builtins rather than modifying codegen, see
|
|
180
|
+
// https://gcc.gnu.org/legacy-ml/gcc-patches/2013-07/msg00167.html.
|
|
181
|
+
#define HWY_TARGET_STR_PPC10 HWY_TARGET_STR_PPC9 ",cpu=power10,htm"
|
|
135
182
|
#endif
|
|
136
183
|
|
|
184
|
+
#define HWY_TARGET_STR_Z14 "arch=z14"
|
|
185
|
+
#define HWY_TARGET_STR_Z15 "arch=z15"
|
|
186
|
+
|
|
137
187
|
// Before include guard so we redefine HWY_TARGET_STR on each include,
|
|
138
188
|
// governed by the current HWY_TARGET.
|
|
139
189
|
|
|
@@ -152,6 +202,7 @@
|
|
|
152
202
|
#define HWY_HAVE_FLOAT64 1
|
|
153
203
|
#define HWY_MEM_OPS_MIGHT_FAULT 1
|
|
154
204
|
#define HWY_NATIVE_FMA 0
|
|
205
|
+
#define HWY_NATIVE_DOT_BF16 0
|
|
155
206
|
#define HWY_CAP_GE256 0
|
|
156
207
|
#define HWY_CAP_GE512 0
|
|
157
208
|
|
|
@@ -171,6 +222,7 @@
|
|
|
171
222
|
#define HWY_HAVE_FLOAT64 1
|
|
172
223
|
#define HWY_MEM_OPS_MIGHT_FAULT 1
|
|
173
224
|
#define HWY_NATIVE_FMA 0
|
|
225
|
+
#define HWY_NATIVE_DOT_BF16 0
|
|
174
226
|
#define HWY_CAP_GE256 0
|
|
175
227
|
#define HWY_CAP_GE512 0
|
|
176
228
|
|
|
@@ -191,6 +243,7 @@
|
|
|
191
243
|
#define HWY_HAVE_FLOAT64 1
|
|
192
244
|
#define HWY_MEM_OPS_MIGHT_FAULT 1
|
|
193
245
|
#define HWY_NATIVE_FMA 0
|
|
246
|
+
#define HWY_NATIVE_DOT_BF16 0
|
|
194
247
|
#define HWY_CAP_GE256 0
|
|
195
248
|
#define HWY_CAP_GE512 0
|
|
196
249
|
|
|
@@ -216,6 +269,7 @@
|
|
|
216
269
|
#else
|
|
217
270
|
#define HWY_NATIVE_FMA 1
|
|
218
271
|
#endif
|
|
272
|
+
#define HWY_NATIVE_DOT_BF16 0
|
|
219
273
|
|
|
220
274
|
#define HWY_CAP_GE256 1
|
|
221
275
|
#define HWY_CAP_GE512 0
|
|
@@ -233,7 +287,10 @@
|
|
|
233
287
|
|
|
234
288
|
#define HWY_HAVE_SCALABLE 0
|
|
235
289
|
#define HWY_HAVE_INTEGER64 1
|
|
236
|
-
#if
|
|
290
|
+
#if HWY_TARGET == HWY_AVX3_SPR && HWY_COMPILER_GCC_ACTUAL && \
|
|
291
|
+
HWY_HAVE_SCALAR_F16_TYPE
|
|
292
|
+
// TODO: enable F16 for AVX3_SPR target with Clang once compilation issues are
|
|
293
|
+
// fixed
|
|
237
294
|
#define HWY_HAVE_FLOAT16 1
|
|
238
295
|
#else
|
|
239
296
|
#define HWY_HAVE_FLOAT16 0
|
|
@@ -241,6 +298,11 @@
|
|
|
241
298
|
#define HWY_HAVE_FLOAT64 1
|
|
242
299
|
#define HWY_MEM_OPS_MIGHT_FAULT 0
|
|
243
300
|
#define HWY_NATIVE_FMA 1
|
|
301
|
+
#if (HWY_TARGET <= HWY_AVX3_ZEN4) && !defined(HWY_AVX3_DISABLE_AVX512BF16)
|
|
302
|
+
#define HWY_NATIVE_DOT_BF16 1
|
|
303
|
+
#else
|
|
304
|
+
#define HWY_NATIVE_DOT_BF16 0
|
|
305
|
+
#endif
|
|
244
306
|
#define HWY_CAP_GE256 1
|
|
245
307
|
#define HWY_CAP_GE512 1
|
|
246
308
|
|
|
@@ -257,8 +319,7 @@
|
|
|
257
319
|
#elif HWY_TARGET == HWY_AVX3_ZEN4
|
|
258
320
|
|
|
259
321
|
#define HWY_NAMESPACE N_AVX3_ZEN4
|
|
260
|
-
|
|
261
|
-
#define HWY_TARGET_STR HWY_TARGET_STR_AVX3_DL
|
|
322
|
+
#define HWY_TARGET_STR HWY_TARGET_STR_AVX3_ZEN4
|
|
262
323
|
|
|
263
324
|
#elif HWY_TARGET == HWY_AVX3_SPR
|
|
264
325
|
|
|
@@ -271,8 +332,7 @@
|
|
|
271
332
|
|
|
272
333
|
//-----------------------------------------------------------------------------
|
|
273
334
|
// PPC8, PPC9, PPC10
|
|
274
|
-
#elif
|
|
275
|
-
HWY_TARGET == HWY_PPC10
|
|
335
|
+
#elif HWY_TARGET_IS_PPC
|
|
276
336
|
|
|
277
337
|
#define HWY_ALIGN alignas(16)
|
|
278
338
|
#define HWY_MAX_BYTES 16
|
|
@@ -284,6 +344,7 @@
|
|
|
284
344
|
#define HWY_HAVE_FLOAT64 1
|
|
285
345
|
#define HWY_MEM_OPS_MIGHT_FAULT 1
|
|
286
346
|
#define HWY_NATIVE_FMA 1
|
|
347
|
+
#define HWY_NATIVE_DOT_BF16 0
|
|
287
348
|
#define HWY_CAP_GE256 0
|
|
288
349
|
#define HWY_CAP_GE512 0
|
|
289
350
|
|
|
@@ -304,11 +365,43 @@
|
|
|
304
365
|
|
|
305
366
|
#else
|
|
306
367
|
#error "Logic error"
|
|
307
|
-
#endif // HWY_TARGET
|
|
368
|
+
#endif // HWY_TARGET
|
|
369
|
+
|
|
370
|
+
//-----------------------------------------------------------------------------
|
|
371
|
+
// Z14, Z15
|
|
372
|
+
#elif HWY_TARGET == HWY_Z14 || HWY_TARGET == HWY_Z15
|
|
373
|
+
|
|
374
|
+
#define HWY_ALIGN alignas(16)
|
|
375
|
+
#define HWY_MAX_BYTES 16
|
|
376
|
+
#define HWY_LANES(T) (16 / sizeof(T))
|
|
377
|
+
|
|
378
|
+
#define HWY_HAVE_SCALABLE 0
|
|
379
|
+
#define HWY_HAVE_INTEGER64 1
|
|
380
|
+
#define HWY_HAVE_FLOAT16 0
|
|
381
|
+
#define HWY_HAVE_FLOAT64 1
|
|
382
|
+
#define HWY_MEM_OPS_MIGHT_FAULT 1
|
|
383
|
+
#define HWY_NATIVE_FMA 1
|
|
384
|
+
#define HWY_NATIVE_DOT_BF16 0
|
|
385
|
+
#define HWY_CAP_GE256 0
|
|
386
|
+
#define HWY_CAP_GE512 0
|
|
387
|
+
|
|
388
|
+
#if HWY_TARGET == HWY_Z14
|
|
389
|
+
|
|
390
|
+
#define HWY_NAMESPACE N_Z14
|
|
391
|
+
#define HWY_TARGET_STR HWY_TARGET_STR_Z14
|
|
392
|
+
|
|
393
|
+
#elif HWY_TARGET == HWY_Z15
|
|
394
|
+
|
|
395
|
+
#define HWY_NAMESPACE N_Z15
|
|
396
|
+
#define HWY_TARGET_STR HWY_TARGET_STR_Z15
|
|
397
|
+
|
|
398
|
+
#else
|
|
399
|
+
#error "Logic error"
|
|
400
|
+
#endif // HWY_TARGET == HWY_Z15
|
|
308
401
|
|
|
309
402
|
//-----------------------------------------------------------------------------
|
|
310
403
|
// NEON
|
|
311
|
-
#elif
|
|
404
|
+
#elif HWY_TARGET_IS_NEON
|
|
312
405
|
|
|
313
406
|
#define HWY_ALIGN alignas(16)
|
|
314
407
|
#define HWY_MAX_BYTES 16
|
|
@@ -316,7 +409,7 @@
|
|
|
316
409
|
|
|
317
410
|
#define HWY_HAVE_SCALABLE 0
|
|
318
411
|
#define HWY_HAVE_INTEGER64 1
|
|
319
|
-
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
|
|
412
|
+
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) || HWY_TARGET == HWY_NEON_BF16
|
|
320
413
|
#define HWY_HAVE_FLOAT16 1
|
|
321
414
|
#else
|
|
322
415
|
#define HWY_HAVE_FLOAT16 0
|
|
@@ -330,20 +423,29 @@
|
|
|
330
423
|
|
|
331
424
|
#define HWY_MEM_OPS_MIGHT_FAULT 1
|
|
332
425
|
|
|
333
|
-
#if defined(__ARM_VFPV4__) || HWY_ARCH_ARM_A64
|
|
426
|
+
#if defined(__ARM_FEATURE_FMA) || defined(__ARM_VFPV4__) || HWY_ARCH_ARM_A64
|
|
334
427
|
#define HWY_NATIVE_FMA 1
|
|
335
428
|
#else
|
|
336
429
|
#define HWY_NATIVE_FMA 0
|
|
337
430
|
#endif
|
|
431
|
+
#if HWY_NEON_HAVE_F32_TO_BF16C || HWY_TARGET == HWY_NEON_BF16
|
|
432
|
+
#define HWY_NATIVE_DOT_BF16 1
|
|
433
|
+
#else
|
|
434
|
+
#define HWY_NATIVE_DOT_BF16 0
|
|
435
|
+
#endif
|
|
338
436
|
|
|
339
437
|
#define HWY_CAP_GE256 0
|
|
340
438
|
#define HWY_CAP_GE512 0
|
|
341
439
|
|
|
342
440
|
#if HWY_TARGET == HWY_NEON_WITHOUT_AES
|
|
343
441
|
#define HWY_NAMESPACE N_NEON_WITHOUT_AES
|
|
344
|
-
#
|
|
442
|
+
#elif HWY_TARGET == HWY_NEON
|
|
345
443
|
#define HWY_NAMESPACE N_NEON
|
|
346
|
-
#
|
|
444
|
+
#elif HWY_TARGET == HWY_NEON_BF16
|
|
445
|
+
#define HWY_NAMESPACE N_NEON_BF16
|
|
446
|
+
#else
|
|
447
|
+
#error "Logic error, missing case"
|
|
448
|
+
#endif // HWY_TARGET
|
|
347
449
|
|
|
348
450
|
// Can use pragmas instead of -march compiler flag
|
|
349
451
|
#if HWY_HAVE_RUNTIME_DISPATCH
|
|
@@ -358,21 +460,43 @@
|
|
|
358
460
|
|
|
359
461
|
#else // !HWY_ARCH_ARM_V7
|
|
360
462
|
|
|
463
|
+
#if (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1300) || \
|
|
464
|
+
(HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1300)
|
|
465
|
+
// GCC 12 or earlier and Clang 12 or earlier require +crypto be added to the
|
|
466
|
+
// target string to enable AArch64 AES intrinsics
|
|
467
|
+
#define HWY_TARGET_STR_NEON "+crypto"
|
|
468
|
+
#else
|
|
469
|
+
#define HWY_TARGET_STR_NEON "+aes"
|
|
470
|
+
#endif
|
|
471
|
+
|
|
472
|
+
// Clang >= 16 requires +fullfp16 instead of fp16, but Apple Clang 15 = 1600
|
|
473
|
+
// fails to parse unless the string starts with armv8, whereas 1700 refuses it.
|
|
474
|
+
#if HWY_COMPILER_CLANG >= 1700
|
|
475
|
+
#define HWY_TARGET_STR_FP16 "+fullfp16"
|
|
476
|
+
#elif HWY_COMPILER_CLANG >= 1600 && defined(__apple_build_version__)
|
|
477
|
+
#define HWY_TARGET_STR_FP16 "armv8.4-a+fullfp16"
|
|
478
|
+
#else
|
|
479
|
+
#define HWY_TARGET_STR_FP16 "+fp16"
|
|
480
|
+
#endif
|
|
481
|
+
|
|
361
482
|
#if HWY_TARGET == HWY_NEON_WITHOUT_AES
|
|
362
483
|
// Do not define HWY_TARGET_STR (no pragma).
|
|
484
|
+
#elif HWY_TARGET == HWY_NEON
|
|
485
|
+
#define HWY_TARGET_STR HWY_TARGET_STR_NEON
|
|
486
|
+
#elif HWY_TARGET == HWY_NEON_BF16
|
|
487
|
+
#define HWY_TARGET_STR HWY_TARGET_STR_FP16 "+bf16+dotprod" HWY_TARGET_STR_NEON
|
|
363
488
|
#else
|
|
364
|
-
#
|
|
365
|
-
#endif // HWY_TARGET
|
|
489
|
+
#error "Logic error, missing case"
|
|
490
|
+
#endif // HWY_TARGET
|
|
366
491
|
|
|
367
|
-
#endif // HWY_ARCH_ARM_V7
|
|
492
|
+
#endif // !HWY_ARCH_ARM_V7
|
|
368
493
|
#else // !HWY_HAVE_RUNTIME_DISPATCH
|
|
369
494
|
// HWY_TARGET_STR remains undefined
|
|
370
495
|
#endif
|
|
371
496
|
|
|
372
497
|
//-----------------------------------------------------------------------------
|
|
373
498
|
// SVE[2]
|
|
374
|
-
#elif
|
|
375
|
-
HWY_TARGET == HWY_SVE_256 || HWY_TARGET == HWY_SVE2_128
|
|
499
|
+
#elif HWY_TARGET_IS_SVE
|
|
376
500
|
|
|
377
501
|
// SVE only requires lane alignment, not natural alignment of the entire vector.
|
|
378
502
|
#define HWY_ALIGN alignas(8)
|
|
@@ -382,10 +506,15 @@
|
|
|
382
506
|
#define HWY_LANES(T) ((HWY_MAX_BYTES) / sizeof(T))
|
|
383
507
|
|
|
384
508
|
#define HWY_HAVE_INTEGER64 1
|
|
385
|
-
#define HWY_HAVE_FLOAT16
|
|
509
|
+
#define HWY_HAVE_FLOAT16 1
|
|
386
510
|
#define HWY_HAVE_FLOAT64 1
|
|
387
511
|
#define HWY_MEM_OPS_MIGHT_FAULT 0
|
|
388
512
|
#define HWY_NATIVE_FMA 1
|
|
513
|
+
#if HWY_SVE_HAVE_BF16_FEATURE
|
|
514
|
+
#define HWY_NATIVE_DOT_BF16 1
|
|
515
|
+
#else
|
|
516
|
+
#define HWY_NATIVE_DOT_BF16 0
|
|
517
|
+
#endif
|
|
389
518
|
#define HWY_CAP_GE256 0
|
|
390
519
|
#define HWY_CAP_GE512 0
|
|
391
520
|
|
|
@@ -410,11 +539,17 @@
|
|
|
410
539
|
// Can use pragmas instead of -march compiler flag
|
|
411
540
|
#if HWY_HAVE_RUNTIME_DISPATCH
|
|
412
541
|
#if HWY_TARGET == HWY_SVE2 || HWY_TARGET == HWY_SVE2_128
|
|
413
|
-
|
|
414
|
-
|
|
542
|
+
// Static dispatch with -march=armv8-a+sve2+aes, or no baseline, hence dynamic
|
|
543
|
+
// dispatch, which checks for AES support at runtime.
|
|
544
|
+
#if defined(__ARM_FEATURE_SVE2_AES) || (HWY_BASELINE_SVE2 == 0)
|
|
545
|
+
#define HWY_TARGET_STR "+sve2+sve2-aes,+sve"
|
|
546
|
+
#else // SVE2 without AES
|
|
547
|
+
#define HWY_TARGET_STR "+sve2,+sve"
|
|
548
|
+
#endif
|
|
549
|
+
#else // not SVE2 target
|
|
415
550
|
#define HWY_TARGET_STR "+sve"
|
|
416
551
|
#endif
|
|
417
|
-
#else
|
|
552
|
+
#else // !HWY_HAVE_RUNTIME_DISPATCH
|
|
418
553
|
// HWY_TARGET_STR remains undefined
|
|
419
554
|
#endif
|
|
420
555
|
|
|
@@ -432,6 +567,7 @@
|
|
|
432
567
|
#define HWY_HAVE_FLOAT64 1
|
|
433
568
|
#define HWY_MEM_OPS_MIGHT_FAULT 1
|
|
434
569
|
#define HWY_NATIVE_FMA 0
|
|
570
|
+
#define HWY_NATIVE_DOT_BF16 0
|
|
435
571
|
#define HWY_CAP_GE256 0
|
|
436
572
|
#define HWY_CAP_GE512 0
|
|
437
573
|
|
|
@@ -453,6 +589,7 @@
|
|
|
453
589
|
#define HWY_HAVE_FLOAT64 0
|
|
454
590
|
#define HWY_MEM_OPS_MIGHT_FAULT 1
|
|
455
591
|
#define HWY_NATIVE_FMA 0
|
|
592
|
+
#define HWY_NATIVE_DOT_BF16 0
|
|
456
593
|
#define HWY_CAP_GE256 1
|
|
457
594
|
#define HWY_CAP_GE512 0
|
|
458
595
|
|
|
@@ -480,10 +617,11 @@
|
|
|
480
617
|
#define HWY_HAVE_FLOAT64 1
|
|
481
618
|
#define HWY_MEM_OPS_MIGHT_FAULT 0
|
|
482
619
|
#define HWY_NATIVE_FMA 1
|
|
620
|
+
#define HWY_NATIVE_DOT_BF16 0
|
|
483
621
|
#define HWY_CAP_GE256 0
|
|
484
622
|
#define HWY_CAP_GE512 0
|
|
485
623
|
|
|
486
|
-
#if
|
|
624
|
+
#if HWY_RVV_HAVE_F16_VEC
|
|
487
625
|
#define HWY_HAVE_FLOAT16 1
|
|
488
626
|
#else
|
|
489
627
|
#define HWY_HAVE_FLOAT16 0
|
|
@@ -508,6 +646,7 @@
|
|
|
508
646
|
#define HWY_HAVE_FLOAT64 1
|
|
509
647
|
#define HWY_MEM_OPS_MIGHT_FAULT 1
|
|
510
648
|
#define HWY_NATIVE_FMA 0
|
|
649
|
+
#define HWY_NATIVE_DOT_BF16 0
|
|
511
650
|
#define HWY_CAP_GE256 0
|
|
512
651
|
#define HWY_CAP_GE512 0
|
|
513
652
|
|
|
@@ -529,6 +668,7 @@
|
|
|
529
668
|
#define HWY_HAVE_FLOAT64 1
|
|
530
669
|
#define HWY_MEM_OPS_MIGHT_FAULT 0
|
|
531
670
|
#define HWY_NATIVE_FMA 0
|
|
671
|
+
#define HWY_NATIVE_DOT_BF16 0
|
|
532
672
|
#define HWY_CAP_GE256 0
|
|
533
673
|
#define HWY_CAP_GE512 0
|
|
534
674
|
|
|
@@ -540,6 +680,14 @@
|
|
|
540
680
|
#pragma message("HWY_TARGET does not match any known target")
|
|
541
681
|
#endif // HWY_TARGET
|
|
542
682
|
|
|
683
|
+
//-----------------------------------------------------------------------------
|
|
684
|
+
|
|
685
|
+
// Sanity check: if we have f16 vector support, then base.h should also be
|
|
686
|
+
// using a built-in type for f16 scalars.
|
|
687
|
+
#if HWY_HAVE_FLOAT16 && !HWY_HAVE_SCALAR_F16_TYPE
|
|
688
|
+
#error "Logic error: f16 vectors but no scalars"
|
|
689
|
+
#endif
|
|
690
|
+
|
|
543
691
|
// Override this to 1 in asan/msan builds, which will still fault.
|
|
544
692
|
#if HWY_IS_ASAN || HWY_IS_MSAN
|
|
545
693
|
#undef HWY_MEM_OPS_MIGHT_FAULT
|