@img/sharp-libvips-dev 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. package/README.md +1 -2
  2. package/include/aom/aom_decoder.h +1 -1
  3. package/include/aom/aom_encoder.h +7 -1
  4. package/include/aom/aom_image.h +24 -12
  5. package/include/aom/aom_integer.h +3 -3
  6. package/include/aom/aomcx.h +15 -0
  7. package/include/aom/aomdx.h +5 -2
  8. package/include/archive.h +7 -5
  9. package/include/archive_entry.h +5 -3
  10. package/include/cgif.h +3 -0
  11. package/include/expat.h +21 -10
  12. package/include/expat_config.h +11 -5
  13. package/include/ffi.h +12 -25
  14. package/include/freetype2/freetype/config/ftoption.h +2 -2
  15. package/include/fribidi/fribidi-config.h +2 -2
  16. package/include/fribidi/fribidi-unicode-version.h +3 -3
  17. package/include/gio-unix-2.0/gio/gfiledescriptorbased.h +3 -2
  18. package/include/glib-2.0/gio/gappinfo.h +40 -25
  19. package/include/glib-2.0/gio/gapplication.h +6 -0
  20. package/include/glib-2.0/gio/gasyncresult.h +1 -1
  21. package/include/glib-2.0/gio/gconverter.h +5 -0
  22. package/include/glib-2.0/gio/gdbusintrospection.h +1 -1
  23. package/include/glib-2.0/gio/gfile.h +16 -0
  24. package/include/glib-2.0/gio/gio-visibility.h +34 -0
  25. package/include/glib-2.0/gio/giotypes.h +0 -1
  26. package/include/glib-2.0/gio/gsettings.h +8 -0
  27. package/include/glib-2.0/gio/gvfs.h +2 -2
  28. package/include/glib-2.0/girepository/gi-visibility.h +34 -0
  29. package/include/glib-2.0/girepository/giarginfo.h +23 -6
  30. package/include/glib-2.0/girepository/gibaseinfo.h +44 -18
  31. package/include/glib-2.0/girepository/gicallableinfo.h +26 -16
  32. package/include/glib-2.0/girepository/gicallbackinfo.h +17 -2
  33. package/include/glib-2.0/girepository/giconstantinfo.h +19 -4
  34. package/include/glib-2.0/girepository/gienuminfo.h +20 -21
  35. package/include/glib-2.0/girepository/gifieldinfo.h +22 -7
  36. package/include/glib-2.0/girepository/giflagsinfo.h +60 -0
  37. package/include/glib-2.0/girepository/gifunctioninfo.h +22 -7
  38. package/include/glib-2.0/girepository/giinterfaceinfo.h +33 -18
  39. package/include/glib-2.0/girepository/giobjectinfo.h +41 -26
  40. package/include/glib-2.0/girepository/gipropertyinfo.h +18 -3
  41. package/include/glib-2.0/girepository/giregisteredtypeinfo.h +22 -11
  42. package/include/glib-2.0/girepository/girepository-autocleanups.h +56 -0
  43. package/include/glib-2.0/girepository/girepository.h +53 -62
  44. package/include/glib-2.0/girepository/girffi.h +8 -7
  45. package/include/glib-2.0/girepository/gisignalinfo.h +18 -3
  46. package/include/glib-2.0/girepository/gistructinfo.h +26 -11
  47. package/include/glib-2.0/girepository/gitypeinfo.h +29 -16
  48. package/include/glib-2.0/girepository/gitypelib.h +9 -13
  49. package/include/glib-2.0/girepository/gitypes.h +52 -104
  50. package/include/glib-2.0/girepository/giunioninfo.h +28 -12
  51. package/include/glib-2.0/girepository/giunresolvedinfo.h +17 -2
  52. package/include/glib-2.0/girepository/givalueinfo.h +65 -0
  53. package/include/glib-2.0/girepository/givfuncinfo.h +23 -8
  54. package/include/glib-2.0/glib/deprecated/gthread.h +9 -5
  55. package/include/glib-2.0/glib/gbitlock.h +31 -0
  56. package/include/glib-2.0/glib/gbookmarkfile.h +1 -1
  57. package/include/glib-2.0/glib/giochannel.h +2 -2
  58. package/include/glib-2.0/glib/glib-visibility.h +34 -0
  59. package/include/glib-2.0/glib/gmacros.h +12 -5
  60. package/include/glib-2.0/glib/gmain.h +93 -7
  61. package/include/glib-2.0/glib/gmessages.h +8 -0
  62. package/include/glib-2.0/glib/gqsort.h +8 -1
  63. package/include/glib-2.0/glib/gslice.h +2 -0
  64. package/include/glib-2.0/glib/gstrfuncs.h +24 -30
  65. package/include/glib-2.0/glib/gstrvbuilder.h +3 -0
  66. package/include/glib-2.0/glib/gthread.h +191 -3
  67. package/include/glib-2.0/glib/gunicode.h +1 -1
  68. package/include/glib-2.0/glib/gversionmacros.h +9 -0
  69. package/include/glib-2.0/glib-unix.h +7 -1
  70. package/include/glib-2.0/gmodule/gmodule-visibility.h +34 -0
  71. package/include/glib-2.0/gobject/genums.h +6 -6
  72. package/include/glib-2.0/gobject/glib-types.h +11 -0
  73. package/include/glib-2.0/gobject/gobject-visibility.h +34 -0
  74. package/include/glib-2.0/gobject/gsignal.h +16 -6
  75. package/include/glib-2.0/gobject/gtype.h +6 -6
  76. package/include/harfbuzz/hb-buffer.h +6 -0
  77. package/include/harfbuzz/hb-common.h +6 -9
  78. package/include/harfbuzz/hb-cplusplus.hh +8 -11
  79. package/include/harfbuzz/hb-subset.h +17 -4
  80. package/include/harfbuzz/hb-version.h +3 -3
  81. package/include/hwy/abort.h +28 -0
  82. package/include/hwy/aligned_allocator.h +218 -6
  83. package/include/hwy/base.h +1935 -512
  84. package/include/hwy/cache_control.h +24 -6
  85. package/include/hwy/detect_compiler_arch.h +105 -10
  86. package/include/hwy/detect_targets.h +146 -37
  87. package/include/hwy/foreach_target.h +36 -1
  88. package/include/hwy/highway.h +222 -50
  89. package/include/hwy/ops/arm_neon-inl.h +2055 -894
  90. package/include/hwy/ops/arm_sve-inl.h +1476 -348
  91. package/include/hwy/ops/emu128-inl.h +711 -623
  92. package/include/hwy/ops/generic_ops-inl.h +4431 -2157
  93. package/include/hwy/ops/inside-inl.h +691 -0
  94. package/include/hwy/ops/ppc_vsx-inl.h +2186 -673
  95. package/include/hwy/ops/rvv-inl.h +1556 -536
  96. package/include/hwy/ops/scalar-inl.h +353 -233
  97. package/include/hwy/ops/set_macros-inl.h +171 -23
  98. package/include/hwy/ops/shared-inl.h +198 -56
  99. package/include/hwy/ops/wasm_128-inl.h +283 -244
  100. package/include/hwy/ops/x86_128-inl.h +3673 -1357
  101. package/include/hwy/ops/x86_256-inl.h +1737 -663
  102. package/include/hwy/ops/x86_512-inl.h +1697 -500
  103. package/include/hwy/per_target.h +4 -0
  104. package/include/hwy/profiler.h +648 -0
  105. package/include/hwy/robust_statistics.h +2 -2
  106. package/include/hwy/targets.h +40 -32
  107. package/include/hwy/timer-inl.h +3 -3
  108. package/include/hwy/timer.h +16 -1
  109. package/include/libheif/heif.h +170 -15
  110. package/include/libheif/heif_items.h +237 -0
  111. package/include/libheif/heif_properties.h +38 -2
  112. package/include/libheif/heif_regions.h +1 -1
  113. package/include/libheif/heif_version.h +2 -2
  114. package/include/libpng16/png.h +32 -29
  115. package/include/libpng16/pngconf.h +2 -2
  116. package/include/libpng16/pnglibconf.h +8 -3
  117. package/include/librsvg-2.0/librsvg/rsvg-cairo.h +1 -1
  118. package/include/librsvg-2.0/librsvg/rsvg-features.h +3 -4
  119. package/include/librsvg-2.0/librsvg/rsvg-pixbuf.h +235 -0
  120. package/include/librsvg-2.0/librsvg/rsvg-version.h +3 -3
  121. package/include/librsvg-2.0/librsvg/rsvg.h +55 -176
  122. package/include/libxml2/libxml/HTMLparser.h +12 -19
  123. package/include/libxml2/libxml/c14n.h +1 -12
  124. package/include/libxml2/libxml/debugXML.h +1 -1
  125. package/include/libxml2/libxml/encoding.h +9 -0
  126. package/include/libxml2/libxml/entities.h +12 -1
  127. package/include/libxml2/libxml/hash.h +19 -0
  128. package/include/libxml2/libxml/list.h +2 -2
  129. package/include/libxml2/libxml/nanohttp.h +17 -0
  130. package/include/libxml2/libxml/parser.h +73 -58
  131. package/include/libxml2/libxml/parserInternals.h +9 -1
  132. package/include/libxml2/libxml/pattern.h +6 -0
  133. package/include/libxml2/libxml/tree.h +32 -12
  134. package/include/libxml2/libxml/uri.h +11 -0
  135. package/include/libxml2/libxml/valid.h +29 -2
  136. package/include/libxml2/libxml/xinclude.h +7 -0
  137. package/include/libxml2/libxml/xmlIO.h +21 -5
  138. package/include/libxml2/libxml/xmlerror.h +14 -0
  139. package/include/libxml2/libxml/xmlexports.h +111 -15
  140. package/include/libxml2/libxml/xmlmemory.h +8 -45
  141. package/include/libxml2/libxml/xmlreader.h +2 -0
  142. package/include/libxml2/libxml/xmlsave.h +5 -0
  143. package/include/libxml2/libxml/xmlunicode.h +165 -1
  144. package/include/libxml2/libxml/xmlversion.h +15 -179
  145. package/include/libxml2/libxml/xmlwriter.h +1 -0
  146. package/include/libxml2/libxml/xpath.h +4 -0
  147. package/include/pango-1.0/pango/pango-features.h +2 -2
  148. package/include/pango-1.0/pango/pango-fontmap.h +7 -0
  149. package/include/pango-1.0/pango/pango-item.h +4 -2
  150. package/include/pango-1.0/pango/pango-version-macros.h +25 -0
  151. package/include/pango-1.0/pango/pangofc-font.h +2 -1
  152. package/include/pixman-1/pixman-version.h +2 -2
  153. package/include/png.h +32 -29
  154. package/include/pngconf.h +2 -2
  155. package/include/pnglibconf.h +8 -3
  156. package/include/vips/connection.h +9 -3
  157. package/include/vips/util.h +1 -11
  158. package/include/vips/version.h +4 -4
  159. package/include/webp/decode.h +58 -56
  160. package/include/webp/demux.h +25 -21
  161. package/include/webp/encode.h +44 -39
  162. package/include/webp/mux.h +76 -15
  163. package/include/webp/mux_types.h +2 -1
  164. package/include/webp/sharpyuv/sharpyuv.h +77 -8
  165. package/include/webp/types.h +29 -8
  166. package/include/zconf.h +1 -1
  167. package/include/zlib.h +12 -12
  168. package/package.json +1 -1
  169. package/versions.json +18 -19
@@ -1,5 +1,7 @@
1
1
  // Copyright 2020 Google LLC
2
+ // Copyright 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
2
3
  // SPDX-License-Identifier: Apache-2.0
4
+ // SPDX-License-Identifier: BSD-3-Clause
3
5
  //
4
6
  // Licensed under the Apache License, Version 2.0 (the "License");
5
7
  // you may not use this file except in compliance with the License.
@@ -41,9 +43,31 @@
41
43
  #undef HWY_HAVE_FLOAT64
42
44
  #undef HWY_MEM_OPS_MIGHT_FAULT
43
45
  #undef HWY_NATIVE_FMA
46
+ #undef HWY_NATIVE_DOT_BF16
44
47
  #undef HWY_CAP_GE256
45
48
  #undef HWY_CAP_GE512
46
49
 
50
+ #undef HWY_TARGET_IS_SVE
51
+ #if HWY_TARGET & HWY_ALL_SVE
52
+ #define HWY_TARGET_IS_SVE 1
53
+ #else
54
+ #define HWY_TARGET_IS_SVE 0
55
+ #endif
56
+
57
+ #undef HWY_TARGET_IS_NEON
58
+ #if HWY_TARGET & HWY_ALL_NEON
59
+ #define HWY_TARGET_IS_NEON 1
60
+ #else
61
+ #define HWY_TARGET_IS_NEON 0
62
+ #endif
63
+
64
+ #undef HWY_TARGET_IS_PPC
65
+ #if HWY_TARGET & HWY_ALL_PPC
66
+ #define HWY_TARGET_IS_PPC 1
67
+ #else
68
+ #define HWY_TARGET_IS_PPC 0
69
+ #endif
70
+
47
71
  // Supported on all targets except RVV (requires GCC 14 or upcoming Clang)
48
72
  #if HWY_TARGET == HWY_RVV && \
49
73
  ((HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1400) || \
@@ -116,7 +140,21 @@
116
140
  ",vpclmulqdq,avx512vbmi,avx512vbmi2,vaes,avx512vnni,avx512bitalg," \
117
141
  "avx512vpopcntdq,gfni"
118
142
 
119
- #define HWY_TARGET_STR_AVX3_SPR HWY_TARGET_STR_AVX3_DL ",avx512fp16"
143
+ // Force-disable for compilers that do not properly support avx512bf16.
144
+ #if !defined(HWY_AVX3_DISABLE_AVX512BF16) && \
145
+ (HWY_COMPILER_CLANGCL || \
146
+ (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1000) || \
147
+ (HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 900))
148
+ #define HWY_AVX3_DISABLE_AVX512BF16
149
+ #endif
150
+
151
+ #if !defined(HWY_AVX3_DISABLE_AVX512BF16)
152
+ #define HWY_TARGET_STR_AVX3_ZEN4 HWY_TARGET_STR_AVX3_DL ",avx512bf16"
153
+ #else
154
+ #define HWY_TARGET_STR_AVX3_ZEN4 HWY_TARGET_STR_AVX3_DL
155
+ #endif
156
+
157
+ #define HWY_TARGET_STR_AVX3_SPR HWY_TARGET_STR_AVX3_ZEN4 ",avx512fp16"
120
158
 
121
159
  #if defined(HWY_DISABLE_PPC8_CRYPTO)
122
160
  #define HWY_TARGET_STR_PPC8_CRYPTO ""
@@ -131,9 +169,21 @@
131
169
  #if HWY_COMPILER_CLANG
132
170
  #define HWY_TARGET_STR_PPC10 HWY_TARGET_STR_PPC9 ",power10-vector"
133
171
  #else
134
- #define HWY_TARGET_STR_PPC10 HWY_TARGET_STR_PPC9 ",cpu=power10"
172
+ // See #1707 and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102059#c35.
173
+ // When the baseline is PPC 8 or 9, inlining functions such as PreventElision
174
+ // into PPC10 code fails because PPC10 defaults to no-htm and is thus worse than
175
+ // the baseline, which has htm. We cannot have pragma target on functions
176
+ // outside HWY_NAMESPACE such as those in base.h. It would be possible for users
177
+ // to set -mno-htm globally, but we can also work around this at the library
178
+ // level by claiming that PPC10 still has HTM, thus avoiding the mismatch. This
179
+ // seems to be safe because HTM uses builtins rather than modifying codegen, see
180
+ // https://gcc.gnu.org/legacy-ml/gcc-patches/2013-07/msg00167.html.
181
+ #define HWY_TARGET_STR_PPC10 HWY_TARGET_STR_PPC9 ",cpu=power10,htm"
135
182
  #endif
136
183
 
184
+ #define HWY_TARGET_STR_Z14 "arch=z14"
185
+ #define HWY_TARGET_STR_Z15 "arch=z15"
186
+
137
187
  // Before include guard so we redefine HWY_TARGET_STR on each include,
138
188
  // governed by the current HWY_TARGET.
139
189
 
@@ -152,6 +202,7 @@
152
202
  #define HWY_HAVE_FLOAT64 1
153
203
  #define HWY_MEM_OPS_MIGHT_FAULT 1
154
204
  #define HWY_NATIVE_FMA 0
205
+ #define HWY_NATIVE_DOT_BF16 0
155
206
  #define HWY_CAP_GE256 0
156
207
  #define HWY_CAP_GE512 0
157
208
 
@@ -171,6 +222,7 @@
171
222
  #define HWY_HAVE_FLOAT64 1
172
223
  #define HWY_MEM_OPS_MIGHT_FAULT 1
173
224
  #define HWY_NATIVE_FMA 0
225
+ #define HWY_NATIVE_DOT_BF16 0
174
226
  #define HWY_CAP_GE256 0
175
227
  #define HWY_CAP_GE512 0
176
228
 
@@ -191,6 +243,7 @@
191
243
  #define HWY_HAVE_FLOAT64 1
192
244
  #define HWY_MEM_OPS_MIGHT_FAULT 1
193
245
  #define HWY_NATIVE_FMA 0
246
+ #define HWY_NATIVE_DOT_BF16 0
194
247
  #define HWY_CAP_GE256 0
195
248
  #define HWY_CAP_GE512 0
196
249
 
@@ -216,6 +269,7 @@
216
269
  #else
217
270
  #define HWY_NATIVE_FMA 1
218
271
  #endif
272
+ #define HWY_NATIVE_DOT_BF16 0
219
273
 
220
274
  #define HWY_CAP_GE256 1
221
275
  #define HWY_CAP_GE512 0
@@ -233,7 +287,10 @@
233
287
 
234
288
  #define HWY_HAVE_SCALABLE 0
235
289
  #define HWY_HAVE_INTEGER64 1
236
- #if (HWY_TARGET == HWY_AVX3_SPR) && 0 // TODO(janwas): enable after testing
290
+ #if HWY_TARGET == HWY_AVX3_SPR && HWY_COMPILER_GCC_ACTUAL && \
291
+ HWY_HAVE_SCALAR_F16_TYPE
292
+ // TODO: enable F16 for AVX3_SPR target with Clang once compilation issues are
293
+ // fixed
237
294
  #define HWY_HAVE_FLOAT16 1
238
295
  #else
239
296
  #define HWY_HAVE_FLOAT16 0
@@ -241,6 +298,11 @@
241
298
  #define HWY_HAVE_FLOAT64 1
242
299
  #define HWY_MEM_OPS_MIGHT_FAULT 0
243
300
  #define HWY_NATIVE_FMA 1
301
+ #if (HWY_TARGET <= HWY_AVX3_ZEN4) && !defined(HWY_AVX3_DISABLE_AVX512BF16)
302
+ #define HWY_NATIVE_DOT_BF16 1
303
+ #else
304
+ #define HWY_NATIVE_DOT_BF16 0
305
+ #endif
244
306
  #define HWY_CAP_GE256 1
245
307
  #define HWY_CAP_GE512 1
246
308
 
@@ -257,8 +319,7 @@
257
319
  #elif HWY_TARGET == HWY_AVX3_ZEN4
258
320
 
259
321
  #define HWY_NAMESPACE N_AVX3_ZEN4
260
- // Currently the same as HWY_AVX3_DL: both support Icelake.
261
- #define HWY_TARGET_STR HWY_TARGET_STR_AVX3_DL
322
+ #define HWY_TARGET_STR HWY_TARGET_STR_AVX3_ZEN4
262
323
 
263
324
  #elif HWY_TARGET == HWY_AVX3_SPR
264
325
 
@@ -271,8 +332,7 @@
271
332
 
272
333
  //-----------------------------------------------------------------------------
273
334
  // PPC8, PPC9, PPC10
274
- #elif HWY_TARGET == HWY_PPC8 || HWY_TARGET == HWY_PPC9 || \
275
- HWY_TARGET == HWY_PPC10
335
+ #elif HWY_TARGET_IS_PPC
276
336
 
277
337
  #define HWY_ALIGN alignas(16)
278
338
  #define HWY_MAX_BYTES 16
@@ -284,6 +344,7 @@
284
344
  #define HWY_HAVE_FLOAT64 1
285
345
  #define HWY_MEM_OPS_MIGHT_FAULT 1
286
346
  #define HWY_NATIVE_FMA 1
347
+ #define HWY_NATIVE_DOT_BF16 0
287
348
  #define HWY_CAP_GE256 0
288
349
  #define HWY_CAP_GE512 0
289
350
 
@@ -304,11 +365,43 @@
304
365
 
305
366
  #else
306
367
  #error "Logic error"
307
- #endif // HWY_TARGET == HWY_PPC10
368
+ #endif // HWY_TARGET
369
+
370
+ //-----------------------------------------------------------------------------
371
+ // Z14, Z15
372
+ #elif HWY_TARGET == HWY_Z14 || HWY_TARGET == HWY_Z15
373
+
374
+ #define HWY_ALIGN alignas(16)
375
+ #define HWY_MAX_BYTES 16
376
+ #define HWY_LANES(T) (16 / sizeof(T))
377
+
378
+ #define HWY_HAVE_SCALABLE 0
379
+ #define HWY_HAVE_INTEGER64 1
380
+ #define HWY_HAVE_FLOAT16 0
381
+ #define HWY_HAVE_FLOAT64 1
382
+ #define HWY_MEM_OPS_MIGHT_FAULT 1
383
+ #define HWY_NATIVE_FMA 1
384
+ #define HWY_NATIVE_DOT_BF16 0
385
+ #define HWY_CAP_GE256 0
386
+ #define HWY_CAP_GE512 0
387
+
388
+ #if HWY_TARGET == HWY_Z14
389
+
390
+ #define HWY_NAMESPACE N_Z14
391
+ #define HWY_TARGET_STR HWY_TARGET_STR_Z14
392
+
393
+ #elif HWY_TARGET == HWY_Z15
394
+
395
+ #define HWY_NAMESPACE N_Z15
396
+ #define HWY_TARGET_STR HWY_TARGET_STR_Z15
397
+
398
+ #else
399
+ #error "Logic error"
400
+ #endif // HWY_TARGET == HWY_Z15
308
401
 
309
402
  //-----------------------------------------------------------------------------
310
403
  // NEON
311
- #elif HWY_TARGET == HWY_NEON || HWY_TARGET == HWY_NEON_WITHOUT_AES
404
+ #elif HWY_TARGET_IS_NEON
312
405
 
313
406
  #define HWY_ALIGN alignas(16)
314
407
  #define HWY_MAX_BYTES 16
@@ -316,7 +409,7 @@
316
409
 
317
410
  #define HWY_HAVE_SCALABLE 0
318
411
  #define HWY_HAVE_INTEGER64 1
319
- #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
412
+ #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) || HWY_TARGET == HWY_NEON_BF16
320
413
  #define HWY_HAVE_FLOAT16 1
321
414
  #else
322
415
  #define HWY_HAVE_FLOAT16 0
@@ -330,20 +423,29 @@
330
423
 
331
424
  #define HWY_MEM_OPS_MIGHT_FAULT 1
332
425
 
333
- #if defined(__ARM_VFPV4__) || HWY_ARCH_ARM_A64
426
+ #if defined(__ARM_FEATURE_FMA) || defined(__ARM_VFPV4__) || HWY_ARCH_ARM_A64
334
427
  #define HWY_NATIVE_FMA 1
335
428
  #else
336
429
  #define HWY_NATIVE_FMA 0
337
430
  #endif
431
+ #if HWY_NEON_HAVE_F32_TO_BF16C || HWY_TARGET == HWY_NEON_BF16
432
+ #define HWY_NATIVE_DOT_BF16 1
433
+ #else
434
+ #define HWY_NATIVE_DOT_BF16 0
435
+ #endif
338
436
 
339
437
  #define HWY_CAP_GE256 0
340
438
  #define HWY_CAP_GE512 0
341
439
 
342
440
  #if HWY_TARGET == HWY_NEON_WITHOUT_AES
343
441
  #define HWY_NAMESPACE N_NEON_WITHOUT_AES
344
- #else
442
+ #elif HWY_TARGET == HWY_NEON
345
443
  #define HWY_NAMESPACE N_NEON
346
- #endif
444
+ #elif HWY_TARGET == HWY_NEON_BF16
445
+ #define HWY_NAMESPACE N_NEON_BF16
446
+ #else
447
+ #error "Logic error, missing case"
448
+ #endif // HWY_TARGET
347
449
 
348
450
  // Can use pragmas instead of -march compiler flag
349
451
  #if HWY_HAVE_RUNTIME_DISPATCH
@@ -358,21 +460,43 @@
358
460
 
359
461
  #else // !HWY_ARCH_ARM_V7
360
462
 
463
+ #if (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1300) || \
464
+ (HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1300)
465
+ // GCC 12 or earlier and Clang 12 or earlier require +crypto be added to the
466
+ // target string to enable AArch64 AES intrinsics
467
+ #define HWY_TARGET_STR_NEON "+crypto"
468
+ #else
469
+ #define HWY_TARGET_STR_NEON "+aes"
470
+ #endif
471
+
472
+ // Clang >= 16 requires +fullfp16 instead of fp16, but Apple Clang 15 = 1600
473
+ // fails to parse unless the string starts with armv8, whereas 1700 refuses it.
474
+ #if HWY_COMPILER_CLANG >= 1700
475
+ #define HWY_TARGET_STR_FP16 "+fullfp16"
476
+ #elif HWY_COMPILER_CLANG >= 1600 && defined(__apple_build_version__)
477
+ #define HWY_TARGET_STR_FP16 "armv8.4-a+fullfp16"
478
+ #else
479
+ #define HWY_TARGET_STR_FP16 "+fp16"
480
+ #endif
481
+
361
482
  #if HWY_TARGET == HWY_NEON_WITHOUT_AES
362
483
  // Do not define HWY_TARGET_STR (no pragma).
484
+ #elif HWY_TARGET == HWY_NEON
485
+ #define HWY_TARGET_STR HWY_TARGET_STR_NEON
486
+ #elif HWY_TARGET == HWY_NEON_BF16
487
+ #define HWY_TARGET_STR HWY_TARGET_STR_FP16 "+bf16+dotprod" HWY_TARGET_STR_NEON
363
488
  #else
364
- #define HWY_TARGET_STR "+crypto"
365
- #endif // HWY_TARGET == HWY_NEON_WITHOUT_AES
489
+ #error "Logic error, missing case"
490
+ #endif // HWY_TARGET
366
491
 
367
- #endif // HWY_ARCH_ARM_V7
492
+ #endif // !HWY_ARCH_ARM_V7
368
493
  #else // !HWY_HAVE_RUNTIME_DISPATCH
369
494
  // HWY_TARGET_STR remains undefined
370
495
  #endif
371
496
 
372
497
  //-----------------------------------------------------------------------------
373
498
  // SVE[2]
374
- #elif HWY_TARGET == HWY_SVE2 || HWY_TARGET == HWY_SVE || \
375
- HWY_TARGET == HWY_SVE_256 || HWY_TARGET == HWY_SVE2_128
499
+ #elif HWY_TARGET_IS_SVE
376
500
 
377
501
  // SVE only requires lane alignment, not natural alignment of the entire vector.
378
502
  #define HWY_ALIGN alignas(8)
@@ -382,10 +506,15 @@
382
506
  #define HWY_LANES(T) ((HWY_MAX_BYTES) / sizeof(T))
383
507
 
384
508
  #define HWY_HAVE_INTEGER64 1
385
- #define HWY_HAVE_FLOAT16 0
509
+ #define HWY_HAVE_FLOAT16 1
386
510
  #define HWY_HAVE_FLOAT64 1
387
511
  #define HWY_MEM_OPS_MIGHT_FAULT 0
388
512
  #define HWY_NATIVE_FMA 1
513
+ #if HWY_SVE_HAVE_BF16_FEATURE
514
+ #define HWY_NATIVE_DOT_BF16 1
515
+ #else
516
+ #define HWY_NATIVE_DOT_BF16 0
517
+ #endif
389
518
  #define HWY_CAP_GE256 0
390
519
  #define HWY_CAP_GE512 0
391
520
 
@@ -410,11 +539,17 @@
410
539
  // Can use pragmas instead of -march compiler flag
411
540
  #if HWY_HAVE_RUNTIME_DISPATCH
412
541
  #if HWY_TARGET == HWY_SVE2 || HWY_TARGET == HWY_SVE2_128
413
- #define HWY_TARGET_STR "+sve2-aes"
414
- #else
542
+ // Static dispatch with -march=armv8-a+sve2+aes, or no baseline, hence dynamic
543
+ // dispatch, which checks for AES support at runtime.
544
+ #if defined(__ARM_FEATURE_SVE2_AES) || (HWY_BASELINE_SVE2 == 0)
545
+ #define HWY_TARGET_STR "+sve2+sve2-aes,+sve"
546
+ #else // SVE2 without AES
547
+ #define HWY_TARGET_STR "+sve2,+sve"
548
+ #endif
549
+ #else // not SVE2 target
415
550
  #define HWY_TARGET_STR "+sve"
416
551
  #endif
417
- #else
552
+ #else // !HWY_HAVE_RUNTIME_DISPATCH
418
553
  // HWY_TARGET_STR remains undefined
419
554
  #endif
420
555
 
@@ -432,6 +567,7 @@
432
567
  #define HWY_HAVE_FLOAT64 1
433
568
  #define HWY_MEM_OPS_MIGHT_FAULT 1
434
569
  #define HWY_NATIVE_FMA 0
570
+ #define HWY_NATIVE_DOT_BF16 0
435
571
  #define HWY_CAP_GE256 0
436
572
  #define HWY_CAP_GE512 0
437
573
 
@@ -453,6 +589,7 @@
453
589
  #define HWY_HAVE_FLOAT64 0
454
590
  #define HWY_MEM_OPS_MIGHT_FAULT 1
455
591
  #define HWY_NATIVE_FMA 0
592
+ #define HWY_NATIVE_DOT_BF16 0
456
593
  #define HWY_CAP_GE256 1
457
594
  #define HWY_CAP_GE512 0
458
595
 
@@ -480,10 +617,11 @@
480
617
  #define HWY_HAVE_FLOAT64 1
481
618
  #define HWY_MEM_OPS_MIGHT_FAULT 0
482
619
  #define HWY_NATIVE_FMA 1
620
+ #define HWY_NATIVE_DOT_BF16 0
483
621
  #define HWY_CAP_GE256 0
484
622
  #define HWY_CAP_GE512 0
485
623
 
486
- #if defined(__riscv_zvfh)
624
+ #if HWY_RVV_HAVE_F16_VEC
487
625
  #define HWY_HAVE_FLOAT16 1
488
626
  #else
489
627
  #define HWY_HAVE_FLOAT16 0
@@ -508,6 +646,7 @@
508
646
  #define HWY_HAVE_FLOAT64 1
509
647
  #define HWY_MEM_OPS_MIGHT_FAULT 1
510
648
  #define HWY_NATIVE_FMA 0
649
+ #define HWY_NATIVE_DOT_BF16 0
511
650
  #define HWY_CAP_GE256 0
512
651
  #define HWY_CAP_GE512 0
513
652
 
@@ -529,6 +668,7 @@
529
668
  #define HWY_HAVE_FLOAT64 1
530
669
  #define HWY_MEM_OPS_MIGHT_FAULT 0
531
670
  #define HWY_NATIVE_FMA 0
671
+ #define HWY_NATIVE_DOT_BF16 0
532
672
  #define HWY_CAP_GE256 0
533
673
  #define HWY_CAP_GE512 0
534
674
 
@@ -540,6 +680,14 @@
540
680
  #pragma message("HWY_TARGET does not match any known target")
541
681
  #endif // HWY_TARGET
542
682
 
683
+ //-----------------------------------------------------------------------------
684
+
685
+ // Sanity check: if we have f16 vector support, then base.h should also be
686
+ // using a built-in type for f16 scalars.
687
+ #if HWY_HAVE_FLOAT16 && !HWY_HAVE_SCALAR_F16_TYPE
688
+ #error "Logic error: f16 vectors but no scalars"
689
+ #endif
690
+
543
691
  // Override this to 1 in asan/msan builds, which will still fault.
544
692
  #if HWY_IS_ASAN || HWY_IS_MSAN
545
693
  #undef HWY_MEM_OPS_MIGHT_FAULT