@img/sharp-libvips-dev 1.2.1 → 1.2.2-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. package/include/aom/aom_decoder.h +1 -1
  2. package/include/aom/aom_encoder.h +2 -0
  3. package/include/aom/aomcx.h +106 -25
  4. package/include/ffi.h +3 -3
  5. package/include/freetype2/freetype/config/ftconfig.h +1 -1
  6. package/include/freetype2/freetype/config/ftheader.h +1 -1
  7. package/include/freetype2/freetype/config/ftoption.h +37 -12
  8. package/include/freetype2/freetype/config/ftstdlib.h +1 -1
  9. package/include/freetype2/freetype/config/integer-types.h +29 -2
  10. package/include/freetype2/freetype/config/mac-support.h +1 -1
  11. package/include/freetype2/freetype/config/public-macros.h +3 -3
  12. package/include/freetype2/freetype/freetype.h +51 -47
  13. package/include/freetype2/freetype/ftadvanc.h +1 -1
  14. package/include/freetype2/freetype/ftbbox.h +1 -1
  15. package/include/freetype2/freetype/ftbdf.h +1 -1
  16. package/include/freetype2/freetype/ftbitmap.h +1 -1
  17. package/include/freetype2/freetype/ftbzip2.h +1 -1
  18. package/include/freetype2/freetype/ftcache.h +1 -1
  19. package/include/freetype2/freetype/ftcid.h +1 -1
  20. package/include/freetype2/freetype/ftcolor.h +13 -4
  21. package/include/freetype2/freetype/ftdriver.h +3 -3
  22. package/include/freetype2/freetype/fterrdef.h +1 -1
  23. package/include/freetype2/freetype/fterrors.h +1 -1
  24. package/include/freetype2/freetype/ftfntfmt.h +1 -1
  25. package/include/freetype2/freetype/ftgasp.h +1 -1
  26. package/include/freetype2/freetype/ftglyph.h +1 -1
  27. package/include/freetype2/freetype/ftgxval.h +1 -1
  28. package/include/freetype2/freetype/ftgzip.h +1 -1
  29. package/include/freetype2/freetype/ftimage.h +6 -2
  30. package/include/freetype2/freetype/ftincrem.h +1 -1
  31. package/include/freetype2/freetype/ftlcdfil.h +1 -1
  32. package/include/freetype2/freetype/ftlist.h +1 -1
  33. package/include/freetype2/freetype/ftlogging.h +184 -0
  34. package/include/freetype2/freetype/ftlzw.h +1 -1
  35. package/include/freetype2/freetype/ftmac.h +1 -1
  36. package/include/freetype2/freetype/ftmm.h +159 -103
  37. package/include/freetype2/freetype/ftmodapi.h +1 -1
  38. package/include/freetype2/freetype/ftmoderr.h +1 -1
  39. package/include/freetype2/freetype/ftotval.h +1 -1
  40. package/include/freetype2/freetype/ftoutln.h +1 -1
  41. package/include/freetype2/freetype/ftparams.h +1 -1
  42. package/include/freetype2/freetype/ftpfr.h +1 -1
  43. package/include/freetype2/freetype/ftrender.h +1 -1
  44. package/include/freetype2/freetype/ftsizes.h +1 -1
  45. package/include/freetype2/freetype/ftsnames.h +1 -1
  46. package/include/freetype2/freetype/ftstroke.h +1 -1
  47. package/include/freetype2/freetype/ftsynth.h +1 -1
  48. package/include/freetype2/freetype/ftsystem.h +1 -1
  49. package/include/freetype2/freetype/fttrigon.h +1 -1
  50. package/include/freetype2/freetype/fttypes.h +1 -1
  51. package/include/freetype2/freetype/ftwinfnt.h +2 -3
  52. package/include/freetype2/freetype/otsvg.h +1 -1
  53. package/include/freetype2/freetype/t1tables.h +1 -1
  54. package/include/freetype2/freetype/ttnameid.h +129 -129
  55. package/include/freetype2/freetype/tttables.h +8 -5
  56. package/include/freetype2/freetype/tttags.h +1 -1
  57. package/include/freetype2/ft2build.h +1 -1
  58. package/include/glib-2.0/gio/gdbuserror.h +9 -8
  59. package/include/glib-2.0/gio/ginetaddress.h +12 -0
  60. package/include/glib-2.0/gio/gioenums.h +9 -2
  61. package/include/glib-2.0/glib/gstring.h +2 -2
  62. package/include/glib-2.0/glib/gunicode.h +1 -1
  63. package/include/glib-2.0/gobject/glib-types.h +1 -1
  64. package/include/glib-2.0/gobject/gparam.h +1 -1
  65. package/include/glib-2.0/gobject/gvalue.h +78 -35
  66. package/include/harfbuzz/hb-script-list.h +12 -0
  67. package/include/harfbuzz/hb-version.h +3 -3
  68. package/include/hwy/abort.h +2 -19
  69. package/include/hwy/aligned_allocator.h +11 -7
  70. package/include/hwy/auto_tune.h +504 -0
  71. package/include/hwy/base.h +425 -104
  72. package/include/hwy/cache_control.h +16 -0
  73. package/include/hwy/detect_compiler_arch.h +32 -1
  74. package/include/hwy/detect_targets.h +251 -67
  75. package/include/hwy/foreach_target.h +35 -0
  76. package/include/hwy/highway.h +185 -76
  77. package/include/hwy/nanobenchmark.h +1 -19
  78. package/include/hwy/ops/arm_neon-inl.h +969 -458
  79. package/include/hwy/ops/arm_sve-inl.h +1137 -359
  80. package/include/hwy/ops/emu128-inl.h +97 -11
  81. package/include/hwy/ops/generic_ops-inl.h +1222 -34
  82. package/include/hwy/ops/loongarch_lasx-inl.h +4664 -0
  83. package/include/hwy/ops/loongarch_lsx-inl.h +5933 -0
  84. package/include/hwy/ops/ppc_vsx-inl.h +306 -126
  85. package/include/hwy/ops/rvv-inl.h +546 -51
  86. package/include/hwy/ops/scalar-inl.h +77 -22
  87. package/include/hwy/ops/set_macros-inl.h +138 -17
  88. package/include/hwy/ops/shared-inl.h +50 -10
  89. package/include/hwy/ops/wasm_128-inl.h +137 -92
  90. package/include/hwy/ops/x86_128-inl.h +773 -214
  91. package/include/hwy/ops/x86_256-inl.h +712 -255
  92. package/include/hwy/ops/x86_512-inl.h +429 -753
  93. package/include/hwy/ops/x86_avx3-inl.h +501 -0
  94. package/include/hwy/per_target.h +2 -1
  95. package/include/hwy/profiler.h +622 -486
  96. package/include/hwy/targets.h +62 -20
  97. package/include/hwy/timer-inl.h +8 -160
  98. package/include/hwy/timer.h +170 -3
  99. package/include/hwy/x86_cpuid.h +81 -0
  100. package/include/libheif/heif_cxx.h +25 -5
  101. package/include/libheif/heif_regions.h +5 -5
  102. package/include/libheif/heif_version.h +2 -2
  103. package/include/librsvg-2.0/librsvg/rsvg-version.h +3 -3
  104. package/include/libxml2/libxml/valid.h +0 -3
  105. package/include/libxml2/libxml/xmlerror.h +1 -1
  106. package/include/libxml2/libxml/xmlversion.h +4 -4
  107. package/include/pango-1.0/pango/pango-enum-types.h +3 -0
  108. package/include/pango-1.0/pango/pango-features.h +3 -3
  109. package/include/pango-1.0/pango/pango-font.h +30 -0
  110. package/include/pango-1.0/pango/pango-version-macros.h +26 -0
  111. package/include/vips/connection.h +4 -4
  112. package/include/vips/version.h +4 -4
  113. package/include/zlib.h +3 -3
  114. package/package.json +1 -1
  115. package/versions.json +13 -13
@@ -16,6 +16,7 @@
16
16
  #ifndef HIGHWAY_HWY_CACHE_CONTROL_H_
17
17
  #define HIGHWAY_HWY_CACHE_CONTROL_H_
18
18
 
19
+ #include "hwy/aligned_allocator.h" // HWY_ALIGNMENT
19
20
  #include "hwy/base.h"
20
21
 
21
22
  // Requires SSE2; fails to compile on 32-bit Clang 7 (see
@@ -66,6 +67,21 @@ HWY_INLINE HWY_ATTR_CACHE void LoadFence() {
66
67
  // TODO(janwas): remove when this function is removed. (See above.)
67
68
  #pragma pop_macro("LoadFence")
68
69
 
70
+ // Overwrites "to" while attempting to bypass the cache (read-for-ownership).
71
+ // Both pointers must be aligned.
72
+ static HWY_INLINE void StreamCacheLine(const uint64_t* HWY_RESTRICT from,
73
+ uint64_t* HWY_RESTRICT to) {
74
+ HWY_DASSERT(IsAligned(from));
75
+ HWY_DASSERT(IsAligned(to));
76
+ #if HWY_COMPILER_CLANG && !defined(HWY_DISABLE_CACHE_CONTROL)
77
+ for (size_t i = 0; i < HWY_ALIGNMENT / sizeof(uint64_t); ++i) {
78
+ __builtin_nontemporal_store(from[i], to + i);
79
+ }
80
+ #else
81
+ hwy::CopyBytes(from, to, HWY_ALIGNMENT);
82
+ #endif
83
+ }
84
+
69
85
  // Ensures values written by previous `Stream` calls are visible on the current
70
86
  // core. This is NOT sufficient for synchronizing across cores; when `Stream`
71
87
  // outputs are to be consumed by other core(s), the producer must publish
@@ -192,6 +192,18 @@
192
192
  #define HWY_IF_CONSTEXPR if
193
193
  #endif
194
194
 
195
+ // Use for constexpr variables at namespace scope in headers. Constexpr is
196
+ // separate to allow using `HWY_CXX14_CONSTEXPR` if required.
197
+ #ifndef HWY_INLINE_VAR
198
+ #if __cplusplus > 201402L
199
+ // C++17: mark as COMDAT to ensure linkers de-duplicate it. See
200
+ // https://quuxplusone.github.io/blog/2022/07/08/inline-constexpr/
201
+ #define HWY_INLINE_VAR inline
202
+ #else
203
+ #define HWY_INLINE_VAR
204
+ #endif
205
+ #endif
206
+
195
207
  //------------------------------------------------------------------------------
196
208
  // Architecture
197
209
 
@@ -303,10 +315,29 @@
303
315
  #define HWY_ARCH_S390X 0
304
316
  #endif
305
317
 
318
+ #if defined(__loongarch64__) || defined(__loongarch64) || \
319
+ (defined(__loongarch_grlen) && __loongarch_grlen == 64)
320
+ #define HWY_ARCH_LOONGARCH_64 1
321
+ #else
322
+ #define HWY_ARCH_LOONGARCH_64 0
323
+ #endif
324
+
325
+ #if defined(__loongarch__) && !HWY_ARCH_LOONGARCH_64
326
+ #define HWY_ARCH_LOONGARCH_32 1
327
+ #else
328
+ #define HWY_ARCH_LOONGARCH_32 0
329
+ #endif
330
+
331
+ #if HWY_ARCH_LOONGARCH_64 || HWY_ARCH_LOONGARCH_32
332
+ #define HWY_ARCH_LOONGARCH 1
333
+ #else
334
+ #define HWY_ARCH_LOONGARCH 0
335
+ #endif
336
+
306
337
  // It is an error to detect multiple architectures at the same time, but OK to
307
338
  // detect none of the above.
308
339
  #if (HWY_ARCH_X86 + HWY_ARCH_PPC + HWY_ARCH_ARM + HWY_ARCH_ARM_OLD + \
309
- HWY_ARCH_WASM + HWY_ARCH_RISCV + HWY_ARCH_S390X) > 1
340
+ HWY_ARCH_WASM + HWY_ARCH_RISCV + HWY_ARCH_S390X + HWY_ARCH_LOONGARCH) > 1
310
341
  #error "Must not detect more than one architecture"
311
342
  #endif
312
343
 
@@ -59,20 +59,20 @@
59
59
  // left-shifting 2^62), but still do not use bit 63 because it is the sign bit.
60
60
 
61
61
  // --------------------------- x86: 15 targets (+ one fallback)
62
- // Bits 0..3 reserved (4 targets)
62
+ // Bits 0..2 reserved (3 targets)
63
+ #define HWY_AVX10_2 (1LL << 3) // AVX10.2 with 512-bit vectors
63
64
  #define HWY_AVX3_SPR (1LL << 4)
64
- // Bit 5 reserved (likely AVX10.2 with 256-bit vectors)
65
- // Currently HWY_AVX3_DL plus AVX512BF16 and a special case for CompressStore
66
- // (10x as fast).
67
- // We may later also use VPCONFLICT.
65
+ // Bit 5: reserved (1 target)
66
+ // Currently `HWY_AVX3_DL` plus `AVX512BF16` and a special case for
67
+ // `CompressStore` (10x as fast, still useful on Zen5). We may later also use
68
+ // `VPCONFLICT`. Note that `VP2INTERSECT` is available in Zen5.
68
69
  #define HWY_AVX3_ZEN4 (1LL << 6) // see HWY_WANT_AVX3_ZEN4 below
69
70
 
70
- // Currently satisfiable by Ice Lake (VNNI, VPCLMULQDQ, VPOPCNTDQ, VBMI, VBMI2,
71
- // VAES, BITALG, GFNI). Later to be added: BF16 (Cooper Lake). VP2INTERSECT is
72
- // only in Tiger Lake?
73
- #define HWY_AVX3_DL (1LL << 7) // see HWY_WANT_AVX3_DL below
74
- #define HWY_AVX3 (1LL << 8) // HWY_AVX2 plus AVX-512F/BW/CD/DQ/VL
75
- #define HWY_AVX2 (1LL << 9) // HWY_SSE4 plus BMI2 + F16 + FMA
71
+ // Currently satisfiable by Ice Lake (`VNNI`, `VPCLMULQDQ`, `VPOPCNTDQ`,
72
+ // `VBMI`, `VBMI2`, `VAES`, `BITALG`, `GFNI`).
73
+ #define HWY_AVX3_DL (1LL << 7)
74
+ #define HWY_AVX3 (1LL << 8) // HWY_AVX2 plus AVX-512F/BW/CD/DQ/VL
75
+ #define HWY_AVX2 (1LL << 9) // HWY_SSE4 plus BMI2 + F16 + FMA
76
76
  // Bit 10: reserved
77
77
  #define HWY_SSE4 (1LL << 11) // SSE4.2 plus AES + CLMUL
78
78
  #define HWY_SSSE3 (1LL << 12) // S-SSE3
@@ -107,8 +107,14 @@
107
107
  // Bit 38 reserved
108
108
  #define HWY_HIGHEST_TARGET_BIT_RVV 38
109
109
 
110
- // --------------------------- Future expansion: 4 targets
111
- // Bits 39..42 reserved
110
+ // --------------------------- LoongArch: 3 targets (+ one fallback)
111
+ // Bit 39 reserved (1 target)
112
+ #define HWY_LASX (1LL << 40)
113
+ #define HWY_LSX (1LL << 41)
114
+ #define HWY_HIGHEST_TARGET_BIT_LOONGARCH 41
115
+
116
+ // --------------------------- Future expansion: 1 target
117
+ // Bit 42 reserved
112
118
 
113
119
  // --------------------------- IBM Power/ZSeries: 9 targets (+ one fallback)
114
120
  // Bits 43..46 reserved (4 targets)
@@ -149,10 +155,10 @@
149
155
  // Broken means excluded from enabled due to known compiler issues. We define
150
156
  // separate HWY_BROKEN_* and then OR them together (more than one might apply).
151
157
 
158
+ #ifndef HWY_BROKEN_CLANG6 // allow override
152
159
  // x86 clang-6: we saw multiple AVX2/3 compile errors and in one case invalid
153
160
  // SSE4 codegen (possibly only for msan), so disable all those targets.
154
161
  #if HWY_ARCH_X86 && (HWY_COMPILER_CLANG != 0 && HWY_COMPILER_CLANG < 700)
155
-
156
162
  #define HWY_BROKEN_CLANG6 (HWY_SSE4 | (HWY_SSE4 - 1))
157
163
  // This entails a major speed reduction, so warn unless the user explicitly
158
164
  // opts in to scalar-only.
@@ -163,21 +169,32 @@
163
169
  #else
164
170
  #define HWY_BROKEN_CLANG6 0
165
171
  #endif
172
+ #endif // HWY_BROKEN_CLANG6
166
173
 
174
+ #ifndef HWY_BROKEN_32BIT // allow override
167
175
  // 32-bit may fail to compile AVX2/3.
168
176
  #if HWY_ARCH_X86_32
177
+ // GCC-13 is ok with AVX2:
178
+ #if (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL >= 1300)
179
+ #define HWY_BROKEN_32BIT (HWY_AVX3 | (HWY_AVX3 - 1))
180
+ #else
169
181
  #define HWY_BROKEN_32BIT (HWY_AVX2 | (HWY_AVX2 - 1))
182
+ #endif
170
183
  #else
171
184
  #define HWY_BROKEN_32BIT 0
172
185
  #endif
186
+ #endif // HWY_BROKEN_32BIT
173
187
 
188
+ #ifndef HWY_BROKEN_MSVC // allow override
174
189
  // MSVC AVX3 support is buggy: https://github.com/Mysticial/Flops/issues/16
175
190
  #if HWY_COMPILER_MSVC != 0
176
191
  #define HWY_BROKEN_MSVC (HWY_AVX3 | (HWY_AVX3 - 1))
177
192
  #else
178
193
  #define HWY_BROKEN_MSVC 0
179
194
  #endif
195
+ #endif // HWY_BROKEN_MSVC
180
196
 
197
+ #ifndef HWY_BROKEN_AVX3_DL_ZEN4 // allow override
181
198
  // AVX3_DL and AVX3_ZEN4 require clang >= 7 (ensured above), gcc >= 8.1 or ICC
182
199
  // 2021.
183
200
  #if (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 801) || \
@@ -186,7 +203,9 @@
186
203
  #else
187
204
  #define HWY_BROKEN_AVX3_DL_ZEN4 0
188
205
  #endif
206
+ #endif // HWY_BROKEN_AVX3_DL_ZEN4
189
207
 
208
+ #ifndef HWY_BROKEN_AVX3_SPR // allow override
190
209
  // AVX3_SPR requires clang >= 14, gcc >= 12, or ICC 2021.
191
210
  #if (HWY_COMPILER_CLANG != 0 && HWY_COMPILER_CLANG < 1400) || \
192
211
  (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1200) || \
@@ -195,25 +214,37 @@
195
214
  #else
196
215
  #define HWY_BROKEN_AVX3_SPR 0
197
216
  #endif
217
+ #endif // HWY_BROKEN_AVX3_SPR
198
218
 
219
+ #ifndef HWY_BROKEN_ARM7_BIG_ENDIAN // allow override
199
220
  // armv7be has not been tested and is not yet supported.
200
221
  #if HWY_ARCH_ARM_V7 && HWY_IS_BIG_ENDIAN
201
222
  #define HWY_BROKEN_ARM7_BIG_ENDIAN HWY_ALL_NEON
202
223
  #else
203
224
  #define HWY_BROKEN_ARM7_BIG_ENDIAN 0
204
225
  #endif
226
+ #endif // HWY_BROKEN_ARM7_BIG_ENDIAN
205
227
 
228
+ #ifdef __ARM_NEON_FP
229
+ #define HWY_HAVE_NEON_FP __ARM_NEON_FP
230
+ #else
231
+ #define HWY_HAVE_NEON_FP 0
232
+ #endif
233
+
234
+ #ifndef HWY_BROKEN_ARM7_WITHOUT_VFP4 // allow override
206
235
  // armv7-a without a detected vfpv4 is not supported
207
236
  // (for example Cortex-A8, Cortex-A9)
208
237
  // vfpv4 always have neon half-float _and_ FMA.
209
238
  #if HWY_ARCH_ARM_V7 && (__ARM_ARCH_PROFILE == 'A') && \
210
239
  !defined(__ARM_VFPV4__) && \
211
- !((__ARM_NEON_FP & 0x2 /* half-float */) && (__ARM_FEATURE_FMA == 1))
240
+ !((HWY_HAVE_NEON_FP & 0x2 /* half-float */) && (__ARM_FEATURE_FMA == 1))
212
241
  #define HWY_BROKEN_ARM7_WITHOUT_VFP4 HWY_ALL_NEON
213
242
  #else
214
243
  #define HWY_BROKEN_ARM7_WITHOUT_VFP4 0
215
244
  #endif
245
+ #endif // HWY_BROKEN_ARM7_WITHOUT_VFP4
216
246
 
247
+ #ifndef HWY_BROKEN_NEON_BF16 // allow override
217
248
  // HWY_NEON_BF16 requires recent compilers.
218
249
  #if (HWY_COMPILER_CLANG != 0 && HWY_COMPILER_CLANG < 1700) || \
219
250
  (HWY_COMPILER_GCC_ACTUAL != 0 && HWY_COMPILER_GCC_ACTUAL < 1302)
@@ -221,15 +252,34 @@
221
252
  #else
222
253
  #define HWY_BROKEN_NEON_BF16 0
223
254
  #endif
255
+ #endif // HWY_BROKEN_NEON_BF16
224
256
 
225
257
  // SVE[2] require recent clang or gcc versions.
226
- #if (HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1100) || \
227
- (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1000)
228
- #define HWY_BROKEN_SVE (HWY_SVE | HWY_SVE2 | HWY_SVE_256 | HWY_SVE2_128)
258
+
259
+ #ifndef HWY_BROKEN_SVE // allow override
260
+ // GCC 10+. Clang 19 still has many test failures for SVE. No Apple CPU (at
261
+ // least up to and including M4 and A18) has SVE.
262
+ #if (HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 2000) || \
263
+ (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1000) || \
264
+ HWY_OS_APPLE
265
+ #define HWY_BROKEN_SVE (HWY_SVE | HWY_SVE_256)
229
266
  #else
230
267
  #define HWY_BROKEN_SVE 0
231
268
  #endif
269
+ #endif // HWY_BROKEN_SVE
270
+
271
+ #ifndef HWY_BROKEN_SVE2 // allow override
272
+ // Clang 19 still has many test failures for SVE2.
273
+ #if (HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 2000) || \
274
+ (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1000) || \
275
+ HWY_OS_APPLE
276
+ #define HWY_BROKEN_SVE2 (HWY_SVE2 | HWY_SVE2_128)
277
+ #else
278
+ #define HWY_BROKEN_SVE2 0
279
+ #endif
280
+ #endif // HWY_BROKEN_SVE2
232
281
 
282
+ #ifndef HWY_BROKEN_PPC10 // allow override
233
283
  #if (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1100)
234
284
  // GCC 10 supports the -mcpu=power10 option but does not support the PPC10
235
285
  // vector intrinsics
@@ -257,6 +307,55 @@
257
307
  #else
258
308
  #define HWY_BROKEN_PPC10 0
259
309
  #endif
310
+ #endif // HWY_BROKEN_PPC10
311
+
312
+ #ifndef HWY_BROKEN_PPC_32BIT // allow override
313
+ // PPC8/PPC9/PPC10 targets may fail to compile on 32-bit PowerPC
314
+ #if HWY_ARCH_PPC && !HWY_ARCH_PPC_64
315
+ #define HWY_BROKEN_PPC_32BIT (HWY_PPC8 | HWY_PPC9 | HWY_PPC10)
316
+ #else
317
+ #define HWY_BROKEN_PPC_32BIT 0
318
+ #endif
319
+ #endif // HWY_BROKEN_PPC_32BIT
320
+
321
+ #ifndef HWY_BROKEN_RVV // allow override
322
+ // HWY_RVV fails to compile with GCC < 13 or Clang < 16.
323
+ #if HWY_ARCH_RISCV && \
324
+ ((HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1600) || \
325
+ (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1300))
326
+ #define HWY_BROKEN_RVV (HWY_RVV)
327
+ #else
328
+ #define HWY_BROKEN_RVV 0
329
+ #endif
330
+ #endif // HWY_BROKEN_RVV
331
+
332
+ #ifndef HWY_BROKEN_LOONGARCH // allow override
333
+ // Using __loongarch_sx and __loongarch_asx macros to
334
+ // check whether LSX/LASX targets are available.
335
+ #if !defined(__loongarch_sx)
336
+ #define HWY_BROKEN_LOONGARCH (HWY_LSX | HWY_LASX)
337
+ #elif !defined(__loongarch_asx)
338
+ #define HWY_BROKEN_LOONGARCH (HWY_LASX)
339
+ #else
340
+ #define HWY_BROKEN_LOONGARCH 0
341
+ #endif
342
+ #endif // HWY_BROKEN_LOONGARCH
343
+
344
+ #ifndef HWY_BROKEN_Z14 // allow override
345
+ #if HWY_ARCH_S390X
346
+ #if HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1900
347
+ // Clang 18 and earlier have bugs with some ZVector intrinsics
348
+ #define HWY_BROKEN_Z14 (HWY_Z14 | HWY_Z15)
349
+ #elif HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 900
350
+ // Z15 target requires GCC 9 or later
351
+ #define HWY_BROKEN_Z14 (HWY_Z15)
352
+ #else
353
+ #define HWY_BROKEN_Z14 0
354
+ #endif
355
+ #else // !HWY_ARCH_S390X
356
+ #define HWY_BROKEN_Z14 0
357
+ #endif // HWY_ARCH_S390X
358
+ #endif // HWY_BROKEN_Z14
260
359
 
261
360
  // Allow the user to override this without any guarantee of success.
262
361
  #ifndef HWY_BROKEN_TARGETS
@@ -265,7 +364,9 @@
265
364
  (HWY_BROKEN_CLANG6 | HWY_BROKEN_32BIT | HWY_BROKEN_MSVC | \
266
365
  HWY_BROKEN_AVX3_DL_ZEN4 | HWY_BROKEN_AVX3_SPR | \
267
366
  HWY_BROKEN_ARM7_BIG_ENDIAN | HWY_BROKEN_ARM7_WITHOUT_VFP4 | \
268
- HWY_BROKEN_NEON_BF16 | HWY_BROKEN_SVE | HWY_BROKEN_PPC10)
367
+ HWY_BROKEN_NEON_BF16 | HWY_BROKEN_SVE | HWY_BROKEN_SVE2 | \
368
+ HWY_BROKEN_PPC10 | HWY_BROKEN_PPC_32BIT | HWY_BROKEN_RVV | \
369
+ HWY_BROKEN_LOONGARCH | HWY_BROKEN_Z14)
269
370
 
270
371
  #endif // HWY_BROKEN_TARGETS
271
372
 
@@ -279,7 +380,7 @@
279
380
  // because it affects the fallback target, which must always be enabled. If 1,
280
381
  // we instead choose HWY_SCALAR even without HWY_COMPILE_ONLY_SCALAR being set.
281
382
  #if !defined(HWY_BROKEN_EMU128) // allow overriding
282
- #if (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1400) || \
383
+ #if (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1600) || \
283
384
  defined(HWY_NO_LIBCXX)
284
385
  #define HWY_BROKEN_EMU128 1
285
386
  #else
@@ -471,19 +572,22 @@
471
572
 
472
573
  #endif // non-MSVC
473
574
 
474
- #if HWY_ARCH_X86 && (HWY_WANT_SSE2 || HWY_CHECK_SSE2)
575
+ #if HWY_ARCH_X86 && \
576
+ ((defined(HWY_WANT_SSE2) && HWY_WANT_SSE2) || HWY_CHECK_SSE2)
475
577
  #define HWY_BASELINE_SSE2 HWY_SSE2
476
578
  #else
477
579
  #define HWY_BASELINE_SSE2 0
478
580
  #endif
479
581
 
480
- #if HWY_ARCH_X86 && (HWY_WANT_SSSE3 || HWY_CHECK_SSSE3)
582
+ #if HWY_ARCH_X86 && \
583
+ ((defined(HWY_WANT_SSSE3) && HWY_WANT_SSSE3) || HWY_CHECK_SSSE3)
481
584
  #define HWY_BASELINE_SSSE3 HWY_SSSE3
482
585
  #else
483
586
  #define HWY_BASELINE_SSSE3 0
484
587
  #endif
485
588
 
486
- #if HWY_ARCH_X86 && (HWY_WANT_SSE4 || (HWY_CHECK_SSE4 && HWY_CHECK_PCLMUL_AES))
589
+ #if HWY_ARCH_X86 && ((defined(HWY_WANT_SSE4) && HWY_WANT_SSE4) || \
590
+ (HWY_CHECK_SSE4 && HWY_CHECK_PCLMUL_AES))
487
591
  #define HWY_BASELINE_SSE4 HWY_SSE4
488
592
  #else
489
593
  #define HWY_BASELINE_SSE4 0
@@ -497,18 +601,25 @@
497
601
  #endif
498
602
 
499
603
  // Require everything in AVX2 plus AVX-512 flags (also set by MSVC)
500
- #if HWY_BASELINE_AVX2 != 0 && defined(__AVX512F__) && defined(__AVX512BW__) && \
501
- defined(__AVX512DQ__) && defined(__AVX512VL__)
604
+ #if HWY_BASELINE_AVX2 != 0 && \
605
+ ((defined(__AVX512F__) && defined(__AVX512BW__) && \
606
+ defined(__AVX512DQ__) && defined(__AVX512VL__)) || \
607
+ defined(__AVX10_2__)) && \
608
+ ((!HWY_COMPILER_GCC_ACTUAL && !HWY_COMPILER_CLANG) || \
609
+ HWY_COMPILER_GCC_ACTUAL < 1400 || HWY_COMPILER_CLANG < 1800 || \
610
+ defined(__EVEX512__))
502
611
  #define HWY_BASELINE_AVX3 HWY_AVX3
503
612
  #else
504
613
  #define HWY_BASELINE_AVX3 0
505
614
  #endif
506
615
 
507
616
  // TODO(janwas): not yet known whether these will be set by MSVC
508
- #if HWY_BASELINE_AVX3 != 0 && defined(__AVX512VNNI__) && defined(__VAES__) && \
509
- defined(__VPCLMULQDQ__) && defined(__AVX512VBMI__) && \
510
- defined(__AVX512VBMI2__) && defined(__AVX512VPOPCNTDQ__) && \
511
- defined(__AVX512BITALG__)
617
+ #if HWY_BASELINE_AVX3 != 0 && \
618
+ ((defined(__AVX512VNNI__) && defined(__VAES__) && \
619
+ defined(__VPCLMULQDQ__) && defined(__AVX512VBMI__) && \
620
+ defined(__AVX512VBMI2__) && defined(__AVX512VPOPCNTDQ__) && \
621
+ defined(__AVX512BITALG__)) || \
622
+ defined(__AVX10_2__))
512
623
  #define HWY_BASELINE_AVX3_DL HWY_AVX3_DL
513
624
  #else
514
625
  #define HWY_BASELINE_AVX3_DL 0
@@ -523,21 +634,41 @@
523
634
  #define HWY_BASELINE_AVX3_ZEN4 0
524
635
  #endif
525
636
 
526
- #if HWY_BASELINE_AVX3_DL != 0 && defined(__AVX512BF16__) && \
527
- defined(__AVX512FP16__)
637
+ #if HWY_BASELINE_AVX3_DL != 0 && \
638
+ ((defined(__AVX512BF16__) && defined(__AVX512FP16__)) || \
639
+ defined(__AVX10_2__))
528
640
  #define HWY_BASELINE_AVX3_SPR HWY_AVX3_SPR
529
641
  #else
530
642
  #define HWY_BASELINE_AVX3_SPR 0
531
643
  #endif
532
644
 
645
+ #if HWY_BASELINE_AVX3_SPR != 0 && defined(__AVX10_2__) && \
646
+ (HWY_COMPILER_GCC_ACTUAL >= 1500 || HWY_COMPILER_CLANG >= 2001)
647
+ #define HWY_BASELINE_AVX10_2 HWY_AVX10_2
648
+ #else
649
+ #define HWY_BASELINE_AVX10_2 0
650
+ #endif
651
+
533
652
  // RVV requires intrinsics 0.11 or later, see #1156.
534
- #if HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && \
653
+
654
+ // Also check that the __riscv_v macro is defined as GCC or Clang will define
655
+ // the __riscv_v macro if the RISC-V "V" extension is enabled.
656
+
657
+ #if HWY_ARCH_RISCV && defined(__riscv_v) && defined(__riscv_v_intrinsic) && \
535
658
  __riscv_v_intrinsic >= 11000
536
659
  #define HWY_BASELINE_RVV HWY_RVV
537
660
  #else
538
661
  #define HWY_BASELINE_RVV 0
539
662
  #endif
540
663
 
664
+ #if HWY_ARCH_LOONGARCH && defined(__loongarch_sx) && defined(__loongarch_asx)
665
+ #define HWY_BASELINE_LOONGARCH (HWY_LSX | HWY_LASX)
666
+ #elif HWY_ARCH_LOONGARCH && defined(__loongarch_sx)
667
+ #define HWY_BASELINE_LOONGARCH (HWY_LSX)
668
+ #else
669
+ #define HWY_BASELINE_LOONGARCH 0
670
+ #endif
671
+
541
672
  // Allow the user to override this without any guarantee of success.
542
673
  #ifndef HWY_BASELINE_TARGETS
543
674
  #define HWY_BASELINE_TARGETS \
@@ -547,7 +678,7 @@
547
678
  HWY_BASELINE_NEON | HWY_BASELINE_SSE2 | HWY_BASELINE_SSSE3 | \
548
679
  HWY_BASELINE_SSE4 | HWY_BASELINE_AVX2 | HWY_BASELINE_AVX3 | \
549
680
  HWY_BASELINE_AVX3_DL | HWY_BASELINE_AVX3_ZEN4 | HWY_BASELINE_AVX3_SPR | \
550
- HWY_BASELINE_RVV)
681
+ HWY_BASELINE_AVX10_2 | HWY_BASELINE_RVV | HWY_BASELINE_LOONGARCH)
551
682
  #endif // HWY_BASELINE_TARGETS
552
683
 
553
684
  //------------------------------------------------------------------------------
@@ -577,6 +708,22 @@
577
708
  #endif
578
709
  // Defining one of HWY_COMPILE_ONLY_* will trump HWY_COMPILE_ALL_ATTAINABLE.
579
710
 
711
+ #ifndef HWY_HAVE_ASM_HWCAP // allow override
712
+ #ifdef TOOLCHAIN_MISS_ASM_HWCAP_H
713
+ #define HWY_HAVE_ASM_HWCAP 0 // CMake failed to find the header
714
+ #elif defined(__has_include) // note: wrapper macro fails on Clang ~17
715
+ // clang-format off
716
+ #if __has_include(<asm/hwcap.h>)
717
+ // clang-format on
718
+ #define HWY_HAVE_ASM_HWCAP 1 // header present
719
+ #else
720
+ #define HWY_HAVE_ASM_HWCAP 0 // header not present
721
+ #endif // __has_include
722
+ #else // compiler lacks __has_include
723
+ #define HWY_HAVE_ASM_HWCAP 0
724
+ #endif
725
+ #endif // HWY_HAVE_ASM_HWCAP
726
+
580
727
  #ifndef HWY_HAVE_AUXV // allow override
581
728
  #ifdef TOOLCHAIN_MISS_SYS_AUXV_H
582
729
  #define HWY_HAVE_AUXV 0 // CMake failed to find the header
@@ -587,7 +734,7 @@
587
734
  // clang-format off
588
735
  #if __has_include(<sys/auxv.h>)
589
736
  // clang-format on
590
- #define HWY_HAVE_AUXV 1 // header present
737
+ #define HWY_HAVE_AUXV 1 // header present
591
738
  #else
592
739
  #define HWY_HAVE_AUXV 0 // header not present
593
740
  #endif // __has_include
@@ -596,33 +743,57 @@
596
743
  #endif
597
744
  #endif // HWY_HAVE_AUXV
598
745
 
746
+ #ifndef HWY_HAVE_RUNTIME_DISPATCH_RVV // allow override
747
+ // The riscv_vector.h in Clang 16-18 requires compiler flags, and 19 still has
748
+ // some missing intrinsics, see
749
+ // https://github.com/llvm/llvm-project/issues/56592. GCC 13.3 also has an
750
+ // #error check, whereas 14.1 fails with "argument type 'vuint16m8_t' requires
751
+ // the V ISA extension": https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115325.
752
+ #if HWY_ARCH_RISCV && HWY_COMPILER_CLANG >= 1900 && 0
753
+ #define HWY_HAVE_RUNTIME_DISPATCH_RVV 1
754
+ #else
755
+ #define HWY_HAVE_RUNTIME_DISPATCH_RVV 0
756
+ #endif
757
+ #endif // HWY_HAVE_RUNTIME_DISPATCH_RVV
758
+
759
+ #ifndef HWY_HAVE_RUNTIME_DISPATCH_APPLE // allow override
760
+ #if HWY_ARCH_ARM_A64 && HWY_OS_APPLE && \
761
+ (HWY_COMPILER_GCC_ACTUAL || HWY_COMPILER_CLANG >= 1700)
762
+ #define HWY_HAVE_RUNTIME_DISPATCH_APPLE 1
763
+ #else
764
+ #define HWY_HAVE_RUNTIME_DISPATCH_APPLE 0
765
+ #endif
766
+ #endif // HWY_HAVE_RUNTIME_DISPATCH_APPLE
767
+
768
+ #ifndef HWY_HAVE_RUNTIME_DISPATCH_LOONGARCH // allow override
769
+ #if HWY_ARCH_LOONGARCH && HWY_HAVE_AUXV && (defined(__loongarch_sx) || \
770
+ defined(__loongarch_asx))
771
+ #define HWY_HAVE_RUNTIME_DISPATCH_LOONGARCH 1
772
+ #else
773
+ #define HWY_HAVE_RUNTIME_DISPATCH_LOONGARCH 0
774
+ #endif
775
+ #endif // HWY_HAVE_RUNTIME_DISPATCH_LOONGARCH
776
+
777
+ #ifndef HWY_HAVE_RUNTIME_DISPATCH_LINUX // allow override
778
+ #if (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X) && HWY_OS_LINUX && \
779
+ (HWY_COMPILER_GCC_ACTUAL || HWY_COMPILER_CLANG >= 1700) && HWY_HAVE_AUXV
780
+ #define HWY_HAVE_RUNTIME_DISPATCH_LINUX 1
781
+ #else
782
+ #define HWY_HAVE_RUNTIME_DISPATCH_LINUX 0
783
+ #endif
784
+ #endif // HWY_HAVE_RUNTIME_DISPATCH_LINUX
785
+
599
786
  // Allow opting out, and without a guarantee of success, opting-in.
600
787
  #ifndef HWY_HAVE_RUNTIME_DISPATCH
601
- // Clang, GCC and MSVC allow runtime dispatch on x86.
602
- #if HWY_ARCH_X86
603
- #define HWY_HAVE_RUNTIME_DISPATCH 1
604
- // On Arm, PPC, S390X, and RISC-V: GCC and Clang 17+ do, and we require Linux
605
- // to detect CPU capabilities.
606
- #elif (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X || HWY_ARCH_RISCV) && \
607
- (HWY_COMPILER_GCC_ACTUAL || HWY_COMPILER_CLANG >= 1700) && HWY_OS_LINUX && \
608
- HWY_HAVE_AUXV
609
- #define HWY_HAVE_RUNTIME_DISPATCH 1
610
- #elif HWY_ARCH_ARM_A64 && HWY_OS_APPLE && \
611
- (HWY_COMPILER_GCC_ACTUAL || HWY_COMPILER_CLANG >= 1700)
788
+ // Clang, GCC and MSVC allow OS-independent runtime dispatch on x86.
789
+ #if HWY_ARCH_X86 || HWY_HAVE_RUNTIME_DISPATCH_RVV || \
790
+ HWY_HAVE_RUNTIME_DISPATCH_APPLE || HWY_HAVE_RUNTIME_DISPATCH_LOONGARCH || \
791
+ HWY_HAVE_RUNTIME_DISPATCH_LINUX
612
792
  #define HWY_HAVE_RUNTIME_DISPATCH 1
613
793
  #else
614
794
  #define HWY_HAVE_RUNTIME_DISPATCH 0
615
- #endif // HWY_ARCH_*
616
- #endif // HWY_HAVE_RUNTIME_DISPATCH
617
-
618
- // AVX3_DL is not widely available yet. To reduce code size and compile time,
619
- // only include it in the set of attainable targets (for dynamic dispatch) if
620
- // the user opts in, OR it is in the baseline (we check whether enabled below).
621
- #if defined(HWY_WANT_AVX3_DL) || (HWY_BASELINE_TARGETS & HWY_AVX3_DL)
622
- #define HWY_ATTAINABLE_AVX3_DL (HWY_AVX3_DL)
623
- #else
624
- #define HWY_ATTAINABLE_AVX3_DL 0
625
795
  #endif
796
+ #endif // HWY_HAVE_RUNTIME_DISPATCH
626
797
 
627
798
  #if HWY_ARCH_ARM_A64 && HWY_HAVE_RUNTIME_DISPATCH
628
799
  #define HWY_ATTAINABLE_NEON HWY_ALL_NEON
@@ -675,24 +846,34 @@
675
846
  #endif
676
847
 
677
848
  #if HWY_ARCH_RISCV && HWY_HAVE_RUNTIME_DISPATCH
678
- #define HWY_ATTAINABLE_RISCV (HWY_RVV)
849
+ #define HWY_ATTAINABLE_RISCV HWY_RVV
679
850
  #else
680
- #define HWY_ATTAINABLE_RISCV 0
851
+ #define HWY_ATTAINABLE_RISCV HWY_BASELINE_RVV
681
852
  #endif
682
853
 
683
- // Attainable means enabled and the compiler allows intrinsics (even when not
684
- // allowed to autovectorize). Used in 3 and 4.
685
- #if HWY_ARCH_X86
686
- #if HWY_COMPILER_MSVC
854
+ #if HWY_ARCH_LOONGARCH && HWY_HAVE_RUNTIME_DISPATCH
855
+ #define HWY_ATTAINABLE_LOONGARCH (HWY_LSX | HWY_LASX)
856
+ #else
857
+ #define HWY_ATTAINABLE_LOONGARCH HWY_BASELINE_LOONGARCH
858
+ #endif
859
+
860
+ #ifndef HWY_ATTAINABLE_TARGETS_X86 // allow override
861
+ #if HWY_COMPILER_MSVC && defined(HWY_SLOW_MSVC)
687
862
  // Fewer targets for faster builds.
688
- #define HWY_ATTAINABLE_TARGETS \
863
+ #define HWY_ATTAINABLE_TARGETS_X86 \
689
864
  HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_STATIC_TARGET | HWY_AVX2)
690
865
  #else // !HWY_COMPILER_MSVC
691
- #define HWY_ATTAINABLE_TARGETS \
692
- HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_SSE2 | HWY_SSSE3 | HWY_SSE4 | \
693
- HWY_AVX2 | HWY_AVX3 | HWY_ATTAINABLE_AVX3_DL | HWY_AVX3_ZEN4 | \
694
- HWY_AVX3_SPR)
866
+ #define HWY_ATTAINABLE_TARGETS_X86 \
867
+ HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_SSE2 | HWY_SSSE3 | HWY_SSE4 | \
868
+ HWY_AVX2 | HWY_AVX3 | HWY_AVX3_DL | HWY_AVX3_ZEN4 | \
869
+ HWY_AVX3_SPR | HWY_AVX10_2)
695
870
  #endif // !HWY_COMPILER_MSVC
871
+ #endif // HWY_ATTAINABLE_TARGETS_X86
872
+
873
+ // Attainable means enabled and the compiler allows intrinsics (even when not
874
+ // allowed to auto-vectorize). Used in 3 and 4.
875
+ #if HWY_ARCH_X86
876
+ #define HWY_ATTAINABLE_TARGETS HWY_ATTAINABLE_TARGETS_X86
696
877
  #elif HWY_ARCH_ARM
697
878
  #define HWY_ATTAINABLE_TARGETS \
698
879
  HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_NEON | HWY_ATTAINABLE_SVE | \
@@ -703,9 +884,12 @@
703
884
  #elif HWY_ARCH_S390X
704
885
  #define HWY_ATTAINABLE_TARGETS \
705
886
  HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_S390X)
706
- #elif HWY_ARCH_RVV
887
+ #elif HWY_ARCH_RISCV
707
888
  #define HWY_ATTAINABLE_TARGETS \
708
889
  HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_RISCV)
890
+ #elif HWY_ARCH_LOONGARCH
891
+ #define HWY_ATTAINABLE_TARGETS \
892
+ HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_LOONGARCH)
709
893
  #else
710
894
  #define HWY_ATTAINABLE_TARGETS (HWY_ENABLED_BASELINE)
711
895
  #endif // HWY_ARCH_*
@@ -143,6 +143,17 @@
143
143
  #endif
144
144
  #endif
145
145
 
146
+ #if (HWY_TARGETS & HWY_AVX10_2) && (HWY_STATIC_TARGET != HWY_AVX10_2)
147
+ #undef HWY_TARGET
148
+ #define HWY_TARGET HWY_AVX10_2
149
+ #include HWY_TARGET_INCLUDE
150
+ #ifdef HWY_TARGET_TOGGLE
151
+ #undef HWY_TARGET_TOGGLE
152
+ #else
153
+ #define HWY_TARGET_TOGGLE
154
+ #endif
155
+ #endif
156
+
146
157
  // ------------------------------ HWY_ARCH_ARM
147
158
 
148
159
  #if (HWY_TARGETS & HWY_NEON_WITHOUT_AES) && \
@@ -319,6 +330,30 @@
319
330
  #endif
320
331
  #endif
321
332
 
333
+ // ------------------------------ HWY_ARCH_LOONGARCH
334
+
335
+ #if (HWY_TARGETS & HWY_LSX) && (HWY_STATIC_TARGET != HWY_LSX)
336
+ #undef HWY_TARGET
337
+ #define HWY_TARGET HWY_LSX
338
+ #include HWY_TARGET_INCLUDE
339
+ #ifdef HWY_TARGET_TOGGLE
340
+ #undef HWY_TARGET_TOGGLE
341
+ #else
342
+ #define HWY_TARGET_TOGGLE
343
+ #endif
344
+ #endif
345
+
346
+ #if (HWY_TARGETS & HWY_LASX) && (HWY_STATIC_TARGET != HWY_LASX)
347
+ #undef HWY_TARGET
348
+ #define HWY_TARGET HWY_LASX
349
+ #include HWY_TARGET_INCLUDE
350
+ #ifdef HWY_TARGET_TOGGLE
351
+ #undef HWY_TARGET_TOGGLE
352
+ #else
353
+ #define HWY_TARGET_TOGGLE
354
+ #endif
355
+ #endif
356
+
322
357
  // ------------------------------ Scalar
323
358
 
324
359
  #if (HWY_TARGETS & HWY_EMU128) && (HWY_STATIC_TARGET != HWY_EMU128)