@img/sharp-libvips-dev 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. package/README.md +2 -2
  2. package/cplusplus/VConnection.cpp +54 -54
  3. package/cplusplus/VError.cpp +20 -18
  4. package/cplusplus/VImage.cpp +636 -589
  5. package/cplusplus/VInterpolate.cpp +22 -22
  6. package/cplusplus/VRegion.cpp +4 -4
  7. package/cplusplus/vips-operators.cpp +2326 -2301
  8. package/include/aom/aom_codec.h +10 -6
  9. package/include/aom/aom_decoder.h +1 -1
  10. package/include/aom/aom_encoder.h +9 -2
  11. package/include/aom/aomcx.h +72 -3
  12. package/include/cairo/cairo-ft.h +1 -1
  13. package/include/cairo/cairo-gobject.h +8 -0
  14. package/include/cairo/cairo-svg.h +3 -3
  15. package/include/cairo/cairo-version.h +2 -2
  16. package/include/cairo/cairo.h +91 -24
  17. package/include/harfbuzz/hb-version.h +2 -2
  18. package/include/hwy/aligned_allocator.h +211 -0
  19. package/include/hwy/base.h +1517 -0
  20. package/include/hwy/cache_control.h +108 -0
  21. package/include/hwy/detect_compiler_arch.h +281 -0
  22. package/include/hwy/detect_targets.h +644 -0
  23. package/include/hwy/foreach_target.h +340 -0
  24. package/include/hwy/highway.h +435 -0
  25. package/include/hwy/highway_export.h +74 -0
  26. package/include/hwy/nanobenchmark.h +171 -0
  27. package/include/hwy/ops/arm_neon-inl.h +8913 -0
  28. package/include/hwy/ops/arm_sve-inl.h +5105 -0
  29. package/include/hwy/ops/emu128-inl.h +2811 -0
  30. package/include/hwy/ops/generic_ops-inl.h +4745 -0
  31. package/include/hwy/ops/ppc_vsx-inl.h +5716 -0
  32. package/include/hwy/ops/rvv-inl.h +5070 -0
  33. package/include/hwy/ops/scalar-inl.h +1995 -0
  34. package/include/hwy/ops/set_macros-inl.h +578 -0
  35. package/include/hwy/ops/shared-inl.h +539 -0
  36. package/include/hwy/ops/tuple-inl.h +125 -0
  37. package/include/hwy/ops/wasm_128-inl.h +5917 -0
  38. package/include/hwy/ops/x86_128-inl.h +11173 -0
  39. package/include/hwy/ops/x86_256-inl.h +7529 -0
  40. package/include/hwy/ops/x86_512-inl.h +6849 -0
  41. package/include/hwy/per_target.h +44 -0
  42. package/include/hwy/print-inl.h +62 -0
  43. package/include/hwy/print.h +75 -0
  44. package/include/hwy/robust_statistics.h +148 -0
  45. package/include/hwy/targets.h +338 -0
  46. package/include/hwy/timer-inl.h +200 -0
  47. package/include/hwy/timer.h +55 -0
  48. package/include/jconfig.h +2 -2
  49. package/include/jpeglib.h +3 -2
  50. package/include/libheif/heif.h +443 -377
  51. package/include/libheif/heif_cxx.h +4 -1
  52. package/include/libheif/heif_plugin.h +1 -1
  53. package/include/libheif/heif_properties.h +138 -0
  54. package/include/libheif/heif_regions.h +866 -0
  55. package/include/libheif/heif_version.h +3 -3
  56. package/include/vips/VConnection8.h +43 -49
  57. package/include/vips/VError8.h +27 -24
  58. package/include/vips/VImage8.h +4861 -4597
  59. package/include/vips/VInterpolate8.h +24 -27
  60. package/include/vips/VRegion8.h +32 -33
  61. package/include/vips/arithmetic.h +169 -169
  62. package/include/vips/basic.h +33 -33
  63. package/include/vips/buf.h +56 -54
  64. package/include/vips/colour.h +95 -95
  65. package/include/vips/connection.h +190 -193
  66. package/include/vips/conversion.h +91 -91
  67. package/include/vips/convolution.h +36 -30
  68. package/include/vips/create.h +63 -63
  69. package/include/vips/dbuf.h +35 -37
  70. package/include/vips/debug.h +65 -33
  71. package/include/vips/draw.h +41 -41
  72. package/include/vips/enumtypes.h +54 -51
  73. package/include/vips/error.h +63 -63
  74. package/include/vips/foreign.h +263 -223
  75. package/include/vips/format.h +48 -48
  76. package/include/vips/freqfilt.h +22 -22
  77. package/include/vips/gate.h +55 -47
  78. package/include/vips/generate.h +34 -34
  79. package/include/vips/header.h +111 -101
  80. package/include/vips/histogram.h +28 -28
  81. package/include/vips/image.h +213 -213
  82. package/include/vips/interpolate.h +40 -41
  83. package/include/vips/memory.h +61 -52
  84. package/include/vips/morphology.h +24 -24
  85. package/include/vips/mosaicing.h +32 -33
  86. package/include/vips/object.h +371 -357
  87. package/include/vips/operation.h +68 -67
  88. package/include/vips/private.h +76 -76
  89. package/include/vips/rect.h +26 -26
  90. package/include/vips/region.h +92 -92
  91. package/include/vips/resample.h +38 -38
  92. package/include/vips/sbuf.h +53 -54
  93. package/include/vips/semaphore.h +24 -24
  94. package/include/vips/thread.h +30 -27
  95. package/include/vips/threadpool.h +48 -49
  96. package/include/vips/transform.h +39 -39
  97. package/include/vips/type.h +90 -85
  98. package/include/vips/util.h +274 -229
  99. package/include/vips/vector.h +24 -144
  100. package/include/vips/version.h +9 -9
  101. package/include/vips/vips.h +41 -40
  102. package/package.json +1 -1
  103. package/versions.json +7 -7
@@ -0,0 +1,644 @@
1
+ // Copyright 2021 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ #ifndef HIGHWAY_HWY_DETECT_TARGETS_H_
17
+ #define HIGHWAY_HWY_DETECT_TARGETS_H_
18
+
19
+ // Defines targets and chooses which to enable.
20
+
21
+ #include "hwy/detect_compiler_arch.h"
22
+
23
+ //------------------------------------------------------------------------------
24
+ // Optional configuration
25
+
26
+ // See g3doc/quick_reference.md for documentation of these macros.
27
+
28
+ // Uncomment to override the default baseline determined from predefined macros:
29
+ // #define HWY_BASELINE_TARGETS (HWY_SSE4 | HWY_SCALAR)
30
+
31
+ // Uncomment to override the default blocklist:
32
+ // #define HWY_BROKEN_TARGETS HWY_AVX3
33
+
34
+ // Uncomment to definitely avoid generating those target(s):
35
+ // #define HWY_DISABLED_TARGETS HWY_SSE4
36
+
37
+ // Uncomment to avoid emitting BMI/BMI2/FMA instructions (allows generating
38
+ // AVX2 target for VMs which support AVX2 but not the other instruction sets)
39
+ // #define HWY_DISABLE_BMI2_FMA
40
+
41
+ // Uncomment to enable these on MSVC even if the predefined macros are not set.
42
+ // #define HWY_WANT_SSE2 1
43
+ // #define HWY_WANT_SSSE3 1
44
+ // #define HWY_WANT_SSE4 1
45
+
46
+ //------------------------------------------------------------------------------
47
+ // Targets
48
+
49
+ // Unique bit value for each target. A lower value is "better" (e.g. more lanes)
50
+ // than a higher value within the same group/platform - see HWY_STATIC_TARGET.
51
+ //
52
+ // All values are unconditionally defined so we can test HWY_TARGETS without
53
+ // first checking the HWY_ARCH_*.
54
+ //
55
+ // The C99 preprocessor evaluates #if expressions using intmax_t types. This
56
+ // holds at least 64 bits in practice (verified 2022-07-18 via Godbolt on
57
+ // 32-bit clang/GCC/MSVC compilers for x86/Arm7/AArch32/RISC-V/WASM). We now
58
+ // avoid overflow when computing HWY_TARGETS (subtracting one instead of
59
+ // left-shifting 2^62), but still do not use bit 63 because it is the sign bit.
60
+
61
+ // --------------------------- x86: 15 targets (+ one fallback)
62
+ // Bits 0..3 reserved (4 targets)
63
+ #define HWY_AVX3_SPR (1LL << 4)
64
+ // Bit 5 reserved (likely AVX10.2 with 256-bit vectors)
65
+ // Currently HWY_AVX3_DL plus a special case for CompressStore (10x as fast).
66
+ // We may later also use VPCONFLICT.
67
+ #define HWY_AVX3_ZEN4 (1LL << 6) // see HWY_WANT_AVX3_ZEN4 below
68
+
69
+ // Currently satisfiable by Ice Lake (VNNI, VPCLMULQDQ, VPOPCNTDQ, VBMI, VBMI2,
70
+ // VAES, BITALG, GFNI). Later to be added: BF16 (Cooper Lake). VP2INTERSECT is
71
+ // only in Tiger Lake?
72
+ #define HWY_AVX3_DL (1LL << 7) // see HWY_WANT_AVX3_DL below
73
+ #define HWY_AVX3 (1LL << 8) // HWY_AVX2 plus AVX-512F/BW/CD/DQ/VL
74
+ #define HWY_AVX2 (1LL << 9) // HWY_SSE4 plus BMI2 + F16 + FMA
75
+ // Bit 10: reserved
76
+ #define HWY_SSE4 (1LL << 11) // SSE4.2 plus AES + CLMUL
77
+ #define HWY_SSSE3 (1LL << 12) // S-SSE3
78
+ // Bit 13: reserved for SSE3
79
+ #define HWY_SSE2 (1LL << 14)
80
+ // The highest bit in the HWY_TARGETS mask that an x86 target can have. Used for
81
+ // dynamic dispatch. All x86 target bits must be lower than or equal to
82
+ // (1 << HWY_HIGHEST_TARGET_BIT_X86) and they can only use
83
+ // HWY_MAX_DYNAMIC_TARGETS in total.
84
+ #define HWY_HIGHEST_TARGET_BIT_X86 14
85
+
86
+ // --------------------------- Arm: 15 targets (+ one fallback)
87
+ // Bits 15..23 reserved (9 targets)
88
+ #define HWY_SVE2_128 (1LL << 24) // specialized target (e.g. Arm N2)
89
+ #define HWY_SVE_256 (1LL << 25) // specialized target (e.g. Arm V1)
90
+ #define HWY_SVE2 (1LL << 26)
91
+ #define HWY_SVE (1LL << 27)
92
+ #define HWY_NEON (1LL << 28) // Implies support for AES
93
+ #define HWY_NEON_WITHOUT_AES (1LL << 29)
94
+ #define HWY_HIGHEST_TARGET_BIT_ARM 29
95
+
96
+ // --------------------------- RISC-V: 9 targets (+ one fallback)
97
+ // Bits 30..36 reserved (7 targets)
98
+ #define HWY_RVV (1LL << 37)
99
+ // Bit 38 reserved
100
+ #define HWY_HIGHEST_TARGET_BIT_RVV 38
101
+
102
+ // --------------------------- Future expansion: 4 targets
103
+ // Bits 39..42 reserved
104
+
105
+ // --------------------------- IBM Power: 9 targets (+ one fallback)
106
+ // Bits 43..46 reserved (4 targets)
107
+ #define HWY_PPC10 (1LL << 47) // v3.1
108
+ #define HWY_PPC9 (1LL << 48) // v3.0
109
+ #define HWY_PPC8 (1LL << 49) // v2.07
110
+ // Bits 50..51 reserved for prior VSX/AltiVec (2 targets)
111
+ #define HWY_HIGHEST_TARGET_BIT_PPC 51
112
+
113
+ // --------------------------- WebAssembly: 9 targets (+ one fallback)
114
+ // Bits 52..57 reserved (6 targets)
115
+ #define HWY_WASM_EMU256 (1LL << 58) // Experimental
116
+ #define HWY_WASM (1LL << 59)
117
+ // Bit 60 reserved
118
+ #define HWY_HIGHEST_TARGET_BIT_WASM 60
119
+
120
+ // --------------------------- Emulation: 2 targets
121
+
122
+ #define HWY_EMU128 (1LL << 61)
123
+ // We do not add/left-shift, so this will not overflow to a negative number.
124
+ #define HWY_SCALAR (1LL << 62)
125
+ #define HWY_HIGHEST_TARGET_BIT_SCALAR 62
126
+
127
+ // Do not use bit 63 - would be confusing to have negative numbers.
128
+
129
+ //------------------------------------------------------------------------------
130
+ // Set default blocklists
131
+
132
+ // Disabled means excluded from enabled at user's request. A separate config
133
+ // macro allows disabling without deactivating the blocklist below.
134
+ #ifndef HWY_DISABLED_TARGETS
135
+ #define HWY_DISABLED_TARGETS 0
136
+ #endif
137
+
138
+ // Broken means excluded from enabled due to known compiler issues. We define
139
+ // separate HWY_BROKEN_* and then OR them together (more than one might apply).
140
+
141
+ // x86 clang-6: we saw multiple AVX2/3 compile errors and in one case invalid
142
+ // SSE4 codegen (possibly only for msan), so disable all those targets.
143
+ #if HWY_ARCH_X86 && (HWY_COMPILER_CLANG != 0 && HWY_COMPILER_CLANG < 700)
144
+
145
+ #define HWY_BROKEN_CLANG6 (HWY_SSE4 | (HWY_SSE4 - 1))
146
+ // This entails a major speed reduction, so warn unless the user explicitly
147
+ // opts in to scalar-only.
148
+ #if !defined(HWY_COMPILE_ONLY_SCALAR)
149
+ #pragma message("x86 Clang <= 6: define HWY_COMPILE_ONLY_SCALAR or upgrade.")
150
+ #endif
151
+
152
+ #else
153
+ #define HWY_BROKEN_CLANG6 0
154
+ #endif
155
+
156
+ // 32-bit may fail to compile AVX2/3.
157
+ #if HWY_ARCH_X86_32
158
+ #define HWY_BROKEN_32BIT (HWY_AVX2 | (HWY_AVX2 - 1))
159
+ #else
160
+ #define HWY_BROKEN_32BIT 0
161
+ #endif
162
+
163
+ // MSVC AVX3 support is buggy: https://github.com/Mysticial/Flops/issues/16
164
+ #if HWY_COMPILER_MSVC != 0
165
+ #define HWY_BROKEN_MSVC (HWY_AVX3 | (HWY_AVX3 - 1))
166
+ #else
167
+ #define HWY_BROKEN_MSVC 0
168
+ #endif
169
+
170
+ // AVX3_DL and AVX3_ZEN4 require clang >= 7 (ensured above), gcc >= 8.1 or ICC
171
+ // 2021.
172
+ #if (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 801) || \
173
+ (HWY_COMPILER_ICC && HWY_COMPILER_ICC < 2021)
174
+ #define HWY_BROKEN_AVX3_DL_ZEN4 (HWY_AVX3_DL | HWY_AVX3_ZEN4)
175
+ #else
176
+ #define HWY_BROKEN_AVX3_DL_ZEN4 0
177
+ #endif
178
+
179
+ // AVX3_SPR requires clang >= 14, gcc >= 12, or ICC 2021.
180
+ #if (HWY_COMPILER_CLANG != 0 && HWY_COMPILER_CLANG < 1400) || \
181
+ (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1200) || \
182
+ (HWY_COMPILER_ICC && HWY_COMPILER_ICC < 2021)
183
+ #define HWY_BROKEN_AVX3_SPR (HWY_AVX3_SPR)
184
+ #else
185
+ #define HWY_BROKEN_AVX3_SPR 0
186
+ #endif
187
+
188
+ // armv7be has not been tested and is not yet supported.
189
+ #if HWY_ARCH_ARM_V7 && HWY_IS_BIG_ENDIAN
190
+ #define HWY_BROKEN_ARM7_BIG_ENDIAN (HWY_NEON | HWY_NEON_WITHOUT_AES)
191
+ #else
192
+ #define HWY_BROKEN_ARM7_BIG_ENDIAN 0
193
+ #endif
194
+
195
+ // armv7-a without a detected vfpv4 is not supported
196
+ // (for example Cortex-A8, Cortex-A9)
197
+ // vfpv4 always has NEON half-float _and_ FMA.
198
+ #if HWY_ARCH_ARM_V7 && (__ARM_ARCH_PROFILE == 'A') && \
199
+ !defined(__ARM_VFPV4__) && \
200
+ !((__ARM_NEON_FP & 0x2 /* half-float */) && (__ARM_FEATURE_FMA == 1))
201
+ #define HWY_BROKEN_ARM7_WITHOUT_VFP4 (HWY_NEON | HWY_NEON_WITHOUT_AES)
202
+ #else
203
+ #define HWY_BROKEN_ARM7_WITHOUT_VFP4 0
204
+ #endif
205
+
206
+ // SVE[2] require recent clang or gcc versions.
207
+ #if (HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1100) || \
208
+ (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1000)
209
+ #define HWY_BROKEN_SVE (HWY_SVE | HWY_SVE2 | HWY_SVE_256 | HWY_SVE2_128)
210
+ #else
211
+ #define HWY_BROKEN_SVE 0
212
+ #endif
213
+
214
+ #if (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1100)
215
+ // GCC 10 supports the -mcpu=power10 option but does not support the PPC10
216
+ // vector intrinsics
217
+ #define HWY_BROKEN_PPC10 (HWY_PPC10)
218
+ #elif HWY_ARCH_PPC && HWY_IS_BIG_ENDIAN && \
219
+ ((HWY_COMPILER3_CLANG && HWY_COMPILER3_CLANG < 160001) || \
220
+ (HWY_COMPILER_GCC_ACTUAL >= 1200 && HWY_COMPILER_GCC_ACTUAL <= 1203) || \
221
+ (HWY_COMPILER_GCC_ACTUAL >= 1300 && HWY_COMPILER_GCC_ACTUAL <= 1301))
222
+ // GCC 12.0 through 12.3 and GCC 13.0 through 13.1 have a compiler bug where the
223
+ // vsldoi instruction is sometimes incorrectly optimized out (and this causes
224
+ // some of the Highway unit tests to fail on big-endian PPC10). Details about
225
+ // this compiler bug can be found at
226
+ // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109069, and this bug will be
227
+ // fixed in the upcoming GCC 12.4 and 13.2 releases.
228
+
229
+ // Clang 16.0.0 and earlier (but not Clang 16.0.1 and later) have a compiler
230
+ // bug in the LLVM DAGCombiner that causes a zero-extend followed by an
231
+ // element insert into a vector, followed by a vector shuffle to be incorrectly
232
+ // optimized on big-endian PPC (and which caused some of the Highway unit tests
233
+ // to fail on big-endian PPC10).
234
+
235
+ // Details about this bug, which has already been fixed in Clang 16.0.1 and
236
+ // later, can be found at https://github.com/llvm/llvm-project/issues/61315.
237
+ #define HWY_BROKEN_PPC10 (HWY_PPC10)
238
+ #else
239
+ #define HWY_BROKEN_PPC10 0
240
+ #endif
241
+
242
+ // Allow the user to override this without any guarantee of success.
243
+ #ifndef HWY_BROKEN_TARGETS
244
+
245
+ #define HWY_BROKEN_TARGETS \
246
+ (HWY_BROKEN_CLANG6 | HWY_BROKEN_32BIT | HWY_BROKEN_MSVC | \
247
+ HWY_BROKEN_AVX3_DL_ZEN4 | HWY_BROKEN_AVX3_SPR | \
248
+ HWY_BROKEN_ARM7_BIG_ENDIAN | HWY_BROKEN_ARM7_WITHOUT_VFP4 | \
249
+ HWY_BROKEN_SVE | HWY_BROKEN_PPC10)
250
+
251
+ #endif // HWY_BROKEN_TARGETS
252
+
253
+ // Enabled means not disabled nor blocklisted.
254
+ #define HWY_ENABLED(targets) \
255
+ ((targets) & ~((HWY_DISABLED_TARGETS) | (HWY_BROKEN_TARGETS)))
256
+
257
+ // Opt-out for EMU128 (affected by a GCC bug on multiple arches, fixed in 12.3:
258
+ // see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106322). An issue still
259
+ // remains with 13.2, see #1683. This is separate from HWY_BROKEN_TARGETS
260
+ // because it affects the fallback target, which must always be enabled. If 1,
261
+ // we instead choose HWY_SCALAR even without HWY_COMPILE_ONLY_SCALAR being set.
262
+ #if !defined(HWY_BROKEN_EMU128) // allow overriding
263
+ #if (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1400) || \
264
+ defined(HWY_NO_LIBCXX)
265
+ #define HWY_BROKEN_EMU128 1
266
+ #else
267
+ #define HWY_BROKEN_EMU128 0
268
+ #endif
269
+ #endif // HWY_BROKEN_EMU128
270
+
271
+ //------------------------------------------------------------------------------
272
+ // Detect baseline targets using predefined macros
273
+
274
+ // Baseline means the targets for which the compiler is allowed to generate
275
+ // instructions, implying the target CPU would have to support them. This does
276
+ // not take the blocklist into account.
277
+
278
+ #if defined(HWY_COMPILE_ONLY_SCALAR) || HWY_BROKEN_EMU128
279
+ #define HWY_BASELINE_SCALAR HWY_SCALAR
280
+ #else
281
+ #define HWY_BASELINE_SCALAR HWY_EMU128
282
+ #endif
283
+
284
+ // Also check HWY_ARCH to ensure that simulating unknown platforms ends up with
285
+ // HWY_TARGET == HWY_BASELINE_SCALAR.
286
+
287
+ #if HWY_ARCH_WASM && defined(__wasm_simd128__)
288
+ #if defined(HWY_WANT_WASM2)
289
+ #define HWY_BASELINE_WASM HWY_WASM_EMU256
290
+ #else
291
+ #define HWY_BASELINE_WASM HWY_WASM
292
+ #endif // HWY_WANT_WASM2
293
+ #else
294
+ #define HWY_BASELINE_WASM 0
295
+ #endif
296
+
297
+ // GCC or Clang.
298
+ #if HWY_ARCH_PPC && HWY_COMPILER_GCC && defined(__ALTIVEC__) && \
299
+ defined(__VSX__) && defined(__POWER8_VECTOR__) && \
300
+ (defined(__CRYPTO__) || defined(HWY_DISABLE_PPC8_CRYPTO))
301
+ #define HWY_BASELINE_PPC8 HWY_PPC8
302
+ #else
303
+ #define HWY_BASELINE_PPC8 0
304
+ #endif
305
+
306
+ #if HWY_BASELINE_PPC8 != 0 && defined(__POWER9_VECTOR__)
307
+ #define HWY_BASELINE_PPC9 HWY_PPC9
308
+ #else
309
+ #define HWY_BASELINE_PPC9 0
310
+ #endif
311
+
312
+ #if HWY_BASELINE_PPC9 != 0 && \
313
+ (defined(_ARCH_PWR10) || defined(__POWER10_VECTOR__))
314
+ #define HWY_BASELINE_PPC10 HWY_PPC10
315
+ #else
316
+ #define HWY_BASELINE_PPC10 0
317
+ #endif
318
+
319
+ #define HWY_BASELINE_SVE2 0
320
+ #define HWY_BASELINE_SVE 0
321
+ #define HWY_BASELINE_NEON 0
322
+
323
+ #if HWY_ARCH_ARM
324
+
325
+ #if defined(__ARM_FEATURE_SVE2)
326
+ #undef HWY_BASELINE_SVE2 // was 0, will be re-defined
327
+ // If user specified -msve-vector-bits=128, they assert the vector length is
328
+ // 128 bits and we should use the HWY_SVE2_128 (more efficient for some ops).
329
+ #if defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS == 128
330
+ #define HWY_BASELINE_SVE2 HWY_SVE2_128
331
+ // Otherwise we're not sure what the vector length will be. The baseline must be
332
+ // unconditionally valid, so we can only assume HWY_SVE2. However, when running
333
+ // on a CPU with 128-bit vectors, user code that supports dynamic dispatch will
334
+ // still benefit from HWY_SVE2_128 because we add it to HWY_ATTAINABLE_TARGETS.
335
+ #else
336
+ #define HWY_BASELINE_SVE2 HWY_SVE2
337
+ #endif // __ARM_FEATURE_SVE_BITS
338
+ #endif // __ARM_FEATURE_SVE2
339
+
340
+ #if defined(__ARM_FEATURE_SVE)
341
+ #undef HWY_BASELINE_SVE // was 0, will be re-defined
342
+ // See above. If user-specified vector length matches our optimization, use it.
343
+ #if defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS == 256
344
+ #define HWY_BASELINE_SVE HWY_SVE_256
345
+ #else
346
+ #define HWY_BASELINE_SVE HWY_SVE
347
+ #endif // __ARM_FEATURE_SVE_BITS
348
+ #endif // __ARM_FEATURE_SVE
349
+
350
+ // GCC 4.5.4 only defines __ARM_NEON__; 5.4 defines both.
351
+ #if defined(__ARM_NEON__) || defined(__ARM_NEON)
352
+ #undef HWY_BASELINE_NEON
353
+ #if defined(__ARM_FEATURE_AES)
354
+ #define HWY_BASELINE_NEON (HWY_NEON | HWY_NEON_WITHOUT_AES)
355
+ #else
356
+ #define HWY_BASELINE_NEON (HWY_NEON_WITHOUT_AES)
357
+ #endif
358
+ #endif
359
+
360
+ #endif // HWY_ARCH_ARM
361
+
362
+ // Special handling for MSVC because it has fewer predefined macros:
363
+ #if HWY_COMPILER_MSVC
364
+
365
+ #if HWY_ARCH_X86_32
366
+ #if _M_IX86_FP >= 2
367
+ #define HWY_CHECK_SSE2 1
368
+ #else
369
+ #define HWY_CHECK_SSE2 0
370
+ #endif
371
+ #elif HWY_ARCH_X86_64
372
+ #define HWY_CHECK_SSE2 1
373
+ #else
374
+ #define HWY_CHECK_SSE2 0
375
+ #endif
376
+
377
+ // 1) We can only be sure SSSE3/SSE4 are enabled if AVX is:
378
+ // https://stackoverflow.com/questions/18563978/.
379
+ #if defined(__AVX__)
380
+ #define HWY_CHECK_SSSE3 1
381
+ #define HWY_CHECK_SSE4 1
382
+ #else
383
+ #define HWY_CHECK_SSSE3 0
384
+ #define HWY_CHECK_SSE4 0
385
+ #endif
386
+
387
+ // 2) Cannot check for PCLMUL/AES and BMI2/FMA/F16C individually; we assume
388
+ // PCLMUL/AES are available if SSE4 is, and BMI2/FMA/F16C if AVX2 is.
389
+ #define HWY_CHECK_PCLMUL_AES 1
390
+ #define HWY_CHECK_BMI2_FMA 1
391
+ #define HWY_CHECK_F16C 1
392
+
393
+ #else // non-MSVC
394
+
395
+ #if defined(__SSE2__)
396
+ #define HWY_CHECK_SSE2 1
397
+ #else
398
+ #define HWY_CHECK_SSE2 0
399
+ #endif
400
+
401
+ #if defined(__SSSE3__)
402
+ #define HWY_CHECK_SSSE3 1
403
+ #else
404
+ #define HWY_CHECK_SSSE3 0
405
+ #endif
406
+
407
+ #if defined(__SSE4_1__) && defined(__SSE4_2__)
408
+ #define HWY_CHECK_SSE4 1
409
+ #else
410
+ #define HWY_CHECK_SSE4 0
411
+ #endif
412
+
413
+ // If these are disabled, they should not gate the availability of SSE4/AVX2.
414
+ #if defined(HWY_DISABLE_PCLMUL_AES) || (defined(__PCLMUL__) && defined(__AES__))
415
+ #define HWY_CHECK_PCLMUL_AES 1
416
+ #else
417
+ #define HWY_CHECK_PCLMUL_AES 0
418
+ #endif
419
+
420
+ #if defined(HWY_DISABLE_BMI2_FMA) || (defined(__BMI2__) && defined(__FMA__))
421
+ #define HWY_CHECK_BMI2_FMA 1
422
+ #else
423
+ #define HWY_CHECK_BMI2_FMA 0
424
+ #endif
425
+
426
+ #if defined(HWY_DISABLE_F16C) || defined(__F16C__)
427
+ #define HWY_CHECK_F16C 1
428
+ #else
429
+ #define HWY_CHECK_F16C 0
430
+ #endif
431
+
432
+ #endif // non-MSVC
433
+
434
+ #if HWY_ARCH_X86 && (HWY_WANT_SSE2 || HWY_CHECK_SSE2)
435
+ #define HWY_BASELINE_SSE2 HWY_SSE2
436
+ #else
437
+ #define HWY_BASELINE_SSE2 0
438
+ #endif
439
+
440
+ #if HWY_ARCH_X86 && (HWY_WANT_SSSE3 || HWY_CHECK_SSSE3)
441
+ #define HWY_BASELINE_SSSE3 HWY_SSSE3
442
+ #else
443
+ #define HWY_BASELINE_SSSE3 0
444
+ #endif
445
+
446
+ #if HWY_ARCH_X86 && (HWY_WANT_SSE4 || (HWY_CHECK_SSE4 && HWY_CHECK_PCLMUL_AES))
447
+ #define HWY_BASELINE_SSE4 HWY_SSE4
448
+ #else
449
+ #define HWY_BASELINE_SSE4 0
450
+ #endif
451
+
452
+ #if HWY_BASELINE_SSE4 != 0 && HWY_CHECK_BMI2_FMA && HWY_CHECK_F16C && \
453
+ defined(__AVX2__)
454
+ #define HWY_BASELINE_AVX2 HWY_AVX2
455
+ #else
456
+ #define HWY_BASELINE_AVX2 0
457
+ #endif
458
+
459
+ // Require everything in AVX2 plus AVX-512 flags (also set by MSVC)
460
+ #if HWY_BASELINE_AVX2 != 0 && defined(__AVX512F__) && defined(__AVX512BW__) && \
461
+ defined(__AVX512DQ__) && defined(__AVX512VL__)
462
+ #define HWY_BASELINE_AVX3 HWY_AVX3
463
+ #else
464
+ #define HWY_BASELINE_AVX3 0
465
+ #endif
466
+
467
+ // TODO(janwas): not yet known whether these will be set by MSVC
468
+ #if HWY_BASELINE_AVX3 != 0 && defined(__AVX512VNNI__) && defined(__VAES__) && \
469
+ defined(__VPCLMULQDQ__) && defined(__AVX512VBMI__) && \
470
+ defined(__AVX512VBMI2__) && defined(__AVX512VPOPCNTDQ__) && \
471
+ defined(__AVX512BITALG__)
472
+ #define HWY_BASELINE_AVX3_DL HWY_AVX3_DL
473
+ #else
474
+ #define HWY_BASELINE_AVX3_DL 0
475
+ #endif
476
+
477
+ // The ZEN4-optimized AVX3 target is numerically lower than AVX3_DL and is thus
478
+ // considered better. Do not enable it unless the user explicitly requests it -
479
+ // we do not want to choose the ZEN4 path on Intel because it could be slower.
480
+ #if defined(HWY_WANT_AVX3_ZEN4) && HWY_BASELINE_AVX3_DL != 0
481
+ #define HWY_BASELINE_AVX3_ZEN4 HWY_AVX3_ZEN4
482
+ #else
483
+ #define HWY_BASELINE_AVX3_ZEN4 0
484
+ #endif
485
+
486
+ #if HWY_BASELINE_AVX3_DL != 0 && defined(__AVX512FP16__)
487
+ #define HWY_BASELINE_AVX3_SPR HWY_AVX3_SPR
488
+ #else
489
+ #define HWY_BASELINE_AVX3_SPR 0
490
+ #endif
491
+
492
+ // RVV requires intrinsics 0.11 or later, see #1156.
493
+ #if HWY_ARCH_RVV && defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 11000
494
+ #define HWY_BASELINE_RVV HWY_RVV
495
+ #else
496
+ #define HWY_BASELINE_RVV 0
497
+ #endif
498
+
499
+ // Allow the user to override this without any guarantee of success.
500
+ #ifndef HWY_BASELINE_TARGETS
501
+ #define HWY_BASELINE_TARGETS \
502
+ (HWY_BASELINE_SCALAR | HWY_BASELINE_WASM | HWY_BASELINE_PPC8 | \
503
+ HWY_BASELINE_PPC9 | HWY_BASELINE_PPC10 | HWY_BASELINE_SVE2 | \
504
+ HWY_BASELINE_SVE | HWY_BASELINE_NEON | HWY_BASELINE_SSE2 | \
505
+ HWY_BASELINE_SSSE3 | HWY_BASELINE_SSE4 | HWY_BASELINE_AVX2 | \
506
+ HWY_BASELINE_AVX3 | HWY_BASELINE_AVX3_DL | HWY_BASELINE_AVX3_ZEN4 | \
507
+ HWY_BASELINE_AVX3_SPR | HWY_BASELINE_RVV)
508
+ #endif // HWY_BASELINE_TARGETS
509
+
510
+ //------------------------------------------------------------------------------
511
+ // Choose target for static dispatch
512
+
513
+ #define HWY_ENABLED_BASELINE HWY_ENABLED(HWY_BASELINE_TARGETS)
514
+ #if HWY_ENABLED_BASELINE == 0
515
+ #error "At least one baseline target must be defined and enabled"
516
+ #endif
517
+
518
+ // Best baseline, used for static dispatch. This is the least-significant 1-bit
519
+ // within HWY_ENABLED_BASELINE and lower bit values imply "better".
520
+ #define HWY_STATIC_TARGET (HWY_ENABLED_BASELINE & -HWY_ENABLED_BASELINE)
521
+
522
+ // Start by assuming static dispatch. If we later use dynamic dispatch, this
523
+ // will be defined to other targets during the multiple-inclusion, and finally
524
+ // return to the initial value. Defining this outside begin/end_target ensures
525
+ // inl headers successfully compile by themselves (required by Bazel).
526
+ #define HWY_TARGET HWY_STATIC_TARGET
527
+
528
+ //------------------------------------------------------------------------------
529
+ // Choose targets for dynamic dispatch according to one of four policies
530
+
531
+ #if 1 < (defined(HWY_COMPILE_ONLY_SCALAR) + defined(HWY_COMPILE_ONLY_EMU128) + \
532
+ defined(HWY_COMPILE_ONLY_STATIC))
533
+ #error "Can only define one of HWY_COMPILE_ONLY_{SCALAR|EMU128|STATIC} - bug?"
534
+ #endif
535
+ // Defining one of HWY_COMPILE_ONLY_* will trump HWY_COMPILE_ALL_ATTAINABLE.
536
+
537
+ // Clang, GCC and MSVC allow runtime dispatch on x86.
538
+ #if HWY_ARCH_X86
539
+ #define HWY_HAVE_RUNTIME_DISPATCH 1
540
+ // On Arm/PPC, currently only GCC does, and we require Linux to detect CPU
541
+ // capabilities.
542
+ #elif (HWY_ARCH_ARM || HWY_ARCH_PPC) && HWY_COMPILER_GCC_ACTUAL && \
543
+ HWY_OS_LINUX && !defined(TOOLCHAIN_MISS_SYS_AUXV_H)
544
+ #define HWY_HAVE_RUNTIME_DISPATCH 1
545
+ #else
546
+ #define HWY_HAVE_RUNTIME_DISPATCH 0
547
+ #endif
548
+
549
+ // AVX3_DL is not widely available yet. To reduce code size and compile time,
550
+ // only include it in the set of attainable targets (for dynamic dispatch) if
551
+ // the user opts in, OR it is in the baseline (we check whether enabled below).
552
+ #if defined(HWY_WANT_AVX3_DL) || (HWY_BASELINE_TARGETS & HWY_AVX3_DL)
553
+ #define HWY_ATTAINABLE_AVX3_DL (HWY_AVX3_DL)
554
+ #else
555
+ #define HWY_ATTAINABLE_AVX3_DL 0
556
+ #endif
557
+
558
+ #if HWY_ARCH_ARM_A64 && HWY_HAVE_RUNTIME_DISPATCH
559
+ #define HWY_ATTAINABLE_NEON (HWY_NEON | HWY_NEON_WITHOUT_AES)
560
+ #elif HWY_ARCH_ARM // static dispatch, or HWY_ARCH_ARM_V7
561
+ #define HWY_ATTAINABLE_NEON (HWY_BASELINE_NEON)
562
+ #else
563
+ #define HWY_ATTAINABLE_NEON 0
564
+ #endif
565
+
566
+ #if HWY_ARCH_ARM_A64 && (HWY_HAVE_RUNTIME_DISPATCH || \
567
+ (HWY_ENABLED_BASELINE & (HWY_SVE | HWY_SVE_256)))
568
+ #define HWY_ATTAINABLE_SVE (HWY_SVE | HWY_SVE_256)
569
+ #else
570
+ #define HWY_ATTAINABLE_SVE 0
571
+ #endif
572
+
573
+ #if HWY_ARCH_ARM_A64 && (HWY_HAVE_RUNTIME_DISPATCH || \
574
+ (HWY_ENABLED_BASELINE & (HWY_SVE2 | HWY_SVE2_128)))
575
+ #define HWY_ATTAINABLE_SVE2 (HWY_SVE2 | HWY_SVE2_128)
576
+ #else
577
+ #define HWY_ATTAINABLE_SVE2 0
578
+ #endif
579
+
580
+ #if HWY_ARCH_PPC && defined(__ALTIVEC__) && \
581
+ (!HWY_COMPILER_CLANG || HWY_BASELINE_PPC8 != 0)
582
+ #define HWY_ATTAINABLE_PPC (HWY_PPC8 | HWY_PPC9 | HWY_PPC10)
583
+ #else
584
+ #define HWY_ATTAINABLE_PPC 0
585
+ #endif
586
+
587
+ // Attainable means enabled and the compiler allows intrinsics (even when not
588
+ // allowed to autovectorize). Used in policies 3 and 4 below.
589
+ #if HWY_ARCH_X86
590
+ #define HWY_ATTAINABLE_TARGETS \
591
+ HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_SSE2 | HWY_SSSE3 | HWY_SSE4 | \
592
+ HWY_AVX2 | HWY_AVX3 | HWY_ATTAINABLE_AVX3_DL | HWY_AVX3_ZEN4 | \
593
+ HWY_AVX3_SPR)
594
+ #elif HWY_ARCH_ARM
595
+ #define HWY_ATTAINABLE_TARGETS \
596
+ HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_NEON | HWY_ATTAINABLE_SVE | \
597
+ HWY_ATTAINABLE_SVE2)
598
+ #elif HWY_ARCH_PPC
599
+ #define HWY_ATTAINABLE_TARGETS \
600
+ HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_PPC)
601
+ #else
602
+ #define HWY_ATTAINABLE_TARGETS (HWY_ENABLED_BASELINE)
603
+ #endif // HWY_ARCH_*
604
+
605
+ // 1) For older compilers: avoid SIMD intrinsics, but still support all ops.
606
+ #if defined(HWY_COMPILE_ONLY_EMU128) && !HWY_BROKEN_EMU128
607
+ #undef HWY_STATIC_TARGET
608
+ #define HWY_STATIC_TARGET HWY_EMU128 // override baseline
609
+ #define HWY_TARGETS HWY_EMU128
610
+
611
+ // 1b) HWY_SCALAR is less capable than HWY_EMU128 (which supports all ops), but
612
+ // we currently still support it for backwards compatibility.
613
+ #elif defined(HWY_COMPILE_ONLY_SCALAR) || \
614
+ (defined(HWY_COMPILE_ONLY_EMU128) && HWY_BROKEN_EMU128)
615
+ #undef HWY_STATIC_TARGET
616
+ #define HWY_STATIC_TARGET HWY_SCALAR // override baseline
617
+ #define HWY_TARGETS HWY_SCALAR
618
+
619
+ // 2) For forcing static dispatch without code changes (removing HWY_EXPORT)
620
+ #elif defined(HWY_COMPILE_ONLY_STATIC)
621
+ #define HWY_TARGETS HWY_STATIC_TARGET
622
+
623
+ // 3) For tests: include all attainable targets (in particular: scalar)
624
+ #elif defined(HWY_COMPILE_ALL_ATTAINABLE) || defined(HWY_IS_TEST)
625
+ #define HWY_TARGETS HWY_ATTAINABLE_TARGETS
626
+
627
+ // 4) Default: attainable WITHOUT non-best baseline. This reduces code size by
628
+ // excluding superseded targets, in particular scalar. Note: HWY_STATIC_TARGET
629
+ // may be 2^62 (HWY_SCALAR), so we must not left-shift/add it. Subtracting one
630
+ // sets all lower bits (better targets), then we also include the static target.
631
+ #else
632
+ #define HWY_TARGETS \
633
+ (HWY_ATTAINABLE_TARGETS & ((HWY_STATIC_TARGET - 1LL) | HWY_STATIC_TARGET))
634
+
635
+ #endif // target policy
636
+
637
+ // HWY_ONCE and the multiple-inclusion mechanism rely on HWY_STATIC_TARGET being
638
+ // one of the dynamic targets. This also implies HWY_TARGETS != 0 and
639
+ // (HWY_TARGETS & HWY_ENABLED_BASELINE) != 0.
640
+ #if (HWY_TARGETS & HWY_STATIC_TARGET) == 0
641
+ #error "Logic error: best baseline should be included in dynamic targets"
642
+ #endif
643
+
644
+ #endif // HIGHWAY_HWY_DETECT_TARGETS_H_