@img/sharp-libvips-dev 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/cplusplus/VConnection.cpp +54 -54
- package/cplusplus/VError.cpp +20 -18
- package/cplusplus/VImage.cpp +636 -589
- package/cplusplus/VInterpolate.cpp +22 -22
- package/cplusplus/VRegion.cpp +4 -4
- package/cplusplus/vips-operators.cpp +2326 -2301
- package/include/aom/aom_codec.h +10 -6
- package/include/aom/aom_decoder.h +1 -1
- package/include/aom/aom_encoder.h +9 -2
- package/include/aom/aomcx.h +72 -3
- package/include/cairo/cairo-ft.h +1 -1
- package/include/cairo/cairo-gobject.h +8 -0
- package/include/cairo/cairo-svg.h +3 -3
- package/include/cairo/cairo-version.h +2 -2
- package/include/cairo/cairo.h +91 -24
- package/include/glib-2.0/glib/gmacros.h +1 -1
- package/include/glib-2.0/glib/gtestutils.h +1 -1
- package/include/glib-2.0/gobject/gtype.h +7 -7
- package/include/harfbuzz/hb-version.h +2 -2
- package/include/hwy/aligned_allocator.h +211 -0
- package/include/hwy/base.h +1517 -0
- package/include/hwy/cache_control.h +108 -0
- package/include/hwy/detect_compiler_arch.h +281 -0
- package/include/hwy/detect_targets.h +644 -0
- package/include/hwy/foreach_target.h +340 -0
- package/include/hwy/highway.h +435 -0
- package/include/hwy/highway_export.h +74 -0
- package/include/hwy/nanobenchmark.h +171 -0
- package/include/hwy/ops/arm_neon-inl.h +8913 -0
- package/include/hwy/ops/arm_sve-inl.h +5105 -0
- package/include/hwy/ops/emu128-inl.h +2811 -0
- package/include/hwy/ops/generic_ops-inl.h +4745 -0
- package/include/hwy/ops/ppc_vsx-inl.h +5716 -0
- package/include/hwy/ops/rvv-inl.h +5070 -0
- package/include/hwy/ops/scalar-inl.h +1995 -0
- package/include/hwy/ops/set_macros-inl.h +578 -0
- package/include/hwy/ops/shared-inl.h +539 -0
- package/include/hwy/ops/tuple-inl.h +125 -0
- package/include/hwy/ops/wasm_128-inl.h +5917 -0
- package/include/hwy/ops/x86_128-inl.h +11173 -0
- package/include/hwy/ops/x86_256-inl.h +7529 -0
- package/include/hwy/ops/x86_512-inl.h +6849 -0
- package/include/hwy/per_target.h +44 -0
- package/include/hwy/print-inl.h +62 -0
- package/include/hwy/print.h +75 -0
- package/include/hwy/robust_statistics.h +148 -0
- package/include/hwy/targets.h +338 -0
- package/include/hwy/timer-inl.h +200 -0
- package/include/hwy/timer.h +55 -0
- package/include/jconfig.h +2 -2
- package/include/jpeglib.h +3 -2
- package/include/libheif/heif.h +461 -384
- package/include/libheif/heif_cxx.h +4 -1
- package/include/libheif/heif_plugin.h +1 -1
- package/include/libheif/heif_properties.h +138 -0
- package/include/libheif/heif_regions.h +866 -0
- package/include/libheif/heif_version.h +3 -3
- package/include/libpng16/pnglibconf.h +1 -1
- package/include/pnglibconf.h +1 -1
- package/include/vips/VConnection8.h +43 -49
- package/include/vips/VError8.h +27 -24
- package/include/vips/VImage8.h +4861 -4597
- package/include/vips/VInterpolate8.h +24 -27
- package/include/vips/VRegion8.h +32 -33
- package/include/vips/arithmetic.h +169 -169
- package/include/vips/basic.h +33 -33
- package/include/vips/buf.h +56 -54
- package/include/vips/colour.h +95 -95
- package/include/vips/connection.h +190 -193
- package/include/vips/conversion.h +91 -91
- package/include/vips/convolution.h +36 -30
- package/include/vips/create.h +63 -63
- package/include/vips/dbuf.h +35 -37
- package/include/vips/debug.h +65 -33
- package/include/vips/draw.h +41 -41
- package/include/vips/enumtypes.h +54 -51
- package/include/vips/error.h +63 -63
- package/include/vips/foreign.h +263 -223
- package/include/vips/format.h +48 -48
- package/include/vips/freqfilt.h +22 -22
- package/include/vips/gate.h +55 -47
- package/include/vips/generate.h +34 -34
- package/include/vips/header.h +111 -101
- package/include/vips/histogram.h +28 -28
- package/include/vips/image.h +213 -213
- package/include/vips/interpolate.h +40 -41
- package/include/vips/memory.h +61 -52
- package/include/vips/morphology.h +24 -24
- package/include/vips/mosaicing.h +32 -33
- package/include/vips/object.h +371 -357
- package/include/vips/operation.h +68 -67
- package/include/vips/private.h +76 -76
- package/include/vips/rect.h +26 -26
- package/include/vips/region.h +92 -92
- package/include/vips/resample.h +38 -38
- package/include/vips/sbuf.h +53 -54
- package/include/vips/semaphore.h +24 -24
- package/include/vips/thread.h +30 -27
- package/include/vips/threadpool.h +48 -49
- package/include/vips/transform.h +39 -39
- package/include/vips/type.h +90 -85
- package/include/vips/util.h +274 -229
- package/include/vips/vector.h +24 -144
- package/include/vips/version.h +9 -9
- package/include/vips/vips.h +41 -40
- package/include/zlib.h +23 -19
- package/package.json +1 -1
- package/versions.json +9 -9
|
@@ -0,0 +1,578 @@
|
|
|
1
|
+
// Copyright 2020 Google LLC
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
//
|
|
4
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
// you may not use this file except in compliance with the License.
|
|
6
|
+
// You may obtain a copy of the License at
|
|
7
|
+
//
|
|
8
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
//
|
|
10
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
// See the License for the specific language governing permissions and
|
|
14
|
+
// limitations under the License.
|
|
15
|
+
|
|
16
|
+
// Sets macros based on HWY_TARGET.
|
|
17
|
+
|
|
18
|
+
// This include guard is toggled by foreach_target, so avoid the usual _H_
|
|
19
|
+
// suffix to prevent copybara from renaming it.
|
|
20
|
+
#if defined(HWY_SET_MACROS_PER_TARGET) == defined(HWY_TARGET_TOGGLE)
|
|
21
|
+
#ifdef HWY_SET_MACROS_PER_TARGET
|
|
22
|
+
#undef HWY_SET_MACROS_PER_TARGET
|
|
23
|
+
#else
|
|
24
|
+
#define HWY_SET_MACROS_PER_TARGET
|
|
25
|
+
#endif
|
|
26
|
+
|
|
27
|
+
#endif // HWY_SET_MACROS_PER_TARGET
|
|
28
|
+
|
|
29
|
+
#include "hwy/detect_compiler_arch.h" // IWYU: export
|
|
30
|
+
#include "hwy/detect_targets.h" // IWYU: export
|
|
31
|
+
|
|
32
|
+
#undef HWY_NAMESPACE
|
|
33
|
+
#undef HWY_ALIGN
|
|
34
|
+
#undef HWY_MAX_BYTES
|
|
35
|
+
#undef HWY_LANES
|
|
36
|
+
|
|
37
|
+
#undef HWY_HAVE_SCALABLE
|
|
38
|
+
#undef HWY_HAVE_TUPLE
|
|
39
|
+
#undef HWY_HAVE_INTEGER64
|
|
40
|
+
#undef HWY_HAVE_FLOAT16
|
|
41
|
+
#undef HWY_HAVE_FLOAT64
|
|
42
|
+
#undef HWY_MEM_OPS_MIGHT_FAULT
|
|
43
|
+
#undef HWY_NATIVE_FMA
|
|
44
|
+
#undef HWY_CAP_GE256
|
|
45
|
+
#undef HWY_CAP_GE512
|
|
46
|
+
|
|
47
|
+
// Supported on all targets; on RVV it additionally requires GCC 14 or an upcoming Clang.
|
|
48
|
+
#if HWY_TARGET == HWY_RVV && \
|
|
49
|
+
((HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1400) || \
|
|
50
|
+
(HWY_COMPILER_CLANG))
|
|
51
|
+
#define HWY_HAVE_TUPLE 0
|
|
52
|
+
#else
|
|
53
|
+
#define HWY_HAVE_TUPLE 1
|
|
54
|
+
#endif
|
|
55
|
+
|
|
56
|
+
// For internal use (clamping/validating N for Simd<>)
|
|
57
|
+
#undef HWY_MAX_N
|
|
58
|
+
#if HWY_TARGET == HWY_SCALAR
|
|
59
|
+
#define HWY_MAX_N 1
|
|
60
|
+
#else
|
|
61
|
+
#define HWY_MAX_N 65536
|
|
62
|
+
#endif
|
|
63
|
+
|
|
64
|
+
// For internal use (clamping kPow2 for Simd<>)
|
|
65
|
+
#undef HWY_MAX_POW2
|
|
66
|
+
// For HWY_TARGET == HWY_RVV, LMUL <= 8. Even on other targets, we want to
|
|
67
|
+
// support say Rebind<uint64_t, Simd<uint8_t, 1, 0>> d; whose kPow2 is also 3.
|
|
68
|
+
// However, those other targets do not actually support multiple vectors, and
|
|
69
|
+
// thus Lanes(d) must not exceed Lanes(ScalableTag<T>()).
|
|
70
|
+
#define HWY_MAX_POW2 3
|
|
71
|
+
|
|
72
|
+
// User-visible. Loose lower bound that guarantees HWY_MAX_BYTES >>
|
|
73
|
+
// (-HWY_MIN_POW2) <= 1. Useful for terminating compile-time recursions.
|
|
74
|
+
#undef HWY_MIN_POW2
|
|
75
|
+
#if HWY_TARGET == HWY_RVV
|
|
76
|
+
#define HWY_MIN_POW2 -16
|
|
77
|
+
#else
|
|
78
|
+
// Tighter bound for other targets, whose vectors are smaller, to potentially
|
|
79
|
+
// save compile time.
|
|
80
|
+
#define HWY_MIN_POW2 -8
|
|
81
|
+
#endif // HWY_TARGET == HWY_RVV
|
|
82
|
+
|
|
83
|
+
#undef HWY_TARGET_STR
|
|
84
|
+
|
|
85
|
+
#if defined(HWY_DISABLE_PCLMUL_AES)
|
|
86
|
+
#define HWY_TARGET_STR_PCLMUL_AES ""
|
|
87
|
+
#else
|
|
88
|
+
#define HWY_TARGET_STR_PCLMUL_AES ",pclmul,aes"
|
|
89
|
+
#endif
|
|
90
|
+
|
|
91
|
+
#if defined(HWY_DISABLE_BMI2_FMA)
|
|
92
|
+
#define HWY_TARGET_STR_BMI2_FMA ""
|
|
93
|
+
#else
|
|
94
|
+
#define HWY_TARGET_STR_BMI2_FMA ",bmi,bmi2,fma"
|
|
95
|
+
#endif
|
|
96
|
+
|
|
97
|
+
#if defined(HWY_DISABLE_F16C)
|
|
98
|
+
#define HWY_TARGET_STR_F16C ""
|
|
99
|
+
#else
|
|
100
|
+
#define HWY_TARGET_STR_F16C ",f16c"
|
|
101
|
+
#endif
|
|
102
|
+
|
|
103
|
+
#define HWY_TARGET_STR_SSE2 "sse2"
|
|
104
|
+
|
|
105
|
+
#define HWY_TARGET_STR_SSSE3 "sse2,ssse3"
|
|
106
|
+
|
|
107
|
+
#define HWY_TARGET_STR_SSE4 \
|
|
108
|
+
HWY_TARGET_STR_SSSE3 ",sse4.1,sse4.2" HWY_TARGET_STR_PCLMUL_AES
|
|
109
|
+
// Include previous targets, which are the half-vectors of the next target.
|
|
110
|
+
#define HWY_TARGET_STR_AVX2 \
|
|
111
|
+
HWY_TARGET_STR_SSE4 ",avx,avx2" HWY_TARGET_STR_BMI2_FMA HWY_TARGET_STR_F16C
|
|
112
|
+
#define HWY_TARGET_STR_AVX3 \
|
|
113
|
+
HWY_TARGET_STR_AVX2 ",avx512f,avx512cd,avx512vl,avx512dq,avx512bw"
|
|
114
|
+
#define HWY_TARGET_STR_AVX3_DL \
|
|
115
|
+
HWY_TARGET_STR_AVX3 \
|
|
116
|
+
",vpclmulqdq,avx512vbmi,avx512vbmi2,vaes,avx512vnni,avx512bitalg," \
|
|
117
|
+
"avx512vpopcntdq,gfni"
|
|
118
|
+
|
|
119
|
+
#define HWY_TARGET_STR_AVX3_SPR HWY_TARGET_STR_AVX3_DL ",avx512fp16"
|
|
120
|
+
|
|
121
|
+
#if defined(HWY_DISABLE_PPC8_CRYPTO)
|
|
122
|
+
#define HWY_TARGET_STR_PPC8_CRYPTO ""
|
|
123
|
+
#else
|
|
124
|
+
#define HWY_TARGET_STR_PPC8_CRYPTO ",crypto"
|
|
125
|
+
#endif
|
|
126
|
+
|
|
127
|
+
#define HWY_TARGET_STR_PPC8 \
|
|
128
|
+
"altivec,vsx,power8-vector" HWY_TARGET_STR_PPC8_CRYPTO
|
|
129
|
+
#define HWY_TARGET_STR_PPC9 HWY_TARGET_STR_PPC8 ",power9-vector"
|
|
130
|
+
|
|
131
|
+
#if HWY_COMPILER_CLANG
|
|
132
|
+
#define HWY_TARGET_STR_PPC10 HWY_TARGET_STR_PPC9 ",power10-vector"
|
|
133
|
+
#else
|
|
134
|
+
#define HWY_TARGET_STR_PPC10 HWY_TARGET_STR_PPC9 ",cpu=power10"
|
|
135
|
+
#endif
|
|
136
|
+
|
|
137
|
+
// Outside the include guard so we redefine HWY_TARGET_STR on each include,
|
|
138
|
+
// governed by the current HWY_TARGET.
|
|
139
|
+
|
|
140
|
+
//-----------------------------------------------------------------------------
|
|
141
|
+
// SSE2
|
|
142
|
+
#if HWY_TARGET == HWY_SSE2
|
|
143
|
+
|
|
144
|
+
#define HWY_NAMESPACE N_SSE2
|
|
145
|
+
#define HWY_ALIGN alignas(16)
|
|
146
|
+
#define HWY_MAX_BYTES 16
|
|
147
|
+
#define HWY_LANES(T) (16 / sizeof(T))
|
|
148
|
+
|
|
149
|
+
#define HWY_HAVE_SCALABLE 0
|
|
150
|
+
#define HWY_HAVE_INTEGER64 1
|
|
151
|
+
#define HWY_HAVE_FLOAT16 0
|
|
152
|
+
#define HWY_HAVE_FLOAT64 1
|
|
153
|
+
#define HWY_MEM_OPS_MIGHT_FAULT 1
|
|
154
|
+
#define HWY_NATIVE_FMA 0
|
|
155
|
+
#define HWY_CAP_GE256 0
|
|
156
|
+
#define HWY_CAP_GE512 0
|
|
157
|
+
|
|
158
|
+
#define HWY_TARGET_STR HWY_TARGET_STR_SSE2
|
|
159
|
+
//-----------------------------------------------------------------------------
|
|
160
|
+
// SSSE3
|
|
161
|
+
#elif HWY_TARGET == HWY_SSSE3
|
|
162
|
+
|
|
163
|
+
#define HWY_NAMESPACE N_SSSE3
|
|
164
|
+
#define HWY_ALIGN alignas(16)
|
|
165
|
+
#define HWY_MAX_BYTES 16
|
|
166
|
+
#define HWY_LANES(T) (16 / sizeof(T))
|
|
167
|
+
|
|
168
|
+
#define HWY_HAVE_SCALABLE 0
|
|
169
|
+
#define HWY_HAVE_INTEGER64 1
|
|
170
|
+
#define HWY_HAVE_FLOAT16 0
|
|
171
|
+
#define HWY_HAVE_FLOAT64 1
|
|
172
|
+
#define HWY_MEM_OPS_MIGHT_FAULT 1
|
|
173
|
+
#define HWY_NATIVE_FMA 0
|
|
174
|
+
#define HWY_CAP_GE256 0
|
|
175
|
+
#define HWY_CAP_GE512 0
|
|
176
|
+
|
|
177
|
+
#define HWY_TARGET_STR HWY_TARGET_STR_SSSE3
|
|
178
|
+
|
|
179
|
+
//-----------------------------------------------------------------------------
|
|
180
|
+
// SSE4
|
|
181
|
+
#elif HWY_TARGET == HWY_SSE4
|
|
182
|
+
|
|
183
|
+
#define HWY_NAMESPACE N_SSE4
|
|
184
|
+
#define HWY_ALIGN alignas(16)
|
|
185
|
+
#define HWY_MAX_BYTES 16
|
|
186
|
+
#define HWY_LANES(T) (16 / sizeof(T))
|
|
187
|
+
|
|
188
|
+
#define HWY_HAVE_SCALABLE 0
|
|
189
|
+
#define HWY_HAVE_INTEGER64 1
|
|
190
|
+
#define HWY_HAVE_FLOAT16 0
|
|
191
|
+
#define HWY_HAVE_FLOAT64 1
|
|
192
|
+
#define HWY_MEM_OPS_MIGHT_FAULT 1
|
|
193
|
+
#define HWY_NATIVE_FMA 0
|
|
194
|
+
#define HWY_CAP_GE256 0
|
|
195
|
+
#define HWY_CAP_GE512 0
|
|
196
|
+
|
|
197
|
+
#define HWY_TARGET_STR HWY_TARGET_STR_SSE4
|
|
198
|
+
|
|
199
|
+
//-----------------------------------------------------------------------------
|
|
200
|
+
// AVX2
|
|
201
|
+
#elif HWY_TARGET == HWY_AVX2
|
|
202
|
+
|
|
203
|
+
#define HWY_NAMESPACE N_AVX2
|
|
204
|
+
#define HWY_ALIGN alignas(32)
|
|
205
|
+
#define HWY_MAX_BYTES 32
|
|
206
|
+
#define HWY_LANES(T) (32 / sizeof(T))
|
|
207
|
+
|
|
208
|
+
#define HWY_HAVE_SCALABLE 0
|
|
209
|
+
#define HWY_HAVE_INTEGER64 1
|
|
210
|
+
#define HWY_HAVE_FLOAT16 0
|
|
211
|
+
#define HWY_HAVE_FLOAT64 1
|
|
212
|
+
#define HWY_MEM_OPS_MIGHT_FAULT 1
|
|
213
|
+
|
|
214
|
+
#ifdef HWY_DISABLE_BMI2_FMA
|
|
215
|
+
#define HWY_NATIVE_FMA 0
|
|
216
|
+
#else
|
|
217
|
+
#define HWY_NATIVE_FMA 1
|
|
218
|
+
#endif
|
|
219
|
+
|
|
220
|
+
#define HWY_CAP_GE256 1
|
|
221
|
+
#define HWY_CAP_GE512 0
|
|
222
|
+
|
|
223
|
+
#define HWY_TARGET_STR HWY_TARGET_STR_AVX2
|
|
224
|
+
|
|
225
|
+
//-----------------------------------------------------------------------------
|
|
226
|
+
// AVX3, AVX3_DL, AVX3_ZEN4, AVX3_SPR
|
|
227
|
+
#elif HWY_TARGET == HWY_AVX3 || HWY_TARGET == HWY_AVX3_DL || \
|
|
228
|
+
HWY_TARGET == HWY_AVX3_ZEN4 || HWY_TARGET == HWY_AVX3_SPR
|
|
229
|
+
|
|
230
|
+
#define HWY_ALIGN alignas(64)
|
|
231
|
+
#define HWY_MAX_BYTES 64
|
|
232
|
+
#define HWY_LANES(T) (64 / sizeof(T))
|
|
233
|
+
|
|
234
|
+
#define HWY_HAVE_SCALABLE 0
|
|
235
|
+
#define HWY_HAVE_INTEGER64 1
|
|
236
|
+
#if (HWY_TARGET == HWY_AVX3_SPR) && 0 // TODO(janwas): enable after testing
|
|
237
|
+
#define HWY_HAVE_FLOAT16 1
|
|
238
|
+
#else
|
|
239
|
+
#define HWY_HAVE_FLOAT16 0
|
|
240
|
+
#endif
|
|
241
|
+
#define HWY_HAVE_FLOAT64 1
|
|
242
|
+
#define HWY_MEM_OPS_MIGHT_FAULT 0
|
|
243
|
+
#define HWY_NATIVE_FMA 1
|
|
244
|
+
#define HWY_CAP_GE256 1
|
|
245
|
+
#define HWY_CAP_GE512 1
|
|
246
|
+
|
|
247
|
+
#if HWY_TARGET == HWY_AVX3
|
|
248
|
+
|
|
249
|
+
#define HWY_NAMESPACE N_AVX3
|
|
250
|
+
#define HWY_TARGET_STR HWY_TARGET_STR_AVX3
|
|
251
|
+
|
|
252
|
+
#elif HWY_TARGET == HWY_AVX3_DL
|
|
253
|
+
|
|
254
|
+
#define HWY_NAMESPACE N_AVX3_DL
|
|
255
|
+
#define HWY_TARGET_STR HWY_TARGET_STR_AVX3_DL
|
|
256
|
+
|
|
257
|
+
#elif HWY_TARGET == HWY_AVX3_ZEN4
|
|
258
|
+
|
|
259
|
+
#define HWY_NAMESPACE N_AVX3_ZEN4
|
|
260
|
+
// Currently the same as HWY_AVX3_DL: both support Icelake.
|
|
261
|
+
#define HWY_TARGET_STR HWY_TARGET_STR_AVX3_DL
|
|
262
|
+
|
|
263
|
+
#elif HWY_TARGET == HWY_AVX3_SPR
|
|
264
|
+
|
|
265
|
+
#define HWY_NAMESPACE N_AVX3_SPR
|
|
266
|
+
#define HWY_TARGET_STR HWY_TARGET_STR_AVX3_SPR
|
|
267
|
+
|
|
268
|
+
#else
|
|
269
|
+
#error "Logic error"
|
|
270
|
+
#endif // HWY_TARGET
|
|
271
|
+
|
|
272
|
+
//-----------------------------------------------------------------------------
|
|
273
|
+
// PPC8, PPC9, PPC10
|
|
274
|
+
#elif HWY_TARGET == HWY_PPC8 || HWY_TARGET == HWY_PPC9 || \
|
|
275
|
+
HWY_TARGET == HWY_PPC10
|
|
276
|
+
|
|
277
|
+
#define HWY_ALIGN alignas(16)
|
|
278
|
+
#define HWY_MAX_BYTES 16
|
|
279
|
+
#define HWY_LANES(T) (16 / sizeof(T))
|
|
280
|
+
|
|
281
|
+
#define HWY_HAVE_SCALABLE 0
|
|
282
|
+
#define HWY_HAVE_INTEGER64 1
|
|
283
|
+
#define HWY_HAVE_FLOAT16 0
|
|
284
|
+
#define HWY_HAVE_FLOAT64 1
|
|
285
|
+
#define HWY_MEM_OPS_MIGHT_FAULT 1
|
|
286
|
+
#define HWY_NATIVE_FMA 1
|
|
287
|
+
#define HWY_CAP_GE256 0
|
|
288
|
+
#define HWY_CAP_GE512 0
|
|
289
|
+
|
|
290
|
+
#if HWY_TARGET == HWY_PPC8
|
|
291
|
+
|
|
292
|
+
#define HWY_NAMESPACE N_PPC8
|
|
293
|
+
#define HWY_TARGET_STR HWY_TARGET_STR_PPC8
|
|
294
|
+
|
|
295
|
+
#elif HWY_TARGET == HWY_PPC9
|
|
296
|
+
|
|
297
|
+
#define HWY_NAMESPACE N_PPC9
|
|
298
|
+
#define HWY_TARGET_STR HWY_TARGET_STR_PPC9
|
|
299
|
+
|
|
300
|
+
#elif HWY_TARGET == HWY_PPC10
|
|
301
|
+
|
|
302
|
+
#define HWY_NAMESPACE N_PPC10
|
|
303
|
+
#define HWY_TARGET_STR HWY_TARGET_STR_PPC10
|
|
304
|
+
|
|
305
|
+
#else
|
|
306
|
+
#error "Logic error"
|
|
307
|
+
#endif // HWY_TARGET
|
|
308
|
+
|
|
309
|
+
//-----------------------------------------------------------------------------
|
|
310
|
+
// NEON, NEON_WITHOUT_AES
|
|
311
|
+
#elif HWY_TARGET == HWY_NEON || HWY_TARGET == HWY_NEON_WITHOUT_AES
|
|
312
|
+
|
|
313
|
+
#define HWY_ALIGN alignas(16)
|
|
314
|
+
#define HWY_MAX_BYTES 16
|
|
315
|
+
#define HWY_LANES(T) (16 / sizeof(T))
|
|
316
|
+
|
|
317
|
+
#define HWY_HAVE_SCALABLE 0
|
|
318
|
+
#define HWY_HAVE_INTEGER64 1
|
|
319
|
+
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
|
|
320
|
+
#define HWY_HAVE_FLOAT16 1
|
|
321
|
+
#else
|
|
322
|
+
#define HWY_HAVE_FLOAT16 0
|
|
323
|
+
#endif
|
|
324
|
+
|
|
325
|
+
#if HWY_ARCH_ARM_A64
|
|
326
|
+
#define HWY_HAVE_FLOAT64 1
|
|
327
|
+
#else
|
|
328
|
+
#define HWY_HAVE_FLOAT64 0
|
|
329
|
+
#endif
|
|
330
|
+
|
|
331
|
+
#define HWY_MEM_OPS_MIGHT_FAULT 1
|
|
332
|
+
|
|
333
|
+
#if defined(__ARM_VFPV4__) || HWY_ARCH_ARM_A64
|
|
334
|
+
#define HWY_NATIVE_FMA 1
|
|
335
|
+
#else
|
|
336
|
+
#define HWY_NATIVE_FMA 0
|
|
337
|
+
#endif
|
|
338
|
+
|
|
339
|
+
#define HWY_CAP_GE256 0
|
|
340
|
+
#define HWY_CAP_GE512 0
|
|
341
|
+
|
|
342
|
+
#if HWY_TARGET == HWY_NEON_WITHOUT_AES
|
|
343
|
+
#define HWY_NAMESPACE N_NEON_WITHOUT_AES
|
|
344
|
+
#else
|
|
345
|
+
#define HWY_NAMESPACE N_NEON
|
|
346
|
+
#endif
|
|
347
|
+
|
|
348
|
+
// Can use pragmas instead of -march compiler flag
|
|
349
|
+
#if HWY_HAVE_RUNTIME_DISPATCH
|
|
350
|
+
#if HWY_ARCH_ARM_V7
|
|
351
|
+
|
|
352
|
+
// __attribute__((target("+neon-vfpv4"))) is only supported by GCC >= 8.
|
|
353
|
+
#if HWY_COMPILER_GCC_ACTUAL >= 800
|
|
354
|
+
#define HWY_TARGET_STR "+neon-vfpv4"
|
|
355
|
+
#else // HWY_COMPILER_GCC_ACTUAL < 800
|
|
356
|
+
// Do not define HWY_TARGET_STR (no pragma).
|
|
357
|
+
#endif // HWY_COMPILER_GCC_ACTUAL
|
|
358
|
+
|
|
359
|
+
#else // !HWY_ARCH_ARM_V7
|
|
360
|
+
|
|
361
|
+
#if HWY_TARGET == HWY_NEON_WITHOUT_AES
|
|
362
|
+
// Do not define HWY_TARGET_STR (no pragma).
|
|
363
|
+
#else
|
|
364
|
+
#define HWY_TARGET_STR "+crypto"
|
|
365
|
+
#endif // HWY_TARGET == HWY_NEON_WITHOUT_AES
|
|
366
|
+
|
|
367
|
+
#endif // HWY_ARCH_ARM_V7
|
|
368
|
+
#else // !HWY_HAVE_RUNTIME_DISPATCH
|
|
369
|
+
// HWY_TARGET_STR remains undefined
|
|
370
|
+
#endif
|
|
371
|
+
|
|
372
|
+
//-----------------------------------------------------------------------------
|
|
373
|
+
// SVE, SVE2, SVE_256, SVE2_128
|
|
374
|
+
#elif HWY_TARGET == HWY_SVE2 || HWY_TARGET == HWY_SVE || \
|
|
375
|
+
HWY_TARGET == HWY_SVE_256 || HWY_TARGET == HWY_SVE2_128
|
|
376
|
+
|
|
377
|
+
// SVE only requires lane alignment, not natural alignment of the entire vector.
|
|
378
|
+
#define HWY_ALIGN alignas(8)
|
|
379
|
+
|
|
380
|
+
// Value ensures MaxLanes() is the tightest possible upper bound to reduce
|
|
381
|
+
// overallocation.
|
|
382
|
+
#define HWY_LANES(T) ((HWY_MAX_BYTES) / sizeof(T))
|
|
383
|
+
|
|
384
|
+
#define HWY_HAVE_INTEGER64 1
|
|
385
|
+
#define HWY_HAVE_FLOAT16 0
|
|
386
|
+
#define HWY_HAVE_FLOAT64 1
|
|
387
|
+
#define HWY_MEM_OPS_MIGHT_FAULT 0
|
|
388
|
+
#define HWY_NATIVE_FMA 1
|
|
389
|
+
#define HWY_CAP_GE256 0
|
|
390
|
+
#define HWY_CAP_GE512 0
|
|
391
|
+
|
|
392
|
+
#if HWY_TARGET == HWY_SVE2
|
|
393
|
+
#define HWY_NAMESPACE N_SVE2
|
|
394
|
+
#define HWY_MAX_BYTES 256
|
|
395
|
+
#define HWY_HAVE_SCALABLE 1
|
|
396
|
+
#elif HWY_TARGET == HWY_SVE_256
|
|
397
|
+
#define HWY_NAMESPACE N_SVE_256
|
|
398
|
+
#define HWY_MAX_BYTES 32
|
|
399
|
+
#define HWY_HAVE_SCALABLE 0
|
|
400
|
+
#elif HWY_TARGET == HWY_SVE2_128
|
|
401
|
+
#define HWY_NAMESPACE N_SVE2_128
|
|
402
|
+
#define HWY_MAX_BYTES 16
|
|
403
|
+
#define HWY_HAVE_SCALABLE 0
|
|
404
|
+
#else
|
|
405
|
+
#define HWY_NAMESPACE N_SVE
|
|
406
|
+
#define HWY_MAX_BYTES 256
|
|
407
|
+
#define HWY_HAVE_SCALABLE 1
|
|
408
|
+
#endif
|
|
409
|
+
|
|
410
|
+
// Can use pragmas instead of -march compiler flag
|
|
411
|
+
#if HWY_HAVE_RUNTIME_DISPATCH
|
|
412
|
+
#if HWY_TARGET == HWY_SVE2 || HWY_TARGET == HWY_SVE2_128
|
|
413
|
+
#define HWY_TARGET_STR "+sve2-aes"
|
|
414
|
+
#else
|
|
415
|
+
#define HWY_TARGET_STR "+sve"
|
|
416
|
+
#endif
|
|
417
|
+
#else
|
|
418
|
+
// HWY_TARGET_STR remains undefined
|
|
419
|
+
#endif
|
|
420
|
+
|
|
421
|
+
//-----------------------------------------------------------------------------
|
|
422
|
+
// WASM
|
|
423
|
+
#elif HWY_TARGET == HWY_WASM
|
|
424
|
+
|
|
425
|
+
#define HWY_ALIGN alignas(16)
|
|
426
|
+
#define HWY_MAX_BYTES 16
|
|
427
|
+
#define HWY_LANES(T) (16 / sizeof(T))
|
|
428
|
+
|
|
429
|
+
#define HWY_HAVE_SCALABLE 0
|
|
430
|
+
#define HWY_HAVE_INTEGER64 1
|
|
431
|
+
#define HWY_HAVE_FLOAT16 0
|
|
432
|
+
#define HWY_HAVE_FLOAT64 1
|
|
433
|
+
#define HWY_MEM_OPS_MIGHT_FAULT 1
|
|
434
|
+
#define HWY_NATIVE_FMA 0
|
|
435
|
+
#define HWY_CAP_GE256 0
|
|
436
|
+
#define HWY_CAP_GE512 0
|
|
437
|
+
|
|
438
|
+
#define HWY_NAMESPACE N_WASM
|
|
439
|
+
|
|
440
|
+
#define HWY_TARGET_STR "simd128"
|
|
441
|
+
|
|
442
|
+
//-----------------------------------------------------------------------------
|
|
443
|
+
// WASM_EMU256
|
|
444
|
+
#elif HWY_TARGET == HWY_WASM_EMU256
|
|
445
|
+
|
|
446
|
+
#define HWY_ALIGN alignas(32)
|
|
447
|
+
#define HWY_MAX_BYTES 32
|
|
448
|
+
#define HWY_LANES(T) (32 / sizeof(T))
|
|
449
|
+
|
|
450
|
+
#define HWY_HAVE_SCALABLE 0
|
|
451
|
+
#define HWY_HAVE_INTEGER64 1
|
|
452
|
+
#define HWY_HAVE_FLOAT16 0
|
|
453
|
+
#define HWY_HAVE_FLOAT64 0
|
|
454
|
+
#define HWY_MEM_OPS_MIGHT_FAULT 1
|
|
455
|
+
#define HWY_NATIVE_FMA 0
|
|
456
|
+
#define HWY_CAP_GE256 1
|
|
457
|
+
#define HWY_CAP_GE512 0
|
|
458
|
+
|
|
459
|
+
#define HWY_NAMESPACE N_WASM_EMU256
|
|
460
|
+
|
|
461
|
+
#define HWY_TARGET_STR "simd128"
|
|
462
|
+
|
|
463
|
+
//-----------------------------------------------------------------------------
|
|
464
|
+
// RVV
|
|
465
|
+
#elif HWY_TARGET == HWY_RVV
|
|
466
|
+
|
|
467
|
+
// RVV only requires lane alignment, not natural alignment of the entire vector,
|
|
468
|
+
// and the compiler already aligns builtin types, so nothing to do here.
|
|
469
|
+
#define HWY_ALIGN
|
|
470
|
+
|
|
471
|
+
// The spec requires VLEN <= 2^16 bits, so the limit is 2^16 bytes (LMUL=8).
|
|
472
|
+
#define HWY_MAX_BYTES 65536
|
|
473
|
+
|
|
474
|
+
// = HWY_MAX_BYTES divided by max LMUL=8 because MaxLanes includes the actual
|
|
475
|
+
// LMUL. This is the tightest possible upper bound.
|
|
476
|
+
#define HWY_LANES(T) (8192 / sizeof(T))
|
|
477
|
+
|
|
478
|
+
#define HWY_HAVE_SCALABLE 1
|
|
479
|
+
#define HWY_HAVE_INTEGER64 1
|
|
480
|
+
#define HWY_HAVE_FLOAT64 1
|
|
481
|
+
#define HWY_MEM_OPS_MIGHT_FAULT 0
|
|
482
|
+
#define HWY_NATIVE_FMA 1
|
|
483
|
+
#define HWY_CAP_GE256 0
|
|
484
|
+
#define HWY_CAP_GE512 0
|
|
485
|
+
|
|
486
|
+
#if defined(__riscv_zvfh)
|
|
487
|
+
#define HWY_HAVE_FLOAT16 1
|
|
488
|
+
#else
|
|
489
|
+
#define HWY_HAVE_FLOAT16 0
|
|
490
|
+
#endif
|
|
491
|
+
|
|
492
|
+
#define HWY_NAMESPACE N_RVV
|
|
493
|
+
|
|
494
|
+
// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
|
|
495
|
+
// (rv64gcv is not a valid target)
|
|
496
|
+
|
|
497
|
+
//-----------------------------------------------------------------------------
|
|
498
|
+
// EMU128
|
|
499
|
+
#elif HWY_TARGET == HWY_EMU128
|
|
500
|
+
|
|
501
|
+
#define HWY_ALIGN alignas(16)
|
|
502
|
+
#define HWY_MAX_BYTES 16
|
|
503
|
+
#define HWY_LANES(T) (16 / sizeof(T))
|
|
504
|
+
|
|
505
|
+
#define HWY_HAVE_SCALABLE 0
|
|
506
|
+
#define HWY_HAVE_INTEGER64 1
|
|
507
|
+
#define HWY_HAVE_FLOAT16 0
|
|
508
|
+
#define HWY_HAVE_FLOAT64 1
|
|
509
|
+
#define HWY_MEM_OPS_MIGHT_FAULT 1
|
|
510
|
+
#define HWY_NATIVE_FMA 0
|
|
511
|
+
#define HWY_CAP_GE256 0
|
|
512
|
+
#define HWY_CAP_GE512 0
|
|
513
|
+
|
|
514
|
+
#define HWY_NAMESPACE N_EMU128
|
|
515
|
+
|
|
516
|
+
// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
|
|
517
|
+
|
|
518
|
+
//-----------------------------------------------------------------------------
|
|
519
|
+
// SCALAR
|
|
520
|
+
#elif HWY_TARGET == HWY_SCALAR
|
|
521
|
+
|
|
522
|
+
#define HWY_ALIGN
|
|
523
|
+
#define HWY_MAX_BYTES 8
|
|
524
|
+
#define HWY_LANES(T) 1
|
|
525
|
+
|
|
526
|
+
#define HWY_HAVE_SCALABLE 0
|
|
527
|
+
#define HWY_HAVE_INTEGER64 1
|
|
528
|
+
#define HWY_HAVE_FLOAT16 0
|
|
529
|
+
#define HWY_HAVE_FLOAT64 1
|
|
530
|
+
#define HWY_MEM_OPS_MIGHT_FAULT 0
|
|
531
|
+
#define HWY_NATIVE_FMA 0
|
|
532
|
+
#define HWY_CAP_GE256 0
|
|
533
|
+
#define HWY_CAP_GE512 0
|
|
534
|
+
|
|
535
|
+
#define HWY_NAMESPACE N_SCALAR
|
|
536
|
+
|
|
537
|
+
// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
|
|
538
|
+
|
|
539
|
+
#else
|
|
540
|
+
#pragma message("HWY_TARGET does not match any known target")
|
|
541
|
+
#endif // HWY_TARGET
|
|
542
|
+
|
|
543
|
+
// Override this to 1 in asan/msan builds, which will still fault.
|
|
544
|
+
#if HWY_IS_ASAN || HWY_IS_MSAN
|
|
545
|
+
#undef HWY_MEM_OPS_MIGHT_FAULT
|
|
546
|
+
#define HWY_MEM_OPS_MIGHT_FAULT 1
|
|
547
|
+
#endif
|
|
548
|
+
|
|
549
|
+
// Clang <9 requires this be invoked at file scope, before any namespace.
|
|
550
|
+
#undef HWY_BEFORE_NAMESPACE
|
|
551
|
+
#if defined(HWY_TARGET_STR)
|
|
552
|
+
#define HWY_BEFORE_NAMESPACE() \
|
|
553
|
+
HWY_PUSH_ATTRIBUTES(HWY_TARGET_STR) \
|
|
554
|
+
static_assert(true, "For requiring trailing semicolon")
|
|
555
|
+
#else
|
|
556
|
+
// avoids compiler warning if no HWY_TARGET_STR
|
|
557
|
+
#define HWY_BEFORE_NAMESPACE() \
|
|
558
|
+
static_assert(true, "For requiring trailing semicolon")
|
|
559
|
+
#endif
|
|
560
|
+
|
|
561
|
+
// Clang <9 requires any namespaces be closed before this macro.
|
|
562
|
+
#undef HWY_AFTER_NAMESPACE
|
|
563
|
+
#if defined(HWY_TARGET_STR)
|
|
564
|
+
#define HWY_AFTER_NAMESPACE() \
|
|
565
|
+
HWY_POP_ATTRIBUTES \
|
|
566
|
+
static_assert(true, "For requiring trailing semicolon")
|
|
567
|
+
#else
|
|
568
|
+
// avoids compiler warning if no HWY_TARGET_STR
|
|
569
|
+
#define HWY_AFTER_NAMESPACE() \
|
|
570
|
+
static_assert(true, "For requiring trailing semicolon")
|
|
571
|
+
#endif
|
|
572
|
+
|
|
573
|
+
#undef HWY_ATTR
|
|
574
|
+
#if defined(HWY_TARGET_STR) && HWY_HAS_ATTRIBUTE(target)
|
|
575
|
+
#define HWY_ATTR __attribute__((target(HWY_TARGET_STR)))
|
|
576
|
+
#else
|
|
577
|
+
#define HWY_ATTR
|
|
578
|
+
#endif
|