@img/sharp-libvips-dev 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -2
- package/include/aom/aom_decoder.h +1 -1
- package/include/aom/aom_encoder.h +7 -1
- package/include/aom/aom_image.h +24 -12
- package/include/aom/aom_integer.h +3 -3
- package/include/aom/aomcx.h +15 -0
- package/include/aom/aomdx.h +5 -2
- package/include/archive.h +7 -5
- package/include/archive_entry.h +5 -3
- package/include/cgif.h +3 -0
- package/include/freetype2/freetype/config/ftoption.h +1 -1
- package/include/fribidi/fribidi-config.h +2 -2
- package/include/fribidi/fribidi-unicode-version.h +3 -3
- package/include/glib-2.0/gio/gappinfo.h +40 -25
- package/include/glib-2.0/gio/gasyncresult.h +1 -1
- package/include/glib-2.0/gio/gconverter.h +5 -0
- package/include/glib-2.0/gio/gdbusintrospection.h +1 -1
- package/include/glib-2.0/gio/gfile.h +16 -0
- package/include/glib-2.0/gio/gio-visibility.h +34 -0
- package/include/glib-2.0/gio/gsettings.h +8 -0
- package/include/glib-2.0/gio/gvfs.h +2 -2
- package/include/glib-2.0/girepository/gi-visibility.h +34 -0
- package/include/glib-2.0/glib/gbookmarkfile.h +1 -1
- package/include/glib-2.0/glib/giochannel.h +2 -2
- package/include/glib-2.0/glib/glib-visibility.h +34 -0
- package/include/glib-2.0/glib/gmacros.h +12 -5
- package/include/glib-2.0/glib/gmain.h +93 -7
- package/include/glib-2.0/glib/gqsort.h +8 -1
- package/include/glib-2.0/glib/gstrfuncs.h +0 -12
- package/include/glib-2.0/glib/gstrvbuilder.h +3 -0
- package/include/glib-2.0/glib/gunicode.h +1 -1
- package/include/glib-2.0/glib/gversionmacros.h +9 -0
- package/include/glib-2.0/gmodule/gmodule-visibility.h +34 -0
- package/include/glib-2.0/gobject/gobject-visibility.h +34 -0
- package/include/glib-2.0/gobject/gtype.h +6 -6
- package/include/harfbuzz/hb-buffer.h +6 -0
- package/include/harfbuzz/hb-common.h +6 -9
- package/include/harfbuzz/hb-cplusplus.hh +8 -11
- package/include/harfbuzz/hb-subset.h +17 -4
- package/include/harfbuzz/hb-version.h +3 -3
- package/include/hwy/abort.h +28 -0
- package/include/hwy/aligned_allocator.h +48 -1
- package/include/hwy/base.h +235 -34
- package/include/hwy/detect_compiler_arch.h +84 -10
- package/include/hwy/detect_targets.h +95 -29
- package/include/hwy/foreach_target.h +12 -1
- package/include/hwy/highway.h +205 -50
- package/include/hwy/ops/arm_neon-inl.h +841 -99
- package/include/hwy/ops/arm_sve-inl.h +413 -141
- package/include/hwy/ops/emu128-inl.h +373 -360
- package/include/hwy/ops/generic_ops-inl.h +804 -401
- package/include/hwy/ops/inside-inl.h +691 -0
- package/include/hwy/ops/ppc_vsx-inl.h +456 -166
- package/include/hwy/ops/rvv-inl.h +537 -249
- package/include/hwy/ops/scalar-inl.h +169 -79
- package/include/hwy/ops/set_macros-inl.h +106 -18
- package/include/hwy/ops/shared-inl.h +23 -0
- package/include/hwy/ops/wasm_128-inl.h +130 -108
- package/include/hwy/ops/x86_128-inl.h +1892 -577
- package/include/hwy/ops/x86_256-inl.h +625 -184
- package/include/hwy/ops/x86_512-inl.h +733 -131
- package/include/hwy/targets.h +22 -21
- package/include/hwy/timer-inl.h +3 -3
- package/include/hwy/timer.h +5 -1
- package/include/libheif/heif.h +170 -15
- package/include/libheif/heif_items.h +237 -0
- package/include/libheif/heif_properties.h +38 -2
- package/include/libheif/heif_regions.h +1 -1
- package/include/libheif/heif_version.h +2 -2
- package/include/libpng16/pnglibconf.h +1 -1
- package/include/librsvg-2.0/librsvg/rsvg-cairo.h +1 -1
- package/include/librsvg-2.0/librsvg/rsvg-features.h +3 -4
- package/include/librsvg-2.0/librsvg/rsvg-pixbuf.h +235 -0
- package/include/librsvg-2.0/librsvg/rsvg-version.h +3 -3
- package/include/librsvg-2.0/librsvg/rsvg.h +55 -176
- package/include/libxml2/libxml/HTMLparser.h +12 -19
- package/include/libxml2/libxml/c14n.h +1 -12
- package/include/libxml2/libxml/debugXML.h +1 -1
- package/include/libxml2/libxml/encoding.h +9 -0
- package/include/libxml2/libxml/entities.h +12 -1
- package/include/libxml2/libxml/hash.h +19 -0
- package/include/libxml2/libxml/list.h +2 -2
- package/include/libxml2/libxml/nanohttp.h +17 -0
- package/include/libxml2/libxml/parser.h +61 -55
- package/include/libxml2/libxml/parserInternals.h +9 -1
- package/include/libxml2/libxml/pattern.h +6 -0
- package/include/libxml2/libxml/tree.h +32 -12
- package/include/libxml2/libxml/uri.h +11 -0
- package/include/libxml2/libxml/valid.h +29 -2
- package/include/libxml2/libxml/xinclude.h +7 -0
- package/include/libxml2/libxml/xmlIO.h +21 -4
- package/include/libxml2/libxml/xmlerror.h +14 -0
- package/include/libxml2/libxml/xmlexports.h +111 -15
- package/include/libxml2/libxml/xmlmemory.h +8 -45
- package/include/libxml2/libxml/xmlreader.h +2 -0
- package/include/libxml2/libxml/xmlsave.h +5 -0
- package/include/libxml2/libxml/xmlunicode.h +165 -1
- package/include/libxml2/libxml/xmlversion.h +15 -179
- package/include/libxml2/libxml/xmlwriter.h +1 -0
- package/include/libxml2/libxml/xpath.h +4 -0
- package/include/pango-1.0/pango/pango-features.h +3 -3
- package/include/pango-1.0/pango/pango-item.h +4 -2
- package/include/pango-1.0/pango/pango-version-macros.h +25 -0
- package/include/pango-1.0/pango/pangofc-font.h +2 -1
- package/include/pnglibconf.h +1 -1
- package/include/vips/util.h +1 -2
- package/include/vips/version.h +4 -4
- package/include/webp/decode.h +58 -56
- package/include/webp/demux.h +25 -21
- package/include/webp/encode.h +44 -39
- package/include/webp/mux.h +76 -15
- package/include/webp/mux_types.h +2 -1
- package/include/webp/sharpyuv/sharpyuv.h +77 -8
- package/include/webp/types.h +29 -8
- package/include/zconf.h +1 -1
- package/include/zlib.h +12 -12
- package/package.json +1 -1
- package/versions.json +14 -15
|
@@ -62,7 +62,8 @@
|
|
|
62
62
|
// Bits 0..3 reserved (4 targets)
|
|
63
63
|
#define HWY_AVX3_SPR (1LL << 4)
|
|
64
64
|
// Bit 5 reserved (likely AVX10.2 with 256-bit vectors)
|
|
65
|
-
// Currently HWY_AVX3_DL plus a special case for CompressStore
|
|
65
|
+
// Currently HWY_AVX3_DL plus AVX512BF16 and a special case for CompressStore
|
|
66
|
+
// (10x as fast).
|
|
66
67
|
// We may later also use VPCONFLICT.
|
|
67
68
|
#define HWY_AVX3_ZEN4 (1LL << 6) // see HWY_WANT_AVX3_ZEN4 below
|
|
68
69
|
|
|
@@ -84,15 +85,22 @@
|
|
|
84
85
|
#define HWY_HIGHEST_TARGET_BIT_X86 14
|
|
85
86
|
|
|
86
87
|
// --------------------------- Arm: 15 targets (+ one fallback)
|
|
87
|
-
// Bits 15..
|
|
88
|
-
#define HWY_SVE2_128 (1LL <<
|
|
89
|
-
#define HWY_SVE_256 (1LL <<
|
|
90
|
-
|
|
91
|
-
#define
|
|
88
|
+
// Bits 15..17 reserved (3 targets)
|
|
89
|
+
#define HWY_SVE2_128 (1LL << 18) // specialized (e.g. Neoverse V2/N2/N3)
|
|
90
|
+
#define HWY_SVE_256 (1LL << 19) // specialized (Neoverse V1)
|
|
91
|
+
// Bits 20-22 reserved for later SVE (3 targets)
|
|
92
|
+
#define HWY_SVE2 (1LL << 23)
|
|
93
|
+
#define HWY_SVE (1LL << 24)
|
|
94
|
+
// Bit 25 reserved for NEON
|
|
95
|
+
#define HWY_NEON_BF16 (1LL << 26) // fp16/dot/bf16 (e.g. Neoverse V2/N2/N3)
|
|
96
|
+
// Bit 27 reserved for NEON
|
|
92
97
|
#define HWY_NEON (1LL << 28) // Implies support for AES
|
|
93
98
|
#define HWY_NEON_WITHOUT_AES (1LL << 29)
|
|
94
99
|
#define HWY_HIGHEST_TARGET_BIT_ARM 29
|
|
95
100
|
|
|
101
|
+
#define HWY_ALL_NEON (HWY_NEON_WITHOUT_AES | HWY_NEON | HWY_NEON_BF16)
|
|
102
|
+
#define HWY_ALL_SVE (HWY_SVE | HWY_SVE2 | HWY_SVE_256 | HWY_SVE2_128)
|
|
103
|
+
|
|
96
104
|
// --------------------------- RISC-V: 9 targets (+ one fallback)
|
|
97
105
|
// Bits 30..36 reserved (7 targets)
|
|
98
106
|
#define HWY_RVV (1LL << 37)
|
|
@@ -111,6 +119,8 @@
|
|
|
111
119
|
#define HWY_Z14 (1LL << 51) // Z14
|
|
112
120
|
#define HWY_HIGHEST_TARGET_BIT_PPC 51
|
|
113
121
|
|
|
122
|
+
#define HWY_ALL_PPC (HWY_PPC8 | HWY_PPC9 | HWY_PPC10)
|
|
123
|
+
|
|
114
124
|
// --------------------------- WebAssembly: 9 targets (+ one fallback)
|
|
115
125
|
// Bits 52..57 reserved (6 targets)
|
|
116
126
|
#define HWY_WASM_EMU256 (1LL << 58) // Experimental
|
|
@@ -188,7 +198,7 @@
|
|
|
188
198
|
|
|
189
199
|
// armv7be has not been tested and is not yet supported.
|
|
190
200
|
#if HWY_ARCH_ARM_V7 && HWY_IS_BIG_ENDIAN
|
|
191
|
-
#define HWY_BROKEN_ARM7_BIG_ENDIAN
|
|
201
|
+
#define HWY_BROKEN_ARM7_BIG_ENDIAN HWY_ALL_NEON
|
|
192
202
|
#else
|
|
193
203
|
#define HWY_BROKEN_ARM7_BIG_ENDIAN 0
|
|
194
204
|
#endif
|
|
@@ -199,11 +209,19 @@
|
|
|
199
209
|
#if HWY_ARCH_ARM_V7 && (__ARM_ARCH_PROFILE == 'A') && \
|
|
200
210
|
!defined(__ARM_VFPV4__) && \
|
|
201
211
|
!((__ARM_NEON_FP & 0x2 /* half-float */) && (__ARM_FEATURE_FMA == 1))
|
|
202
|
-
#define HWY_BROKEN_ARM7_WITHOUT_VFP4
|
|
212
|
+
#define HWY_BROKEN_ARM7_WITHOUT_VFP4 HWY_ALL_NEON
|
|
203
213
|
#else
|
|
204
214
|
#define HWY_BROKEN_ARM7_WITHOUT_VFP4 0
|
|
205
215
|
#endif
|
|
206
216
|
|
|
217
|
+
// HWY_NEON_BF16 requires recent compilers.
|
|
218
|
+
#if (HWY_COMPILER_CLANG != 0 && HWY_COMPILER_CLANG < 1700) || \
|
|
219
|
+
(HWY_COMPILER_GCC_ACTUAL != 0 && HWY_COMPILER_GCC_ACTUAL < 1302)
|
|
220
|
+
#define HWY_BROKEN_NEON_BF16 (HWY_NEON_BF16)
|
|
221
|
+
#else
|
|
222
|
+
#define HWY_BROKEN_NEON_BF16 0
|
|
223
|
+
#endif
|
|
224
|
+
|
|
207
225
|
// SVE[2] require recent clang or gcc versions.
|
|
208
226
|
#if (HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1100) || \
|
|
209
227
|
(HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1000)
|
|
@@ -247,7 +265,7 @@
|
|
|
247
265
|
(HWY_BROKEN_CLANG6 | HWY_BROKEN_32BIT | HWY_BROKEN_MSVC | \
|
|
248
266
|
HWY_BROKEN_AVX3_DL_ZEN4 | HWY_BROKEN_AVX3_SPR | \
|
|
249
267
|
HWY_BROKEN_ARM7_BIG_ENDIAN | HWY_BROKEN_ARM7_WITHOUT_VFP4 | \
|
|
250
|
-
HWY_BROKEN_SVE | HWY_BROKEN_PPC10)
|
|
268
|
+
HWY_BROKEN_NEON_BF16 | HWY_BROKEN_SVE | HWY_BROKEN_PPC10)
|
|
251
269
|
|
|
252
270
|
#endif // HWY_BROKEN_TARGETS
|
|
253
271
|
|
|
@@ -335,7 +353,10 @@
|
|
|
335
353
|
|
|
336
354
|
#if HWY_ARCH_ARM
|
|
337
355
|
|
|
338
|
-
|
|
356
|
+
// Also check compiler version as done for HWY_ATTAINABLE_SVE2 because the
|
|
357
|
+
// static target (influenced here) must be one of the attainable targets.
|
|
358
|
+
#if defined(__ARM_FEATURE_SVE2) && \
|
|
359
|
+
(HWY_COMPILER_CLANG >= 1400 || HWY_COMPILER_GCC_ACTUAL >= 1200)
|
|
339
360
|
#undef HWY_BASELINE_SVE2 // was 0, will be re-defined
|
|
340
361
|
// If user specified -msve-vector-bits=128, they assert the vector length is
|
|
341
362
|
// 128 bits and we should use the HWY_SVE2_128 (more efficient for some ops).
|
|
@@ -350,7 +371,8 @@
|
|
|
350
371
|
#endif // __ARM_FEATURE_SVE_BITS
|
|
351
372
|
#endif // __ARM_FEATURE_SVE2
|
|
352
373
|
|
|
353
|
-
#if defined(__ARM_FEATURE_SVE)
|
|
374
|
+
#if defined(__ARM_FEATURE_SVE) && \
|
|
375
|
+
(HWY_COMPILER_CLANG >= 900 || HWY_COMPILER_GCC_ACTUAL >= 800)
|
|
354
376
|
#undef HWY_BASELINE_SVE // was 0, will be re-defined
|
|
355
377
|
// See above. If user-specified vector length matches our optimization, use it.
|
|
356
378
|
#if defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS == 256
|
|
@@ -363,12 +385,17 @@
|
|
|
363
385
|
// GCC 4.5.4 only defines __ARM_NEON__; 5.4 defines both.
|
|
364
386
|
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
|
|
365
387
|
#undef HWY_BASELINE_NEON
|
|
366
|
-
#if defined(__ARM_FEATURE_AES)
|
|
367
|
-
|
|
388
|
+
#if defined(__ARM_FEATURE_AES) && \
|
|
389
|
+
defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && \
|
|
390
|
+
defined(__ARM_FEATURE_DOTPROD) && \
|
|
391
|
+
defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC)
|
|
392
|
+
#define HWY_BASELINE_NEON HWY_ALL_NEON
|
|
393
|
+
#elif defined(__ARM_FEATURE_AES)
|
|
394
|
+
#define HWY_BASELINE_NEON (HWY_NEON_WITHOUT_AES | HWY_NEON)
|
|
368
395
|
#else
|
|
369
396
|
#define HWY_BASELINE_NEON (HWY_NEON_WITHOUT_AES)
|
|
370
|
-
#endif
|
|
371
|
-
#endif
|
|
397
|
+
#endif // __ARM_FEATURE*
|
|
398
|
+
#endif // __ARM_NEON
|
|
372
399
|
|
|
373
400
|
#endif // HWY_ARCH_ARM
|
|
374
401
|
|
|
@@ -496,14 +523,16 @@
|
|
|
496
523
|
#define HWY_BASELINE_AVX3_ZEN4 0
|
|
497
524
|
#endif
|
|
498
525
|
|
|
499
|
-
#if HWY_BASELINE_AVX3_DL != 0 && defined(
|
|
526
|
+
#if HWY_BASELINE_AVX3_DL != 0 && defined(__AVX512BF16__) && \
|
|
527
|
+
defined(__AVX512FP16__)
|
|
500
528
|
#define HWY_BASELINE_AVX3_SPR HWY_AVX3_SPR
|
|
501
529
|
#else
|
|
502
530
|
#define HWY_BASELINE_AVX3_SPR 0
|
|
503
531
|
#endif
|
|
504
532
|
|
|
505
533
|
// RVV requires intrinsics 0.11 or later, see #1156.
|
|
506
|
-
#if
|
|
534
|
+
#if HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && \
|
|
535
|
+
__riscv_v_intrinsic >= 11000
|
|
507
536
|
#define HWY_BASELINE_RVV HWY_RVV
|
|
508
537
|
#else
|
|
509
538
|
#define HWY_BASELINE_RVV 0
|
|
@@ -548,19 +577,43 @@
|
|
|
548
577
|
#endif
|
|
549
578
|
// Defining one of HWY_COMPILE_ONLY_* will trump HWY_COMPILE_ALL_ATTAINABLE.
|
|
550
579
|
|
|
580
|
+
#ifndef HWY_HAVE_AUXV // allow override
|
|
581
|
+
#ifdef TOOLCHAIN_MISS_SYS_AUXV_H
|
|
582
|
+
#define HWY_HAVE_AUXV 0 // CMake failed to find the header
|
|
583
|
+
// glibc 2.16 added auxv, but checking for that requires features.h, and we do
|
|
584
|
+
// not want to include system headers here. Instead check for the header
|
|
585
|
+
// directly, which has been supported at least since GCC 5.4 and Clang 3.
|
|
586
|
+
#elif defined(__has_include) // note: wrapper macro fails on Clang ~17
|
|
587
|
+
// clang-format off
|
|
588
|
+
#if __has_include(<sys/auxv.h>)
|
|
589
|
+
// clang-format on
|
|
590
|
+
#define HWY_HAVE_AUXV 1 // header present
|
|
591
|
+
#else
|
|
592
|
+
#define HWY_HAVE_AUXV 0 // header not present
|
|
593
|
+
#endif // __has_include
|
|
594
|
+
#else // compiler lacks __has_include
|
|
595
|
+
#define HWY_HAVE_AUXV 0
|
|
596
|
+
#endif
|
|
597
|
+
#endif // HWY_HAVE_AUXV
|
|
598
|
+
|
|
599
|
+
// Allow opting out, and without a guarantee of success, opting-in.
|
|
600
|
+
#ifndef HWY_HAVE_RUNTIME_DISPATCH
|
|
551
601
|
// Clang, GCC and MSVC allow runtime dispatch on x86.
|
|
552
602
|
#if HWY_ARCH_X86
|
|
553
603
|
#define HWY_HAVE_RUNTIME_DISPATCH 1
|
|
554
|
-
// On Arm
|
|
555
|
-
//
|
|
556
|
-
#elif (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X) &&
|
|
557
|
-
(HWY_COMPILER_GCC_ACTUAL ||
|
|
558
|
-
|
|
559
|
-
|
|
604
|
+
// On Arm, PPC, S390X, and RISC-V: GCC and Clang 17+ do, and we require Linux
|
|
605
|
+
// to detect CPU capabilities.
|
|
606
|
+
#elif (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X || HWY_ARCH_RISCV) && \
|
|
607
|
+
(HWY_COMPILER_GCC_ACTUAL || HWY_COMPILER_CLANG >= 1700) && HWY_OS_LINUX && \
|
|
608
|
+
HWY_HAVE_AUXV
|
|
609
|
+
#define HWY_HAVE_RUNTIME_DISPATCH 1
|
|
610
|
+
#elif HWY_ARCH_ARM_A64 && HWY_OS_APPLE && \
|
|
611
|
+
(HWY_COMPILER_GCC_ACTUAL || HWY_COMPILER_CLANG >= 1700)
|
|
560
612
|
#define HWY_HAVE_RUNTIME_DISPATCH 1
|
|
561
613
|
#else
|
|
562
614
|
#define HWY_HAVE_RUNTIME_DISPATCH 0
|
|
563
|
-
#endif
|
|
615
|
+
#endif // HWY_ARCH_*
|
|
616
|
+
#endif // HWY_HAVE_RUNTIME_DISPATCH
|
|
564
617
|
|
|
565
618
|
// AVX3_DL is not widely available yet. To reduce code size and compile time,
|
|
566
619
|
// only include it in the set of attainable targets (for dynamic dispatch) if
|
|
@@ -572,22 +625,26 @@
|
|
|
572
625
|
#endif
|
|
573
626
|
|
|
574
627
|
#if HWY_ARCH_ARM_A64 && HWY_HAVE_RUNTIME_DISPATCH
|
|
575
|
-
#define HWY_ATTAINABLE_NEON
|
|
628
|
+
#define HWY_ATTAINABLE_NEON HWY_ALL_NEON
|
|
576
629
|
#elif HWY_ARCH_ARM // static dispatch, or HWY_ARCH_ARM_V7
|
|
577
630
|
#define HWY_ATTAINABLE_NEON (HWY_BASELINE_NEON)
|
|
578
631
|
#else
|
|
579
632
|
#define HWY_ATTAINABLE_NEON 0
|
|
580
633
|
#endif
|
|
581
634
|
|
|
582
|
-
#if HWY_ARCH_ARM_A64 &&
|
|
583
|
-
|
|
635
|
+
#if HWY_ARCH_ARM_A64 && \
|
|
636
|
+
(HWY_COMPILER_CLANG >= 900 || HWY_COMPILER_GCC_ACTUAL >= 800) && \
|
|
637
|
+
(HWY_HAVE_RUNTIME_DISPATCH || \
|
|
638
|
+
(HWY_ENABLED_BASELINE & (HWY_SVE | HWY_SVE_256)))
|
|
584
639
|
#define HWY_ATTAINABLE_SVE (HWY_SVE | HWY_SVE_256)
|
|
585
640
|
#else
|
|
586
641
|
#define HWY_ATTAINABLE_SVE 0
|
|
587
642
|
#endif
|
|
588
643
|
|
|
589
|
-
#if HWY_ARCH_ARM_A64 &&
|
|
590
|
-
|
|
644
|
+
#if HWY_ARCH_ARM_A64 && \
|
|
645
|
+
(HWY_COMPILER_CLANG >= 1400 || HWY_COMPILER_GCC_ACTUAL >= 1200) && \
|
|
646
|
+
(HWY_HAVE_RUNTIME_DISPATCH || \
|
|
647
|
+
(HWY_ENABLED_BASELINE & (HWY_SVE2 | HWY_SVE2_128)))
|
|
591
648
|
#define HWY_ATTAINABLE_SVE2 (HWY_SVE2 | HWY_SVE2_128)
|
|
592
649
|
#else
|
|
593
650
|
#define HWY_ATTAINABLE_SVE2 0
|
|
@@ -617,6 +674,12 @@
|
|
|
617
674
|
#define HWY_ATTAINABLE_S390X 0
|
|
618
675
|
#endif
|
|
619
676
|
|
|
677
|
+
#if HWY_ARCH_RISCV && HWY_HAVE_RUNTIME_DISPATCH
|
|
678
|
+
#define HWY_ATTAINABLE_RISCV (HWY_RVV)
|
|
679
|
+
#else
|
|
680
|
+
#define HWY_ATTAINABLE_RISCV 0
|
|
681
|
+
#endif
|
|
682
|
+
|
|
620
683
|
// Attainable means enabled and the compiler allows intrinsics (even when not
|
|
621
684
|
// allowed to autovectorize). Used in 3 and 4.
|
|
622
685
|
#if HWY_ARCH_X86
|
|
@@ -640,6 +703,9 @@
|
|
|
640
703
|
#elif HWY_ARCH_S390X
|
|
641
704
|
#define HWY_ATTAINABLE_TARGETS \
|
|
642
705
|
HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_S390X)
|
|
706
|
+
#elif HWY_ARCH_RVV
|
|
707
|
+
#define HWY_ATTAINABLE_TARGETS \
|
|
708
|
+
HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_RISCV)
|
|
643
709
|
#else
|
|
644
710
|
#define HWY_ATTAINABLE_TARGETS (HWY_ENABLED_BASELINE)
|
|
645
711
|
#endif // HWY_ARCH_*
|
|
@@ -168,6 +168,17 @@
|
|
|
168
168
|
#endif
|
|
169
169
|
#endif
|
|
170
170
|
|
|
171
|
+
#if (HWY_TARGETS & HWY_NEON_BF16) && (HWY_STATIC_TARGET != HWY_NEON_BF16)
|
|
172
|
+
#undef HWY_TARGET
|
|
173
|
+
#define HWY_TARGET HWY_NEON_BF16
|
|
174
|
+
#include HWY_TARGET_INCLUDE
|
|
175
|
+
#ifdef HWY_TARGET_TOGGLE
|
|
176
|
+
#undef HWY_TARGET_TOGGLE
|
|
177
|
+
#else
|
|
178
|
+
#define HWY_TARGET_TOGGLE
|
|
179
|
+
#endif
|
|
180
|
+
#endif
|
|
181
|
+
|
|
171
182
|
#if (HWY_TARGETS & HWY_SVE) && (HWY_STATIC_TARGET != HWY_SVE)
|
|
172
183
|
#undef HWY_TARGET
|
|
173
184
|
#define HWY_TARGET HWY_SVE
|
|
@@ -295,7 +306,7 @@
|
|
|
295
306
|
#endif
|
|
296
307
|
#endif
|
|
297
308
|
|
|
298
|
-
// ------------------------------
|
|
309
|
+
// ------------------------------ HWY_ARCH_RISCV
|
|
299
310
|
|
|
300
311
|
#if (HWY_TARGETS & HWY_RVV) && (HWY_STATIC_TARGET != HWY_RVV)
|
|
301
312
|
#undef HWY_TARGET
|
package/include/hwy/highway.h
CHANGED
|
@@ -18,10 +18,17 @@
|
|
|
18
18
|
// IWYU pragma: begin_exports
|
|
19
19
|
#include "hwy/base.h"
|
|
20
20
|
#include "hwy/detect_compiler_arch.h"
|
|
21
|
+
#include "hwy/detect_targets.h"
|
|
21
22
|
#include "hwy/highway_export.h"
|
|
22
23
|
#include "hwy/targets.h"
|
|
23
24
|
// IWYU pragma: end_exports
|
|
24
25
|
|
|
26
|
+
#if HWY_CXX_LANG < 201703L
|
|
27
|
+
#define HWY_DISPATCH_MAP 1
|
|
28
|
+
#else
|
|
29
|
+
#define HWY_DISPATCH_MAP 0
|
|
30
|
+
#endif
|
|
31
|
+
|
|
25
32
|
// This include guard is checked by foreach_target, so avoid the usual _H_
|
|
26
33
|
// suffix to prevent copybara from renaming it. NOTE: ops/*-inl.h are included
|
|
27
34
|
// after/outside this include guard.
|
|
@@ -30,11 +37,6 @@
|
|
|
30
37
|
|
|
31
38
|
namespace hwy {
|
|
32
39
|
|
|
33
|
-
// API version (https://semver.org/); keep in sync with CMakeLists.txt.
|
|
34
|
-
#define HWY_MAJOR 1
|
|
35
|
-
#define HWY_MINOR 1
|
|
36
|
-
#define HWY_PATCH 0
|
|
37
|
-
|
|
38
40
|
//------------------------------------------------------------------------------
|
|
39
41
|
// Shorthand for tags (defined in shared-inl.h) used to select overloads.
|
|
40
42
|
// Note that ScalableTag<T> is preferred over HWY_FULL, and CappedTag<T, N> over
|
|
@@ -84,6 +86,8 @@ namespace hwy {
|
|
|
84
86
|
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_NEON_WITHOUT_AES::FUNC_NAME
|
|
85
87
|
#elif HWY_STATIC_TARGET == HWY_NEON
|
|
86
88
|
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_NEON::FUNC_NAME
|
|
89
|
+
#elif HWY_STATIC_TARGET == HWY_NEON_BF16
|
|
90
|
+
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_NEON_BF16::FUNC_NAME
|
|
87
91
|
#elif HWY_STATIC_TARGET == HWY_SVE
|
|
88
92
|
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE::FUNC_NAME
|
|
89
93
|
#elif HWY_STATIC_TARGET == HWY_SVE2
|
|
@@ -162,6 +166,12 @@ namespace hwy {
|
|
|
162
166
|
#define HWY_CHOOSE_NEON(FUNC_NAME) nullptr
|
|
163
167
|
#endif
|
|
164
168
|
|
|
169
|
+
#if HWY_TARGETS & HWY_NEON_BF16
|
|
170
|
+
#define HWY_CHOOSE_NEON_BF16(FUNC_NAME) &N_NEON_BF16::FUNC_NAME
|
|
171
|
+
#else
|
|
172
|
+
#define HWY_CHOOSE_NEON_BF16(FUNC_NAME) nullptr
|
|
173
|
+
#endif
|
|
174
|
+
|
|
165
175
|
#if HWY_TARGETS & HWY_SVE
|
|
166
176
|
#define HWY_CHOOSE_SVE(FUNC_NAME) &N_SVE::FUNC_NAME
|
|
167
177
|
#else
|
|
@@ -268,41 +278,68 @@ namespace hwy {
|
|
|
268
278
|
// apparently cannot be an array. Use a function pointer instead, which has the
|
|
269
279
|
// disadvantage that we call the static (not best) target on the first call to
|
|
270
280
|
// any HWY_DYNAMIC_DISPATCH.
|
|
271
|
-
#if HWY_COMPILER_MSVC && HWY_COMPILER_MSVC < 1915
|
|
281
|
+
#if (HWY_COMPILER_MSVC && HWY_COMPILER_MSVC < 1915) || \
|
|
282
|
+
(HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 700)
|
|
272
283
|
#define HWY_DISPATCH_WORKAROUND 1
|
|
273
284
|
#else
|
|
274
285
|
#define HWY_DISPATCH_WORKAROUND 0
|
|
275
286
|
#endif
|
|
276
287
|
|
|
288
|
+
#if HWY_DISPATCH_MAP
|
|
289
|
+
struct AllExports {
|
|
290
|
+
template <class FuncPtr, class ExportsKey, uint64_t kHash>
|
|
291
|
+
static const FuncPtr*& GetRefToExportsPtr() {
|
|
292
|
+
static const FuncPtr* s_exports = nullptr;
|
|
293
|
+
return s_exports;
|
|
294
|
+
}
|
|
295
|
+
};
|
|
296
|
+
#endif
|
|
297
|
+
|
|
277
298
|
// Provides a static member function which is what is called during the first
|
|
278
299
|
// HWY_DYNAMIC_DISPATCH, where GetIndex is still zero, and instantiations of
|
|
279
|
-
// this function are the first entry in the tables created by HWY_EXPORT.
|
|
300
|
+
// this function are the first entry in the tables created by HWY_EXPORT[_T].
|
|
280
301
|
template <typename RetType, typename... Args>
|
|
281
302
|
struct FunctionCache {
|
|
282
303
|
public:
|
|
283
|
-
typedef RetType(
|
|
304
|
+
typedef RetType(FuncType)(Args...);
|
|
305
|
+
using FuncPtr = FuncType*;
|
|
284
306
|
|
|
285
|
-
#if HWY_DISPATCH_WORKAROUND
|
|
286
|
-
template <FunctionType* const func>
|
|
287
|
-
static RetType ChooseAndCall(Args... args) {
|
|
288
|
-
ChosenTarget& chosen_target = GetChosenTarget();
|
|
289
|
-
chosen_target.Update(SupportedTargets());
|
|
290
|
-
return (*func)(args...);
|
|
291
|
-
}
|
|
292
|
-
#else
|
|
293
307
|
// A template function that when instantiated has the same signature as the
|
|
294
308
|
// function being called. This function initializes the bit array of targets
|
|
295
309
|
// supported by the current CPU and then calls the appropriate entry within
|
|
296
310
|
// the HWY_EXPORT table. Subsequent calls via HWY_DYNAMIC_DISPATCH to any
|
|
297
311
|
// exported functions, even those defined by different translation units,
|
|
298
312
|
// will dispatch directly to the best available target.
|
|
299
|
-
|
|
313
|
+
#if HWY_DISPATCH_MAP
|
|
314
|
+
template <class ExportsKey, uint64_t kHash>
|
|
300
315
|
static RetType ChooseAndCall(Args... args) {
|
|
301
316
|
ChosenTarget& chosen_target = GetChosenTarget();
|
|
302
317
|
chosen_target.Update(SupportedTargets());
|
|
318
|
+
|
|
319
|
+
const FuncPtr* table = AllExports::template GetRefToExportsPtr<
|
|
320
|
+
FuncPtr, RemoveCvRef<ExportsKey>, kHash>();
|
|
321
|
+
HWY_ASSERT(table);
|
|
322
|
+
|
|
303
323
|
return (table[chosen_target.GetIndex()])(args...);
|
|
304
324
|
}
|
|
305
|
-
|
|
325
|
+
|
|
326
|
+
#if !HWY_DISPATCH_WORKAROUND
|
|
327
|
+
template <const FuncPtr* table>
|
|
328
|
+
static RetType TableChooseAndCall(Args... args) {
|
|
329
|
+
ChosenTarget& chosen_target = GetChosenTarget();
|
|
330
|
+
chosen_target.Update(SupportedTargets());
|
|
331
|
+
return (table[chosen_target.GetIndex()])(args...);
|
|
332
|
+
}
|
|
333
|
+
#endif // !HWY_DISPATCH_WORKAROUND
|
|
334
|
+
|
|
335
|
+
#else // !HWY_DISPATCH_MAP: zero-overhead, but requires C++17
|
|
336
|
+
template <const FuncPtr* table>
|
|
337
|
+
static RetType ChooseAndCall(Args... args) {
|
|
338
|
+
ChosenTarget& chosen_target = GetChosenTarget();
|
|
339
|
+
chosen_target.Update(SupportedTargets());
|
|
340
|
+
return (table[chosen_target.GetIndex()])(args...);
|
|
341
|
+
}
|
|
342
|
+
#endif // HWY_DISPATCH_MAP
|
|
306
343
|
};
|
|
307
344
|
|
|
308
345
|
// Used to deduce the template parameters RetType and Args from a function.
|
|
@@ -315,9 +352,7 @@ FunctionCache<RetType, Args...> DeduceFunctionCache(RetType (*)(Args...)) {
|
|
|
315
352
|
HWY_CONCAT(FUNC_NAME, HighwayDispatchTable)
|
|
316
353
|
|
|
317
354
|
// HWY_EXPORT(FUNC_NAME); expands to a static array that is used by
|
|
318
|
-
// HWY_DYNAMIC_DISPATCH() to call the appropriate function at runtime.
|
|
319
|
-
// static array must be defined at the same namespace level as the function
|
|
320
|
-
// it is exporting.
|
|
355
|
+
// HWY_DYNAMIC_DISPATCH() to call the appropriate function at runtime.
|
|
321
356
|
// After being exported, it can be called from other parts of the same source
|
|
322
357
|
// file using HWY_DYNAMIC_DISPATCH(), in particular from a function wrapper
|
|
323
358
|
// like in the following example:
|
|
@@ -342,59 +377,181 @@ FunctionCache<RetType, Args...> DeduceFunctionCache(RetType (*)(Args...)) {
|
|
|
342
377
|
// }
|
|
343
378
|
// } // namespace skeleton
|
|
344
379
|
//
|
|
380
|
+
// For templated code with a single type parameter, instead use HWY_EXPORT_T and
|
|
381
|
+
// its HWY_DYNAMIC_DISPATCH_T counterpart:
|
|
382
|
+
//
|
|
383
|
+
// template <typename T>
|
|
384
|
+
// void MyFunctionCaller(T ...) {
|
|
385
|
+
// // First argument to both HWY_EXPORT_T and HWY_DYNAMIC_DISPATCH_T is an
|
|
386
|
+
// // arbitrary table name; you must provide the same name for each call.
|
|
387
|
+
// // It is fine to have multiple HWY_EXPORT_T in a function, but a 64-bit
|
|
388
|
+
// // FNV hash collision among *any* table names will trigger HWY_ABORT.
|
|
389
|
+
// HWY_EXPORT_T(Table1, MyFunction<T>)
|
|
390
|
+
// HWY_DYNAMIC_DISPATCH_T(Table1)(a, b, c);
|
|
391
|
+
// }
|
|
392
|
+
//
|
|
393
|
+
// Note that HWY_EXPORT_T must be invoked inside a template (in the above
|
|
394
|
+
// example: `MyFunctionCaller`), so that a separate table will be created for
|
|
395
|
+
// each template instantiation. For convenience, we also provide a macro that
|
|
396
|
+
// combines both steps and avoids the need to pick a table name:
|
|
397
|
+
//
|
|
398
|
+
// template <typename T>
|
|
399
|
+
// void MyFunctionCaller(T ...) {
|
|
400
|
+
// // Table name is automatically chosen. Note that this variant must be
|
|
401
|
+
// // called in statement context; it is not a valid expression.
|
|
402
|
+
// HWY_EXPORT_AND_DYNAMIC_DISPATCH_T(MyFunction<T>)(a, b, c);
|
|
403
|
+
// }
|
|
345
404
|
|
|
405
|
+
// Simplified version for IDE or the dynamic dispatch case with only one target.
|
|
346
406
|
#if HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0)
|
|
347
407
|
|
|
348
|
-
//
|
|
349
|
-
//
|
|
350
|
-
|
|
351
|
-
// targets are being compiled.
|
|
352
|
-
#define HWY_EXPORT(FUNC_NAME) \
|
|
408
|
+
// We use a table to provide the same compile error conditions as with the
|
|
409
|
+
// non-simplified case, but the table only has a single entry.
|
|
410
|
+
#define HWY_EXPORT_T(TABLE_NAME, FUNC_NAME) \
|
|
353
411
|
HWY_MAYBE_UNUSED static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const \
|
|
354
|
-
HWY_DISPATCH_TABLE(
|
|
355
|
-
|
|
412
|
+
HWY_DISPATCH_TABLE(TABLE_NAME)[1] = {&HWY_STATIC_DISPATCH(FUNC_NAME)}
|
|
413
|
+
|
|
414
|
+
// Use the table, not just STATIC_DISPATCH as in DYNAMIC_DISPATCH, because
|
|
415
|
+
// TABLE_NAME might not match the function name.
|
|
416
|
+
#define HWY_DYNAMIC_POINTER_T(TABLE_NAME) (HWY_DISPATCH_TABLE(TABLE_NAME)[0])
|
|
417
|
+
#define HWY_DYNAMIC_DISPATCH_T(TABLE_NAME) \
|
|
418
|
+
(*(HWY_DYNAMIC_POINTER_T(TABLE_NAME)))
|
|
419
|
+
|
|
420
|
+
#define HWY_EXPORT(FUNC_NAME) HWY_EXPORT_T(FUNC_NAME, FUNC_NAME)
|
|
356
421
|
#define HWY_DYNAMIC_POINTER(FUNC_NAME) &HWY_STATIC_DISPATCH(FUNC_NAME)
|
|
422
|
+
#define HWY_DYNAMIC_DISPATCH(FUNC_NAME) HWY_STATIC_DISPATCH(FUNC_NAME)
|
|
357
423
|
|
|
358
|
-
#else
|
|
424
|
+
#else // not simplified: full table
|
|
425
|
+
|
|
426
|
+
// Pre-C++17 workaround: non-type template arguments must have linkage, which
|
|
427
|
+
// means we cannot pass &table as a template argument to ChooseAndCall.
|
|
428
|
+
// ChooseAndCall must find a way to access the table in order to dispatch to the
|
|
429
|
+
// chosen target:
|
|
430
|
+
// 0) Skipping this by dispatching to the static target would be surprising to
|
|
431
|
+
// users and may have serious performance implications.
|
|
432
|
+
// 1) An extra function parameter would be unacceptable because it changes the
|
|
433
|
+
// user-visible function signature.
|
|
434
|
+
// 2) Declaring a table, then defining a pointer to it would work, but requires
|
|
435
|
+
// an additional DECLARE step outside the function so that the pointer has
|
|
436
|
+
// linkage, which breaks existing code.
|
|
437
|
+
// 3) We instead associate the function with the table using an instance of an
|
|
438
|
+
// unnamed struct and the hash of the table name as the key. Because
|
|
439
|
+
// ChooseAndCall has the type information, it can then cast to the function
|
|
440
|
+
// pointer type. However, we cannot simply pass the name as a template
|
|
441
|
+
// argument to ChooseAndCall because this requires char*, which hits the same
|
|
442
|
+
// linkage problem. We instead hash the table name, which assumes the
|
|
443
|
+
// function names do not have collisions.
|
|
444
|
+
#if HWY_DISPATCH_MAP
|
|
445
|
+
|
|
446
|
+
static constexpr uint64_t FNV(const char* name) {
|
|
447
|
+
return *name ? static_cast<uint64_t>(static_cast<uint8_t>(*name)) ^
|
|
448
|
+
(0x100000001b3ULL * FNV(name + 1))
|
|
449
|
+
: 0xcbf29ce484222325ULL;
|
|
450
|
+
}
|
|
359
451
|
|
|
360
|
-
|
|
361
|
-
|
|
452
|
+
template <uint64_t kHash>
|
|
453
|
+
struct AddExport {
|
|
454
|
+
template <class ExportsKey, class FuncPtr>
|
|
455
|
+
AddExport(ExportsKey /*exports_key*/, const char* table_name,
|
|
456
|
+
const FuncPtr* table) {
|
|
457
|
+
using FuncCache = decltype(DeduceFunctionCache(hwy::DeclVal<FuncPtr>()));
|
|
458
|
+
static_assert(
|
|
459
|
+
hwy::IsSame<RemoveCvRef<FuncPtr>, typename FuncCache::FuncPtr>(),
|
|
460
|
+
"FuncPtr should be same type as FuncCache::FuncPtr");
|
|
461
|
+
|
|
462
|
+
const FuncPtr*& exports_ptr = AllExports::template GetRefToExportsPtr<
|
|
463
|
+
RemoveCvRef<FuncPtr>, RemoveCvRef<ExportsKey>, kHash>();
|
|
464
|
+
if (exports_ptr && exports_ptr != table) {
|
|
465
|
+
HWY_ABORT("Hash collision for %s, rename the function\n", table_name);
|
|
466
|
+
} else {
|
|
467
|
+
exports_ptr = table;
|
|
468
|
+
}
|
|
469
|
+
}
|
|
470
|
+
};
|
|
362
471
|
|
|
472
|
+
// Dynamic dispatch: defines table of function pointers. This must be invoked
|
|
473
|
+
// from inside the function template that calls the template we are exporting.
|
|
474
|
+
// TABLE_NAME must match the one passed to HWY_DYNAMIC_DISPATCH_T. This
|
|
475
|
+
// argument allows multiple exports within one function.
|
|
476
|
+
#define HWY_EXPORT_T(TABLE_NAME, FUNC_NAME) \
|
|
477
|
+
static const struct { \
|
|
478
|
+
} HWY_CONCAT(TABLE_NAME, HighwayDispatchExportsKey) = {}; \
|
|
479
|
+
static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE( \
|
|
480
|
+
TABLE_NAME)[static_cast<size_t>(HWY_MAX_DYNAMIC_TARGETS + 2)] = { \
|
|
481
|
+
/* The first entry in the table initializes the global cache and \
|
|
482
|
+
* calls the appropriate function. */ \
|
|
483
|
+
&decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH(FUNC_NAME))):: \
|
|
484
|
+
template ChooseAndCall<decltype(HWY_CONCAT( \
|
|
485
|
+
TABLE_NAME, HighwayDispatchExportsKey)), \
|
|
486
|
+
hwy::FNV(#TABLE_NAME)>, \
|
|
487
|
+
HWY_CHOOSE_TARGET_LIST(FUNC_NAME), \
|
|
488
|
+
HWY_CHOOSE_FALLBACK(FUNC_NAME), \
|
|
489
|
+
}; \
|
|
490
|
+
HWY_MAYBE_UNUSED static hwy::AddExport<hwy::FNV(#TABLE_NAME)> HWY_CONCAT( \
|
|
491
|
+
HighwayAddTable, __LINE__)( \
|
|
492
|
+
HWY_CONCAT(TABLE_NAME, HighwayDispatchExportsKey), #TABLE_NAME, \
|
|
493
|
+
HWY_DISPATCH_TABLE(TABLE_NAME))
|
|
494
|
+
|
|
495
|
+
// For non-template functions. Not necessarily invoked within a function, hence
|
|
496
|
+
// we derive the string and variable names from FUNC_NAME, not HWY_FUNCTION.
|
|
497
|
+
#if HWY_DISPATCH_WORKAROUND
|
|
498
|
+
#define HWY_EXPORT(FUNC_NAME) HWY_EXPORT_T(FUNC_NAME, FUNC_NAME)
|
|
499
|
+
#else
|
|
363
500
|
#define HWY_EXPORT(FUNC_NAME) \
|
|
364
501
|
static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE( \
|
|
365
|
-
FUNC_NAME)[HWY_MAX_DYNAMIC_TARGETS + 2] = {
|
|
502
|
+
FUNC_NAME)[static_cast<size_t>(HWY_MAX_DYNAMIC_TARGETS + 2)] = { \
|
|
366
503
|
/* The first entry in the table initializes the global cache and \
|
|
367
|
-
* calls the function
|
|
368
|
-
&decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH(
|
|
369
|
-
|
|
504
|
+
* calls the appropriate function. */ \
|
|
505
|
+
&decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH(FUNC_NAME))):: \
|
|
506
|
+
template TableChooseAndCall<HWY_DISPATCH_TABLE(FUNC_NAME)>, \
|
|
370
507
|
HWY_CHOOSE_TARGET_LIST(FUNC_NAME), \
|
|
371
508
|
HWY_CHOOSE_FALLBACK(FUNC_NAME), \
|
|
372
509
|
}
|
|
510
|
+
#endif // HWY_DISPATCH_WORKAROUND
|
|
373
511
|
|
|
374
|
-
#else
|
|
512
|
+
#else // !HWY_DISPATCH_MAP
|
|
375
513
|
|
|
376
|
-
//
|
|
377
|
-
//
|
|
378
|
-
#define
|
|
514
|
+
// Zero-overhead, but requires C++17 for non-type template arguments without
|
|
515
|
+
// linkage, because HWY_EXPORT_T tables are local static variables.
|
|
516
|
+
#define HWY_EXPORT_T(TABLE_NAME, FUNC_NAME) \
|
|
379
517
|
static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE( \
|
|
380
|
-
|
|
518
|
+
TABLE_NAME)[static_cast<size_t>(HWY_MAX_DYNAMIC_TARGETS + 2)] = { \
|
|
381
519
|
/* The first entry in the table initializes the global cache and \
|
|
382
520
|
* calls the appropriate function. */ \
|
|
383
|
-
&decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH(
|
|
384
|
-
|
|
521
|
+
&decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH(FUNC_NAME))):: \
|
|
522
|
+
template ChooseAndCall<HWY_DISPATCH_TABLE(TABLE_NAME)>, \
|
|
385
523
|
HWY_CHOOSE_TARGET_LIST(FUNC_NAME), \
|
|
386
524
|
HWY_CHOOSE_FALLBACK(FUNC_NAME), \
|
|
387
525
|
}
|
|
388
526
|
|
|
389
|
-
#
|
|
527
|
+
#define HWY_EXPORT(FUNC_NAME) HWY_EXPORT_T(FUNC_NAME, FUNC_NAME)
|
|
528
|
+
|
|
529
|
+
#endif // HWY_DISPATCH_MAP
|
|
390
530
|
|
|
391
|
-
|
|
392
|
-
|
|
531
|
+
// HWY_DISPATCH_MAP only affects how tables are created, not their usage.
|
|
532
|
+
|
|
533
|
+
// Evaluates to the function pointer for the chosen target.
|
|
393
534
|
#define HWY_DYNAMIC_POINTER(FUNC_NAME) \
|
|
394
535
|
(HWY_DISPATCH_TABLE(FUNC_NAME)[hwy::GetChosenTarget().GetIndex()])
|
|
395
536
|
|
|
537
|
+
// Calls the function pointer for the chosen target.
|
|
538
|
+
#define HWY_DYNAMIC_DISPATCH(FUNC_NAME) (*(HWY_DYNAMIC_POINTER(FUNC_NAME)))
|
|
539
|
+
|
|
540
|
+
// Same as DISPATCH, but provide a different arg name to clarify usage.
|
|
541
|
+
#define HWY_DYNAMIC_DISPATCH_T(TABLE_NAME) HWY_DYNAMIC_DISPATCH(TABLE_NAME)
|
|
542
|
+
#define HWY_DYNAMIC_POINTER_T(TABLE_NAME) HWY_DYNAMIC_POINTER(TABLE_NAME)
|
|
543
|
+
|
|
396
544
|
#endif // HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0)
|
|
397
545
|
|
|
546
|
+
// Returns the name of an anonymous dispatch table that is only shared with
|
|
547
|
+
// macro invocations coming from the same source line.
|
|
548
|
+
#define HWY_DISPATCH_TABLE_T() HWY_CONCAT(HighwayDispatchTableT, __LINE__)
|
|
549
|
+
|
|
550
|
+
// For templated code, combines export and dispatch using an anonymous table.
|
|
551
|
+
#define HWY_EXPORT_AND_DYNAMIC_DISPATCH_T(FUNC_NAME) \
|
|
552
|
+
HWY_EXPORT_T(HWY_DISPATCH_TABLE_T(), FUNC_NAME); \
|
|
553
|
+
HWY_DYNAMIC_DISPATCH_T(HWY_DISPATCH_TABLE_T())
|
|
554
|
+
|
|
398
555
|
// DEPRECATED names; please use HWY_HAVE_* instead.
|
|
399
556
|
#define HWY_CAP_INTEGER64 HWY_HAVE_INTEGER64
|
|
400
557
|
#define HWY_CAP_FLOAT16 HWY_HAVE_FLOAT16
|
|
@@ -425,13 +582,11 @@ FunctionCache<RetType, Args...> DeduceFunctionCache(RetType (*)(Args...)) {
|
|
|
425
582
|
HWY_TARGET == HWY_AVX3_ZEN4 || HWY_TARGET == HWY_AVX3_SPR
|
|
426
583
|
#include "hwy/ops/x86_512-inl.h"
|
|
427
584
|
#elif HWY_TARGET == HWY_Z14 || HWY_TARGET == HWY_Z15 || \
|
|
428
|
-
HWY_TARGET
|
|
429
|
-
HWY_TARGET == HWY_PPC10
|
|
585
|
+
(HWY_TARGET & HWY_ALL_PPC)
|
|
430
586
|
#include "hwy/ops/ppc_vsx-inl.h"
|
|
431
|
-
#elif HWY_TARGET
|
|
587
|
+
#elif HWY_TARGET & HWY_ALL_NEON
|
|
432
588
|
#include "hwy/ops/arm_neon-inl.h"
|
|
433
|
-
#elif HWY_TARGET
|
|
434
|
-
HWY_TARGET == HWY_SVE_256 || HWY_TARGET == HWY_SVE2_128
|
|
589
|
+
#elif HWY_TARGET & HWY_ALL_SVE
|
|
435
590
|
#include "hwy/ops/arm_sve-inl.h"
|
|
436
591
|
#elif HWY_TARGET == HWY_WASM_EMU256
|
|
437
592
|
#include "hwy/ops/wasm_256-inl.h"
|