@img/sharp-libvips-dev 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. package/README.md +1 -2
  2. package/include/aom/aom_decoder.h +1 -1
  3. package/include/aom/aom_encoder.h +7 -1
  4. package/include/aom/aom_image.h +24 -12
  5. package/include/aom/aom_integer.h +3 -3
  6. package/include/aom/aomcx.h +15 -0
  7. package/include/aom/aomdx.h +5 -2
  8. package/include/archive.h +7 -5
  9. package/include/archive_entry.h +5 -3
  10. package/include/cgif.h +3 -0
  11. package/include/freetype2/freetype/config/ftoption.h +1 -1
  12. package/include/fribidi/fribidi-config.h +2 -2
  13. package/include/fribidi/fribidi-unicode-version.h +3 -3
  14. package/include/glib-2.0/gio/gappinfo.h +40 -25
  15. package/include/glib-2.0/gio/gasyncresult.h +1 -1
  16. package/include/glib-2.0/gio/gconverter.h +5 -0
  17. package/include/glib-2.0/gio/gdbusintrospection.h +1 -1
  18. package/include/glib-2.0/gio/gfile.h +16 -0
  19. package/include/glib-2.0/gio/gio-visibility.h +34 -0
  20. package/include/glib-2.0/gio/gsettings.h +8 -0
  21. package/include/glib-2.0/gio/gvfs.h +2 -2
  22. package/include/glib-2.0/girepository/gi-visibility.h +34 -0
  23. package/include/glib-2.0/glib/gbookmarkfile.h +1 -1
  24. package/include/glib-2.0/glib/giochannel.h +2 -2
  25. package/include/glib-2.0/glib/glib-visibility.h +34 -0
  26. package/include/glib-2.0/glib/gmacros.h +12 -5
  27. package/include/glib-2.0/glib/gmain.h +93 -7
  28. package/include/glib-2.0/glib/gqsort.h +8 -1
  29. package/include/glib-2.0/glib/gstrfuncs.h +0 -12
  30. package/include/glib-2.0/glib/gstrvbuilder.h +3 -0
  31. package/include/glib-2.0/glib/gunicode.h +1 -1
  32. package/include/glib-2.0/glib/gversionmacros.h +9 -0
  33. package/include/glib-2.0/gmodule/gmodule-visibility.h +34 -0
  34. package/include/glib-2.0/gobject/gobject-visibility.h +34 -0
  35. package/include/glib-2.0/gobject/gtype.h +6 -6
  36. package/include/harfbuzz/hb-buffer.h +6 -0
  37. package/include/harfbuzz/hb-common.h +6 -9
  38. package/include/harfbuzz/hb-cplusplus.hh +8 -11
  39. package/include/harfbuzz/hb-subset.h +17 -4
  40. package/include/harfbuzz/hb-version.h +3 -3
  41. package/include/hwy/abort.h +28 -0
  42. package/include/hwy/aligned_allocator.h +48 -1
  43. package/include/hwy/base.h +235 -34
  44. package/include/hwy/detect_compiler_arch.h +84 -10
  45. package/include/hwy/detect_targets.h +95 -29
  46. package/include/hwy/foreach_target.h +12 -1
  47. package/include/hwy/highway.h +205 -50
  48. package/include/hwy/ops/arm_neon-inl.h +841 -99
  49. package/include/hwy/ops/arm_sve-inl.h +413 -141
  50. package/include/hwy/ops/emu128-inl.h +373 -360
  51. package/include/hwy/ops/generic_ops-inl.h +804 -401
  52. package/include/hwy/ops/inside-inl.h +691 -0
  53. package/include/hwy/ops/ppc_vsx-inl.h +456 -166
  54. package/include/hwy/ops/rvv-inl.h +537 -249
  55. package/include/hwy/ops/scalar-inl.h +169 -79
  56. package/include/hwy/ops/set_macros-inl.h +106 -18
  57. package/include/hwy/ops/shared-inl.h +23 -0
  58. package/include/hwy/ops/wasm_128-inl.h +130 -108
  59. package/include/hwy/ops/x86_128-inl.h +1892 -577
  60. package/include/hwy/ops/x86_256-inl.h +625 -184
  61. package/include/hwy/ops/x86_512-inl.h +733 -131
  62. package/include/hwy/targets.h +22 -21
  63. package/include/hwy/timer-inl.h +3 -3
  64. package/include/hwy/timer.h +5 -1
  65. package/include/libheif/heif.h +170 -15
  66. package/include/libheif/heif_items.h +237 -0
  67. package/include/libheif/heif_properties.h +38 -2
  68. package/include/libheif/heif_regions.h +1 -1
  69. package/include/libheif/heif_version.h +2 -2
  70. package/include/libpng16/pnglibconf.h +1 -1
  71. package/include/librsvg-2.0/librsvg/rsvg-cairo.h +1 -1
  72. package/include/librsvg-2.0/librsvg/rsvg-features.h +3 -4
  73. package/include/librsvg-2.0/librsvg/rsvg-pixbuf.h +235 -0
  74. package/include/librsvg-2.0/librsvg/rsvg-version.h +3 -3
  75. package/include/librsvg-2.0/librsvg/rsvg.h +55 -176
  76. package/include/libxml2/libxml/HTMLparser.h +12 -19
  77. package/include/libxml2/libxml/c14n.h +1 -12
  78. package/include/libxml2/libxml/debugXML.h +1 -1
  79. package/include/libxml2/libxml/encoding.h +9 -0
  80. package/include/libxml2/libxml/entities.h +12 -1
  81. package/include/libxml2/libxml/hash.h +19 -0
  82. package/include/libxml2/libxml/list.h +2 -2
  83. package/include/libxml2/libxml/nanohttp.h +17 -0
  84. package/include/libxml2/libxml/parser.h +61 -55
  85. package/include/libxml2/libxml/parserInternals.h +9 -1
  86. package/include/libxml2/libxml/pattern.h +6 -0
  87. package/include/libxml2/libxml/tree.h +32 -12
  88. package/include/libxml2/libxml/uri.h +11 -0
  89. package/include/libxml2/libxml/valid.h +29 -2
  90. package/include/libxml2/libxml/xinclude.h +7 -0
  91. package/include/libxml2/libxml/xmlIO.h +21 -4
  92. package/include/libxml2/libxml/xmlerror.h +14 -0
  93. package/include/libxml2/libxml/xmlexports.h +111 -15
  94. package/include/libxml2/libxml/xmlmemory.h +8 -45
  95. package/include/libxml2/libxml/xmlreader.h +2 -0
  96. package/include/libxml2/libxml/xmlsave.h +5 -0
  97. package/include/libxml2/libxml/xmlunicode.h +165 -1
  98. package/include/libxml2/libxml/xmlversion.h +15 -179
  99. package/include/libxml2/libxml/xmlwriter.h +1 -0
  100. package/include/libxml2/libxml/xpath.h +4 -0
  101. package/include/pango-1.0/pango/pango-features.h +3 -3
  102. package/include/pango-1.0/pango/pango-item.h +4 -2
  103. package/include/pango-1.0/pango/pango-version-macros.h +25 -0
  104. package/include/pango-1.0/pango/pangofc-font.h +2 -1
  105. package/include/pnglibconf.h +1 -1
  106. package/include/vips/util.h +1 -2
  107. package/include/vips/version.h +4 -4
  108. package/include/webp/decode.h +58 -56
  109. package/include/webp/demux.h +25 -21
  110. package/include/webp/encode.h +44 -39
  111. package/include/webp/mux.h +76 -15
  112. package/include/webp/mux_types.h +2 -1
  113. package/include/webp/sharpyuv/sharpyuv.h +77 -8
  114. package/include/webp/types.h +29 -8
  115. package/include/zconf.h +1 -1
  116. package/include/zlib.h +12 -12
  117. package/package.json +1 -1
  118. package/versions.json +14 -15
@@ -62,7 +62,8 @@
62
62
  // Bits 0..3 reserved (4 targets)
63
63
  #define HWY_AVX3_SPR (1LL << 4)
64
64
  // Bit 5 reserved (likely AVX10.2 with 256-bit vectors)
65
- // Currently HWY_AVX3_DL plus a special case for CompressStore (10x as fast).
65
+ // Currently HWY_AVX3_DL plus AVX512BF16 and a special case for CompressStore
66
+ // (10x as fast).
66
67
  // We may later also use VPCONFLICT.
67
68
  #define HWY_AVX3_ZEN4 (1LL << 6) // see HWY_WANT_AVX3_ZEN4 below
68
69
 
@@ -84,15 +85,22 @@
84
85
  #define HWY_HIGHEST_TARGET_BIT_X86 14
85
86
 
86
87
  // --------------------------- Arm: 15 targets (+ one fallback)
87
- // Bits 15..23 reserved (9 targets)
88
- #define HWY_SVE2_128 (1LL << 24) // specialized target (e.g. Arm N2)
89
- #define HWY_SVE_256 (1LL << 25) // specialized target (e.g. Arm V1)
90
- #define HWY_SVE2 (1LL << 26)
91
- #define HWY_SVE (1LL << 27)
88
+ // Bits 15..17 reserved (3 targets)
89
+ #define HWY_SVE2_128 (1LL << 18) // specialized (e.g. Neoverse V2/N2/N3)
90
+ #define HWY_SVE_256 (1LL << 19) // specialized (Neoverse V1)
91
+ // Bits 20-22 reserved for later SVE (3 targets)
92
+ #define HWY_SVE2 (1LL << 23)
93
+ #define HWY_SVE (1LL << 24)
94
+ // Bit 25 reserved for NEON
95
+ #define HWY_NEON_BF16 (1LL << 26) // fp16/dot/bf16 (e.g. Neoverse V2/N2/N3)
96
+ // Bit 27 reserved for NEON
92
97
  #define HWY_NEON (1LL << 28) // Implies support for AES
93
98
  #define HWY_NEON_WITHOUT_AES (1LL << 29)
94
99
  #define HWY_HIGHEST_TARGET_BIT_ARM 29
95
100
 
101
+ #define HWY_ALL_NEON (HWY_NEON_WITHOUT_AES | HWY_NEON | HWY_NEON_BF16)
102
+ #define HWY_ALL_SVE (HWY_SVE | HWY_SVE2 | HWY_SVE_256 | HWY_SVE2_128)
103
+
96
104
  // --------------------------- RISC-V: 9 targets (+ one fallback)
97
105
  // Bits 30..36 reserved (7 targets)
98
106
  #define HWY_RVV (1LL << 37)
@@ -111,6 +119,8 @@
111
119
  #define HWY_Z14 (1LL << 51) // Z14
112
120
  #define HWY_HIGHEST_TARGET_BIT_PPC 51
113
121
 
122
+ #define HWY_ALL_PPC (HWY_PPC8 | HWY_PPC9 | HWY_PPC10)
123
+
114
124
  // --------------------------- WebAssembly: 9 targets (+ one fallback)
115
125
  // Bits 52..57 reserved (6 targets)
116
126
  #define HWY_WASM_EMU256 (1LL << 58) // Experimental
@@ -188,7 +198,7 @@
188
198
 
189
199
  // armv7be has not been tested and is not yet supported.
190
200
  #if HWY_ARCH_ARM_V7 && HWY_IS_BIG_ENDIAN
191
- #define HWY_BROKEN_ARM7_BIG_ENDIAN (HWY_NEON | HWY_NEON_WITHOUT_AES)
201
+ #define HWY_BROKEN_ARM7_BIG_ENDIAN HWY_ALL_NEON
192
202
  #else
193
203
  #define HWY_BROKEN_ARM7_BIG_ENDIAN 0
194
204
  #endif
@@ -199,11 +209,19 @@
199
209
  #if HWY_ARCH_ARM_V7 && (__ARM_ARCH_PROFILE == 'A') && \
200
210
  !defined(__ARM_VFPV4__) && \
201
211
  !((__ARM_NEON_FP & 0x2 /* half-float */) && (__ARM_FEATURE_FMA == 1))
202
- #define HWY_BROKEN_ARM7_WITHOUT_VFP4 (HWY_NEON | HWY_NEON_WITHOUT_AES)
212
+ #define HWY_BROKEN_ARM7_WITHOUT_VFP4 HWY_ALL_NEON
203
213
  #else
204
214
  #define HWY_BROKEN_ARM7_WITHOUT_VFP4 0
205
215
  #endif
206
216
 
217
+ // HWY_NEON_BF16 requires recent compilers.
218
+ #if (HWY_COMPILER_CLANG != 0 && HWY_COMPILER_CLANG < 1700) || \
219
+ (HWY_COMPILER_GCC_ACTUAL != 0 && HWY_COMPILER_GCC_ACTUAL < 1302)
220
+ #define HWY_BROKEN_NEON_BF16 (HWY_NEON_BF16)
221
+ #else
222
+ #define HWY_BROKEN_NEON_BF16 0
223
+ #endif
224
+
207
225
  // SVE[2] require recent clang or gcc versions.
208
226
  #if (HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1100) || \
209
227
  (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1000)
@@ -247,7 +265,7 @@
247
265
  (HWY_BROKEN_CLANG6 | HWY_BROKEN_32BIT | HWY_BROKEN_MSVC | \
248
266
  HWY_BROKEN_AVX3_DL_ZEN4 | HWY_BROKEN_AVX3_SPR | \
249
267
  HWY_BROKEN_ARM7_BIG_ENDIAN | HWY_BROKEN_ARM7_WITHOUT_VFP4 | \
250
- HWY_BROKEN_SVE | HWY_BROKEN_PPC10)
268
+ HWY_BROKEN_NEON_BF16 | HWY_BROKEN_SVE | HWY_BROKEN_PPC10)
251
269
 
252
270
  #endif // HWY_BROKEN_TARGETS
253
271
 
@@ -335,7 +353,10 @@
335
353
 
336
354
  #if HWY_ARCH_ARM
337
355
 
338
- #if defined(__ARM_FEATURE_SVE2)
356
+ // Also check compiler version as done for HWY_ATTAINABLE_SVE2 because the
357
+ // static target (influenced here) must be one of the attainable targets.
358
+ #if defined(__ARM_FEATURE_SVE2) && \
359
+ (HWY_COMPILER_CLANG >= 1400 || HWY_COMPILER_GCC_ACTUAL >= 1200)
339
360
  #undef HWY_BASELINE_SVE2 // was 0, will be re-defined
340
361
  // If user specified -msve-vector-bits=128, they assert the vector length is
341
362
  // 128 bits and we should use the HWY_SVE2_128 (more efficient for some ops).
@@ -350,7 +371,8 @@
350
371
  #endif // __ARM_FEATURE_SVE_BITS
351
372
  #endif // __ARM_FEATURE_SVE2
352
373
 
353
- #if defined(__ARM_FEATURE_SVE)
374
+ #if defined(__ARM_FEATURE_SVE) && \
375
+ (HWY_COMPILER_CLANG >= 900 || HWY_COMPILER_GCC_ACTUAL >= 800)
354
376
  #undef HWY_BASELINE_SVE // was 0, will be re-defined
355
377
  // See above. If user-specified vector length matches our optimization, use it.
356
378
  #if defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS == 256
@@ -363,12 +385,17 @@
363
385
  // GCC 4.5.4 only defines __ARM_NEON__; 5.4 defines both.
364
386
  #if defined(__ARM_NEON__) || defined(__ARM_NEON)
365
387
  #undef HWY_BASELINE_NEON
366
- #if defined(__ARM_FEATURE_AES)
367
- #define HWY_BASELINE_NEON (HWY_NEON | HWY_NEON_WITHOUT_AES)
388
+ #if defined(__ARM_FEATURE_AES) && \
389
+ defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && \
390
+ defined(__ARM_FEATURE_DOTPROD) && \
391
+ defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC)
392
+ #define HWY_BASELINE_NEON HWY_ALL_NEON
393
+ #elif defined(__ARM_FEATURE_AES)
394
+ #define HWY_BASELINE_NEON (HWY_NEON_WITHOUT_AES | HWY_NEON)
368
395
  #else
369
396
  #define HWY_BASELINE_NEON (HWY_NEON_WITHOUT_AES)
370
- #endif
371
- #endif
397
+ #endif // __ARM_FEATURE*
398
+ #endif // __ARM_NEON
372
399
 
373
400
  #endif // HWY_ARCH_ARM
374
401
 
@@ -496,14 +523,16 @@
496
523
  #define HWY_BASELINE_AVX3_ZEN4 0
497
524
  #endif
498
525
 
499
- #if HWY_BASELINE_AVX3_DL != 0 && defined(__AVX512FP16__)
526
+ #if HWY_BASELINE_AVX3_DL != 0 && defined(__AVX512BF16__) && \
527
+ defined(__AVX512FP16__)
500
528
  #define HWY_BASELINE_AVX3_SPR HWY_AVX3_SPR
501
529
  #else
502
530
  #define HWY_BASELINE_AVX3_SPR 0
503
531
  #endif
504
532
 
505
533
  // RVV requires intrinsics 0.11 or later, see #1156.
506
- #if HWY_ARCH_RVV && defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 11000
534
+ #if HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && \
535
+ __riscv_v_intrinsic >= 11000
507
536
  #define HWY_BASELINE_RVV HWY_RVV
508
537
  #else
509
538
  #define HWY_BASELINE_RVV 0
@@ -548,19 +577,43 @@
548
577
  #endif
549
578
  // Defining one of HWY_COMPILE_ONLY_* will trump HWY_COMPILE_ALL_ATTAINABLE.
550
579
 
580
+ #ifndef HWY_HAVE_AUXV // allow override
581
+ #ifdef TOOLCHAIN_MISS_SYS_AUXV_H
582
+ #define HWY_HAVE_AUXV 0 // CMake failed to find the header
583
+ // glibc 2.16 added auxv, but checking for that requires features.h, and we do
584
+ // not want to include system headers here. Instead check for the header
585
+ // directly, which has been supported at least since GCC 5.4 and Clang 3.
586
+ #elif defined(__has_include) // note: wrapper macro fails on Clang ~17
587
+ // clang-format off
588
+ #if __has_include(<sys/auxv.h>)
589
+ // clang-format on
590
+ #define HWY_HAVE_AUXV 1 // header present
591
+ #else
592
+ #define HWY_HAVE_AUXV 0 // header not present
593
+ #endif // __has_include
594
+ #else // compiler lacks __has_include
595
+ #define HWY_HAVE_AUXV 0
596
+ #endif
597
+ #endif // HWY_HAVE_AUXV
598
+
599
+ // Allow opting out and, without a guarantee of success, opting in.
600
+ #ifndef HWY_HAVE_RUNTIME_DISPATCH
551
601
  // Clang, GCC and MSVC allow runtime dispatch on x86.
552
602
  #if HWY_ARCH_X86
553
603
  #define HWY_HAVE_RUNTIME_DISPATCH 1
554
- // On Arm/PPC, GCC and Clang 16+ do, and we require Linux to detect CPU
555
- // capabilities. Currently require opt-in for Clang because it is experimental.
556
- #elif (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X) && \
557
- (HWY_COMPILER_GCC_ACTUAL || (HWY_COMPILER_CLANG >= 1600 && \
558
- defined(HWY_ENABLE_CLANG_ARM_DISPATCH))) && \
559
- HWY_OS_LINUX && !defined(TOOLCHAIN_MISS_SYS_AUXV_H)
604
+ // On Arm, PPC, S390X, and RISC-V: GCC and Clang 17+ do, and we require Linux
605
+ // to detect CPU capabilities.
606
+ #elif (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X || HWY_ARCH_RISCV) && \
607
+ (HWY_COMPILER_GCC_ACTUAL || HWY_COMPILER_CLANG >= 1700) && HWY_OS_LINUX && \
608
+ HWY_HAVE_AUXV
609
+ #define HWY_HAVE_RUNTIME_DISPATCH 1
610
+ #elif HWY_ARCH_ARM_A64 && HWY_OS_APPLE && \
611
+ (HWY_COMPILER_GCC_ACTUAL || HWY_COMPILER_CLANG >= 1700)
560
612
  #define HWY_HAVE_RUNTIME_DISPATCH 1
561
613
  #else
562
614
  #define HWY_HAVE_RUNTIME_DISPATCH 0
563
- #endif
615
+ #endif // HWY_ARCH_*
616
+ #endif // HWY_HAVE_RUNTIME_DISPATCH
564
617
 
565
618
  // AVX3_DL is not widely available yet. To reduce code size and compile time,
566
619
  // only include it in the set of attainable targets (for dynamic dispatch) if
@@ -572,22 +625,26 @@
572
625
  #endif
573
626
 
574
627
  #if HWY_ARCH_ARM_A64 && HWY_HAVE_RUNTIME_DISPATCH
575
- #define HWY_ATTAINABLE_NEON (HWY_NEON | HWY_NEON_WITHOUT_AES)
628
+ #define HWY_ATTAINABLE_NEON HWY_ALL_NEON
576
629
  #elif HWY_ARCH_ARM // static dispatch, or HWY_ARCH_ARM_V7
577
630
  #define HWY_ATTAINABLE_NEON (HWY_BASELINE_NEON)
578
631
  #else
579
632
  #define HWY_ATTAINABLE_NEON 0
580
633
  #endif
581
634
 
582
- #if HWY_ARCH_ARM_A64 && (HWY_HAVE_RUNTIME_DISPATCH || \
583
- (HWY_ENABLED_BASELINE & (HWY_SVE | HWY_SVE_256)))
635
+ #if HWY_ARCH_ARM_A64 && \
636
+ (HWY_COMPILER_CLANG >= 900 || HWY_COMPILER_GCC_ACTUAL >= 800) && \
637
+ (HWY_HAVE_RUNTIME_DISPATCH || \
638
+ (HWY_ENABLED_BASELINE & (HWY_SVE | HWY_SVE_256)))
584
639
  #define HWY_ATTAINABLE_SVE (HWY_SVE | HWY_SVE_256)
585
640
  #else
586
641
  #define HWY_ATTAINABLE_SVE 0
587
642
  #endif
588
643
 
589
- #if HWY_ARCH_ARM_A64 && (HWY_HAVE_RUNTIME_DISPATCH || \
590
- (HWY_ENABLED_BASELINE & (HWY_SVE2 | HWY_SVE2_128)))
644
+ #if HWY_ARCH_ARM_A64 && \
645
+ (HWY_COMPILER_CLANG >= 1400 || HWY_COMPILER_GCC_ACTUAL >= 1200) && \
646
+ (HWY_HAVE_RUNTIME_DISPATCH || \
647
+ (HWY_ENABLED_BASELINE & (HWY_SVE2 | HWY_SVE2_128)))
591
648
  #define HWY_ATTAINABLE_SVE2 (HWY_SVE2 | HWY_SVE2_128)
592
649
  #else
593
650
  #define HWY_ATTAINABLE_SVE2 0
@@ -617,6 +674,12 @@
617
674
  #define HWY_ATTAINABLE_S390X 0
618
675
  #endif
619
676
 
677
+ #if HWY_ARCH_RISCV && HWY_HAVE_RUNTIME_DISPATCH
678
+ #define HWY_ATTAINABLE_RISCV (HWY_RVV)
679
+ #else
680
+ #define HWY_ATTAINABLE_RISCV 0
681
+ #endif
682
+
620
683
  // Attainable means enabled and the compiler allows intrinsics (even when not
621
684
  // allowed to autovectorize). Used in 3 and 4.
622
685
  #if HWY_ARCH_X86
@@ -640,6 +703,9 @@
640
703
  #elif HWY_ARCH_S390X
641
704
  #define HWY_ATTAINABLE_TARGETS \
642
705
  HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_S390X)
706
+ #elif HWY_ARCH_RVV
707
+ #define HWY_ATTAINABLE_TARGETS \
708
+ HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_RISCV)
643
709
  #else
644
710
  #define HWY_ATTAINABLE_TARGETS (HWY_ENABLED_BASELINE)
645
711
  #endif // HWY_ARCH_*
@@ -168,6 +168,17 @@
168
168
  #endif
169
169
  #endif
170
170
 
171
+ #if (HWY_TARGETS & HWY_NEON_BF16) && (HWY_STATIC_TARGET != HWY_NEON_BF16)
172
+ #undef HWY_TARGET
173
+ #define HWY_TARGET HWY_NEON_BF16
174
+ #include HWY_TARGET_INCLUDE
175
+ #ifdef HWY_TARGET_TOGGLE
176
+ #undef HWY_TARGET_TOGGLE
177
+ #else
178
+ #define HWY_TARGET_TOGGLE
179
+ #endif
180
+ #endif
181
+
171
182
  #if (HWY_TARGETS & HWY_SVE) && (HWY_STATIC_TARGET != HWY_SVE)
172
183
  #undef HWY_TARGET
173
184
  #define HWY_TARGET HWY_SVE
@@ -295,7 +306,7 @@
295
306
  #endif
296
307
  #endif
297
308
 
298
- // ------------------------------ HWY_ARCH_RVV
309
+ // ------------------------------ HWY_ARCH_RISCV
299
310
 
300
311
  #if (HWY_TARGETS & HWY_RVV) && (HWY_STATIC_TARGET != HWY_RVV)
301
312
  #undef HWY_TARGET
@@ -18,10 +18,17 @@
18
18
  // IWYU pragma: begin_exports
19
19
  #include "hwy/base.h"
20
20
  #include "hwy/detect_compiler_arch.h"
21
+ #include "hwy/detect_targets.h"
21
22
  #include "hwy/highway_export.h"
22
23
  #include "hwy/targets.h"
23
24
  // IWYU pragma: end_exports
24
25
 
26
+ #if HWY_CXX_LANG < 201703L
27
+ #define HWY_DISPATCH_MAP 1
28
+ #else
29
+ #define HWY_DISPATCH_MAP 0
30
+ #endif
31
+
25
32
  // This include guard is checked by foreach_target, so avoid the usual _H_
26
33
  // suffix to prevent copybara from renaming it. NOTE: ops/*-inl.h are included
27
34
  // after/outside this include guard.
@@ -30,11 +37,6 @@
30
37
 
31
38
  namespace hwy {
32
39
 
33
- // API version (https://semver.org/); keep in sync with CMakeLists.txt.
34
- #define HWY_MAJOR 1
35
- #define HWY_MINOR 1
36
- #define HWY_PATCH 0
37
-
38
40
  //------------------------------------------------------------------------------
39
41
  // Shorthand for tags (defined in shared-inl.h) used to select overloads.
40
42
  // Note that ScalableTag<T> is preferred over HWY_FULL, and CappedTag<T, N> over
@@ -84,6 +86,8 @@ namespace hwy {
84
86
  #define HWY_STATIC_DISPATCH(FUNC_NAME) N_NEON_WITHOUT_AES::FUNC_NAME
85
87
  #elif HWY_STATIC_TARGET == HWY_NEON
86
88
  #define HWY_STATIC_DISPATCH(FUNC_NAME) N_NEON::FUNC_NAME
89
+ #elif HWY_STATIC_TARGET == HWY_NEON_BF16
90
+ #define HWY_STATIC_DISPATCH(FUNC_NAME) N_NEON_BF16::FUNC_NAME
87
91
  #elif HWY_STATIC_TARGET == HWY_SVE
88
92
  #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE::FUNC_NAME
89
93
  #elif HWY_STATIC_TARGET == HWY_SVE2
@@ -162,6 +166,12 @@ namespace hwy {
162
166
  #define HWY_CHOOSE_NEON(FUNC_NAME) nullptr
163
167
  #endif
164
168
 
169
+ #if HWY_TARGETS & HWY_NEON_BF16
170
+ #define HWY_CHOOSE_NEON_BF16(FUNC_NAME) &N_NEON_BF16::FUNC_NAME
171
+ #else
172
+ #define HWY_CHOOSE_NEON_BF16(FUNC_NAME) nullptr
173
+ #endif
174
+
165
175
  #if HWY_TARGETS & HWY_SVE
166
176
  #define HWY_CHOOSE_SVE(FUNC_NAME) &N_SVE::FUNC_NAME
167
177
  #else
@@ -268,41 +278,68 @@ namespace hwy {
268
278
  // apparently cannot be an array. Use a function pointer instead, which has the
269
279
  // disadvantage that we call the static (not best) target on the first call to
270
280
  // any HWY_DYNAMIC_DISPATCH.
271
- #if HWY_COMPILER_MSVC && HWY_COMPILER_MSVC < 1915
281
+ #if (HWY_COMPILER_MSVC && HWY_COMPILER_MSVC < 1915) || \
282
+ (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 700)
272
283
  #define HWY_DISPATCH_WORKAROUND 1
273
284
  #else
274
285
  #define HWY_DISPATCH_WORKAROUND 0
275
286
  #endif
276
287
 
288
+ #if HWY_DISPATCH_MAP
289
+ struct AllExports {
290
+ template <class FuncPtr, class ExportsKey, uint64_t kHash>
291
+ static const FuncPtr*& GetRefToExportsPtr() {
292
+ static const FuncPtr* s_exports = nullptr;
293
+ return s_exports;
294
+ }
295
+ };
296
+ #endif
297
+
277
298
  // Provides a static member function which is what is called during the first
278
299
  // HWY_DYNAMIC_DISPATCH, where GetIndex is still zero, and instantiations of
279
- // this function are the first entry in the tables created by HWY_EXPORT.
300
+ // this function are the first entry in the tables created by HWY_EXPORT[_T].
280
301
  template <typename RetType, typename... Args>
281
302
  struct FunctionCache {
282
303
  public:
283
- typedef RetType(FunctionType)(Args...);
304
+ typedef RetType(FuncType)(Args...);
305
+ using FuncPtr = FuncType*;
284
306
 
285
- #if HWY_DISPATCH_WORKAROUND
286
- template <FunctionType* const func>
287
- static RetType ChooseAndCall(Args... args) {
288
- ChosenTarget& chosen_target = GetChosenTarget();
289
- chosen_target.Update(SupportedTargets());
290
- return (*func)(args...);
291
- }
292
- #else
293
307
  // A template function that when instantiated has the same signature as the
294
308
  // function being called. This function initializes the bit array of targets
295
309
  // supported by the current CPU and then calls the appropriate entry within
296
310
  // the HWY_EXPORT table. Subsequent calls via HWY_DYNAMIC_DISPATCH to any
297
311
  // exported functions, even those defined by different translation units,
298
312
  // will dispatch directly to the best available target.
299
- template <FunctionType* const table[]>
313
+ #if HWY_DISPATCH_MAP
314
+ template <class ExportsKey, uint64_t kHash>
300
315
  static RetType ChooseAndCall(Args... args) {
301
316
  ChosenTarget& chosen_target = GetChosenTarget();
302
317
  chosen_target.Update(SupportedTargets());
318
+
319
+ const FuncPtr* table = AllExports::template GetRefToExportsPtr<
320
+ FuncPtr, RemoveCvRef<ExportsKey>, kHash>();
321
+ HWY_ASSERT(table);
322
+
303
323
  return (table[chosen_target.GetIndex()])(args...);
304
324
  }
305
- #endif // HWY_DISPATCH_WORKAROUND
325
+
326
+ #if !HWY_DISPATCH_WORKAROUND
327
+ template <const FuncPtr* table>
328
+ static RetType TableChooseAndCall(Args... args) {
329
+ ChosenTarget& chosen_target = GetChosenTarget();
330
+ chosen_target.Update(SupportedTargets());
331
+ return (table[chosen_target.GetIndex()])(args...);
332
+ }
333
+ #endif // !HWY_DISPATCH_WORKAROUND
334
+
335
+ #else // !HWY_DISPATCH_MAP: zero-overhead, but requires C++17
336
+ template <const FuncPtr* table>
337
+ static RetType ChooseAndCall(Args... args) {
338
+ ChosenTarget& chosen_target = GetChosenTarget();
339
+ chosen_target.Update(SupportedTargets());
340
+ return (table[chosen_target.GetIndex()])(args...);
341
+ }
342
+ #endif // HWY_DISPATCH_MAP
306
343
  };
307
344
 
308
345
  // Used to deduce the template parameters RetType and Args from a function.
@@ -315,9 +352,7 @@ FunctionCache<RetType, Args...> DeduceFunctionCache(RetType (*)(Args...)) {
315
352
  HWY_CONCAT(FUNC_NAME, HighwayDispatchTable)
316
353
 
317
354
  // HWY_EXPORT(FUNC_NAME); expands to a static array that is used by
318
- // HWY_DYNAMIC_DISPATCH() to call the appropriate function at runtime. This
319
- // static array must be defined at the same namespace level as the function
320
- // it is exporting.
355
+ // HWY_DYNAMIC_DISPATCH() to call the appropriate function at runtime.
321
356
  // After being exported, it can be called from other parts of the same source
322
357
  // file using HWY_DYNAMIC_DISPATCH(), in particular from a function wrapper
323
358
  // like in the following example:
@@ -342,59 +377,181 @@ FunctionCache<RetType, Args...> DeduceFunctionCache(RetType (*)(Args...)) {
342
377
  // }
343
378
  // } // namespace skeleton
344
379
  //
380
+ // For templated code with a single type parameter, instead use HWY_EXPORT_T and
381
+ // its HWY_DYNAMIC_DISPATCH_T counterpart:
382
+ //
383
+ // template <typename T>
384
+ // void MyFunctionCaller(T ...) {
385
+ // // First argument to both HWY_EXPORT_T and HWY_DYNAMIC_DISPATCH_T is an
386
+ // // arbitrary table name; you must provide the same name for each call.
387
+ // // It is fine to have multiple HWY_EXPORT_T in a function, but a 64-bit
388
+ // // FNV hash collision among *any* table names will trigger HWY_ABORT.
389
+ // HWY_EXPORT_T(Table1, MyFunction<T>);
390
+ // HWY_DYNAMIC_DISPATCH_T(Table1)(a, b, c);
391
+ // }
392
+ //
393
+ // Note that HWY_EXPORT_T must be invoked inside a template (in the above
394
+ // example: `MyFunctionCaller`), so that a separate table will be created for
395
+ // each template instantiation. For convenience, we also provide a macro that
396
+ // combines both steps and avoids the need to pick a table name:
397
+ //
398
+ // template <typename T>
399
+ // void MyFunctionCaller(T ...) {
400
+ // // Table name is automatically chosen. Note that this variant must be
401
+ // // called in statement context; it is not a valid expression.
402
+ // HWY_EXPORT_AND_DYNAMIC_DISPATCH_T(MyFunction<T>)(a, b, c);
403
+ // }
345
404
 
405
+ // Simplified version for IDE or the dynamic dispatch case with only one target.
346
406
  #if HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0)
347
407
 
348
- // Simplified version for IDE or the dynamic dispatch case with only one target.
349
- // This case still uses a table, although of a single element, to provide the
350
- // same compile error conditions as with the dynamic dispatch case when multiple
351
- // targets are being compiled.
352
- #define HWY_EXPORT(FUNC_NAME) \
408
+ // We use a table to provide the same compile error conditions as with the
409
+ // non-simplified case, but the table only has a single entry.
410
+ #define HWY_EXPORT_T(TABLE_NAME, FUNC_NAME) \
353
411
  HWY_MAYBE_UNUSED static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const \
354
- HWY_DISPATCH_TABLE(FUNC_NAME)[1] = {&HWY_STATIC_DISPATCH(FUNC_NAME)}
355
- #define HWY_DYNAMIC_DISPATCH(FUNC_NAME) HWY_STATIC_DISPATCH(FUNC_NAME)
412
+ HWY_DISPATCH_TABLE(TABLE_NAME)[1] = {&HWY_STATIC_DISPATCH(FUNC_NAME)}
413
+
414
+ // Use the table, not just STATIC_DISPATCH as in DYNAMIC_DISPATCH, because
415
+ // TABLE_NAME might not match the function name.
416
+ #define HWY_DYNAMIC_POINTER_T(TABLE_NAME) (HWY_DISPATCH_TABLE(TABLE_NAME)[0])
417
+ #define HWY_DYNAMIC_DISPATCH_T(TABLE_NAME) \
418
+ (*(HWY_DYNAMIC_POINTER_T(TABLE_NAME)))
419
+
420
+ #define HWY_EXPORT(FUNC_NAME) HWY_EXPORT_T(FUNC_NAME, FUNC_NAME)
356
421
  #define HWY_DYNAMIC_POINTER(FUNC_NAME) &HWY_STATIC_DISPATCH(FUNC_NAME)
422
+ #define HWY_DYNAMIC_DISPATCH(FUNC_NAME) HWY_STATIC_DISPATCH(FUNC_NAME)
357
423
 
358
- #else
424
+ #else // not simplified: full table
425
+
426
+ // Pre-C++17 workaround: non-type template arguments must have linkage, which
427
+ // means we cannot pass &table as a template argument to ChooseAndCall.
428
+ // ChooseAndCall must find a way to access the table in order to dispatch to the
429
+ // chosen target:
430
+ // 0) Skipping this by dispatching to the static target would be surprising to
431
+ // users and may have serious performance implications.
432
+ // 1) An extra function parameter would be unacceptable because it changes the
433
+ // user-visible function signature.
434
+ // 2) Declaring a table, then defining a pointer to it would work, but requires
435
+ // an additional DECLARE step outside the function so that the pointer has
436
+ // linkage, which breaks existing code.
437
+ // 3) We instead associate the function with the table using an instance of an
438
+ // unnamed struct and the hash of the table name as the key. Because
439
+ // ChooseAndCall has the type information, it can then cast to the function
440
+ // pointer type. However, we cannot simply pass the name as a template
441
+ // argument to ChooseAndCall because this requires char*, which hits the same
442
+ // linkage problem. We instead hash the table name, which assumes the
443
+ // hashes of the table names do not collide.
444
+ #if HWY_DISPATCH_MAP
445
+
446
+ static constexpr uint64_t FNV(const char* name) {
447
+ return *name ? static_cast<uint64_t>(static_cast<uint8_t>(*name)) ^
448
+ (0x100000001b3ULL * FNV(name + 1))
449
+ : 0xcbf29ce484222325ULL;
450
+ }
359
451
 
360
- // Simplified version for MSVC 2017: function pointer instead of table.
361
- #if HWY_DISPATCH_WORKAROUND
452
+ template <uint64_t kHash>
453
+ struct AddExport {
454
+ template <class ExportsKey, class FuncPtr>
455
+ AddExport(ExportsKey /*exports_key*/, const char* table_name,
456
+ const FuncPtr* table) {
457
+ using FuncCache = decltype(DeduceFunctionCache(hwy::DeclVal<FuncPtr>()));
458
+ static_assert(
459
+ hwy::IsSame<RemoveCvRef<FuncPtr>, typename FuncCache::FuncPtr>(),
460
+ "FuncPtr should be same type as FuncCache::FuncPtr");
461
+
462
+ const FuncPtr*& exports_ptr = AllExports::template GetRefToExportsPtr<
463
+ RemoveCvRef<FuncPtr>, RemoveCvRef<ExportsKey>, kHash>();
464
+ if (exports_ptr && exports_ptr != table) {
465
+ HWY_ABORT("Hash collision for %s, rename the function\n", table_name);
466
+ } else {
467
+ exports_ptr = table;
468
+ }
469
+ }
470
+ };
362
471
 
472
+ // Dynamic dispatch: defines table of function pointers. This must be invoked
473
+ // from inside the function template that calls the template we are exporting.
474
+ // TABLE_NAME must match the one passed to HWY_DYNAMIC_DISPATCH_T. This
475
+ // argument allows multiple exports within one function.
476
+ #define HWY_EXPORT_T(TABLE_NAME, FUNC_NAME) \
477
+ static const struct { \
478
+ } HWY_CONCAT(TABLE_NAME, HighwayDispatchExportsKey) = {}; \
479
+ static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE( \
480
+ TABLE_NAME)[static_cast<size_t>(HWY_MAX_DYNAMIC_TARGETS + 2)] = { \
481
+ /* The first entry in the table initializes the global cache and \
482
+ * calls the appropriate function. */ \
483
+ &decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH(FUNC_NAME))):: \
484
+ template ChooseAndCall<decltype(HWY_CONCAT( \
485
+ TABLE_NAME, HighwayDispatchExportsKey)), \
486
+ hwy::FNV(#TABLE_NAME)>, \
487
+ HWY_CHOOSE_TARGET_LIST(FUNC_NAME), \
488
+ HWY_CHOOSE_FALLBACK(FUNC_NAME), \
489
+ }; \
490
+ HWY_MAYBE_UNUSED static hwy::AddExport<hwy::FNV(#TABLE_NAME)> HWY_CONCAT( \
491
+ HighwayAddTable, __LINE__)( \
492
+ HWY_CONCAT(TABLE_NAME, HighwayDispatchExportsKey), #TABLE_NAME, \
493
+ HWY_DISPATCH_TABLE(TABLE_NAME))
494
+
495
+ // For non-template functions. Not necessarily invoked within a function, hence
496
+ // we derive the string and variable names from FUNC_NAME, not HWY_FUNCTION.
497
+ #if HWY_DISPATCH_WORKAROUND
498
+ #define HWY_EXPORT(FUNC_NAME) HWY_EXPORT_T(FUNC_NAME, FUNC_NAME)
499
+ #else
363
500
  #define HWY_EXPORT(FUNC_NAME) \
364
501
  static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE( \
365
- FUNC_NAME)[HWY_MAX_DYNAMIC_TARGETS + 2] = { \
502
+ FUNC_NAME)[static_cast<size_t>(HWY_MAX_DYNAMIC_TARGETS + 2)] = { \
366
503
  /* The first entry in the table initializes the global cache and \
367
- * calls the function from HWY_STATIC_TARGET. */ \
368
- &decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH( \
369
- FUNC_NAME)))::ChooseAndCall<&HWY_STATIC_DISPATCH(FUNC_NAME)>, \
504
+ * calls the appropriate function. */ \
505
+ &decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH(FUNC_NAME))):: \
506
+ template TableChooseAndCall<HWY_DISPATCH_TABLE(FUNC_NAME)>, \
370
507
  HWY_CHOOSE_TARGET_LIST(FUNC_NAME), \
371
508
  HWY_CHOOSE_FALLBACK(FUNC_NAME), \
372
509
  }
510
+ #endif // HWY_DISPATCH_WORKAROUND
373
511
 
374
- #else
512
+ #else // !HWY_DISPATCH_MAP
375
513
 
376
- // Dynamic dispatch case with one entry per dynamic target plus the fallback
377
- // target and the initialization wrapper.
378
- #define HWY_EXPORT(FUNC_NAME) \
514
+ // Zero-overhead, but requires C++17 for non-type template arguments without
515
+ // linkage, because HWY_EXPORT_T tables are local static variables.
516
+ #define HWY_EXPORT_T(TABLE_NAME, FUNC_NAME) \
379
517
  static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE( \
380
- FUNC_NAME)[HWY_MAX_DYNAMIC_TARGETS + 2] = { \
518
+ TABLE_NAME)[static_cast<size_t>(HWY_MAX_DYNAMIC_TARGETS + 2)] = { \
381
519
  /* The first entry in the table initializes the global cache and \
382
520
  * calls the appropriate function. */ \
383
- &decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH( \
384
- FUNC_NAME)))::ChooseAndCall<HWY_DISPATCH_TABLE(FUNC_NAME)>, \
521
+ &decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH(FUNC_NAME))):: \
522
+ template ChooseAndCall<HWY_DISPATCH_TABLE(TABLE_NAME)>, \
385
523
  HWY_CHOOSE_TARGET_LIST(FUNC_NAME), \
386
524
  HWY_CHOOSE_FALLBACK(FUNC_NAME), \
387
525
  }
388
526
 
389
- #endif // HWY_DISPATCH_WORKAROUND
527
+ #define HWY_EXPORT(FUNC_NAME) HWY_EXPORT_T(FUNC_NAME, FUNC_NAME)
528
+
529
+ #endif // HWY_DISPATCH_MAP
390
530
 
391
- #define HWY_DYNAMIC_DISPATCH(FUNC_NAME) \
392
- (*(HWY_DISPATCH_TABLE(FUNC_NAME)[hwy::GetChosenTarget().GetIndex()]))
531
+ // HWY_DISPATCH_MAP only affects how tables are created, not their usage.
532
+
533
+ // Evaluates to the function pointer for the chosen target.
393
534
  #define HWY_DYNAMIC_POINTER(FUNC_NAME) \
394
535
  (HWY_DISPATCH_TABLE(FUNC_NAME)[hwy::GetChosenTarget().GetIndex()])
395
536
 
537
+ // Calls the function pointer for the chosen target.
538
+ #define HWY_DYNAMIC_DISPATCH(FUNC_NAME) (*(HWY_DYNAMIC_POINTER(FUNC_NAME)))
539
+
540
+ // Same as DISPATCH, but provide a different arg name to clarify usage.
541
+ #define HWY_DYNAMIC_DISPATCH_T(TABLE_NAME) HWY_DYNAMIC_DISPATCH(TABLE_NAME)
542
+ #define HWY_DYNAMIC_POINTER_T(TABLE_NAME) HWY_DYNAMIC_POINTER(TABLE_NAME)
543
+
396
544
  #endif // HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0)
397
545
 
546
+ // Returns the name of an anonymous dispatch table that is only shared with
547
+ // macro invocations coming from the same source line.
548
+ #define HWY_DISPATCH_TABLE_T() HWY_CONCAT(HighwayDispatchTableT, __LINE__)
549
+
550
+ // For templated code, combines export and dispatch using an anonymous table.
551
+ #define HWY_EXPORT_AND_DYNAMIC_DISPATCH_T(FUNC_NAME) \
552
+ HWY_EXPORT_T(HWY_DISPATCH_TABLE_T(), FUNC_NAME); \
553
+ HWY_DYNAMIC_DISPATCH_T(HWY_DISPATCH_TABLE_T())
554
+
398
555
  // DEPRECATED names; please use HWY_HAVE_* instead.
399
556
  #define HWY_CAP_INTEGER64 HWY_HAVE_INTEGER64
400
557
  #define HWY_CAP_FLOAT16 HWY_HAVE_FLOAT16
@@ -425,13 +582,11 @@ FunctionCache<RetType, Args...> DeduceFunctionCache(RetType (*)(Args...)) {
425
582
  HWY_TARGET == HWY_AVX3_ZEN4 || HWY_TARGET == HWY_AVX3_SPR
426
583
  #include "hwy/ops/x86_512-inl.h"
427
584
  #elif HWY_TARGET == HWY_Z14 || HWY_TARGET == HWY_Z15 || \
428
- HWY_TARGET == HWY_PPC8 || HWY_TARGET == HWY_PPC9 || \
429
- HWY_TARGET == HWY_PPC10
585
+ (HWY_TARGET & HWY_ALL_PPC)
430
586
  #include "hwy/ops/ppc_vsx-inl.h"
431
- #elif HWY_TARGET == HWY_NEON || HWY_TARGET == HWY_NEON_WITHOUT_AES
587
+ #elif HWY_TARGET & HWY_ALL_NEON
432
588
  #include "hwy/ops/arm_neon-inl.h"
433
- #elif HWY_TARGET == HWY_SVE || HWY_TARGET == HWY_SVE2 || \
434
- HWY_TARGET == HWY_SVE_256 || HWY_TARGET == HWY_SVE2_128
589
+ #elif HWY_TARGET & HWY_ALL_SVE
435
590
  #include "hwy/ops/arm_sve-inl.h"
436
591
  #elif HWY_TARGET == HWY_WASM_EMU256
437
592
  #include "hwy/ops/wasm_256-inl.h"