@img/sharp-libvips-dev 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/include/expat.h +21 -10
  2. package/include/expat_config.h +11 -5
  3. package/include/ffi.h +12 -25
  4. package/include/freetype2/freetype/config/ftoption.h +1 -1
  5. package/include/gio-unix-2.0/gio/gfiledescriptorbased.h +3 -2
  6. package/include/glib-2.0/gio/gapplication.h +6 -0
  7. package/include/glib-2.0/gio/giotypes.h +0 -1
  8. package/include/glib-2.0/girepository/giarginfo.h +23 -6
  9. package/include/glib-2.0/girepository/gibaseinfo.h +44 -18
  10. package/include/glib-2.0/girepository/gicallableinfo.h +26 -16
  11. package/include/glib-2.0/girepository/gicallbackinfo.h +17 -2
  12. package/include/glib-2.0/girepository/giconstantinfo.h +19 -4
  13. package/include/glib-2.0/girepository/gienuminfo.h +20 -21
  14. package/include/glib-2.0/girepository/gifieldinfo.h +22 -7
  15. package/include/glib-2.0/girepository/giflagsinfo.h +60 -0
  16. package/include/glib-2.0/girepository/gifunctioninfo.h +22 -7
  17. package/include/glib-2.0/girepository/giinterfaceinfo.h +33 -18
  18. package/include/glib-2.0/girepository/giobjectinfo.h +41 -26
  19. package/include/glib-2.0/girepository/gipropertyinfo.h +18 -3
  20. package/include/glib-2.0/girepository/giregisteredtypeinfo.h +22 -11
  21. package/include/glib-2.0/girepository/girepository-autocleanups.h +56 -0
  22. package/include/glib-2.0/girepository/girepository.h +53 -62
  23. package/include/glib-2.0/girepository/girffi.h +8 -7
  24. package/include/glib-2.0/girepository/gisignalinfo.h +18 -3
  25. package/include/glib-2.0/girepository/gistructinfo.h +26 -11
  26. package/include/glib-2.0/girepository/gitypeinfo.h +29 -16
  27. package/include/glib-2.0/girepository/gitypelib.h +9 -13
  28. package/include/glib-2.0/girepository/gitypes.h +52 -104
  29. package/include/glib-2.0/girepository/giunioninfo.h +28 -12
  30. package/include/glib-2.0/girepository/giunresolvedinfo.h +17 -2
  31. package/include/glib-2.0/girepository/givalueinfo.h +65 -0
  32. package/include/glib-2.0/girepository/givfuncinfo.h +23 -8
  33. package/include/glib-2.0/glib/deprecated/gthread.h +9 -5
  34. package/include/glib-2.0/glib/gbitlock.h +31 -0
  35. package/include/glib-2.0/glib/gmessages.h +8 -0
  36. package/include/glib-2.0/glib/gslice.h +2 -0
  37. package/include/glib-2.0/glib/gstrfuncs.h +24 -18
  38. package/include/glib-2.0/glib/gthread.h +191 -3
  39. package/include/glib-2.0/glib-unix.h +7 -1
  40. package/include/glib-2.0/gobject/genums.h +6 -6
  41. package/include/glib-2.0/gobject/glib-types.h +11 -0
  42. package/include/glib-2.0/gobject/gsignal.h +16 -6
  43. package/include/hwy/aligned_allocator.h +171 -6
  44. package/include/hwy/base.h +1765 -543
  45. package/include/hwy/cache_control.h +24 -6
  46. package/include/hwy/detect_compiler_arch.h +23 -2
  47. package/include/hwy/detect_targets.h +56 -13
  48. package/include/hwy/foreach_target.h +24 -0
  49. package/include/hwy/highway.h +20 -3
  50. package/include/hwy/ops/arm_neon-inl.h +1086 -667
  51. package/include/hwy/ops/arm_sve-inl.h +1091 -235
  52. package/include/hwy/ops/emu128-inl.h +271 -196
  53. package/include/hwy/ops/generic_ops-inl.h +2270 -399
  54. package/include/hwy/ops/ppc_vsx-inl.h +1786 -563
  55. package/include/hwy/ops/rvv-inl.h +1043 -311
  56. package/include/hwy/ops/scalar-inl.h +189 -159
  57. package/include/hwy/ops/set_macros-inl.h +66 -6
  58. package/include/hwy/ops/shared-inl.h +175 -56
  59. package/include/hwy/ops/wasm_128-inl.h +153 -136
  60. package/include/hwy/ops/x86_128-inl.h +1647 -646
  61. package/include/hwy/ops/x86_256-inl.h +1003 -370
  62. package/include/hwy/ops/x86_512-inl.h +948 -353
  63. package/include/hwy/per_target.h +4 -0
  64. package/include/hwy/profiler.h +648 -0
  65. package/include/hwy/robust_statistics.h +2 -2
  66. package/include/hwy/targets.h +18 -11
  67. package/include/hwy/timer.h +11 -0
  68. package/include/libpng16/png.h +32 -29
  69. package/include/libpng16/pngconf.h +2 -2
  70. package/include/libpng16/pnglibconf.h +7 -2
  71. package/include/librsvg-2.0/librsvg/rsvg-version.h +2 -2
  72. package/include/libxml2/libxml/parser.h +16 -7
  73. package/include/libxml2/libxml/xmlIO.h +0 -1
  74. package/include/libxml2/libxml/xmlversion.h +4 -4
  75. package/include/pango-1.0/pango/pango-features.h +3 -3
  76. package/include/pango-1.0/pango/pango-fontmap.h +7 -0
  77. package/include/pixman-1/pixman-version.h +2 -2
  78. package/include/png.h +32 -29
  79. package/include/pngconf.h +2 -2
  80. package/include/pnglibconf.h +7 -2
  81. package/include/vips/connection.h +9 -3
  82. package/include/vips/util.h +0 -9
  83. package/include/vips/version.h +4 -4
  84. package/package.json +1 -1
  85. package/versions.json +11 -11
@@ -16,6 +16,7 @@
16
16
  // Single-element vectors and operations.
17
17
  // External include guard in highway.h - see comment there.
18
18
 
19
+ #include <stdint.h>
19
20
  #ifndef HWY_NO_LIBCXX
20
21
  #include <math.h> // sqrtf
21
22
  #endif
@@ -53,6 +54,9 @@ struct Vec1 {
53
54
  HWY_INLINE Vec1& operator-=(const Vec1 other) {
54
55
  return *this = (*this - other);
55
56
  }
57
+ HWY_INLINE Vec1& operator%=(const Vec1 other) {
58
+ return *this = (*this % other);
59
+ }
56
60
  HWY_INLINE Vec1& operator&=(const Vec1 other) {
57
61
  return *this = (*this & other);
58
62
  }
@@ -101,9 +105,7 @@ HWY_API Vec1<TTo> BitCast(DTo /* tag */, Vec1<TFrom> v) {
101
105
 
102
106
  template <class D, HWY_IF_LANES_D(D, 1), typename T = TFromD<D>>
103
107
  HWY_API Vec1<T> Zero(D /* tag */) {
104
- Vec1<T> v;
105
- ZeroBytes<sizeof(v.raw)>(&v.raw);
106
- return v;
108
+ return Vec1<T>(ConvertScalarTo<T>(0));
107
109
  }
108
110
 
109
111
  template <class D>
@@ -137,7 +139,7 @@ HWY_API VFromD<D> ResizeBitCast(D /* tag */, FromV v) {
137
139
  using TFrom = TFromV<FromV>;
138
140
  using TTo = TFromD<D>;
139
141
  constexpr size_t kCopyLen = HWY_MIN(sizeof(TFrom), sizeof(TTo));
140
- TTo to = TTo{0};
142
+ TTo to{};
141
143
  CopyBytes<kCopyLen>(&v.raw, &to);
142
144
  return VFromD<D>(to);
143
145
  }
@@ -156,6 +158,39 @@ HWY_INLINE VFromD<DTo> ZeroExtendResizeBitCast(FromSizeTag /* from_size_tag */,
156
158
 
157
159
  } // namespace detail
158
160
 
161
+ // ------------------------------ Dup128VecFromValues
162
+
163
+ template <class D, HWY_IF_T_SIZE_D(D, 1)>
164
+ HWY_API VFromD<D> Dup128VecFromValues(D /*d*/, TFromD<D> t0, TFromD<D> /*t1*/,
165
+ TFromD<D> /*t2*/, TFromD<D> /*t3*/,
166
+ TFromD<D> /*t4*/, TFromD<D> /*t5*/,
167
+ TFromD<D> /*t6*/, TFromD<D> /*t7*/,
168
+ TFromD<D> /*t8*/, TFromD<D> /*t9*/,
169
+ TFromD<D> /*t10*/, TFromD<D> /*t11*/,
170
+ TFromD<D> /*t12*/, TFromD<D> /*t13*/,
171
+ TFromD<D> /*t14*/, TFromD<D> /*t15*/) {
172
+ return VFromD<D>(t0);
173
+ }
174
+
175
+ template <class D, HWY_IF_T_SIZE_D(D, 2)>
176
+ HWY_API VFromD<D> Dup128VecFromValues(D /*d*/, TFromD<D> t0, TFromD<D> /*t1*/,
177
+ TFromD<D> /*t2*/, TFromD<D> /*t3*/,
178
+ TFromD<D> /*t4*/, TFromD<D> /*t5*/,
179
+ TFromD<D> /*t6*/, TFromD<D> /*t7*/) {
180
+ return VFromD<D>(t0);
181
+ }
182
+
183
+ template <class D, HWY_IF_T_SIZE_D(D, 4)>
184
+ HWY_API VFromD<D> Dup128VecFromValues(D /*d*/, TFromD<D> t0, TFromD<D> /*t1*/,
185
+ TFromD<D> /*t2*/, TFromD<D> /*t3*/) {
186
+ return VFromD<D>(t0);
187
+ }
188
+
189
+ template <class D, HWY_IF_T_SIZE_D(D, 8)>
190
+ HWY_API VFromD<D> Dup128VecFromValues(D /*d*/, TFromD<D> t0, TFromD<D> /*t1*/) {
191
+ return VFromD<D>(t0);
192
+ }
193
+
159
194
  // ================================================== LOGICAL
160
195
 
161
196
  // ------------------------------ Not
@@ -328,12 +363,12 @@ HWY_API Vec1<T> IfThenElse(const Mask1<T> mask, const Vec1<T> yes,
328
363
 
329
364
  template <typename T>
330
365
  HWY_API Vec1<T> IfThenElseZero(const Mask1<T> mask, const Vec1<T> yes) {
331
- return mask.bits ? yes : Vec1<T>(0);
366
+ return mask.bits ? yes : Vec1<T>(ConvertScalarTo<T>(0));
332
367
  }
333
368
 
334
369
  template <typename T>
335
370
  HWY_API Vec1<T> IfThenZeroElse(const Mask1<T> mask, const Vec1<T> no) {
336
- return mask.bits ? Vec1<T>(0) : no;
371
+ return mask.bits ? Vec1<T>(ConvertScalarTo<T>(0)) : no;
337
372
  }
338
373
 
339
374
  template <typename T>
@@ -347,7 +382,11 @@ HWY_API Vec1<T> IfNegativeThenElse(Vec1<T> v, Vec1<T> yes, Vec1<T> no) {
347
382
 
348
383
  template <typename T>
349
384
  HWY_API Vec1<T> ZeroIfNegative(const Vec1<T> v) {
350
- return v.raw < 0 ? Vec1<T>(0) : v;
385
+ const DFromV<decltype(v)> d;
386
+ const RebindToSigned<decltype(d)> di;
387
+ const auto vi = BitCast(di, v);
388
+
389
+ return vi.raw < 0 ? Vec1<T>(ConvertScalarTo<T>(0)) : v;
351
390
  }
352
391
 
353
392
  // ------------------------------ Mask logical
@@ -407,6 +446,19 @@ HWY_API Mask1<T> SetAtOrBeforeFirst(Mask1<T> /*mask*/) {
407
446
  return Mask1<T>::FromBool(true);
408
447
  }
409
448
 
449
+ // ------------------------------ LowerHalfOfMask
450
+
451
+ #ifdef HWY_NATIVE_LOWER_HALF_OF_MASK
452
+ #undef HWY_NATIVE_LOWER_HALF_OF_MASK
453
+ #else
454
+ #define HWY_NATIVE_LOWER_HALF_OF_MASK
455
+ #endif
456
+
457
+ template <class D>
458
+ HWY_API MFromD<D> LowerHalfOfMask(D /*d*/, MFromD<D> m) {
459
+ return m;
460
+ }
461
+
410
462
  // ================================================== SHIFTS
411
463
 
412
464
  // ------------------------------ ShiftLeft/ShiftRight (BroadcastSignBit)
@@ -528,10 +580,22 @@ HWY_API Vec1<double> operator-(const Vec1<double> a, const Vec1<double> b) {
528
580
 
529
581
  // ------------------------------ SumsOf8
530
582
 
583
+ HWY_API Vec1<int64_t> SumsOf8(const Vec1<int8_t> v) {
584
+ return Vec1<int64_t>(v.raw);
585
+ }
531
586
  HWY_API Vec1<uint64_t> SumsOf8(const Vec1<uint8_t> v) {
532
587
  return Vec1<uint64_t>(v.raw);
533
588
  }
534
589
 
590
+ // ------------------------------ SumsOf2
591
+
592
+ template <class T>
593
+ HWY_API Vec1<MakeWide<T>> SumsOf2(const Vec1<T> v) {
594
+ const DFromV<decltype(v)> d;
595
+ const Rebind<MakeWide<T>, decltype(d)> dw;
596
+ return PromoteTo(dw, v);
597
+ }
598
+
535
599
  // ------------------------------ SaturatedAdd
536
600
 
537
601
  // Returns a + b clamped to the destination range.
@@ -603,57 +667,12 @@ HWY_API Vec1<uint16_t> AverageRound(const Vec1<uint16_t> a,
603
667
 
604
668
  template <typename T>
605
669
  HWY_API Vec1<T> Abs(const Vec1<T> a) {
606
- const T i = a.raw;
607
- if (i >= 0 || i == hwy::LimitsMin<T>()) return a;
608
- return Vec1<T>(static_cast<T>(-i & T{-1}));
609
- }
610
- HWY_API Vec1<float> Abs(Vec1<float> a) {
611
- int32_t i;
612
- CopyBytes<sizeof(i)>(&a.raw, &i);
613
- i &= 0x7FFFFFFF;
614
- CopyBytes<sizeof(i)>(&i, &a.raw);
615
- return a;
616
- }
617
- HWY_API Vec1<double> Abs(Vec1<double> a) {
618
- int64_t i;
619
- CopyBytes<sizeof(i)>(&a.raw, &i);
620
- i &= 0x7FFFFFFFFFFFFFFFL;
621
- CopyBytes<sizeof(i)>(&i, &a.raw);
622
- return a;
670
+ return Vec1<T>(ScalarAbs(a.raw));
623
671
  }
624
672
 
625
673
  // ------------------------------ Min/Max
626
674
 
627
675
  // <cmath> may be unavailable, so implement our own.
628
- namespace detail {
629
-
630
- static inline float Abs(float f) {
631
- uint32_t i;
632
- CopyBytes<4>(&f, &i);
633
- i &= 0x7FFFFFFFu;
634
- CopyBytes<4>(&i, &f);
635
- return f;
636
- }
637
- static inline double Abs(double f) {
638
- uint64_t i;
639
- CopyBytes<8>(&f, &i);
640
- i &= 0x7FFFFFFFFFFFFFFFull;
641
- CopyBytes<8>(&i, &f);
642
- return f;
643
- }
644
-
645
- static inline bool SignBit(float f) {
646
- uint32_t i;
647
- CopyBytes<4>(&f, &i);
648
- return (i >> 31) != 0;
649
- }
650
- static inline bool SignBit(double f) {
651
- uint64_t i;
652
- CopyBytes<8>(&f, &i);
653
- return (i >> 63) != 0;
654
- }
655
-
656
- } // namespace detail
657
676
 
658
677
  template <typename T, HWY_IF_NOT_FLOAT(T)>
659
678
  HWY_API Vec1<T> Min(const Vec1<T> a, const Vec1<T> b) {
@@ -716,7 +735,7 @@ HWY_API Vec1<T> operator*(const Vec1<T> a, const Vec1<T> b) {
716
735
  static_cast<uint64_t>(b.raw)));
717
736
  }
718
737
 
719
- template <typename T>
738
+ template <typename T, HWY_IF_FLOAT(T)>
720
739
  HWY_API Vec1<T> operator/(const Vec1<T> a, const Vec1<T> b) {
721
740
  return Vec1<T>(a.raw / b.raw);
722
741
  }
@@ -763,23 +782,23 @@ HWY_API Vec1<T> AbsDiff(const Vec1<T> a, const Vec1<T> b) {
763
782
 
764
783
  // ------------------------------ Floating-point multiply-add variants
765
784
 
766
- template <typename T>
785
+ template <typename T, HWY_IF_FLOAT(T)>
767
786
  HWY_API Vec1<T> MulAdd(const Vec1<T> mul, const Vec1<T> x, const Vec1<T> add) {
768
787
  return mul * x + add;
769
788
  }
770
789
 
771
- template <typename T>
790
+ template <typename T, HWY_IF_FLOAT(T)>
772
791
  HWY_API Vec1<T> NegMulAdd(const Vec1<T> mul, const Vec1<T> x,
773
792
  const Vec1<T> add) {
774
793
  return add - mul * x;
775
794
  }
776
795
 
777
- template <typename T>
796
+ template <typename T, HWY_IF_FLOAT(T)>
778
797
  HWY_API Vec1<T> MulSub(const Vec1<T> mul, const Vec1<T> x, const Vec1<T> sub) {
779
798
  return mul * x - sub;
780
799
  }
781
800
 
782
- template <typename T>
801
+ template <typename T, HWY_IF_FLOAT(T)>
783
802
  HWY_API Vec1<T> NegMulSub(const Vec1<T> mul, const Vec1<T> x,
784
803
  const Vec1<T> sub) {
785
804
  return Neg(mul) * x - sub;
@@ -842,14 +861,17 @@ HWY_API Vec1<T> Round(const Vec1<T> v) {
842
861
  if (!(Abs(v).raw < MantissaEnd<T>())) { // Huge or NaN
843
862
  return v;
844
863
  }
845
- const T bias = v.raw < T(0.0) ? T(-0.5) : T(0.5);
846
- const TI rounded = static_cast<TI>(v.raw + bias);
847
- if (rounded == 0) return CopySignToAbs(Vec1<T>(0), v);
864
+ const T k0 = ConvertScalarTo<T>(0);
865
+ const T bias = ConvertScalarTo<T>(v.raw < k0 ? -0.5 : 0.5);
866
+ const TI rounded = ConvertScalarTo<TI>(v.raw + bias);
867
+ if (rounded == 0) return CopySignToAbs(Vec1<T>(k0), v);
868
+ TI offset = 0;
848
869
  // Round to even
849
- if ((rounded & 1) && detail::Abs(static_cast<T>(rounded) - v.raw) == T(0.5)) {
850
- return Vec1<T>(static_cast<T>(rounded - (v.raw < T(0) ? -1 : 1)));
870
+ if ((rounded & 1) && ScalarAbs(ConvertScalarTo<T>(rounded) - v.raw) ==
871
+ ConvertScalarTo<T>(0.5)) {
872
+ offset = v.raw < k0 ? -1 : 1;
851
873
  }
852
- return Vec1<T>(static_cast<T>(rounded));
874
+ return Vec1<T>(ConvertScalarTo<T>(rounded - offset));
853
875
  }
854
876
 
855
877
  // Round-to-nearest even.
@@ -858,23 +880,26 @@ HWY_API Vec1<int32_t> NearestInt(const Vec1<float> v) {
858
880
  using TI = int32_t;
859
881
 
860
882
  const T abs = Abs(v).raw;
861
- const bool is_sign = detail::SignBit(v.raw);
883
+ const bool is_sign = ScalarSignBit(v.raw);
862
884
 
863
885
  if (!(abs < MantissaEnd<T>())) { // Huge or NaN
864
886
  // Check if too large to cast or NaN
865
- if (!(abs <= static_cast<T>(LimitsMax<TI>()))) {
887
+ if (!(abs <= ConvertScalarTo<T>(LimitsMax<TI>()))) {
866
888
  return Vec1<TI>(is_sign ? LimitsMin<TI>() : LimitsMax<TI>());
867
889
  }
868
- return Vec1<int32_t>(static_cast<TI>(v.raw));
890
+ return Vec1<int32_t>(ConvertScalarTo<TI>(v.raw));
869
891
  }
870
- const T bias = v.raw < T(0.0) ? T(-0.5) : T(0.5);
871
- const TI rounded = static_cast<TI>(v.raw + bias);
892
+ const T bias =
893
+ ConvertScalarTo<T>(v.raw < ConvertScalarTo<T>(0.0) ? -0.5 : 0.5);
894
+ const TI rounded = ConvertScalarTo<TI>(v.raw + bias);
872
895
  if (rounded == 0) return Vec1<int32_t>(0);
896
+ TI offset = 0;
873
897
  // Round to even
874
- if ((rounded & 1) && detail::Abs(static_cast<T>(rounded) - v.raw) == T(0.5)) {
875
- return Vec1<TI>(rounded - (is_sign ? -1 : 1));
898
+ if ((rounded & 1) && ScalarAbs(ConvertScalarTo<T>(rounded) - v.raw) ==
899
+ ConvertScalarTo<T>(0.5)) {
900
+ offset = is_sign ? -1 : 1;
876
901
  }
877
- return Vec1<TI>(rounded);
902
+ return Vec1<TI>(rounded - offset);
878
903
  }
879
904
 
880
905
  template <typename T>
@@ -883,9 +908,9 @@ HWY_API Vec1<T> Trunc(const Vec1<T> v) {
883
908
  if (!(Abs(v).raw <= MantissaEnd<T>())) { // Huge or NaN
884
909
  return v;
885
910
  }
886
- const TI truncated = static_cast<TI>(v.raw);
911
+ const TI truncated = ConvertScalarTo<TI>(v.raw);
887
912
  if (truncated == 0) return CopySignToAbs(Vec1<T>(0), v);
888
- return Vec1<T>(static_cast<T>(truncated));
913
+ return Vec1<T>(ConvertScalarTo<T>(truncated));
889
914
  }
890
915
 
891
916
  template <typename Float, typename Bits, int kMantissaBits, int kExponentBits,
@@ -1017,6 +1042,13 @@ HWY_API Mask1<T> IsNaN(const Vec1<T> v) {
1017
1042
  return Mask1<T>::FromBool(bits > ExponentMask<T>());
1018
1043
  }
1019
1044
 
1045
+ // Per-target flag to prevent generic_ops-inl.h from defining IsInf / IsFinite.
1046
+ #ifdef HWY_NATIVE_ISINF
1047
+ #undef HWY_NATIVE_ISINF
1048
+ #else
1049
+ #define HWY_NATIVE_ISINF
1050
+ #endif
1051
+
1020
1052
  HWY_API Mask1<float> IsInf(const Vec1<float> v) {
1021
1053
  const Sisd<float> d;
1022
1054
  const RebindToUnsigned<decltype(d)> du;
@@ -1205,8 +1237,9 @@ HWY_API void Stream(const Vec1<T> v, D d, T* HWY_RESTRICT aligned) {
1205
1237
  template <class D, typename T = TFromD<D>, typename TI>
1206
1238
  HWY_API void ScatterOffset(Vec1<T> v, D d, T* base, Vec1<TI> offset) {
1207
1239
  static_assert(sizeof(T) == sizeof(TI), "Index/lane size must match");
1208
- uint8_t* const base8 = reinterpret_cast<uint8_t*>(base) + offset.raw;
1209
- Store(v, d, reinterpret_cast<T*>(base8));
1240
+ const intptr_t addr =
1241
+ reinterpret_cast<intptr_t>(base) + static_cast<intptr_t>(offset.raw);
1242
+ Store(v, d, reinterpret_cast<T*>(addr));
1210
1243
  }
1211
1244
 
1212
1245
  template <class D, typename T = TFromD<D>, typename TI>
@@ -1231,27 +1264,36 @@ HWY_API void MaskedScatterIndex(Vec1<T> v, Mask1<T> m, D d,
1231
1264
  #define HWY_NATIVE_GATHER
1232
1265
  #endif
1233
1266
 
1234
- template <class D, typename T = TFromD<D>, typename TI>
1235
- HWY_API Vec1<T> GatherOffset(D d, const T* base, Vec1<TI> offset) {
1236
- static_assert(sizeof(T) == sizeof(TI), "Index/lane size must match");
1267
+ template <class D, typename T = TFromD<D>>
1268
+ HWY_API Vec1<T> GatherOffset(D d, const T* base, Vec1<MakeSigned<T>> offset) {
1269
+ HWY_DASSERT(offset.raw >= 0);
1237
1270
  const intptr_t addr =
1238
1271
  reinterpret_cast<intptr_t>(base) + static_cast<intptr_t>(offset.raw);
1239
1272
  return Load(d, reinterpret_cast<const T*>(addr));
1240
1273
  }
1241
1274
 
1242
- template <class D, typename T = TFromD<D>, typename TI>
1243
- HWY_API Vec1<T> GatherIndex(D d, const T* HWY_RESTRICT base, Vec1<TI> index) {
1244
- static_assert(sizeof(T) == sizeof(TI), "Index/lane size must match");
1275
+ template <class D, typename T = TFromD<D>>
1276
+ HWY_API Vec1<T> GatherIndex(D d, const T* HWY_RESTRICT base,
1277
+ Vec1<MakeSigned<T>> index) {
1278
+ HWY_DASSERT(index.raw >= 0);
1245
1279
  return Load(d, base + index.raw);
1246
1280
  }
1247
1281
 
1248
- template <class D, typename T = TFromD<D>, typename TI>
1282
+ template <class D, typename T = TFromD<D>>
1249
1283
  HWY_API Vec1<T> MaskedGatherIndex(Mask1<T> m, D d, const T* HWY_RESTRICT base,
1250
- Vec1<TI> index) {
1251
- static_assert(sizeof(T) == sizeof(TI), "Index/lane size must match");
1284
+ Vec1<MakeSigned<T>> index) {
1285
+ HWY_DASSERT(index.raw >= 0);
1252
1286
  return MaskedLoad(m, d, base + index.raw);
1253
1287
  }
1254
1288
 
1289
+ template <class D, typename T = TFromD<D>>
1290
+ HWY_API Vec1<T> MaskedGatherIndexOr(Vec1<T> no, Mask1<T> m, D d,
1291
+ const T* HWY_RESTRICT base,
1292
+ Vec1<MakeSigned<T>> index) {
1293
+ HWY_DASSERT(index.raw >= 0);
1294
+ return MaskedLoadOr(no, m, d, base + index.raw);
1295
+ }
1296
+
1255
1297
  // ================================================== CONVERT
1256
1298
 
1257
1299
  // ConvertTo and DemoteTo with floating-point input and integer output truncate
@@ -1260,73 +1302,69 @@ HWY_API Vec1<T> MaskedGatherIndex(Mask1<T> m, D d, const T* HWY_RESTRICT base,
1260
1302
  namespace detail {
1261
1303
 
1262
1304
  template <class ToT, class FromT>
1263
- HWY_INLINE ToT CastValueForF2IConv(hwy::UnsignedTag /* to_type_tag */,
1264
- FromT val) {
1265
- // Prevent ubsan errors when converting float to narrower integer
1266
-
1267
- // If LimitsMax<ToT>() can be exactly represented in FromT,
1268
- // kSmallestOutOfToTRangePosVal is equal to LimitsMax<ToT>().
1269
-
1270
- // Otherwise, if LimitsMax<ToT>() cannot be exactly represented in FromT,
1271
- // kSmallestOutOfToTRangePosVal is equal to LimitsMax<ToT>() + 1, which can
1272
- // be exactly represented in FromT.
1273
- constexpr FromT kSmallestOutOfToTRangePosVal =
1274
- (sizeof(ToT) * 8 <= static_cast<size_t>(MantissaBits<FromT>()) + 1)
1275
- ? static_cast<FromT>(LimitsMax<ToT>())
1276
- : static_cast<FromT>(
1277
- static_cast<FromT>(ToT{1} << (sizeof(ToT) * 8 - 1)) * FromT(2));
1278
-
1279
- if (detail::SignBit(val)) {
1280
- return ToT{0};
1281
- } else if (IsInf(Vec1<FromT>(val)).bits ||
1282
- val >= kSmallestOutOfToTRangePosVal) {
1283
- return LimitsMax<ToT>();
1284
- } else {
1285
- return static_cast<ToT>(val);
1286
- }
1287
- }
1288
-
1289
- template <class ToT, class FromT>
1290
- HWY_INLINE ToT CastValueForF2IConv(hwy::SignedTag /* to_type_tag */,
1291
- FromT val) {
1305
+ HWY_INLINE ToT CastValueForF2IConv(FromT val) {
1292
1306
  // Prevent ubsan errors when converting float to narrower integer
1293
1307
 
1294
- // If LimitsMax<ToT>() can be exactly represented in FromT,
1295
- // kSmallestOutOfToTRangePosVal is equal to LimitsMax<ToT>().
1296
-
1297
- // Otherwise, if LimitsMax<ToT>() cannot be exactly represented in FromT,
1298
- // kSmallestOutOfToTRangePosVal is equal to -LimitsMin<ToT>(), which can
1299
- // be exactly represented in FromT.
1300
- constexpr FromT kSmallestOutOfToTRangePosVal =
1301
- (sizeof(ToT) * 8 <= static_cast<size_t>(MantissaBits<FromT>()) + 2)
1302
- ? static_cast<FromT>(LimitsMax<ToT>())
1303
- : static_cast<FromT>(-static_cast<FromT>(LimitsMin<ToT>()));
1304
-
1305
- if (IsInf(Vec1<FromT>(val)).bits ||
1306
- detail::Abs(val) >= kSmallestOutOfToTRangePosVal) {
1307
- return detail::SignBit(val) ? LimitsMin<ToT>() : LimitsMax<ToT>();
1308
- } else {
1309
- return static_cast<ToT>(val);
1310
- }
1308
+ using FromTU = MakeUnsigned<FromT>;
1309
+ using ToTU = MakeUnsigned<ToT>;
1310
+
1311
+ constexpr unsigned kMaxExpField =
1312
+ static_cast<unsigned>(MaxExponentField<FromT>());
1313
+ constexpr unsigned kExpBias = kMaxExpField >> 1;
1314
+ constexpr unsigned kMinOutOfRangeExpField = static_cast<unsigned>(HWY_MIN(
1315
+ kExpBias + sizeof(ToT) * 8 - static_cast<unsigned>(IsSigned<ToT>()),
1316
+ kMaxExpField));
1317
+
1318
+ // If ToT is signed, compare only the exponent bits of val against
1319
+ // kMinOutOfRangeExpField.
1320
+ //
1321
+ // Otherwise, if ToT is unsigned, compare the sign bit plus exponent bits of
1322
+ // val against kMinOutOfRangeExpField as a negative value is outside of the
1323
+ // range of an unsigned integer type.
1324
+ const FromT val_to_compare =
1325
+ static_cast<FromT>(IsSigned<ToT>() ? ScalarAbs(val) : val);
1326
+
1327
+ // val is within the range of ToT if
1328
+ // (BitCastScalar<FromTU>(val_to_compare) >> MantissaBits<FromT>()) is less
1329
+ // than kMinOutOfRangeExpField
1330
+ //
1331
+ // Otherwise, val is either outside of the range of ToT or equal to
1332
+ // LimitsMin<ToT>() if
1333
+ // (BitCastScalar<FromTU>(val_to_compare) >> MantissaBits<FromT>()) is greater
1334
+ // than or equal to kMinOutOfRangeExpField.
1335
+
1336
+ return (static_cast<unsigned>(BitCastScalar<FromTU>(val_to_compare) >>
1337
+ MantissaBits<FromT>()) < kMinOutOfRangeExpField)
1338
+ ? static_cast<ToT>(val)
1339
+ : static_cast<ToT>(static_cast<ToTU>(LimitsMax<ToT>()) +
1340
+ static_cast<ToTU>(ScalarSignBit(val)));
1311
1341
  }
1312
1342
 
1313
1343
  template <class ToT, class ToTypeTag, class FromT>
1314
1344
  HWY_INLINE ToT CastValueForPromoteTo(ToTypeTag /* to_type_tag */, FromT val) {
1315
- return static_cast<ToT>(val);
1345
+ return ConvertScalarTo<ToT>(val);
1316
1346
  }
1317
1347
 
1318
1348
  template <class ToT>
1319
- HWY_INLINE ToT CastValueForPromoteTo(hwy::SignedTag to_type_tag, float val) {
1320
- return CastValueForF2IConv<ToT>(to_type_tag, val);
1349
+ HWY_INLINE ToT CastValueForPromoteTo(hwy::SignedTag /*to_type_tag*/,
1350
+ float val) {
1351
+ return CastValueForF2IConv<ToT>(val);
1321
1352
  }
1322
1353
 
1323
1354
  template <class ToT>
1324
- HWY_INLINE ToT CastValueForPromoteTo(hwy::UnsignedTag to_type_tag, float val) {
1325
- return CastValueForF2IConv<ToT>(to_type_tag, val);
1355
+ HWY_INLINE ToT CastValueForPromoteTo(hwy::UnsignedTag /*to_type_tag*/,
1356
+ float val) {
1357
+ return CastValueForF2IConv<ToT>(val);
1326
1358
  }
1327
1359
 
1328
1360
  } // namespace detail
1329
1361
 
1362
+ #ifdef HWY_NATIVE_PROMOTE_F16_TO_F64
1363
+ #undef HWY_NATIVE_PROMOTE_F16_TO_F64
1364
+ #else
1365
+ #define HWY_NATIVE_PROMOTE_F16_TO_F64
1366
+ #endif
1367
+
1330
1368
  template <class DTo, typename TTo = TFromD<DTo>, typename TFrom>
1331
1369
  HWY_API Vec1<TTo> PromoteTo(DTo /* tag */, Vec1<TFrom> from) {
1332
1370
  static_assert(sizeof(TTo) > sizeof(TFrom), "Not promoting");
@@ -1342,16 +1380,15 @@ HWY_API Vec1<float> DemoteTo(D /* tag */, Vec1<double> from) {
1342
1380
  // Prevent ubsan errors when converting float to narrower integer/float
1343
1381
  if (IsInf(from).bits ||
1344
1382
  Abs(from).raw > static_cast<double>(HighestValue<float>())) {
1345
- return Vec1<float>(detail::SignBit(from.raw) ? LowestValue<float>()
1346
- : HighestValue<float>());
1383
+ return Vec1<float>(ScalarSignBit(from.raw) ? LowestValue<float>()
1384
+ : HighestValue<float>());
1347
1385
  }
1348
1386
  return Vec1<float>(static_cast<float>(from.raw));
1349
1387
  }
1350
1388
  template <class D, HWY_IF_UI32_D(D)>
1351
1389
  HWY_API VFromD<D> DemoteTo(D /* tag */, Vec1<double> from) {
1352
1390
  // Prevent ubsan errors when converting int32_t to narrower integer/int32_t
1353
- return Vec1<TFromD<D>>(detail::CastValueForF2IConv<TFromD<D>>(
1354
- hwy::TypeTag<TFromD<D>>(), from.raw));
1391
+ return Vec1<TFromD<D>>(detail::CastValueForF2IConv<TFromD<D>>(from.raw));
1355
1392
  }
1356
1393
 
1357
1394
  template <class DTo, typename TTo = TFromD<DTo>, typename TFrom,
@@ -1401,6 +1438,11 @@ HWY_API Vec1<float> PromoteTo(D d, const Vec1<bfloat16_t> v) {
1401
1438
  return Set(d, F32FromBF16(v.raw));
1402
1439
  }
1403
1440
 
1441
+ template <class DTo, typename TFrom>
1442
+ HWY_API VFromD<DTo> PromoteEvenTo(DTo d_to, Vec1<TFrom> v) {
1443
+ return PromoteTo(d_to, v);
1444
+ }
1445
+
1404
1446
  template <class D, HWY_IF_F16_D(D)>
1405
1447
  HWY_API Vec1<float16_t> DemoteTo(D /* tag */, const Vec1<float> v) {
1406
1448
  return Vec1<float16_t>(F16FromF32(v.raw));
@@ -1416,8 +1458,7 @@ template <class DTo, typename TTo = TFromD<DTo>, typename TFrom,
1416
1458
  HWY_API Vec1<TTo> ConvertTo(DTo /* tag */, Vec1<TFrom> from) {
1417
1459
  static_assert(sizeof(TTo) == sizeof(TFrom), "Should have same size");
1418
1460
  // float## -> int##: return closest representable value.
1419
- return Vec1<TTo>(
1420
- detail::CastValueForF2IConv<TTo>(hwy::TypeTag<TTo>(), from.raw));
1461
+ return Vec1<TTo>(detail::CastValueForF2IConv<TTo>(from.raw));
1421
1462
  }
1422
1463
 
1423
1464
  template <class DTo, typename TTo = TFromD<DTo>, typename TFrom,
@@ -1792,6 +1833,11 @@ HWY_API Mask1<T> LoadMaskBits(D /* tag */, const uint8_t* HWY_RESTRICT bits) {
1792
1833
  return Mask1<T>::FromBool((bits[0] & 1) != 0);
1793
1834
  }
1794
1835
 
1836
+ template <class D, HWY_IF_LANES_D(D, 1)>
1837
+ HWY_API MFromD<D> Dup128MaskFromMaskBits(D /*d*/, unsigned mask_bits) {
1838
+ return MFromD<D>::FromBool((mask_bits & 1) != 0);
1839
+ }
1840
+
1795
1841
  // `p` points to at least 8 writable bytes.
1796
1842
  template <class D, typename T = TFromD<D>>
1797
1843
  HWY_API size_t StoreMaskBits(D d, const Mask1<T> mask, uint8_t* bits) {
@@ -1971,23 +2017,7 @@ HWY_API Vec1<TW> RearrangeToOddPlusEven(Vec1<TW> sum0, Vec1<TW> /* sum1 */) {
1971
2017
 
1972
2018
  // ================================================== REDUCTIONS
1973
2019
 
1974
- // Sum of all lanes, i.e. the only one.
1975
- template <class D, typename T = TFromD<D>>
1976
- HWY_API Vec1<T> SumOfLanes(D /* tag */, const Vec1<T> v) {
1977
- return v;
1978
- }
1979
- template <class D, typename T = TFromD<D>>
1980
- HWY_API T ReduceSum(D /* tag */, const Vec1<T> v) {
1981
- return GetLane(v);
1982
- }
1983
- template <class D, typename T = TFromD<D>>
1984
- HWY_API Vec1<T> MinOfLanes(D /* tag */, const Vec1<T> v) {
1985
- return v;
1986
- }
1987
- template <class D, typename T = TFromD<D>>
1988
- HWY_API Vec1<T> MaxOfLanes(D /* tag */, const Vec1<T> v) {
1989
- return v;
1990
- }
2020
+ // Nothing native, generic_ops-inl defines SumOfLanes and ReduceSum.
1991
2021
 
1992
2022
  // NOLINTNEXTLINE(google-readability-namespace-comments)
1993
2023
  } // namespace HWY_NAMESPACE