@img/sharp-libvips-dev 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. package/README.md +3 -3
  2. package/cplusplus/VConnection.cpp +54 -54
  3. package/cplusplus/VError.cpp +20 -18
  4. package/cplusplus/VImage.cpp +636 -589
  5. package/cplusplus/VInterpolate.cpp +22 -22
  6. package/cplusplus/VRegion.cpp +4 -4
  7. package/cplusplus/vips-operators.cpp +2326 -2301
  8. package/include/aom/aom_codec.h +10 -6
  9. package/include/aom/aom_decoder.h +1 -1
  10. package/include/aom/aom_encoder.h +9 -2
  11. package/include/aom/aomcx.h +72 -3
  12. package/include/cairo/cairo-ft.h +1 -1
  13. package/include/cairo/cairo-gobject.h +8 -0
  14. package/include/cairo/cairo-svg.h +3 -3
  15. package/include/cairo/cairo-version.h +2 -2
  16. package/include/cairo/cairo.h +91 -24
  17. package/include/glib-2.0/glib/gmacros.h +1 -1
  18. package/include/glib-2.0/glib/gtestutils.h +1 -1
  19. package/include/glib-2.0/gobject/gtype.h +7 -7
  20. package/include/harfbuzz/hb-version.h +2 -2
  21. package/include/hwy/aligned_allocator.h +211 -0
  22. package/include/hwy/base.h +1517 -0
  23. package/include/hwy/cache_control.h +108 -0
  24. package/include/hwy/detect_compiler_arch.h +281 -0
  25. package/include/hwy/detect_targets.h +644 -0
  26. package/include/hwy/foreach_target.h +340 -0
  27. package/include/hwy/highway.h +435 -0
  28. package/include/hwy/highway_export.h +74 -0
  29. package/include/hwy/nanobenchmark.h +171 -0
  30. package/include/hwy/ops/arm_neon-inl.h +8913 -0
  31. package/include/hwy/ops/arm_sve-inl.h +5105 -0
  32. package/include/hwy/ops/emu128-inl.h +2811 -0
  33. package/include/hwy/ops/generic_ops-inl.h +4745 -0
  34. package/include/hwy/ops/ppc_vsx-inl.h +5716 -0
  35. package/include/hwy/ops/rvv-inl.h +5070 -0
  36. package/include/hwy/ops/scalar-inl.h +1995 -0
  37. package/include/hwy/ops/set_macros-inl.h +578 -0
  38. package/include/hwy/ops/shared-inl.h +539 -0
  39. package/include/hwy/ops/tuple-inl.h +125 -0
  40. package/include/hwy/ops/wasm_128-inl.h +5917 -0
  41. package/include/hwy/ops/x86_128-inl.h +11173 -0
  42. package/include/hwy/ops/x86_256-inl.h +7529 -0
  43. package/include/hwy/ops/x86_512-inl.h +6849 -0
  44. package/include/hwy/per_target.h +44 -0
  45. package/include/hwy/print-inl.h +62 -0
  46. package/include/hwy/print.h +75 -0
  47. package/include/hwy/robust_statistics.h +148 -0
  48. package/include/hwy/targets.h +338 -0
  49. package/include/hwy/timer-inl.h +200 -0
  50. package/include/hwy/timer.h +55 -0
  51. package/include/jconfig.h +2 -2
  52. package/include/jpeglib.h +3 -2
  53. package/include/libheif/heif.h +461 -384
  54. package/include/libheif/heif_cxx.h +4 -1
  55. package/include/libheif/heif_plugin.h +1 -1
  56. package/include/libheif/heif_properties.h +138 -0
  57. package/include/libheif/heif_regions.h +866 -0
  58. package/include/libheif/heif_version.h +3 -3
  59. package/include/libpng16/pnglibconf.h +1 -1
  60. package/include/pnglibconf.h +1 -1
  61. package/include/vips/VConnection8.h +43 -49
  62. package/include/vips/VError8.h +27 -24
  63. package/include/vips/VImage8.h +4861 -4597
  64. package/include/vips/VInterpolate8.h +24 -27
  65. package/include/vips/VRegion8.h +32 -33
  66. package/include/vips/arithmetic.h +169 -169
  67. package/include/vips/basic.h +33 -33
  68. package/include/vips/buf.h +56 -54
  69. package/include/vips/colour.h +95 -95
  70. package/include/vips/connection.h +190 -193
  71. package/include/vips/conversion.h +91 -91
  72. package/include/vips/convolution.h +36 -30
  73. package/include/vips/create.h +63 -63
  74. package/include/vips/dbuf.h +35 -37
  75. package/include/vips/debug.h +65 -33
  76. package/include/vips/draw.h +41 -41
  77. package/include/vips/enumtypes.h +54 -51
  78. package/include/vips/error.h +63 -63
  79. package/include/vips/foreign.h +263 -223
  80. package/include/vips/format.h +48 -48
  81. package/include/vips/freqfilt.h +22 -22
  82. package/include/vips/gate.h +55 -47
  83. package/include/vips/generate.h +34 -34
  84. package/include/vips/header.h +111 -101
  85. package/include/vips/histogram.h +28 -28
  86. package/include/vips/image.h +213 -213
  87. package/include/vips/interpolate.h +40 -41
  88. package/include/vips/memory.h +61 -52
  89. package/include/vips/morphology.h +24 -24
  90. package/include/vips/mosaicing.h +32 -33
  91. package/include/vips/object.h +371 -357
  92. package/include/vips/operation.h +68 -67
  93. package/include/vips/private.h +76 -76
  94. package/include/vips/rect.h +26 -26
  95. package/include/vips/region.h +92 -92
  96. package/include/vips/resample.h +38 -38
  97. package/include/vips/sbuf.h +53 -54
  98. package/include/vips/semaphore.h +24 -24
  99. package/include/vips/thread.h +30 -27
  100. package/include/vips/threadpool.h +48 -49
  101. package/include/vips/transform.h +39 -39
  102. package/include/vips/type.h +90 -85
  103. package/include/vips/util.h +274 -229
  104. package/include/vips/vector.h +24 -144
  105. package/include/vips/version.h +9 -9
  106. package/include/vips/vips.h +41 -40
  107. package/include/zlib.h +23 -19
  108. package/package.json +1 -1
  109. package/versions.json +9 -9
@@ -0,0 +1,539 @@
1
+ // Copyright 2020 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ // Per-target definitions shared by ops/*.h and user code.
17
+
18
+ // IWYU pragma: begin_exports
19
+ // Export does not seem to be recursive, so re-export these (also in base.h)
20
+ #include <stddef.h>
21
+
22
+ #include "hwy/base.h"
23
+ // "IWYU pragma: keep" does not work for this include, so hide it from the IDE.
24
+ #if !HWY_IDE
25
+ #include <stdint.h>
26
+ #endif
27
+
28
+ #include "hwy/detect_compiler_arch.h"
29
+
30
+ // Separate header because foreach_target.h re-enables its include guard.
31
+ #include "hwy/ops/set_macros-inl.h"
32
+
33
+ // IWYU pragma: end_exports
34
+
35
+ #if HWY_IS_MSAN
36
+ #include <sanitizer/msan_interface.h>
37
+ #endif
38
+
39
+ // We are covered by the highway.h include guard, but generic_ops-inl.h
40
+ // includes this again #if HWY_IDE.
41
+ #if defined(HIGHWAY_HWY_OPS_SHARED_TOGGLE) == defined(HWY_TARGET_TOGGLE)
42
+ #ifdef HIGHWAY_HWY_OPS_SHARED_TOGGLE
43
+ #undef HIGHWAY_HWY_OPS_SHARED_TOGGLE
44
+ #else
45
+ #define HIGHWAY_HWY_OPS_SHARED_TOGGLE
46
+ #endif
47
+
48
+ HWY_BEFORE_NAMESPACE();
49
+ namespace hwy {
50
+ namespace HWY_NAMESPACE {
51
+
52
+ // NOTE: GCC generates incorrect code for vector arguments to non-inlined
53
+ // functions in two situations:
54
+ // - on Windows and GCC 10.3, passing by value crashes due to unaligned loads:
55
+ // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412.
56
+ // - on aarch64 and GCC 9.3.0 or 11.2.1, passing by value causes many (but not
57
+ // all) tests to fail.
58
+ //
59
+ // We therefore pass by const& only on GCC and (Windows or aarch64). This alias
60
+ // must be used for all vector/mask parameters of functions marked HWY_NOINLINE,
61
+ // and possibly also other functions that are not inlined.
62
// Parameter type for vector/mask arguments of functions that may not be
// inlined (in particular those marked HWY_NOINLINE). By value everywhere
// except GCC targeting Windows or aarch64, where a const reference is used to
// sidestep the GCC code generation problems described above.
#if !(HWY_COMPILER_GCC_ACTUAL && (HWY_OS_WIN || HWY_ARCH_ARM_A64))
template <class V>
using VecArg = V;
#else
template <class V>
using VecArg = const V&;
#endif
69
+
70
+ namespace detail {
71
+
72
+ // Primary template: default is no change for all but f16.
73
+ template <typename T>
74
+ struct NativeLaneTypeT {
75
+ using type = T;
76
+ };
77
+
78
+ template <>
79
+ struct NativeLaneTypeT<hwy::float16_t> {
80
+ using type = hwy::float16_t::Raw;
81
+ };
82
+
83
+ template <>
84
+ struct NativeLaneTypeT<hwy::bfloat16_t> {
85
+ using type = hwy::bfloat16_t::Raw;
86
+ };
87
+
88
+ // Evaluates to the type expected by intrinsics given the Highway lane type T.
89
+ // This is usually the same, but differs for our wrapper types [b]float16_t.
90
+ template <typename T>
91
+ using NativeLaneType = typename NativeLaneTypeT<T>::type;
92
+
93
+ // Returns N * 2^pow2. N is the number of lanes in a full vector and pow2 the
94
+ // desired fraction or multiple of it, see Simd<>. `pow2` is most often in
95
+ // [-3, 3] but can also be lower for user-specified fractions.
96
// Computes N * 2^pow2 using shifts: a right shift by -pow2 when pow2 is
// negative (bits shifted out are discarded, so the result may be zero),
// otherwise a left shift by pow2. Kept as a single return statement so it
// remains a valid C++11 constexpr function.
constexpr size_t ScaleByPower(size_t N, int pow2) {
  return (pow2 < 0) ? (N >> (-pow2)) : (N << pow2);
}
99
+
100
+ template <typename T>
101
+ HWY_INLINE void MaybeUnpoison(T* HWY_RESTRICT unaligned, size_t count) {
102
+ // Workaround for MSAN not marking compressstore as initialized (b/233326619)
103
+ #if HWY_IS_MSAN
104
+ __msan_unpoison(unaligned, count * sizeof(T));
105
+ #else
106
+ (void)unaligned;
107
+ (void)count;
108
+ #endif
109
+ }
110
+
111
+ } // namespace detail
112
+
113
+ // Highway operations are implemented as overloaded functions selected using a
114
+ // zero-sized tag type D := Simd<T, N, kPow2>. T denotes the lane type.
115
+ //
116
+ // N defines how many lanes are in a 'full' vector, typically equal to
117
+ // HWY_LANES(T) (which is the actual count on targets with vectors of known
118
+ // size, and an upper bound in case of scalable vectors), otherwise a
119
+ // user-specified limit at most that large.
120
+ //
121
+ // 2^kPow2 is a _subsequently_ applied scaling factor that indicates the
122
+ // desired fraction of a 'full' vector: 0 means full, -1 means half; 1,2,3
123
+ // means two/four/eight full vectors ganged together. The largest supported
124
+ // kPow2 is `HWY_MAX_POW2` and the aliases below take care of clamping
125
+ // user-specified values to that. Note that `Simd<T, 1, 0>` and `Simd<T, 2, -1>`
126
+ // have the same `MaxLanes` and `Lanes`.
127
+ //
128
+ // We can theoretically keep halving Lanes(), but recursive instantiations of
129
+ // kPow2 - 1 will eventually fail e.g. because -64 is not a valid shift count.
130
+ // Users must terminate such compile-time recursions at or above HWY_MIN_POW2.
131
+ //
132
+ // WARNING: do not use N directly because it may be a special representation of
133
+ // a fractional MaxLanes. This arises when we Rebind Simd<uint8_t, 1, 0> to
134
+ // Simd<uint32_t, ??, 2>. RVV requires that the last argument (kPow2) be two,
135
+ // but we want MaxLanes to be the same in both cases. Hence ?? is a
136
+ // fixed-point encoding of 1/4.
137
+ //
138
+ // Instead of referring to Simd<> directly, users create D via aliases:
139
+ // - ScalableTag<T> for a full vector;
140
+ // - ScalableTag<T, kPow2>() for a fraction/group, where `kPow2` is
141
+ // interpreted as `HWY_MIN(kPow2, HWY_MAX_POW2)`;
142
+ // - CappedTag<T, kLimit> for a vector with up to kLimit lanes; or
143
+ // - FixedTag<T, kNumLanes> for a vector with exactly kNumLanes lanes.
144
+ //
145
+ // Instead of N, use Lanes(D()) for the actual number of lanes at runtime and
146
+ // D().MaxLanes() for a constexpr upper bound. Both are powers of two.
147
+ template <typename Lane, size_t N, int kPow2>
148
+ struct Simd {
149
+ constexpr Simd() = default;
150
+ using T = Lane;
151
+
152
+ private:
153
+ static_assert(sizeof(Lane) <= 8, "Lanes are up to 64-bit");
154
+ // 20 bits are sufficient for any HWY_MAX_BYTES. This is the 'normal' value of
155
+ // N when kFrac == 0, otherwise it is one (see FracN).
156
+ static constexpr size_t kWhole = N & 0xFFFFF;
157
+ // Fractional part is in the bits above kWhole.
158
+ static constexpr int kFrac = static_cast<int>(N >> 20);
159
+ // Can be 8x larger because kPow2 may be as low as -3 (Rebind of a larger
160
+ // type to u8 results in fractions).
161
+ static_assert(kWhole <= 8 * HWY_MAX_N && kFrac <= 3, "Out of range");
162
+ static_assert(kFrac == 0 || kWhole == 1, "If frac, whole must be 1");
163
+ static_assert((kWhole & (kWhole - 1)) == 0 && kWhole != 0, "Not 2^x");
164
+ // Important to check this here because kPow2 <= -64 causes confusing
165
+ // compile errors (invalid shift count).
166
+ static_assert(kPow2 >= HWY_MIN_POW2, "Forgot kPow2 recursion terminator?");
167
+ // However, do NOT verify kPow2 <= HWY_MAX_POW2 - users should be able to
168
+ // Rebind<uint64_t, ScalableTag<uint8_t, 3>> in order to discover that its
169
+ // kPow2 is out of bounds.
170
+
171
+ public:
172
+ // Upper bound on the number of lanes (tight if !HWY_HAVE_SCALABLE). In the
173
+ // common case, N == kWhole, but if kFrac is nonzero, we deduct it from kPow2.
174
+ // E.g. Rebind<uint32_t, Simd<uint8_t, 1, 0>> is Simd<uint32_t, 0x200001, 2>.
175
+ // The resulting number of lanes is still 1 because this N represents 1/4
176
+ // (the ratio of the sizes). Note that RVV requires kPow2 to be the ratio of
177
+ // the sizes so that the correct LMUL overloads are chosen, even if N is
178
+ // small enough that it would fit in an LMUL=1 vector.
179
+ //
180
+ // Cannot be an enum because GCC warns when using enums and non-enums in the
181
+ // same expression. Cannot be a static constexpr function (MSVC limitation).
182
+ // Rounded up to one so this is a valid array length.
183
+ //
184
+ // Do not use this directly - only 'public' so it is visible from the accessor
185
+ // macro required by MSVC.
186
+ static constexpr size_t kPrivateLanes =
187
+ HWY_MAX(size_t{1}, detail::ScaleByPower(kWhole, kPow2 - kFrac));
188
+
189
+ constexpr size_t MaxLanes() const { return kPrivateLanes; }
190
+ constexpr size_t MaxBytes() const { return kPrivateLanes * sizeof(Lane); }
191
+ constexpr size_t MaxBlocks() const { return (MaxBytes() + 15) / 16; }
192
+ // For SFINAE on RVV.
193
+ constexpr int Pow2() const { return kPow2; }
194
+
195
+ // ------------------------------ Changing lane type or count
196
+ // Do not use any of these directly. Anything used from member typedefs cannot
197
+ // be made private, but functions only used within other functions can.
198
+
199
+ // Returns number of NewT lanes that fit within MaxBytes().
200
+ template <typename NewT>
201
+ static constexpr size_t RepartitionLanes() {
202
+ // Round up to correctly handle larger NewT.
203
+ return (kPrivateLanes * sizeof(T) + sizeof(NewT) - 1) / sizeof(NewT);
204
+ }
205
+
206
+ // Returns the new kPow2 required for lanes of type NewT.
207
+ template <typename NewT>
208
+ static constexpr int RebindPow2() {
209
+ return kPow2 +
210
+ ((sizeof(NewT) >= sizeof(T))
211
+ ? static_cast<int>(CeilLog2(sizeof(NewT) / sizeof(T)))
212
+ : -static_cast<int>(CeilLog2(sizeof(T) / sizeof(NewT))));
213
+ }
214
+
215
+ private:
216
+ // Returns 0 or whole NewN such that kNewMaxLanes = NewN * 2^kNewPow2.
217
+ template <int kNewPow2, size_t kNewMaxLanes>
218
+ static constexpr size_t WholeN() {
219
+ return detail::ScaleByPower(kNewMaxLanes, -kNewPow2);
220
+ }
221
+
222
+ // Returns fractional NewN such that kNewMaxLanes = NewN * 2^kNewPow2.
223
+ template <int kNewPow2, size_t kNewMaxLanes>
224
+ static constexpr size_t FracN() {
225
+ // Only reached if kNewPow2 > CeilLog2(kNewMaxLanes) >= 0 (else WholeN
226
+ // would not have been zero), but clamp to zero to avoid warnings. kFrac is
227
+ // the difference, stored in the upper bits of N, and we also set kWhole =
228
+ // 1 so that the new kPrivateLanes = kNewMaxLanes.
229
+ static_assert(HWY_MAX_N <= (size_t{1} << 20), "Change bit shift");
230
+ return static_cast<size_t>(
231
+ 1 + (HWY_MAX(0, kNewPow2 - static_cast<int>(CeilLog2(kNewMaxLanes)))
232
+ << 20));
233
+ }
234
+
235
+ public:
236
+ // Returns (whole or fractional) NewN, see above.
237
+ template <int kNewPow2, size_t kNewMaxLanes>
238
+ static constexpr size_t NewN() {
239
+ // We require a fraction if inverting kNewPow2 results in 0.
240
+ return WholeN<kNewPow2, kNewMaxLanes>() == 0
241
+ ? FracN<kNewPow2, kNewMaxLanes>()
242
+ : WholeN<kNewPow2, kNewMaxLanes>();
243
+ }
244
+
245
+ // PromoteTo/DemoteTo() with another lane type, but same number of lanes.
246
+ template <typename NewT>
247
+ using Rebind =
248
+ Simd<NewT, NewN<RebindPow2<NewT>(), kPrivateLanes>(), RebindPow2<NewT>()>;
249
+
250
+ // Change lane type while keeping the same vector size, e.g. for MulEven.
251
+ template <typename NewT>
252
+ using Repartition =
253
+ Simd<NewT, NewN<kPow2, RepartitionLanes<NewT>()>(), kPow2>;
254
+
255
+ // Half the lanes while keeping the same lane type, e.g. for LowerHalf.
256
+ using Half = Simd<T, N, kPow2 - 1>;
257
+
258
+ // Twice the lanes while keeping the same lane type, e.g. for Combine.
259
+ using Twice = Simd<T, N, kPow2 + 1>;
260
+ };
261
+
262
+ namespace detail {
263
+
264
+ template <typename T, size_t N, int kPow2>
265
+ constexpr bool IsFull(Simd<T, N, kPow2> /* d */) {
266
+ return N == HWY_LANES(T) && kPow2 == 0;
267
+ }
268
+
269
+ // Struct wrappers enable validation of arguments via static_assert.
270
+ template <typename T, size_t N, int kPow2>
271
+ struct ClampNAndPow2 {
272
+ using type = Simd<T, HWY_MIN(N, HWY_MAX_N), HWY_MIN(kPow2, HWY_MAX_POW2)>;
273
+ };
274
+
275
+ template <typename T, int kPow2>
276
+ struct ScalableTagChecker {
277
+ using type = typename ClampNAndPow2<T, HWY_LANES(T), kPow2>::type;
278
+ };
279
+
280
+ template <typename T, size_t kLimit, int kPow2>
281
+ struct CappedTagChecker {
282
+ static_assert(kLimit != 0, "Does not make sense to have zero lanes");
283
+ // Safely handle non-power-of-two inputs by rounding down, which is allowed by
284
+ // CappedTag. Otherwise, Simd<T, 3, 0> would static_assert.
285
+ static constexpr size_t kLimitPow2 = size_t{1} << hwy::FloorLog2(kLimit);
286
+ static constexpr size_t N = HWY_MIN(kLimitPow2, HWY_LANES(T));
287
+ using type = typename ClampNAndPow2<T, N, kPow2>::type;
288
+ };
289
+
290
+ template <typename T, size_t kNumLanes>
291
+ struct FixedTagChecker {
292
+ static_assert(kNumLanes != 0, "Does not make sense to have zero lanes");
293
+ static_assert(kNumLanes <= HWY_LANES(T), "Too many lanes");
294
+ using type = Simd<T, kNumLanes, 0>;
295
+ };
296
+
297
+ } // namespace detail
298
+
299
+ // ------------------------------ Aliases for Simd<>
300
+
301
+ // Tag describing a full vector (kPow2 == 0: the most common usage, e.g. 1D
302
+ // loops where the application does not care about the vector size) or a
303
+ // fraction/multiple of one. Fractions (kPow2 < 0) are useful for arguments or
304
+ // return values of type promotion and demotion. User-specified kPow2 is
305
+ // interpreted as `HWY_MIN(kPow2, HWY_MAX_POW2)`.
306
+ template <typename T, int kPow2 = 0>
307
+ using ScalableTag = typename detail::ScalableTagChecker<T, kPow2>::type;
308
+
309
+ // Tag describing a vector with *up to* kLimit active lanes, even on targets
310
+ // with scalable vectors and HWY_SCALAR. The runtime lane count `Lanes(tag)` may
311
+ // be less than kLimit, and is 1 on HWY_SCALAR. This alias is typically used for
312
+ // 1D loops with a relatively low application-defined upper bound, e.g. for 8x8
313
+ // DCTs. However, it is better if data structures are designed to be
314
+ // vector-length-agnostic (e.g. a hybrid SoA where there are chunks of `M >=
315
+ // MaxLanes(d)` DC components followed by M AC1, .., and M AC63; this would
316
+ // enable vector-length-agnostic loops using ScalableTag). User-specified kPow2
317
+ // is interpreted as `HWY_MIN(kPow2, HWY_MAX_POW2)`.
318
+ template <typename T, size_t kLimit, int kPow2 = 0>
319
+ using CappedTag = typename detail::CappedTagChecker<T, kLimit, kPow2>::type;
320
+
321
+ #if !HWY_HAVE_SCALABLE
322
+ // If the vector size is known, and the app knows it does not want more than
323
+ // kLimit lanes, then capping can be beneficial. For example, AVX-512 has lower
324
+ // IPC and potentially higher costs for unaligned load/store vs. 256-bit AVX2.
325
+ template <typename T, size_t kLimit, int kPow2 = 0>
326
+ using CappedTagIfFixed = CappedTag<T, kLimit, kPow2>;
327
+ #else // HWY_HAVE_SCALABLE
328
+ // .. whereas on RVV/SVE, the cost of clamping Lanes() may exceed the benefit.
329
+ template <typename T, size_t kLimit, int kPow2 = 0>
330
+ using CappedTagIfFixed = ScalableTag<T, kPow2>;
331
+ #endif
332
+
333
+ // Alias for a tag describing a vector with *exactly* kNumLanes active lanes,
334
+ // even on targets with scalable vectors. Requires `kNumLanes` to be a power of
335
+ // two not exceeding `HWY_LANES(T)`.
336
+ //
337
+ // NOTE: if the application does not need to support HWY_SCALAR (+), use this
338
+ // instead of CappedTag to emphasize that there will be exactly kNumLanes lanes.
339
+ // This is useful for data structures that rely on exactly 128-bit SIMD, but
340
+ // these are discouraged because they cannot benefit from wider vectors.
341
+ // Instead, applications would ideally define a larger problem size and loop
342
+ // over it with the (unknown size) vectors from ScalableTag.
343
+ //
344
+ // + e.g. if the baseline is known to support SIMD, or the application requires
345
+ // ops such as TableLookupBytes not supported by HWY_SCALAR.
346
+ template <typename T, size_t kNumLanes>
347
+ using FixedTag = typename detail::FixedTagChecker<T, kNumLanes>::type;
348
+
349
+ // Convenience form for fixed sizes.
350
+ template <typename T>
351
+ using Full16 = Simd<T, 2 / sizeof(T), 0>;
352
+
353
+ template <typename T>
354
+ using Full32 = Simd<T, 4 / sizeof(T), 0>;
355
+
356
+ template <typename T>
357
+ using Full64 = Simd<T, 8 / sizeof(T), 0>;
358
+
359
+ template <typename T>
360
+ using Full128 = Simd<T, 16 / sizeof(T), 0>;
361
+
362
+ // ------------------------------ Accessors for Simd<>
363
+
364
+ // Lane type.
365
// Reads the nested `T` member type of the tag D.
template <typename D>
using TFromD = typename D::T;
367
+
368
+ // Upper bound on the number of lanes, typically used for SFINAE conditions and
369
+ // to allocate storage for targets with known vector sizes. Note: this may be a
370
+ // loose bound, instead use Lanes() as the actual size for AllocateAligned.
371
+ // MSVC workaround: use static constant directly instead of a function.
372
// Expands to a static member access on D, so D must name a tag type (not a
// tag object) that provides kPrivateLanes.
+ #define HWY_MAX_LANES_D(D) D::kPrivateLanes
373
+
374
+ // Non-macro form of HWY_MAX_LANES_D in case that is preferable. WARNING: the
375
+ // macro form may be required for MSVC, which has limitations on deducing
376
+ // arguments.
377
+ template <class D>
378
+ HWY_INLINE HWY_MAYBE_UNUSED constexpr size_t MaxLanes(D) {
379
+ return HWY_MAX_LANES_D(D);
380
+ }
381
+
382
+ #if !HWY_HAVE_SCALABLE
383
+
384
+ // If non-scalable, this is constexpr; otherwise the target's header defines a
385
+ // non-constexpr version of this function. This is the actual vector length,
386
+ // used when advancing loop counters.
387
+ template <class D>
388
+ HWY_INLINE HWY_MAYBE_UNUSED constexpr size_t Lanes(D) {
389
+ return HWY_MAX_LANES_D(D);
390
+ }
391
+
392
+ #endif // !HWY_HAVE_SCALABLE
393
+
394
+ // Tag for the same number of lanes as D, but with the LaneType T.
395
+ template <class T, class D>
396
+ using Rebind = typename D::template Rebind<T>;
397
+
398
+ template <class D>
399
+ using RebindToSigned = Rebind<MakeSigned<TFromD<D>>, D>;
400
+ template <class D>
401
+ using RebindToUnsigned = Rebind<MakeUnsigned<TFromD<D>>, D>;
402
+ template <class D>
403
+ using RebindToFloat = Rebind<MakeFloat<TFromD<D>>, D>;
404
+
405
+ // Tag for the same total size as D, but with the LaneType T.
406
+ template <class T, class D>
407
+ using Repartition = typename D::template Repartition<T>;
408
+
409
+ template <class D>
410
+ using RepartitionToWide = Repartition<MakeWide<TFromD<D>>, D>;
411
+ template <class D>
412
+ using RepartitionToNarrow = Repartition<MakeNarrow<TFromD<D>>, D>;
413
+
414
+ // Tag for the same lane type as D, but half the lanes.
415
// Forwards to the tag's nested Half type.
template <typename D>
using Half = typename D::Half;

// Tag for the same lane type as D, but twice the lanes. Forwards to the tag's
// nested Twice type.
template <typename D>
using Twice = typename D::Twice;
421
+
422
+ // Tag for a 16-byte block with the same lane type as D
423
+ #if HWY_HAVE_SCALABLE
424
+ namespace detail {
425
+
426
+ template <class D>
427
+ class BlockDFromD_t {};
428
+
429
+ template <typename T, size_t N, int kPow2>
430
+ class BlockDFromD_t<Simd<T, N, kPow2>> {
431
+ using D = Simd<T, N, kPow2>;
432
+ static constexpr int kNewPow2 = HWY_MIN(kPow2, 0);
433
+ static constexpr size_t kMaxLpb = HWY_MIN(16 / sizeof(T), HWY_MAX_LANES_D(D));
434
+ static constexpr size_t kNewN = D::template NewN<kNewPow2, kMaxLpb>();
435
+
436
+ public:
437
+ using type = Simd<T, kNewN, kNewPow2>;
438
+ };
439
+
440
+ } // namespace detail
441
+
442
+ template <class D>
443
+ using BlockDFromD = typename detail::BlockDFromD_t<RemoveConst<D>>::type;
444
+ #else
445
+ template <class D>
446
+ using BlockDFromD =
447
+ Simd<TFromD<D>, HWY_MIN(16 / sizeof(TFromD<D>), HWY_MAX_LANES_D(D)), 0>;
448
+ #endif
449
+
450
+ // ------------------------------ Choosing overloads (SFINAE)
451
+
452
+ // Same as base.h macros but with a Simd<T, N, kPow2> argument instead of T.
453
// Each macro forwards to the identically named macro without the _D suffix,
// passing the lane type TFromD<D> of the tag type D.
+ #define HWY_IF_UNSIGNED_D(D) HWY_IF_UNSIGNED(TFromD<D>)
454
+ #define HWY_IF_SIGNED_D(D) HWY_IF_SIGNED(TFromD<D>)
455
+ #define HWY_IF_FLOAT_D(D) HWY_IF_FLOAT(TFromD<D>)
456
+ #define HWY_IF_NOT_FLOAT_D(D) HWY_IF_NOT_FLOAT(TFromD<D>)
457
+ #define HWY_IF_FLOAT3264_D(D) HWY_IF_FLOAT3264(TFromD<D>)
458
+ #define HWY_IF_NOT_FLOAT3264_D(D) HWY_IF_NOT_FLOAT3264(TFromD<D>)
459
+ #define HWY_IF_SPECIAL_FLOAT_D(D) HWY_IF_SPECIAL_FLOAT(TFromD<D>)
460
+ #define HWY_IF_NOT_SPECIAL_FLOAT_D(D) HWY_IF_NOT_SPECIAL_FLOAT(TFromD<D>)
461
+ #define HWY_IF_FLOAT_OR_SPECIAL_D(D) HWY_IF_FLOAT_OR_SPECIAL(TFromD<D>)
462
+ #define HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D) \
463
+ HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromD<D>)
464
+
465
// Lane-size conditions: forward to the HWY_IF_*T_SIZE* macros with the lane
// type TFromD<D>; `bytes` / `bit_array` are passed through unchanged.
+ #define HWY_IF_T_SIZE_D(D, bytes) HWY_IF_T_SIZE(TFromD<D>, bytes)
466
+ #define HWY_IF_NOT_T_SIZE_D(D, bytes) HWY_IF_NOT_T_SIZE(TFromD<D>, bytes)
467
+ #define HWY_IF_T_SIZE_ONE_OF_D(D, bit_array) \
468
+ HWY_IF_T_SIZE_ONE_OF(TFromD<D>, bit_array)
469
+
470
// Lane-count conditions: the count passed on is HWY_MAX_LANES_D(D), i.e. the
// tag's upper bound. HWY_IF_LANES_PER_BLOCK_D additionally caps that count at
// the number of lanes in 16 bytes (16 / sizeof(TFromD<D>)).
+ #define HWY_IF_LANES_D(D, lanes) HWY_IF_LANES(HWY_MAX_LANES_D(D), lanes)
471
+ #define HWY_IF_LANES_LE_D(D, lanes) HWY_IF_LANES_LE(HWY_MAX_LANES_D(D), lanes)
472
+ #define HWY_IF_LANES_GT_D(D, lanes) HWY_IF_LANES_GT(HWY_MAX_LANES_D(D), lanes)
473
+ #define HWY_IF_LANES_PER_BLOCK_D(D, lanes) \
474
+ HWY_IF_LANES_PER_BLOCK( \
475
+ TFromD<D>, HWY_MIN(HWY_MAX_LANES_D(D), 16 / sizeof(TFromD<D>)), lanes)
476
+
477
// Conditions on the tag's Pow2(). Both expand to a defaulted pointer template
// parameter of type hwy::EnableIf<condition>*. The GT form parenthesizes its
// condition because it contains a '>' inside a template argument list.
+ #define HWY_IF_POW2_LE_D(D, pow2) hwy::EnableIf<D().Pow2() <= pow2>* = nullptr
478
+ #define HWY_IF_POW2_GT_D(D, pow2) hwy::EnableIf<(D().Pow2() > pow2)>* = nullptr
479
+
480
// Exact lane-type conditions: each expands to a defaulted pointer template
// parameter hwy::EnableIf<IsSame<TFromD<D>, X>()>* for the named unsigned or
// signed fixed-width integer type X.
+ #define HWY_IF_U8_D(D) hwy::EnableIf<IsSame<TFromD<D>, uint8_t>()>* = nullptr
481
+ #define HWY_IF_U16_D(D) hwy::EnableIf<IsSame<TFromD<D>, uint16_t>()>* = nullptr
482
+ #define HWY_IF_U32_D(D) hwy::EnableIf<IsSame<TFromD<D>, uint32_t>()>* = nullptr
483
+ #define HWY_IF_U64_D(D) hwy::EnableIf<IsSame<TFromD<D>, uint64_t>()>* = nullptr
484
+
485
+ #define HWY_IF_I8_D(D) hwy::EnableIf<IsSame<TFromD<D>, int8_t>()>* = nullptr
486
+ #define HWY_IF_I16_D(D) hwy::EnableIf<IsSame<TFromD<D>, int16_t>()>* = nullptr
487
+ #define HWY_IF_I32_D(D) hwy::EnableIf<IsSame<TFromD<D>, int32_t>()>* = nullptr
488
+ #define HWY_IF_I64_D(D) hwy::EnableIf<IsSame<TFromD<D>, int64_t>()>* = nullptr
489
+
490
+ // Use instead of HWY_IF_T_SIZE_D to avoid ambiguity with float16_t/float/double
491
+ // overloads.
492
// The UI*, BF16 and F16 forms forward to the same-named macros without the _D
// suffix, passing TFromD<D>. The F32/F64 forms are spelled out here and
// require the lane type to be exactly float / double.
+ #define HWY_IF_UI16_D(D) HWY_IF_UI16(TFromD<D>)
493
+ #define HWY_IF_UI32_D(D) HWY_IF_UI32(TFromD<D>)
494
+ #define HWY_IF_UI64_D(D) HWY_IF_UI64(TFromD<D>)
495
+
496
+ #define HWY_IF_BF16_D(D) HWY_IF_BF16(TFromD<D>)
497
+ #define HWY_IF_F16_D(D) HWY_IF_F16(TFromD<D>)
498
+ #define HWY_IF_F32_D(D) hwy::EnableIf<IsSame<TFromD<D>, float>()>* = nullptr
499
+ #define HWY_IF_F64_D(D) hwy::EnableIf<IsSame<TFromD<D>, double>()>* = nullptr
500
+
501
// Vector-size conditions: forward the lane type TFromD<D> and the lane bound
// HWY_MAX_LANES_D(D) to the HWY_IF_V_SIZE* macros; `bytes` is passed through.
+ #define HWY_IF_V_SIZE_D(D, bytes) \
502
+ HWY_IF_V_SIZE(TFromD<D>, HWY_MAX_LANES_D(D), bytes)
503
+ #define HWY_IF_V_SIZE_LE_D(D, bytes) \
504
+ HWY_IF_V_SIZE_LE(TFromD<D>, HWY_MAX_LANES_D(D), bytes)
505
+ #define HWY_IF_V_SIZE_GT_D(D, bytes) \
506
+ HWY_IF_V_SIZE_GT(TFromD<D>, HWY_MAX_LANES_D(D), bytes)
507
+
508
+ // Same, but with a vector argument. ops/*-inl.h define their own TFromV.
509
// Vector-type counterparts of the _D macros: the lane type is TFromV<V> and
// the lane bound is HWY_MAX_LANES_V(V), which goes through DFromV<V>.
// NOTE(review): DFromV is not defined in this header; presumably it is
// provided by the per-target ops headers, like TFromV - confirm.
+ #define HWY_IF_UNSIGNED_V(V) HWY_IF_UNSIGNED(TFromV<V>)
510
+ #define HWY_IF_SIGNED_V(V) HWY_IF_SIGNED(TFromV<V>)
511
+ #define HWY_IF_FLOAT_V(V) HWY_IF_FLOAT(TFromV<V>)
512
+ #define HWY_IF_NOT_FLOAT_V(V) HWY_IF_NOT_FLOAT(TFromV<V>)
513
+ #define HWY_IF_SPECIAL_FLOAT_V(V) HWY_IF_SPECIAL_FLOAT(TFromV<V>)
514
+ #define HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V) \
515
+ HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromV<V>)
516
+
517
+ #define HWY_IF_T_SIZE_V(V, bytes) HWY_IF_T_SIZE(TFromV<V>, bytes)
518
+ #define HWY_IF_NOT_T_SIZE_V(V, bytes) HWY_IF_NOT_T_SIZE(TFromV<V>, bytes)
519
+ #define HWY_IF_T_SIZE_ONE_OF_V(V, bit_array) \
520
+ HWY_IF_T_SIZE_ONE_OF(TFromV<V>, bit_array)
521
+
522
+ #define HWY_MAX_LANES_V(V) HWY_MAX_LANES_D(DFromV<V>)
523
+ #define HWY_IF_V_SIZE_V(V, bytes) \
524
+ HWY_IF_V_SIZE(TFromV<V>, HWY_MAX_LANES_V(V), bytes)
525
+ #define HWY_IF_V_SIZE_LE_V(V, bytes) \
526
+ HWY_IF_V_SIZE_LE(TFromV<V>, HWY_MAX_LANES_V(V), bytes)
527
+ #define HWY_IF_V_SIZE_GT_V(V, bytes) \
528
+ HWY_IF_V_SIZE_GT(TFromV<V>, HWY_MAX_LANES_V(V), bytes)
529
+
530
+ // Old names (deprecated)
531
// Pure renames: each expands to the corresponding HWY_IF_*T_SIZE_D macro with
// the same arguments.
+ #define HWY_IF_LANE_SIZE_D(D, bytes) HWY_IF_T_SIZE_D(D, bytes)
532
+ #define HWY_IF_NOT_LANE_SIZE_D(D, bytes) HWY_IF_NOT_T_SIZE_D(D, bytes)
533
+
534
+ // NOLINTNEXTLINE(google-readability-namespace-comments)
535
+ } // namespace HWY_NAMESPACE
536
+ } // namespace hwy
537
+ HWY_AFTER_NAMESPACE();
538
+
539
+ #endif // HIGHWAY_HWY_OPS_SHARED_TOGGLE
@@ -0,0 +1,125 @@
1
+ // Copyright 2023 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ // Tuple support. Included by those ops/* that lack native tuple types, after
17
+ // they define VFromD and before they use the tuples e.g. for LoadInterleaved2.
18
+ // Assumes we are already in the HWY_NAMESPACE and under an include guard.
19
+
20
+ // If viewing this header standalone, define VFromD to avoid IDE warnings.
21
+ // This is normally set by set_macros-inl.h before this header is included.
22
+ #if !defined(HWY_NAMESPACE)
23
+ #include "hwy/base.h"
24
+ template <class D>
25
+ using VFromD = int;
26
+ #endif
27
+
28
+ // On SVE, Vec2..4 are aliases to built-in types.
29
+ template <class D>
30
+ struct Vec2 {
31
+ VFromD<D> v0;
32
+ VFromD<D> v1;
33
+ };
34
+
35
+ template <class D>
36
+ struct Vec3 {
37
+ VFromD<D> v0;
38
+ VFromD<D> v1;
39
+ VFromD<D> v2;
40
+ };
41
+
42
+ template <class D>
43
+ struct Vec4 {
44
+ VFromD<D> v0;
45
+ VFromD<D> v1;
46
+ VFromD<D> v2;
47
+ VFromD<D> v3;
48
+ };
49
+
50
+ // D arg is unused but allows deducing D.
51
+ template <class D>
52
+ HWY_API Vec2<D> Create2(D /* tag */, VFromD<D> v0, VFromD<D> v1) {
53
+ return Vec2<D>{v0, v1};
54
+ }
55
+
56
+ template <class D>
57
+ HWY_API Vec3<D> Create3(D /* tag */, VFromD<D> v0, VFromD<D> v1, VFromD<D> v2) {
58
+ return Vec3<D>{v0, v1, v2};
59
+ }
60
+
61
+ template <class D>
62
+ HWY_API Vec4<D> Create4(D /* tag */, VFromD<D> v0, VFromD<D> v1, VFromD<D> v2,
63
+ VFromD<D> v3) {
64
+ return Vec4<D>{v0, v1, v2, v3};
65
+ }
66
+
67
+ template <size_t kIndex, class D>
68
+ HWY_API VFromD<D> Get2(Vec2<D> tuple) {
69
+ static_assert(kIndex < 2, "Tuple index out of bounds");
70
+ return kIndex == 0 ? tuple.v0 : tuple.v1;
71
+ }
72
+
73
+ template <size_t kIndex, class D>
74
+ HWY_API VFromD<D> Get3(Vec3<D> tuple) {
75
+ static_assert(kIndex < 3, "Tuple index out of bounds");
76
+ return kIndex == 0 ? tuple.v0 : kIndex == 1 ? tuple.v1 : tuple.v2;
77
+ }
78
+
79
+ template <size_t kIndex, class D>
80
+ HWY_API VFromD<D> Get4(Vec4<D> tuple) {
81
+ static_assert(kIndex < 4, "Tuple index out of bounds");
82
+ return kIndex == 0 ? tuple.v0
83
+ : kIndex == 1 ? tuple.v1
84
+ : kIndex == 2 ? tuple.v2
85
+ : tuple.v3;
86
+ }
87
+
88
+ template <size_t kIndex, class D>
89
+ HWY_API Vec2<D> Set2(Vec2<D> tuple, VFromD<D> val) {
90
+ static_assert(kIndex < 2, "Tuple index out of bounds");
91
+ if (kIndex == 0) {
92
+ tuple.v0 = val;
93
+ } else {
94
+ tuple.v1 = val;
95
+ }
96
+ return tuple;
97
+ }
98
+
99
+ template <size_t kIndex, class D>
100
+ HWY_API Vec3<D> Set3(Vec3<D> tuple, VFromD<D> val) {
101
+ static_assert(kIndex < 3, "Tuple index out of bounds");
102
+ if (kIndex == 0) {
103
+ tuple.v0 = val;
104
+ } else if (kIndex == 1) {
105
+ tuple.v1 = val;
106
+ } else {
107
+ tuple.v2 = val;
108
+ }
109
+ return tuple;
110
+ }
111
+
112
+ template <size_t kIndex, class D>
113
+ HWY_API Vec4<D> Set4(Vec4<D> tuple, VFromD<D> val) {
114
+ static_assert(kIndex < 4, "Tuple index out of bounds");
115
+ if (kIndex == 0) {
116
+ tuple.v0 = val;
117
+ } else if (kIndex == 1) {
118
+ tuple.v1 = val;
119
+ } else if (kIndex == 2) {
120
+ tuple.v2 = val;
121
+ } else {
122
+ tuple.v3 = val;
123
+ }
124
+ return tuple;
125
+ }