@img/sharp-libvips-dev 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. package/README.md +2 -2
  2. package/cplusplus/VConnection.cpp +54 -54
  3. package/cplusplus/VError.cpp +20 -18
  4. package/cplusplus/VImage.cpp +636 -589
  5. package/cplusplus/VInterpolate.cpp +22 -22
  6. package/cplusplus/VRegion.cpp +4 -4
  7. package/cplusplus/vips-operators.cpp +2326 -2301
  8. package/include/aom/aom_codec.h +10 -6
  9. package/include/aom/aom_decoder.h +1 -1
  10. package/include/aom/aom_encoder.h +9 -2
  11. package/include/aom/aomcx.h +72 -3
  12. package/include/cairo/cairo-ft.h +1 -1
  13. package/include/cairo/cairo-gobject.h +8 -0
  14. package/include/cairo/cairo-svg.h +3 -3
  15. package/include/cairo/cairo-version.h +2 -2
  16. package/include/cairo/cairo.h +91 -24
  17. package/include/harfbuzz/hb-version.h +2 -2
  18. package/include/hwy/aligned_allocator.h +211 -0
  19. package/include/hwy/base.h +1517 -0
  20. package/include/hwy/cache_control.h +108 -0
  21. package/include/hwy/detect_compiler_arch.h +281 -0
  22. package/include/hwy/detect_targets.h +644 -0
  23. package/include/hwy/foreach_target.h +340 -0
  24. package/include/hwy/highway.h +435 -0
  25. package/include/hwy/highway_export.h +74 -0
  26. package/include/hwy/nanobenchmark.h +171 -0
  27. package/include/hwy/ops/arm_neon-inl.h +8913 -0
  28. package/include/hwy/ops/arm_sve-inl.h +5105 -0
  29. package/include/hwy/ops/emu128-inl.h +2811 -0
  30. package/include/hwy/ops/generic_ops-inl.h +4745 -0
  31. package/include/hwy/ops/ppc_vsx-inl.h +5716 -0
  32. package/include/hwy/ops/rvv-inl.h +5070 -0
  33. package/include/hwy/ops/scalar-inl.h +1995 -0
  34. package/include/hwy/ops/set_macros-inl.h +578 -0
  35. package/include/hwy/ops/shared-inl.h +539 -0
  36. package/include/hwy/ops/tuple-inl.h +125 -0
  37. package/include/hwy/ops/wasm_128-inl.h +5917 -0
  38. package/include/hwy/ops/x86_128-inl.h +11173 -0
  39. package/include/hwy/ops/x86_256-inl.h +7529 -0
  40. package/include/hwy/ops/x86_512-inl.h +6849 -0
  41. package/include/hwy/per_target.h +44 -0
  42. package/include/hwy/print-inl.h +62 -0
  43. package/include/hwy/print.h +75 -0
  44. package/include/hwy/robust_statistics.h +148 -0
  45. package/include/hwy/targets.h +338 -0
  46. package/include/hwy/timer-inl.h +200 -0
  47. package/include/hwy/timer.h +55 -0
  48. package/include/jconfig.h +2 -2
  49. package/include/jpeglib.h +3 -2
  50. package/include/libheif/heif.h +443 -377
  51. package/include/libheif/heif_cxx.h +4 -1
  52. package/include/libheif/heif_plugin.h +1 -1
  53. package/include/libheif/heif_properties.h +138 -0
  54. package/include/libheif/heif_regions.h +866 -0
  55. package/include/libheif/heif_version.h +3 -3
  56. package/include/vips/VConnection8.h +43 -49
  57. package/include/vips/VError8.h +27 -24
  58. package/include/vips/VImage8.h +4861 -4597
  59. package/include/vips/VInterpolate8.h +24 -27
  60. package/include/vips/VRegion8.h +32 -33
  61. package/include/vips/arithmetic.h +169 -169
  62. package/include/vips/basic.h +33 -33
  63. package/include/vips/buf.h +56 -54
  64. package/include/vips/colour.h +95 -95
  65. package/include/vips/connection.h +190 -193
  66. package/include/vips/conversion.h +91 -91
  67. package/include/vips/convolution.h +36 -30
  68. package/include/vips/create.h +63 -63
  69. package/include/vips/dbuf.h +35 -37
  70. package/include/vips/debug.h +65 -33
  71. package/include/vips/draw.h +41 -41
  72. package/include/vips/enumtypes.h +54 -51
  73. package/include/vips/error.h +63 -63
  74. package/include/vips/foreign.h +263 -223
  75. package/include/vips/format.h +48 -48
  76. package/include/vips/freqfilt.h +22 -22
  77. package/include/vips/gate.h +55 -47
  78. package/include/vips/generate.h +34 -34
  79. package/include/vips/header.h +111 -101
  80. package/include/vips/histogram.h +28 -28
  81. package/include/vips/image.h +213 -213
  82. package/include/vips/interpolate.h +40 -41
  83. package/include/vips/memory.h +61 -52
  84. package/include/vips/morphology.h +24 -24
  85. package/include/vips/mosaicing.h +32 -33
  86. package/include/vips/object.h +371 -357
  87. package/include/vips/operation.h +68 -67
  88. package/include/vips/private.h +76 -76
  89. package/include/vips/rect.h +26 -26
  90. package/include/vips/region.h +92 -92
  91. package/include/vips/resample.h +38 -38
  92. package/include/vips/sbuf.h +53 -54
  93. package/include/vips/semaphore.h +24 -24
  94. package/include/vips/thread.h +30 -27
  95. package/include/vips/threadpool.h +48 -49
  96. package/include/vips/transform.h +39 -39
  97. package/include/vips/type.h +90 -85
  98. package/include/vips/util.h +274 -229
  99. package/include/vips/vector.h +24 -144
  100. package/include/vips/version.h +9 -9
  101. package/include/vips/vips.h +41 -40
  102. package/package.json +1 -1
  103. package/versions.json +7 -7
@@ -0,0 +1,44 @@
1
+ // Copyright 2022 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ #ifndef HIGHWAY_HWY_PER_TARGET_H_
17
+ #define HIGHWAY_HWY_PER_TARGET_H_
18
+
19
+ #include <stddef.h>
20
+
21
+ #include "hwy/highway_export.h"
22
+
23
+ // Functions to query the capabilities of the target that will be called by
24
+ // HWY_DYNAMIC_DISPATCH, which is not necessarily the current target.
25
+
26
+ namespace hwy {
27
+
28
+ // Returns size in bytes of a vector, i.e. `Lanes(ScalableTag<uint8_t>())`.
29
+ //
30
+ // Do not cache the result, which may change after calling DisableTargets, or
31
+ // if software requests a different vector size (e.g. when entering/exiting SME
32
+ // streaming mode). Instead call this right before the code that depends on the
33
+ // result, without any DisableTargets or SME transition in-between. Note that
34
+ // this involves an indirect call, so prefer not to call this frequently nor
35
+ // unnecessarily.
36
+ HWY_DLLEXPORT size_t VectorBytes();
37
+
38
+ // Returns whether 16/64-bit floats are a supported lane type.
39
+ HWY_DLLEXPORT bool HaveFloat16();
40
+ HWY_DLLEXPORT bool HaveFloat64();
41
+
42
+ } // namespace hwy
43
+
44
+ #endif // HIGHWAY_HWY_PER_TARGET_H_
@@ -0,0 +1,62 @@
1
+ // Copyright 2022 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ // Print() function
17
+
18
+ #include "hwy/highway.h"
19
+ #include "hwy/print.h"
20
+
21
+ // Per-target include guard
22
+ #if defined(HIGHWAY_HWY_PRINT_INL_H_) == defined(HWY_TARGET_TOGGLE)
23
+ #ifdef HIGHWAY_HWY_PRINT_INL_H_
24
+ #undef HIGHWAY_HWY_PRINT_INL_H_
25
+ #else
26
+ #define HIGHWAY_HWY_PRINT_INL_H_
27
+ #endif
28
+
29
+ #if HWY_TARGET == HWY_RVV
30
+ #include "hwy/aligned_allocator.h"
31
+ #endif
32
+
33
+ HWY_BEFORE_NAMESPACE();
34
+ namespace hwy {
35
+ namespace HWY_NAMESPACE {
36
+
37
+ // Prints lanes around `lane`, in memory order.
38
+ template <class D, class V = VFromD<D>>
39
+ HWY_API void Print(const D d, const char* caption, V v, size_t lane_u = 0,
40
+ size_t max_lanes = 7) {
41
+ const size_t N = Lanes(d);
42
+ using T = TFromD<D>;
43
+ #if HWY_TARGET == HWY_RVV
44
+ auto storage = AllocateAligned<T>(N);
45
+ T* HWY_RESTRICT lanes = storage.get();
46
+ #else
47
+ // This works around an SVE compile error on GCC 11 and 12. Calling
48
+ // AllocateAligned here would seem to require it be marked with HWY_ATTR.
49
+ HWY_ALIGN T lanes[MaxLanes(d)];
50
+ #endif
51
+ Store(v, d, lanes);
52
+
53
+ const auto info = hwy::detail::MakeTypeInfo<T>();
54
+ hwy::detail::PrintArray(info, caption, lanes, N, lane_u, max_lanes);
55
+ }
56
+
57
+ // NOLINTNEXTLINE(google-readability-namespace-comments)
58
+ } // namespace HWY_NAMESPACE
59
+ } // namespace hwy
60
+ HWY_AFTER_NAMESPACE();
61
+
62
+ #endif // per-target include guard
@@ -0,0 +1,75 @@
1
+ // Copyright 2022 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ #ifndef HWY_PRINT_H_
17
+ #define HWY_PRINT_H_
18
+
19
+ // Helpers for printing vector lanes.
20
+
21
+ #include <stddef.h>
22
+ #include <stdio.h>
23
+
24
+ #include "hwy/base.h"
25
+ #include "hwy/highway_export.h"
26
+
27
+ namespace hwy {
28
+
29
+ namespace detail {
30
+
31
+ // For implementing value comparisons etc. as type-erased functions to reduce
32
+ // template bloat.
33
+ struct TypeInfo {  // Runtime description of a lane type T; filled in by MakeTypeInfo<T>().
34
+ size_t sizeof_t;  // sizeof(T)
35
+ bool is_float;  // IsFloat<T>()
36
+ bool is_signed;  // IsSigned<T>()
37
+ bool is_bf16;  // IsSame<T, bfloat16_t>()
38
+ };
39
+
40
+ template <typename T>
41
+ HWY_INLINE TypeInfo MakeTypeInfo() {
42
+ TypeInfo info;
43
+ info.sizeof_t = sizeof(T);
44
+ info.is_float = IsFloat<T>();
45
+ info.is_signed = IsSigned<T>();
46
+ info.is_bf16 = IsSame<T, bfloat16_t>();
47
+ return info;
48
+ }
49
+
50
+ HWY_DLLEXPORT void TypeName(const TypeInfo& info, size_t N, char* string100);
51
+ HWY_DLLEXPORT void ToString(const TypeInfo& info, const void* ptr,
52
+ char* string100);
53
+
54
+ HWY_DLLEXPORT void PrintArray(const TypeInfo& info, const char* caption,
55
+ const void* array_void, size_t N,
56
+ size_t lane_u = 0, size_t max_lanes = 7);
57
+
58
+ } // namespace detail
59
+
60
+ template <typename T>
61
+ HWY_NOINLINE void PrintValue(T value) {
62
+ char str[100];
63
+ detail::ToString(hwy::detail::MakeTypeInfo<T>(), &value, str);
64
+ fprintf(stderr, "%s,", str);
65
+ }
66
+
67
+ template <typename T>
68
+ HWY_NOINLINE void PrintArray(const T* value, size_t count) {
69
+ detail::PrintArray(hwy::detail::MakeTypeInfo<T>(), "", value, count, 0,
70
+ count);
71
+ }
72
+
73
+ } // namespace hwy
74
+
75
+ #endif // HWY_PRINT_H_
@@ -0,0 +1,148 @@
1
+ // Copyright 2023 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ #ifndef HIGHWAY_HWY_ROBUST_STATISTICS_H_
17
+ #define HIGHWAY_HWY_ROBUST_STATISTICS_H_
18
+
19
+ #include <algorithm> // std::sort, std::find_if
20
+ #include <limits>
21
+ #include <utility> // std::pair
22
+ #include <vector>
23
+
24
+ #include "hwy/base.h"
25
+
26
+ namespace hwy {
27
+ namespace robust_statistics {
28
+
29
+ // Sorts integral values in ascending order (e.g. for Mode). About 3x faster
30
+ // than std::sort for input distributions with very few unique values.
31
+ template <class T>
32
+ void CountingSort(T* values, size_t num_values) {
33
+ // Unique values and their frequency (similar to flat_map).
34
+ using Unique = std::pair<T, int>;
35
+ std::vector<Unique> unique;
36
+ for (size_t i = 0; i < num_values; ++i) {
37
+ const T value = values[i];
38
+ const auto pos =
39
+ std::find_if(unique.begin(), unique.end(),
40
+ [value](const Unique u) { return u.first == value; });
41
+ if (pos == unique.end()) {
42
+ unique.push_back(std::make_pair(value, 1));
43
+ } else {
44
+ ++pos->second;
45
+ }
46
+ }
47
+
48
+ // Sort in ascending order of value (pair.first).
49
+ std::sort(unique.begin(), unique.end());
50
+
51
+ // Write that many copies of each unique value to the array.
52
+ T* HWY_RESTRICT p = values;
53
+ for (const auto& value_count : unique) {
54
+ std::fill(p, p + value_count.second, value_count.first);
55
+ p += value_count.second;
56
+ }
57
+ HWY_ASSERT(p == values + num_values);
58
+ }
59
+
60
// @return i in [idx_begin, idx_begin + half_count) that minimizes
// sorted[i + half_count] - sorted[i]. If several indices share the minimum,
// the first one is returned.
//
// "sorted" must be in ascending order and must have at least
// idx_begin + 2 * half_count elements, because the loop reads
// sorted[idx + half_count] for every candidate idx.
template <typename T>
size_t MinRange(const T* const HWY_RESTRICT sorted, const size_t idx_begin,
                const size_t half_count) {
  T min_range = std::numeric_limits<T>::max();
  // Start at idx_begin rather than 0: the update below only triggers for a
  // range strictly less than max(), so if every range equals max() the initial
  // value is returned and it must still lie within the documented interval.
  size_t min_idx = idx_begin;

  for (size_t idx = idx_begin; idx < idx_begin + half_count; ++idx) {
    HWY_ASSERT(sorted[idx] <= sorted[idx + half_count]);
    const T range = sorted[idx + half_count] - sorted[idx];
    if (range < min_range) {
      min_range = range;
      min_idx = idx;
    }
  }

  return min_idx;
}
79
+
80
// Returns an estimate of the mode by calling MinRange on successively
// halved intervals. "sorted" must be in ascending order and must not be empty
// (num_values != 0). This is the Half Sample Mode estimator proposed by Bickel
// in "On a fast, robust estimator of the mode", with complexity O(N log N).
// The mode is less affected by outliers in highly-skewed distributions than
// the median.
//
// The averaging operation below assumes "T" is an unsigned integer type. It
// adds the two candidate values before halving, so their sum (plus one) must
// not overflow.
template <typename T>
T ModeOfSorted(const T* const HWY_RESTRICT sorted, const size_t num_values) {
  // An empty input has no mode; without this check sorted[0] would be read
  // out of bounds below.
  HWY_ASSERT(num_values != 0);

  size_t idx_begin = 0;
  size_t half_count = num_values / 2;
  while (half_count > 1) {
    idx_begin = MinRange(sorted, idx_begin, half_count);
    half_count >>= 1;
  }

  const T x = sorted[idx_begin + 0];
  // half_count is 0 only if it started at 0, i.e. num_values == 1: the single
  // element is the mode.
  if (half_count == 0) {
    return x;
  }
  HWY_ASSERT(half_count == 1);
  // Average of the two remaining candidates, rounded up.
  const T average = (x + sorted[idx_begin + 1] + 1) / 2;
  return average;
}
103
+
104
// Returns the mode estimate of values[0, num_values), computed by
// ModeOfSorted. Side effect: "values" is left sorted in ascending order
// (via CountingSort).
template <typename T>
T Mode(T* values, const size_t num_values) {
  CountingSort(values, num_values);
  const T mode = ModeOfSorted(values, num_values);
  return mode;
}
110
+
111
// Convenience overload for fixed-size arrays: forwards to the pointer/count
// overload of Mode with the array's element count. Side effect: sorts
// "values".
template <typename T, size_t N>
T Mode(T (&values)[N]) {
  T* const first = values;
  return Mode(first, N);
}
115
+
116
// Returns the median of values[0, num_values); num_values must be nonzero.
// For an even count the result is the average of the two middle elements,
// computed as (a + b + 1) / 2. Side effect: sorts "values" in ascending order.
template <typename T>
T Median(T* values, const size_t num_values) {
  HWY_ASSERT(num_values != 0);
  T* const end = values + num_values;
  std::sort(values, end);

  const size_t mid = num_values >> 1;
  const bool is_even = (num_values & 1) == 0;
  if (is_even) {
    // Two middle elements: average them.
    return (values[mid] + values[mid - 1] + 1) / 2;
  }
  // Odd count: a single middle element.
  return values[mid];
}
129
+
130
// Returns a robust measure of variability: the median (see Median) of
// |values[i] - median| over all i in [0, num_values). num_values must be
// nonzero. "values" is not modified; the deviations are stored in a temporary
// vector.
//
// Each deviation is computed in T as (larger - smaller). Unlike a round trip
// through int64_t, this does not truncate floating-point inputs and cannot
// overflow for unsigned types. Every deviation must be representable in T.
template <typename T>
T MedianAbsoluteDeviation(const T* values, const size_t num_values,
                          const T median) {
  HWY_ASSERT(num_values != 0);
  std::vector<T> abs_deviations;
  abs_deviations.reserve(num_values);
  for (size_t i = 0; i < num_values; ++i) {
    const T value = values[i];
    // The cast only undoes integer promotion for types narrower than int.
    abs_deviations.push_back(
        static_cast<T>(value < median ? median - value : value - median));
  }
  return Median(abs_deviations.data(), num_values);
}
144
+
145
+ } // namespace robust_statistics
146
+ } // namespace hwy
147
+
148
+ #endif // HIGHWAY_HWY_ROBUST_STATISTICS_H_
@@ -0,0 +1,338 @@
1
+ // Copyright 2020 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ #ifndef HIGHWAY_HWY_TARGETS_H_
17
+ #define HIGHWAY_HWY_TARGETS_H_
18
+
19
+ // Allows opting out of C++ standard library usage, which is not available in
20
+ // some Compiler Explorer environments.
21
+ #ifndef HWY_NO_LIBCXX
22
+ #include <vector>
23
+ #endif
24
+
25
+ // For SIMD module implementations and their callers. Defines which targets to
26
+ // generate and call.
27
+
28
+ #include "hwy/base.h"
29
+ #include "hwy/detect_targets.h"
30
+ #include "hwy/highway_export.h"
31
+
32
+ #if !HWY_ARCH_RVV && !defined(HWY_NO_LIBCXX)
33
+ #include <atomic>
34
+ #endif
35
+
36
+ namespace hwy {
37
+
38
+ // Returns bitfield of enabled targets that are supported on this CPU; there is
39
+ // always at least one such target, hence the return value is never 0. The
40
+ // targets returned may change after calling DisableTargets. This function is
41
+ // always defined, but the HWY_SUPPORTED_TARGETS wrapper may allow eliding
42
+ // calls to it if there is only a single target enabled.
43
+ HWY_DLLEXPORT int64_t SupportedTargets();
44
+
45
+ // Evaluates to a function call, or literal if there is a single target.
46
+ #if (HWY_TARGETS & (HWY_TARGETS - 1)) == 0
47
+ #define HWY_SUPPORTED_TARGETS HWY_TARGETS
48
+ #else
49
+ #define HWY_SUPPORTED_TARGETS hwy::SupportedTargets()
50
+ #endif
51
+
52
+ // Subsequent SupportedTargets will not return targets whose bit(s) are set in
53
+ // `disabled_targets`. Exception: if SupportedTargets would return 0, it will
54
+ // instead return HWY_STATIC_TARGET (there must always be one target to call).
55
+ //
56
+ // This function is useful for disabling targets known to be buggy, or if the
57
+ // best available target is undesirable (perhaps due to throttling or memory
58
+ // bandwidth limitations). Use SetSupportedTargetsForTest instead of this
59
+ // function for iteratively enabling specific targets for testing.
60
+ HWY_DLLEXPORT void DisableTargets(int64_t disabled_targets);
61
+
62
+ // Subsequent SupportedTargets will return the given set of targets, except
63
+ // those disabled via DisableTargets. Call with a mask of 0 to disable the mock
64
+ // and return to the normal SupportedTargets behavior. Used to run tests for
65
+ // all targets.
66
+ HWY_DLLEXPORT void SetSupportedTargetsForTest(int64_t targets);
67
+
68
+ #ifndef HWY_NO_LIBCXX
69
+
70
+ // Return the list of targets in HWY_TARGETS supported by the CPU as a list of
71
+ // individual HWY_* target macros such as HWY_SCALAR or HWY_NEON. This list
72
+ // is affected by the current SetSupportedTargetsForTest() mock if any.
73
+ HWY_INLINE std::vector<int64_t> SupportedAndGeneratedTargets() {
74
+ std::vector<int64_t> ret;
75
+ for (int64_t targets = SupportedTargets() & HWY_TARGETS; targets != 0;
76
+ targets = targets & (targets - 1)) {
77
+ int64_t current_target = targets & ~(targets - 1);
78
+ ret.push_back(current_target);
79
+ }
80
+ return ret;
81
+ }
82
+
83
+ #endif // HWY_NO_LIBCXX
84
+
85
+ static inline HWY_MAYBE_UNUSED const char* TargetName(int64_t target) {
86
+ switch (target) {
87
+ #if HWY_ARCH_X86
88
+ case HWY_SSE2:
89
+ return "SSE2";
90
+ case HWY_SSSE3:
91
+ return "SSSE3";
92
+ case HWY_SSE4:
93
+ return "SSE4";
94
+ case HWY_AVX2:
95
+ return "AVX2";
96
+ case HWY_AVX3:
97
+ return "AVX3";
98
+ case HWY_AVX3_DL:
99
+ return "AVX3_DL";
100
+ case HWY_AVX3_ZEN4:
101
+ return "AVX3_ZEN4";
102
+ case HWY_AVX3_SPR:
103
+ return "AVX3_SPR";
104
+ #endif
105
+
106
+ #if HWY_ARCH_ARM
107
+ case HWY_SVE2_128:
108
+ return "SVE2_128";
109
+ case HWY_SVE_256:
110
+ return "SVE_256";
111
+ case HWY_SVE2:
112
+ return "SVE2";
113
+ case HWY_SVE:
114
+ return "SVE";
115
+ case HWY_NEON:
116
+ return "NEON";
117
+ case HWY_NEON_WITHOUT_AES:
118
+ return "NEON_WITHOUT_AES";
119
+ #endif
120
+
121
+ #if HWY_ARCH_PPC
122
+ case HWY_PPC8:
123
+ return "PPC8";
124
+ case HWY_PPC9:
125
+ return "PPC9";
126
+ case HWY_PPC10:
127
+ return "PPC10";
128
+ #endif
129
+
130
+ #if HWY_ARCH_WASM
131
+ case HWY_WASM:
132
+ return "WASM";
133
+ case HWY_WASM_EMU256:
134
+ return "WASM_EMU256";
135
+ #endif
136
+
137
+ #if HWY_ARCH_RVV
138
+ case HWY_RVV:
139
+ return "RVV";
140
+ #endif
141
+
142
+ case HWY_EMU128:
143
+ return "EMU128";
144
+ case HWY_SCALAR:
145
+ return "SCALAR";
146
+
147
+ default:
148
+ return "Unknown"; // must satisfy gtest IsValidParamName()
149
+ }
150
+ }
151
+
152
+ // The maximum number of dynamic targets on any architecture is defined by
153
+ // HWY_MAX_DYNAMIC_TARGETS and depends on the arch.
154
+
155
+ // For the ChosenTarget mask and index we use a different bit arrangement than
156
+ // in the HWY_TARGETS mask. Only the targets involved in the current
157
+ // architecture are used in this mask, and therefore only the least significant
158
+ // (HWY_MAX_DYNAMIC_TARGETS + 2) bits of the int64_t mask are used. The least
159
+ // significant bit is set when the mask is not initialized, the next
160
+ // HWY_MAX_DYNAMIC_TARGETS more significant bits are a range of bits from the
161
+ // HWY_TARGETS or SupportedTargets() mask for the given architecture shifted to
162
+ // that position and the next more significant bit is used for HWY_SCALAR (if
163
+ // HWY_COMPILE_ONLY_SCALAR is defined) or HWY_EMU128. Because of this we need to
164
+ // define equivalent values for HWY_TARGETS in this representation.
165
+ // This mask representation allows using ctz() on this mask to obtain a small
166
+ // number that's used as an index of the table for dynamic dispatch. In this
167
+ // way the first entry is used when the mask is uninitialized, the following
168
+ // HWY_MAX_DYNAMIC_TARGETS are for dynamic dispatch and the last one is for
169
+ // scalar.
170
+
171
+ // The HWY_SCALAR/HWY_EMU128 bit in the ChosenTarget mask format.
172
+ #define HWY_CHOSEN_TARGET_MASK_SCALAR (1LL << (HWY_MAX_DYNAMIC_TARGETS + 1))
173
+
174
+ // Converts from a HWY_TARGETS mask to a ChosenTarget mask format for the
175
+ // current architecture.
176
+ #define HWY_CHOSEN_TARGET_SHIFT(X) \
177
+ ((((X) >> (HWY_HIGHEST_TARGET_BIT + 1 - HWY_MAX_DYNAMIC_TARGETS)) & \
178
+ ((1LL << HWY_MAX_DYNAMIC_TARGETS) - 1)) \
179
+ << 1)
180
+
181
+ // The HWY_TARGETS mask in the ChosenTarget mask format.
182
+ #define HWY_CHOSEN_TARGET_MASK_TARGETS \
183
+ (HWY_CHOSEN_TARGET_SHIFT(HWY_TARGETS) | HWY_CHOSEN_TARGET_MASK_SCALAR | 1LL)
184
+
185
+ #if HWY_ARCH_X86
186
+ // Maximum number of dynamic targets. Changing this value is an ABI-incompatible
187
+ // change.
188
+ #define HWY_MAX_DYNAMIC_TARGETS 15
189
+ #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_X86
190
+ // These must match the order in which the HWY_TARGETS are defined
191
+ // starting by the least significant (HWY_HIGHEST_TARGET_BIT + 1 -
192
+ // HWY_MAX_DYNAMIC_TARGETS) bit. This list must contain exactly
193
+ // HWY_MAX_DYNAMIC_TARGETS elements and does not include SCALAR. The first entry
194
+ // corresponds to the best target. Don't include a "," at the end of the list.
195
+ #define HWY_CHOOSE_TARGET_LIST(func_name) \
196
+ nullptr, /* reserved */ \
197
+ nullptr, /* reserved */ \
198
+ nullptr, /* reserved */ \
199
+ nullptr, /* reserved */ \
200
+ HWY_CHOOSE_AVX3_SPR(func_name), /* AVX3_SPR */ \
201
+ nullptr, /* reserved */ \
202
+ HWY_CHOOSE_AVX3_ZEN4(func_name), /* AVX3_ZEN4 */ \
203
+ HWY_CHOOSE_AVX3_DL(func_name), /* AVX3_DL */ \
204
+ HWY_CHOOSE_AVX3(func_name), /* AVX3 */ \
205
+ HWY_CHOOSE_AVX2(func_name), /* AVX2 */ \
206
+ nullptr, /* AVX */ \
207
+ HWY_CHOOSE_SSE4(func_name), /* SSE4 */ \
208
+ HWY_CHOOSE_SSSE3(func_name), /* SSSE3 */ \
209
+ nullptr, /* reserved - SSE3? */ \
210
+ HWY_CHOOSE_SSE2(func_name) /* SSE2 */
211
+
212
+ #elif HWY_ARCH_ARM
213
+ // See HWY_ARCH_X86 above for details.
214
+ #define HWY_MAX_DYNAMIC_TARGETS 15
215
+ #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_ARM
216
+ #define HWY_CHOOSE_TARGET_LIST(func_name) \
217
+ nullptr, /* reserved */ \
218
+ nullptr, /* reserved */ \
219
+ nullptr, /* reserved */ \
220
+ nullptr, /* reserved */ \
221
+ nullptr, /* reserved */ \
222
+ nullptr, /* reserved */ \
223
+ nullptr, /* reserved */ \
224
+ nullptr, /* reserved */ \
225
+ nullptr, /* reserved */ \
226
+ HWY_CHOOSE_SVE2_128(func_name), /* SVE2 128-bit */ \
227
+ HWY_CHOOSE_SVE_256(func_name), /* SVE 256-bit */ \
228
+ HWY_CHOOSE_SVE2(func_name), /* SVE2 */ \
229
+ HWY_CHOOSE_SVE(func_name), /* SVE */ \
230
+ HWY_CHOOSE_NEON(func_name), /* NEON */ \
231
+ HWY_CHOOSE_NEON_WITHOUT_AES(func_name) /* NEON without AES */
232
+
233
+ #elif HWY_ARCH_RVV
234
+ // See HWY_ARCH_X86 above for details.
235
+ #define HWY_MAX_DYNAMIC_TARGETS 9
236
+ #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_RVV
237
+ #define HWY_CHOOSE_TARGET_LIST(func_name) \
238
+ nullptr, /* reserved */ \
239
+ nullptr, /* reserved */ \
240
+ nullptr, /* reserved */ \
241
+ nullptr, /* reserved */ \
242
+ nullptr, /* reserved */ \
243
+ nullptr, /* reserved */ \
244
+ nullptr, /* reserved */ \
245
+ HWY_CHOOSE_RVV(func_name), /* RVV */ \
246
+ nullptr /* reserved */
247
+
248
+ #elif HWY_ARCH_PPC
249
+ // See HWY_ARCH_X86 above for details.
250
+ #define HWY_MAX_DYNAMIC_TARGETS 9
251
+ #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_PPC
252
+ #define HWY_CHOOSE_TARGET_LIST(func_name) \
253
+ nullptr, /* reserved */ \
254
+ nullptr, /* reserved */ \
255
+ nullptr, /* reserved */ \
256
+ nullptr, /* reserved */ \
257
+ HWY_CHOOSE_PPC10(func_name), /* PPC10 */ \
258
+ HWY_CHOOSE_PPC9(func_name), /* PPC9 */ \
259
+ HWY_CHOOSE_PPC8(func_name), /* PPC8 */ \
260
+ nullptr, /* reserved (VSX or AltiVec) */ \
261
+ nullptr /* reserved (VSX or AltiVec) */
262
+
263
+ #elif HWY_ARCH_WASM
264
+ // See HWY_ARCH_X86 above for details.
265
+ #define HWY_MAX_DYNAMIC_TARGETS 9
266
+ #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_WASM
267
+ #define HWY_CHOOSE_TARGET_LIST(func_name) \
268
+ nullptr, /* reserved */ \
269
+ nullptr, /* reserved */ \
270
+ nullptr, /* reserved */ \
271
+ nullptr, /* reserved */ \
272
+ nullptr, /* reserved */ \
273
+ nullptr, /* reserved */ \
274
+ HWY_CHOOSE_WASM_EMU256(func_name), /* WASM_EMU256 */ \
275
+ HWY_CHOOSE_WASM(func_name), /* WASM */ \
276
+ nullptr /* reserved */
277
+
278
+ #else
279
+ // Unknown architecture, will use HWY_SCALAR without dynamic dispatch, though
280
+ // still creating single-entry tables in HWY_EXPORT to ensure portability.
281
+ #define HWY_MAX_DYNAMIC_TARGETS 1
282
+ #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_SCALAR
283
+ #endif
284
+
285
+ // Bitfield of supported and enabled targets. The format differs from that of
286
+ // HWY_TARGETS; the lowest bit governs the first function pointer (which is
287
+ // special in that it calls FunctionCache, then Update, then dispatches to the
288
+ // actual implementation) in the tables created by HWY_EXPORT. Monostate (see
289
+ // GetChosenTarget), thread-safe except on RVV.
290
+ struct ChosenTarget {
291
+ public:
292
+ // Reset bits according to `targets` (typically the return value of
293
+ // SupportedTargets()). Postcondition: IsInitialized() == true.
294
+ void Update(int64_t targets) {
295
+ // These are `targets` shifted downwards, see above. Also include SCALAR
296
+ // (corresponds to the last entry in the function table) as fallback.
297
+ StoreMask(HWY_CHOSEN_TARGET_SHIFT(targets) | HWY_CHOSEN_TARGET_MASK_SCALAR);
298
+ }
299
+
300
+ // Reset to the uninitialized state, so that FunctionCache will call Update
301
+ // during the next HWY_DYNAMIC_DISPATCH, and IsInitialized returns false.
302
+ void DeInit() { StoreMask(1); }
303
+
304
+ // Whether Update was called. This indicates whether any HWY_DYNAMIC_DISPATCH
305
+ // function was called, which we check in tests.
306
+ bool IsInitialized() const { return LoadMask() != 1; }
307
+
308
+ // Return the index in the dynamic dispatch table to be used by the current
309
+ // CPU. Note that this method must be in the header file so it uses the value
310
+ // of HWY_CHOSEN_TARGET_MASK_TARGETS defined in the translation unit that
311
+ // calls it, which may be different from others. This means we only enable
312
+ // those targets that were actually compiled in this module.
313
+ size_t HWY_INLINE GetIndex() const {
314
+ return hwy::Num0BitsBelowLS1Bit_Nonzero64(
315
+ static_cast<uint64_t>(LoadMask() & HWY_CHOSEN_TARGET_MASK_TARGETS));
316
+ }
317
+
318
+ private:
319
+ // TODO(janwas): remove RVV once <atomic> is available
320
+ #if HWY_ARCH_RVV || defined(HWY_NO_LIBCXX)
321
+ int64_t LoadMask() const { return mask_; }
322
+ void StoreMask(int64_t mask) { mask_ = mask; }
323
+
324
+ int64_t mask_{1}; // Initialized to 1 so GetIndex() returns 0.
325
+ #else
326
+ int64_t LoadMask() const { return mask_.load(); }
327
+ void StoreMask(int64_t mask) { mask_.store(mask); }
328
+
329
+ std::atomic<int64_t> mask_{1}; // Initialized to 1 so GetIndex() returns 0.
330
+ #endif // HWY_ARCH_RVV
331
+ };
332
+
333
+ // For internal use (e.g. by FunctionCache and DisableTargets).
334
+ HWY_DLLEXPORT ChosenTarget& GetChosenTarget();
335
+
336
+ } // namespace hwy
337
+
338
+ #endif // HIGHWAY_HWY_TARGETS_H_