@img/sharp-libvips-dev 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/cplusplus/VConnection.cpp +54 -54
- package/cplusplus/VError.cpp +20 -18
- package/cplusplus/VImage.cpp +636 -589
- package/cplusplus/VInterpolate.cpp +22 -22
- package/cplusplus/VRegion.cpp +4 -4
- package/cplusplus/vips-operators.cpp +2326 -2301
- package/include/aom/aom_codec.h +10 -6
- package/include/aom/aom_decoder.h +1 -1
- package/include/aom/aom_encoder.h +9 -2
- package/include/aom/aomcx.h +72 -3
- package/include/cairo/cairo-ft.h +1 -1
- package/include/cairo/cairo-gobject.h +8 -0
- package/include/cairo/cairo-svg.h +3 -3
- package/include/cairo/cairo-version.h +2 -2
- package/include/cairo/cairo.h +91 -24
- package/include/glib-2.0/glib/gmacros.h +1 -1
- package/include/glib-2.0/glib/gtestutils.h +1 -1
- package/include/glib-2.0/gobject/gtype.h +7 -7
- package/include/harfbuzz/hb-version.h +2 -2
- package/include/hwy/aligned_allocator.h +211 -0
- package/include/hwy/base.h +1517 -0
- package/include/hwy/cache_control.h +108 -0
- package/include/hwy/detect_compiler_arch.h +281 -0
- package/include/hwy/detect_targets.h +644 -0
- package/include/hwy/foreach_target.h +340 -0
- package/include/hwy/highway.h +435 -0
- package/include/hwy/highway_export.h +74 -0
- package/include/hwy/nanobenchmark.h +171 -0
- package/include/hwy/ops/arm_neon-inl.h +8913 -0
- package/include/hwy/ops/arm_sve-inl.h +5105 -0
- package/include/hwy/ops/emu128-inl.h +2811 -0
- package/include/hwy/ops/generic_ops-inl.h +4745 -0
- package/include/hwy/ops/ppc_vsx-inl.h +5716 -0
- package/include/hwy/ops/rvv-inl.h +5070 -0
- package/include/hwy/ops/scalar-inl.h +1995 -0
- package/include/hwy/ops/set_macros-inl.h +578 -0
- package/include/hwy/ops/shared-inl.h +539 -0
- package/include/hwy/ops/tuple-inl.h +125 -0
- package/include/hwy/ops/wasm_128-inl.h +5917 -0
- package/include/hwy/ops/x86_128-inl.h +11173 -0
- package/include/hwy/ops/x86_256-inl.h +7529 -0
- package/include/hwy/ops/x86_512-inl.h +6849 -0
- package/include/hwy/per_target.h +44 -0
- package/include/hwy/print-inl.h +62 -0
- package/include/hwy/print.h +75 -0
- package/include/hwy/robust_statistics.h +148 -0
- package/include/hwy/targets.h +338 -0
- package/include/hwy/timer-inl.h +200 -0
- package/include/hwy/timer.h +55 -0
- package/include/jconfig.h +2 -2
- package/include/jpeglib.h +3 -2
- package/include/libheif/heif.h +461 -384
- package/include/libheif/heif_cxx.h +4 -1
- package/include/libheif/heif_plugin.h +1 -1
- package/include/libheif/heif_properties.h +138 -0
- package/include/libheif/heif_regions.h +866 -0
- package/include/libheif/heif_version.h +3 -3
- package/include/libpng16/pnglibconf.h +1 -1
- package/include/pnglibconf.h +1 -1
- package/include/vips/VConnection8.h +43 -49
- package/include/vips/VError8.h +27 -24
- package/include/vips/VImage8.h +4861 -4597
- package/include/vips/VInterpolate8.h +24 -27
- package/include/vips/VRegion8.h +32 -33
- package/include/vips/arithmetic.h +169 -169
- package/include/vips/basic.h +33 -33
- package/include/vips/buf.h +56 -54
- package/include/vips/colour.h +95 -95
- package/include/vips/connection.h +190 -193
- package/include/vips/conversion.h +91 -91
- package/include/vips/convolution.h +36 -30
- package/include/vips/create.h +63 -63
- package/include/vips/dbuf.h +35 -37
- package/include/vips/debug.h +65 -33
- package/include/vips/draw.h +41 -41
- package/include/vips/enumtypes.h +54 -51
- package/include/vips/error.h +63 -63
- package/include/vips/foreign.h +263 -223
- package/include/vips/format.h +48 -48
- package/include/vips/freqfilt.h +22 -22
- package/include/vips/gate.h +55 -47
- package/include/vips/generate.h +34 -34
- package/include/vips/header.h +111 -101
- package/include/vips/histogram.h +28 -28
- package/include/vips/image.h +213 -213
- package/include/vips/interpolate.h +40 -41
- package/include/vips/memory.h +61 -52
- package/include/vips/morphology.h +24 -24
- package/include/vips/mosaicing.h +32 -33
- package/include/vips/object.h +371 -357
- package/include/vips/operation.h +68 -67
- package/include/vips/private.h +76 -76
- package/include/vips/rect.h +26 -26
- package/include/vips/region.h +92 -92
- package/include/vips/resample.h +38 -38
- package/include/vips/sbuf.h +53 -54
- package/include/vips/semaphore.h +24 -24
- package/include/vips/thread.h +30 -27
- package/include/vips/threadpool.h +48 -49
- package/include/vips/transform.h +39 -39
- package/include/vips/type.h +90 -85
- package/include/vips/util.h +274 -229
- package/include/vips/vector.h +24 -144
- package/include/vips/version.h +9 -9
- package/include/vips/vips.h +41 -40
- package/include/zlib.h +23 -19
- package/package.json +1 -1
- package/versions.json +9 -9
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
// Copyright 2022 Google LLC
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
//
|
|
4
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
// you may not use this file except in compliance with the License.
|
|
6
|
+
// You may obtain a copy of the License at
|
|
7
|
+
//
|
|
8
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
//
|
|
10
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
// See the License for the specific language governing permissions and
|
|
14
|
+
// limitations under the License.
|
|
15
|
+
|
|
16
|
+
#ifndef HIGHWAY_HWY_PER_TARGET_H_
|
|
17
|
+
#define HIGHWAY_HWY_PER_TARGET_H_
|
|
18
|
+
|
|
19
|
+
#include <stddef.h>
|
|
20
|
+
|
|
21
|
+
#include "hwy/highway_export.h"
|
|
22
|
+
|
|
23
|
+
// Functions to query the capabilities of the target that will be called by
|
|
24
|
+
// HWY_DYNAMIC_DISPATCH, which is not necessarily the current target.
|
|
25
|
+
|
|
26
|
+
namespace hwy {
|
|
27
|
+
|
|
28
|
+
// Returns size in bytes of a vector, i.e. `Lanes(ScalableTag<uint8_t>())`.
|
|
29
|
+
//
|
|
30
|
+
// Do not cache the result, which may change after calling DisableTargets, or
|
|
31
|
+
// if software requests a different vector size (e.g. when entering/exiting SME
|
|
32
|
+
// streaming mode). Instead call this right before the code that depends on the
|
|
33
|
+
// result, without any DisableTargets or SME transition in-between. Note that
|
|
34
|
+
// this involves an indirect call, so prefer not to call this frequently nor
|
|
35
|
+
// unnecessarily.
|
|
36
|
+
HWY_DLLEXPORT size_t VectorBytes();
|
|
37
|
+
|
|
38
|
+
// Returns whether 16/64-bit floats are a supported lane type.
|
|
39
|
+
HWY_DLLEXPORT bool HaveFloat16();
|
|
40
|
+
HWY_DLLEXPORT bool HaveFloat64();
|
|
41
|
+
|
|
42
|
+
} // namespace hwy
|
|
43
|
+
|
|
44
|
+
#endif // HIGHWAY_HWY_PER_TARGET_H_
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
// Copyright 2022 Google LLC
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
//
|
|
4
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
// you may not use this file except in compliance with the License.
|
|
6
|
+
// You may obtain a copy of the License at
|
|
7
|
+
//
|
|
8
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
//
|
|
10
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
// See the License for the specific language governing permissions and
|
|
14
|
+
// limitations under the License.
|
|
15
|
+
|
|
16
|
+
// Print() function
|
|
17
|
+
|
|
18
|
+
#include "hwy/highway.h"
|
|
19
|
+
#include "hwy/print.h"
|
|
20
|
+
|
|
21
|
+
// Per-target include guard
|
|
22
|
+
#if defined(HIGHWAY_HWY_PRINT_INL_H_) == defined(HWY_TARGET_TOGGLE)
|
|
23
|
+
#ifdef HIGHWAY_HWY_PRINT_INL_H_
|
|
24
|
+
#undef HIGHWAY_HWY_PRINT_INL_H_
|
|
25
|
+
#else
|
|
26
|
+
#define HIGHWAY_HWY_PRINT_INL_H_
|
|
27
|
+
#endif
|
|
28
|
+
|
|
29
|
+
#if HWY_TARGET == HWY_RVV
|
|
30
|
+
#include "hwy/aligned_allocator.h"
|
|
31
|
+
#endif
|
|
32
|
+
|
|
33
|
+
HWY_BEFORE_NAMESPACE();
|
|
34
|
+
namespace hwy {
|
|
35
|
+
namespace HWY_NAMESPACE {
|
|
36
|
+
|
|
37
|
+
// Prints lanes around `lane`, in memory order.
|
|
38
|
+
template <class D, class V = VFromD<D>>
|
|
39
|
+
HWY_API void Print(const D d, const char* caption, V v, size_t lane_u = 0,
|
|
40
|
+
size_t max_lanes = 7) {
|
|
41
|
+
const size_t N = Lanes(d);
|
|
42
|
+
using T = TFromD<D>;
|
|
43
|
+
#if HWY_TARGET == HWY_RVV
|
|
44
|
+
auto storage = AllocateAligned<T>(N);
|
|
45
|
+
T* HWY_RESTRICT lanes = storage.get();
|
|
46
|
+
#else
|
|
47
|
+
// This works around an SVE compile error on GCC 11 and 12. Calling
|
|
48
|
+
// AllocateAligned here would seem to require it be marked with HWY_ATTR.
|
|
49
|
+
HWY_ALIGN T lanes[MaxLanes(d)];
|
|
50
|
+
#endif
|
|
51
|
+
Store(v, d, lanes);
|
|
52
|
+
|
|
53
|
+
const auto info = hwy::detail::MakeTypeInfo<T>();
|
|
54
|
+
hwy::detail::PrintArray(info, caption, lanes, N, lane_u, max_lanes);
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// NOLINTNEXTLINE(google-readability-namespace-comments)
|
|
58
|
+
} // namespace HWY_NAMESPACE
|
|
59
|
+
} // namespace hwy
|
|
60
|
+
HWY_AFTER_NAMESPACE();
|
|
61
|
+
|
|
62
|
+
#endif // per-target include guard
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
// Copyright 2022 Google LLC
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
//
|
|
4
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
// you may not use this file except in compliance with the License.
|
|
6
|
+
// You may obtain a copy of the License at
|
|
7
|
+
//
|
|
8
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
//
|
|
10
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
// See the License for the specific language governing permissions and
|
|
14
|
+
// limitations under the License.
|
|
15
|
+
|
|
16
|
+
#ifndef HWY_PRINT_H_
|
|
17
|
+
#define HWY_PRINT_H_
|
|
18
|
+
|
|
19
|
+
// Helpers for printing vector lanes.
|
|
20
|
+
|
|
21
|
+
#include <stddef.h>
|
|
22
|
+
#include <stdio.h>
|
|
23
|
+
|
|
24
|
+
#include "hwy/base.h"
|
|
25
|
+
#include "hwy/highway_export.h"
|
|
26
|
+
|
|
27
|
+
namespace hwy {
|
|
28
|
+
|
|
29
|
+
namespace detail {
|
|
30
|
+
|
|
31
|
+
// Type-erased description of a lane type. Passing this instead of a template
// argument lets value comparisons etc. be implemented as ordinary functions,
// which reduces template bloat.
struct TypeInfo {
  size_t sizeof_t;  // sizeof(T)
  bool is_float;    // result of IsFloat<T>()
  bool is_signed;   // result of IsSigned<T>()
  bool is_bf16;     // whether T is bfloat16_t
};
|
|
39
|
+
|
|
40
|
+
template <typename T>
|
|
41
|
+
HWY_INLINE TypeInfo MakeTypeInfo() {
|
|
42
|
+
TypeInfo info;
|
|
43
|
+
info.sizeof_t = sizeof(T);
|
|
44
|
+
info.is_float = IsFloat<T>();
|
|
45
|
+
info.is_signed = IsSigned<T>();
|
|
46
|
+
info.is_bf16 = IsSame<T, bfloat16_t>();
|
|
47
|
+
return info;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
HWY_DLLEXPORT void TypeName(const TypeInfo& info, size_t N, char* string100);
|
|
51
|
+
HWY_DLLEXPORT void ToString(const TypeInfo& info, const void* ptr,
|
|
52
|
+
char* string100);
|
|
53
|
+
|
|
54
|
+
HWY_DLLEXPORT void PrintArray(const TypeInfo& info, const char* caption,
|
|
55
|
+
const void* array_void, size_t N,
|
|
56
|
+
size_t lane_u = 0, size_t max_lanes = 7);
|
|
57
|
+
|
|
58
|
+
} // namespace detail
|
|
59
|
+
|
|
60
|
+
template <typename T>
|
|
61
|
+
HWY_NOINLINE void PrintValue(T value) {
|
|
62
|
+
char str[100];
|
|
63
|
+
detail::ToString(hwy::detail::MakeTypeInfo<T>(), &value, str);
|
|
64
|
+
fprintf(stderr, "%s,", str);
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
template <typename T>
|
|
68
|
+
HWY_NOINLINE void PrintArray(const T* value, size_t count) {
|
|
69
|
+
detail::PrintArray(hwy::detail::MakeTypeInfo<T>(), "", value, count, 0,
|
|
70
|
+
count);
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
} // namespace hwy
|
|
74
|
+
|
|
75
|
+
#endif // HWY_PRINT_H_
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
// Copyright 2023 Google LLC
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
//
|
|
4
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
// you may not use this file except in compliance with the License.
|
|
6
|
+
// You may obtain a copy of the License at
|
|
7
|
+
//
|
|
8
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
//
|
|
10
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
// See the License for the specific language governing permissions and
|
|
14
|
+
// limitations under the License.
|
|
15
|
+
|
|
16
|
+
#ifndef HIGHWAY_HWY_ROBUST_STATISTICS_H_
|
|
17
|
+
#define HIGHWAY_HWY_ROBUST_STATISTICS_H_
|
|
18
|
+
|
|
19
|
+
#include <algorithm> // std::sort, std::find_if
|
|
20
|
+
#include <limits>
|
|
21
|
+
#include <utility> // std::pair
|
|
22
|
+
#include <vector>
|
|
23
|
+
|
|
24
|
+
#include "hwy/base.h"
|
|
25
|
+
|
|
26
|
+
namespace hwy {
|
|
27
|
+
namespace robust_statistics {
|
|
28
|
+
|
|
29
|
+
// Sorts integral values in ascending order (e.g. for Mode). About 3x faster
|
|
30
|
+
// than std::sort for input distributions with very few unique values.
|
|
31
|
+
template <class T>
|
|
32
|
+
void CountingSort(T* values, size_t num_values) {
|
|
33
|
+
// Unique values and their frequency (similar to flat_map).
|
|
34
|
+
using Unique = std::pair<T, int>;
|
|
35
|
+
std::vector<Unique> unique;
|
|
36
|
+
for (size_t i = 0; i < num_values; ++i) {
|
|
37
|
+
const T value = values[i];
|
|
38
|
+
const auto pos =
|
|
39
|
+
std::find_if(unique.begin(), unique.end(),
|
|
40
|
+
[value](const Unique u) { return u.first == value; });
|
|
41
|
+
if (pos == unique.end()) {
|
|
42
|
+
unique.push_back(std::make_pair(value, 1));
|
|
43
|
+
} else {
|
|
44
|
+
++pos->second;
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
// Sort in ascending order of value (pair.first).
|
|
49
|
+
std::sort(unique.begin(), unique.end());
|
|
50
|
+
|
|
51
|
+
// Write that many copies of each unique value to the array.
|
|
52
|
+
T* HWY_RESTRICT p = values;
|
|
53
|
+
for (const auto& value_count : unique) {
|
|
54
|
+
std::fill(p, p + value_count.second, value_count.first);
|
|
55
|
+
p += value_count.second;
|
|
56
|
+
}
|
|
57
|
+
HWY_ASSERT(p == values + num_values);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// @return i in [idx_begin, idx_begin + half_count) that minimizes
|
|
61
|
+
// sorted[i + half_count] - sorted[i].
|
|
62
|
+
template <typename T>
|
|
63
|
+
size_t MinRange(const T* const HWY_RESTRICT sorted, const size_t idx_begin,
|
|
64
|
+
const size_t half_count) {
|
|
65
|
+
T min_range = std::numeric_limits<T>::max();
|
|
66
|
+
size_t min_idx = 0;
|
|
67
|
+
|
|
68
|
+
for (size_t idx = idx_begin; idx < idx_begin + half_count; ++idx) {
|
|
69
|
+
HWY_ASSERT(sorted[idx] <= sorted[idx + half_count]);
|
|
70
|
+
const T range = sorted[idx + half_count] - sorted[idx];
|
|
71
|
+
if (range < min_range) {
|
|
72
|
+
min_range = range;
|
|
73
|
+
min_idx = idx;
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
return min_idx;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
// Returns an estimate of the mode by calling MinRange on successively
|
|
81
|
+
// halved intervals. "sorted" must be in ascending order. This is the
|
|
82
|
+
// Half Sample Mode estimator proposed by Bickel in "On a fast, robust
|
|
83
|
+
// estimator of the mode", with complexity O(N log N). The mode is less
|
|
84
|
+
// affected by outliers in highly-skewed distributions than the median.
|
|
85
|
+
// The averaging operation below assumes "T" is an unsigned integer type.
|
|
86
|
+
template <typename T>
|
|
87
|
+
T ModeOfSorted(const T* const HWY_RESTRICT sorted, const size_t num_values) {
|
|
88
|
+
size_t idx_begin = 0;
|
|
89
|
+
size_t half_count = num_values / 2;
|
|
90
|
+
while (half_count > 1) {
|
|
91
|
+
idx_begin = MinRange(sorted, idx_begin, half_count);
|
|
92
|
+
half_count >>= 1;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
const T x = sorted[idx_begin + 0];
|
|
96
|
+
if (half_count == 0) {
|
|
97
|
+
return x;
|
|
98
|
+
}
|
|
99
|
+
HWY_ASSERT(half_count == 1);
|
|
100
|
+
const T average = (x + sorted[idx_begin + 1] + 1) / 2;
|
|
101
|
+
return average;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
// Returns the mode estimate of `values[0, num_values)` as computed by
// ModeOfSorted. Side effect: sorts "values" (via CountingSort).
template <typename T>
T Mode(T* values, const size_t num_values) {
  CountingSort(values, num_values);
  const T mode = ModeOfSorted(values, num_values);
  return mode;
}
|
|
110
|
+
|
|
111
|
+
// Convenience overload for built-in arrays: deduces the element count N and
// forwards to the pointer/length overload of Mode.
template <typename T, size_t N>
T Mode(T (&values)[N]) {
  T* const first = values;  // array-to-pointer decay
  return Mode(first, N);
}
|
|
115
|
+
|
|
116
|
+
// Returns the median value. Side effect: sorts "values".
|
|
117
|
+
template <typename T>
|
|
118
|
+
T Median(T* values, const size_t num_values) {
|
|
119
|
+
HWY_ASSERT(num_values != 0);
|
|
120
|
+
std::sort(values, values + num_values);
|
|
121
|
+
const size_t half = num_values / 2;
|
|
122
|
+
// Odd count: return middle
|
|
123
|
+
if (num_values % 2) {
|
|
124
|
+
return values[half];
|
|
125
|
+
}
|
|
126
|
+
// Even count: return average of middle two.
|
|
127
|
+
return (values[half] + values[half - 1] + 1) / 2;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
// Returns a robust measure of variability.
|
|
131
|
+
template <typename T>
|
|
132
|
+
T MedianAbsoluteDeviation(const T* values, const size_t num_values,
|
|
133
|
+
const T median) {
|
|
134
|
+
HWY_ASSERT(num_values != 0);
|
|
135
|
+
std::vector<T> abs_deviations;
|
|
136
|
+
abs_deviations.reserve(num_values);
|
|
137
|
+
for (size_t i = 0; i < num_values; ++i) {
|
|
138
|
+
const int64_t abs = std::abs(static_cast<int64_t>(values[i]) -
|
|
139
|
+
static_cast<int64_t>(median));
|
|
140
|
+
abs_deviations.push_back(static_cast<T>(abs));
|
|
141
|
+
}
|
|
142
|
+
return Median(abs_deviations.data(), num_values);
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
} // namespace robust_statistics
|
|
146
|
+
} // namespace hwy
|
|
147
|
+
|
|
148
|
+
#endif // HIGHWAY_HWY_ROBUST_STATISTICS_H_
|
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
// Copyright 2020 Google LLC
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
//
|
|
4
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
// you may not use this file except in compliance with the License.
|
|
6
|
+
// You may obtain a copy of the License at
|
|
7
|
+
//
|
|
8
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
//
|
|
10
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
// See the License for the specific language governing permissions and
|
|
14
|
+
// limitations under the License.
|
|
15
|
+
|
|
16
|
+
#ifndef HIGHWAY_HWY_TARGETS_H_
|
|
17
|
+
#define HIGHWAY_HWY_TARGETS_H_
|
|
18
|
+
|
|
19
|
+
// Allows opting out of C++ standard library usage, which is not available in
|
|
20
|
+
// some Compiler Explorer environments.
|
|
21
|
+
#ifndef HWY_NO_LIBCXX
|
|
22
|
+
#include <vector>
|
|
23
|
+
#endif
|
|
24
|
+
|
|
25
|
+
// For SIMD module implementations and their callers. Defines which targets to
|
|
26
|
+
// generate and call.
|
|
27
|
+
|
|
28
|
+
#include "hwy/base.h"
|
|
29
|
+
#include "hwy/detect_targets.h"
|
|
30
|
+
#include "hwy/highway_export.h"
|
|
31
|
+
|
|
32
|
+
#if !HWY_ARCH_RVV && !defined(HWY_NO_LIBCXX)
|
|
33
|
+
#include <atomic>
|
|
34
|
+
#endif
|
|
35
|
+
|
|
36
|
+
namespace hwy {
|
|
37
|
+
|
|
38
|
+
// Returns bitfield of enabled targets that are supported on this CPU; there is
|
|
39
|
+
// always at least one such target, hence the return value is never 0. The
|
|
40
|
+
// targets returned may change after calling DisableTargets. This function is
|
|
41
|
+
// always defined, but the HWY_SUPPORTED_TARGETS wrapper may allow eliding
|
|
42
|
+
// calls to it if there is only a single target enabled.
|
|
43
|
+
HWY_DLLEXPORT int64_t SupportedTargets();
|
|
44
|
+
|
|
45
|
+
// Evaluates to a function call, or literal if there is a single target.
|
|
46
|
+
#if (HWY_TARGETS & (HWY_TARGETS - 1)) == 0
|
|
47
|
+
#define HWY_SUPPORTED_TARGETS HWY_TARGETS
|
|
48
|
+
#else
|
|
49
|
+
#define HWY_SUPPORTED_TARGETS hwy::SupportedTargets()
|
|
50
|
+
#endif
|
|
51
|
+
|
|
52
|
+
// Subsequent SupportedTargets will not return targets whose bit(s) are set in
|
|
53
|
+
// `disabled_targets`. Exception: if SupportedTargets would return 0, it will
|
|
54
|
+
// instead return HWY_STATIC_TARGET (there must always be one target to call).
|
|
55
|
+
//
|
|
56
|
+
// This function is useful for disabling targets known to be buggy, or if the
|
|
57
|
+
// best available target is undesirable (perhaps due to throttling or memory
|
|
58
|
+
// bandwidth limitations). Use SetSupportedTargetsForTest instead of this
|
|
59
|
+
// function for iteratively enabling specific targets for testing.
|
|
60
|
+
HWY_DLLEXPORT void DisableTargets(int64_t disabled_targets);
|
|
61
|
+
|
|
62
|
+
// Subsequent SupportedTargets will return the given set of targets, except
|
|
63
|
+
// those disabled via DisableTargets. Call with a mask of 0 to disable the mock
|
|
64
|
+
// and return to the normal SupportedTargets behavior. Used to run tests for
|
|
65
|
+
// all targets.
|
|
66
|
+
HWY_DLLEXPORT void SetSupportedTargetsForTest(int64_t targets);
|
|
67
|
+
|
|
68
|
+
#ifndef HWY_NO_LIBCXX
|
|
69
|
+
|
|
70
|
+
// Return the list of targets in HWY_TARGETS supported by the CPU as a list of
|
|
71
|
+
// individual HWY_* target macros such as HWY_SCALAR or HWY_NEON. This list
|
|
72
|
+
// is affected by the current SetSupportedTargetsForTest() mock if any.
|
|
73
|
+
HWY_INLINE std::vector<int64_t> SupportedAndGeneratedTargets() {
|
|
74
|
+
std::vector<int64_t> ret;
|
|
75
|
+
for (int64_t targets = SupportedTargets() & HWY_TARGETS; targets != 0;
|
|
76
|
+
targets = targets & (targets - 1)) {
|
|
77
|
+
int64_t current_target = targets & ~(targets - 1);
|
|
78
|
+
ret.push_back(current_target);
|
|
79
|
+
}
|
|
80
|
+
return ret;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
#endif // HWY_NO_LIBCXX
|
|
84
|
+
|
|
85
|
+
// Returns a string literal naming the single target bit `target`, or
// "Unknown" if it is not one of the targets of the architecture being
// compiled for (nor EMU128/SCALAR).
static inline HWY_MAYBE_UNUSED const char* TargetName(int64_t target) {
  // Fallback; must satisfy gtest IsValidParamName().
  const char* name = "Unknown";

  switch (target) {
#if HWY_ARCH_X86
    case HWY_SSE2:
      name = "SSE2";
      break;
    case HWY_SSSE3:
      name = "SSSE3";
      break;
    case HWY_SSE4:
      name = "SSE4";
      break;
    case HWY_AVX2:
      name = "AVX2";
      break;
    case HWY_AVX3:
      name = "AVX3";
      break;
    case HWY_AVX3_DL:
      name = "AVX3_DL";
      break;
    case HWY_AVX3_ZEN4:
      name = "AVX3_ZEN4";
      break;
    case HWY_AVX3_SPR:
      name = "AVX3_SPR";
      break;
#endif

#if HWY_ARCH_ARM
    case HWY_SVE2_128:
      name = "SVE2_128";
      break;
    case HWY_SVE_256:
      name = "SVE_256";
      break;
    case HWY_SVE2:
      name = "SVE2";
      break;
    case HWY_SVE:
      name = "SVE";
      break;
    case HWY_NEON:
      name = "NEON";
      break;
    case HWY_NEON_WITHOUT_AES:
      name = "NEON_WITHOUT_AES";
      break;
#endif

#if HWY_ARCH_PPC
    case HWY_PPC8:
      name = "PPC8";
      break;
    case HWY_PPC9:
      name = "PPC9";
      break;
    case HWY_PPC10:
      name = "PPC10";
      break;
#endif

#if HWY_ARCH_WASM
    case HWY_WASM:
      name = "WASM";
      break;
    case HWY_WASM_EMU256:
      name = "WASM_EMU256";
      break;
#endif

#if HWY_ARCH_RVV
    case HWY_RVV:
      name = "RVV";
      break;
#endif

    // Available on every architecture.
    case HWY_EMU128:
      name = "EMU128";
      break;
    case HWY_SCALAR:
      name = "SCALAR";
      break;

    default:
      break;
  }

  return name;
}
|
|
151
|
+
|
|
152
|
+
// The maximum number of dynamic targets on any architecture is defined by
|
|
153
|
+
// HWY_MAX_DYNAMIC_TARGETS and depends on the arch.
|
|
154
|
+
|
|
155
|
+
// For the ChosenTarget mask and index we use a different bit arrangement than
|
|
156
|
+
// in the HWY_TARGETS mask. Only the targets involved in the current
|
|
157
|
+
// architecture are used in this mask, and therefore only the least significant
|
|
158
|
+
// (HWY_MAX_DYNAMIC_TARGETS + 2) bits of the int64_t mask are used. The least
|
|
159
|
+
// significant bit is set when the mask is not initialized, the next
|
|
160
|
+
// HWY_MAX_DYNAMIC_TARGETS more significant bits are a range of bits from the
|
|
161
|
+
// HWY_TARGETS or SupportedTargets() mask for the given architecture shifted to
|
|
162
|
+
// that position and the next more significant bit is used for HWY_SCALAR (if
|
|
163
|
+
// HWY_COMPILE_ONLY_SCALAR is defined) or HWY_EMU128. Because of this we need to
|
|
164
|
+
// define equivalent values for HWY_TARGETS in this representation.
|
|
165
|
+
// This mask representation allows using ctz() on this mask to obtain a small
|
|
166
|
+
// number that's used as an index of the table for dynamic dispatch. In this
|
|
167
|
+
// way the first entry is used when the mask is uninitialized, the following
|
|
168
|
+
// HWY_MAX_DYNAMIC_TARGETS are for dynamic dispatch and the last one is for
|
|
169
|
+
// scalar.
|
|
170
|
+
|
|
171
|
+
// The HWY_SCALAR/HWY_EMU128 bit in the ChosenTarget mask format.
|
|
172
|
+
#define HWY_CHOSEN_TARGET_MASK_SCALAR (1LL << (HWY_MAX_DYNAMIC_TARGETS + 1))
|
|
173
|
+
|
|
174
|
+
// Converts from a HWY_TARGETS mask to a ChosenTarget mask format for the
|
|
175
|
+
// current architecture.
|
|
176
|
+
#define HWY_CHOSEN_TARGET_SHIFT(X) \
|
|
177
|
+
((((X) >> (HWY_HIGHEST_TARGET_BIT + 1 - HWY_MAX_DYNAMIC_TARGETS)) & \
|
|
178
|
+
((1LL << HWY_MAX_DYNAMIC_TARGETS) - 1)) \
|
|
179
|
+
<< 1)
|
|
180
|
+
|
|
181
|
+
// The HWY_TARGETS mask in the ChosenTarget mask format.
|
|
182
|
+
#define HWY_CHOSEN_TARGET_MASK_TARGETS \
|
|
183
|
+
(HWY_CHOSEN_TARGET_SHIFT(HWY_TARGETS) | HWY_CHOSEN_TARGET_MASK_SCALAR | 1LL)
|
|
184
|
+
|
|
185
|
+
#if HWY_ARCH_X86
|
|
186
|
+
// Maximum number of dynamic targets, changing this value is an ABI incompatible
|
|
187
|
+
// change
|
|
188
|
+
#define HWY_MAX_DYNAMIC_TARGETS 15
|
|
189
|
+
#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_X86
|
|
190
|
+
// These must match the order in which the HWY_TARGETS are defined
|
|
191
|
+
// starting by the least significant (HWY_HIGHEST_TARGET_BIT + 1 -
|
|
192
|
+
// HWY_MAX_DYNAMIC_TARGETS) bit. This list must contain exactly
|
|
193
|
+
// HWY_MAX_DYNAMIC_TARGETS elements and does not include SCALAR. The first entry
|
|
194
|
+
// corresponds to the best target. Don't include a "," at the end of the list.
|
|
195
|
+
#define HWY_CHOOSE_TARGET_LIST(func_name) \
|
|
196
|
+
nullptr, /* reserved */ \
|
|
197
|
+
nullptr, /* reserved */ \
|
|
198
|
+
nullptr, /* reserved */ \
|
|
199
|
+
nullptr, /* reserved */ \
|
|
200
|
+
HWY_CHOOSE_AVX3_SPR(func_name), /* AVX3_SPR */ \
|
|
201
|
+
nullptr, /* reserved */ \
|
|
202
|
+
HWY_CHOOSE_AVX3_ZEN4(func_name), /* AVX3_ZEN4 */ \
|
|
203
|
+
HWY_CHOOSE_AVX3_DL(func_name), /* AVX3_DL */ \
|
|
204
|
+
HWY_CHOOSE_AVX3(func_name), /* AVX3 */ \
|
|
205
|
+
HWY_CHOOSE_AVX2(func_name), /* AVX2 */ \
|
|
206
|
+
nullptr, /* AVX */ \
|
|
207
|
+
HWY_CHOOSE_SSE4(func_name), /* SSE4 */ \
|
|
208
|
+
HWY_CHOOSE_SSSE3(func_name), /* SSSE3 */ \
|
|
209
|
+
nullptr, /* reserved - SSE3? */ \
|
|
210
|
+
HWY_CHOOSE_SSE2(func_name) /* SSE2 */
|
|
211
|
+
|
|
212
|
+
#elif HWY_ARCH_ARM
|
|
213
|
+
// See HWY_ARCH_X86 above for details.
|
|
214
|
+
#define HWY_MAX_DYNAMIC_TARGETS 15
|
|
215
|
+
#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_ARM
|
|
216
|
+
#define HWY_CHOOSE_TARGET_LIST(func_name) \
|
|
217
|
+
nullptr, /* reserved */ \
|
|
218
|
+
nullptr, /* reserved */ \
|
|
219
|
+
nullptr, /* reserved */ \
|
|
220
|
+
nullptr, /* reserved */ \
|
|
221
|
+
nullptr, /* reserved */ \
|
|
222
|
+
nullptr, /* reserved */ \
|
|
223
|
+
nullptr, /* reserved */ \
|
|
224
|
+
nullptr, /* reserved */ \
|
|
225
|
+
nullptr, /* reserved */ \
|
|
226
|
+
HWY_CHOOSE_SVE2_128(func_name), /* SVE2 128-bit */ \
|
|
227
|
+
HWY_CHOOSE_SVE_256(func_name), /* SVE 256-bit */ \
|
|
228
|
+
HWY_CHOOSE_SVE2(func_name), /* SVE2 */ \
|
|
229
|
+
HWY_CHOOSE_SVE(func_name), /* SVE */ \
|
|
230
|
+
HWY_CHOOSE_NEON(func_name), /* NEON */ \
|
|
231
|
+
HWY_CHOOSE_NEON_WITHOUT_AES(func_name) /* NEON without AES */
|
|
232
|
+
|
|
233
|
+
#elif HWY_ARCH_RVV
|
|
234
|
+
// See HWY_ARCH_X86 above for details.
|
|
235
|
+
#define HWY_MAX_DYNAMIC_TARGETS 9
|
|
236
|
+
#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_RVV
|
|
237
|
+
#define HWY_CHOOSE_TARGET_LIST(func_name) \
|
|
238
|
+
nullptr, /* reserved */ \
|
|
239
|
+
nullptr, /* reserved */ \
|
|
240
|
+
nullptr, /* reserved */ \
|
|
241
|
+
nullptr, /* reserved */ \
|
|
242
|
+
nullptr, /* reserved */ \
|
|
243
|
+
nullptr, /* reserved */ \
|
|
244
|
+
nullptr, /* reserved */ \
|
|
245
|
+
HWY_CHOOSE_RVV(func_name), /* RVV */ \
|
|
246
|
+
nullptr /* reserved */
|
|
247
|
+
|
|
248
|
+
#elif HWY_ARCH_PPC
|
|
249
|
+
// See HWY_ARCH_X86 above for details.
|
|
250
|
+
#define HWY_MAX_DYNAMIC_TARGETS 9
|
|
251
|
+
#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_PPC
|
|
252
|
+
#define HWY_CHOOSE_TARGET_LIST(func_name) \
|
|
253
|
+
nullptr, /* reserved */ \
|
|
254
|
+
nullptr, /* reserved */ \
|
|
255
|
+
nullptr, /* reserved */ \
|
|
256
|
+
nullptr, /* reserved */ \
|
|
257
|
+
HWY_CHOOSE_PPC10(func_name), /* PPC10 */ \
|
|
258
|
+
HWY_CHOOSE_PPC9(func_name), /* PPC9 */ \
|
|
259
|
+
HWY_CHOOSE_PPC8(func_name), /* PPC8 */ \
|
|
260
|
+
nullptr, /* reserved (VSX or AltiVec) */ \
|
|
261
|
+
nullptr /* reserved (VSX or AltiVec) */
|
|
262
|
+
|
|
263
|
+
#elif HWY_ARCH_WASM
|
|
264
|
+
// See HWY_ARCH_X86 above for details.
|
|
265
|
+
#define HWY_MAX_DYNAMIC_TARGETS 9
|
|
266
|
+
#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_WASM
|
|
267
|
+
#define HWY_CHOOSE_TARGET_LIST(func_name) \
|
|
268
|
+
nullptr, /* reserved */ \
|
|
269
|
+
nullptr, /* reserved */ \
|
|
270
|
+
nullptr, /* reserved */ \
|
|
271
|
+
nullptr, /* reserved */ \
|
|
272
|
+
nullptr, /* reserved */ \
|
|
273
|
+
nullptr, /* reserved */ \
|
|
274
|
+
HWY_CHOOSE_WASM_EMU256(func_name), /* WASM_EMU256 */ \
|
|
275
|
+
HWY_CHOOSE_WASM(func_name), /* WASM */ \
|
|
276
|
+
nullptr /* reserved */
|
|
277
|
+
|
|
278
|
+
#else
|
|
279
|
+
// Unknown architecture, will use HWY_SCALAR without dynamic dispatch, though
|
|
280
|
+
// still creating single-entry tables in HWY_EXPORT to ensure portability.
|
|
281
|
+
#define HWY_MAX_DYNAMIC_TARGETS 1
|
|
282
|
+
#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_SCALAR
|
|
283
|
+
#endif
|
|
284
|
+
|
|
285
|
+
// Bitfield of supported and enabled targets. The format differs from that of
|
|
286
|
+
// HWY_TARGETS; the lowest bit governs the first function pointer (which is
|
|
287
|
+
// special in that it calls FunctionCache, then Update, then dispatches to the
|
|
288
|
+
// actual implementation) in the tables created by HWY_EXPORT. Monostate (see
|
|
289
|
+
// GetChosenTarget), thread-safe except on RVV.
|
|
290
|
+
struct ChosenTarget {
|
|
291
|
+
public:
|
|
292
|
+
// Reset bits according to `targets` (typically the return value of
|
|
293
|
+
// SupportedTargets()). Postcondition: IsInitialized() == true.
|
|
294
|
+
void Update(int64_t targets) {
|
|
295
|
+
// These are `targets` shifted downwards, see above. Also include SCALAR
|
|
296
|
+
// (corresponds to the last entry in the function table) as fallback.
|
|
297
|
+
StoreMask(HWY_CHOSEN_TARGET_SHIFT(targets) | HWY_CHOSEN_TARGET_MASK_SCALAR);
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
// Reset to the uninitialized state, so that FunctionCache will call Update
|
|
301
|
+
// during the next HWY_DYNAMIC_DISPATCH, and IsInitialized returns false.
|
|
302
|
+
void DeInit() { StoreMask(1); }
|
|
303
|
+
|
|
304
|
+
// Whether Update was called. This indicates whether any HWY_DYNAMIC_DISPATCH
|
|
305
|
+
// function was called, which we check in tests.
|
|
306
|
+
bool IsInitialized() const { return LoadMask() != 1; }
|
|
307
|
+
|
|
308
|
+
// Return the index in the dynamic dispatch table to be used by the current
|
|
309
|
+
// CPU. Note that this method must be in the header file so it uses the value
|
|
310
|
+
// of HWY_CHOSEN_TARGET_MASK_TARGETS defined in the translation unit that
|
|
311
|
+
// calls it, which may be different from others. This means we only enable
|
|
312
|
+
// those targets that were actually compiled in this module.
|
|
313
|
+
size_t HWY_INLINE GetIndex() const {
|
|
314
|
+
return hwy::Num0BitsBelowLS1Bit_Nonzero64(
|
|
315
|
+
static_cast<uint64_t>(LoadMask() & HWY_CHOSEN_TARGET_MASK_TARGETS));
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
private:
|
|
319
|
+
// TODO(janwas): remove RVV once <atomic> is available
|
|
320
|
+
#if HWY_ARCH_RVV || defined(HWY_NO_LIBCXX)
|
|
321
|
+
int64_t LoadMask() const { return mask_; }
|
|
322
|
+
void StoreMask(int64_t mask) { mask_ = mask; }
|
|
323
|
+
|
|
324
|
+
int64_t mask_{1}; // Initialized to 1 so GetIndex() returns 0.
|
|
325
|
+
#else
|
|
326
|
+
int64_t LoadMask() const { return mask_.load(); }
|
|
327
|
+
void StoreMask(int64_t mask) { mask_.store(mask); }
|
|
328
|
+
|
|
329
|
+
std::atomic<int64_t> mask_{1}; // Initialized to 1 so GetIndex() returns 0.
|
|
330
|
+
#endif // HWY_ARCH_RVV
|
|
331
|
+
};
|
|
332
|
+
|
|
333
|
+
// For internal use (e.g. by FunctionCache and DisableTargets).
|
|
334
|
+
HWY_DLLEXPORT ChosenTarget& GetChosenTarget();
|
|
335
|
+
|
|
336
|
+
} // namespace hwy
|
|
337
|
+
|
|
338
|
+
#endif // HIGHWAY_HWY_TARGETS_H_
|