@img/sharp-libvips-dev 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -2
- package/include/aom/aom_decoder.h +1 -1
- package/include/aom/aom_encoder.h +7 -1
- package/include/aom/aom_image.h +24 -12
- package/include/aom/aom_integer.h +3 -3
- package/include/aom/aomcx.h +15 -0
- package/include/aom/aomdx.h +5 -2
- package/include/archive.h +7 -5
- package/include/archive_entry.h +5 -3
- package/include/cgif.h +3 -0
- package/include/freetype2/freetype/config/ftoption.h +1 -1
- package/include/fribidi/fribidi-config.h +2 -2
- package/include/fribidi/fribidi-unicode-version.h +3 -3
- package/include/glib-2.0/gio/gappinfo.h +40 -25
- package/include/glib-2.0/gio/gasyncresult.h +1 -1
- package/include/glib-2.0/gio/gconverter.h +5 -0
- package/include/glib-2.0/gio/gdbusintrospection.h +1 -1
- package/include/glib-2.0/gio/gfile.h +16 -0
- package/include/glib-2.0/gio/gio-visibility.h +34 -0
- package/include/glib-2.0/gio/gsettings.h +8 -0
- package/include/glib-2.0/gio/gvfs.h +2 -2
- package/include/glib-2.0/girepository/gi-visibility.h +34 -0
- package/include/glib-2.0/glib/gbookmarkfile.h +1 -1
- package/include/glib-2.0/glib/giochannel.h +2 -2
- package/include/glib-2.0/glib/glib-visibility.h +34 -0
- package/include/glib-2.0/glib/gmacros.h +12 -5
- package/include/glib-2.0/glib/gmain.h +93 -7
- package/include/glib-2.0/glib/gqsort.h +8 -1
- package/include/glib-2.0/glib/gstrfuncs.h +0 -12
- package/include/glib-2.0/glib/gstrvbuilder.h +3 -0
- package/include/glib-2.0/glib/gunicode.h +1 -1
- package/include/glib-2.0/glib/gversionmacros.h +9 -0
- package/include/glib-2.0/gmodule/gmodule-visibility.h +34 -0
- package/include/glib-2.0/gobject/gobject-visibility.h +34 -0
- package/include/glib-2.0/gobject/gtype.h +6 -6
- package/include/harfbuzz/hb-buffer.h +6 -0
- package/include/harfbuzz/hb-common.h +6 -9
- package/include/harfbuzz/hb-cplusplus.hh +8 -11
- package/include/harfbuzz/hb-subset.h +17 -4
- package/include/harfbuzz/hb-version.h +3 -3
- package/include/hwy/abort.h +28 -0
- package/include/hwy/aligned_allocator.h +48 -1
- package/include/hwy/base.h +235 -34
- package/include/hwy/detect_compiler_arch.h +84 -10
- package/include/hwy/detect_targets.h +95 -29
- package/include/hwy/foreach_target.h +12 -1
- package/include/hwy/highway.h +205 -50
- package/include/hwy/ops/arm_neon-inl.h +841 -99
- package/include/hwy/ops/arm_sve-inl.h +413 -141
- package/include/hwy/ops/emu128-inl.h +373 -360
- package/include/hwy/ops/generic_ops-inl.h +804 -401
- package/include/hwy/ops/inside-inl.h +691 -0
- package/include/hwy/ops/ppc_vsx-inl.h +456 -166
- package/include/hwy/ops/rvv-inl.h +537 -249
- package/include/hwy/ops/scalar-inl.h +169 -79
- package/include/hwy/ops/set_macros-inl.h +106 -18
- package/include/hwy/ops/shared-inl.h +23 -0
- package/include/hwy/ops/wasm_128-inl.h +130 -108
- package/include/hwy/ops/x86_128-inl.h +1892 -577
- package/include/hwy/ops/x86_256-inl.h +625 -184
- package/include/hwy/ops/x86_512-inl.h +733 -131
- package/include/hwy/targets.h +22 -21
- package/include/hwy/timer-inl.h +3 -3
- package/include/hwy/timer.h +5 -1
- package/include/libheif/heif.h +170 -15
- package/include/libheif/heif_items.h +237 -0
- package/include/libheif/heif_properties.h +38 -2
- package/include/libheif/heif_regions.h +1 -1
- package/include/libheif/heif_version.h +2 -2
- package/include/libpng16/pnglibconf.h +1 -1
- package/include/librsvg-2.0/librsvg/rsvg-cairo.h +1 -1
- package/include/librsvg-2.0/librsvg/rsvg-features.h +3 -4
- package/include/librsvg-2.0/librsvg/rsvg-pixbuf.h +235 -0
- package/include/librsvg-2.0/librsvg/rsvg-version.h +3 -3
- package/include/librsvg-2.0/librsvg/rsvg.h +55 -176
- package/include/libxml2/libxml/HTMLparser.h +12 -19
- package/include/libxml2/libxml/c14n.h +1 -12
- package/include/libxml2/libxml/debugXML.h +1 -1
- package/include/libxml2/libxml/encoding.h +9 -0
- package/include/libxml2/libxml/entities.h +12 -1
- package/include/libxml2/libxml/hash.h +19 -0
- package/include/libxml2/libxml/list.h +2 -2
- package/include/libxml2/libxml/nanohttp.h +17 -0
- package/include/libxml2/libxml/parser.h +61 -55
- package/include/libxml2/libxml/parserInternals.h +9 -1
- package/include/libxml2/libxml/pattern.h +6 -0
- package/include/libxml2/libxml/tree.h +32 -12
- package/include/libxml2/libxml/uri.h +11 -0
- package/include/libxml2/libxml/valid.h +29 -2
- package/include/libxml2/libxml/xinclude.h +7 -0
- package/include/libxml2/libxml/xmlIO.h +21 -4
- package/include/libxml2/libxml/xmlerror.h +14 -0
- package/include/libxml2/libxml/xmlexports.h +111 -15
- package/include/libxml2/libxml/xmlmemory.h +8 -45
- package/include/libxml2/libxml/xmlreader.h +2 -0
- package/include/libxml2/libxml/xmlsave.h +5 -0
- package/include/libxml2/libxml/xmlunicode.h +165 -1
- package/include/libxml2/libxml/xmlversion.h +15 -179
- package/include/libxml2/libxml/xmlwriter.h +1 -0
- package/include/libxml2/libxml/xpath.h +4 -0
- package/include/pango-1.0/pango/pango-features.h +3 -3
- package/include/pango-1.0/pango/pango-item.h +4 -2
- package/include/pango-1.0/pango/pango-version-macros.h +25 -0
- package/include/pango-1.0/pango/pangofc-font.h +2 -1
- package/include/pnglibconf.h +1 -1
- package/include/vips/util.h +1 -2
- package/include/vips/version.h +4 -4
- package/include/webp/decode.h +58 -56
- package/include/webp/demux.h +25 -21
- package/include/webp/encode.h +44 -39
- package/include/webp/mux.h +76 -15
- package/include/webp/mux_types.h +2 -1
- package/include/webp/sharpyuv/sharpyuv.h +77 -8
- package/include/webp/types.h +29 -8
- package/include/zconf.h +1 -1
- package/include/zlib.h +12 -12
- package/package.json +1 -1
- package/versions.json +14 -15
|
@@ -21,11 +21,13 @@
|
|
|
21
21
|
#include <algorithm>
|
|
22
22
|
#include <array>
|
|
23
23
|
#include <cassert>
|
|
24
|
+
#include <cstdint>
|
|
24
25
|
#include <cstring>
|
|
25
26
|
#include <initializer_list>
|
|
26
27
|
#include <memory>
|
|
27
28
|
#include <type_traits>
|
|
28
29
|
#include <utility>
|
|
30
|
+
#include <vector>
|
|
29
31
|
|
|
30
32
|
#include "hwy/base.h"
|
|
31
33
|
#include "hwy/per_target.h"
|
|
@@ -35,9 +37,14 @@ namespace hwy {
|
|
|
35
37
|
// Minimum alignment of allocated memory for use in HWY_ASSUME_ALIGNED, which
|
|
36
38
|
// requires a literal. To prevent false sharing, this should be at least the
|
|
37
39
|
// L1 cache line size, usually 64 bytes. However, Intel's L2 prefetchers may
|
|
38
|
-
// access pairs of lines, and POWER8 also
|
|
40
|
+
// access pairs of lines, and M1 L2 and POWER8 lines are also 128 bytes.
|
|
39
41
|
#define HWY_ALIGNMENT 128
|
|
40
42
|
|
|
43
|
+
template <typename T>
|
|
44
|
+
HWY_API constexpr bool IsAligned(T* ptr, size_t align = HWY_ALIGNMENT) {
|
|
45
|
+
return reinterpret_cast<uintptr_t>(ptr) % align == 0;
|
|
46
|
+
}
|
|
47
|
+
|
|
41
48
|
// Pointers to functions equivalent to malloc/free with an opaque void* passed
|
|
42
49
|
// to them.
|
|
43
50
|
using AllocPtr = void* (*)(void* opaque, size_t bytes);
|
|
@@ -124,6 +131,46 @@ AlignedUniquePtr<T> MakeUniqueAligned(Args&&... args) {
|
|
|
124
131
|
AlignedDeleter());
|
|
125
132
|
}
|
|
126
133
|
|
|
134
|
+
template <class T>
|
|
135
|
+
struct AlignedAllocator {
|
|
136
|
+
using value_type = T;
|
|
137
|
+
|
|
138
|
+
AlignedAllocator() = default;
|
|
139
|
+
|
|
140
|
+
template <class V>
|
|
141
|
+
explicit AlignedAllocator(const AlignedAllocator<V>&) noexcept {}
|
|
142
|
+
|
|
143
|
+
template <class V>
|
|
144
|
+
value_type* allocate(V n) {
|
|
145
|
+
static_assert(std::is_integral<V>::value,
|
|
146
|
+
"AlignedAllocator only supports integer types");
|
|
147
|
+
static_assert(sizeof(V) <= sizeof(std::size_t),
|
|
148
|
+
"V n must be smaller or equal size_t to avoid overflow");
|
|
149
|
+
return static_cast<value_type*>(
|
|
150
|
+
AllocateAlignedBytes(static_cast<std::size_t>(n) * sizeof(value_type)));
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
template <class V>
|
|
154
|
+
void deallocate(value_type* p, HWY_MAYBE_UNUSED V n) {
|
|
155
|
+
return FreeAlignedBytes(p, nullptr, nullptr);
|
|
156
|
+
}
|
|
157
|
+
};
|
|
158
|
+
|
|
159
|
+
template <class T, class V>
|
|
160
|
+
constexpr bool operator==(const AlignedAllocator<T>&,
|
|
161
|
+
const AlignedAllocator<V>&) noexcept {
|
|
162
|
+
return true;
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
template <class T, class V>
|
|
166
|
+
constexpr bool operator!=(const AlignedAllocator<T>&,
|
|
167
|
+
const AlignedAllocator<V>&) noexcept {
|
|
168
|
+
return false;
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
template <class T>
|
|
172
|
+
using AlignedVector = std::vector<T, AlignedAllocator<T>>;
|
|
173
|
+
|
|
127
174
|
// Helpers for array allocators (avoids overflow)
|
|
128
175
|
namespace detail {
|
|
129
176
|
|
package/include/hwy/base.h
CHANGED
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
#ifndef HIGHWAY_HWY_BASE_H_
|
|
17
17
|
#define HIGHWAY_HWY_BASE_H_
|
|
18
18
|
|
|
19
|
-
//
|
|
19
|
+
// Target-independent definitions.
|
|
20
20
|
|
|
21
21
|
// IWYU pragma: begin_exports
|
|
22
22
|
#include <stddef.h>
|
|
@@ -25,11 +25,17 @@
|
|
|
25
25
|
#include "hwy/detect_compiler_arch.h"
|
|
26
26
|
#include "hwy/highway_export.h"
|
|
27
27
|
|
|
28
|
-
|
|
29
|
-
#define
|
|
30
|
-
#
|
|
31
|
-
#define
|
|
32
|
-
|
|
28
|
+
// API version (https://semver.org/); keep in sync with CMakeLists.txt.
|
|
29
|
+
#define HWY_MAJOR 1
|
|
30
|
+
#define HWY_MINOR 2
|
|
31
|
+
#define HWY_PATCH 0
|
|
32
|
+
|
|
33
|
+
// True if the Highway version >= major.minor.0. Added in 1.2.0.
|
|
34
|
+
#define HWY_VERSION_GE(major, minor) \
|
|
35
|
+
(HWY_MAJOR > (major) || (HWY_MAJOR == (major) && HWY_MINOR >= (minor)))
|
|
36
|
+
// True if the Highway version < major.minor.0. Added in 1.2.0.
|
|
37
|
+
#define HWY_VERSION_LT(major, minor) \
|
|
38
|
+
(HWY_MAJOR < (major) || (HWY_MAJOR == (major) && HWY_MINOR < (minor)))
|
|
33
39
|
|
|
34
40
|
// "IWYU pragma: keep" does not work for these includes, so hide from the IDE.
|
|
35
41
|
#if !HWY_IDE
|
|
@@ -47,14 +53,25 @@
|
|
|
47
53
|
|
|
48
54
|
#endif // !HWY_IDE
|
|
49
55
|
|
|
50
|
-
#
|
|
51
|
-
|
|
52
|
-
|
|
56
|
+
#ifndef HWY_HAVE_COMPARE_HEADER // allow override
|
|
57
|
+
#define HWY_HAVE_COMPARE_HEADER 0
|
|
58
|
+
#if defined(__has_include) // note: wrapper macro fails on Clang ~17
|
|
53
59
|
#if __has_include(<compare>)
|
|
60
|
+
#undef HWY_HAVE_COMPARE_HEADER
|
|
61
|
+
#define HWY_HAVE_COMPARE_HEADER 1
|
|
62
|
+
#endif // __has_include
|
|
63
|
+
#endif // defined(__has_include)
|
|
64
|
+
#endif // HWY_HAVE_COMPARE_HEADER
|
|
65
|
+
|
|
66
|
+
#ifndef HWY_HAVE_CXX20_THREE_WAY_COMPARE // allow override
|
|
67
|
+
#if !defined(HWY_NO_LIBCXX) && defined(__cpp_impl_three_way_comparison) && \
|
|
68
|
+
__cpp_impl_three_way_comparison >= 201907L && HWY_HAVE_COMPARE_HEADER
|
|
54
69
|
#include <compare>
|
|
55
70
|
#define HWY_HAVE_CXX20_THREE_WAY_COMPARE 1
|
|
71
|
+
#else
|
|
72
|
+
#define HWY_HAVE_CXX20_THREE_WAY_COMPARE 0
|
|
56
73
|
#endif
|
|
57
|
-
#endif
|
|
74
|
+
#endif // HWY_HAVE_CXX20_THREE_WAY_COMPARE
|
|
58
75
|
|
|
59
76
|
// IWYU pragma: end_exports
|
|
60
77
|
|
|
@@ -72,6 +89,7 @@
|
|
|
72
89
|
|
|
73
90
|
#include <intrin.h>
|
|
74
91
|
|
|
92
|
+
#define HWY_FUNCTION __FUNCSIG__ // function name + template args
|
|
75
93
|
#define HWY_RESTRICT __restrict
|
|
76
94
|
#define HWY_INLINE __forceinline
|
|
77
95
|
#define HWY_NOINLINE __declspec(noinline)
|
|
@@ -92,6 +110,7 @@
|
|
|
92
110
|
|
|
93
111
|
#else
|
|
94
112
|
|
|
113
|
+
#define HWY_FUNCTION __PRETTY_FUNCTION__ // function name + template args
|
|
95
114
|
#define HWY_RESTRICT __restrict__
|
|
96
115
|
// force inlining without optimization enabled creates very inefficient code
|
|
97
116
|
// that can cause compiler timeout
|
|
@@ -139,9 +158,10 @@ namespace hwy {
|
|
|
139
158
|
#define HWY_ASSUME_ALIGNED(ptr, align) (ptr) /* not supported */
|
|
140
159
|
#endif
|
|
141
160
|
|
|
142
|
-
//
|
|
161
|
+
// Returns a pointer whose type is `type` (T*), while allowing the compiler to
|
|
162
|
+
// assume that the untyped pointer `ptr` is aligned to a multiple of alignof(T).
|
|
143
163
|
#define HWY_RCAST_ALIGNED(type, ptr) \
|
|
144
|
-
reinterpret_cast<type>(HWY_ASSUME_ALIGNED((ptr), alignof(type)))
|
|
164
|
+
reinterpret_cast<type>(HWY_ASSUME_ALIGNED((ptr), alignof(RemovePtr<type>)))
|
|
145
165
|
|
|
146
166
|
// Clang and GCC require attributes on each function into which SIMD intrinsics
|
|
147
167
|
// are inlined. Support both per-function annotation (HWY_ATTR) for lambdas and
|
|
@@ -240,24 +260,41 @@ HWY_DLLEXPORT HWY_NORETURN void HWY_FORMAT(3, 4)
|
|
|
240
260
|
} \
|
|
241
261
|
} while (0)
|
|
242
262
|
|
|
243
|
-
#if HWY_HAS_FEATURE(memory_sanitizer) || defined(MEMORY_SANITIZER)
|
|
263
|
+
#if HWY_HAS_FEATURE(memory_sanitizer) || defined(MEMORY_SANITIZER) || \
|
|
264
|
+
defined(__SANITIZE_MEMORY__)
|
|
244
265
|
#define HWY_IS_MSAN 1
|
|
245
266
|
#else
|
|
246
267
|
#define HWY_IS_MSAN 0
|
|
247
268
|
#endif
|
|
248
269
|
|
|
249
|
-
#if HWY_HAS_FEATURE(address_sanitizer) || defined(ADDRESS_SANITIZER)
|
|
270
|
+
#if HWY_HAS_FEATURE(address_sanitizer) || defined(ADDRESS_SANITIZER) || \
|
|
271
|
+
defined(__SANITIZE_ADDRESS__)
|
|
250
272
|
#define HWY_IS_ASAN 1
|
|
251
273
|
#else
|
|
252
274
|
#define HWY_IS_ASAN 0
|
|
253
275
|
#endif
|
|
254
276
|
|
|
255
|
-
#if HWY_HAS_FEATURE(
|
|
277
|
+
#if HWY_HAS_FEATURE(hwaddress_sanitizer) || defined(HWADDRESS_SANITIZER) || \
|
|
278
|
+
defined(__SANITIZE_HWADDRESS__)
|
|
279
|
+
#define HWY_IS_HWASAN 1
|
|
280
|
+
#else
|
|
281
|
+
#define HWY_IS_HWASAN 0
|
|
282
|
+
#endif
|
|
283
|
+
|
|
284
|
+
#if HWY_HAS_FEATURE(thread_sanitizer) || defined(THREAD_SANITIZER) || \
|
|
285
|
+
defined(__SANITIZE_THREAD__)
|
|
256
286
|
#define HWY_IS_TSAN 1
|
|
257
287
|
#else
|
|
258
288
|
#define HWY_IS_TSAN 0
|
|
259
289
|
#endif
|
|
260
290
|
|
|
291
|
+
#if HWY_HAS_FEATURE(undefined_behavior_sanitizer) || \
|
|
292
|
+
defined(UNDEFINED_BEHAVIOR_SANITIZER)
|
|
293
|
+
#define HWY_IS_UBSAN 1
|
|
294
|
+
#else
|
|
295
|
+
#define HWY_IS_UBSAN 0
|
|
296
|
+
#endif
|
|
297
|
+
|
|
261
298
|
// MSAN may cause lengthy build times or false positives e.g. in AVX3 DemoteTo.
|
|
262
299
|
// You can disable MSAN by adding this attribute to the function that fails.
|
|
263
300
|
#if HWY_IS_MSAN
|
|
@@ -271,7 +308,8 @@ HWY_DLLEXPORT HWY_NORETURN void HWY_FORMAT(3, 4)
|
|
|
271
308
|
// Clang does not define NDEBUG, but it and GCC define __OPTIMIZE__, and recent
|
|
272
309
|
// MSVC defines NDEBUG (if not, could instead check _DEBUG).
|
|
273
310
|
#if (!defined(__OPTIMIZE__) && !defined(NDEBUG)) || HWY_IS_ASAN || \
|
|
274
|
-
HWY_IS_MSAN || HWY_IS_TSAN ||
|
|
311
|
+
HWY_IS_HWASAN || HWY_IS_MSAN || HWY_IS_TSAN || HWY_IS_UBSAN || \
|
|
312
|
+
defined(__clang_analyzer__)
|
|
275
313
|
#define HWY_IS_DEBUG_BUILD 1
|
|
276
314
|
#else
|
|
277
315
|
#define HWY_IS_DEBUG_BUILD 0
|
|
@@ -286,16 +324,6 @@ HWY_DLLEXPORT HWY_NORETURN void HWY_FORMAT(3, 4)
|
|
|
286
324
|
} while (0)
|
|
287
325
|
#endif
|
|
288
326
|
|
|
289
|
-
#if __cpp_constexpr >= 201304L
|
|
290
|
-
#define HWY_CXX14_CONSTEXPR constexpr
|
|
291
|
-
#else
|
|
292
|
-
#define HWY_CXX14_CONSTEXPR
|
|
293
|
-
#endif
|
|
294
|
-
|
|
295
|
-
#ifndef HWY_HAVE_CXX20_THREE_WAY_COMPARE
|
|
296
|
-
#define HWY_HAVE_CXX20_THREE_WAY_COMPARE 0
|
|
297
|
-
#endif
|
|
298
|
-
|
|
299
327
|
//------------------------------------------------------------------------------
|
|
300
328
|
// CopyBytes / ZeroBytes
|
|
301
329
|
|
|
@@ -304,9 +332,8 @@ HWY_DLLEXPORT HWY_NORETURN void HWY_FORMAT(3, 4)
|
|
|
304
332
|
#pragma intrinsic(memset)
|
|
305
333
|
#endif
|
|
306
334
|
|
|
307
|
-
// The source/destination must not overlap/alias.
|
|
308
335
|
template <size_t kBytes, typename From, typename To>
|
|
309
|
-
HWY_API void CopyBytes(const From* from, To* to) {
|
|
336
|
+
HWY_API void CopyBytes(const From* HWY_RESTRICT from, To* HWY_RESTRICT to) {
|
|
310
337
|
#if HWY_COMPILER_MSVC
|
|
311
338
|
memcpy(to, from, kBytes);
|
|
312
339
|
#else
|
|
@@ -352,7 +379,7 @@ HWY_API void ZeroBytes(void* to, size_t num_bytes) {
|
|
|
352
379
|
|
|
353
380
|
#if HWY_ARCH_X86
|
|
354
381
|
static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = 64; // AVX-512
|
|
355
|
-
#elif
|
|
382
|
+
#elif HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && \
|
|
356
383
|
__riscv_v_intrinsic >= 11000
|
|
357
384
|
// Not actually an upper bound on the size.
|
|
358
385
|
static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = 4096;
|
|
@@ -368,7 +395,7 @@ static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = 16;
|
|
|
368
395
|
// exceed the stack size.
|
|
369
396
|
#if HWY_ARCH_X86
|
|
370
397
|
#define HWY_ALIGN_MAX alignas(64)
|
|
371
|
-
#elif
|
|
398
|
+
#elif HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && \
|
|
372
399
|
__riscv_v_intrinsic >= 11000
|
|
373
400
|
#define HWY_ALIGN_MAX alignas(8) // only elements need be aligned
|
|
374
401
|
#else
|
|
@@ -559,6 +586,30 @@ using RemoveRef = typename RemoveRefT<T>::type;
|
|
|
559
586
|
template <class T>
|
|
560
587
|
using RemoveCvRef = RemoveConst<RemoveVolatile<RemoveRef<T>>>;
|
|
561
588
|
|
|
589
|
+
template <class T>
|
|
590
|
+
struct RemovePtrT {
|
|
591
|
+
using type = T;
|
|
592
|
+
};
|
|
593
|
+
template <class T>
|
|
594
|
+
struct RemovePtrT<T*> {
|
|
595
|
+
using type = T;
|
|
596
|
+
};
|
|
597
|
+
template <class T>
|
|
598
|
+
struct RemovePtrT<const T*> {
|
|
599
|
+
using type = T;
|
|
600
|
+
};
|
|
601
|
+
template <class T>
|
|
602
|
+
struct RemovePtrT<volatile T*> {
|
|
603
|
+
using type = T;
|
|
604
|
+
};
|
|
605
|
+
template <class T>
|
|
606
|
+
struct RemovePtrT<const volatile T*> {
|
|
607
|
+
using type = T;
|
|
608
|
+
};
|
|
609
|
+
|
|
610
|
+
template <class T>
|
|
611
|
+
using RemovePtr = typename RemovePtrT<T>::type;
|
|
612
|
+
|
|
562
613
|
// Insert into template/function arguments to enable this overload only for
|
|
563
614
|
// vectors of exactly, at most (LE), or more than (GT) this many bytes.
|
|
564
615
|
//
|
|
@@ -576,6 +627,7 @@ using RemoveCvRef = RemoveConst<RemoveVolatile<RemoveRef<T>>>;
|
|
|
576
627
|
#define HWY_IF_LANES_GT(kN, lanes) hwy::EnableIf<(kN > lanes)>* = nullptr
|
|
577
628
|
|
|
578
629
|
#define HWY_IF_UNSIGNED(T) hwy::EnableIf<!hwy::IsSigned<T>()>* = nullptr
|
|
630
|
+
#define HWY_IF_NOT_UNSIGNED(T) hwy::EnableIf<hwy::IsSigned<T>()>* = nullptr
|
|
579
631
|
#define HWY_IF_SIGNED(T) \
|
|
580
632
|
hwy::EnableIf<hwy::IsSigned<T>() && !hwy::IsFloat<T>() && \
|
|
581
633
|
!hwy::IsSpecialFloat<T>()>* = nullptr
|
|
@@ -1003,7 +1055,7 @@ HWY_API HWY_BITCASTSCALAR_CONSTEXPR To BitCastScalar(const From& val) {
|
|
|
1003
1055
|
|
|
1004
1056
|
// RVV with f16 extension supports _Float16 and f16 vector ops. If set, implies
|
|
1005
1057
|
// HWY_HAVE_FLOAT16.
|
|
1006
|
-
#if
|
|
1058
|
+
#if HWY_ARCH_RISCV && defined(__riscv_zvfh) && HWY_COMPILER_CLANG >= 1600
|
|
1007
1059
|
#define HWY_RVV_HAVE_F16_VEC 1
|
|
1008
1060
|
#else
|
|
1009
1061
|
#define HWY_RVV_HAVE_F16_VEC 0
|
|
@@ -1351,8 +1403,22 @@ HWY_API HWY_F16_CONSTEXPR float16_t F16FromF32(float f32) {
|
|
|
1351
1403
|
// 1[01] + 10 = 1[11]
|
|
1352
1404
|
// 1[10] + 10 = C0[00] (round up toward even with C=1 carry out)
|
|
1353
1405
|
// 1[11] + 10 = C0[01] (round up toward even with C=1 carry out)
|
|
1354
|
-
|
|
1355
|
-
|
|
1406
|
+
|
|
1407
|
+
// If |f32| >= 2^-24, f16_ulp_bit_idx is the index of the F32 mantissa bit
|
|
1408
|
+
// that will be shifted down into the ULP bit of the rounded down F16 result
|
|
1409
|
+
|
|
1410
|
+
// The biased F32 exponent of 2^-14 (the smallest positive normal F16 value)
|
|
1411
|
+
// is 113, and bit 13 of the F32 mantissa will be shifted down into the ULP
|
|
1412
|
+
// bit of the rounded down F16 result if |f32| >= 2^-14
|
|
1413
|
+
|
|
1414
|
+
// If |f32| < 2^-24, f16_ulp_bit_idx is equal to 24 as there are 24 mantissa
|
|
1415
|
+
// bits (including the implied 1 bit) in the mantissa of a normal F32 value
|
|
1416
|
+
// and as we want to round up the mantissa if |f32| > 2^-25 && |f32| < 2^-24
|
|
1417
|
+
const int32_t f16_ulp_bit_idx =
|
|
1418
|
+
HWY_MIN(HWY_MAX(126 - static_cast<int32_t>(biased_exp32), 13), 24);
|
|
1419
|
+
const uint32_t odd_bit = ((mantissa32 | 0x800000u) >> f16_ulp_bit_idx) & 1;
|
|
1420
|
+
const uint32_t rounded =
|
|
1421
|
+
mantissa32 + odd_bit + (uint32_t{1} << (f16_ulp_bit_idx - 1)) - 1u;
|
|
1356
1422
|
const bool carry = rounded >= (1u << 23);
|
|
1357
1423
|
|
|
1358
1424
|
const int32_t exp = static_cast<int32_t>(biased_exp32) - 127 + carry;
|
|
@@ -1741,12 +1807,42 @@ HWY_API HWY_BF16_CONSTEXPR float F32FromBF16(bfloat16_t bf) {
|
|
|
1741
1807
|
#endif
|
|
1742
1808
|
}
|
|
1743
1809
|
|
|
1810
|
+
namespace detail {
|
|
1811
|
+
|
|
1812
|
+
// Returns the increment to add to the bits of a finite F32 value to round a
|
|
1813
|
+
// finite F32 to the nearest BF16 value
|
|
1814
|
+
static HWY_INLINE HWY_MAYBE_UNUSED constexpr uint32_t F32BitsToBF16RoundIncr(
|
|
1815
|
+
const uint32_t f32_bits) {
|
|
1816
|
+
return static_cast<uint32_t>(((f32_bits & 0x7FFFFFFFu) < 0x7F800000u)
|
|
1817
|
+
? (0x7FFFu + ((f32_bits >> 16) & 1u))
|
|
1818
|
+
: 0u);
|
|
1819
|
+
}
|
|
1820
|
+
|
|
1821
|
+
// Converts f32_bits (which is the bits of a F32 value) to BF16 bits,
|
|
1822
|
+
// rounded to the nearest BF16 value
|
|
1823
|
+
static HWY_INLINE HWY_MAYBE_UNUSED constexpr uint16_t F32BitsToBF16Bits(
|
|
1824
|
+
const uint32_t f32_bits) {
|
|
1825
|
+
// Round f32_bits to the nearest BF16 by first adding
|
|
1826
|
+
// F32BitsToBF16RoundIncr(f32_bits) to f32_bits and then right shifting
|
|
1827
|
+
// f32_bits + F32BitsToBF16RoundIncr(f32_bits) by 16
|
|
1828
|
+
|
|
1829
|
+
// If f32_bits is the bit representation of a NaN F32 value, make sure that
|
|
1830
|
+
// bit 6 of the BF16 result is set to convert SNaN F32 values to QNaN BF16
|
|
1831
|
+
// values and to prevent NaN F32 values from being converted to an infinite
|
|
1832
|
+
// BF16 value
|
|
1833
|
+
return static_cast<uint16_t>(
|
|
1834
|
+
((f32_bits + F32BitsToBF16RoundIncr(f32_bits)) >> 16) |
|
|
1835
|
+
(static_cast<uint32_t>((f32_bits & 0x7FFFFFFFu) > 0x7F800000u) << 6));
|
|
1836
|
+
}
|
|
1837
|
+
|
|
1838
|
+
} // namespace detail
|
|
1839
|
+
|
|
1744
1840
|
HWY_API HWY_BF16_CONSTEXPR bfloat16_t BF16FromF32(float f) {
|
|
1745
1841
|
#if HWY_HAVE_SCALAR_BF16_OPERATORS
|
|
1746
1842
|
return static_cast<bfloat16_t>(f);
|
|
1747
1843
|
#else
|
|
1748
1844
|
return bfloat16_t::FromBits(
|
|
1749
|
-
|
|
1845
|
+
detail::F32BitsToBF16Bits(BitCastScalar<uint32_t>(f)));
|
|
1750
1846
|
#endif
|
|
1751
1847
|
}
|
|
1752
1848
|
|
|
@@ -2418,6 +2514,51 @@ constexpr inline size_t RoundUpTo(size_t what, size_t align) {
|
|
|
2418
2514
|
return DivCeil(what, align) * align;
|
|
2419
2515
|
}
|
|
2420
2516
|
|
|
2517
|
+
// Works for any nonzero `align`; if a power of two, compiler emits AND.
|
|
2518
|
+
constexpr inline size_t RoundDownTo(size_t what, size_t align) {
|
|
2519
|
+
return what - (what % align);
|
|
2520
|
+
}
|
|
2521
|
+
|
|
2522
|
+
namespace detail {
|
|
2523
|
+
|
|
2524
|
+
// T is unsigned or T is signed and (val >> shift_amt) is an arithmetic right
|
|
2525
|
+
// shift
|
|
2526
|
+
template <class T>
|
|
2527
|
+
static HWY_INLINE constexpr T ScalarShr(hwy::UnsignedTag /*type_tag*/, T val,
|
|
2528
|
+
int shift_amt) {
|
|
2529
|
+
return static_cast<T>(val >> shift_amt);
|
|
2530
|
+
}
|
|
2531
|
+
|
|
2532
|
+
// T is signed and (val >> shift_amt) is a non-arithmetic right shift
|
|
2533
|
+
template <class T>
|
|
2534
|
+
static HWY_INLINE constexpr T ScalarShr(hwy::SignedTag /*type_tag*/, T val,
|
|
2535
|
+
int shift_amt) {
|
|
2536
|
+
using TU = MakeUnsigned<MakeLaneTypeIfInteger<T>>;
|
|
2537
|
+
return static_cast<T>(
|
|
2538
|
+
(val < 0) ? static_cast<TU>(
|
|
2539
|
+
~(static_cast<TU>(~static_cast<TU>(val)) >> shift_amt))
|
|
2540
|
+
: static_cast<TU>(static_cast<TU>(val) >> shift_amt));
|
|
2541
|
+
}
|
|
2542
|
+
|
|
2543
|
+
} // namespace detail
|
|
2544
|
+
|
|
2545
|
+
// If T is a signed integer type, ScalarShr is guaranteed to perform an
|
|
2546
|
+
// arithmetic right shift
|
|
2547
|
+
|
|
2548
|
+
// Otherwise, if T is an unsigned integer type, ScalarShr is guaranteed to
|
|
2549
|
+
// perform a logical right shift
|
|
2550
|
+
template <class T, HWY_IF_INTEGER(RemoveCvRef<T>)>
|
|
2551
|
+
HWY_API constexpr RemoveCvRef<T> ScalarShr(T val, int shift_amt) {
|
|
2552
|
+
using NonCvRefT = RemoveCvRef<T>;
|
|
2553
|
+
return detail::ScalarShr(
|
|
2554
|
+
hwy::SizeTag<((IsSigned<NonCvRefT>() &&
|
|
2555
|
+
(LimitsMin<NonCvRefT>() >> (sizeof(T) * 8 - 1)) !=
|
|
2556
|
+
static_cast<NonCvRefT>(-1))
|
|
2557
|
+
? 0x100
|
|
2558
|
+
: 0)>(),
|
|
2559
|
+
static_cast<NonCvRefT>(val), shift_amt);
|
|
2560
|
+
}
|
|
2561
|
+
|
|
2421
2562
|
// Undefined results for x == 0.
|
|
2422
2563
|
HWY_API size_t Num0BitsBelowLS1Bit_Nonzero32(const uint32_t x) {
|
|
2423
2564
|
HWY_DASSERT(x != 0);
|
|
@@ -2579,6 +2720,7 @@ HWY_INLINE constexpr T AddWithWraparound(T t, T2 n) {
|
|
|
2579
2720
|
}
|
|
2580
2721
|
|
|
2581
2722
|
#if HWY_COMPILER_MSVC && HWY_ARCH_X86_64
|
|
2723
|
+
#pragma intrinsic(_mul128)
|
|
2582
2724
|
#pragma intrinsic(_umul128)
|
|
2583
2725
|
#endif
|
|
2584
2726
|
|
|
@@ -2602,6 +2744,65 @@ HWY_API uint64_t Mul128(uint64_t a, uint64_t b, uint64_t* HWY_RESTRICT upper) {
|
|
|
2602
2744
|
#endif
|
|
2603
2745
|
}
|
|
2604
2746
|
|
|
2747
|
+
HWY_API int64_t Mul128(int64_t a, int64_t b, int64_t* HWY_RESTRICT upper) {
|
|
2748
|
+
#if defined(__SIZEOF_INT128__)
|
|
2749
|
+
__int128_t product = (__int128_t)a * (__int128_t)b;
|
|
2750
|
+
*upper = (int64_t)(product >> 64);
|
|
2751
|
+
return (int64_t)(product & 0xFFFFFFFFFFFFFFFFULL);
|
|
2752
|
+
#elif HWY_COMPILER_MSVC && HWY_ARCH_X86_64
|
|
2753
|
+
return _mul128(a, b, upper);
|
|
2754
|
+
#else
|
|
2755
|
+
uint64_t unsigned_upper;
|
|
2756
|
+
const int64_t lower = static_cast<int64_t>(Mul128(
|
|
2757
|
+
static_cast<uint64_t>(a), static_cast<uint64_t>(b), &unsigned_upper));
|
|
2758
|
+
*upper = static_cast<int64_t>(
|
|
2759
|
+
unsigned_upper -
|
|
2760
|
+
(static_cast<uint64_t>(ScalarShr(a, 63)) & static_cast<uint64_t>(b)) -
|
|
2761
|
+
(static_cast<uint64_t>(ScalarShr(b, 63)) & static_cast<uint64_t>(a)));
|
|
2762
|
+
return lower;
|
|
2763
|
+
#endif
|
|
2764
|
+
}
|
|
2765
|
+
|
|
2766
|
+
// Precomputation for fast n / divisor and n % divisor, where n is a variable
|
|
2767
|
+
// and divisor is unchanging but unknown at compile-time.
|
|
2768
|
+
class Divisor {
|
|
2769
|
+
public:
|
|
2770
|
+
explicit Divisor(uint32_t divisor) : divisor_(divisor) {
|
|
2771
|
+
if (divisor <= 1) return;
|
|
2772
|
+
|
|
2773
|
+
const uint32_t len =
|
|
2774
|
+
static_cast<uint32_t>(31 - Num0BitsAboveMS1Bit_Nonzero32(divisor - 1));
|
|
2775
|
+
const uint64_t u_hi = (2ULL << len) - divisor;
|
|
2776
|
+
const uint32_t q = Truncate((u_hi << 32) / divisor);
|
|
2777
|
+
|
|
2778
|
+
mul_ = q + 1;
|
|
2779
|
+
shift1_ = 1;
|
|
2780
|
+
shift2_ = len;
|
|
2781
|
+
}
|
|
2782
|
+
|
|
2783
|
+
uint32_t GetDivisor() const { return divisor_; }
|
|
2784
|
+
|
|
2785
|
+
// Returns n / divisor_.
|
|
2786
|
+
uint32_t Divide(uint32_t n) const {
|
|
2787
|
+
const uint64_t mul = mul_;
|
|
2788
|
+
const uint32_t t = Truncate((mul * n) >> 32);
|
|
2789
|
+
return (t + ((n - t) >> shift1_)) >> shift2_;
|
|
2790
|
+
}
|
|
2791
|
+
|
|
2792
|
+
// Returns n % divisor_.
|
|
2793
|
+
uint32_t Remainder(uint32_t n) const { return n - (Divide(n) * divisor_); }
|
|
2794
|
+
|
|
2795
|
+
private:
|
|
2796
|
+
static uint32_t Truncate(uint64_t x) {
|
|
2797
|
+
return static_cast<uint32_t>(x & 0xFFFFFFFFu);
|
|
2798
|
+
}
|
|
2799
|
+
|
|
2800
|
+
uint32_t divisor_;
|
|
2801
|
+
uint32_t mul_ = 1;
|
|
2802
|
+
uint32_t shift1_ = 0;
|
|
2803
|
+
uint32_t shift2_ = 0;
|
|
2804
|
+
};
|
|
2805
|
+
|
|
2605
2806
|
namespace detail {
|
|
2606
2807
|
|
|
2607
2808
|
template <typename T>
|
|
@@ -73,7 +73,11 @@
|
|
|
73
73
|
// https://github.com/simd-everywhere/simde/blob/47d6e603de9d04ee05cdfbc57cf282a02be1bf2a/simde/simde-detect-clang.h#L59.
|
|
74
74
|
// Please send updates below to them as well, thanks!
|
|
75
75
|
#if defined(__apple_build_version__) || __clang_major__ >= 999
|
|
76
|
-
#if
|
|
76
|
+
#if __has_warning("-Woverriding-option")
|
|
77
|
+
#define HWY_COMPILER_CLANG 1801
|
|
78
|
+
// No new warnings in 17.0, and Apple LLVM 15.3, which should be 1600, already
|
|
79
|
+
// has the unsafe_buffer_usage attribute, so we instead check for new builtins.
|
|
80
|
+
#elif __has_builtin(__builtin_nondeterministic_value)
|
|
77
81
|
#define HWY_COMPILER_CLANG 1700
|
|
78
82
|
#elif __has_attribute(nouwtable) // no new warnings in 16.0
|
|
79
83
|
#define HWY_COMPILER_CLANG 1600
|
|
@@ -115,7 +119,8 @@
|
|
|
115
119
|
#define HWY_COMPILER3_CLANG 0
|
|
116
120
|
#endif
|
|
117
121
|
|
|
118
|
-
#if HWY_COMPILER_GCC && !HWY_COMPILER_CLANG && !HWY_COMPILER_ICC
|
|
122
|
+
#if HWY_COMPILER_GCC && !HWY_COMPILER_CLANG && !HWY_COMPILER_ICC && \
|
|
123
|
+
!HWY_COMPILER_ICX
|
|
119
124
|
#define HWY_COMPILER_GCC_ACTUAL HWY_COMPILER_GCC
|
|
120
125
|
#else
|
|
121
126
|
#define HWY_COMPILER_GCC_ACTUAL 0
|
|
@@ -123,17 +128,20 @@
|
|
|
123
128
|
|
|
124
129
|
// More than one may be nonzero, but we want at least one.
|
|
125
130
|
#if 0 == (HWY_COMPILER_MSVC + HWY_COMPILER_CLANGCL + HWY_COMPILER_ICC + \
|
|
126
|
-
HWY_COMPILER_GCC + HWY_COMPILER_CLANG)
|
|
131
|
+
HWY_COMPILER_ICX + HWY_COMPILER_GCC + HWY_COMPILER_CLANG)
|
|
127
132
|
#error "Unsupported compiler"
|
|
128
133
|
#endif
|
|
129
134
|
|
|
130
|
-
// We should only detect one of these (only clang/clangcl overlap)
|
|
131
|
-
#if 1 <
|
|
132
|
-
|
|
133
|
-
|
|
135
|
+
// We should only detect one of these (only clang/clangcl/icx overlap)
|
|
136
|
+
#if 1 < (!!HWY_COMPILER_MSVC + (!!HWY_COMPILER_ICC & !HWY_COMPILER_ICX) + \
|
|
137
|
+
!!HWY_COMPILER_GCC_ACTUAL + \
|
|
138
|
+
!!(HWY_COMPILER_ICX | HWY_COMPILER_CLANGCL | HWY_COMPILER_CLANG))
|
|
134
139
|
#error "Detected multiple compilers"
|
|
135
140
|
#endif
|
|
136
141
|
|
|
142
|
+
//------------------------------------------------------------------------------
|
|
143
|
+
// Compiler features and C++ version
|
|
144
|
+
|
|
137
145
|
#ifdef __has_builtin
|
|
138
146
|
#define HWY_HAS_BUILTIN(name) __has_builtin(name)
|
|
139
147
|
#else
|
|
@@ -158,6 +166,32 @@
|
|
|
158
166
|
#define HWY_HAS_FEATURE(name) 0
|
|
159
167
|
#endif
|
|
160
168
|
|
|
169
|
+
// NOTE: clang ~17 does not correctly handle wrapping __has_include in a macro.
|
|
170
|
+
|
|
171
|
+
#if HWY_COMPILER_MSVC && defined(_MSVC_LANG) && _MSVC_LANG > __cplusplus
|
|
172
|
+
#define HWY_CXX_LANG _MSVC_LANG
|
|
173
|
+
#else
|
|
174
|
+
#define HWY_CXX_LANG __cplusplus
|
|
175
|
+
#endif
|
|
176
|
+
|
|
177
|
+
#if defined(__cpp_constexpr) && __cpp_constexpr >= 201603L
|
|
178
|
+
#define HWY_CXX17_CONSTEXPR constexpr
|
|
179
|
+
#else
|
|
180
|
+
#define HWY_CXX17_CONSTEXPR
|
|
181
|
+
#endif
|
|
182
|
+
|
|
183
|
+
#if defined(__cpp_constexpr) && __cpp_constexpr >= 201304L
|
|
184
|
+
#define HWY_CXX14_CONSTEXPR constexpr
|
|
185
|
+
#else
|
|
186
|
+
#define HWY_CXX14_CONSTEXPR
|
|
187
|
+
#endif
|
|
188
|
+
|
|
189
|
+
#if HWY_CXX_LANG >= 201703L
|
|
190
|
+
#define HWY_IF_CONSTEXPR if constexpr
|
|
191
|
+
#else
|
|
192
|
+
#define HWY_IF_CONSTEXPR if
|
|
193
|
+
#endif
|
|
194
|
+
|
|
161
195
|
//------------------------------------------------------------------------------
|
|
162
196
|
// Architecture
|
|
163
197
|
|
|
@@ -233,9 +267,34 @@
|
|
|
233
267
|
#endif
|
|
234
268
|
|
|
235
269
|
#ifdef __riscv
|
|
236
|
-
#define
|
|
270
|
+
#define HWY_ARCH_RISCV 1
|
|
237
271
|
#else
|
|
238
|
-
#define
|
|
272
|
+
#define HWY_ARCH_RISCV 0
|
|
273
|
+
#endif
|
|
274
|
+
// DEPRECATED name; please use HWY_ARCH_RISCV instead.
|
|
275
|
+
#define HWY_ARCH_RVV HWY_ARCH_RISCV
|
|
276
|
+
|
|
277
|
+
#if HWY_ARCH_RISCV && defined(__riscv_xlen)
|
|
278
|
+
|
|
279
|
+
#if __riscv_xlen == 32
|
|
280
|
+
#define HWY_ARCH_RISCV_32 1
|
|
281
|
+
#else
|
|
282
|
+
#define HWY_ARCH_RISCV_32 0
|
|
283
|
+
#endif
|
|
284
|
+
|
|
285
|
+
#if __riscv_xlen == 64
|
|
286
|
+
#define HWY_ARCH_RISCV_64 1
|
|
287
|
+
#else
|
|
288
|
+
#define HWY_ARCH_RISCV_64 0
|
|
289
|
+
#endif
|
|
290
|
+
|
|
291
|
+
#else // !HWY_ARCH_RISCV || !defined(__riscv_xlen)
|
|
292
|
+
#define HWY_ARCH_RISCV_32 0
|
|
293
|
+
#define HWY_ARCH_RISCV_64 0
|
|
294
|
+
#endif // HWY_ARCH_RISCV && defined(__riscv_xlen)
|
|
295
|
+
|
|
296
|
+
#if HWY_ARCH_RISCV_32 && HWY_ARCH_RISCV_64
|
|
297
|
+
#error "Cannot have both RISCV_32 and RISCV_64"
|
|
239
298
|
#endif
|
|
240
299
|
|
|
241
300
|
#if defined(__s390x__)
|
|
@@ -247,10 +306,13 @@
|
|
|
247
306
|
// It is an error to detect multiple architectures at the same time, but OK to
|
|
248
307
|
// detect none of the above.
|
|
249
308
|
#if (HWY_ARCH_X86 + HWY_ARCH_PPC + HWY_ARCH_ARM + HWY_ARCH_ARM_OLD + \
|
|
250
|
-
HWY_ARCH_WASM +
|
|
309
|
+
HWY_ARCH_WASM + HWY_ARCH_RISCV + HWY_ARCH_S390X) > 1
|
|
251
310
|
#error "Must not detect more than one architecture"
|
|
252
311
|
#endif
|
|
253
312
|
|
|
313
|
+
//------------------------------------------------------------------------------
|
|
314
|
+
// Operating system
|
|
315
|
+
|
|
254
316
|
#if defined(_WIN32) || defined(_WIN64)
|
|
255
317
|
#define HWY_OS_WIN 1
|
|
256
318
|
#else
|
|
@@ -270,6 +332,18 @@
|
|
|
270
332
|
#define HWY_OS_APPLE 0
|
|
271
333
|
#endif
|
|
272
334
|
|
|
335
|
+
#if defined(__FreeBSD__)
|
|
336
|
+
#define HWY_OS_FREEBSD 1
|
|
337
|
+
#else
|
|
338
|
+
#define HWY_OS_FREEBSD 0
|
|
339
|
+
#endif
|
|
340
|
+
|
|
341
|
+
// It is an error to detect multiple OSes at the same time, but OK to
|
|
342
|
+
// detect none of the above.
|
|
343
|
+
#if (HWY_OS_WIN + HWY_OS_LINUX + HWY_OS_APPLE + HWY_OS_FREEBSD) > 1
|
|
344
|
+
#error "Must not detect more than one OS"
|
|
345
|
+
#endif
|
|
346
|
+
|
|
273
347
|
//------------------------------------------------------------------------------
|
|
274
348
|
// Endianness
|
|
275
349
|
|