minimap2 0.2.25.0 → 0.2.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -3
- data/ext/minimap2/Makefile +6 -2
- data/ext/minimap2/NEWS.md +38 -0
- data/ext/minimap2/README.md +9 -3
- data/ext/minimap2/align.c +5 -3
- data/ext/minimap2/cookbook.md +2 -2
- data/ext/minimap2/format.c +7 -4
- data/ext/minimap2/kalloc.c +20 -1
- data/ext/minimap2/kalloc.h +13 -2
- data/ext/minimap2/ksw2.h +1 -0
- data/ext/minimap2/ksw2_extd2_sse.c +1 -1
- data/ext/minimap2/ksw2_exts2_sse.c +79 -40
- data/ext/minimap2/ksw2_extz2_sse.c +1 -1
- data/ext/minimap2/lchain.c +15 -16
- data/ext/minimap2/lib/simde/CONTRIBUTING.md +114 -0
- data/ext/minimap2/lib/simde/COPYING +20 -0
- data/ext/minimap2/lib/simde/README.md +333 -0
- data/ext/minimap2/lib/simde/amalgamate.py +58 -0
- data/ext/minimap2/lib/simde/meson.build +33 -0
- data/ext/minimap2/lib/simde/netlify.toml +20 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +140 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +137 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +142 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +145 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +140 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +145 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +140 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +143 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +137 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +141 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +147 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +141 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +134 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +138 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +134 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +137 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +131 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +135 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +141 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +135 -0
- data/ext/minimap2/lib/simde/simde/arm/neon.h +97 -0
- data/ext/minimap2/lib/simde/simde/check.h +267 -0
- data/ext/minimap2/lib/simde/simde/debug-trap.h +83 -0
- data/ext/minimap2/lib/simde/simde/hedley.h +1899 -0
- data/ext/minimap2/lib/simde/simde/simde-arch.h +445 -0
- data/ext/minimap2/lib/simde/simde/simde-common.h +697 -0
- data/ext/minimap2/lib/simde/simde/x86/avx.h +5385 -0
- data/ext/minimap2/lib/simde/simde/x86/avx2.h +2402 -0
- data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +391 -0
- data/ext/minimap2/lib/simde/simde/x86/avx512f.h +3389 -0
- data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +112 -0
- data/ext/minimap2/lib/simde/simde/x86/fma.h +659 -0
- data/ext/minimap2/lib/simde/simde/x86/mmx.h +2210 -0
- data/ext/minimap2/lib/simde/simde/x86/sse.h +3696 -0
- data/ext/minimap2/lib/simde/simde/x86/sse2.h +5991 -0
- data/ext/minimap2/lib/simde/simde/x86/sse3.h +343 -0
- data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +1783 -0
- data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +105 -0
- data/ext/minimap2/lib/simde/simde/x86/ssse3.h +1053 -0
- data/ext/minimap2/lib/simde/simde/x86/svml.h +543 -0
- data/ext/minimap2/lib/simde/test/CMakeLists.txt +166 -0
- data/ext/minimap2/lib/simde/test/arm/meson.build +4 -0
- data/ext/minimap2/lib/simde/test/arm/neon/meson.build +23 -0
- data/ext/minimap2/lib/simde/test/arm/neon/skel.c +871 -0
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +134 -0
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +39 -0
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +10 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +1260 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +873 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +1084 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +1260 -0
- data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +18 -0
- data/ext/minimap2/lib/simde/test/arm/test-arm.c +20 -0
- data/ext/minimap2/lib/simde/test/arm/test-arm.h +8 -0
- data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +171 -0
- data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +68 -0
- data/ext/minimap2/lib/simde/test/meson.build +64 -0
- data/ext/minimap2/lib/simde/test/munit/COPYING +21 -0
- data/ext/minimap2/lib/simde/test/munit/Makefile +55 -0
- data/ext/minimap2/lib/simde/test/munit/README.md +54 -0
- data/ext/minimap2/lib/simde/test/munit/example.c +351 -0
- data/ext/minimap2/lib/simde/test/munit/meson.build +37 -0
- data/ext/minimap2/lib/simde/test/munit/munit.c +2055 -0
- data/ext/minimap2/lib/simde/test/munit/munit.h +535 -0
- data/ext/minimap2/lib/simde/test/run-tests.c +20 -0
- data/ext/minimap2/lib/simde/test/run-tests.h +260 -0
- data/ext/minimap2/lib/simde/test/x86/avx.c +13752 -0
- data/ext/minimap2/lib/simde/test/x86/avx2.c +9977 -0
- data/ext/minimap2/lib/simde/test/x86/avx512bw.c +2664 -0
- data/ext/minimap2/lib/simde/test/x86/avx512f.c +10416 -0
- data/ext/minimap2/lib/simde/test/x86/avx512vl.c +210 -0
- data/ext/minimap2/lib/simde/test/x86/fma.c +2557 -0
- data/ext/minimap2/lib/simde/test/x86/meson.build +33 -0
- data/ext/minimap2/lib/simde/test/x86/mmx.c +2878 -0
- data/ext/minimap2/lib/simde/test/x86/skel.c +2984 -0
- data/ext/minimap2/lib/simde/test/x86/sse.c +5121 -0
- data/ext/minimap2/lib/simde/test/x86/sse2.c +9860 -0
- data/ext/minimap2/lib/simde/test/x86/sse3.c +486 -0
- data/ext/minimap2/lib/simde/test/x86/sse4.1.c +3446 -0
- data/ext/minimap2/lib/simde/test/x86/sse4.2.c +101 -0
- data/ext/minimap2/lib/simde/test/x86/ssse3.c +2084 -0
- data/ext/minimap2/lib/simde/test/x86/svml.c +1545 -0
- data/ext/minimap2/lib/simde/test/x86/test-avx.h +16 -0
- data/ext/minimap2/lib/simde/test/x86/test-avx512.h +25 -0
- data/ext/minimap2/lib/simde/test/x86/test-mmx.h +13 -0
- data/ext/minimap2/lib/simde/test/x86/test-sse.h +13 -0
- data/ext/minimap2/lib/simde/test/x86/test-sse2.h +13 -0
- data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +196 -0
- data/ext/minimap2/lib/simde/test/x86/test-x86.c +48 -0
- data/ext/minimap2/lib/simde/test/x86/test-x86.h +8 -0
- data/ext/minimap2/main.c +13 -6
- data/ext/minimap2/map.c +0 -5
- data/ext/minimap2/minimap.h +40 -31
- data/ext/minimap2/minimap2.1 +19 -5
- data/ext/minimap2/misc/paftools.js +545 -24
- data/ext/minimap2/options.c +1 -1
- data/ext/minimap2/pyproject.toml +2 -0
- data/ext/minimap2/python/mappy.pyx +3 -1
- data/ext/minimap2/seed.c +1 -1
- data/ext/minimap2/setup.py +32 -22
- data/lib/minimap2/version.rb +1 -1
- metadata +100 -3
|
@@ -0,0 +1,1783 @@
|
|
|
1
|
+
/* Copyright (c) 2017-2020 Evan Nemerson <evan@nemerson.com>
|
|
2
|
+
*
|
|
3
|
+
* Permission is hereby granted, free of charge, to any person
|
|
4
|
+
* obtaining a copy of this software and associated documentation
|
|
5
|
+
* files (the "Software"), to deal in the Software without
|
|
6
|
+
* restriction, including without limitation the rights to use, copy,
|
|
7
|
+
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
|
8
|
+
* of the Software, and to permit persons to whom the Software is
|
|
9
|
+
* furnished to do so, subject to the following conditions:
|
|
10
|
+
*
|
|
11
|
+
* The above copyright notice and this permission notice shall be
|
|
12
|
+
* included in all copies or substantial portions of the Software.
|
|
13
|
+
*
|
|
14
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
15
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
16
|
+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
17
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
18
|
+
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
19
|
+
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
20
|
+
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
* SOFTWARE.
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
#if !defined(SIMDE__SSE4_1_H)
|
|
25
|
+
# if !defined(SIMDE__SSE4_1_H)
|
|
26
|
+
# define SIMDE__SSE4_1_H
|
|
27
|
+
# endif
|
|
28
|
+
# include "ssse3.h"
|
|
29
|
+
|
|
30
|
+
HEDLEY_DIAGNOSTIC_PUSH
|
|
31
|
+
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
|
32
|
+
|
|
33
|
+
# if defined(SIMDE_SSE4_1_NATIVE)
|
|
34
|
+
# undef SIMDE_SSE4_1_NATIVE
|
|
35
|
+
# endif
|
|
36
|
+
# if defined(SIMDE_ARCH_X86_SSE4_1) && !defined(SIMDE_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
|
|
37
|
+
# define SIMDE_SSE4_1_NATIVE
|
|
38
|
+
# elif defined(__ARM_NEON) && !defined(SIMDE_SSE4_1_NO_NEON) && !defined(SIMDE_NO_NEON)
|
|
39
|
+
# define SIMDE_SSE4_1_NEON
|
|
40
|
+
# elif defined(SIMDE_ARCH_POWER_ALTIVEC)
|
|
41
|
+
# define SIMDE_SSE4_1_POWER_ALTIVEC
|
|
42
|
+
# endif
|
|
43
|
+
|
|
44
|
+
# if defined(SIMDE_SSE4_1_NATIVE) && !defined(SIMDE_SSE3_NATIVE)
|
|
45
|
+
# if defined(SIMDE_SSE4_1_FORCE_NATIVE)
|
|
46
|
+
# error Native SSE4.1 support requires native SSE3 support
|
|
47
|
+
# else
|
|
48
|
+
HEDLEY_WARNING("Native SSE4.1 support requires native SSE3 support, disabling")
|
|
49
|
+
# undef SIMDE_SSE4_1_NATIVE
|
|
50
|
+
# endif
|
|
51
|
+
# elif defined(SIMDE_SSE4_1_NEON) && !defined(SIMDE_SSE3_NEON)
|
|
52
|
+
HEDLEY_WARNING("SSE4.1 NEON support requires SSE3 NEON support, disabling")
|
|
53
|
+
# undef SIMDE_SSE4_1_NEON
|
|
54
|
+
# endif
|
|
55
|
+
|
|
56
|
+
# if defined(SIMDE_SSE4_1_NATIVE)
|
|
57
|
+
# include <smmintrin.h>
|
|
58
|
+
# else
|
|
59
|
+
# if defined(SIMDE_SSE4_1_NEON)
|
|
60
|
+
# include <arm_neon.h>
|
|
61
|
+
# endif
|
|
62
|
+
# endif
|
|
63
|
+
|
|
64
|
+
SIMDE__BEGIN_DECLS
|
|
65
|
+
|
|
66
|
+
#if !defined(SIMDE_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
|
|
67
|
+
# define SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES
|
|
68
|
+
#endif
|
|
69
|
+
|
|
70
|
+
/* Rounding-control immediates.
 *
 * With a native SSE4.1 backend the SIMDE_MM_FROUND_* names forward to the
 * compiler's _MM_FROUND_* macros; otherwise portable values are defined
 * here. */
#if defined(SIMDE_SSE4_1_NATIVE)
#  define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT
#  define SIMDE_MM_FROUND_TO_NEG_INF     _MM_FROUND_TO_NEG_INF
#  define SIMDE_MM_FROUND_TO_POS_INF     _MM_FROUND_TO_POS_INF
#  define SIMDE_MM_FROUND_TO_ZERO        _MM_FROUND_TO_ZERO
#  define SIMDE_MM_FROUND_CUR_DIRECTION  _MM_FROUND_CUR_DIRECTION

#  define SIMDE_MM_FROUND_RAISE_EXC      _MM_FROUND_RAISE_EXC
#  define SIMDE_MM_FROUND_NO_EXC         _MM_FROUND_NO_EXC
#else
#  define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00
#  define SIMDE_MM_FROUND_TO_NEG_INF     0x01
#  define SIMDE_MM_FROUND_TO_POS_INF     0x02
#  define SIMDE_MM_FROUND_TO_ZERO        0x03
#  define SIMDE_MM_FROUND_CUR_DIRECTION  0x04

#  define SIMDE_MM_FROUND_RAISE_EXC      0x00
#  define SIMDE_MM_FROUND_NO_EXC         0x08
#endif

/* Composite modes: a rounding direction OR'd with an exception-control flag. */
#define SIMDE_MM_FROUND_NINT \
  (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC)
#define SIMDE_MM_FROUND_FLOOR \
  (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC)
#define SIMDE_MM_FROUND_CEIL \
  (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC)
#define SIMDE_MM_FROUND_TRUNC \
  (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC)
#define SIMDE_MM_FROUND_RINT \
  (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC)
#define SIMDE_MM_FROUND_NEARBYINT \
  (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC)

/* Expose the Intel spellings when native aliases are requested.
 * _MM_FROUND_NO_EXC was previously missing from this list even though the
 * SIMDE_ counterpart is defined above and used by SIMDE_MM_FROUND_NEARBYINT. */
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
#  define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT
#  define _MM_FROUND_TO_NEG_INF     SIMDE_MM_FROUND_TO_NEG_INF
#  define _MM_FROUND_TO_POS_INF     SIMDE_MM_FROUND_TO_POS_INF
#  define _MM_FROUND_TO_ZERO        SIMDE_MM_FROUND_TO_ZERO
#  define _MM_FROUND_CUR_DIRECTION  SIMDE_MM_FROUND_CUR_DIRECTION
#  define _MM_FROUND_RAISE_EXC      SIMDE_MM_FROUND_RAISE_EXC
#  define _MM_FROUND_NO_EXC         SIMDE_MM_FROUND_NO_EXC
#  define _MM_FROUND_NINT           SIMDE_MM_FROUND_NINT
#  define _MM_FROUND_FLOOR          SIMDE_MM_FROUND_FLOOR
#  define _MM_FROUND_CEIL           SIMDE_MM_FROUND_CEIL
#  define _MM_FROUND_TRUNC          SIMDE_MM_FROUND_TRUNC
#  define _MM_FROUND_RINT           SIMDE_MM_FROUND_RINT
#  define _MM_FROUND_NEARBYINT      SIMDE_MM_FROUND_NEARBYINT
#endif
|
|
117
|
+
|
|
118
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
119
|
+
simde__m128i
|
|
120
|
+
simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8)
|
|
121
|
+
HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
|
|
122
|
+
simde__m128i_private
|
|
123
|
+
r_,
|
|
124
|
+
a_ = simde__m128i_to_private(a),
|
|
125
|
+
b_ = simde__m128i_to_private(b);
|
|
126
|
+
|
|
127
|
+
SIMDE__VECTORIZE
|
|
128
|
+
for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
|
|
129
|
+
r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i];
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
return simde__m128i_from_private(r_);
|
|
133
|
+
}
|
|
134
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
135
|
+
# define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8)
|
|
136
|
+
#endif
|
|
137
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
138
|
+
# define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8)
|
|
139
|
+
#endif
|
|
140
|
+
|
|
141
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
142
|
+
simde__m128d
|
|
143
|
+
simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8)
|
|
144
|
+
HEDLEY_REQUIRE_MSG((imm8 & 3) == imm8, "imm8 must be in range [0, 3]") {
|
|
145
|
+
simde__m128d_private
|
|
146
|
+
r_,
|
|
147
|
+
a_ = simde__m128d_to_private(a),
|
|
148
|
+
b_ = simde__m128d_to_private(b);
|
|
149
|
+
|
|
150
|
+
SIMDE__VECTORIZE
|
|
151
|
+
for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
|
|
152
|
+
r_.f64[i] = ((imm8 >> i) & 1) ? b_.f64[i] : a_.f64[i];
|
|
153
|
+
}
|
|
154
|
+
return simde__m128d_from_private(r_);
|
|
155
|
+
}
|
|
156
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
157
|
+
# define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8)
|
|
158
|
+
#endif
|
|
159
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
160
|
+
# define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8)
|
|
161
|
+
#endif
|
|
162
|
+
|
|
163
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
164
|
+
simde__m128
|
|
165
|
+
simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8)
|
|
166
|
+
HEDLEY_REQUIRE_MSG((imm8 & 0xf) == imm8, "imm8 must be in range [0, 15]") {
|
|
167
|
+
simde__m128_private
|
|
168
|
+
r_,
|
|
169
|
+
a_ = simde__m128_to_private(a),
|
|
170
|
+
b_ = simde__m128_to_private(b);
|
|
171
|
+
|
|
172
|
+
SIMDE__VECTORIZE
|
|
173
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
174
|
+
r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i];
|
|
175
|
+
}
|
|
176
|
+
return simde__m128_from_private(r_);
|
|
177
|
+
}
|
|
178
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
179
|
+
# define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8)
|
|
180
|
+
#endif
|
|
181
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
182
|
+
# define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8)
|
|
183
|
+
#endif
|
|
184
|
+
|
|
185
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
186
|
+
simde__m128i
|
|
187
|
+
simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) {
|
|
188
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
189
|
+
return _mm_blendv_epi8(a, b, mask);
|
|
190
|
+
#else
|
|
191
|
+
simde__m128i_private
|
|
192
|
+
r_,
|
|
193
|
+
a_ = simde__m128i_to_private(a),
|
|
194
|
+
b_ = simde__m128i_to_private(b),
|
|
195
|
+
mask_ = simde__m128i_to_private(mask);
|
|
196
|
+
|
|
197
|
+
#if defined(SIMDE_SSE4_1_NEON)
|
|
198
|
+
mask_ = simde__m128i_to_private(simde_mm_cmplt_epi8(mask, simde_mm_setzero_si128()));
|
|
199
|
+
r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8);
|
|
200
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
201
|
+
/* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */
|
|
202
|
+
#if defined(HEDLEY_INTEL_VERSION_CHECK)
|
|
203
|
+
__typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
|
204
|
+
mask_.i8 = HEDLEY_STATIC_CAST(__typeof__(mask_.i8), mask_.i8 < z);
|
|
205
|
+
#else
|
|
206
|
+
mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1;
|
|
207
|
+
#endif
|
|
208
|
+
|
|
209
|
+
r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8);
|
|
210
|
+
#else
|
|
211
|
+
SIMDE__VECTORIZE
|
|
212
|
+
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
|
|
213
|
+
int8_t m = mask_.i8[i] >> 7;
|
|
214
|
+
r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]);
|
|
215
|
+
}
|
|
216
|
+
#endif
|
|
217
|
+
|
|
218
|
+
return simde__m128i_from_private(r_);
|
|
219
|
+
#endif
|
|
220
|
+
}
|
|
221
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
222
|
+
# define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask)
|
|
223
|
+
#endif
|
|
224
|
+
|
|
225
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
226
|
+
simde__m128i
|
|
227
|
+
simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) {
|
|
228
|
+
#if defined(SIMDE_SSE2_NATIVE)
|
|
229
|
+
mask = simde_mm_srai_epi16(mask, 15);
|
|
230
|
+
return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a));
|
|
231
|
+
#else
|
|
232
|
+
simde__m128i_private
|
|
233
|
+
r_,
|
|
234
|
+
a_ = simde__m128i_to_private(a),
|
|
235
|
+
b_ = simde__m128i_to_private(b),
|
|
236
|
+
mask_ = simde__m128i_to_private(mask);
|
|
237
|
+
|
|
238
|
+
#if defined(SIMDE_SSE4_1_NEON)
|
|
239
|
+
mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128()));
|
|
240
|
+
r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16);
|
|
241
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
242
|
+
#if defined(HEDLEY_INTEL_VERSION_CHECK)
|
|
243
|
+
__typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 };
|
|
244
|
+
mask_.i16 = mask_.i16 < z;
|
|
245
|
+
#else
|
|
246
|
+
mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1;
|
|
247
|
+
#endif
|
|
248
|
+
|
|
249
|
+
r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16);
|
|
250
|
+
#else
|
|
251
|
+
SIMDE__VECTORIZE
|
|
252
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
253
|
+
int16_t m = mask_.i16[i] >> 15;
|
|
254
|
+
r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]);
|
|
255
|
+
}
|
|
256
|
+
#endif
|
|
257
|
+
|
|
258
|
+
return simde__m128i_from_private(r_);
|
|
259
|
+
#endif
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
263
|
+
simde__m128i
|
|
264
|
+
simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) {
|
|
265
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
266
|
+
return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask)));
|
|
267
|
+
#else
|
|
268
|
+
simde__m128i_private
|
|
269
|
+
r_,
|
|
270
|
+
a_ = simde__m128i_to_private(a),
|
|
271
|
+
b_ = simde__m128i_to_private(b),
|
|
272
|
+
mask_ = simde__m128i_to_private(mask);
|
|
273
|
+
|
|
274
|
+
#if defined(SIMDE_SSE4_1_NEON)
|
|
275
|
+
mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128()));
|
|
276
|
+
r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32);
|
|
277
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
278
|
+
#if defined(HEDLEY_INTEL_VERSION_CHECK)
|
|
279
|
+
__typeof__(mask_.i32) z = { 0, 0, 0, 0 };
|
|
280
|
+
mask_.i32 = mask_.i32 < z;
|
|
281
|
+
#else
|
|
282
|
+
mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1;
|
|
283
|
+
#endif
|
|
284
|
+
|
|
285
|
+
r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32);
|
|
286
|
+
#else
|
|
287
|
+
SIMDE__VECTORIZE
|
|
288
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
289
|
+
int32_t m = mask_.i32[i] >> 31;
|
|
290
|
+
r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]);
|
|
291
|
+
}
|
|
292
|
+
#endif
|
|
293
|
+
|
|
294
|
+
return simde__m128i_from_private(r_);
|
|
295
|
+
#endif
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
299
|
+
simde__m128i
|
|
300
|
+
simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) {
|
|
301
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
302
|
+
return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask)));
|
|
303
|
+
#else
|
|
304
|
+
simde__m128i_private
|
|
305
|
+
r_,
|
|
306
|
+
a_ = simde__m128i_to_private(a),
|
|
307
|
+
b_ = simde__m128i_to_private(b),
|
|
308
|
+
mask_ = simde__m128i_to_private(mask);
|
|
309
|
+
|
|
310
|
+
#if defined(SIMDE_SSE4_1_NEON) && defined(SIMDE_ARCH_AARCH64)
|
|
311
|
+
mask_.i64 = vreinterpretq_s64_u64(vcltq_s64(mask_.i64, vdupq_n_s64(UINT64_C(0))));
|
|
312
|
+
r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64);
|
|
313
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
314
|
+
#if defined(HEDLEY_INTEL_VERSION_CHECK)
|
|
315
|
+
__typeof__(mask_.i64) z = { 0, 0 };
|
|
316
|
+
mask_.i64 = HEDLEY_STATIC_CAST(__typeof__(mask_.i64), mask_.i64 < z);
|
|
317
|
+
#else
|
|
318
|
+
mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1;
|
|
319
|
+
#endif
|
|
320
|
+
|
|
321
|
+
r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64);
|
|
322
|
+
#else
|
|
323
|
+
SIMDE__VECTORIZE
|
|
324
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
325
|
+
int64_t m = mask_.i64[i] >> 63;
|
|
326
|
+
r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]);
|
|
327
|
+
}
|
|
328
|
+
#endif
|
|
329
|
+
|
|
330
|
+
return simde__m128i_from_private(r_);
|
|
331
|
+
#endif
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
335
|
+
simde__m128d
|
|
336
|
+
simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) {
|
|
337
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
338
|
+
return _mm_blendv_pd(a, b, mask);
|
|
339
|
+
#else
|
|
340
|
+
return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask)));
|
|
341
|
+
#endif
|
|
342
|
+
}
|
|
343
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
344
|
+
# define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask)
|
|
345
|
+
#endif
|
|
346
|
+
|
|
347
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
348
|
+
simde__m128
|
|
349
|
+
simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) {
|
|
350
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
351
|
+
return _mm_blendv_ps(a, b, mask);
|
|
352
|
+
#else
|
|
353
|
+
return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask)));
|
|
354
|
+
#endif
|
|
355
|
+
}
|
|
356
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
357
|
+
# define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask)
|
|
358
|
+
#endif
|
|
359
|
+
|
|
360
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
361
|
+
simde__m128d
|
|
362
|
+
simde_mm_ceil_pd (simde__m128d a) {
|
|
363
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
364
|
+
return _mm_ceil_pd(a);
|
|
365
|
+
#else
|
|
366
|
+
simde__m128d_private
|
|
367
|
+
r_,
|
|
368
|
+
a_ = simde__m128d_to_private(a);
|
|
369
|
+
|
|
370
|
+
#if defined(SIMDE_SSE4_1_NEON) && defined(SIMDE_ARCH_AARCH64)
|
|
371
|
+
r_.neon_f64 = vrndpq_f64(a_.neon_f64);
|
|
372
|
+
#elif defined(SIMDE_SSE_POWER_ALTIVEC)
|
|
373
|
+
r_.altivec_f64 = vec_ceil(a_.altivec_f64);
|
|
374
|
+
#elif defined(SIMDE_HAVE_MATH_H)
|
|
375
|
+
SIMDE__VECTORIZE
|
|
376
|
+
for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
|
|
377
|
+
r_.f64[i] = ceil(a_.f64[i]);
|
|
378
|
+
}
|
|
379
|
+
#else
|
|
380
|
+
HEDLEY_UNREACHABLE();
|
|
381
|
+
#endif
|
|
382
|
+
|
|
383
|
+
return simde__m128d_from_private(r_);
|
|
384
|
+
#endif
|
|
385
|
+
}
|
|
386
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
387
|
+
# define _mm_ceil_pd(a) simde_mm_ceil_pd(a)
|
|
388
|
+
#endif
|
|
389
|
+
|
|
390
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
391
|
+
simde__m128
|
|
392
|
+
simde_mm_ceil_ps (simde__m128 a) {
|
|
393
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
394
|
+
return _mm_ceil_ps(a);
|
|
395
|
+
#else
|
|
396
|
+
simde__m128_private
|
|
397
|
+
r_,
|
|
398
|
+
a_ = simde__m128_to_private(a);
|
|
399
|
+
|
|
400
|
+
#if defined(SIMDE_SSE4_1_NEON) && (SIMDE_ARCH_ARM >= 80) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0))
|
|
401
|
+
r_.neon_f32 = vrndpq_f32(a_.neon_f32);
|
|
402
|
+
#elif defined(SIMDE_SSE_POWER_ALTIVEC)
|
|
403
|
+
r_.altivec_f32 = vec_ceil(a_.altivec_f32);
|
|
404
|
+
#elif defined(SIMDE_HAVE_MATH_H)
|
|
405
|
+
SIMDE__VECTORIZE
|
|
406
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
407
|
+
r_.f32[i] = ceilf(a_.f32[i]);
|
|
408
|
+
}
|
|
409
|
+
#else
|
|
410
|
+
HEDLEY_UNREACHABLE();
|
|
411
|
+
#endif
|
|
412
|
+
|
|
413
|
+
return simde__m128_from_private(r_);
|
|
414
|
+
#endif
|
|
415
|
+
}
|
|
416
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
417
|
+
# define _mm_ceil_ps(a) simde_mm_ceil_ps(a)
|
|
418
|
+
#endif
|
|
419
|
+
|
|
420
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
421
|
+
simde__m128d
|
|
422
|
+
simde_mm_ceil_sd (simde__m128d a, simde__m128d b) {
|
|
423
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
424
|
+
return _mm_ceil_sd(a, b);
|
|
425
|
+
#else
|
|
426
|
+
simde__m128d_private
|
|
427
|
+
r_,
|
|
428
|
+
a_ = simde__m128d_to_private(a),
|
|
429
|
+
b_ = simde__m128d_to_private(b);
|
|
430
|
+
|
|
431
|
+
#if defined(SIMDE_HAVE_MATH_H)
|
|
432
|
+
r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], ceil(b_.f64[0])));
|
|
433
|
+
#else
|
|
434
|
+
HEDLEY_UNREACHABLE();
|
|
435
|
+
#endif
|
|
436
|
+
|
|
437
|
+
return simde__m128d_from_private(r_);
|
|
438
|
+
#endif
|
|
439
|
+
}
|
|
440
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
441
|
+
# define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b)
|
|
442
|
+
#endif
|
|
443
|
+
|
|
444
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
445
|
+
simde__m128
|
|
446
|
+
simde_mm_ceil_ss (simde__m128 a, simde__m128 b) {
|
|
447
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
448
|
+
return _mm_ceil_ss(a, b);
|
|
449
|
+
#elif defined(SIMDE_ASSUME_VECTORIZATION)
|
|
450
|
+
return simde_mm_move_ss(a, simde_mm_ceil_ps(b));
|
|
451
|
+
#else
|
|
452
|
+
simde__m128_private
|
|
453
|
+
r_,
|
|
454
|
+
a_ = simde__m128_to_private(a),
|
|
455
|
+
b_ = simde__m128_to_private(b);
|
|
456
|
+
|
|
457
|
+
#if defined(SIMDE_HAVE_MATH_H)
|
|
458
|
+
r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], ceilf(b_.f32[0])));
|
|
459
|
+
#else
|
|
460
|
+
HEDLEY_UNREACHABLE();
|
|
461
|
+
#endif
|
|
462
|
+
|
|
463
|
+
return simde__m128_from_private(r_);
|
|
464
|
+
#endif
|
|
465
|
+
}
|
|
466
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
467
|
+
# define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b)
|
|
468
|
+
#endif
|
|
469
|
+
|
|
470
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
471
|
+
simde__m128i
|
|
472
|
+
simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) {
|
|
473
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
474
|
+
return _mm_cmpeq_epi64(a, b);
|
|
475
|
+
#else
|
|
476
|
+
simde__m128i_private
|
|
477
|
+
r_,
|
|
478
|
+
a_ = simde__m128i_to_private(a),
|
|
479
|
+
b_ = simde__m128i_to_private(b);
|
|
480
|
+
|
|
481
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
482
|
+
r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), a_.i64 == b_.i64);
|
|
483
|
+
#elif defined(SIMDE_SSE_POWER_ALTIVEC)
|
|
484
|
+
r_.altivec_i64 = (vector signed long long) vec_cmpeq(a_.altivec_i64, b_.altivec_i64);
|
|
485
|
+
#else
|
|
486
|
+
SIMDE__VECTORIZE
|
|
487
|
+
for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
|
|
488
|
+
r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0);
|
|
489
|
+
}
|
|
490
|
+
#endif
|
|
491
|
+
|
|
492
|
+
return simde__m128i_from_private(r_);
|
|
493
|
+
#endif
|
|
494
|
+
}
|
|
495
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
496
|
+
# define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b)
|
|
497
|
+
#endif
|
|
498
|
+
|
|
499
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
500
|
+
simde__m128i
|
|
501
|
+
simde_mm_cvtepi8_epi16 (simde__m128i a) {
|
|
502
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
503
|
+
return _mm_cvtepi8_epi16(a);
|
|
504
|
+
#else
|
|
505
|
+
simde__m128i_private
|
|
506
|
+
r_,
|
|
507
|
+
a_ = simde__m128i_to_private(a);
|
|
508
|
+
|
|
509
|
+
#if defined(SIMDE__CONVERT_VECTOR)
|
|
510
|
+
SIMDE__CONVERT_VECTOR(r_.i16, a_.m64_private[0].i8);
|
|
511
|
+
#else
|
|
512
|
+
SIMDE__VECTORIZE
|
|
513
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
514
|
+
r_.i16[i] = a_.i8[i];
|
|
515
|
+
}
|
|
516
|
+
#endif
|
|
517
|
+
|
|
518
|
+
return simde__m128i_from_private(r_);
|
|
519
|
+
#endif
|
|
520
|
+
}
|
|
521
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
522
|
+
# define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a)
|
|
523
|
+
#endif
|
|
524
|
+
|
|
525
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
526
|
+
simde__m128i
|
|
527
|
+
simde_mm_cvtepi8_epi32 (simde__m128i a) {
|
|
528
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
529
|
+
return _mm_cvtepi8_epi32(a);
|
|
530
|
+
#else
|
|
531
|
+
simde__m128i_private
|
|
532
|
+
r_,
|
|
533
|
+
a_ = simde__m128i_to_private(a);
|
|
534
|
+
|
|
535
|
+
SIMDE__VECTORIZE
|
|
536
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
537
|
+
r_.i32[i] = a_.i8[i];
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
return simde__m128i_from_private(r_);
|
|
541
|
+
#endif
|
|
542
|
+
}
|
|
543
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
544
|
+
# define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a)
|
|
545
|
+
#endif
|
|
546
|
+
|
|
547
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
548
|
+
simde__m128i
|
|
549
|
+
simde_mm_cvtepi8_epi64 (simde__m128i a) {
|
|
550
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
551
|
+
return _mm_cvtepi8_epi64(a);
|
|
552
|
+
#else
|
|
553
|
+
simde__m128i_private
|
|
554
|
+
r_,
|
|
555
|
+
a_ = simde__m128i_to_private(a);
|
|
556
|
+
|
|
557
|
+
SIMDE__VECTORIZE
|
|
558
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
559
|
+
r_.i64[i] = a_.i8[i];
|
|
560
|
+
}
|
|
561
|
+
|
|
562
|
+
return simde__m128i_from_private(r_);
|
|
563
|
+
#endif
|
|
564
|
+
}
|
|
565
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
566
|
+
# define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a)
|
|
567
|
+
#endif
|
|
568
|
+
|
|
569
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
570
|
+
simde__m128i
|
|
571
|
+
simde_mm_cvtepu8_epi16 (simde__m128i a) {
|
|
572
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
573
|
+
return _mm_cvtepu8_epi16(a);
|
|
574
|
+
#else
|
|
575
|
+
simde__m128i_private
|
|
576
|
+
r_,
|
|
577
|
+
a_ = simde__m128i_to_private(a);
|
|
578
|
+
|
|
579
|
+
#if defined(SIMDE__CONVERT_VECTOR) && !defined(SIMDE_BUG_CLANG_45541)
|
|
580
|
+
SIMDE__CONVERT_VECTOR(r_.i16, a_.m64_private[0].u8);
|
|
581
|
+
#else
|
|
582
|
+
SIMDE__VECTORIZE
|
|
583
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
584
|
+
r_.i16[i] = a_.u8[i];
|
|
585
|
+
}
|
|
586
|
+
#endif
|
|
587
|
+
|
|
588
|
+
return simde__m128i_from_private(r_);
|
|
589
|
+
#endif
|
|
590
|
+
}
|
|
591
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
592
|
+
# define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a)
|
|
593
|
+
#endif
|
|
594
|
+
|
|
595
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
596
|
+
simde__m128i
|
|
597
|
+
simde_mm_cvtepu8_epi32 (simde__m128i a) {
|
|
598
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
599
|
+
return _mm_cvtepu8_epi32(a);
|
|
600
|
+
#else
|
|
601
|
+
simde__m128i_private
|
|
602
|
+
r_,
|
|
603
|
+
a_ = simde__m128i_to_private(a);
|
|
604
|
+
|
|
605
|
+
#if defined(SIMDE_SSE4_1_NEON)
|
|
606
|
+
uint8x16_t u8x16 = a_.neon_u8;
|
|
607
|
+
uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16));
|
|
608
|
+
r_.neon_u32 = vmovl_u16(vget_low_u16(u16x8));
|
|
609
|
+
#else
|
|
610
|
+
SIMDE__VECTORIZE
|
|
611
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
612
|
+
r_.i32[i] = a_.u8[i];
|
|
613
|
+
}
|
|
614
|
+
#endif
|
|
615
|
+
|
|
616
|
+
return simde__m128i_from_private(r_);
|
|
617
|
+
#endif
|
|
618
|
+
}
|
|
619
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
620
|
+
# define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a)
|
|
621
|
+
#endif
|
|
622
|
+
|
|
623
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
624
|
+
simde__m128i
|
|
625
|
+
simde_mm_cvtepu8_epi64 (simde__m128i a) {
|
|
626
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
627
|
+
return _mm_cvtepu8_epi64(a);
|
|
628
|
+
#else
|
|
629
|
+
simde__m128i_private
|
|
630
|
+
r_,
|
|
631
|
+
a_ = simde__m128i_to_private(a);
|
|
632
|
+
|
|
633
|
+
SIMDE__VECTORIZE
|
|
634
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
635
|
+
r_.i64[i] = a_.u8[i];
|
|
636
|
+
}
|
|
637
|
+
|
|
638
|
+
return simde__m128i_from_private(r_);
|
|
639
|
+
#endif
|
|
640
|
+
}
|
|
641
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
642
|
+
# define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a)
|
|
643
|
+
#endif
|
|
644
|
+
|
|
645
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
646
|
+
simde__m128i
|
|
647
|
+
simde_mm_cvtepi16_epi32 (simde__m128i a) {
|
|
648
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
649
|
+
return _mm_cvtepi16_epi32(a);
|
|
650
|
+
#else
|
|
651
|
+
simde__m128i_private
|
|
652
|
+
r_,
|
|
653
|
+
a_ = simde__m128i_to_private(a);
|
|
654
|
+
|
|
655
|
+
#if defined(SIMDE_SSE4_1_NEON)
|
|
656
|
+
r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16));
|
|
657
|
+
#else
|
|
658
|
+
SIMDE__VECTORIZE
|
|
659
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
660
|
+
r_.i32[i] = a_.i16[i];
|
|
661
|
+
}
|
|
662
|
+
#endif
|
|
663
|
+
|
|
664
|
+
return simde__m128i_from_private(r_);
|
|
665
|
+
#endif
|
|
666
|
+
}
|
|
667
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
668
|
+
# define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a)
|
|
669
|
+
#endif
|
|
670
|
+
|
|
671
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
672
|
+
simde__m128i
|
|
673
|
+
simde_mm_cvtepu16_epi32 (simde__m128i a) {
|
|
674
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
675
|
+
return _mm_cvtepu16_epi32(a);
|
|
676
|
+
#else
|
|
677
|
+
simde__m128i_private
|
|
678
|
+
r_,
|
|
679
|
+
a_ = simde__m128i_to_private(a);
|
|
680
|
+
|
|
681
|
+
#if defined(SIMDE__CONVERT_VECTOR) && !defined(SIMDE_BUG_CLANG_45541)
|
|
682
|
+
SIMDE__CONVERT_VECTOR(r_.i32, a_.m64_private[0].u16);
|
|
683
|
+
#else
|
|
684
|
+
SIMDE__VECTORIZE
|
|
685
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
686
|
+
r_.i32[i] = a_.u16[i];
|
|
687
|
+
}
|
|
688
|
+
#endif
|
|
689
|
+
|
|
690
|
+
return simde__m128i_from_private(r_);
|
|
691
|
+
#endif
|
|
692
|
+
}
|
|
693
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
694
|
+
# define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a)
|
|
695
|
+
#endif
|
|
696
|
+
|
|
697
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
698
|
+
simde__m128i
|
|
699
|
+
simde_mm_cvtepu16_epi64 (simde__m128i a) {
|
|
700
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
701
|
+
return _mm_cvtepu16_epi64(a);
|
|
702
|
+
#else
|
|
703
|
+
simde__m128i_private
|
|
704
|
+
r_,
|
|
705
|
+
a_ = simde__m128i_to_private(a);
|
|
706
|
+
|
|
707
|
+
SIMDE__VECTORIZE
|
|
708
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
709
|
+
r_.i64[i] = a_.u16[i];
|
|
710
|
+
}
|
|
711
|
+
|
|
712
|
+
return simde__m128i_from_private(r_);
|
|
713
|
+
#endif
|
|
714
|
+
}
|
|
715
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
716
|
+
# define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a)
|
|
717
|
+
#endif
|
|
718
|
+
|
|
719
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
720
|
+
simde__m128i
|
|
721
|
+
simde_mm_cvtepi16_epi64 (simde__m128i a) {
|
|
722
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
723
|
+
return _mm_cvtepi16_epi64(a);
|
|
724
|
+
#else
|
|
725
|
+
simde__m128i_private
|
|
726
|
+
r_,
|
|
727
|
+
a_ = simde__m128i_to_private(a);
|
|
728
|
+
|
|
729
|
+
SIMDE__VECTORIZE
|
|
730
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
731
|
+
r_.i64[i] = a_.i16[i];
|
|
732
|
+
}
|
|
733
|
+
|
|
734
|
+
return simde__m128i_from_private(r_);
|
|
735
|
+
#endif
|
|
736
|
+
}
|
|
737
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
738
|
+
# define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a)
|
|
739
|
+
#endif
|
|
740
|
+
|
|
741
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
742
|
+
simde__m128i
|
|
743
|
+
simde_mm_cvtepi32_epi64 (simde__m128i a) {
|
|
744
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
745
|
+
return _mm_cvtepi32_epi64(a);
|
|
746
|
+
#else
|
|
747
|
+
simde__m128i_private
|
|
748
|
+
r_,
|
|
749
|
+
a_ = simde__m128i_to_private(a);
|
|
750
|
+
|
|
751
|
+
#if defined(SIMDE__CONVERT_VECTOR)
|
|
752
|
+
SIMDE__CONVERT_VECTOR(r_.i64, a_.m64_private[0].i32);
|
|
753
|
+
#else
|
|
754
|
+
SIMDE__VECTORIZE
|
|
755
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
756
|
+
r_.i64[i] = a_.i32[i];
|
|
757
|
+
}
|
|
758
|
+
#endif
|
|
759
|
+
|
|
760
|
+
return simde__m128i_from_private(r_);
|
|
761
|
+
#endif
|
|
762
|
+
}
|
|
763
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
764
|
+
# define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a)
|
|
765
|
+
#endif
|
|
766
|
+
|
|
767
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
768
|
+
simde__m128i
|
|
769
|
+
simde_mm_cvtepu32_epi64 (simde__m128i a) {
|
|
770
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
771
|
+
return _mm_cvtepu32_epi64(a);
|
|
772
|
+
#else
|
|
773
|
+
simde__m128i_private
|
|
774
|
+
r_,
|
|
775
|
+
a_ = simde__m128i_to_private(a);
|
|
776
|
+
|
|
777
|
+
#if defined(SIMDE__CONVERT_VECTOR)
|
|
778
|
+
SIMDE__CONVERT_VECTOR(r_.i64, a_.m64_private[0].u32);
|
|
779
|
+
#else
|
|
780
|
+
SIMDE__VECTORIZE
|
|
781
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
782
|
+
r_.i64[i] = a_.u32[i];
|
|
783
|
+
}
|
|
784
|
+
#endif
|
|
785
|
+
|
|
786
|
+
return simde__m128i_from_private(r_);
|
|
787
|
+
#endif
|
|
788
|
+
}
|
|
789
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
790
|
+
# define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a)
|
|
791
|
+
#endif
|
|
792
|
+
|
|
793
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
794
|
+
simde__m128d
|
|
795
|
+
simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8)
|
|
796
|
+
HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
|
|
797
|
+
simde__m128d_private
|
|
798
|
+
r_,
|
|
799
|
+
a_ = simde__m128d_to_private(a),
|
|
800
|
+
b_ = simde__m128d_to_private(b);
|
|
801
|
+
|
|
802
|
+
simde_float64 sum = SIMDE_FLOAT64_C(0.0);
|
|
803
|
+
|
|
804
|
+
SIMDE__VECTORIZE_REDUCTION(+:sum)
|
|
805
|
+
for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
|
|
806
|
+
sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0;
|
|
807
|
+
}
|
|
808
|
+
|
|
809
|
+
SIMDE__VECTORIZE
|
|
810
|
+
for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
|
|
811
|
+
r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0;
|
|
812
|
+
}
|
|
813
|
+
|
|
814
|
+
return simde__m128d_from_private(r_);
|
|
815
|
+
}
|
|
816
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
817
|
+
# define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8)
|
|
818
|
+
#endif
|
|
819
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
820
|
+
# define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8)
|
|
821
|
+
#endif
|
|
822
|
+
|
|
823
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
824
|
+
simde__m128
|
|
825
|
+
simde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8)
|
|
826
|
+
HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
|
|
827
|
+
simde__m128_private
|
|
828
|
+
r_,
|
|
829
|
+
a_ = simde__m128_to_private(a),
|
|
830
|
+
b_ = simde__m128_to_private(b);
|
|
831
|
+
|
|
832
|
+
simde_float32 sum = SIMDE_FLOAT32_C(0.0);
|
|
833
|
+
|
|
834
|
+
SIMDE__VECTORIZE_REDUCTION(+:sum)
|
|
835
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
836
|
+
sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0);
|
|
837
|
+
}
|
|
838
|
+
|
|
839
|
+
SIMDE__VECTORIZE
|
|
840
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
841
|
+
r_.f32[i] = ((imm8 >> i) & 1) ? sum : SIMDE_FLOAT32_C(0.0);
|
|
842
|
+
}
|
|
843
|
+
|
|
844
|
+
return simde__m128_from_private(r_);
|
|
845
|
+
}
|
|
846
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
847
|
+
# define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8)
|
|
848
|
+
#endif
|
|
849
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
850
|
+
# define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8)
|
|
851
|
+
#endif
|
|
852
|
+
|
|
853
|
+
#if defined(simde_mm_extract_epi8)
|
|
854
|
+
# undef simde_mm_extract_epi8
|
|
855
|
+
#endif
|
|
856
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
857
|
+
int8_t
|
|
858
|
+
simde_mm_extract_epi8 (simde__m128i a, const int imm8)
|
|
859
|
+
HEDLEY_REQUIRE_MSG((imm8 & 0xf) == imm8, "imm8 must be in range [0, 15]") {
|
|
860
|
+
simde__m128i_private
|
|
861
|
+
a_ = simde__m128i_to_private(a);
|
|
862
|
+
|
|
863
|
+
return a_.i8[imm8&15];
|
|
864
|
+
}
|
|
865
|
+
#if defined(SIMDE_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8)
|
|
866
|
+
# define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8))
|
|
867
|
+
#elif defined(SIMDE_SSE4_1_NEON)
|
|
868
|
+
# define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_private(a).neon_i8, imm8)
|
|
869
|
+
#endif
|
|
870
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
871
|
+
# define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8))
|
|
872
|
+
#endif
|
|
873
|
+
|
|
874
|
+
#if defined(simde_mm_extract_epi32)
|
|
875
|
+
# undef simde_mm_extract_epi32
|
|
876
|
+
#endif
|
|
877
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
878
|
+
int32_t
|
|
879
|
+
simde_mm_extract_epi32 (simde__m128i a, const int imm8)
|
|
880
|
+
HEDLEY_REQUIRE_MSG((imm8 & 3) == imm8, "imm8 must be in range [0, 3]") {
|
|
881
|
+
simde__m128i_private
|
|
882
|
+
a_ = simde__m128i_to_private(a);
|
|
883
|
+
|
|
884
|
+
return a_.i32[imm8&3];
|
|
885
|
+
}
|
|
886
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
887
|
+
# define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8)
|
|
888
|
+
#elif defined(SIMDE_SSE4_1_NEON)
|
|
889
|
+
# define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_private(a).neon_i32, imm8)
|
|
890
|
+
#endif
|
|
891
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
892
|
+
# define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8)
|
|
893
|
+
#endif
|
|
894
|
+
|
|
895
|
+
#if defined(simde_mm_extract_epi64)
|
|
896
|
+
# undef simde_mm_extract_epi64
|
|
897
|
+
#endif
|
|
898
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
899
|
+
int64_t
|
|
900
|
+
simde_mm_extract_epi64 (simde__m128i a, const int imm8)
|
|
901
|
+
HEDLEY_REQUIRE_MSG((imm8 & 1) == imm8, "imm8 must be 0 or 1") {
|
|
902
|
+
simde__m128i_private
|
|
903
|
+
a_ = simde__m128i_to_private(a);
|
|
904
|
+
|
|
905
|
+
return a_.i64[imm8&1];
|
|
906
|
+
}
|
|
907
|
+
#if defined(SIMDE_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64)
|
|
908
|
+
# define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8)
|
|
909
|
+
#elif defined(SIMDE_SSE4_1_NEON)
|
|
910
|
+
# define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_private(a).neon_i64, imm8)
|
|
911
|
+
#endif
|
|
912
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
913
|
+
# define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8)
|
|
914
|
+
#endif
|
|
915
|
+
|
|
916
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
917
|
+
simde__m128d
|
|
918
|
+
simde_mm_floor_pd (simde__m128d a) {
|
|
919
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
920
|
+
return _mm_floor_pd(a);
|
|
921
|
+
#else
|
|
922
|
+
simde__m128d_private
|
|
923
|
+
r_,
|
|
924
|
+
a_ = simde__m128d_to_private(a);
|
|
925
|
+
|
|
926
|
+
#if defined(SIMDE_HAVE_MATH_H)
|
|
927
|
+
SIMDE__VECTORIZE
|
|
928
|
+
for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
|
|
929
|
+
r_.f64[i] = floor(a_.f64[i]);
|
|
930
|
+
}
|
|
931
|
+
#else
|
|
932
|
+
HEDLEY_UNREACHABLE();
|
|
933
|
+
#endif
|
|
934
|
+
|
|
935
|
+
return simde__m128d_from_private(r_);
|
|
936
|
+
#endif
|
|
937
|
+
}
|
|
938
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
939
|
+
# define _mm_floor_pd(a) simde_mm_floor_pd(a)
|
|
940
|
+
#endif
|
|
941
|
+
|
|
942
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
943
|
+
simde__m128
|
|
944
|
+
simde_mm_floor_ps (simde__m128 a) {
|
|
945
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
946
|
+
return _mm_floor_ps(a);
|
|
947
|
+
#else
|
|
948
|
+
simde__m128_private
|
|
949
|
+
r_,
|
|
950
|
+
a_ = simde__m128_to_private(a);
|
|
951
|
+
|
|
952
|
+
#if defined(SIMDE_HAVE_MATH_H)
|
|
953
|
+
SIMDE__VECTORIZE
|
|
954
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
955
|
+
r_.f32[i] = floorf(a_.f32[i]);
|
|
956
|
+
}
|
|
957
|
+
#else
|
|
958
|
+
HEDLEY_UNREACHABLE();
|
|
959
|
+
#endif
|
|
960
|
+
|
|
961
|
+
return simde__m128_from_private(r_);
|
|
962
|
+
#endif
|
|
963
|
+
}
|
|
964
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
965
|
+
# define _mm_floor_ps(a) simde_mm_floor_ps(a)
|
|
966
|
+
#endif
|
|
967
|
+
|
|
968
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
969
|
+
simde__m128d
|
|
970
|
+
simde_mm_floor_sd (simde__m128d a, simde__m128d b) {
|
|
971
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
972
|
+
return _mm_floor_sd(a, b);
|
|
973
|
+
#else
|
|
974
|
+
simde__m128d_private
|
|
975
|
+
r_,
|
|
976
|
+
a_ = simde__m128d_to_private(a),
|
|
977
|
+
b_ = simde__m128d_to_private(b);
|
|
978
|
+
|
|
979
|
+
#if defined(SIMDE_HAVE_MATH_H)
|
|
980
|
+
r_.f64[0] = floor(b_.f64[0]);
|
|
981
|
+
r_.f64[1] = a_.f64[1];
|
|
982
|
+
#else
|
|
983
|
+
HEDLEY_UNREACHABLE();
|
|
984
|
+
#endif
|
|
985
|
+
|
|
986
|
+
return simde__m128d_from_private(r_);
|
|
987
|
+
#endif
|
|
988
|
+
}
|
|
989
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
990
|
+
# define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b)
|
|
991
|
+
#endif
|
|
992
|
+
|
|
993
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
994
|
+
simde__m128
|
|
995
|
+
simde_mm_floor_ss (simde__m128 a, simde__m128 b) {
|
|
996
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
997
|
+
return _mm_floor_ss(a, b);
|
|
998
|
+
#elif defined(SIMDE_ASSUME_VECTORIZATION)
|
|
999
|
+
return simde_mm_move_ss(a, simde_mm_floor_ps(b));
|
|
1000
|
+
#else
|
|
1001
|
+
simde__m128_private
|
|
1002
|
+
r_,
|
|
1003
|
+
a_ = simde__m128_to_private(a),
|
|
1004
|
+
b_ = simde__m128_to_private(b);
|
|
1005
|
+
|
|
1006
|
+
#if defined(SIMDE_HAVE_MATH_H)
|
|
1007
|
+
r_.f32[0] = floorf(b_.f32[0]);
|
|
1008
|
+
for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
1009
|
+
r_.f32[i] = a_.f32[i];
|
|
1010
|
+
}
|
|
1011
|
+
#else
|
|
1012
|
+
HEDLEY_UNREACHABLE();
|
|
1013
|
+
#endif
|
|
1014
|
+
|
|
1015
|
+
return simde__m128_from_private(r_);
|
|
1016
|
+
#endif
|
|
1017
|
+
}
|
|
1018
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1019
|
+
# define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b)
|
|
1020
|
+
#endif
|
|
1021
|
+
|
|
1022
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1023
|
+
simde__m128i
|
|
1024
|
+
simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8)
|
|
1025
|
+
HEDLEY_REQUIRE_MSG((imm8 & 0xf) == imm8, "imm8 must be in range [0, 15]") {
|
|
1026
|
+
simde__m128i_private
|
|
1027
|
+
r_ = simde__m128i_to_private(a);
|
|
1028
|
+
|
|
1029
|
+
r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i);
|
|
1030
|
+
|
|
1031
|
+
return simde__m128i_from_private(r_);
|
|
1032
|
+
}
|
|
1033
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
1034
|
+
# define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8)
|
|
1035
|
+
#endif
|
|
1036
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1037
|
+
# define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8)
|
|
1038
|
+
#endif
|
|
1039
|
+
|
|
1040
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1041
|
+
simde__m128i
|
|
1042
|
+
simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8)
|
|
1043
|
+
HEDLEY_REQUIRE_MSG((imm8 & 3) == imm8, "imm8 must be in range [0, 3]") {
|
|
1044
|
+
simde__m128i_private
|
|
1045
|
+
r_ = simde__m128i_to_private(a);
|
|
1046
|
+
|
|
1047
|
+
r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i);
|
|
1048
|
+
|
|
1049
|
+
return simde__m128i_from_private(r_);
|
|
1050
|
+
}
|
|
1051
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
1052
|
+
# define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8)
|
|
1053
|
+
#endif
|
|
1054
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1055
|
+
# define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8)
|
|
1056
|
+
#endif
|
|
1057
|
+
|
|
1058
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1059
|
+
simde__m128i
|
|
1060
|
+
simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8)
|
|
1061
|
+
HEDLEY_REQUIRE_MSG((imm8 & 1) == imm8, "imm8 must be 0 or 1") {
|
|
1062
|
+
#if defined(SIMDE_BUG_GCC_94482)
|
|
1063
|
+
simde__m128i_private
|
|
1064
|
+
a_ = simde__m128i_to_private(a);
|
|
1065
|
+
|
|
1066
|
+
switch(imm8) {
|
|
1067
|
+
case 0:
|
|
1068
|
+
return simde_mm_set_epi64x(a_.i64[1], i);
|
|
1069
|
+
break;
|
|
1070
|
+
case 1:
|
|
1071
|
+
return simde_mm_set_epi64x(i, a_.i64[0]);
|
|
1072
|
+
break;
|
|
1073
|
+
default:
|
|
1074
|
+
HEDLEY_UNREACHABLE();
|
|
1075
|
+
break;
|
|
1076
|
+
}
|
|
1077
|
+
#else
|
|
1078
|
+
simde__m128i_private
|
|
1079
|
+
r_ = simde__m128i_to_private(a);
|
|
1080
|
+
|
|
1081
|
+
r_.i64[imm8] = i;
|
|
1082
|
+
|
|
1083
|
+
return simde__m128i_from_private(r_);
|
|
1084
|
+
#endif
|
|
1085
|
+
}
|
|
1086
|
+
#if defined(SIMDE_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64)
|
|
1087
|
+
# define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8)
|
|
1088
|
+
#endif
|
|
1089
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1090
|
+
# define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8)
|
|
1091
|
+
#endif
|
|
1092
|
+
|
|
1093
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1094
|
+
simde__m128
|
|
1095
|
+
simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8)
|
|
1096
|
+
HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
|
|
1097
|
+
simde__m128_private
|
|
1098
|
+
r_,
|
|
1099
|
+
a_ = simde__m128_to_private(a),
|
|
1100
|
+
b_ = simde__m128_to_private(b);
|
|
1101
|
+
|
|
1102
|
+
a_.f32[0] = b_.f32[(imm8 >> 6) & 3];
|
|
1103
|
+
a_.f32[(imm8 >> 4) & 3] = a_.f32[0];
|
|
1104
|
+
|
|
1105
|
+
SIMDE__VECTORIZE
|
|
1106
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
1107
|
+
r_.f32[i] = (imm8 >> i) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i];
|
|
1108
|
+
}
|
|
1109
|
+
|
|
1110
|
+
return simde__m128_from_private(r_);
|
|
1111
|
+
}
|
|
1112
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
1113
|
+
# define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8)
|
|
1114
|
+
#endif
|
|
1115
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1116
|
+
# define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8)
|
|
1117
|
+
#endif
|
|
1118
|
+
|
|
1119
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1120
|
+
simde__m128i
|
|
1121
|
+
simde_mm_max_epi8 (simde__m128i a, simde__m128i b) {
|
|
1122
|
+
#if defined(SIMDE_SSE4_1_NATIVE) && !defined(__PGI)
|
|
1123
|
+
return _mm_max_epi8(a, b);
|
|
1124
|
+
#else
|
|
1125
|
+
simde__m128i_private
|
|
1126
|
+
r_,
|
|
1127
|
+
a_ = simde__m128i_to_private(a),
|
|
1128
|
+
b_ = simde__m128i_to_private(b);
|
|
1129
|
+
|
|
1130
|
+
#if defined(SIMDE_SSE4_1_NEON)
|
|
1131
|
+
r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8);
|
|
1132
|
+
#else
|
|
1133
|
+
SIMDE__VECTORIZE
|
|
1134
|
+
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
|
|
1135
|
+
r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i];
|
|
1136
|
+
}
|
|
1137
|
+
#endif
|
|
1138
|
+
|
|
1139
|
+
return simde__m128i_from_private(r_);
|
|
1140
|
+
#endif
|
|
1141
|
+
}
|
|
1142
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1143
|
+
# define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b)
|
|
1144
|
+
#endif
|
|
1145
|
+
|
|
1146
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1147
|
+
simde__m128i
|
|
1148
|
+
simde_mm_max_epi32 (simde__m128i a, simde__m128i b) {
|
|
1149
|
+
#if defined(SIMDE_SSE4_1_NATIVE) && !defined(__PGI)
|
|
1150
|
+
return _mm_max_epi32(a, b);
|
|
1151
|
+
#else
|
|
1152
|
+
simde__m128i_private
|
|
1153
|
+
r_,
|
|
1154
|
+
a_ = simde__m128i_to_private(a),
|
|
1155
|
+
b_ = simde__m128i_to_private(b);
|
|
1156
|
+
|
|
1157
|
+
#if defined(SIMDE_SSE4_1_NEON)
|
|
1158
|
+
r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32);
|
|
1159
|
+
#else
|
|
1160
|
+
SIMDE__VECTORIZE
|
|
1161
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
1162
|
+
r_.i32[i] = a_.i32[i] > b_.i32[i] ? a_.i32[i] : b_.i32[i];
|
|
1163
|
+
}
|
|
1164
|
+
#endif
|
|
1165
|
+
|
|
1166
|
+
return simde__m128i_from_private(r_);
|
|
1167
|
+
#endif
|
|
1168
|
+
}
|
|
1169
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1170
|
+
# define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b)
|
|
1171
|
+
#endif
|
|
1172
|
+
|
|
1173
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1174
|
+
simde__m128i
|
|
1175
|
+
simde_mm_max_epu16 (simde__m128i a, simde__m128i b) {
|
|
1176
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
1177
|
+
return _mm_max_epu16(a, b);
|
|
1178
|
+
#else
|
|
1179
|
+
simde__m128i_private
|
|
1180
|
+
r_,
|
|
1181
|
+
a_ = simde__m128i_to_private(a),
|
|
1182
|
+
b_ = simde__m128i_to_private(b);
|
|
1183
|
+
|
|
1184
|
+
#if defined(SIMDE_SSE4_1_NEON)
|
|
1185
|
+
r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16);
|
|
1186
|
+
#else
|
|
1187
|
+
SIMDE__VECTORIZE
|
|
1188
|
+
for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
|
|
1189
|
+
r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i];
|
|
1190
|
+
}
|
|
1191
|
+
#endif
|
|
1192
|
+
|
|
1193
|
+
return simde__m128i_from_private(r_);
|
|
1194
|
+
#endif
|
|
1195
|
+
}
|
|
1196
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1197
|
+
# define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b)
|
|
1198
|
+
#endif
|
|
1199
|
+
|
|
1200
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1201
|
+
simde__m128i
|
|
1202
|
+
simde_mm_max_epu32 (simde__m128i a, simde__m128i b) {
|
|
1203
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
1204
|
+
return _mm_max_epu32(a, b);
|
|
1205
|
+
#else
|
|
1206
|
+
simde__m128i_private
|
|
1207
|
+
r_,
|
|
1208
|
+
a_ = simde__m128i_to_private(a),
|
|
1209
|
+
b_ = simde__m128i_to_private(b);
|
|
1210
|
+
|
|
1211
|
+
#if defined(SIMDE_SSE4_1_NEON)
|
|
1212
|
+
r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32);
|
|
1213
|
+
#else
|
|
1214
|
+
SIMDE__VECTORIZE
|
|
1215
|
+
for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
|
|
1216
|
+
r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i];
|
|
1217
|
+
}
|
|
1218
|
+
#endif
|
|
1219
|
+
|
|
1220
|
+
return simde__m128i_from_private(r_);
|
|
1221
|
+
#endif
|
|
1222
|
+
}
|
|
1223
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1224
|
+
# define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b)
|
|
1225
|
+
#endif
|
|
1226
|
+
|
|
1227
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1228
|
+
simde__m128i
|
|
1229
|
+
simde_mm_min_epi8 (simde__m128i a, simde__m128i b) {
|
|
1230
|
+
#if defined(SIMDE_SSE4_1_NATIVE) && !defined(__PGI)
|
|
1231
|
+
return _mm_min_epi8(a, b);
|
|
1232
|
+
#else
|
|
1233
|
+
simde__m128i_private
|
|
1234
|
+
r_,
|
|
1235
|
+
a_ = simde__m128i_to_private(a),
|
|
1236
|
+
b_ = simde__m128i_to_private(b);
|
|
1237
|
+
|
|
1238
|
+
#if defined(SIMDE_SSE4_1_NEON)
|
|
1239
|
+
r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8);
|
|
1240
|
+
#else
|
|
1241
|
+
SIMDE__VECTORIZE
|
|
1242
|
+
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
|
|
1243
|
+
r_.i8[i] = a_.i8[i] < b_.i8[i] ? a_.i8[i] : b_.i8[i];
|
|
1244
|
+
}
|
|
1245
|
+
#endif
|
|
1246
|
+
|
|
1247
|
+
return simde__m128i_from_private(r_);
|
|
1248
|
+
#endif
|
|
1249
|
+
}
|
|
1250
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1251
|
+
# define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b)
|
|
1252
|
+
#endif
|
|
1253
|
+
|
|
1254
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1255
|
+
simde__m128i
|
|
1256
|
+
simde_mm_min_epi32 (simde__m128i a, simde__m128i b) {
|
|
1257
|
+
#if defined(SIMDE_SSE4_1_NATIVE) && !defined(__PGI)
|
|
1258
|
+
return _mm_min_epi32(a, b);
|
|
1259
|
+
#else
|
|
1260
|
+
simde__m128i_private
|
|
1261
|
+
r_,
|
|
1262
|
+
a_ = simde__m128i_to_private(a),
|
|
1263
|
+
b_ = simde__m128i_to_private(b);
|
|
1264
|
+
|
|
1265
|
+
#if defined(SIMDE_SSE4_1_NEON)
|
|
1266
|
+
r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32);
|
|
1267
|
+
#else
|
|
1268
|
+
SIMDE__VECTORIZE
|
|
1269
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
1270
|
+
r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i];
|
|
1271
|
+
}
|
|
1272
|
+
#endif
|
|
1273
|
+
|
|
1274
|
+
return simde__m128i_from_private(r_);
|
|
1275
|
+
#endif
|
|
1276
|
+
}
|
|
1277
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1278
|
+
# define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b)
|
|
1279
|
+
#endif
|
|
1280
|
+
|
|
1281
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1282
|
+
simde__m128i
|
|
1283
|
+
simde_mm_min_epu16 (simde__m128i a, simde__m128i b) {
|
|
1284
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
1285
|
+
return _mm_min_epu16(a, b);
|
|
1286
|
+
#else
|
|
1287
|
+
simde__m128i_private
|
|
1288
|
+
r_,
|
|
1289
|
+
a_ = simde__m128i_to_private(a),
|
|
1290
|
+
b_ = simde__m128i_to_private(b);
|
|
1291
|
+
|
|
1292
|
+
#if defined(SIMDE_SSE4_1_NEON)
|
|
1293
|
+
r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16);
|
|
1294
|
+
#else
|
|
1295
|
+
SIMDE__VECTORIZE
|
|
1296
|
+
for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
|
|
1297
|
+
r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i];
|
|
1298
|
+
}
|
|
1299
|
+
#endif
|
|
1300
|
+
|
|
1301
|
+
return simde__m128i_from_private(r_);
|
|
1302
|
+
#endif
|
|
1303
|
+
}
|
|
1304
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1305
|
+
# define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b)
|
|
1306
|
+
#endif
|
|
1307
|
+
|
|
1308
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1309
|
+
simde__m128i
|
|
1310
|
+
simde_mm_min_epu32 (simde__m128i a, simde__m128i b) {
|
|
1311
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
1312
|
+
return _mm_min_epu32(a, b);
|
|
1313
|
+
#else
|
|
1314
|
+
simde__m128i_private
|
|
1315
|
+
r_,
|
|
1316
|
+
a_ = simde__m128i_to_private(a),
|
|
1317
|
+
b_ = simde__m128i_to_private(b);
|
|
1318
|
+
|
|
1319
|
+
#if defined(SIMDE_SSE4_1_NEON)
|
|
1320
|
+
r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32);
|
|
1321
|
+
#else
|
|
1322
|
+
SIMDE__VECTORIZE
|
|
1323
|
+
for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
|
|
1324
|
+
r_.u32[i] = a_.u32[i] < b_.u32[i] ? a_.u32[i] : b_.u32[i];
|
|
1325
|
+
}
|
|
1326
|
+
#endif
|
|
1327
|
+
|
|
1328
|
+
return simde__m128i_from_private(r_);
|
|
1329
|
+
#endif
|
|
1330
|
+
}
|
|
1331
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1332
|
+
# define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b)
|
|
1333
|
+
#endif
|
|
1334
|
+
|
|
1335
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1336
|
+
simde__m128i
|
|
1337
|
+
simde_mm_minpos_epu16 (simde__m128i a) {
|
|
1338
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
1339
|
+
return _mm_minpos_epu16(a);
|
|
1340
|
+
#else
|
|
1341
|
+
simde__m128i_private
|
|
1342
|
+
r_ = simde__m128i_to_private(simde_mm_setzero_si128()),
|
|
1343
|
+
a_ = simde__m128i_to_private(a);
|
|
1344
|
+
|
|
1345
|
+
r_.u16[0] = UINT16_MAX;
|
|
1346
|
+
for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
|
|
1347
|
+
if (a_.u16[i] < r_.u16[0]) {
|
|
1348
|
+
r_.u16[0] = a_.u16[i];
|
|
1349
|
+
r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i);
|
|
1350
|
+
}
|
|
1351
|
+
}
|
|
1352
|
+
|
|
1353
|
+
return simde__m128i_from_private(r_);
|
|
1354
|
+
#endif
|
|
1355
|
+
}
|
|
1356
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1357
|
+
# define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a)
|
|
1358
|
+
#endif
|
|
1359
|
+
|
|
1360
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1361
|
+
simde__m128i
|
|
1362
|
+
simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8)
|
|
1363
|
+
HEDLEY_REQUIRE_MSG((imm8 & 7) == imm8, "imm8 must be in range [0, 7]") {
|
|
1364
|
+
simde__m128i_private
|
|
1365
|
+
r_,
|
|
1366
|
+
a_ = simde__m128i_to_private(a),
|
|
1367
|
+
b_ = simde__m128i_to_private(b);
|
|
1368
|
+
|
|
1369
|
+
const int a_offset = imm8 & 4;
|
|
1370
|
+
const int b_offset = (imm8 & 3) << 2;
|
|
1371
|
+
|
|
1372
|
+
#if defined(SIMDE_HAVE_MATH_H)
|
|
1373
|
+
for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) {
|
|
1374
|
+
r_.u16[i] =
|
|
1375
|
+
HEDLEY_STATIC_CAST(uint16_t, abs(a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0])) +
|
|
1376
|
+
HEDLEY_STATIC_CAST(uint16_t, abs(a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1])) +
|
|
1377
|
+
HEDLEY_STATIC_CAST(uint16_t, abs(a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2])) +
|
|
1378
|
+
HEDLEY_STATIC_CAST(uint16_t, abs(a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]));
|
|
1379
|
+
}
|
|
1380
|
+
#else
|
|
1381
|
+
HEDLEY_UNREACHABLE();
|
|
1382
|
+
#endif
|
|
1383
|
+
|
|
1384
|
+
return simde__m128i_from_private(r_);
|
|
1385
|
+
}
|
|
1386
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
1387
|
+
# define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8)
|
|
1388
|
+
#endif
|
|
1389
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1390
|
+
# define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8)
|
|
1391
|
+
#endif
|
|
1392
|
+
|
|
1393
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1394
|
+
simde__m128i
|
|
1395
|
+
simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) {
|
|
1396
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
1397
|
+
return _mm_mul_epi32(a, b);
|
|
1398
|
+
#else
|
|
1399
|
+
simde__m128i_private
|
|
1400
|
+
r_,
|
|
1401
|
+
a_ = simde__m128i_to_private(a),
|
|
1402
|
+
b_ = simde__m128i_to_private(b);
|
|
1403
|
+
|
|
1404
|
+
SIMDE__VECTORIZE
|
|
1405
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
1406
|
+
r_.i64[i] =
|
|
1407
|
+
HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) *
|
|
1408
|
+
HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]);
|
|
1409
|
+
}
|
|
1410
|
+
|
|
1411
|
+
return simde__m128i_from_private(r_);
|
|
1412
|
+
#endif
|
|
1413
|
+
}
|
|
1414
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1415
|
+
# define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b)
|
|
1416
|
+
#endif
|
|
1417
|
+
|
|
1418
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1419
|
+
simde__m128i
|
|
1420
|
+
simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) {
|
|
1421
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
1422
|
+
return _mm_mullo_epi32(a, b);
|
|
1423
|
+
#else
|
|
1424
|
+
simde__m128i_private
|
|
1425
|
+
r_,
|
|
1426
|
+
a_ = simde__m128i_to_private(a),
|
|
1427
|
+
b_ = simde__m128i_to_private(b);
|
|
1428
|
+
|
|
1429
|
+
#if defined(SIMDE_SSE4_1_NEON)
|
|
1430
|
+
r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32);
|
|
1431
|
+
#else
|
|
1432
|
+
SIMDE__VECTORIZE
|
|
1433
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
1434
|
+
r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff));
|
|
1435
|
+
}
|
|
1436
|
+
#endif
|
|
1437
|
+
|
|
1438
|
+
return simde__m128i_from_private(r_);
|
|
1439
|
+
#endif
|
|
1440
|
+
}
|
|
1441
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1442
|
+
# define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b)
|
|
1443
|
+
#endif
|
|
1444
|
+
|
|
1445
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1446
|
+
simde__m128i
|
|
1447
|
+
simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) {
|
|
1448
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
1449
|
+
return _mm_packus_epi32(a, b);
|
|
1450
|
+
#else
|
|
1451
|
+
simde__m128i_private
|
|
1452
|
+
r_,
|
|
1453
|
+
a_ = simde__m128i_to_private(a),
|
|
1454
|
+
b_ = simde__m128i_to_private(b);
|
|
1455
|
+
|
|
1456
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
1457
|
+
r_.u16[i + 0] = (a_.i32[i] < 0) ? UINT16_C(0) : ((a_.i32[i] > UINT16_MAX) ? (UINT16_MAX) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[i]));
|
|
1458
|
+
r_.u16[i + 4] = (b_.i32[i] < 0) ? UINT16_C(0) : ((b_.i32[i] > UINT16_MAX) ? (UINT16_MAX) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[i]));
|
|
1459
|
+
}
|
|
1460
|
+
return simde__m128i_from_private(r_);
|
|
1461
|
+
#endif
|
|
1462
|
+
}
|
|
1463
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1464
|
+
# define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b)
|
|
1465
|
+
#endif
|
|
1466
|
+
|
|
1467
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1468
|
+
simde__m128d
|
|
1469
|
+
simde_mm_round_pd (simde__m128d a, int rounding) {
|
|
1470
|
+
simde__m128d_private
|
|
1471
|
+
r_,
|
|
1472
|
+
a_ = simde__m128d_to_private(a);
|
|
1473
|
+
|
|
1474
|
+
#if defined(SIMDE_HAVE_MATH_H)
|
|
1475
|
+
for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
|
|
1476
|
+
switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {
|
|
1477
|
+
case SIMDE_MM_FROUND_TO_NEAREST_INT:
|
|
1478
|
+
r_.f64[i] = nearbyint(a_.f64[i]);
|
|
1479
|
+
break;
|
|
1480
|
+
case SIMDE_MM_FROUND_TO_NEG_INF:
|
|
1481
|
+
r_.f64[i] = floor(a_.f64[i]);
|
|
1482
|
+
break;
|
|
1483
|
+
case SIMDE_MM_FROUND_TO_POS_INF:
|
|
1484
|
+
r_.f64[i] = ceil(a_.f64[i]);
|
|
1485
|
+
break;
|
|
1486
|
+
case SIMDE_MM_FROUND_TO_ZERO:
|
|
1487
|
+
r_.f64[i] = trunc(a_.f64[i]);
|
|
1488
|
+
break;
|
|
1489
|
+
case SIMDE_MM_FROUND_CUR_DIRECTION:
|
|
1490
|
+
r_.f64[i] = nearbyint(a_.f64[i]);
|
|
1491
|
+
break;
|
|
1492
|
+
default:
|
|
1493
|
+
HEDLEY_UNREACHABLE();
|
|
1494
|
+
break;
|
|
1495
|
+
}
|
|
1496
|
+
}
|
|
1497
|
+
#else
|
|
1498
|
+
HEDLEY_UNREACHABLE();
|
|
1499
|
+
#endif
|
|
1500
|
+
|
|
1501
|
+
return simde__m128d_from_private(r_);
|
|
1502
|
+
}
|
|
1503
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
1504
|
+
# define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding)
|
|
1505
|
+
#endif
|
|
1506
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1507
|
+
# define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding)
|
|
1508
|
+
#endif
|
|
1509
|
+
|
|
1510
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1511
|
+
simde__m128
|
|
1512
|
+
simde_mm_round_ps (simde__m128 a, int rounding) {
|
|
1513
|
+
simde__m128_private
|
|
1514
|
+
r_,
|
|
1515
|
+
a_ = simde__m128_to_private(a);
|
|
1516
|
+
|
|
1517
|
+
#if defined(SIMDE_HAVE_MATH_H)
|
|
1518
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
1519
|
+
switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {
|
|
1520
|
+
case SIMDE_MM_FROUND_TO_NEAREST_INT:
|
|
1521
|
+
r_.f32[i] = nearbyintf(a_.f32[i]);
|
|
1522
|
+
break;
|
|
1523
|
+
case SIMDE_MM_FROUND_TO_NEG_INF:
|
|
1524
|
+
r_.f32[i] = floorf(a_.f32[i]);
|
|
1525
|
+
break;
|
|
1526
|
+
case SIMDE_MM_FROUND_TO_POS_INF:
|
|
1527
|
+
r_.f32[i] = ceilf(a_.f32[i]);
|
|
1528
|
+
break;
|
|
1529
|
+
case SIMDE_MM_FROUND_TO_ZERO:
|
|
1530
|
+
r_.f32[i] = truncf(a_.f32[i]);
|
|
1531
|
+
break;
|
|
1532
|
+
case SIMDE_MM_FROUND_CUR_DIRECTION:
|
|
1533
|
+
r_.f32[i] = nearbyintf (a_.f32[i]);
|
|
1534
|
+
break;
|
|
1535
|
+
default:
|
|
1536
|
+
HEDLEY_UNREACHABLE();
|
|
1537
|
+
break;
|
|
1538
|
+
}
|
|
1539
|
+
}
|
|
1540
|
+
#else
|
|
1541
|
+
HEDLEY_UNREACHABLE();
|
|
1542
|
+
#endif
|
|
1543
|
+
|
|
1544
|
+
return simde__m128_from_private(r_);
|
|
1545
|
+
}
|
|
1546
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
1547
|
+
# define simde_mm_round_ps(a, rounding) _mm_round_ps(a, rounding)
|
|
1548
|
+
#endif
|
|
1549
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1550
|
+
# define _mm_round_ps(a, rounding) simde_mm_round_ps(a, rounding)
|
|
1551
|
+
#endif
|
|
1552
|
+
|
|
1553
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1554
|
+
simde__m128d
|
|
1555
|
+
simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) {
|
|
1556
|
+
simde__m128d_private
|
|
1557
|
+
r_ = simde__m128d_to_private(a),
|
|
1558
|
+
b_ = simde__m128d_to_private(b);
|
|
1559
|
+
|
|
1560
|
+
#if defined(SIMDE_HAVE_MATH_H)
|
|
1561
|
+
switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {
|
|
1562
|
+
case SIMDE_MM_FROUND_TO_NEAREST_INT:
|
|
1563
|
+
r_.f64[0] = nearbyint(b_.f64[0]);
|
|
1564
|
+
break;
|
|
1565
|
+
case SIMDE_MM_FROUND_TO_NEG_INF:
|
|
1566
|
+
r_.f64[0] = floor(b_.f64[0]);
|
|
1567
|
+
break;
|
|
1568
|
+
case SIMDE_MM_FROUND_TO_POS_INF:
|
|
1569
|
+
r_.f64[0] = ceil(b_.f64[0]);
|
|
1570
|
+
break;
|
|
1571
|
+
case SIMDE_MM_FROUND_TO_ZERO:
|
|
1572
|
+
r_.f64[0] = trunc(b_.f64[0]);
|
|
1573
|
+
break;
|
|
1574
|
+
case SIMDE_MM_FROUND_CUR_DIRECTION:
|
|
1575
|
+
r_.f64[0] = nearbyint(b_.f64[0]);
|
|
1576
|
+
break;
|
|
1577
|
+
default:
|
|
1578
|
+
HEDLEY_UNREACHABLE();
|
|
1579
|
+
break;
|
|
1580
|
+
}
|
|
1581
|
+
#else
|
|
1582
|
+
HEDLEY_UNREACHABLE();
|
|
1583
|
+
#endif
|
|
1584
|
+
|
|
1585
|
+
return simde__m128d_from_private(r_);
|
|
1586
|
+
}
|
|
1587
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
1588
|
+
# define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding)
|
|
1589
|
+
#endif
|
|
1590
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1591
|
+
# define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding)
|
|
1592
|
+
#endif
|
|
1593
|
+
|
|
1594
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1595
|
+
simde__m128
|
|
1596
|
+
simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding) {
|
|
1597
|
+
simde__m128_private
|
|
1598
|
+
r_ = simde__m128_to_private(a),
|
|
1599
|
+
b_ = simde__m128_to_private(b);
|
|
1600
|
+
|
|
1601
|
+
#if defined(SIMDE_HAVE_MATH_H)
|
|
1602
|
+
switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {
|
|
1603
|
+
case SIMDE_MM_FROUND_TO_NEAREST_INT:
|
|
1604
|
+
r_.f32[0] = nearbyintf(b_.f32[0]);
|
|
1605
|
+
break;
|
|
1606
|
+
case SIMDE_MM_FROUND_TO_NEG_INF:
|
|
1607
|
+
r_.f32[0] = floorf(b_.f32[0]);
|
|
1608
|
+
break;
|
|
1609
|
+
case SIMDE_MM_FROUND_TO_POS_INF:
|
|
1610
|
+
r_.f32[0] = ceilf(b_.f32[0]);
|
|
1611
|
+
break;
|
|
1612
|
+
case SIMDE_MM_FROUND_TO_ZERO:
|
|
1613
|
+
r_.f32[0] = truncf(b_.f32[0]);
|
|
1614
|
+
break;
|
|
1615
|
+
case SIMDE_MM_FROUND_CUR_DIRECTION:
|
|
1616
|
+
r_.f32[0] = nearbyintf (b_.f32[0]);
|
|
1617
|
+
break;
|
|
1618
|
+
default:
|
|
1619
|
+
HEDLEY_UNREACHABLE();
|
|
1620
|
+
break;
|
|
1621
|
+
}
|
|
1622
|
+
#else
|
|
1623
|
+
HEDLEY_UNREACHABLE();
|
|
1624
|
+
#endif
|
|
1625
|
+
|
|
1626
|
+
return simde__m128_from_private(r_);
|
|
1627
|
+
}
|
|
1628
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
1629
|
+
# define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding)
|
|
1630
|
+
#endif
|
|
1631
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1632
|
+
# define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding)
|
|
1633
|
+
#endif
|
|
1634
|
+
|
|
1635
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1636
|
+
simde__m128i
|
|
1637
|
+
simde_mm_stream_load_si128 (const simde__m128i* mem_addr) {
|
|
1638
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
1639
|
+
return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr));
|
|
1640
|
+
#else
|
|
1641
|
+
return *mem_addr;
|
|
1642
|
+
#endif
|
|
1643
|
+
}
|
|
1644
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1645
|
+
# define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr)
|
|
1646
|
+
#endif
|
|
1647
|
+
|
|
1648
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1649
|
+
int
|
|
1650
|
+
simde_mm_test_all_ones (simde__m128i a) {
|
|
1651
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
1652
|
+
return _mm_test_all_ones(a);
|
|
1653
|
+
#else
|
|
1654
|
+
simde__m128i_private a_ = simde__m128i_to_private(a);
|
|
1655
|
+
|
|
1656
|
+
for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) {
|
|
1657
|
+
if (a_.u64[i] != ~UINT64_C(0))
|
|
1658
|
+
return 0;
|
|
1659
|
+
}
|
|
1660
|
+
|
|
1661
|
+
return 1;
|
|
1662
|
+
#endif
|
|
1663
|
+
}
|
|
1664
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1665
|
+
# define _mm_test_all_ones(a) simde_mm_test_all_ones(a)
|
|
1666
|
+
#endif
|
|
1667
|
+
|
|
1668
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1669
|
+
int
|
|
1670
|
+
simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) {
|
|
1671
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
1672
|
+
return _mm_test_all_zeros(a, mask);
|
|
1673
|
+
#else
|
|
1674
|
+
simde__m128i_private
|
|
1675
|
+
a_ = simde__m128i_to_private(a),
|
|
1676
|
+
mask_ = simde__m128i_to_private(mask);
|
|
1677
|
+
|
|
1678
|
+
for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) {
|
|
1679
|
+
if ((a_.u64[i] & mask_.u64[i]) != 0)
|
|
1680
|
+
return 0;
|
|
1681
|
+
}
|
|
1682
|
+
|
|
1683
|
+
return 1;
|
|
1684
|
+
#endif
|
|
1685
|
+
}
|
|
1686
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1687
|
+
# define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask)
|
|
1688
|
+
#endif
|
|
1689
|
+
|
|
1690
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1691
|
+
int
|
|
1692
|
+
simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) {
|
|
1693
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
1694
|
+
return _mm_test_mix_ones_zeros(a, mask);
|
|
1695
|
+
#else
|
|
1696
|
+
simde__m128i_private
|
|
1697
|
+
a_ = simde__m128i_to_private(a),
|
|
1698
|
+
mask_ = simde__m128i_to_private(mask);
|
|
1699
|
+
|
|
1700
|
+
for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++)
|
|
1701
|
+
if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0))
|
|
1702
|
+
return 1;
|
|
1703
|
+
|
|
1704
|
+
return 0;
|
|
1705
|
+
#endif
|
|
1706
|
+
}
|
|
1707
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1708
|
+
# define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask)
|
|
1709
|
+
#endif
|
|
1710
|
+
|
|
1711
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1712
|
+
int
|
|
1713
|
+
simde_mm_testc_si128 (simde__m128i a, simde__m128i b) {
|
|
1714
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
1715
|
+
return _mm_testc_si128(a, b);
|
|
1716
|
+
#else
|
|
1717
|
+
simde__m128i_private
|
|
1718
|
+
a_ = simde__m128i_to_private(a),
|
|
1719
|
+
b_ = simde__m128i_to_private(b);
|
|
1720
|
+
|
|
1721
|
+
int_fast32_t r = 0;
|
|
1722
|
+
|
|
1723
|
+
SIMDE__VECTORIZE_REDUCTION(|:r)
|
|
1724
|
+
for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) {
|
|
1725
|
+
r |= ~a_.i32f[i] & b_.i32f[i];
|
|
1726
|
+
}
|
|
1727
|
+
|
|
1728
|
+
return HEDLEY_STATIC_CAST(int, !r);
|
|
1729
|
+
#endif
|
|
1730
|
+
}
|
|
1731
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1732
|
+
# define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b)
|
|
1733
|
+
#endif
|
|
1734
|
+
|
|
1735
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1736
|
+
int
|
|
1737
|
+
simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) {
|
|
1738
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
1739
|
+
return _mm_testnzc_si128(a, b);
|
|
1740
|
+
#else
|
|
1741
|
+
simde__m128i_private
|
|
1742
|
+
a_ = simde__m128i_to_private(a),
|
|
1743
|
+
b_ = simde__m128i_to_private(b);
|
|
1744
|
+
|
|
1745
|
+
for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) {
|
|
1746
|
+
if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0))
|
|
1747
|
+
return 1;
|
|
1748
|
+
}
|
|
1749
|
+
|
|
1750
|
+
return 0;
|
|
1751
|
+
#endif
|
|
1752
|
+
}
|
|
1753
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1754
|
+
# define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b)
|
|
1755
|
+
#endif
|
|
1756
|
+
|
|
1757
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1758
|
+
int
|
|
1759
|
+
simde_mm_testz_si128 (simde__m128i a, simde__m128i b) {
|
|
1760
|
+
#if defined(SIMDE_SSE4_1_NATIVE)
|
|
1761
|
+
return _mm_testz_si128(a, b);
|
|
1762
|
+
#else
|
|
1763
|
+
simde__m128i_private
|
|
1764
|
+
a_ = simde__m128i_to_private(a),
|
|
1765
|
+
b_ = simde__m128i_to_private(b);
|
|
1766
|
+
|
|
1767
|
+
for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) {
|
|
1768
|
+
if ((a_.u64[i] & b_.u64[i]) == 0)
|
|
1769
|
+
return 1;
|
|
1770
|
+
}
|
|
1771
|
+
|
|
1772
|
+
return 0;
|
|
1773
|
+
#endif
|
|
1774
|
+
}
|
|
1775
|
+
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
|
|
1776
|
+
# define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b)
|
|
1777
|
+
#endif
|
|
1778
|
+
|
|
1779
|
+
SIMDE__END_DECLS
|
|
1780
|
+
|
|
1781
|
+
HEDLEY_DIAGNOSTIC_POP
|
|
1782
|
+
|
|
1783
|
+
#endif /* !defined(SIMDE__SSE4_1_H) */
|