minimap2 0.2.25.0 → 0.2.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -3
- data/ext/minimap2/Makefile +6 -2
- data/ext/minimap2/NEWS.md +38 -0
- data/ext/minimap2/README.md +9 -3
- data/ext/minimap2/align.c +5 -3
- data/ext/minimap2/cookbook.md +2 -2
- data/ext/minimap2/format.c +7 -4
- data/ext/minimap2/kalloc.c +20 -1
- data/ext/minimap2/kalloc.h +13 -2
- data/ext/minimap2/ksw2.h +1 -0
- data/ext/minimap2/ksw2_extd2_sse.c +1 -1
- data/ext/minimap2/ksw2_exts2_sse.c +79 -40
- data/ext/minimap2/ksw2_extz2_sse.c +1 -1
- data/ext/minimap2/lchain.c +15 -16
- data/ext/minimap2/lib/simde/CONTRIBUTING.md +114 -0
- data/ext/minimap2/lib/simde/COPYING +20 -0
- data/ext/minimap2/lib/simde/README.md +333 -0
- data/ext/minimap2/lib/simde/amalgamate.py +58 -0
- data/ext/minimap2/lib/simde/meson.build +33 -0
- data/ext/minimap2/lib/simde/netlify.toml +20 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +140 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +137 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +142 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +145 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +140 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +145 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +140 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +143 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +137 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +141 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +147 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +141 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +134 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +138 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +134 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +137 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +131 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +135 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +141 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +135 -0
- data/ext/minimap2/lib/simde/simde/arm/neon.h +97 -0
- data/ext/minimap2/lib/simde/simde/check.h +267 -0
- data/ext/minimap2/lib/simde/simde/debug-trap.h +83 -0
- data/ext/minimap2/lib/simde/simde/hedley.h +1899 -0
- data/ext/minimap2/lib/simde/simde/simde-arch.h +445 -0
- data/ext/minimap2/lib/simde/simde/simde-common.h +697 -0
- data/ext/minimap2/lib/simde/simde/x86/avx.h +5385 -0
- data/ext/minimap2/lib/simde/simde/x86/avx2.h +2402 -0
- data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +391 -0
- data/ext/minimap2/lib/simde/simde/x86/avx512f.h +3389 -0
- data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +112 -0
- data/ext/minimap2/lib/simde/simde/x86/fma.h +659 -0
- data/ext/minimap2/lib/simde/simde/x86/mmx.h +2210 -0
- data/ext/minimap2/lib/simde/simde/x86/sse.h +3696 -0
- data/ext/minimap2/lib/simde/simde/x86/sse2.h +5991 -0
- data/ext/minimap2/lib/simde/simde/x86/sse3.h +343 -0
- data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +1783 -0
- data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +105 -0
- data/ext/minimap2/lib/simde/simde/x86/ssse3.h +1053 -0
- data/ext/minimap2/lib/simde/simde/x86/svml.h +543 -0
- data/ext/minimap2/lib/simde/test/CMakeLists.txt +166 -0
- data/ext/minimap2/lib/simde/test/arm/meson.build +4 -0
- data/ext/minimap2/lib/simde/test/arm/neon/meson.build +23 -0
- data/ext/minimap2/lib/simde/test/arm/neon/skel.c +871 -0
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +134 -0
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +39 -0
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +10 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +1260 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +873 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +1084 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +1260 -0
- data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +18 -0
- data/ext/minimap2/lib/simde/test/arm/test-arm.c +20 -0
- data/ext/minimap2/lib/simde/test/arm/test-arm.h +8 -0
- data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +171 -0
- data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +68 -0
- data/ext/minimap2/lib/simde/test/meson.build +64 -0
- data/ext/minimap2/lib/simde/test/munit/COPYING +21 -0
- data/ext/minimap2/lib/simde/test/munit/Makefile +55 -0
- data/ext/minimap2/lib/simde/test/munit/README.md +54 -0
- data/ext/minimap2/lib/simde/test/munit/example.c +351 -0
- data/ext/minimap2/lib/simde/test/munit/meson.build +37 -0
- data/ext/minimap2/lib/simde/test/munit/munit.c +2055 -0
- data/ext/minimap2/lib/simde/test/munit/munit.h +535 -0
- data/ext/minimap2/lib/simde/test/run-tests.c +20 -0
- data/ext/minimap2/lib/simde/test/run-tests.h +260 -0
- data/ext/minimap2/lib/simde/test/x86/avx.c +13752 -0
- data/ext/minimap2/lib/simde/test/x86/avx2.c +9977 -0
- data/ext/minimap2/lib/simde/test/x86/avx512bw.c +2664 -0
- data/ext/minimap2/lib/simde/test/x86/avx512f.c +10416 -0
- data/ext/minimap2/lib/simde/test/x86/avx512vl.c +210 -0
- data/ext/minimap2/lib/simde/test/x86/fma.c +2557 -0
- data/ext/minimap2/lib/simde/test/x86/meson.build +33 -0
- data/ext/minimap2/lib/simde/test/x86/mmx.c +2878 -0
- data/ext/minimap2/lib/simde/test/x86/skel.c +2984 -0
- data/ext/minimap2/lib/simde/test/x86/sse.c +5121 -0
- data/ext/minimap2/lib/simde/test/x86/sse2.c +9860 -0
- data/ext/minimap2/lib/simde/test/x86/sse3.c +486 -0
- data/ext/minimap2/lib/simde/test/x86/sse4.1.c +3446 -0
- data/ext/minimap2/lib/simde/test/x86/sse4.2.c +101 -0
- data/ext/minimap2/lib/simde/test/x86/ssse3.c +2084 -0
- data/ext/minimap2/lib/simde/test/x86/svml.c +1545 -0
- data/ext/minimap2/lib/simde/test/x86/test-avx.h +16 -0
- data/ext/minimap2/lib/simde/test/x86/test-avx512.h +25 -0
- data/ext/minimap2/lib/simde/test/x86/test-mmx.h +13 -0
- data/ext/minimap2/lib/simde/test/x86/test-sse.h +13 -0
- data/ext/minimap2/lib/simde/test/x86/test-sse2.h +13 -0
- data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +196 -0
- data/ext/minimap2/lib/simde/test/x86/test-x86.c +48 -0
- data/ext/minimap2/lib/simde/test/x86/test-x86.h +8 -0
- data/ext/minimap2/main.c +13 -6
- data/ext/minimap2/map.c +0 -5
- data/ext/minimap2/minimap.h +40 -31
- data/ext/minimap2/minimap2.1 +19 -5
- data/ext/minimap2/misc/paftools.js +545 -24
- data/ext/minimap2/options.c +1 -1
- data/ext/minimap2/pyproject.toml +2 -0
- data/ext/minimap2/python/mappy.pyx +3 -1
- data/ext/minimap2/seed.c +1 -1
- data/ext/minimap2/setup.py +32 -22
- data/lib/minimap2/version.rb +1 -1
- metadata +100 -3
|
@@ -0,0 +1,2402 @@
|
|
|
1
|
+
/* Permission is hereby granted, free of charge, to any person
|
|
2
|
+
* obtaining a copy of this software and associated documentation
|
|
3
|
+
* files (the "Software"), to deal in the Software without
|
|
4
|
+
* restriction, including without limitation the rights to use, copy,
|
|
5
|
+
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
|
6
|
+
* of the Software, and to permit persons to whom the Software is
|
|
7
|
+
* furnished to do so, subject to the following conditions:
|
|
8
|
+
*
|
|
9
|
+
* The above copyright notice and this permission notice shall be
|
|
10
|
+
* included in all copies or substantial portions of the Software.
|
|
11
|
+
*
|
|
12
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
13
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
14
|
+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
15
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
16
|
+
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
17
|
+
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
18
|
+
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
19
|
+
* SOFTWARE.
|
|
20
|
+
*
|
|
21
|
+
* Copyright:
|
|
22
|
+
* 2018 Evan Nemerson <evan@nemerson.com>
|
|
23
|
+
* 2019 Michael R. Crusoe <michael.crusoe@gmail.com>
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
#include "sse4.1.h"
|
|
27
|
+
#include "sse4.2.h"
|
|
28
|
+
#if !defined(SIMDE__AVX2_H)
|
|
29
|
+
# if !defined(SIMDE__AVX2_H)
|
|
30
|
+
# define SIMDE__AVX2_H
|
|
31
|
+
# endif
|
|
32
|
+
# include "avx.h"
|
|
33
|
+
|
|
34
|
+
HEDLEY_DIAGNOSTIC_PUSH
|
|
35
|
+
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
|
36
|
+
|
|
37
|
+
# if defined(SIMDE_AVX2_NATIVE)
|
|
38
|
+
# undef SIMDE_AVX2_NATIVE
|
|
39
|
+
# endif
|
|
40
|
+
# if defined(SIMDE_ARCH_X86_AVX2) && !defined(SIMDE_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
|
|
41
|
+
# define SIMDE_AVX2_NATIVE
|
|
42
|
+
# elif defined(SIMDE_ARCH_ARM_NEON) && !defined(SIMDE_AVX2_NO_NEON) && !defined(SIMDE_NO_NEON)
|
|
43
|
+
# define SIMDE_AVX2_NEON
|
|
44
|
+
# endif
|
|
45
|
+
|
|
46
|
+
# if defined(SIMDE_AVX2_NATIVE) && !defined(SIMDE_AVX_NATIVE)
|
|
47
|
+
# if defined(SIMDE_AVX2_FORCE_NATIVE)
|
|
48
|
+
# error Native AVX2 support requires native AVX support
|
|
49
|
+
# else
|
|
50
|
+
HEDLEY_WARNING("Native AVX2 support requires native AVX support, disabling")
|
|
51
|
+
# undef SIMDE_AVX2_NATIVE
|
|
52
|
+
# endif
|
|
53
|
+
# elif defined(SIMDE_AVX2_NEON) && !defined(SIMDE_AVX_NEON)
|
|
54
|
+
HEDLEY_WARNING("AVX2 NEON support requires AVX NEON support, disabling")
|
|
55
|
+
/* This branch runs only when SIMDE_AVX2_NEON is defined and SIMDE_AVX_NEON
 * is not, so the macro to disable is the AVX2 one named in the warning. */
# undef SIMDE_AVX2_NEON
|
|
56
|
+
# endif
|
|
57
|
+
|
|
58
|
+
# if defined(SIMDE_AVX2_NATIVE)
|
|
59
|
+
# include <immintrin.h>
|
|
60
|
+
# endif
|
|
61
|
+
|
|
62
|
+
# if !defined(SIMDE_AVX2_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
|
|
63
|
+
# define SIMDE_AVX2_ENABLE_NATIVE_ALIASES
|
|
64
|
+
# endif
|
|
65
|
+
|
|
66
|
+
# include <stdint.h>
|
|
67
|
+
|
|
68
|
+
SIMDE__BEGIN_DECLS
|
|
69
|
+
|
|
70
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
71
|
+
simde__m256i
|
|
72
|
+
simde_mm256_abs_epi8 (simde__m256i a) {
|
|
73
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
74
|
+
return _mm256_abs_epi8(a);
|
|
75
|
+
#else
|
|
76
|
+
simde__m256i_private
|
|
77
|
+
r_,
|
|
78
|
+
a_ = simde__m256i_to_private(a);
|
|
79
|
+
|
|
80
|
+
SIMDE__VECTORIZE
|
|
81
|
+
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
|
|
82
|
+
r_.i8[i] = (a_.i8[i] < INT32_C(0)) ? -a_.i8[i] : a_.i8[i];
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
return simde__m256i_from_private(r_);
|
|
86
|
+
#endif
|
|
87
|
+
}
|
|
88
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
89
|
+
# define _mm256_abs_epi8(a) simde_mm256_abs_epi8(a)
|
|
90
|
+
#endif
|
|
91
|
+
|
|
92
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
93
|
+
simde__m256i
|
|
94
|
+
simde_mm256_abs_epi16 (simde__m256i a) {
|
|
95
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
96
|
+
return _mm256_abs_epi16(a);
|
|
97
|
+
#else
|
|
98
|
+
simde__m256i_private
|
|
99
|
+
r_,
|
|
100
|
+
a_ = simde__m256i_to_private(a);
|
|
101
|
+
|
|
102
|
+
SIMDE__VECTORIZE
|
|
103
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
104
|
+
r_.i16[i] = (a_.i16[i] < INT32_C(0)) ? -a_.i16[i] : a_.i16[i];
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
return simde__m256i_from_private(r_);
|
|
108
|
+
#endif
|
|
109
|
+
}
|
|
110
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
111
|
+
# define _mm256_abs_epi16(a) simde_mm256_abs_epi16(a)
|
|
112
|
+
#endif
|
|
113
|
+
|
|
114
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
115
|
+
simde__m256i
|
|
116
|
+
simde_mm256_abs_epi32(simde__m256i a) {
|
|
117
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
118
|
+
return _mm256_abs_epi32(a);
|
|
119
|
+
#else
|
|
120
|
+
simde__m256i_private
|
|
121
|
+
r_,
|
|
122
|
+
a_ = simde__m256i_to_private(a);
|
|
123
|
+
|
|
124
|
+
SIMDE__VECTORIZE
|
|
125
|
+
for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
|
|
126
|
+
/* Negate in unsigned arithmetic: -INT32_MIN is signed-overflow UB, whereas 0u - x wraps. */
r_.i32[i] = (a_.i32[i] < INT32_C(0)) ? HEDLEY_STATIC_CAST(int32_t, UINT32_C(0) - HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : a_.i32[i];
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
return simde__m256i_from_private(r_);
|
|
130
|
+
#endif
|
|
131
|
+
}
|
|
132
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
133
|
+
# define _mm256_abs_epi32(a) simde_mm256_abs_epi32(a)
|
|
134
|
+
#endif
|
|
135
|
+
|
|
136
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
137
|
+
simde__m256i
|
|
138
|
+
simde_mm256_add_epi8 (simde__m256i a, simde__m256i b) {
|
|
139
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
140
|
+
return _mm256_add_epi8(a, b);
|
|
141
|
+
#else
|
|
142
|
+
simde__m256i_private
|
|
143
|
+
r_,
|
|
144
|
+
a_ = simde__m256i_to_private(a),
|
|
145
|
+
b_ = simde__m256i_to_private(b);
|
|
146
|
+
|
|
147
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
148
|
+
r_.m128i[0] = simde_mm_add_epi8(a_.m128i[0], b_.m128i[0]);
|
|
149
|
+
r_.m128i[1] = simde_mm_add_epi8(a_.m128i[1], b_.m128i[1]);
|
|
150
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
151
|
+
r_.i8 = a_.i8 + b_.i8;
|
|
152
|
+
#else
|
|
153
|
+
SIMDE__VECTORIZE
|
|
154
|
+
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
|
|
155
|
+
r_.i8[i] = a_.i8[i] + b_.i8[i];
|
|
156
|
+
}
|
|
157
|
+
#endif
|
|
158
|
+
|
|
159
|
+
return simde__m256i_from_private(r_);
|
|
160
|
+
#endif
|
|
161
|
+
}
|
|
162
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
163
|
+
# define _mm256_add_epi8(a, b) simde_mm256_add_epi8(a, b)
|
|
164
|
+
#endif
|
|
165
|
+
|
|
166
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
167
|
+
simde__m256i
|
|
168
|
+
simde_mm256_add_epi16 (simde__m256i a, simde__m256i b) {
|
|
169
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
170
|
+
return _mm256_add_epi16(a, b);
|
|
171
|
+
#else
|
|
172
|
+
simde__m256i_private
|
|
173
|
+
r_,
|
|
174
|
+
a_ = simde__m256i_to_private(a),
|
|
175
|
+
b_ = simde__m256i_to_private(b);
|
|
176
|
+
|
|
177
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
178
|
+
r_.m128i[0] = simde_mm_add_epi16(a_.m128i[0], b_.m128i[0]);
|
|
179
|
+
r_.m128i[1] = simde_mm_add_epi16(a_.m128i[1], b_.m128i[1]);
|
|
180
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
181
|
+
r_.i16 = a_.i16 + b_.i16;
|
|
182
|
+
#else
|
|
183
|
+
SIMDE__VECTORIZE
|
|
184
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
185
|
+
r_.i16[i] = a_.i16[i] + b_.i16[i];
|
|
186
|
+
}
|
|
187
|
+
#endif
|
|
188
|
+
|
|
189
|
+
return simde__m256i_from_private(r_);
|
|
190
|
+
#endif
|
|
191
|
+
}
|
|
192
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
193
|
+
# define _mm256_add_epi16(a, b) simde_mm256_add_epi16(a, b)
|
|
194
|
+
#endif
|
|
195
|
+
|
|
196
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
197
|
+
simde__m256i
|
|
198
|
+
simde_mm256_add_epi32 (simde__m256i a, simde__m256i b) {
|
|
199
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
200
|
+
return _mm256_add_epi32(a, b);
|
|
201
|
+
#else
|
|
202
|
+
simde__m256i_private
|
|
203
|
+
r_,
|
|
204
|
+
a_ = simde__m256i_to_private(a),
|
|
205
|
+
b_ = simde__m256i_to_private(b);
|
|
206
|
+
|
|
207
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
208
|
+
r_.m128i[0] = simde_mm_add_epi32(a_.m128i[0], b_.m128i[0]);
|
|
209
|
+
r_.m128i[1] = simde_mm_add_epi32(a_.m128i[1], b_.m128i[1]);
|
|
210
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
211
|
+
r_.i32 = a_.i32 + b_.i32;
|
|
212
|
+
#else
|
|
213
|
+
SIMDE__VECTORIZE
|
|
214
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
215
|
+
r_.i32[i] = a_.i32[i] + b_.i32[i];
|
|
216
|
+
}
|
|
217
|
+
#endif
|
|
218
|
+
|
|
219
|
+
return simde__m256i_from_private(r_);
|
|
220
|
+
#endif
|
|
221
|
+
}
|
|
222
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
223
|
+
# define _mm256_add_epi32(a, b) simde_mm256_add_epi32(a, b)
|
|
224
|
+
#endif
|
|
225
|
+
|
|
226
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
227
|
+
simde__m256i
|
|
228
|
+
simde_mm256_add_epi64 (simde__m256i a, simde__m256i b) {
|
|
229
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
230
|
+
return _mm256_add_epi64(a, b);
|
|
231
|
+
#else
|
|
232
|
+
simde__m256i_private
|
|
233
|
+
r_,
|
|
234
|
+
a_ = simde__m256i_to_private(a),
|
|
235
|
+
b_ = simde__m256i_to_private(b);
|
|
236
|
+
|
|
237
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
238
|
+
r_.m128i[0] = simde_mm_add_epi64(a_.m128i[0], b_.m128i[0]);
|
|
239
|
+
r_.m128i[1] = simde_mm_add_epi64(a_.m128i[1], b_.m128i[1]);
|
|
240
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
241
|
+
r_.i64 = a_.i64 + b_.i64;
|
|
242
|
+
#else
|
|
243
|
+
SIMDE__VECTORIZE
|
|
244
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
245
|
+
r_.i64[i] = a_.i64[i] + b_.i64[i];
|
|
246
|
+
}
|
|
247
|
+
#endif
|
|
248
|
+
|
|
249
|
+
return simde__m256i_from_private(r_);
|
|
250
|
+
#endif
|
|
251
|
+
}
|
|
252
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
253
|
+
# define _mm256_add_epi64(a, b) simde_mm256_add_epi64(a, b)
|
|
254
|
+
#endif
|
|
255
|
+
|
|
256
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
257
|
+
simde__m256i
|
|
258
|
+
simde_mm256_alignr_epi8 (simde__m256i a, simde__m256i b, int count) {
|
|
259
|
+
simde__m256i_private
|
|
260
|
+
r_,
|
|
261
|
+
a_ = simde__m256i_to_private(a),
|
|
262
|
+
b_ = simde__m256i_to_private(b);
|
|
263
|
+
|
|
264
|
+
if (HEDLEY_UNLIKELY(count > 31))
|
|
265
|
+
return simde_mm256_setzero_si256();
|
|
266
|
+
|
|
267
|
+
for (size_t h = 0 ; h < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; h++) {
|
|
268
|
+
SIMDE__VECTORIZE
|
|
269
|
+
for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) {
|
|
270
|
+
const int srcpos = count + HEDLEY_STATIC_CAST(int, i);
|
|
271
|
+
if (srcpos > 31) {
|
|
272
|
+
r_.m128i_private[h].i8[i] = 0;
|
|
273
|
+
} else if (srcpos > 15) {
|
|
274
|
+
r_.m128i_private[h].i8[i] = a_.m128i_private[h].i8[(srcpos) & 15];
|
|
275
|
+
} else {
|
|
276
|
+
r_.m128i_private[h].i8[i] = b_.m128i_private[h].i8[srcpos];
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
return simde__m256i_from_private(r_);
|
|
282
|
+
}
|
|
283
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
284
|
+
# define simde_mm256_alignr_epi8(a, b, count) _mm256_alignr_epi8(a, b, count)
|
|
285
|
+
#elif defined(SIMDE_ARCH_X86_SSSE3)
|
|
286
|
+
# define simde_mm256_alignr_epi8(a, b, count) \
|
|
287
|
+
simde_mm256_set_m128i( \
|
|
288
|
+
simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (count)), \
|
|
289
|
+
simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (count)))
|
|
290
|
+
#endif
|
|
291
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
292
|
+
# define _mm256_alignr_epi8(a, b, count) simde_mm256_alignr_epi8(a, b, (count))
|
|
293
|
+
#endif
|
|
294
|
+
|
|
295
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
296
|
+
simde__m256i
|
|
297
|
+
simde_mm256_and_si256 (simde__m256i a, simde__m256i b) {
|
|
298
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
299
|
+
return _mm256_and_si256(a, b);
|
|
300
|
+
#else
|
|
301
|
+
simde__m256i_private
|
|
302
|
+
r_,
|
|
303
|
+
a_ = simde__m256i_to_private(a),
|
|
304
|
+
b_ = simde__m256i_to_private(b);
|
|
305
|
+
|
|
306
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
307
|
+
r_.m128i[0] = simde_mm_and_si128(a_.m128i[0], b_.m128i[0]);
|
|
308
|
+
r_.m128i[1] = simde_mm_and_si128(a_.m128i[1], b_.m128i[1]);
|
|
309
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
310
|
+
r_.i32f = a_.i32f & b_.i32f;
|
|
311
|
+
#else
|
|
312
|
+
SIMDE__VECTORIZE
|
|
313
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
314
|
+
r_.i64[i] = a_.i64[i] & b_.i64[i];
|
|
315
|
+
}
|
|
316
|
+
#endif
|
|
317
|
+
|
|
318
|
+
return simde__m256i_from_private(r_);
|
|
319
|
+
#endif
|
|
320
|
+
}
|
|
321
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
322
|
+
# define _mm256_and_si256(a, b) simde_mm256_and_si256(a, b)
|
|
323
|
+
#endif
|
|
324
|
+
|
|
325
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
326
|
+
simde__m256i
|
|
327
|
+
simde_mm256_andnot_si256 (simde__m256i a, simde__m256i b) {
|
|
328
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
329
|
+
return _mm256_andnot_si256(a, b);
|
|
330
|
+
#else
|
|
331
|
+
simde__m256i_private
|
|
332
|
+
r_,
|
|
333
|
+
a_ = simde__m256i_to_private(a),
|
|
334
|
+
b_ = simde__m256i_to_private(b);
|
|
335
|
+
|
|
336
|
+
#if defined(SIMDE_ARCH_X86_SSE2) || defined(SIMDE_SSE2_NEON)
|
|
337
|
+
r_.m128i_private[0] = simde__m128i_to_private(simde_mm_andnot_si128(simde__m128i_from_private(a_.m128i_private[0]), simde__m128i_from_private(b_.m128i_private[0])));
|
|
338
|
+
r_.m128i_private[1] = simde__m128i_to_private(simde_mm_andnot_si128(simde__m128i_from_private(a_.m128i_private[1]), simde__m128i_from_private(b_.m128i_private[1])));
|
|
339
|
+
#else
|
|
340
|
+
SIMDE__VECTORIZE
|
|
341
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
|
|
342
|
+
r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i];
|
|
343
|
+
}
|
|
344
|
+
#endif
|
|
345
|
+
|
|
346
|
+
return simde__m256i_from_private(r_);
|
|
347
|
+
#endif
|
|
348
|
+
}
|
|
349
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
350
|
+
# define _mm256_andnot_si256(a, b) simde_mm256_andnot_si256(a, b)
|
|
351
|
+
#endif
|
|
352
|
+
|
|
353
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
354
|
+
simde__m256i
|
|
355
|
+
simde_mm256_adds_epi8 (simde__m256i a, simde__m256i b) {
|
|
356
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
357
|
+
return _mm256_adds_epi8(a, b);
|
|
358
|
+
#else
|
|
359
|
+
simde__m256i_private
|
|
360
|
+
r_,
|
|
361
|
+
a_ = simde__m256i_to_private(a),
|
|
362
|
+
b_ = simde__m256i_to_private(b);
|
|
363
|
+
|
|
364
|
+
#if defined(SIMDE_ARCH_X86_SSE2) && !defined(HEDLEY_INTEL_VERSION)
|
|
365
|
+
SIMDE__VECTORIZE
|
|
366
|
+
for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
|
|
367
|
+
r_.m128i[i] = simde_mm_adds_epi8(a_.m128i[i], b_.m128i[i]);
|
|
368
|
+
}
|
|
369
|
+
#else
|
|
370
|
+
SIMDE__VECTORIZE
|
|
371
|
+
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
|
|
372
|
+
const int32_t tmp =
|
|
373
|
+
HEDLEY_STATIC_CAST(int16_t, a_.i8[i]) +
|
|
374
|
+
HEDLEY_STATIC_CAST(int16_t, b_.i8[i]);
|
|
375
|
+
r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, ((tmp < INT8_MAX) ? ((tmp > INT8_MIN) ? tmp : INT8_MIN) : INT8_MAX));
|
|
376
|
+
}
|
|
377
|
+
#endif
|
|
378
|
+
|
|
379
|
+
return simde__m256i_from_private(r_);
|
|
380
|
+
#endif
|
|
381
|
+
}
|
|
382
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
383
|
+
# define _mm256_adds_epi8(a, b) simde_mm256_adds_epi8(a, b)
|
|
384
|
+
#endif
|
|
385
|
+
|
|
386
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
387
|
+
simde__m256i
|
|
388
|
+
simde_mm256_adds_epi16(simde__m256i a, simde__m256i b) {
|
|
389
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
390
|
+
return _mm256_adds_epi16(a, b);
|
|
391
|
+
#else
|
|
392
|
+
simde__m256i_private
|
|
393
|
+
r_,
|
|
394
|
+
a_ = simde__m256i_to_private(a),
|
|
395
|
+
b_ = simde__m256i_to_private(b);
|
|
396
|
+
|
|
397
|
+
#if defined(SIMDE_ARCH_X86_SSE2) && !defined(HEDLEY_INTEL_VERSION)
|
|
398
|
+
SIMDE__VECTORIZE
|
|
399
|
+
for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
|
|
400
|
+
r_.m128i[i] = simde_mm_adds_epi16(a_.m128i[i], b_.m128i[i]);
|
|
401
|
+
}
|
|
402
|
+
#else
|
|
403
|
+
SIMDE__VECTORIZE
|
|
404
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
405
|
+
const int32_t tmp =
|
|
406
|
+
HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) +
|
|
407
|
+
HEDLEY_STATIC_CAST(int32_t, b_.i16[i]);
|
|
408
|
+
r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((tmp < INT16_MAX) ? ((tmp > INT16_MIN) ? tmp : INT16_MIN) : INT16_MAX));
|
|
409
|
+
}
|
|
410
|
+
#endif
|
|
411
|
+
|
|
412
|
+
return simde__m256i_from_private(r_);
|
|
413
|
+
#endif
|
|
414
|
+
}
|
|
415
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
416
|
+
# define _mm256_adds_epi16(a, b) simde_mm256_adds_epi16(a, b)
|
|
417
|
+
#endif
|
|
418
|
+
|
|
419
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
420
|
+
simde__m256i
|
|
421
|
+
simde_mm256_adds_epu8 (simde__m256i a, simde__m256i b) {
|
|
422
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
423
|
+
return _mm256_adds_epu8(a, b);
|
|
424
|
+
#else
|
|
425
|
+
simde__m256i_private
|
|
426
|
+
r_,
|
|
427
|
+
a_ = simde__m256i_to_private(a),
|
|
428
|
+
b_ = simde__m256i_to_private(b);
|
|
429
|
+
|
|
430
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
431
|
+
r_.m128i[0] = simde_mm_adds_epu8(a_.m128i[0], b_.m128i[0]);
|
|
432
|
+
r_.m128i[1] = simde_mm_adds_epu8(a_.m128i[1], b_.m128i[1]);
|
|
433
|
+
#else
|
|
434
|
+
SIMDE__VECTORIZE
|
|
435
|
+
for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
|
|
436
|
+
r_.u8[i] = ((UINT8_MAX - a_.u8[i]) > b_.u8[i]) ? (a_.u8[i] + b_.u8[i]) : UINT8_MAX;
|
|
437
|
+
}
|
|
438
|
+
#endif
|
|
439
|
+
|
|
440
|
+
return simde__m256i_from_private(r_);
|
|
441
|
+
#endif
|
|
442
|
+
}
|
|
443
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
444
|
+
# define _mm256_adds_epu8(a, b) simde_mm256_adds_epu8(a, b)
|
|
445
|
+
#endif
|
|
446
|
+
|
|
447
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
448
|
+
simde__m256i
|
|
449
|
+
simde_mm256_adds_epu16(simde__m256i a, simde__m256i b) {
|
|
450
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
451
|
+
return _mm256_adds_epu16(a, b);
|
|
452
|
+
#else
|
|
453
|
+
simde__m256i_private
|
|
454
|
+
r_,
|
|
455
|
+
a_ = simde__m256i_to_private(a),
|
|
456
|
+
b_ = simde__m256i_to_private(b);
|
|
457
|
+
|
|
458
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
459
|
+
r_.m128i[0] = simde_mm_adds_epu16(a_.m128i[0], b_.m128i[0]);
|
|
460
|
+
r_.m128i[1] = simde_mm_adds_epu16(a_.m128i[1], b_.m128i[1]);
|
|
461
|
+
#else
|
|
462
|
+
SIMDE__VECTORIZE
|
|
463
|
+
for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
|
|
464
|
+
r_.u16[i] = ((UINT16_MAX - a_.u16[i]) > b_.u16[i]) ? (a_.u16[i] + b_.u16[i]) : UINT16_MAX;
|
|
465
|
+
}
|
|
466
|
+
#endif
|
|
467
|
+
|
|
468
|
+
return simde__m256i_from_private(r_);
|
|
469
|
+
#endif
|
|
470
|
+
}
|
|
471
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
472
|
+
# define _mm256_adds_epu16(a, b) simde_mm256_adds_epu16(a, b)
|
|
473
|
+
#endif
|
|
474
|
+
|
|
475
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
476
|
+
simde__m256i
|
|
477
|
+
simde_mm256_avg_epu8 (simde__m256i a, simde__m256i b) {
|
|
478
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
479
|
+
return _mm256_avg_epu8(a, b);
|
|
480
|
+
#else
|
|
481
|
+
simde__m256i_private
|
|
482
|
+
r_,
|
|
483
|
+
a_ = simde__m256i_to_private(a),
|
|
484
|
+
b_ = simde__m256i_to_private(b);
|
|
485
|
+
|
|
486
|
+
SIMDE__VECTORIZE
|
|
487
|
+
for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
|
|
488
|
+
r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1;
|
|
489
|
+
}
|
|
490
|
+
|
|
491
|
+
return simde__m256i_from_private(r_);
|
|
492
|
+
#endif
|
|
493
|
+
}
|
|
494
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
495
|
+
# define _mm256_avg_epu8(a, b) simde_mm256_avg_epu8(a, b)
|
|
496
|
+
#endif
|
|
497
|
+
|
|
498
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
499
|
+
simde__m256i
|
|
500
|
+
simde_mm256_avg_epu16 (simde__m256i a, simde__m256i b) {
|
|
501
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
502
|
+
return _mm256_avg_epu16(a, b);
|
|
503
|
+
#else
|
|
504
|
+
simde__m256i_private
|
|
505
|
+
r_,
|
|
506
|
+
a_ = simde__m256i_to_private(a),
|
|
507
|
+
b_ = simde__m256i_to_private(b);
|
|
508
|
+
|
|
509
|
+
SIMDE__VECTORIZE
|
|
510
|
+
for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
|
|
511
|
+
r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1;
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
return simde__m256i_from_private(r_);
|
|
515
|
+
#endif
|
|
516
|
+
}
|
|
517
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
518
|
+
# define _mm256_avg_epu16(a, b) simde_mm256_avg_epu16(a, b)
|
|
519
|
+
#endif
|
|
520
|
+
|
|
521
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
522
|
+
simde__m128i
|
|
523
|
+
simde_mm_blend_epi32(simde__m128i a, simde__m128i b, const int imm8)
|
|
524
|
+
HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
|
|
525
|
+
simde__m128i_private
|
|
526
|
+
r_,
|
|
527
|
+
a_ = simde__m128i_to_private(a),
|
|
528
|
+
b_ = simde__m128i_to_private(b);
|
|
529
|
+
|
|
530
|
+
SIMDE__VECTORIZE
|
|
531
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
532
|
+
r_.i32[i] = ((imm8 >> i) & 1) ? b_.i32[i] : a_.i32[i];
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
return simde__m128i_from_private(r_);
|
|
536
|
+
}
|
|
537
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
538
|
+
/* Native path: forward to the intrinsic. No trailing semicolon, so the
 * macro stays usable inside expressions and argument lists. */
# define simde_mm_blend_epi32(a, b, imm8) _mm_blend_epi32(a, b, imm8)
|
|
539
|
+
#endif
|
|
540
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
541
|
+
# define _mm_blend_epi32(a, b, imm8) simde_mm_blend_epi32(a, b, imm8)
|
|
542
|
+
#endif
|
|
543
|
+
|
|
544
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
545
|
+
simde__m256i
|
|
546
|
+
simde_mm256_blend_epi16(simde__m256i a, simde__m256i b, const int imm8)
|
|
547
|
+
HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
|
|
548
|
+
simde__m256i_private
|
|
549
|
+
r_,
|
|
550
|
+
a_ = simde__m256i_to_private(a),
|
|
551
|
+
b_ = simde__m256i_to_private(b);
|
|
552
|
+
|
|
553
|
+
SIMDE__VECTORIZE
|
|
554
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
555
|
+
r_.i16[i] = ((imm8 >> i%8) & 1) ? b_.i16[i] : a_.i16[i];
|
|
556
|
+
}
|
|
557
|
+
|
|
558
|
+
return simde__m256i_from_private(r_);
|
|
559
|
+
}
|
|
560
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
561
|
+
/* Native path: forward to the intrinsic. No trailing semicolon, so the
 * macro stays usable inside expressions and argument lists. */
# define simde_mm256_blend_epi16(a, b, imm8) _mm256_blend_epi16(a, b, imm8)
|
|
562
|
+
#endif
|
|
563
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
564
|
+
# define _mm256_blend_epi16(a, b, imm8) simde_mm256_blend_epi16(a, b, imm8)
|
|
565
|
+
#endif
|
|
566
|
+
|
|
567
|
+
|
|
568
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
569
|
+
simde__m256i
|
|
570
|
+
simde_mm256_blend_epi32(simde__m256i a, simde__m256i b, const int imm8)
|
|
571
|
+
HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
|
|
572
|
+
simde__m256i_private
|
|
573
|
+
r_,
|
|
574
|
+
a_ = simde__m256i_to_private(a),
|
|
575
|
+
b_ = simde__m256i_to_private(b);
|
|
576
|
+
|
|
577
|
+
SIMDE__VECTORIZE
|
|
578
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
579
|
+
r_.i32[i] = ((imm8 >> i) & 1) ? b_.i32[i] : a_.i32[i];
|
|
580
|
+
}
|
|
581
|
+
|
|
582
|
+
return simde__m256i_from_private(r_);
|
|
583
|
+
}
|
|
584
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
585
|
+
/* Native path: forward to the intrinsic. No trailing semicolon, so the
 * macro stays usable inside expressions and argument lists. */
# define simde_mm256_blend_epi32(a, b, imm8) _mm256_blend_epi32(a, b, imm8)
|
|
586
|
+
#endif
|
|
587
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
588
|
+
# define _mm256_blend_epi32(a, b, imm8) simde_mm256_blend_epi32(a, b, imm8)
|
|
589
|
+
#endif
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
593
|
+
simde__m256i
|
|
594
|
+
simde_mm256_blendv_epi8(simde__m256i a, simde__m256i b, simde__m256i mask) {
|
|
595
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
596
|
+
return _mm256_blendv_epi8(a, b, mask);
|
|
597
|
+
#else
|
|
598
|
+
simde__m256i_private
|
|
599
|
+
r_,
|
|
600
|
+
a_ = simde__m256i_to_private(a),
|
|
601
|
+
b_ = simde__m256i_to_private(b),
|
|
602
|
+
mask_ = simde__m256i_to_private(mask);
|
|
603
|
+
|
|
604
|
+
#if defined(SIMDE_ARCH_X86_SSE4_1)
|
|
605
|
+
r_.m128i_private[0] = simde__m128i_to_private(simde_mm_blendv_epi8(simde__m128i_from_private(a_.m128i_private[0]), simde__m128i_from_private(b_.m128i_private[0]), simde__m128i_from_private(mask_.m128i_private[0])));
|
|
606
|
+
r_.m128i_private[1] = simde__m128i_to_private(simde_mm_blendv_epi8(simde__m128i_from_private(a_.m128i_private[1]), simde__m128i_from_private(b_.m128i_private[1]), simde__m128i_from_private(mask_.m128i_private[1])));
|
|
607
|
+
#else
|
|
608
|
+
SIMDE__VECTORIZE
|
|
609
|
+
for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
|
|
610
|
+
if (mask_.u8[i] & 0x80) {
|
|
611
|
+
r_.u8[i] = b_.u8[i];
|
|
612
|
+
} else {
|
|
613
|
+
r_.u8[i] = a_.u8[i];
|
|
614
|
+
}
|
|
615
|
+
}
|
|
616
|
+
#endif
|
|
617
|
+
|
|
618
|
+
return simde__m256i_from_private(r_);
|
|
619
|
+
#endif
|
|
620
|
+
}
|
|
621
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
622
|
+
# define simde_mm256_blendv_epi8(a, b, imm8) _mm256_blendv_epi8(a, b, imm8);
|
|
623
|
+
#endif
|
|
624
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
625
|
+
# define _mm256_blendv_epi8(a, b, mask) simde_mm256_blendv_epi8(a, b, mask)
|
|
626
|
+
#endif
|
|
627
|
+
|
|
628
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
629
|
+
simde__m128i
|
|
630
|
+
simde_mm_broadcastb_epi8 (simde__m128i a) {
|
|
631
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
632
|
+
return _mm_broadcastb_epi8(a);
|
|
633
|
+
#else
|
|
634
|
+
simde__m128i_private r_;
|
|
635
|
+
simde__m128i_private a_= simde__m128i_to_private(a);
|
|
636
|
+
|
|
637
|
+
SIMDE__VECTORIZE
|
|
638
|
+
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
|
|
639
|
+
r_.i8[i] = a_.i8[0];
|
|
640
|
+
}
|
|
641
|
+
|
|
642
|
+
return simde__m128i_from_private(r_);
|
|
643
|
+
#endif
|
|
644
|
+
}
|
|
645
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
646
|
+
# define _mm_broadcastb_epi8(a) simde_mm_broadcastb_epi8(a)
|
|
647
|
+
#endif
|
|
648
|
+
|
|
649
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
650
|
+
simde__m256i
|
|
651
|
+
simde_mm256_broadcastb_epi8 (simde__m128i a) {
|
|
652
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
653
|
+
return _mm256_broadcastb_epi8(a);
|
|
654
|
+
#else
|
|
655
|
+
simde__m256i_private r_;
|
|
656
|
+
simde__m128i_private a_= simde__m128i_to_private(a);
|
|
657
|
+
|
|
658
|
+
SIMDE__VECTORIZE
|
|
659
|
+
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
|
|
660
|
+
r_.i8[i] = a_.i8[0];
|
|
661
|
+
}
|
|
662
|
+
|
|
663
|
+
return simde__m256i_from_private(r_);
|
|
664
|
+
#endif
|
|
665
|
+
}
|
|
666
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
667
|
+
# define _mm256_broadcastb_epi8(a) simde_mm256_broadcastb_epi8(a)
|
|
668
|
+
#endif
|
|
669
|
+
|
|
670
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
671
|
+
simde__m256i
|
|
672
|
+
simde_mm256_broadcastsi128_si256 (simde__m128i a) {
|
|
673
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
674
|
+
return _mm256_broadcastsi128_si256(a);
|
|
675
|
+
#else
|
|
676
|
+
simde__m256i_private r_;
|
|
677
|
+
simde__m128i_private a_ = simde__m128i_to_private(a);
|
|
678
|
+
|
|
679
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
680
|
+
r_.m128i_private[0] = a_;
|
|
681
|
+
r_.m128i_private[1] = a_;
|
|
682
|
+
#else
|
|
683
|
+
r_.i64[0] = a_.i64[0];
|
|
684
|
+
r_.i64[1] = a_.i64[1];
|
|
685
|
+
r_.i64[2] = a_.i64[0];
|
|
686
|
+
r_.i64[3] = a_.i64[1];
|
|
687
|
+
#endif
|
|
688
|
+
|
|
689
|
+
return simde__m256i_from_private(r_);
|
|
690
|
+
#endif
|
|
691
|
+
}
|
|
692
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
693
|
+
# define _mm256_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a)
|
|
694
|
+
#endif
|
|
695
|
+
|
|
696
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
697
|
+
simde__m256i
|
|
698
|
+
simde_mm256_cmpeq_epi8 (simde__m256i a, simde__m256i b) {
|
|
699
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
700
|
+
return _mm256_cmpeq_epi8(a, b);
|
|
701
|
+
#else
|
|
702
|
+
simde__m256i_private
|
|
703
|
+
r_,
|
|
704
|
+
a_ = simde__m256i_to_private(a),
|
|
705
|
+
b_ = simde__m256i_to_private(b);
|
|
706
|
+
|
|
707
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
708
|
+
r_.m128i[0] = simde_mm_cmpeq_epi8(a_.m128i[0], b_.m128i[0]);
|
|
709
|
+
r_.m128i[1] = simde_mm_cmpeq_epi8(a_.m128i[1], b_.m128i[1]);
|
|
710
|
+
#else
|
|
711
|
+
SIMDE__VECTORIZE
|
|
712
|
+
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
|
|
713
|
+
r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
|
|
714
|
+
}
|
|
715
|
+
#endif
|
|
716
|
+
|
|
717
|
+
return simde__m256i_from_private(r_);
|
|
718
|
+
#endif
|
|
719
|
+
}
|
|
720
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
721
|
+
# define _mm256_cmpeq_epi8(a, b) simde_mm256_cmpeq_epi8(a, b)
|
|
722
|
+
#endif
|
|
723
|
+
|
|
724
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
725
|
+
simde__m256i
|
|
726
|
+
simde_mm256_cmpeq_epi16 (simde__m256i a, simde__m256i b) {
|
|
727
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
728
|
+
return _mm256_cmpeq_epi16(a, b);
|
|
729
|
+
#else
|
|
730
|
+
simde__m256i_private
|
|
731
|
+
r_,
|
|
732
|
+
a_ = simde__m256i_to_private(a),
|
|
733
|
+
b_ = simde__m256i_to_private(b);
|
|
734
|
+
|
|
735
|
+
SIMDE__VECTORIZE
|
|
736
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
737
|
+
r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
|
|
738
|
+
}
|
|
739
|
+
|
|
740
|
+
return simde__m256i_from_private(r_);
|
|
741
|
+
#endif
|
|
742
|
+
}
|
|
743
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
744
|
+
# define _mm256_cmpeq_epi16(a, b) simde_mm256_cmpeq_epi16(a, b)
|
|
745
|
+
#endif
|
|
746
|
+
|
|
747
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
748
|
+
simde__m256i
|
|
749
|
+
simde_mm256_cmpeq_epi32 (simde__m256i a, simde__m256i b) {
|
|
750
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
751
|
+
return _mm256_cmpeq_epi32(a, b);
|
|
752
|
+
#else
|
|
753
|
+
simde__m256i_private
|
|
754
|
+
r_,
|
|
755
|
+
a_ = simde__m256i_to_private(a),
|
|
756
|
+
b_ = simde__m256i_to_private(b);
|
|
757
|
+
|
|
758
|
+
#if defined(SIMDE_ARCH_X86_SSE2) || defined(SIMDE_SSE2_NEON)
|
|
759
|
+
r_.m128i[0] = simde_mm_cmpeq_epi32(a_.m128i[0], b_.m128i[0]);
|
|
760
|
+
r_.m128i[1] = simde_mm_cmpeq_epi32(a_.m128i[1], b_.m128i[1]);
|
|
761
|
+
#else
|
|
762
|
+
SIMDE__VECTORIZE
|
|
763
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
764
|
+
r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
|
|
765
|
+
}
|
|
766
|
+
#endif
|
|
767
|
+
|
|
768
|
+
return simde__m256i_from_private(r_);
|
|
769
|
+
#endif
|
|
770
|
+
}
|
|
771
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
772
|
+
# define _mm256_cmpeq_epi32(a, b) simde_mm256_cmpeq_epi32(a, b)
|
|
773
|
+
#endif
|
|
774
|
+
|
|
775
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
776
|
+
simde__m256i
|
|
777
|
+
simde_mm256_cmpeq_epi64 (simde__m256i a, simde__m256i b) {
|
|
778
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
779
|
+
return _mm256_cmpeq_epi64(a, b);
|
|
780
|
+
#else
|
|
781
|
+
simde__m256i_private
|
|
782
|
+
r_,
|
|
783
|
+
a_ = simde__m256i_to_private(a),
|
|
784
|
+
b_ = simde__m256i_to_private(b);
|
|
785
|
+
|
|
786
|
+
#if defined(SIMDE_ARCH_X86_SSE2) || defined(SIMDE_SSE2_NEON)
|
|
787
|
+
r_.m128i[0] = simde_mm_cmpeq_epi64(a_.m128i[0], b_.m128i[0]);
|
|
788
|
+
r_.m128i[1] = simde_mm_cmpeq_epi64(a_.m128i[1], b_.m128i[1]);
|
|
789
|
+
#else
|
|
790
|
+
SIMDE__VECTORIZE
|
|
791
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
792
|
+
r_.i64[i] = (a_.i64[i] == b_.i64[i]) ? ~INT64_C(0) : INT64_C(0);
|
|
793
|
+
}
|
|
794
|
+
#endif
|
|
795
|
+
|
|
796
|
+
return simde__m256i_from_private(r_);
|
|
797
|
+
#endif
|
|
798
|
+
}
|
|
799
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
800
|
+
# define _mm256_cmpeq_epi64(a, b) simde_mm256_cmpeq_epi64(a, b)
|
|
801
|
+
#endif
|
|
802
|
+
|
|
803
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
804
|
+
simde__m256i
|
|
805
|
+
simde_mm256_cmpgt_epi8 (simde__m256i a, simde__m256i b) {
|
|
806
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
807
|
+
return _mm256_cmpgt_epi8(a, b);
|
|
808
|
+
#else
|
|
809
|
+
simde__m256i_private
|
|
810
|
+
r_,
|
|
811
|
+
a_ = simde__m256i_to_private(a),
|
|
812
|
+
b_ = simde__m256i_to_private(b);
|
|
813
|
+
|
|
814
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
815
|
+
r_.m128i[0] = simde_mm_cmpgt_epi8(a_.m128i[0], b_.m128i[0]);
|
|
816
|
+
r_.m128i[1] = simde_mm_cmpgt_epi8(a_.m128i[1], b_.m128i[1]);
|
|
817
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
818
|
+
r_.i8 = a_.i8 > b_.i8;
|
|
819
|
+
#else
|
|
820
|
+
SIMDE__VECTORIZE
|
|
821
|
+
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
|
|
822
|
+
r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
|
|
823
|
+
}
|
|
824
|
+
#endif
|
|
825
|
+
|
|
826
|
+
return simde__m256i_from_private(r_);
|
|
827
|
+
#endif
|
|
828
|
+
}
|
|
829
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
830
|
+
# define _mm256_cmpgt_epi8(a, b) simde_mm256_cmpgt_epi8(a, b)
|
|
831
|
+
#endif
|
|
832
|
+
|
|
833
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
834
|
+
simde__m256i
|
|
835
|
+
simde_mm256_cmpgt_epi16 (simde__m256i a, simde__m256i b) {
|
|
836
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
837
|
+
return _mm256_cmpgt_epi16(a, b);
|
|
838
|
+
#else
|
|
839
|
+
simde__m256i_private
|
|
840
|
+
r_,
|
|
841
|
+
a_ = simde__m256i_to_private(a),
|
|
842
|
+
b_ = simde__m256i_to_private(b);
|
|
843
|
+
|
|
844
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
845
|
+
r_.m128i[0] = simde_mm_cmpgt_epi16(a_.m128i[0], b_.m128i[0]);
|
|
846
|
+
r_.m128i[1] = simde_mm_cmpgt_epi16(a_.m128i[1], b_.m128i[1]);
|
|
847
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
848
|
+
r_.i16 = a_.i16 > b_.i16;
|
|
849
|
+
#else
|
|
850
|
+
SIMDE__VECTORIZE
|
|
851
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
852
|
+
r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
|
|
853
|
+
}
|
|
854
|
+
#endif
|
|
855
|
+
|
|
856
|
+
return simde__m256i_from_private(r_);
|
|
857
|
+
#endif
|
|
858
|
+
}
|
|
859
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
860
|
+
# define _mm256_cmpgt_epi16(a, b) simde_mm256_cmpgt_epi16(a, b)
|
|
861
|
+
#endif
|
|
862
|
+
|
|
863
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
864
|
+
simde__m256i
|
|
865
|
+
simde_mm256_cmpgt_epi32 (simde__m256i a, simde__m256i b) {
|
|
866
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
867
|
+
return _mm256_cmpgt_epi32(a, b);
|
|
868
|
+
#else
|
|
869
|
+
simde__m256i_private
|
|
870
|
+
r_,
|
|
871
|
+
a_ = simde__m256i_to_private(a),
|
|
872
|
+
b_ = simde__m256i_to_private(b);
|
|
873
|
+
|
|
874
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
875
|
+
r_.m128i[0] = simde_mm_cmpgt_epi32(a_.m128i[0], b_.m128i[0]);
|
|
876
|
+
r_.m128i[1] = simde_mm_cmpgt_epi32(a_.m128i[1], b_.m128i[1]);
|
|
877
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
878
|
+
r_.i32 = a_.i32 > b_.i32;
|
|
879
|
+
#else
|
|
880
|
+
SIMDE__VECTORIZE
|
|
881
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
882
|
+
r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
|
|
883
|
+
}
|
|
884
|
+
#endif
|
|
885
|
+
|
|
886
|
+
return simde__m256i_from_private(r_);
|
|
887
|
+
#endif
|
|
888
|
+
}
|
|
889
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
890
|
+
# define _mm256_cmpgt_epi32(a, b) simde_mm256_cmpgt_epi32(a, b)
|
|
891
|
+
#endif
|
|
892
|
+
|
|
893
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
894
|
+
simde__m256i
|
|
895
|
+
simde_mm256_cmpgt_epi64 (simde__m256i a, simde__m256i b) {
|
|
896
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
897
|
+
return _mm256_cmpgt_epi64(a, b);
|
|
898
|
+
#else
|
|
899
|
+
simde__m256i_private
|
|
900
|
+
r_,
|
|
901
|
+
a_ = simde__m256i_to_private(a),
|
|
902
|
+
b_ = simde__m256i_to_private(b);
|
|
903
|
+
|
|
904
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
905
|
+
r_.m128i[0] = simde_mm_cmpgt_epi64(a_.m128i[0], b_.m128i[0]);
|
|
906
|
+
r_.m128i[1] = simde_mm_cmpgt_epi64(a_.m128i[1], b_.m128i[1]);
|
|
907
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
908
|
+
r_.i64 = a_.i64 > b_.i64;
|
|
909
|
+
#else
|
|
910
|
+
SIMDE__VECTORIZE
|
|
911
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
912
|
+
r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? ~INT64_C(0) : INT64_C(0);
|
|
913
|
+
}
|
|
914
|
+
#endif
|
|
915
|
+
|
|
916
|
+
return simde__m256i_from_private(r_);
|
|
917
|
+
#endif
|
|
918
|
+
}
|
|
919
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
920
|
+
# define _mm256_cmpgt_epi64(a, b) simde_mm256_cmpgt_epi64(a, b)
|
|
921
|
+
#endif
|
|
922
|
+
|
|
923
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
924
|
+
simde__m256i
|
|
925
|
+
simde_mm256_cvtepi8_epi16 (simde__m128i a) {
|
|
926
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
927
|
+
return _mm256_cvtepi8_epi16(a);
|
|
928
|
+
#else
|
|
929
|
+
simde__m256i_private r_;
|
|
930
|
+
simde__m128i_private a_ = simde__m128i_to_private(a);
|
|
931
|
+
|
|
932
|
+
#if defined(SIMDE__CONVERT_VECTOR)
|
|
933
|
+
SIMDE__CONVERT_VECTOR(r_.i16, a_.i8);
|
|
934
|
+
#else
|
|
935
|
+
SIMDE__VECTORIZE
|
|
936
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
937
|
+
r_.i16[i] = a_.i8[i];
|
|
938
|
+
}
|
|
939
|
+
#endif
|
|
940
|
+
|
|
941
|
+
return simde__m256i_from_private(r_);
|
|
942
|
+
#endif
|
|
943
|
+
}
|
|
944
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
945
|
+
# define _mm256_cvtepi8_epi16(a) simde_mm256_cvtepi8_epi16(a)
|
|
946
|
+
#endif
|
|
947
|
+
|
|
948
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
949
|
+
simde__m256i
|
|
950
|
+
simde_mm256_cvtepi8_epi32 (simde__m128i a) {
|
|
951
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
952
|
+
return _mm256_cvtepi8_epi32(a);
|
|
953
|
+
#else
|
|
954
|
+
simde__m256i_private r_;
|
|
955
|
+
simde__m128i_private a_ = simde__m128i_to_private(a);
|
|
956
|
+
|
|
957
|
+
#if defined(SIMDE__CONVERT_VECTOR)
|
|
958
|
+
SIMDE__CONVERT_VECTOR(r_.i32, a_.m64_private[0].i8);
|
|
959
|
+
#else
|
|
960
|
+
SIMDE__VECTORIZE
|
|
961
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
962
|
+
r_.i32[i] = a_.i8[i];
|
|
963
|
+
}
|
|
964
|
+
#endif
|
|
965
|
+
|
|
966
|
+
return simde__m256i_from_private(r_);
|
|
967
|
+
#endif
|
|
968
|
+
}
|
|
969
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
970
|
+
# define _mm256_cvtepi8_epi32(a) simde_mm256_cvtepi8_epi32(a)
|
|
971
|
+
#endif
|
|
972
|
+
|
|
973
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
974
|
+
simde__m256i
|
|
975
|
+
simde_mm256_cvtepi8_epi64 (simde__m128i a) {
|
|
976
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
977
|
+
return _mm256_cvtepi8_epi64(a);
|
|
978
|
+
#else
|
|
979
|
+
simde__m256i_private r_;
|
|
980
|
+
simde__m128i_private a_ = simde__m128i_to_private(a);
|
|
981
|
+
|
|
982
|
+
SIMDE__VECTORIZE
|
|
983
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
984
|
+
r_.i64[i] = a_.i8[i];
|
|
985
|
+
}
|
|
986
|
+
|
|
987
|
+
return simde__m256i_from_private(r_);
|
|
988
|
+
#endif
|
|
989
|
+
}
|
|
990
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
991
|
+
# define _mm256_cvtepi8_epi64(a) simde_mm256_cvtepi8_epi64(a)
|
|
992
|
+
#endif
|
|
993
|
+
|
|
994
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
995
|
+
simde__m256i
|
|
996
|
+
simde_mm256_cvtepi16_epi32 (simde__m128i a) {
|
|
997
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
998
|
+
return _mm256_cvtepi16_epi32(a);
|
|
999
|
+
#else
|
|
1000
|
+
simde__m256i_private r_;
|
|
1001
|
+
simde__m128i_private a_ = simde__m128i_to_private(a);
|
|
1002
|
+
|
|
1003
|
+
#if defined(SIMDE__CONVERT_VECTOR)
|
|
1004
|
+
SIMDE__CONVERT_VECTOR(r_.i32, a_.i16);
|
|
1005
|
+
#else
|
|
1006
|
+
SIMDE__VECTORIZE
|
|
1007
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
1008
|
+
r_.i32[i] = a_.i16[i];
|
|
1009
|
+
}
|
|
1010
|
+
#endif
|
|
1011
|
+
|
|
1012
|
+
return simde__m256i_from_private(r_);
|
|
1013
|
+
#endif
|
|
1014
|
+
}
|
|
1015
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1016
|
+
# define _mm256_cvtepi16_epi32(a) simde_mm256_cvtepi16_epi32(a)
|
|
1017
|
+
#endif
|
|
1018
|
+
|
|
1019
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1020
|
+
simde__m256i
|
|
1021
|
+
simde_mm256_cvtepi16_epi64 (simde__m128i a) {
|
|
1022
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1023
|
+
return _mm256_cvtepi16_epi64(a);
|
|
1024
|
+
#else
|
|
1025
|
+
simde__m256i_private r_;
|
|
1026
|
+
simde__m128i_private a_ = simde__m128i_to_private(a);
|
|
1027
|
+
|
|
1028
|
+
#if defined(SIMDE__CONVERT_VECTOR)
|
|
1029
|
+
SIMDE__CONVERT_VECTOR(r_.i64, a_.m64_private[0].i16);
|
|
1030
|
+
#else
|
|
1031
|
+
SIMDE__VECTORIZE
|
|
1032
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
1033
|
+
r_.i64[i] = a_.i16[i];
|
|
1034
|
+
}
|
|
1035
|
+
#endif
|
|
1036
|
+
|
|
1037
|
+
return simde__m256i_from_private(r_);
|
|
1038
|
+
#endif
|
|
1039
|
+
}
|
|
1040
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1041
|
+
# define _mm256_cvtepi16_epi64(a) simde_mm256_cvtepi16_epi64(a)
|
|
1042
|
+
#endif
|
|
1043
|
+
|
|
1044
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1045
|
+
simde__m256i
|
|
1046
|
+
simde_mm256_cvtepi32_epi64 (simde__m128i a) {
|
|
1047
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1048
|
+
return _mm256_cvtepi32_epi64(a);
|
|
1049
|
+
#else
|
|
1050
|
+
simde__m256i_private r_;
|
|
1051
|
+
simde__m128i_private a_ = simde__m128i_to_private(a);
|
|
1052
|
+
|
|
1053
|
+
#if defined(SIMDE__CONVERT_VECTOR)
|
|
1054
|
+
SIMDE__CONVERT_VECTOR(r_.i64, a_.i32);
|
|
1055
|
+
#else
|
|
1056
|
+
SIMDE__VECTORIZE
|
|
1057
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
1058
|
+
r_.i64[i] = a_.i32[i];
|
|
1059
|
+
}
|
|
1060
|
+
#endif
|
|
1061
|
+
|
|
1062
|
+
return simde__m256i_from_private(r_);
|
|
1063
|
+
#endif
|
|
1064
|
+
}
|
|
1065
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1066
|
+
# define _mm256_cvtepi32_epi64(a) simde_mm256_cvtepi32_epi64(a)
|
|
1067
|
+
#endif
|
|
1068
|
+
|
|
1069
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1070
|
+
simde__m256i
|
|
1071
|
+
simde_mm256_cvtepu8_epi16 (simde__m128i a) {
|
|
1072
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1073
|
+
return _mm256_cvtepu8_epi16(a);
|
|
1074
|
+
#else
|
|
1075
|
+
simde__m256i_private r_;
|
|
1076
|
+
simde__m128i_private a_ = simde__m128i_to_private(a);
|
|
1077
|
+
|
|
1078
|
+
#if defined(SIMDE__CONVERT_VECTOR)
|
|
1079
|
+
SIMDE__CONVERT_VECTOR(r_.i16, a_.u8);
|
|
1080
|
+
#else
|
|
1081
|
+
SIMDE__VECTORIZE
|
|
1082
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
1083
|
+
r_.i16[i] = a_.u8[i];
|
|
1084
|
+
}
|
|
1085
|
+
#endif
|
|
1086
|
+
|
|
1087
|
+
return simde__m256i_from_private(r_);
|
|
1088
|
+
#endif
|
|
1089
|
+
}
|
|
1090
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1091
|
+
# define _mm256_cvtepu8_epi16(a) simde_mm256_cvtepu8_epi16(a)
|
|
1092
|
+
#endif
|
|
1093
|
+
|
|
1094
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1095
|
+
simde__m256i
|
|
1096
|
+
simde_mm256_cvtepu8_epi32 (simde__m128i a) {
|
|
1097
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1098
|
+
return _mm256_cvtepu8_epi32(a);
|
|
1099
|
+
#else
|
|
1100
|
+
simde__m256i_private r_;
|
|
1101
|
+
simde__m128i_private a_ = simde__m128i_to_private(a);
|
|
1102
|
+
|
|
1103
|
+
#if defined(SIMDE__CONVERT_VECTOR)
|
|
1104
|
+
SIMDE__CONVERT_VECTOR(r_.i32, a_.m64_private[0].u8);
|
|
1105
|
+
#else
|
|
1106
|
+
SIMDE__VECTORIZE
|
|
1107
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
1108
|
+
r_.i32[i] = a_.u8[i];
|
|
1109
|
+
}
|
|
1110
|
+
#endif
|
|
1111
|
+
|
|
1112
|
+
return simde__m256i_from_private(r_);
|
|
1113
|
+
#endif
|
|
1114
|
+
}
|
|
1115
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1116
|
+
# define _mm256_cvtepu8_epi32(a) simde_mm256_cvtepu8_epi32(a)
|
|
1117
|
+
#endif
|
|
1118
|
+
|
|
1119
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1120
|
+
simde__m256i
|
|
1121
|
+
simde_mm256_cvtepu8_epi64 (simde__m128i a) {
|
|
1122
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1123
|
+
return _mm256_cvtepu8_epi64(a);
|
|
1124
|
+
#else
|
|
1125
|
+
simde__m256i_private r_;
|
|
1126
|
+
simde__m128i_private a_ = simde__m128i_to_private(a);
|
|
1127
|
+
|
|
1128
|
+
SIMDE__VECTORIZE
|
|
1129
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
1130
|
+
r_.i64[i] = a_.u8[i];
|
|
1131
|
+
}
|
|
1132
|
+
|
|
1133
|
+
return simde__m256i_from_private(r_);
|
|
1134
|
+
#endif
|
|
1135
|
+
}
|
|
1136
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1137
|
+
# define _mm256_cvtepu8_epi64(a) simde_mm256_cvtepu8_epi64(a)
|
|
1138
|
+
#endif
|
|
1139
|
+
|
|
1140
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1141
|
+
simde__m256i
|
|
1142
|
+
simde_mm256_cvtepu16_epi32 (simde__m128i a) {
|
|
1143
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1144
|
+
return _mm256_cvtepu16_epi32(a);
|
|
1145
|
+
#else
|
|
1146
|
+
simde__m256i_private r_;
|
|
1147
|
+
simde__m128i_private a_ = simde__m128i_to_private(a);
|
|
1148
|
+
|
|
1149
|
+
#if defined(SIMDE__CONVERT_VECTOR)
|
|
1150
|
+
SIMDE__CONVERT_VECTOR(r_.i32, a_.u16);
|
|
1151
|
+
#else
|
|
1152
|
+
SIMDE__VECTORIZE
|
|
1153
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
1154
|
+
r_.i32[i] = a_.u16[i];
|
|
1155
|
+
}
|
|
1156
|
+
#endif
|
|
1157
|
+
|
|
1158
|
+
return simde__m256i_from_private(r_);
|
|
1159
|
+
#endif
|
|
1160
|
+
}
|
|
1161
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1162
|
+
# define _mm256_cvtepu16_epi32(a) simde_mm256_cvtepu16_epi32(a)
|
|
1163
|
+
#endif
|
|
1164
|
+
|
|
1165
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1166
|
+
simde__m256i
|
|
1167
|
+
simde_mm256_cvtepu16_epi64 (simde__m128i a) {
|
|
1168
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1169
|
+
return _mm256_cvtepu16_epi64(a);
|
|
1170
|
+
#else
|
|
1171
|
+
simde__m256i_private r_;
|
|
1172
|
+
simde__m128i_private a_ = simde__m128i_to_private(a);
|
|
1173
|
+
|
|
1174
|
+
#if defined(SIMDE__CONVERT_VECTOR)
|
|
1175
|
+
SIMDE__CONVERT_VECTOR(r_.i64, a_.m64_private[0].u16);
|
|
1176
|
+
#else
|
|
1177
|
+
SIMDE__VECTORIZE
|
|
1178
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
1179
|
+
r_.i64[i] = a_.u16[i];
|
|
1180
|
+
}
|
|
1181
|
+
#endif
|
|
1182
|
+
|
|
1183
|
+
return simde__m256i_from_private(r_);
|
|
1184
|
+
#endif
|
|
1185
|
+
}
|
|
1186
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1187
|
+
# define _mm256_cvtepu16_epi64(a) simde_mm256_cvtepu16_epi64(a)
|
|
1188
|
+
#endif
|
|
1189
|
+
|
|
1190
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1191
|
+
simde__m256i
|
|
1192
|
+
simde_mm256_cvtepu32_epi64 (simde__m128i a) {
|
|
1193
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1194
|
+
return _mm256_cvtepu32_epi64(a);
|
|
1195
|
+
#else
|
|
1196
|
+
simde__m256i_private r_;
|
|
1197
|
+
simde__m128i_private a_ = simde__m128i_to_private(a);
|
|
1198
|
+
|
|
1199
|
+
#if defined(SIMDE__CONVERT_VECTOR)
|
|
1200
|
+
SIMDE__CONVERT_VECTOR(r_.i64, a_.u32);
|
|
1201
|
+
#else
|
|
1202
|
+
SIMDE__VECTORIZE
|
|
1203
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
1204
|
+
r_.i64[i] = a_.u32[i];
|
|
1205
|
+
}
|
|
1206
|
+
#endif
|
|
1207
|
+
|
|
1208
|
+
return simde__m256i_from_private(r_);
|
|
1209
|
+
#endif
|
|
1210
|
+
}
|
|
1211
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1212
|
+
# define _mm256_cvtepu32_epi64(a) simde_mm256_cvtepu32_epi64(a)
|
|
1213
|
+
#endif
|
|
1214
|
+
|
|
1215
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1216
|
+
int
|
|
1217
|
+
simde_mm256_extract_epi8 (simde__m256i a, const int index)
|
|
1218
|
+
HEDLEY_REQUIRE_MSG((index & 31) == index, "index must be in range [0, 31]"){
|
|
1219
|
+
simde__m256i_private a_ = simde__m256i_to_private(a);
|
|
1220
|
+
return a_.i8[index];
|
|
1221
|
+
}
|
|
1222
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1223
|
+
# define _mm256_extract_epi8(a, index) simde_mm256_extract_epi8(a, index)
|
|
1224
|
+
#endif
|
|
1225
|
+
|
|
1226
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1227
|
+
int
|
|
1228
|
+
simde_mm256_extract_epi16 (simde__m256i a, const int index)
|
|
1229
|
+
HEDLEY_REQUIRE_MSG((index & 0xf) == index, "index must be in range [0, 15]") {
|
|
1230
|
+
simde__m256i_private a_ = simde__m256i_to_private(a);
|
|
1231
|
+
return a_.i16[index];
|
|
1232
|
+
}
|
|
1233
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1234
|
+
# define _mm256_extract_epi16(a, index) simde_mm256_extract_epi16(a, index)
|
|
1235
|
+
#endif
|
|
1236
|
+
|
|
1237
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1238
|
+
simde__m128i
|
|
1239
|
+
simde_mm256_extracti128_si256 (simde__m256i a, const int imm8)
|
|
1240
|
+
HEDLEY_REQUIRE_MSG((imm8 & 1) == imm8, "imm8 must be 0 or 1") {
|
|
1241
|
+
simde__m256i_private a_ = simde__m256i_to_private(a);
|
|
1242
|
+
return a_.m128i[imm8];
|
|
1243
|
+
}
|
|
1244
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1245
|
+
# define simde_mm256_extracti128_si256(a, imm8) _mm256_extracti128_si256(a, imm8)
|
|
1246
|
+
#endif
|
|
1247
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1248
|
+
# define _mm256_extracti128_si256(a, imm8) simde_mm256_extracti128_si256(a, imm8)
|
|
1249
|
+
#endif
|
|
1250
|
+
|
|
1251
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1252
|
+
simde__m256i
|
|
1253
|
+
simde_mm256_madd_epi16 (simde__m256i a, simde__m256i b) {
|
|
1254
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1255
|
+
return _mm256_madd_epi16(a, b);
|
|
1256
|
+
#else
|
|
1257
|
+
simde__m256i_private
|
|
1258
|
+
r_,
|
|
1259
|
+
a_ = simde__m256i_to_private(a),
|
|
1260
|
+
b_ = simde__m256i_to_private(b);
|
|
1261
|
+
|
|
1262
|
+
r_.m128i[0] = simde_mm_madd_epi16(a_.m128i[0], b_.m128i[0]);
|
|
1263
|
+
r_.m128i[1] = simde_mm_madd_epi16(a_.m128i[1], b_.m128i[1]);
|
|
1264
|
+
|
|
1265
|
+
return simde__m256i_from_private(r_);
|
|
1266
|
+
#endif
|
|
1267
|
+
}
|
|
1268
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1269
|
+
# define _mm256_add_epi16(a, b) simde_mm256_add_epi16(a, b)
|
|
1270
|
+
#endif
|
|
1271
|
+
|
|
1272
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1273
|
+
simde__m256i
|
|
1274
|
+
simde_mm256_max_epi8 (simde__m256i a, simde__m256i b) {
|
|
1275
|
+
#if defined(SIMDE_AVX2_NATIVE) && !defined(__PGI)
|
|
1276
|
+
return _mm256_max_epi8(a, b);
|
|
1277
|
+
#else
|
|
1278
|
+
simde__m256i_private
|
|
1279
|
+
r_,
|
|
1280
|
+
a_ = simde__m256i_to_private(a),
|
|
1281
|
+
b_ = simde__m256i_to_private(b);
|
|
1282
|
+
|
|
1283
|
+
#if defined(SIMDE_ARCH_X86_SSE4_1)
|
|
1284
|
+
r_.m128i[0] = simde_mm_max_epi8(a_.m128i[0], b_.m128i[0]);
|
|
1285
|
+
r_.m128i[1] = simde_mm_max_epi8(a_.m128i[1], b_.m128i[1]);
|
|
1286
|
+
#else
|
|
1287
|
+
SIMDE__VECTORIZE
|
|
1288
|
+
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
|
|
1289
|
+
r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i];
|
|
1290
|
+
}
|
|
1291
|
+
#endif
|
|
1292
|
+
|
|
1293
|
+
return simde__m256i_from_private(r_);
|
|
1294
|
+
#endif
|
|
1295
|
+
}
|
|
1296
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1297
|
+
# define _mm256_max_epi8(a, b) simde_mm256_max_epi8(a, b)
|
|
1298
|
+
#endif
|
|
1299
|
+
|
|
1300
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1301
|
+
simde__m256i
|
|
1302
|
+
simde_mm256_max_epu8 (simde__m256i a, simde__m256i b) {
|
|
1303
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1304
|
+
return _mm256_max_epu8(a, b);
|
|
1305
|
+
#else
|
|
1306
|
+
simde__m256i_private
|
|
1307
|
+
r_,
|
|
1308
|
+
a_ = simde__m256i_to_private(a),
|
|
1309
|
+
b_ = simde__m256i_to_private(b);
|
|
1310
|
+
|
|
1311
|
+
#if defined(SIMDE_ARCH_X86_SSE2) || defined(SIMDE_SSE2_NEON)
|
|
1312
|
+
r_.m128i[0] = simde_mm_max_epu8(a_.m128i[0], b_.m128i[0]);
|
|
1313
|
+
r_.m128i[1] = simde_mm_max_epu8(a_.m128i[1], b_.m128i[1]);
|
|
1314
|
+
#else
|
|
1315
|
+
SIMDE__VECTORIZE
|
|
1316
|
+
for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
|
|
1317
|
+
r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i];
|
|
1318
|
+
}
|
|
1319
|
+
#endif
|
|
1320
|
+
|
|
1321
|
+
return simde__m256i_from_private(r_);
|
|
1322
|
+
#endif
|
|
1323
|
+
}
|
|
1324
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1325
|
+
# define _mm256_max_epu8(a, b) simde_mm256_max_epu8(a, b)
|
|
1326
|
+
#endif
|
|
1327
|
+
|
|
1328
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1329
|
+
simde__m256i
|
|
1330
|
+
simde_mm256_max_epu16 (simde__m256i a, simde__m256i b) {
|
|
1331
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1332
|
+
return _mm256_max_epu16(a, b);
|
|
1333
|
+
#else
|
|
1334
|
+
simde__m256i_private
|
|
1335
|
+
r_,
|
|
1336
|
+
a_ = simde__m256i_to_private(a),
|
|
1337
|
+
b_ = simde__m256i_to_private(b);
|
|
1338
|
+
|
|
1339
|
+
#if defined(SIMDE_ARCH_X86_SSE2) || defined(SIMDE_SSE2_NEON)
|
|
1340
|
+
r_.m128i[0] = simde_mm_max_epu16(a_.m128i[0], b_.m128i[0]);
|
|
1341
|
+
r_.m128i[1] = simde_mm_max_epu16(a_.m128i[1], b_.m128i[1]);
|
|
1342
|
+
#else
|
|
1343
|
+
SIMDE__VECTORIZE
|
|
1344
|
+
for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
|
|
1345
|
+
r_.u16[i] = (a_.u16[i] > b_.u16[i]) ? a_.u16[i] : b_.u16[i];
|
|
1346
|
+
}
|
|
1347
|
+
#endif
|
|
1348
|
+
|
|
1349
|
+
return simde__m256i_from_private(r_);
|
|
1350
|
+
#endif
|
|
1351
|
+
}
|
|
1352
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1353
|
+
# define _mm256_max_epu16(a, b) simde_mm256_max_epu16(a, b)
|
|
1354
|
+
#endif
|
|
1355
|
+
|
|
1356
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1357
|
+
simde__m256i
|
|
1358
|
+
simde_mm256_max_epu32 (simde__m256i a, simde__m256i b) {
|
|
1359
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1360
|
+
return _mm256_max_epu32(a, b);
|
|
1361
|
+
#else
|
|
1362
|
+
simde__m256i_private
|
|
1363
|
+
r_,
|
|
1364
|
+
a_ = simde__m256i_to_private(a),
|
|
1365
|
+
b_ = simde__m256i_to_private(b);
|
|
1366
|
+
|
|
1367
|
+
#if defined(SIMDE_ARCH_X86_SSE2) || defined(SIMDE_SSE2_NEON)
|
|
1368
|
+
r_.m128i[0] = simde_mm_max_epu32(a_.m128i[0], b_.m128i[0]);
|
|
1369
|
+
r_.m128i[1] = simde_mm_max_epu32(a_.m128i[1], b_.m128i[1]);
|
|
1370
|
+
#else
|
|
1371
|
+
SIMDE__VECTORIZE
|
|
1372
|
+
for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
|
|
1373
|
+
r_.u32[i] = (a_.u32[i] > b_.u32[i]) ? a_.u32[i] : b_.u32[i];
|
|
1374
|
+
}
|
|
1375
|
+
#endif
|
|
1376
|
+
|
|
1377
|
+
return simde__m256i_from_private(r_);
|
|
1378
|
+
#endif
|
|
1379
|
+
}
|
|
1380
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1381
|
+
# define _mm256_max_epu32(a, b) simde_mm256_max_epu32(a, b)
|
|
1382
|
+
#endif
|
|
1383
|
+
|
|
1384
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1385
|
+
simde__m256i
|
|
1386
|
+
simde_mm256_max_epi16 (simde__m256i a, simde__m256i b) {
|
|
1387
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1388
|
+
return _mm256_max_epi16(a, b);
|
|
1389
|
+
#else
|
|
1390
|
+
simde__m256i_private
|
|
1391
|
+
r_,
|
|
1392
|
+
a_ = simde__m256i_to_private(a),
|
|
1393
|
+
b_ = simde__m256i_to_private(b);
|
|
1394
|
+
|
|
1395
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
1396
|
+
r_.m128i[0] = simde_mm_max_epi16(a_.m128i[0], b_.m128i[0]);
|
|
1397
|
+
r_.m128i[1] = simde_mm_max_epi16(a_.m128i[1], b_.m128i[1]);
|
|
1398
|
+
#else
|
|
1399
|
+
SIMDE__VECTORIZE
|
|
1400
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
1401
|
+
r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i];
|
|
1402
|
+
}
|
|
1403
|
+
#endif
|
|
1404
|
+
|
|
1405
|
+
return simde__m256i_from_private(r_);
|
|
1406
|
+
#endif
|
|
1407
|
+
}
|
|
1408
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1409
|
+
# define _mm256_max_epi16(a, b) simde_mm256_max_epi16(a, b)
|
|
1410
|
+
#endif
|
|
1411
|
+
|
|
1412
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1413
|
+
simde__m256i
|
|
1414
|
+
simde_mm256_max_epi32 (simde__m256i a, simde__m256i b) {
|
|
1415
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1416
|
+
return _mm256_max_epi32(a, b);
|
|
1417
|
+
#else
|
|
1418
|
+
simde__m256i_private
|
|
1419
|
+
r_,
|
|
1420
|
+
a_ = simde__m256i_to_private(a),
|
|
1421
|
+
b_ = simde__m256i_to_private(b);
|
|
1422
|
+
|
|
1423
|
+
#if defined(SIMDE_ARCH_X86_SSE4_1)
|
|
1424
|
+
r_.m128i[0] = simde_mm_max_epi32(a_.m128i[0], b_.m128i[0]);
|
|
1425
|
+
r_.m128i[1] = simde_mm_max_epi32(a_.m128i[1], b_.m128i[1]);
|
|
1426
|
+
#else
|
|
1427
|
+
SIMDE__VECTORIZE
|
|
1428
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
1429
|
+
r_.i32[i] = a_.i32[i] > b_.i32[i] ? a_.i32[i] : b_.i32[i];
|
|
1430
|
+
}
|
|
1431
|
+
#endif
|
|
1432
|
+
|
|
1433
|
+
return simde__m256i_from_private(r_);
|
|
1434
|
+
#endif
|
|
1435
|
+
}
|
|
1436
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1437
|
+
# define _mm256_max_epi32(a, b) simde_mm256_max_epi32(a, b)
|
|
1438
|
+
#endif
|
|
1439
|
+
|
|
1440
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1441
|
+
simde__m256i
|
|
1442
|
+
simde_mm256_min_epi8 (simde__m256i a, simde__m256i b) {
|
|
1443
|
+
#if defined(SIMDE_AVX2_NATIVE) && !defined(__PGI)
|
|
1444
|
+
return _mm256_min_epi8(a, b);
|
|
1445
|
+
#else
|
|
1446
|
+
simde__m256i_private
|
|
1447
|
+
r_,
|
|
1448
|
+
a_ = simde__m256i_to_private(a),
|
|
1449
|
+
b_ = simde__m256i_to_private(b);
|
|
1450
|
+
|
|
1451
|
+
#if defined(SIMDE_ARCH_X86_SSE4_1)
|
|
1452
|
+
r_.m128i[0] = simde_mm_min_epi8(a_.m128i[0], b_.m128i[0]);
|
|
1453
|
+
r_.m128i[1] = simde_mm_min_epi8(a_.m128i[1], b_.m128i[1]);
|
|
1454
|
+
#else
|
|
1455
|
+
SIMDE__VECTORIZE
|
|
1456
|
+
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
|
|
1457
|
+
r_.i8[i] = a_.i8[i] < b_.i8[i] ? a_.i8[i] : b_.i8[i];
|
|
1458
|
+
}
|
|
1459
|
+
#endif
|
|
1460
|
+
|
|
1461
|
+
return simde__m256i_from_private(r_);
|
|
1462
|
+
#endif
|
|
1463
|
+
}
|
|
1464
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1465
|
+
# define _mm256_min_epi8(a, b) simde_mm256_min_epi8(a, b)
|
|
1466
|
+
#endif
|
|
1467
|
+
|
|
1468
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1469
|
+
simde__m256i
|
|
1470
|
+
simde_mm256_min_epi16 (simde__m256i a, simde__m256i b) {
|
|
1471
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1472
|
+
return _mm256_min_epi16(a, b);
|
|
1473
|
+
#else
|
|
1474
|
+
simde__m256i_private
|
|
1475
|
+
r_,
|
|
1476
|
+
a_ = simde__m256i_to_private(a),
|
|
1477
|
+
b_ = simde__m256i_to_private(b);
|
|
1478
|
+
|
|
1479
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
1480
|
+
r_.m128i[0] = simde_mm_min_epi16(a_.m128i[0], b_.m128i[0]);
|
|
1481
|
+
r_.m128i[1] = simde_mm_min_epi16(a_.m128i[1], b_.m128i[1]);
|
|
1482
|
+
#else
|
|
1483
|
+
SIMDE__VECTORIZE
|
|
1484
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
1485
|
+
r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i];
|
|
1486
|
+
}
|
|
1487
|
+
#endif
|
|
1488
|
+
|
|
1489
|
+
return simde__m256i_from_private(r_);
|
|
1490
|
+
#endif
|
|
1491
|
+
}
|
|
1492
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1493
|
+
# define _mm256_min_epi16(a, b) simde_mm256_min_epi16(a, b)
|
|
1494
|
+
#endif
|
|
1495
|
+
|
|
1496
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1497
|
+
simde__m256i
|
|
1498
|
+
simde_mm256_min_epi32 (simde__m256i a, simde__m256i b) {
|
|
1499
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1500
|
+
return _mm256_min_epi32(a, b);
|
|
1501
|
+
#else
|
|
1502
|
+
simde__m256i_private
|
|
1503
|
+
r_,
|
|
1504
|
+
a_ = simde__m256i_to_private(a),
|
|
1505
|
+
b_ = simde__m256i_to_private(b);
|
|
1506
|
+
|
|
1507
|
+
#if defined(SIMDE_ARCH_X86_SSE4_1)
|
|
1508
|
+
r_.m128i[0] = simde_mm_min_epi32(a_.m128i[0], b_.m128i[0]);
|
|
1509
|
+
r_.m128i[1] = simde_mm_min_epi32(a_.m128i[1], b_.m128i[1]);
|
|
1510
|
+
#else
|
|
1511
|
+
SIMDE__VECTORIZE
|
|
1512
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
1513
|
+
r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i];
|
|
1514
|
+
}
|
|
1515
|
+
#endif
|
|
1516
|
+
|
|
1517
|
+
return simde__m256i_from_private(r_);
|
|
1518
|
+
#endif
|
|
1519
|
+
}
|
|
1520
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1521
|
+
# define _mm256_min_epi32(a, b) simde_mm256_min_epi32(a, b)
|
|
1522
|
+
#endif
|
|
1523
|
+
|
|
1524
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1525
|
+
simde__m256i
|
|
1526
|
+
simde_mm256_min_epu8 (simde__m256i a, simde__m256i b) {
|
|
1527
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1528
|
+
return _mm256_min_epu8(a, b);
|
|
1529
|
+
#else
|
|
1530
|
+
simde__m256i_private
|
|
1531
|
+
r_,
|
|
1532
|
+
a_ = simde__m256i_to_private(a),
|
|
1533
|
+
b_ = simde__m256i_to_private(b);
|
|
1534
|
+
|
|
1535
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
1536
|
+
r_.m128i[0] = simde_mm_min_epu8(a_.m128i[0], b_.m128i[0]);
|
|
1537
|
+
r_.m128i[1] = simde_mm_min_epu8(a_.m128i[1], b_.m128i[1]);
|
|
1538
|
+
#else
|
|
1539
|
+
SIMDE__VECTORIZE
|
|
1540
|
+
for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
|
|
1541
|
+
r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i];
|
|
1542
|
+
}
|
|
1543
|
+
#endif
|
|
1544
|
+
|
|
1545
|
+
return simde__m256i_from_private(r_);
|
|
1546
|
+
#endif
|
|
1547
|
+
}
|
|
1548
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1549
|
+
# define _mm256_min_epu8(a, b) simde_mm256_min_epu8(a, b)
|
|
1550
|
+
#endif
|
|
1551
|
+
|
|
1552
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1553
|
+
simde__m256i
|
|
1554
|
+
simde_mm256_min_epu16 (simde__m256i a, simde__m256i b) {
|
|
1555
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1556
|
+
return _mm256_min_epu16(a, b);
|
|
1557
|
+
#else
|
|
1558
|
+
simde__m256i_private
|
|
1559
|
+
r_,
|
|
1560
|
+
a_ = simde__m256i_to_private(a),
|
|
1561
|
+
b_ = simde__m256i_to_private(b);
|
|
1562
|
+
|
|
1563
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
1564
|
+
r_.m128i[0] = simde_mm_min_epu16(a_.m128i[0], b_.m128i[0]);
|
|
1565
|
+
r_.m128i[1] = simde_mm_min_epu16(a_.m128i[1], b_.m128i[1]);
|
|
1566
|
+
#else
|
|
1567
|
+
SIMDE__VECTORIZE
|
|
1568
|
+
for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
|
|
1569
|
+
r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? a_.u16[i] : b_.u16[i];
|
|
1570
|
+
}
|
|
1571
|
+
#endif
|
|
1572
|
+
|
|
1573
|
+
return simde__m256i_from_private(r_);
|
|
1574
|
+
#endif
|
|
1575
|
+
}
|
|
1576
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1577
|
+
# define _mm256_min_epu16(a, b) simde_mm256_min_epu16(a, b)
|
|
1578
|
+
#endif
|
|
1579
|
+
|
|
1580
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1581
|
+
simde__m256i
|
|
1582
|
+
simde_mm256_min_epu32 (simde__m256i a, simde__m256i b) {
|
|
1583
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1584
|
+
return _mm256_min_epu32(a, b);
|
|
1585
|
+
#else
|
|
1586
|
+
simde__m256i_private
|
|
1587
|
+
r_,
|
|
1588
|
+
a_ = simde__m256i_to_private(a),
|
|
1589
|
+
b_ = simde__m256i_to_private(b);
|
|
1590
|
+
|
|
1591
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
1592
|
+
r_.m128i[0] = simde_mm_min_epu32(a_.m128i[0], b_.m128i[0]);
|
|
1593
|
+
r_.m128i[1] = simde_mm_min_epu32(a_.m128i[1], b_.m128i[1]);
|
|
1594
|
+
#else
|
|
1595
|
+
SIMDE__VECTORIZE
|
|
1596
|
+
for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
|
|
1597
|
+
r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? a_.u32[i] : b_.u32[i];
|
|
1598
|
+
}
|
|
1599
|
+
#endif
|
|
1600
|
+
|
|
1601
|
+
return simde__m256i_from_private(r_);
|
|
1602
|
+
#endif
|
|
1603
|
+
}
|
|
1604
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1605
|
+
# define _mm256_min_epu32(a, b) simde_mm256_min_epu32(a, b)
|
|
1606
|
+
#endif
|
|
1607
|
+
|
|
1608
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1609
|
+
int32_t
|
|
1610
|
+
simde_mm256_movemask_epi8 (simde__m256i a) {
|
|
1611
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1612
|
+
return _mm256_movemask_epi8(a);
|
|
1613
|
+
#else
|
|
1614
|
+
simde__m256i_private a_ = simde__m256i_to_private(a);
|
|
1615
|
+
int32_t r;
|
|
1616
|
+
|
|
1617
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
1618
|
+
r = simde_mm_movemask_epi8(a_.m128i[1]);
|
|
1619
|
+
r = (r << 16) | simde_mm_movemask_epi8(a_.m128i[0]);
|
|
1620
|
+
#else
|
|
1621
|
+
r = 0;
|
|
1622
|
+
SIMDE__VECTORIZE_REDUCTION(|:r)
|
|
1623
|
+
for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) {
|
|
1624
|
+
r |= (a_.u8[31 - i] >> 7) << (31 - i);
|
|
1625
|
+
}
|
|
1626
|
+
#endif
|
|
1627
|
+
|
|
1628
|
+
return r;
|
|
1629
|
+
#endif
|
|
1630
|
+
}
|
|
1631
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1632
|
+
# define _mm256_movemask_epi8(a) simde_mm256_movemask_epi8(a)
|
|
1633
|
+
#endif
|
|
1634
|
+
|
|
1635
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1636
|
+
simde__m256i
|
|
1637
|
+
simde_mm256_or_si256 (simde__m256i a, simde__m256i b) {
|
|
1638
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1639
|
+
return _mm256_or_si256(a, b);
|
|
1640
|
+
#else
|
|
1641
|
+
simde__m256i_private
|
|
1642
|
+
r_,
|
|
1643
|
+
a_ = simde__m256i_to_private(a),
|
|
1644
|
+
b_ = simde__m256i_to_private(b);
|
|
1645
|
+
|
|
1646
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
1647
|
+
r_.m128i[0] = simde_mm_or_si128(a_.m128i[0], b_.m128i[0]);
|
|
1648
|
+
r_.m128i[1] = simde_mm_or_si128(a_.m128i[1], b_.m128i[1]);
|
|
1649
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
1650
|
+
r_.i32f = a_.i32f | b_.i32f;
|
|
1651
|
+
#else
|
|
1652
|
+
SIMDE__VECTORIZE
|
|
1653
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
|
|
1654
|
+
r_.i32f[i] = a_.i32f[i] | b_.i32f[i];
|
|
1655
|
+
}
|
|
1656
|
+
#endif
|
|
1657
|
+
|
|
1658
|
+
return simde__m256i_from_private(r_);
|
|
1659
|
+
#endif
|
|
1660
|
+
}
|
|
1661
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1662
|
+
# define _mm256_or_si256(a, b) simde_mm256_or_si256(a, b)
|
|
1663
|
+
#endif
|
|
1664
|
+
|
|
1665
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1666
|
+
simde__m256i
|
|
1667
|
+
simde_mm256_packs_epi32 (simde__m256i a, simde__m256i b) {
|
|
1668
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1669
|
+
return _mm256_packs_epi32(a, b);
|
|
1670
|
+
#else
|
|
1671
|
+
simde__m256i_private
|
|
1672
|
+
r_,
|
|
1673
|
+
v_[] = {
|
|
1674
|
+
simde__m256i_to_private(a),
|
|
1675
|
+
simde__m256i_to_private(b)
|
|
1676
|
+
};
|
|
1677
|
+
#if defined(SIMDE_ARCH_X86_SSE2) || defined(SIMDE_SSE2_NEON)
|
|
1678
|
+
r_.m128i_private[0] = simde__m128i_to_private(simde_mm_packs_epi32(simde__m128i_from_private(v_[0].m128i_private[0]), simde__m128i_from_private(v_[1].m128i_private[0])));
|
|
1679
|
+
r_.m128i_private[1] = simde__m128i_to_private(simde_mm_packs_epi32(simde__m128i_from_private(v_[0].m128i_private[1]), simde__m128i_from_private(v_[1].m128i_private[1])));
|
|
1680
|
+
#else
|
|
1681
|
+
SIMDE__VECTORIZE
|
|
1682
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
1683
|
+
const int32_t v = v_[(i >> 2) & 1].i32[(i & 11) - ((i & 8) >> 1)];
|
|
1684
|
+
r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (v > INT16_MAX) ? INT16_MAX : ((v < INT16_MIN) ? INT16_MIN : v));
|
|
1685
|
+
}
|
|
1686
|
+
#endif
|
|
1687
|
+
|
|
1688
|
+
return simde__m256i_from_private(r_);
|
|
1689
|
+
#endif
|
|
1690
|
+
}
|
|
1691
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1692
|
+
# define _mm256_packs_epi32(a, b) simde_mm256_packs_epi32(a, b)
|
|
1693
|
+
#endif
|
|
1694
|
+
|
|
1695
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1696
|
+
simde__m256i
|
|
1697
|
+
simde_mm256_permute2x128_si256 (simde__m256i a, simde__m256i b, const int imm8)
|
|
1698
|
+
HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
|
|
1699
|
+
simde__m256i_private
|
|
1700
|
+
r_,
|
|
1701
|
+
a_ = simde__m256i_to_private(a),
|
|
1702
|
+
b_ = simde__m256i_to_private(b);
|
|
1703
|
+
|
|
1704
|
+
r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]);
|
|
1705
|
+
r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]);
|
|
1706
|
+
|
|
1707
|
+
return simde__m256i_from_private(r_);
|
|
1708
|
+
}
|
|
1709
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1710
|
+
# define simde_mm256_permute2x128_si256(a, b, imm8) _mm256_permute2x128_si256(a, b, imm8)
|
|
1711
|
+
#endif
|
|
1712
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1713
|
+
# define _mm256_permute2x128_si256(a, b, imm8) simde_mm256_permute2x128_si256(a, b, imm8)
|
|
1714
|
+
#endif
|
|
1715
|
+
|
|
1716
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1717
|
+
simde__m256i
|
|
1718
|
+
simde_mm256_permute4x64_epi64 (simde__m256i a, const int imm8)
|
|
1719
|
+
HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
|
|
1720
|
+
simde__m256i_private
|
|
1721
|
+
r_,
|
|
1722
|
+
a_ = simde__m256i_to_private(a);
|
|
1723
|
+
|
|
1724
|
+
r_.i64[0] = (imm8 & 0x02) ? a_.i64[((imm8 ) & 1)+2] : a_.i64[(imm8 ) & 1];
|
|
1725
|
+
r_.i64[1] = (imm8 & 0x08) ? a_.i64[((imm8 >> 2 ) & 1)+2] : a_.i64[(imm8 >> 2 ) & 1];
|
|
1726
|
+
r_.i64[2] = (imm8 & 0x20) ? a_.i64[((imm8 >> 4 ) & 1)+2] : a_.i64[(imm8 >> 4 ) & 1];
|
|
1727
|
+
r_.i64[3] = (imm8 & 0x80) ? a_.i64[((imm8 >> 6 ) & 1)+2] : a_.i64[(imm8 >> 6 ) & 1];
|
|
1728
|
+
|
|
1729
|
+
return simde__m256i_from_private(r_);
|
|
1730
|
+
}
|
|
1731
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1732
|
+
# define simde_mm256_permute4x64_epi64(a, imm8) _mm256_permute4x64_epi64(a, imm8)
|
|
1733
|
+
#endif
|
|
1734
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1735
|
+
# define _mm256_permute4x64_epi64(a, imm8) simde_mm256_permute4x64_epi64(a, imm8)
|
|
1736
|
+
#endif
|
|
1737
|
+
|
|
1738
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1739
|
+
simde__m256i
|
|
1740
|
+
simde_mm256_shuffle_epi8 (simde__m256i a, simde__m256i b) {
|
|
1741
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1742
|
+
return _mm256_shuffle_epi8(a, b);
|
|
1743
|
+
#else
|
|
1744
|
+
simde__m256i_private
|
|
1745
|
+
r_,
|
|
1746
|
+
a_ = simde__m256i_to_private(a),
|
|
1747
|
+
b_ = simde__m256i_to_private(b);
|
|
1748
|
+
|
|
1749
|
+
#if defined(SIMDE_ARCH_X86_SSSE3)
|
|
1750
|
+
r_.m128i[0] = simde_mm_shuffle_epi8(a_.m128i[0], b_.m128i[0]);
|
|
1751
|
+
r_.m128i[1] = simde_mm_shuffle_epi8(a_.m128i[1], b_.m128i[1]);
|
|
1752
|
+
#else
|
|
1753
|
+
SIMDE__VECTORIZE
|
|
1754
|
+
for (size_t i = 0 ; i < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; i++) {
|
|
1755
|
+
r_.u8[ i ] = (b_.u8[ i ] & 0x80) ? 0 : a_.u8[(b_.u8[ i ] & 0x0f) ];
|
|
1756
|
+
r_.u8[i + 16] = (b_.u8[i + 16] & 0x80) ? 0 : a_.u8[(b_.u8[i + 16] & 0x0f) + 16];
|
|
1757
|
+
}
|
|
1758
|
+
#endif
|
|
1759
|
+
|
|
1760
|
+
return simde__m256i_from_private(r_);
|
|
1761
|
+
#endif
|
|
1762
|
+
}
|
|
1763
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1764
|
+
# define _mm256_shuffle_epi8(a, b) simde_mm256_shuffle_epi8(a, b)
|
|
1765
|
+
#endif
|
|
1766
|
+
|
|
1767
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1768
|
+
simde__m256i
|
|
1769
|
+
simde_mm256_shuffle_epi32 (simde__m256i a, const int imm8) {
|
|
1770
|
+
simde__m256i_private
|
|
1771
|
+
r_,
|
|
1772
|
+
a_ = simde__m256i_to_private(a);
|
|
1773
|
+
|
|
1774
|
+
for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) {
|
|
1775
|
+
r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3];
|
|
1776
|
+
}
|
|
1777
|
+
for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) {
|
|
1778
|
+
r_.i32[i + 4] = a_.i32[((imm8 >> (i * 2)) & 3) + 4];
|
|
1779
|
+
}
|
|
1780
|
+
|
|
1781
|
+
return simde__m256i_from_private(r_);
|
|
1782
|
+
}
|
|
1783
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1784
|
+
# define simde_mm256_shuffle_epi32(a, imm8) _mm256_shuffle_epi32(a, imm8)
|
|
1785
|
+
#elif defined(SIMDE_ARCH_X86_SSE2) && !defined(__PGI)
|
|
1786
|
+
# define simde_mm256_shuffle_epi32(a, imm8) \
|
|
1787
|
+
simde_mm256_set_m128i( \
|
|
1788
|
+
simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \
|
|
1789
|
+
simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 0), (imm8)))
|
|
1790
|
+
#elif defined(SIMDE__SHUFFLE_VECTOR)
|
|
1791
|
+
# define simde_mm256_shuffle_epi32(a, imm8) (__extension__ ({ \
|
|
1792
|
+
const simde__m256i_private simde__tmp_a_ = simde__m256i_to_private(a); \
|
|
1793
|
+
simde__m256i_from_private((simde__m256i_private) { .i32 = \
|
|
1794
|
+
SIMDE__SHUFFLE_VECTOR(32, 32, \
|
|
1795
|
+
(simde__tmp_a_).i32, \
|
|
1796
|
+
(simde__tmp_a_).i32, \
|
|
1797
|
+
((imm8) ) & 3, \
|
|
1798
|
+
((imm8) >> 2) & 3, \
|
|
1799
|
+
((imm8) >> 4) & 3, \
|
|
1800
|
+
((imm8) >> 6) & 3, \
|
|
1801
|
+
(((imm8) ) & 3) + 4, \
|
|
1802
|
+
(((imm8) >> 2) & 3) + 4, \
|
|
1803
|
+
(((imm8) >> 4) & 3) + 4, \
|
|
1804
|
+
(((imm8) >> 6) & 3) + 4) }); }))
|
|
1805
|
+
#endif
|
|
1806
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1807
|
+
# define _mm256_shuffle_epi32(a, imm8) simde_mm256_shuffle_epi32(a, imm8)
|
|
1808
|
+
#endif
|
|
1809
|
+
|
|
1810
|
+
/* simde_mm256_shufflelo_epi16(a, imm8): shuffles i16 lanes 0..3 of each
 * 128-bit half of a according to the 2-bit fields of imm8; lanes 4..7 of
 * each half are passed through unchanged (see the SIMDE__SHUFFLE_VECTOR
 * index list below).
 *
 * Implemented as a macro.  Every branch wraps the imm8 argument in
 * parentheses before forwarding it, so the expansion is the same whichever
 * branch is selected. */
#if defined(SIMDE_AVX2_NATIVE)
#  define simde_mm256_shufflelo_epi16(a, imm8) _mm256_shufflelo_epi16(a, imm8)
#elif defined(SIMDE_ARCH_X86_SSE2)
#  define simde_mm256_shufflelo_epi16(a, imm8) \
     simde_mm256_set_m128i( \
       simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \
       simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), (imm8)))
#elif defined(SIMDE__SHUFFLE_VECTOR)
#  define simde_mm256_shufflelo_epi16(a, imm8) (__extension__ ({ \
      const simde__m256i_private simde__tmp_a_ = simde__m256i_to_private(a); \
      simde__m256i_from_private((simde__m256i_private) { .i16 = \
        SIMDE__SHUFFLE_VECTOR(16, 32, \
          (simde__tmp_a_).i16, \
          (simde__tmp_a_).i16, \
          (((imm8)     ) & 3), \
          (((imm8) >> 2) & 3), \
          (((imm8) >> 4) & 3), \
          (((imm8) >> 6) & 3), \
          4, 5, 6, 7, \
          ((((imm8)     ) & 3) + 8), \
          ((((imm8) >> 2) & 3) + 8), \
          ((((imm8) >> 4) & 3) + 8), \
          ((((imm8) >> 6) & 3) + 8), \
          12, 13, 14, 15) }); }))
#else
#  define simde_mm256_shufflelo_epi16(a, imm8) \
     simde_mm256_set_m128i( \
       simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \
       simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), (imm8)))
#endif
|
|
1840
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1841
|
+
# define _mm256_shufflelo_epi16(a, imm8) simde_mm256_shufflelo_epi16(a, imm8)
|
|
1842
|
+
#endif
|
|
1843
|
+
|
|
1844
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1845
|
+
simde__m256i
|
|
1846
|
+
simde_mm256_slli_epi16 (simde__m256i a, const int imm8)
|
|
1847
|
+
HEDLEY_REQUIRE_MSG((imm8 & 15) == imm8, "imm8 must be in range [0, 15]") {
|
|
1848
|
+
/* Note: There is no consistency in how compilers handle values outside of
|
|
1849
|
+
the expected range, hence the discrepancy between what we allow and what
|
|
1850
|
+
Intel specifies. Some compilers will return 0, others seem to just mask
|
|
1851
|
+
off everything outside of the range. */
|
|
1852
|
+
simde__m256i_private
|
|
1853
|
+
r_,
|
|
1854
|
+
a_ = simde__m256i_to_private(a);
|
|
1855
|
+
|
|
1856
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1857
|
+
r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, imm8);
|
|
1858
|
+
#else
|
|
1859
|
+
SIMDE__VECTORIZE
|
|
1860
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
1861
|
+
r_.i16[i] = a_.i16[i] << (imm8 & 0xff);
|
|
1862
|
+
}
|
|
1863
|
+
#endif
|
|
1864
|
+
|
|
1865
|
+
return simde__m256i_from_private(r_);
|
|
1866
|
+
}
|
|
1867
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1868
|
+
# define simde_mm256_slli_epi16(a, imm8) _mm256_slli_epi16(a, imm8)
|
|
1869
|
+
#elif defined(SIMDE_ARCH_X86_SSE2)
|
|
1870
|
+
# define simde_mm256_slli_epi16(a, imm8) \
|
|
1871
|
+
simde_mm256_set_m128i( \
|
|
1872
|
+
simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \
|
|
1873
|
+
simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8)))
|
|
1874
|
+
#endif
|
|
1875
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1876
|
+
# define _mm256_slli_epi16(a, imm8) simde_mm256_slli_epi16(a, imm8)
|
|
1877
|
+
#endif
|
|
1878
|
+
|
|
1879
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1880
|
+
simde__m256i
|
|
1881
|
+
simde_mm256_slli_epi32 (simde__m256i a, const int imm8)
|
|
1882
|
+
HEDLEY_REQUIRE_MSG((imm8 & 31) == imm8, "imm8 must be in range [0, 31]") {
|
|
1883
|
+
simde__m256i_private
|
|
1884
|
+
r_,
|
|
1885
|
+
a_ = simde__m256i_to_private(a);
|
|
1886
|
+
|
|
1887
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1888
|
+
r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, imm8);
|
|
1889
|
+
#else
|
|
1890
|
+
SIMDE__VECTORIZE
|
|
1891
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
1892
|
+
r_.i32[i] = a_.i32[i] << (imm8 & 0xff);
|
|
1893
|
+
}
|
|
1894
|
+
#endif
|
|
1895
|
+
|
|
1896
|
+
return simde__m256i_from_private(r_);
|
|
1897
|
+
}
|
|
1898
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1899
|
+
# define simde_mm256_slli_epi32(a, imm8) _mm256_slli_epi32(a, imm8)
|
|
1900
|
+
#elif defined(SIMDE_ARCH_X86_SSE2)
|
|
1901
|
+
# define simde_mm256_slli_epi32(a, imm8) \
|
|
1902
|
+
simde_mm256_set_m128i( \
|
|
1903
|
+
simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \
|
|
1904
|
+
simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8)))
|
|
1905
|
+
#endif
|
|
1906
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1907
|
+
# define _mm256_slli_epi32(a, imm8) simde_mm256_slli_epi32(a, imm8)
|
|
1908
|
+
#endif
|
|
1909
|
+
|
|
1910
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1911
|
+
simde__m256i
|
|
1912
|
+
simde_mm256_slli_epi64 (simde__m256i a, const int imm8)
|
|
1913
|
+
HEDLEY_REQUIRE_MSG((imm8 & 15) == imm8, "imm8 must be in range [0, 63]") {
|
|
1914
|
+
simde__m256i_private
|
|
1915
|
+
r_,
|
|
1916
|
+
a_ = simde__m256i_to_private(a);
|
|
1917
|
+
|
|
1918
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1919
|
+
r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, imm8);
|
|
1920
|
+
#else
|
|
1921
|
+
SIMDE__VECTORIZE
|
|
1922
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
1923
|
+
r_.i64[i] = a_.i64[i] << (imm8 & 0xff);
|
|
1924
|
+
}
|
|
1925
|
+
#endif
|
|
1926
|
+
|
|
1927
|
+
return simde__m256i_from_private(r_);
|
|
1928
|
+
}
|
|
1929
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1930
|
+
# define simde_mm256_slli_epi64(a, imm8) _mm256_slli_epi64(a, imm8)
|
|
1931
|
+
#elif defined(SIMDE_ARCH_X86_SSE2)
|
|
1932
|
+
# define simde_mm256_slli_epi64(a, imm8) \
|
|
1933
|
+
simde_mm256_set_m128i( \
|
|
1934
|
+
simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \
|
|
1935
|
+
simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8)))
|
|
1936
|
+
#endif
|
|
1937
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1938
|
+
# define _mm256_slli_epi64(a, imm8) simde_mm256_slli_epi64(a, imm8)
|
|
1939
|
+
#endif
|
|
1940
|
+
|
|
1941
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1942
|
+
simde__m256i
|
|
1943
|
+
simde_mm256_sub_epi8 (simde__m256i a, simde__m256i b) {
|
|
1944
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1945
|
+
return _mm256_sub_epi8(a, b);
|
|
1946
|
+
#else
|
|
1947
|
+
simde__m256i_private
|
|
1948
|
+
r_,
|
|
1949
|
+
a_ = simde__m256i_to_private(a),
|
|
1950
|
+
b_ = simde__m256i_to_private(b);
|
|
1951
|
+
|
|
1952
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
1953
|
+
r_.m128i[0] = simde_mm_sub_epi8(a_.m128i[0], b_.m128i[0]);
|
|
1954
|
+
r_.m128i[1] = simde_mm_sub_epi8(a_.m128i[1], b_.m128i[1]);
|
|
1955
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
1956
|
+
r_.i8 = a_.i8 - b_.i8;
|
|
1957
|
+
#else
|
|
1958
|
+
SIMDE__VECTORIZE
|
|
1959
|
+
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
|
|
1960
|
+
r_.i8[i] = a_.i8[i] - b_.i8[i];
|
|
1961
|
+
}
|
|
1962
|
+
#endif
|
|
1963
|
+
|
|
1964
|
+
return simde__m256i_from_private(r_);
|
|
1965
|
+
#endif
|
|
1966
|
+
}
|
|
1967
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1968
|
+
# define _mm256_sub_epi8(a, b) simde_mm256_sub_epi8(a, b)
|
|
1969
|
+
#endif
|
|
1970
|
+
|
|
1971
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1972
|
+
simde__m256i
|
|
1973
|
+
simde_mm256_sub_epi16 (simde__m256i a, simde__m256i b) {
|
|
1974
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
1975
|
+
return _mm256_sub_epi16(a, b);
|
|
1976
|
+
#else
|
|
1977
|
+
simde__m256i_private
|
|
1978
|
+
r_,
|
|
1979
|
+
a_ = simde__m256i_to_private(a),
|
|
1980
|
+
b_ = simde__m256i_to_private(b);
|
|
1981
|
+
|
|
1982
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
1983
|
+
r_.m128i[0] = simde_mm_sub_epi16(a_.m128i[0], b_.m128i[0]);
|
|
1984
|
+
r_.m128i[1] = simde_mm_sub_epi16(a_.m128i[1], b_.m128i[1]);
|
|
1985
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
1986
|
+
r_.i16 = a_.i16 - b_.i16;
|
|
1987
|
+
#else
|
|
1988
|
+
SIMDE__VECTORIZE
|
|
1989
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
1990
|
+
r_.i16[i] = a_.i16[i] - b_.i16[i];
|
|
1991
|
+
}
|
|
1992
|
+
#endif
|
|
1993
|
+
|
|
1994
|
+
return simde__m256i_from_private(r_);
|
|
1995
|
+
#endif
|
|
1996
|
+
}
|
|
1997
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
1998
|
+
# define _mm256_sub_epi16(a, b) simde_mm256_sub_epi16(a, b)
|
|
1999
|
+
#endif
|
|
2000
|
+
|
|
2001
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2002
|
+
simde__m256i
|
|
2003
|
+
simde_mm256_sub_epi32 (simde__m256i a, simde__m256i b) {
|
|
2004
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
2005
|
+
return _mm256_sub_epi32(a, b);
|
|
2006
|
+
#else
|
|
2007
|
+
simde__m256i_private
|
|
2008
|
+
r_,
|
|
2009
|
+
a_ = simde__m256i_to_private(a),
|
|
2010
|
+
b_ = simde__m256i_to_private(b);
|
|
2011
|
+
|
|
2012
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
2013
|
+
r_.m128i[0] = simde_mm_sub_epi32(a_.m128i[0], b_.m128i[0]);
|
|
2014
|
+
r_.m128i[1] = simde_mm_sub_epi32(a_.m128i[1], b_.m128i[1]);
|
|
2015
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
2016
|
+
r_.i32 = a_.i32 - b_.i32;
|
|
2017
|
+
#else
|
|
2018
|
+
SIMDE__VECTORIZE
|
|
2019
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
2020
|
+
r_.i32[i] = a_.i32[i] - b_.i32[i];
|
|
2021
|
+
}
|
|
2022
|
+
#endif
|
|
2023
|
+
|
|
2024
|
+
return simde__m256i_from_private(r_);
|
|
2025
|
+
#endif
|
|
2026
|
+
}
|
|
2027
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
2028
|
+
# define _mm256_sub_epi32(a, b) simde_mm256_sub_epi32(a, b)
|
|
2029
|
+
#endif
|
|
2030
|
+
|
|
2031
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2032
|
+
simde__m256i
|
|
2033
|
+
simde_mm256_sub_epi64 (simde__m256i a, simde__m256i b) {
|
|
2034
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
2035
|
+
return _mm256_sub_epi64(a, b);
|
|
2036
|
+
#else
|
|
2037
|
+
simde__m256i_private
|
|
2038
|
+
r_,
|
|
2039
|
+
a_ = simde__m256i_to_private(a),
|
|
2040
|
+
b_ = simde__m256i_to_private(b);
|
|
2041
|
+
|
|
2042
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
2043
|
+
r_.m128i[0] = simde_mm_sub_epi64(a_.m128i[0], b_.m128i[0]);
|
|
2044
|
+
r_.m128i[1] = simde_mm_sub_epi64(a_.m128i[1], b_.m128i[1]);
|
|
2045
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
2046
|
+
r_.i64 = a_.i64 - b_.i64;
|
|
2047
|
+
#else
|
|
2048
|
+
SIMDE__VECTORIZE
|
|
2049
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
2050
|
+
r_.i64[i] = a_.i64[i] - b_.i64[i];
|
|
2051
|
+
}
|
|
2052
|
+
#endif
|
|
2053
|
+
|
|
2054
|
+
return simde__m256i_from_private(r_);
|
|
2055
|
+
#endif
|
|
2056
|
+
}
|
|
2057
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
2058
|
+
# define _mm256_sub_epi64(a, b) simde_mm256_sub_epi64(a, b)
|
|
2059
|
+
#endif
|
|
2060
|
+
|
|
2061
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2062
|
+
simde__m256i
|
|
2063
|
+
simde_mm256_srli_epi64 (simde__m256i a, const int imm8) {
|
|
2064
|
+
simde__m256i_private
|
|
2065
|
+
r_,
|
|
2066
|
+
a_ = simde__m256i_to_private(a);
|
|
2067
|
+
|
|
2068
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
2069
|
+
r_.u64 = a_.u64 >> HEDLEY_STATIC_CAST(int32_t, imm8);
|
|
2070
|
+
#else
|
|
2071
|
+
SIMDE__VECTORIZE
|
|
2072
|
+
for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
|
|
2073
|
+
r_.u64[i] = a_.u64[i] >> imm8;
|
|
2074
|
+
}
|
|
2075
|
+
#endif
|
|
2076
|
+
|
|
2077
|
+
return simde__m256i_from_private(r_);
|
|
2078
|
+
}
|
|
2079
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
2080
|
+
# define simde_mm256_srli_epi64(a, imm8) _mm256_srli_epi64(a, imm8)
|
|
2081
|
+
#elif defined(SIMDE_ARCH_X86_SSE2)
|
|
2082
|
+
# define simde_mm256_srli_epi64(a, imm8) \
|
|
2083
|
+
simde_mm256_set_m128i( \
|
|
2084
|
+
simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \
|
|
2085
|
+
simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8)))
|
|
2086
|
+
#endif
|
|
2087
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
2088
|
+
# define _mm256_srli_epi64(a, imm8) simde_mm256_srli_epi64(a, imm8)
|
|
2089
|
+
#endif
|
|
2090
|
+
|
|
2091
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2092
|
+
simde__m256i
|
|
2093
|
+
simde_mm256_srli_si256 (simde__m256i a, const int imm8) {
|
|
2094
|
+
simde__m256i_private
|
|
2095
|
+
r_,
|
|
2096
|
+
a_ = simde__m256i_to_private(a);
|
|
2097
|
+
|
|
2098
|
+
for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) {
|
|
2099
|
+
SIMDE__VECTORIZE
|
|
2100
|
+
for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) {
|
|
2101
|
+
const int e = imm8 + HEDLEY_STATIC_CAST(int, i);
|
|
2102
|
+
r_.m128i_private[h].i8[i] = (e < 16) ? a_.m128i_private[h].i8[e] : 0;
|
|
2103
|
+
}
|
|
2104
|
+
}
|
|
2105
|
+
|
|
2106
|
+
return simde__m256i_from_private(r_);
|
|
2107
|
+
}
|
|
2108
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
2109
|
+
# define simde_mm256_srli_si256(a, imm8) _mm256_srli_si256(a, imm8)
|
|
2110
|
+
#elif defined(SIMDE_ARCH_X86_SSE2) && !defined(__PGI)
|
|
2111
|
+
# define simde_mm256_srli_si256(a, imm8) \
|
|
2112
|
+
simde_mm256_set_m128i( \
|
|
2113
|
+
simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \
|
|
2114
|
+
simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 0), (imm8)))
|
|
2115
|
+
#elif defined(SIMDE_SSE2_NEON)
|
|
2116
|
+
# define simde_mm256_srli_si256(a, imm8) \
|
|
2117
|
+
simde_mm256_set_m128i( \
|
|
2118
|
+
simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \
|
|
2119
|
+
simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 0), (imm8)))
|
|
2120
|
+
#endif
|
|
2121
|
+
/* Native-name alias: forwards to the 256-bit SIMDe implementation. */
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
#  define _mm256_srli_si256(a, imm8) simde_mm256_srli_si256(a, imm8)
#endif
|
|
2124
|
+
|
|
2125
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2126
|
+
simde__m256i
|
|
2127
|
+
simde_mm256_unpacklo_epi8 (simde__m256i a, simde__m256i b) {
|
|
2128
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
2129
|
+
return _mm256_unpacklo_epi8(a, b);
|
|
2130
|
+
#else
|
|
2131
|
+
simde__m256i_private
|
|
2132
|
+
r_,
|
|
2133
|
+
a_ = simde__m256i_to_private(a),
|
|
2134
|
+
b_ = simde__m256i_to_private(b);
|
|
2135
|
+
|
|
2136
|
+
#if defined(SIMDE__SHUFFLE_VECTOR)
|
|
2137
|
+
r_.i8 = SIMDE__SHUFFLE_VECTOR(8, 32, a_.i8, b_.i8,
|
|
2138
|
+
0, 32, 1, 33, 2, 34, 3, 35,
|
|
2139
|
+
4, 36, 5, 37, 6, 38, 7, 39,
|
|
2140
|
+
16, 48, 17, 49, 18, 50, 19, 51,
|
|
2141
|
+
20, 52, 21, 53, 22, 54, 23, 55);
|
|
2142
|
+
#else
|
|
2143
|
+
r_.m128i[0] = simde_mm_unpacklo_epi8(a_.m128i[0], b_.m128i[0]);
|
|
2144
|
+
r_.m128i[1] = simde_mm_unpacklo_epi8(a_.m128i[1], b_.m128i[1]);
|
|
2145
|
+
#endif
|
|
2146
|
+
|
|
2147
|
+
return simde__m256i_from_private(r_);
|
|
2148
|
+
#endif
|
|
2149
|
+
}
|
|
2150
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
2151
|
+
# define _mm256_unpacklo_epi8(a, b) simde_mm256_unpacklo_epi8(a, b)
|
|
2152
|
+
#endif
|
|
2153
|
+
|
|
2154
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2155
|
+
simde__m256i
|
|
2156
|
+
simde_mm256_unpacklo_epi16 (simde__m256i a, simde__m256i b) {
|
|
2157
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
2158
|
+
return _mm256_unpacklo_epi16(a, b);
|
|
2159
|
+
#else
|
|
2160
|
+
simde__m256i_private
|
|
2161
|
+
r_,
|
|
2162
|
+
a_ = simde__m256i_to_private(a),
|
|
2163
|
+
b_ = simde__m256i_to_private(b);
|
|
2164
|
+
|
|
2165
|
+
#if defined(SIMDE__SHUFFLE_VECTOR)
|
|
2166
|
+
r_.i16 =SIMDE__SHUFFLE_VECTOR(16, 32, a_.i16, b_.i16,
|
|
2167
|
+
0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27);
|
|
2168
|
+
#else
|
|
2169
|
+
r_.m128i[0] = simde_mm_unpacklo_epi16(a_.m128i[0], b_.m128i[0]);
|
|
2170
|
+
r_.m128i[1] = simde_mm_unpacklo_epi16(a_.m128i[1], b_.m128i[1]);
|
|
2171
|
+
#endif
|
|
2172
|
+
|
|
2173
|
+
return simde__m256i_from_private(r_);
|
|
2174
|
+
#endif
|
|
2175
|
+
}
|
|
2176
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
2177
|
+
# define _mm256_unpacklo_epi16(a, b) simde_mm256_unpacklo_epi16(a, b)
|
|
2178
|
+
#endif
|
|
2179
|
+
|
|
2180
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2181
|
+
simde__m256i
|
|
2182
|
+
simde_mm256_unpacklo_epi32 (simde__m256i a, simde__m256i b) {
|
|
2183
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
2184
|
+
return _mm256_unpacklo_epi32(a, b);
|
|
2185
|
+
#else
|
|
2186
|
+
simde__m256i_private
|
|
2187
|
+
r_,
|
|
2188
|
+
a_ = simde__m256i_to_private(a),
|
|
2189
|
+
b_ = simde__m256i_to_private(b);
|
|
2190
|
+
|
|
2191
|
+
#if defined(SIMDE__SHUFFLE_VECTOR)
|
|
2192
|
+
r_.i32 = SIMDE__SHUFFLE_VECTOR(32, 32, a_.i32, b_.i32,
|
|
2193
|
+
0, 8, 1, 9, 4, 12, 5, 13);
|
|
2194
|
+
#else
|
|
2195
|
+
r_.m128i[0] = simde_mm_unpacklo_epi32(a_.m128i[0], b_.m128i[0]);
|
|
2196
|
+
r_.m128i[1] = simde_mm_unpacklo_epi32(a_.m128i[1], b_.m128i[1]);
|
|
2197
|
+
#endif
|
|
2198
|
+
|
|
2199
|
+
return simde__m256i_from_private(r_);
|
|
2200
|
+
#endif
|
|
2201
|
+
}
|
|
2202
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
2203
|
+
# define _mm256_unpacklo_epi32(a, b) simde_mm256_unpacklo_epi32(a, b)
|
|
2204
|
+
#endif
|
|
2205
|
+
|
|
2206
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2207
|
+
simde__m256i
|
|
2208
|
+
simde_mm256_unpacklo_epi64 (simde__m256i a, simde__m256i b) {
|
|
2209
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
2210
|
+
return _mm256_unpacklo_epi64(a, b);
|
|
2211
|
+
#else
|
|
2212
|
+
simde__m256i_private
|
|
2213
|
+
r_,
|
|
2214
|
+
a_ = simde__m256i_to_private(a),
|
|
2215
|
+
b_ = simde__m256i_to_private(b);
|
|
2216
|
+
|
|
2217
|
+
#if defined(SIMDE__SHUFFLE_VECTOR)
|
|
2218
|
+
r_.i64 = SIMDE__SHUFFLE_VECTOR(64, 32, a_.i64, b_.i64, 0, 4, 2, 6);
|
|
2219
|
+
#else
|
|
2220
|
+
r_.m128i[0] = simde_mm_unpacklo_epi64(a_.m128i[0], b_.m128i[0]);
|
|
2221
|
+
r_.m128i[1] = simde_mm_unpacklo_epi64(a_.m128i[1], b_.m128i[1]);
|
|
2222
|
+
#endif
|
|
2223
|
+
|
|
2224
|
+
return simde__m256i_from_private(r_);
|
|
2225
|
+
#endif
|
|
2226
|
+
}
|
|
2227
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
2228
|
+
# define _mm256_unpacklo_epi64(a, b) simde_mm256_unpacklo_epi64(a, b)
|
|
2229
|
+
#endif
|
|
2230
|
+
|
|
2231
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2232
|
+
simde__m256i
|
|
2233
|
+
simde_mm256_unpackhi_epi8 (simde__m256i a, simde__m256i b) {
|
|
2234
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
2235
|
+
return _mm256_unpackhi_epi8(a, b);
|
|
2236
|
+
#else
|
|
2237
|
+
simde__m256i_private
|
|
2238
|
+
r_,
|
|
2239
|
+
a_ = simde__m256i_to_private(a),
|
|
2240
|
+
b_ = simde__m256i_to_private(b);
|
|
2241
|
+
|
|
2242
|
+
#if defined(SIMDE__SHUFFLE_VECTOR)
|
|
2243
|
+
r_.i8 = SIMDE__SHUFFLE_VECTOR(8, 32, a_.i8, b_.i8,
|
|
2244
|
+
8, 40, 9, 41, 10, 42, 11, 43,
|
|
2245
|
+
12, 44, 13, 45, 14, 46, 15, 47,
|
|
2246
|
+
24, 56, 25, 57, 26, 58, 27, 59,
|
|
2247
|
+
28, 60, 29, 61, 30, 62, 31, 63);
|
|
2248
|
+
#else
|
|
2249
|
+
r_.m128i[0] = simde_mm_unpackhi_epi8(a_.m128i[0], b_.m128i[0]);
|
|
2250
|
+
r_.m128i[1] = simde_mm_unpackhi_epi8(a_.m128i[1], b_.m128i[1]);
|
|
2251
|
+
#endif
|
|
2252
|
+
|
|
2253
|
+
return simde__m256i_from_private(r_);
|
|
2254
|
+
#endif
|
|
2255
|
+
}
|
|
2256
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
2257
|
+
# define _mm256_unpackhi_epi8(a, b) simde_mm256_unpackhi_epi8(a, b)
|
|
2258
|
+
#endif
|
|
2259
|
+
|
|
2260
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2261
|
+
simde__m256i
|
|
2262
|
+
simde_mm256_unpackhi_epi16 (simde__m256i a, simde__m256i b) {
|
|
2263
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
2264
|
+
return _mm256_unpackhi_epi16(a, b);
|
|
2265
|
+
#else
|
|
2266
|
+
simde__m256i_private
|
|
2267
|
+
r_,
|
|
2268
|
+
a_ = simde__m256i_to_private(a),
|
|
2269
|
+
b_ = simde__m256i_to_private(b);
|
|
2270
|
+
|
|
2271
|
+
#if defined(SIMDE__SHUFFLE_VECTOR)
|
|
2272
|
+
r_.i16 = SIMDE__SHUFFLE_VECTOR(16, 32, a_.i16, b_.i16,
|
|
2273
|
+
4, 20, 5, 21, 6, 22, 7, 23,
|
|
2274
|
+
12, 28, 13, 29, 14, 30, 15, 31);
|
|
2275
|
+
#else
|
|
2276
|
+
r_.m128i[0] = simde_mm_unpackhi_epi16(a_.m128i[0], b_.m128i[0]);
|
|
2277
|
+
r_.m128i[1] = simde_mm_unpackhi_epi16(a_.m128i[1], b_.m128i[1]);
|
|
2278
|
+
#endif
|
|
2279
|
+
|
|
2280
|
+
return simde__m256i_from_private(r_);
|
|
2281
|
+
#endif
|
|
2282
|
+
}
|
|
2283
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
2284
|
+
# define _mm256_unpackhi_epi16(a, b) simde_mm256_unpackhi_epi16(a, b)
|
|
2285
|
+
#endif
|
|
2286
|
+
|
|
2287
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2288
|
+
simde__m256i
|
|
2289
|
+
simde_mm256_unpackhi_epi32 (simde__m256i a, simde__m256i b) {
|
|
2290
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
2291
|
+
return _mm256_unpackhi_epi32(a, b);
|
|
2292
|
+
#else
|
|
2293
|
+
simde__m256i_private
|
|
2294
|
+
r_,
|
|
2295
|
+
a_ = simde__m256i_to_private(a),
|
|
2296
|
+
b_ = simde__m256i_to_private(b);
|
|
2297
|
+
|
|
2298
|
+
#if defined(SIMDE__SHUFFLE_VECTOR)
|
|
2299
|
+
r_.i32 = SIMDE__SHUFFLE_VECTOR(32, 32, a_.i32, b_.i32,
|
|
2300
|
+
2, 10, 3, 11, 6, 14, 7, 15);
|
|
2301
|
+
#else
|
|
2302
|
+
r_.m128i[0] = simde_mm_unpackhi_epi32(a_.m128i[0], b_.m128i[0]);
|
|
2303
|
+
r_.m128i[1] = simde_mm_unpackhi_epi32(a_.m128i[1], b_.m128i[1]);
|
|
2304
|
+
#endif
|
|
2305
|
+
|
|
2306
|
+
return simde__m256i_from_private(r_);
|
|
2307
|
+
#endif
|
|
2308
|
+
}
|
|
2309
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
2310
|
+
# define _mm256_unpackhi_epi32(a, b) simde_mm256_unpackhi_epi32(a, b)
|
|
2311
|
+
#endif
|
|
2312
|
+
|
|
2313
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2314
|
+
simde__m256i
|
|
2315
|
+
simde_mm256_unpackhi_epi64 (simde__m256i a, simde__m256i b) {
|
|
2316
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
2317
|
+
return _mm256_unpackhi_epi64(a, b);
|
|
2318
|
+
#else
|
|
2319
|
+
simde__m256i_private
|
|
2320
|
+
r_,
|
|
2321
|
+
a_ = simde__m256i_to_private(a),
|
|
2322
|
+
b_ = simde__m256i_to_private(b);
|
|
2323
|
+
|
|
2324
|
+
#if defined(SIMDE__SHUFFLE_VECTOR)
|
|
2325
|
+
r_.i64 = SIMDE__SHUFFLE_VECTOR(64, 32, a_.i64, b_.i64, 1, 5, 3, 7);
|
|
2326
|
+
#else
|
|
2327
|
+
r_.m128i[0] = simde_mm_unpackhi_epi64(a_.m128i[0], b_.m128i[0]);
|
|
2328
|
+
r_.m128i[1] = simde_mm_unpackhi_epi64(a_.m128i[1], b_.m128i[1]);
|
|
2329
|
+
#endif
|
|
2330
|
+
|
|
2331
|
+
return simde__m256i_from_private(r_);
|
|
2332
|
+
#endif
|
|
2333
|
+
}
|
|
2334
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
2335
|
+
# define _mm256_unpackhi_epi64(a, b) simde_mm256_unpackhi_epi64(a, b)
|
|
2336
|
+
#endif
|
|
2337
|
+
|
|
2338
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2339
|
+
simde__m256i
|
|
2340
|
+
simde_mm256_xor_si256 (simde__m256i a, simde__m256i b) {
|
|
2341
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
2342
|
+
return _mm256_xor_si256(a, b);
|
|
2343
|
+
#else
|
|
2344
|
+
simde__m256i_private
|
|
2345
|
+
r_,
|
|
2346
|
+
a_ = simde__m256i_to_private(a),
|
|
2347
|
+
b_ = simde__m256i_to_private(b);
|
|
2348
|
+
|
|
2349
|
+
#if defined(SIMDE_ARCH_X86_SSE2)
|
|
2350
|
+
r_.m128i[0] = simde_mm_xor_si128(a_.m128i[0], b_.m128i[0]);
|
|
2351
|
+
r_.m128i[1] = simde_mm_xor_si128(a_.m128i[1], b_.m128i[1]);
|
|
2352
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
2353
|
+
r_.i32f = a_.i32f ^ b_.i32f;
|
|
2354
|
+
#else
|
|
2355
|
+
SIMDE__VECTORIZE
|
|
2356
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
2357
|
+
r_.i64[i] = a_.i64[i] ^ b_.i64[i];
|
|
2358
|
+
}
|
|
2359
|
+
#endif
|
|
2360
|
+
|
|
2361
|
+
return simde__m256i_from_private(r_);
|
|
2362
|
+
#endif
|
|
2363
|
+
}
|
|
2364
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
2365
|
+
# define _mm256_xor_si256(a, b) simde_mm256_xor_si256(a, b)
|
|
2366
|
+
#endif
|
|
2367
|
+
|
|
2368
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2369
|
+
simde__m256i
|
|
2370
|
+
simde_mm256_srli_epi32 (simde__m256i a, const int imm8) {
|
|
2371
|
+
simde__m256i_private
|
|
2372
|
+
r_,
|
|
2373
|
+
a_ = simde__m256i_to_private(a);
|
|
2374
|
+
|
|
2375
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
2376
|
+
r_.u32 = a_.u32 >> HEDLEY_STATIC_CAST(int16_t, imm8);
|
|
2377
|
+
#else
|
|
2378
|
+
SIMDE__VECTORIZE
|
|
2379
|
+
for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
|
|
2380
|
+
r_.u32[i] = a_.u32[i] >> imm8;
|
|
2381
|
+
}
|
|
2382
|
+
#endif
|
|
2383
|
+
|
|
2384
|
+
return simde__m256i_from_private(r_);
|
|
2385
|
+
}
|
|
2386
|
+
#if defined(SIMDE_AVX2_NATIVE)
|
|
2387
|
+
# define simde_mm256_srli_epi32(a, imm8) _mm256_srli_epi32(a, imm8)
|
|
2388
|
+
#elif defined(SIMDE_ARCH_X86_SSE2)
|
|
2389
|
+
# define simde_mm256_srli_epi32(a, imm8) \
|
|
2390
|
+
simde_mm256_set_m128i( \
|
|
2391
|
+
simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \
|
|
2392
|
+
simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8)))
|
|
2393
|
+
#endif
|
|
2394
|
+
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
|
|
2395
|
+
# define _mm256_srli_epi32(a, imm8) simde_mm256_srli_epi32(a, imm8)
|
|
2396
|
+
#endif
|
|
2397
|
+
|
|
2398
|
+
SIMDE__END_DECLS
|
|
2399
|
+
|
|
2400
|
+
HEDLEY_DIAGNOSTIC_POP
|
|
2401
|
+
|
|
2402
|
+
#endif /* !defined(SIMDE__AVX2_H) */
|