minimap2 0.2.25.1 → 0.2.25.2

Files changed (100)
  1. checksums.yaml +4 -4
  2. data/ext/Rakefile +2 -2
  3. data/lib/minimap2/version.rb +1 -1
  4. metadata +1 -97
  5. data/ext/minimap2/lib/simde/CONTRIBUTING.md +0 -114
  6. data/ext/minimap2/lib/simde/COPYING +0 -20
  7. data/ext/minimap2/lib/simde/README.md +0 -333
  8. data/ext/minimap2/lib/simde/amalgamate.py +0 -58
  9. data/ext/minimap2/lib/simde/meson.build +0 -33
  10. data/ext/minimap2/lib/simde/netlify.toml +0 -20
  11. data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +0 -140
  12. data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +0 -137
  13. data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +0 -142
  14. data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +0 -145
  15. data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +0 -140
  16. data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +0 -145
  17. data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +0 -140
  18. data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +0 -143
  19. data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +0 -137
  20. data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +0 -141
  21. data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +0 -147
  22. data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +0 -141
  23. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +0 -134
  24. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +0 -138
  25. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +0 -134
  26. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +0 -137
  27. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +0 -131
  28. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +0 -135
  29. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +0 -141
  30. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +0 -135
  31. data/ext/minimap2/lib/simde/simde/arm/neon.h +0 -97
  32. data/ext/minimap2/lib/simde/simde/check.h +0 -267
  33. data/ext/minimap2/lib/simde/simde/debug-trap.h +0 -83
  34. data/ext/minimap2/lib/simde/simde/hedley.h +0 -1899
  35. data/ext/minimap2/lib/simde/simde/simde-arch.h +0 -445
  36. data/ext/minimap2/lib/simde/simde/simde-common.h +0 -697
  37. data/ext/minimap2/lib/simde/simde/x86/avx.h +0 -5385
  38. data/ext/minimap2/lib/simde/simde/x86/avx2.h +0 -2402
  39. data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +0 -391
  40. data/ext/minimap2/lib/simde/simde/x86/avx512f.h +0 -3389
  41. data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +0 -112
  42. data/ext/minimap2/lib/simde/simde/x86/fma.h +0 -659
  43. data/ext/minimap2/lib/simde/simde/x86/mmx.h +0 -2210
  44. data/ext/minimap2/lib/simde/simde/x86/sse.h +0 -3696
  45. data/ext/minimap2/lib/simde/simde/x86/sse2.h +0 -5991
  46. data/ext/minimap2/lib/simde/simde/x86/sse3.h +0 -343
  47. data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +0 -1783
  48. data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +0 -105
  49. data/ext/minimap2/lib/simde/simde/x86/ssse3.h +0 -1053
  50. data/ext/minimap2/lib/simde/simde/x86/svml.h +0 -543
  51. data/ext/minimap2/lib/simde/test/CMakeLists.txt +0 -166
  52. data/ext/minimap2/lib/simde/test/arm/meson.build +0 -4
  53. data/ext/minimap2/lib/simde/test/arm/neon/meson.build +0 -23
  54. data/ext/minimap2/lib/simde/test/arm/neon/skel.c +0 -871
  55. data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +0 -134
  56. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +0 -39
  57. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +0 -10
  58. data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +0 -1260
  59. data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +0 -873
  60. data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +0 -1084
  61. data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +0 -1260
  62. data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +0 -18
  63. data/ext/minimap2/lib/simde/test/arm/test-arm.c +0 -20
  64. data/ext/minimap2/lib/simde/test/arm/test-arm.h +0 -8
  65. data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +0 -171
  66. data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +0 -68
  67. data/ext/minimap2/lib/simde/test/meson.build +0 -64
  68. data/ext/minimap2/lib/simde/test/munit/COPYING +0 -21
  69. data/ext/minimap2/lib/simde/test/munit/Makefile +0 -55
  70. data/ext/minimap2/lib/simde/test/munit/README.md +0 -54
  71. data/ext/minimap2/lib/simde/test/munit/example.c +0 -351
  72. data/ext/minimap2/lib/simde/test/munit/meson.build +0 -37
  73. data/ext/minimap2/lib/simde/test/munit/munit.c +0 -2055
  74. data/ext/minimap2/lib/simde/test/munit/munit.h +0 -535
  75. data/ext/minimap2/lib/simde/test/run-tests.c +0 -20
  76. data/ext/minimap2/lib/simde/test/run-tests.h +0 -260
  77. data/ext/minimap2/lib/simde/test/x86/avx.c +0 -13752
  78. data/ext/minimap2/lib/simde/test/x86/avx2.c +0 -9977
  79. data/ext/minimap2/lib/simde/test/x86/avx512bw.c +0 -2664
  80. data/ext/minimap2/lib/simde/test/x86/avx512f.c +0 -10416
  81. data/ext/minimap2/lib/simde/test/x86/avx512vl.c +0 -210
  82. data/ext/minimap2/lib/simde/test/x86/fma.c +0 -2557
  83. data/ext/minimap2/lib/simde/test/x86/meson.build +0 -33
  84. data/ext/minimap2/lib/simde/test/x86/mmx.c +0 -2878
  85. data/ext/minimap2/lib/simde/test/x86/skel.c +0 -2984
  86. data/ext/minimap2/lib/simde/test/x86/sse.c +0 -5121
  87. data/ext/minimap2/lib/simde/test/x86/sse2.c +0 -9860
  88. data/ext/minimap2/lib/simde/test/x86/sse3.c +0 -486
  89. data/ext/minimap2/lib/simde/test/x86/sse4.1.c +0 -3446
  90. data/ext/minimap2/lib/simde/test/x86/sse4.2.c +0 -101
  91. data/ext/minimap2/lib/simde/test/x86/ssse3.c +0 -2084
  92. data/ext/minimap2/lib/simde/test/x86/svml.c +0 -1545
  93. data/ext/minimap2/lib/simde/test/x86/test-avx.h +0 -16
  94. data/ext/minimap2/lib/simde/test/x86/test-avx512.h +0 -25
  95. data/ext/minimap2/lib/simde/test/x86/test-mmx.h +0 -13
  96. data/ext/minimap2/lib/simde/test/x86/test-sse.h +0 -13
  97. data/ext/minimap2/lib/simde/test/x86/test-sse2.h +0 -13
  98. data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +0 -196
  99. data/ext/minimap2/lib/simde/test/x86/test-x86.c +0 -48
  100. data/ext/minimap2/lib/simde/test/x86/test-x86.h +0 -8
data/ext/minimap2/lib/simde/simde/x86/avx2.h
@@ -1,2402 +0,0 @@
1
- /* Permission is hereby granted, free of charge, to any person
2
- * obtaining a copy of this software and associated documentation
3
- * files (the "Software"), to deal in the Software without
4
- * restriction, including without limitation the rights to use, copy,
5
- * modify, merge, publish, distribute, sublicense, and/or sell copies
6
- * of the Software, and to permit persons to whom the Software is
7
- * furnished to do so, subject to the following conditions:
8
- *
9
- * The above copyright notice and this permission notice shall be
10
- * included in all copies or substantial portions of the Software.
11
- *
12
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
14
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
16
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
17
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
- * SOFTWARE.
20
- *
21
- * Copyright:
22
- * 2018 Evan Nemerson <evan@nemerson.com>
23
- * 2019 Michael R. Crusoe <michael.crusoe@gmail.com>
24
- */
25
-
26
- #include "sse4.1.h"
27
- #include "sse4.2.h"
28
- #if !defined(SIMDE__AVX2_H)
29
- # if !defined(SIMDE__AVX2_H)
30
- # define SIMDE__AVX2_H
31
- # endif
32
- # include "avx.h"
33
-
34
- HEDLEY_DIAGNOSTIC_PUSH
35
- SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
36
-
37
- # if defined(SIMDE_AVX2_NATIVE)
38
- # undef SIMDE_AVX2_NATIVE
39
- # endif
40
- # if defined(SIMDE_ARCH_X86_AVX2) && !defined(SIMDE_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
41
- # define SIMDE_AVX2_NATIVE
42
- # elif defined(SIMDE_ARCH_ARM_NEON) && !defined(SIMDE_AVX2_NO_NEON) && !defined(SIMDE_NO_NEON)
43
- # define SIMDE_AVX2_NEON
44
- # endif
45
-
46
- # if defined(SIMDE_AVX2_NATIVE) && !defined(SIMDE_AVX_NATIVE)
47
- # if defined(SIMDE_AVX2_FORCE_NATIVE)
48
- # error Native AVX2 support requires native AVX support
49
- # else
50
- HEDLEY_WARNING("Native AVX2 support requires native AVX support, disabling")
51
- # undef SIMDE_AVX2_NATIVE
52
- # endif
53
- # elif defined(SIMDE_AVX2_NEON) && !defined(SIMDE_AVX_NEON)
54
- HEDLEY_WARNING("AVX2 NEON support requires AVX NEON support, disabling")
55
- # undef SIMDE_AVX_NEON
56
- # endif
57
-
58
- # if defined(SIMDE_AVX2_NATIVE)
59
- # include <immintrin.h>
60
- # endif
61
-
62
- # if !defined(SIMDE_AVX2_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
63
- # define SIMDE_AVX2_ENABLE_NATIVE_ALIASES
64
- # endif
65
-
66
- # include <stdint.h>
67
-
68
- SIMDE__BEGIN_DECLS
69
-
70
- SIMDE__FUNCTION_ATTRIBUTES
71
- simde__m256i
72
- simde_mm256_abs_epi8 (simde__m256i a) {
73
- #if defined(SIMDE_AVX2_NATIVE)
74
- return _mm256_abs_epi8(a);
75
- #else
76
- simde__m256i_private
77
- r_,
78
- a_ = simde__m256i_to_private(a);
79
-
80
- SIMDE__VECTORIZE
81
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
82
- r_.i8[i] = (a_.i8[i] < INT32_C(0)) ? -a_.i8[i] : a_.i8[i];
83
- }
84
-
85
- return simde__m256i_from_private(r_);
86
- #endif
87
- }
88
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
89
- # define _mm256_abs_epi8(a) simde_mm256_abs_epi8(a)
90
- #endif
91
-
92
- SIMDE__FUNCTION_ATTRIBUTES
93
- simde__m256i
94
- simde_mm256_abs_epi16 (simde__m256i a) {
95
- #if defined(SIMDE_AVX2_NATIVE)
96
- return _mm256_abs_epi16(a);
97
- #else
98
- simde__m256i_private
99
- r_,
100
- a_ = simde__m256i_to_private(a);
101
-
102
- SIMDE__VECTORIZE
103
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
104
- r_.i16[i] = (a_.i16[i] < INT32_C(0)) ? -a_.i16[i] : a_.i16[i];
105
- }
106
-
107
- return simde__m256i_from_private(r_);
108
- #endif
109
- }
110
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
111
- # define _mm256_abs_epi16(a) simde_mm256_abs_epi16(a)
112
- #endif
113
-
114
- SIMDE__FUNCTION_ATTRIBUTES
115
- simde__m256i
116
- simde_mm256_abs_epi32(simde__m256i a) {
117
- #if defined(SIMDE_AVX2_NATIVE)
118
- return _mm256_abs_epi32(a);
119
- #else
120
- simde__m256i_private
121
- r_,
122
- a_ = simde__m256i_to_private(a);
123
-
124
- SIMDE__VECTORIZE
125
- for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
126
- r_.i32[i] = (a_.i32[i] < INT32_C(0)) ? -a_.i32[i] : a_.i32[i];
127
- }
128
-
129
- return simde__m256i_from_private(r_);
130
- #endif
131
- }
132
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
133
- # define _mm256_abs_epi32(a) simde_mm256_abs_epi32(a)
134
- #endif
135
-
136
- SIMDE__FUNCTION_ATTRIBUTES
137
- simde__m256i
138
- simde_mm256_add_epi8 (simde__m256i a, simde__m256i b) {
139
- #if defined(SIMDE_AVX2_NATIVE)
140
- return _mm256_add_epi8(a, b);
141
- #else
142
- simde__m256i_private
143
- r_,
144
- a_ = simde__m256i_to_private(a),
145
- b_ = simde__m256i_to_private(b);
146
-
147
- #if defined(SIMDE_ARCH_X86_SSE2)
148
- r_.m128i[0] = simde_mm_add_epi8(a_.m128i[0], b_.m128i[0]);
149
- r_.m128i[1] = simde_mm_add_epi8(a_.m128i[1], b_.m128i[1]);
150
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
151
- r_.i8 = a_.i8 + b_.i8;
152
- #else
153
- SIMDE__VECTORIZE
154
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
155
- r_.i8[i] = a_.i8[i] + b_.i8[i];
156
- }
157
- #endif
158
-
159
- return simde__m256i_from_private(r_);
160
- #endif
161
- }
162
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
163
- # define _mm256_add_epi8(a, b) simde_mm256_add_epi8(a, b)
164
- #endif
165
-
166
- SIMDE__FUNCTION_ATTRIBUTES
167
- simde__m256i
168
- simde_mm256_add_epi16 (simde__m256i a, simde__m256i b) {
169
- #if defined(SIMDE_AVX2_NATIVE)
170
- return _mm256_add_epi16(a, b);
171
- #else
172
- simde__m256i_private
173
- r_,
174
- a_ = simde__m256i_to_private(a),
175
- b_ = simde__m256i_to_private(b);
176
-
177
- #if defined(SIMDE_ARCH_X86_SSE2)
178
- r_.m128i[0] = simde_mm_add_epi16(a_.m128i[0], b_.m128i[0]);
179
- r_.m128i[1] = simde_mm_add_epi16(a_.m128i[1], b_.m128i[1]);
180
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
181
- r_.i16 = a_.i16 + b_.i16;
182
- #else
183
- SIMDE__VECTORIZE
184
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
185
- r_.i16[i] = a_.i16[i] + b_.i16[i];
186
- }
187
- #endif
188
-
189
- return simde__m256i_from_private(r_);
190
- #endif
191
- }
192
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
193
- # define _mm256_add_epi16(a, b) simde_mm256_add_epi16(a, b)
194
- #endif
195
-
196
- SIMDE__FUNCTION_ATTRIBUTES
197
- simde__m256i
198
- simde_mm256_add_epi32 (simde__m256i a, simde__m256i b) {
199
- #if defined(SIMDE_AVX2_NATIVE)
200
- return _mm256_add_epi32(a, b);
201
- #else
202
- simde__m256i_private
203
- r_,
204
- a_ = simde__m256i_to_private(a),
205
- b_ = simde__m256i_to_private(b);
206
-
207
- #if defined(SIMDE_ARCH_X86_SSE2)
208
- r_.m128i[0] = simde_mm_add_epi32(a_.m128i[0], b_.m128i[0]);
209
- r_.m128i[1] = simde_mm_add_epi32(a_.m128i[1], b_.m128i[1]);
210
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
211
- r_.i32 = a_.i32 + b_.i32;
212
- #else
213
- SIMDE__VECTORIZE
214
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
215
- r_.i32[i] = a_.i32[i] + b_.i32[i];
216
- }
217
- #endif
218
-
219
- return simde__m256i_from_private(r_);
220
- #endif
221
- }
222
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
223
- # define _mm256_add_epi32(a, b) simde_mm256_add_epi32(a, b)
224
- #endif
225
-
226
- SIMDE__FUNCTION_ATTRIBUTES
227
- simde__m256i
228
- simde_mm256_add_epi64 (simde__m256i a, simde__m256i b) {
229
- #if defined(SIMDE_AVX2_NATIVE)
230
- return _mm256_add_epi64(a, b);
231
- #else
232
- simde__m256i_private
233
- r_,
234
- a_ = simde__m256i_to_private(a),
235
- b_ = simde__m256i_to_private(b);
236
-
237
- #if defined(SIMDE_ARCH_X86_SSE2)
238
- r_.m128i[0] = simde_mm_add_epi64(a_.m128i[0], b_.m128i[0]);
239
- r_.m128i[1] = simde_mm_add_epi64(a_.m128i[1], b_.m128i[1]);
240
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
241
- r_.i64 = a_.i64 + b_.i64;
242
- #else
243
- SIMDE__VECTORIZE
244
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
245
- r_.i64[i] = a_.i64[i] + b_.i64[i];
246
- }
247
- #endif
248
-
249
- return simde__m256i_from_private(r_);
250
- #endif
251
- }
252
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
253
- # define _mm256_add_epi64(a, b) simde_mm256_add_epi64(a, b)
254
- #endif
255
-
256
- SIMDE__FUNCTION_ATTRIBUTES
257
- simde__m256i
258
- simde_mm256_alignr_epi8 (simde__m256i a, simde__m256i b, int count) {
259
- simde__m256i_private
260
- r_,
261
- a_ = simde__m256i_to_private(a),
262
- b_ = simde__m256i_to_private(b);
263
-
264
- if (HEDLEY_UNLIKELY(count > 31))
265
- return simde_mm256_setzero_si256();
266
-
267
- for (size_t h = 0 ; h < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; h++) {
268
- SIMDE__VECTORIZE
269
- for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) {
270
- const int srcpos = count + HEDLEY_STATIC_CAST(int, i);
271
- if (srcpos > 31) {
272
- r_.m128i_private[h].i8[i] = 0;
273
- } else if (srcpos > 15) {
274
- r_.m128i_private[h].i8[i] = a_.m128i_private[h].i8[(srcpos) & 15];
275
- } else {
276
- r_.m128i_private[h].i8[i] = b_.m128i_private[h].i8[srcpos];
277
- }
278
- }
279
- }
280
-
281
- return simde__m256i_from_private(r_);
282
- }
283
- #if defined(SIMDE_AVX2_NATIVE)
284
- # define simde_mm256_alignr_epi8(a, b, count) _mm256_alignr_epi8(a, b, count)
285
- #elif defined(SIMDE_ARCH_X86_SSSE3)
286
- # define simde_mm256_alignr_epi8(a, b, count) \
287
- simde_mm256_set_m128i( \
288
- simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (count)), \
289
- simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (count)))
290
- #endif
291
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
292
- # define _mm256_alignr_epi8(a, b, count) simde_mm256_alignr_epi8(a, b, (count))
293
- #endif
294
-
295
- SIMDE__FUNCTION_ATTRIBUTES
296
- simde__m256i
297
- simde_mm256_and_si256 (simde__m256i a, simde__m256i b) {
298
- #if defined(SIMDE_AVX2_NATIVE)
299
- return _mm256_and_si256(a, b);
300
- #else
301
- simde__m256i_private
302
- r_,
303
- a_ = simde__m256i_to_private(a),
304
- b_ = simde__m256i_to_private(b);
305
-
306
- #if defined(SIMDE_ARCH_X86_SSE2)
307
- r_.m128i[0] = simde_mm_and_si128(a_.m128i[0], b_.m128i[0]);
308
- r_.m128i[1] = simde_mm_and_si128(a_.m128i[1], b_.m128i[1]);
309
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
310
- r_.i32f = a_.i32f & b_.i32f;
311
- #else
312
- SIMDE__VECTORIZE
313
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
314
- r_.i64[i] = a_.i64[i] & b_.i64[i];
315
- }
316
- #endif
317
-
318
- return simde__m256i_from_private(r_);
319
- #endif
320
- }
321
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
322
- # define _mm256_and_si256(a, b) simde_mm256_and_si256(a, b)
323
- #endif
324
-
325
- SIMDE__FUNCTION_ATTRIBUTES
326
- simde__m256i
327
- simde_mm256_andnot_si256 (simde__m256i a, simde__m256i b) {
328
- #if defined(SIMDE_AVX2_NATIVE)
329
- return _mm256_andnot_si256(a, b);
330
- #else
331
- simde__m256i_private
332
- r_,
333
- a_ = simde__m256i_to_private(a),
334
- b_ = simde__m256i_to_private(b);
335
-
336
- #if defined(SIMDE_ARCH_X86_SSE2) || defined(SIMDE_SSE2_NEON)
337
- r_.m128i_private[0] = simde__m128i_to_private(simde_mm_andnot_si128(simde__m128i_from_private(a_.m128i_private[0]), simde__m128i_from_private(b_.m128i_private[0])));
338
- r_.m128i_private[1] = simde__m128i_to_private(simde_mm_andnot_si128(simde__m128i_from_private(a_.m128i_private[1]), simde__m128i_from_private(b_.m128i_private[1])));
339
- #else
340
- SIMDE__VECTORIZE
341
- for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
342
- r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i];
343
- }
344
- #endif
345
-
346
- return simde__m256i_from_private(r_);
347
- #endif
348
- }
349
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
350
- # define _mm256_andnot_si256(a, b) simde_mm256_andnot_si256(a, b)
351
- #endif
352
-
353
- SIMDE__FUNCTION_ATTRIBUTES
354
- simde__m256i
355
- simde_mm256_adds_epi8 (simde__m256i a, simde__m256i b) {
356
- #if defined(SIMDE_AVX2_NATIVE)
357
- return _mm256_adds_epi8(a, b);
358
- #else
359
- simde__m256i_private
360
- r_,
361
- a_ = simde__m256i_to_private(a),
362
- b_ = simde__m256i_to_private(b);
363
-
364
- #if defined(SIMDE_ARCH_X86_SSE2) && !defined(HEDLEY_INTEL_VERSION)
365
- SIMDE__VECTORIZE
366
- for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
367
- r_.m128i[i] = simde_mm_adds_epi8(a_.m128i[i], b_.m128i[i]);
368
- }
369
- #else
370
- SIMDE__VECTORIZE
371
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
372
- const int32_t tmp =
373
- HEDLEY_STATIC_CAST(int16_t, a_.i8[i]) +
374
- HEDLEY_STATIC_CAST(int16_t, b_.i8[i]);
375
- r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, ((tmp < INT8_MAX) ? ((tmp > INT8_MIN) ? tmp : INT8_MIN) : INT8_MAX));
376
- }
377
- #endif
378
-
379
- return simde__m256i_from_private(r_);
380
- #endif
381
- }
382
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
383
- # define _mm256_adds_epi8(a, b) simde_mm256_adds_epi8(a, b)
384
- #endif
385
-
386
- SIMDE__FUNCTION_ATTRIBUTES
387
- simde__m256i
388
- simde_mm256_adds_epi16(simde__m256i a, simde__m256i b) {
389
- #if defined(SIMDE_AVX2_NATIVE)
390
- return _mm256_adds_epi16(a, b);
391
- #else
392
- simde__m256i_private
393
- r_,
394
- a_ = simde__m256i_to_private(a),
395
- b_ = simde__m256i_to_private(b);
396
-
397
- #if defined(SIMDE_ARCH_X86_SSE2) && !defined(HEDLEY_INTEL_VERSION)
398
- SIMDE__VECTORIZE
399
- for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
400
- r_.m128i[i] = simde_mm_adds_epi16(a_.m128i[i], b_.m128i[i]);
401
- }
402
- #else
403
- SIMDE__VECTORIZE
404
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
405
- const int32_t tmp =
406
- HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) +
407
- HEDLEY_STATIC_CAST(int32_t, b_.i16[i]);
408
- r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((tmp < INT16_MAX) ? ((tmp > INT16_MIN) ? tmp : INT16_MIN) : INT16_MAX));
409
- }
410
- #endif
411
-
412
- return simde__m256i_from_private(r_);
413
- #endif
414
- }
415
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
416
- # define _mm256_adds_epi16(a, b) simde_mm256_adds_epi16(a, b)
417
- #endif
418
-
419
- SIMDE__FUNCTION_ATTRIBUTES
420
- simde__m256i
421
- simde_mm256_adds_epu8 (simde__m256i a, simde__m256i b) {
422
- #if defined(SIMDE_AVX2_NATIVE)
423
- return _mm256_adds_epu8(a, b);
424
- #else
425
- simde__m256i_private
426
- r_,
427
- a_ = simde__m256i_to_private(a),
428
- b_ = simde__m256i_to_private(b);
429
-
430
- #if defined(SIMDE_ARCH_X86_SSE2)
431
- r_.m128i[0] = simde_mm_adds_epu8(a_.m128i[0], b_.m128i[0]);
432
- r_.m128i[1] = simde_mm_adds_epu8(a_.m128i[1], b_.m128i[1]);
433
- #else
434
- SIMDE__VECTORIZE
435
- for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
436
- r_.u8[i] = ((UINT8_MAX - a_.u8[i]) > b_.u8[i]) ? (a_.u8[i] + b_.u8[i]) : UINT8_MAX;
437
- }
438
- #endif
439
-
440
- return simde__m256i_from_private(r_);
441
- #endif
442
- }
443
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
444
- # define _mm256_adds_epu8(a, b) simde_mm256_adds_epu8(a, b)
445
- #endif
446
-
447
- SIMDE__FUNCTION_ATTRIBUTES
448
- simde__m256i
449
- simde_mm256_adds_epu16(simde__m256i a, simde__m256i b) {
450
- #if defined(SIMDE_AVX2_NATIVE)
451
- return _mm256_adds_epu16(a, b);
452
- #else
453
- simde__m256i_private
454
- r_,
455
- a_ = simde__m256i_to_private(a),
456
- b_ = simde__m256i_to_private(b);
457
-
458
- #if defined(SIMDE_ARCH_X86_SSE2)
459
- r_.m128i[0] = simde_mm_adds_epu16(a_.m128i[0], b_.m128i[0]);
460
- r_.m128i[1] = simde_mm_adds_epu16(a_.m128i[1], b_.m128i[1]);
461
- #else
462
- SIMDE__VECTORIZE
463
- for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
464
- r_.u16[i] = ((UINT16_MAX - a_.u16[i]) > b_.u16[i]) ? (a_.u16[i] + b_.u16[i]) : UINT16_MAX;
465
- }
466
- #endif
467
-
468
- return simde__m256i_from_private(r_);
469
- #endif
470
- }
471
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
472
- # define _mm256_adds_epu16(a, b) simde_mm256_adds_epu16(a, b)
473
- #endif
474
-
475
- SIMDE__FUNCTION_ATTRIBUTES
476
- simde__m256i
477
- simde_mm256_avg_epu8 (simde__m256i a, simde__m256i b) {
478
- #if defined(SIMDE_AVX2_NATIVE)
479
- return _mm256_avg_epu8(a, b);
480
- #else
481
- simde__m256i_private
482
- r_,
483
- a_ = simde__m256i_to_private(a),
484
- b_ = simde__m256i_to_private(b);
485
-
486
- SIMDE__VECTORIZE
487
- for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
488
- r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1;
489
- }
490
-
491
- return simde__m256i_from_private(r_);
492
- #endif
493
- }
494
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
495
- # define _mm256_avg_epu8(a, b) simde_mm256_avg_epu8(a, b)
496
- #endif
497
-
498
- SIMDE__FUNCTION_ATTRIBUTES
499
- simde__m256i
500
- simde_mm256_avg_epu16 (simde__m256i a, simde__m256i b) {
501
- #if defined(SIMDE_AVX2_NATIVE)
502
- return _mm256_avg_epu16(a, b);
503
- #else
504
- simde__m256i_private
505
- r_,
506
- a_ = simde__m256i_to_private(a),
507
- b_ = simde__m256i_to_private(b);
508
-
509
- SIMDE__VECTORIZE
510
- for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
511
- r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1;
512
- }
513
-
514
- return simde__m256i_from_private(r_);
515
- #endif
516
- }
517
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
518
- # define _mm256_avg_epu16(a, b) simde_mm256_avg_epu16(a, b)
519
- #endif
520
-
521
- SIMDE__FUNCTION_ATTRIBUTES
522
- simde__m128i
523
- simde_mm_blend_epi32(simde__m128i a, simde__m128i b, const int imm8)
524
- HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
525
- simde__m128i_private
526
- r_,
527
- a_ = simde__m128i_to_private(a),
528
- b_ = simde__m128i_to_private(b);
529
-
530
- SIMDE__VECTORIZE
531
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
532
- r_.i32[i] = ((imm8 >> i) & 1) ? b_.i32[i] : a_.i32[i];
533
- }
534
-
535
- return simde__m128i_from_private(r_);
536
- }
537
- #if defined(SIMDE_AVX2_NATIVE)
538
- # define simde_mm_blend_epi32(a, b, imm8) _mm_blend_epi32(a, b, imm8);
539
- #endif
540
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
541
- # define _mm_blend_epi32(a, b, imm8) simde_mm_blend_epi32(a, b, imm8)
542
- #endif
543
-
544
- SIMDE__FUNCTION_ATTRIBUTES
545
- simde__m256i
546
- simde_mm256_blend_epi16(simde__m256i a, simde__m256i b, const int imm8)
547
- HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
548
- simde__m256i_private
549
- r_,
550
- a_ = simde__m256i_to_private(a),
551
- b_ = simde__m256i_to_private(b);
552
-
553
- SIMDE__VECTORIZE
554
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
555
- r_.i16[i] = ((imm8 >> i%8) & 1) ? b_.i16[i] : a_.i16[i];
556
- }
557
-
558
- return simde__m256i_from_private(r_);
559
- }
560
- #if defined(SIMDE_AVX2_NATIVE)
561
- # define simde_mm256_blend_epi16(a, b, imm8) _mm256_blend_epi16(a, b, imm8);
562
- #endif
563
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
564
- # define _mm256_blend_epi16(a, b, imm8) simde_mm256_blend_epi16(a, b, imm8)
565
- #endif
566
-
567
-
568
- SIMDE__FUNCTION_ATTRIBUTES
569
- simde__m256i
570
- simde_mm256_blend_epi32(simde__m256i a, simde__m256i b, const int imm8)
571
- HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
572
- simde__m256i_private
573
- r_,
574
- a_ = simde__m256i_to_private(a),
575
- b_ = simde__m256i_to_private(b);
576
-
577
- SIMDE__VECTORIZE
578
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
579
- r_.i32[i] = ((imm8 >> i) & 1) ? b_.i32[i] : a_.i32[i];
580
- }
581
-
582
- return simde__m256i_from_private(r_);
583
- }
584
- #if defined(SIMDE_AVX2_NATIVE)
585
- # define simde_mm256_blend_epi32(a, b, imm8) _mm256_blend_epi32(a, b, imm8);
586
- #endif
587
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
588
- # define _mm256_blend_epi32(a, b, imm8) simde_mm256_blend_epi32(a, b, imm8)
589
- #endif
590
-
591
-
592
- SIMDE__FUNCTION_ATTRIBUTES
593
- simde__m256i
594
- simde_mm256_blendv_epi8(simde__m256i a, simde__m256i b, simde__m256i mask) {
595
- #if defined(SIMDE_AVX2_NATIVE)
596
- return _mm256_blendv_epi8(a, b, mask);
597
- #else
598
- simde__m256i_private
599
- r_,
600
- a_ = simde__m256i_to_private(a),
601
- b_ = simde__m256i_to_private(b),
602
- mask_ = simde__m256i_to_private(mask);
603
-
604
- #if defined(SIMDE_ARCH_X86_SSE4_1)
605
- r_.m128i_private[0] = simde__m128i_to_private(simde_mm_blendv_epi8(simde__m128i_from_private(a_.m128i_private[0]), simde__m128i_from_private(b_.m128i_private[0]), simde__m128i_from_private(mask_.m128i_private[0])));
606
- r_.m128i_private[1] = simde__m128i_to_private(simde_mm_blendv_epi8(simde__m128i_from_private(a_.m128i_private[1]), simde__m128i_from_private(b_.m128i_private[1]), simde__m128i_from_private(mask_.m128i_private[1])));
607
- #else
608
- SIMDE__VECTORIZE
609
- for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
610
- if (mask_.u8[i] & 0x80) {
611
- r_.u8[i] = b_.u8[i];
612
- } else {
613
- r_.u8[i] = a_.u8[i];
614
- }
615
- }
616
- #endif
617
-
618
- return simde__m256i_from_private(r_);
619
- #endif
620
- }
621
- #if defined(SIMDE_AVX2_NATIVE)
622
- # define simde_mm256_blendv_epi8(a, b, imm8) _mm256_blendv_epi8(a, b, imm8);
623
- #endif
624
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
625
- # define _mm256_blendv_epi8(a, b, mask) simde_mm256_blendv_epi8(a, b, mask)
626
- #endif
627
-
628
- SIMDE__FUNCTION_ATTRIBUTES
629
- simde__m128i
630
- simde_mm_broadcastb_epi8 (simde__m128i a) {
631
- #if defined(SIMDE_AVX2_NATIVE)
632
- return _mm_broadcastb_epi8(a);
633
- #else
634
- simde__m128i_private r_;
635
- simde__m128i_private a_= simde__m128i_to_private(a);
636
-
637
- SIMDE__VECTORIZE
638
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
639
- r_.i8[i] = a_.i8[0];
640
- }
641
-
642
- return simde__m128i_from_private(r_);
643
- #endif
644
- }
645
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
646
- # define _mm_broadcastb_epi8(a) simde_mm_broadcastb_epi8(a)
647
- #endif
648
-
649
- SIMDE__FUNCTION_ATTRIBUTES
650
- simde__m256i
651
- simde_mm256_broadcastb_epi8 (simde__m128i a) {
652
- #if defined(SIMDE_AVX2_NATIVE)
653
- return _mm256_broadcastb_epi8(a);
654
- #else
655
- simde__m256i_private r_;
656
- simde__m128i_private a_= simde__m128i_to_private(a);
657
-
658
- SIMDE__VECTORIZE
659
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
660
- r_.i8[i] = a_.i8[0];
661
- }
662
-
663
- return simde__m256i_from_private(r_);
664
- #endif
665
- }
666
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
667
- # define _mm256_broadcastb_epi8(a) simde_mm256_broadcastb_epi8(a)
668
- #endif
669
-
670
- SIMDE__FUNCTION_ATTRIBUTES
671
- simde__m256i
672
- simde_mm256_broadcastsi128_si256 (simde__m128i a) {
673
- #if defined(SIMDE_AVX2_NATIVE)
674
- return _mm256_broadcastsi128_si256(a);
675
- #else
676
- simde__m256i_private r_;
677
- simde__m128i_private a_ = simde__m128i_to_private(a);
678
-
679
- #if defined(SIMDE_ARCH_X86_SSE2)
680
- r_.m128i_private[0] = a_;
681
- r_.m128i_private[1] = a_;
682
- #else
683
- r_.i64[0] = a_.i64[0];
684
- r_.i64[1] = a_.i64[1];
685
- r_.i64[2] = a_.i64[0];
686
- r_.i64[3] = a_.i64[1];
687
- #endif
688
-
689
- return simde__m256i_from_private(r_);
690
- #endif
691
- }
692
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
693
- # define _mm256_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a)
694
- #endif
695
-
696
- SIMDE__FUNCTION_ATTRIBUTES
697
- simde__m256i
698
- simde_mm256_cmpeq_epi8 (simde__m256i a, simde__m256i b) {
699
- #if defined(SIMDE_AVX2_NATIVE)
700
- return _mm256_cmpeq_epi8(a, b);
701
- #else
702
- simde__m256i_private
703
- r_,
704
- a_ = simde__m256i_to_private(a),
705
- b_ = simde__m256i_to_private(b);
706
-
707
- #if defined(SIMDE_ARCH_X86_SSE2)
708
- r_.m128i[0] = simde_mm_cmpeq_epi8(a_.m128i[0], b_.m128i[0]);
709
- r_.m128i[1] = simde_mm_cmpeq_epi8(a_.m128i[1], b_.m128i[1]);
710
- #else
711
- SIMDE__VECTORIZE
712
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
713
- r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
714
- }
715
- #endif
716
-
717
- return simde__m256i_from_private(r_);
718
- #endif
719
- }
720
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
721
- # define _mm256_cmpeq_epi8(a, b) simde_mm256_cmpeq_epi8(a, b)
722
- #endif
723
-
724
- SIMDE__FUNCTION_ATTRIBUTES
725
- simde__m256i
726
- simde_mm256_cmpeq_epi16 (simde__m256i a, simde__m256i b) {
727
- #if defined(SIMDE_AVX2_NATIVE)
728
- return _mm256_cmpeq_epi16(a, b);
729
- #else
730
- simde__m256i_private
731
- r_,
732
- a_ = simde__m256i_to_private(a),
733
- b_ = simde__m256i_to_private(b);
734
-
735
- SIMDE__VECTORIZE
736
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
737
- r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
738
- }
739
-
740
- return simde__m256i_from_private(r_);
741
- #endif
742
- }
743
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
744
- # define _mm256_cmpeq_epi16(a, b) simde_mm256_cmpeq_epi16(a, b)
745
- #endif
746
-
747
- SIMDE__FUNCTION_ATTRIBUTES
748
- simde__m256i
749
- simde_mm256_cmpeq_epi32 (simde__m256i a, simde__m256i b) {
750
- #if defined(SIMDE_AVX2_NATIVE)
751
- return _mm256_cmpeq_epi32(a, b);
752
- #else
753
- simde__m256i_private
754
- r_,
755
- a_ = simde__m256i_to_private(a),
756
- b_ = simde__m256i_to_private(b);
757
-
758
- #if defined(SIMDE_ARCH_X86_SSE2) || defined(SIMDE_SSE2_NEON)
759
- r_.m128i[0] = simde_mm_cmpeq_epi32(a_.m128i[0], b_.m128i[0]);
760
- r_.m128i[1] = simde_mm_cmpeq_epi32(a_.m128i[1], b_.m128i[1]);
761
- #else
762
- SIMDE__VECTORIZE
763
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
764
- r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
765
- }
766
- #endif
767
-
768
- return simde__m256i_from_private(r_);
769
- #endif
770
- }
771
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
772
- # define _mm256_cmpeq_epi32(a, b) simde_mm256_cmpeq_epi32(a, b)
773
- #endif
774
-
775
- SIMDE__FUNCTION_ATTRIBUTES
776
- simde__m256i
777
- simde_mm256_cmpeq_epi64 (simde__m256i a, simde__m256i b) {
778
- #if defined(SIMDE_AVX2_NATIVE)
779
- return _mm256_cmpeq_epi64(a, b);
780
- #else
781
- simde__m256i_private
782
- r_,
783
- a_ = simde__m256i_to_private(a),
784
- b_ = simde__m256i_to_private(b);
785
-
786
- #if defined(SIMDE_ARCH_X86_SSE2) || defined(SIMDE_SSE2_NEON)
787
- r_.m128i[0] = simde_mm_cmpeq_epi64(a_.m128i[0], b_.m128i[0]);
788
- r_.m128i[1] = simde_mm_cmpeq_epi64(a_.m128i[1], b_.m128i[1]);
789
- #else
790
- SIMDE__VECTORIZE
791
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
792
- r_.i64[i] = (a_.i64[i] == b_.i64[i]) ? ~INT64_C(0) : INT64_C(0);
793
- }
794
- #endif
795
-
796
- return simde__m256i_from_private(r_);
797
- #endif
798
- }
799
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
800
- # define _mm256_cmpeq_epi64(a, b) simde_mm256_cmpeq_epi64(a, b)
801
- #endif
802
-
803
- SIMDE__FUNCTION_ATTRIBUTES
804
- simde__m256i
805
- simde_mm256_cmpgt_epi8 (simde__m256i a, simde__m256i b) {
806
- #if defined(SIMDE_AVX2_NATIVE)
807
- return _mm256_cmpgt_epi8(a, b);
808
- #else
809
- simde__m256i_private
810
- r_,
811
- a_ = simde__m256i_to_private(a),
812
- b_ = simde__m256i_to_private(b);
813
-
814
- #if defined(SIMDE_ARCH_X86_SSE2)
815
- r_.m128i[0] = simde_mm_cmpgt_epi8(a_.m128i[0], b_.m128i[0]);
816
- r_.m128i[1] = simde_mm_cmpgt_epi8(a_.m128i[1], b_.m128i[1]);
817
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
818
- r_.i8 = a_.i8 > b_.i8;
819
- #else
820
- SIMDE__VECTORIZE
821
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
822
- r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
823
- }
824
- #endif
825
-
826
- return simde__m256i_from_private(r_);
827
- #endif
828
- }
829
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
830
- # define _mm256_cmpgt_epi8(a, b) simde_mm256_cmpgt_epi8(a, b)
831
- #endif
832
-
833
- SIMDE__FUNCTION_ATTRIBUTES
834
- simde__m256i
835
- simde_mm256_cmpgt_epi16 (simde__m256i a, simde__m256i b) {
836
- #if defined(SIMDE_AVX2_NATIVE)
837
- return _mm256_cmpgt_epi16(a, b);
838
- #else
839
- simde__m256i_private
840
- r_,
841
- a_ = simde__m256i_to_private(a),
842
- b_ = simde__m256i_to_private(b);
843
-
844
- #if defined(SIMDE_ARCH_X86_SSE2)
845
- r_.m128i[0] = simde_mm_cmpgt_epi16(a_.m128i[0], b_.m128i[0]);
846
- r_.m128i[1] = simde_mm_cmpgt_epi16(a_.m128i[1], b_.m128i[1]);
847
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
848
- r_.i16 = a_.i16 > b_.i16;
849
- #else
850
- SIMDE__VECTORIZE
851
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
852
- r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
853
- }
854
- #endif
855
-
856
- return simde__m256i_from_private(r_);
857
- #endif
858
- }
859
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
860
- # define _mm256_cmpgt_epi16(a, b) simde_mm256_cmpgt_epi16(a, b)
861
- #endif
862
-
863
- SIMDE__FUNCTION_ATTRIBUTES
864
- simde__m256i
865
- simde_mm256_cmpgt_epi32 (simde__m256i a, simde__m256i b) {
866
- #if defined(SIMDE_AVX2_NATIVE)
867
- return _mm256_cmpgt_epi32(a, b);
868
- #else
869
- simde__m256i_private
870
- r_,
871
- a_ = simde__m256i_to_private(a),
872
- b_ = simde__m256i_to_private(b);
873
-
874
- #if defined(SIMDE_ARCH_X86_SSE2)
875
- r_.m128i[0] = simde_mm_cmpgt_epi32(a_.m128i[0], b_.m128i[0]);
876
- r_.m128i[1] = simde_mm_cmpgt_epi32(a_.m128i[1], b_.m128i[1]);
877
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
878
- r_.i32 = a_.i32 > b_.i32;
879
- #else
880
- SIMDE__VECTORIZE
881
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
882
- r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
883
- }
884
- #endif
885
-
886
- return simde__m256i_from_private(r_);
887
- #endif
888
- }
889
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
890
- # define _mm256_cmpgt_epi32(a, b) simde_mm256_cmpgt_epi32(a, b)
891
- #endif
892
-
893
- SIMDE__FUNCTION_ATTRIBUTES
894
- simde__m256i
895
- simde_mm256_cmpgt_epi64 (simde__m256i a, simde__m256i b) {
896
- #if defined(SIMDE_AVX2_NATIVE)
897
- return _mm256_cmpgt_epi64(a, b);
898
- #else
899
- simde__m256i_private
900
- r_,
901
- a_ = simde__m256i_to_private(a),
902
- b_ = simde__m256i_to_private(b);
903
-
904
- #if defined(SIMDE_ARCH_X86_SSE2)
905
- r_.m128i[0] = simde_mm_cmpgt_epi64(a_.m128i[0], b_.m128i[0]);
906
- r_.m128i[1] = simde_mm_cmpgt_epi64(a_.m128i[1], b_.m128i[1]);
907
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
908
- r_.i64 = a_.i64 > b_.i64;
909
- #else
910
- SIMDE__VECTORIZE
911
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
912
- r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? ~INT64_C(0) : INT64_C(0);
913
- }
914
- #endif
915
-
916
- return simde__m256i_from_private(r_);
917
- #endif
918
- }
919
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
920
- # define _mm256_cmpgt_epi64(a, b) simde_mm256_cmpgt_epi64(a, b)
921
- #endif
922
-
923
- SIMDE__FUNCTION_ATTRIBUTES
924
- simde__m256i
925
- simde_mm256_cvtepi8_epi16 (simde__m128i a) {
926
- #if defined(SIMDE_AVX2_NATIVE)
927
- return _mm256_cvtepi8_epi16(a);
928
- #else
929
- simde__m256i_private r_;
930
- simde__m128i_private a_ = simde__m128i_to_private(a);
931
-
932
- #if defined(SIMDE__CONVERT_VECTOR)
933
- SIMDE__CONVERT_VECTOR(r_.i16, a_.i8);
934
- #else
935
- SIMDE__VECTORIZE
936
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
937
- r_.i16[i] = a_.i8[i];
938
- }
939
- #endif
940
-
941
- return simde__m256i_from_private(r_);
942
- #endif
943
- }
944
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
945
- # define _mm256_cvtepi8_epi16(a) simde_mm256_cvtepi8_epi16(a)
946
- #endif
947
-
948
- SIMDE__FUNCTION_ATTRIBUTES
949
- simde__m256i
950
- simde_mm256_cvtepi8_epi32 (simde__m128i a) {
951
- #if defined(SIMDE_AVX2_NATIVE)
952
- return _mm256_cvtepi8_epi32(a);
953
- #else
954
- simde__m256i_private r_;
955
- simde__m128i_private a_ = simde__m128i_to_private(a);
956
-
957
- #if defined(SIMDE__CONVERT_VECTOR)
958
- SIMDE__CONVERT_VECTOR(r_.i32, a_.m64_private[0].i8);
959
- #else
960
- SIMDE__VECTORIZE
961
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
962
- r_.i32[i] = a_.i8[i];
963
- }
964
- #endif
965
-
966
- return simde__m256i_from_private(r_);
967
- #endif
968
- }
969
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
970
- # define _mm256_cvtepi8_epi32(a) simde_mm256_cvtepi8_epi32(a)
971
- #endif
972
-
973
- SIMDE__FUNCTION_ATTRIBUTES
974
- simde__m256i
975
- simde_mm256_cvtepi8_epi64 (simde__m128i a) {
976
- #if defined(SIMDE_AVX2_NATIVE)
977
- return _mm256_cvtepi8_epi64(a);
978
- #else
979
- simde__m256i_private r_;
980
- simde__m128i_private a_ = simde__m128i_to_private(a);
981
-
982
- SIMDE__VECTORIZE
983
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
984
- r_.i64[i] = a_.i8[i];
985
- }
986
-
987
- return simde__m256i_from_private(r_);
988
- #endif
989
- }
990
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
991
- # define _mm256_cvtepi8_epi64(a) simde_mm256_cvtepi8_epi64(a)
992
- #endif
993
-
994
- SIMDE__FUNCTION_ATTRIBUTES
995
- simde__m256i
996
- simde_mm256_cvtepi16_epi32 (simde__m128i a) {
997
- #if defined(SIMDE_AVX2_NATIVE)
998
- return _mm256_cvtepi16_epi32(a);
999
- #else
1000
- simde__m256i_private r_;
1001
- simde__m128i_private a_ = simde__m128i_to_private(a);
1002
-
1003
- #if defined(SIMDE__CONVERT_VECTOR)
1004
- SIMDE__CONVERT_VECTOR(r_.i32, a_.i16);
1005
- #else
1006
- SIMDE__VECTORIZE
1007
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1008
- r_.i32[i] = a_.i16[i];
1009
- }
1010
- #endif
1011
-
1012
- return simde__m256i_from_private(r_);
1013
- #endif
1014
- }
1015
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
1016
- # define _mm256_cvtepi16_epi32(a) simde_mm256_cvtepi16_epi32(a)
1017
- #endif
1018
-
1019
- SIMDE__FUNCTION_ATTRIBUTES
1020
- simde__m256i
1021
- simde_mm256_cvtepi16_epi64 (simde__m128i a) {
1022
- #if defined(SIMDE_AVX2_NATIVE)
1023
- return _mm256_cvtepi16_epi64(a);
1024
- #else
1025
- simde__m256i_private r_;
1026
- simde__m128i_private a_ = simde__m128i_to_private(a);
1027
-
1028
- #if defined(SIMDE__CONVERT_VECTOR)
1029
- SIMDE__CONVERT_VECTOR(r_.i64, a_.m64_private[0].i16);
1030
- #else
1031
- SIMDE__VECTORIZE
1032
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
1033
- r_.i64[i] = a_.i16[i];
1034
- }
1035
- #endif
1036
-
1037
- return simde__m256i_from_private(r_);
1038
- #endif
1039
- }
1040
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
1041
- # define _mm256_cvtepi16_epi64(a) simde_mm256_cvtepi16_epi64(a)
1042
- #endif
1043
-
1044
- SIMDE__FUNCTION_ATTRIBUTES
1045
- simde__m256i
1046
- simde_mm256_cvtepi32_epi64 (simde__m128i a) {
1047
- #if defined(SIMDE_AVX2_NATIVE)
1048
- return _mm256_cvtepi32_epi64(a);
1049
- #else
1050
- simde__m256i_private r_;
1051
- simde__m128i_private a_ = simde__m128i_to_private(a);
1052
-
1053
- #if defined(SIMDE__CONVERT_VECTOR)
1054
- SIMDE__CONVERT_VECTOR(r_.i64, a_.i32);
1055
- #else
1056
- SIMDE__VECTORIZE
1057
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
1058
- r_.i64[i] = a_.i32[i];
1059
- }
1060
- #endif
1061
-
1062
- return simde__m256i_from_private(r_);
1063
- #endif
1064
- }
1065
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
1066
- # define _mm256_cvtepi32_epi64(a) simde_mm256_cvtepi32_epi64(a)
1067
- #endif
1068
-
1069
- SIMDE__FUNCTION_ATTRIBUTES
1070
- simde__m256i
1071
- simde_mm256_cvtepu8_epi16 (simde__m128i a) {
1072
- #if defined(SIMDE_AVX2_NATIVE)
1073
- return _mm256_cvtepu8_epi16(a);
1074
- #else
1075
- simde__m256i_private r_;
1076
- simde__m128i_private a_ = simde__m128i_to_private(a);
1077
-
1078
- #if defined(SIMDE__CONVERT_VECTOR)
1079
- SIMDE__CONVERT_VECTOR(r_.i16, a_.u8);
1080
- #else
1081
- SIMDE__VECTORIZE
1082
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
1083
- r_.i16[i] = a_.u8[i];
1084
- }
1085
- #endif
1086
-
1087
- return simde__m256i_from_private(r_);
1088
- #endif
1089
- }
1090
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
1091
- # define _mm256_cvtepu8_epi16(a) simde_mm256_cvtepu8_epi16(a)
1092
- #endif
1093
-
1094
- SIMDE__FUNCTION_ATTRIBUTES
1095
- simde__m256i
1096
- simde_mm256_cvtepu8_epi32 (simde__m128i a) {
1097
- #if defined(SIMDE_AVX2_NATIVE)
1098
- return _mm256_cvtepu8_epi32(a);
1099
- #else
1100
- simde__m256i_private r_;
1101
- simde__m128i_private a_ = simde__m128i_to_private(a);
1102
-
1103
- #if defined(SIMDE__CONVERT_VECTOR)
1104
- SIMDE__CONVERT_VECTOR(r_.i32, a_.m64_private[0].u8);
1105
- #else
1106
- SIMDE__VECTORIZE
1107
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1108
- r_.i32[i] = a_.u8[i];
1109
- }
1110
- #endif
1111
-
1112
- return simde__m256i_from_private(r_);
1113
- #endif
1114
- }
1115
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
1116
- # define _mm256_cvtepu8_epi32(a) simde_mm256_cvtepu8_epi32(a)
1117
- #endif
1118
-
1119
- SIMDE__FUNCTION_ATTRIBUTES
1120
- simde__m256i
1121
- simde_mm256_cvtepu8_epi64 (simde__m128i a) {
1122
- #if defined(SIMDE_AVX2_NATIVE)
1123
- return _mm256_cvtepu8_epi64(a);
1124
- #else
1125
- simde__m256i_private r_;
1126
- simde__m128i_private a_ = simde__m128i_to_private(a);
1127
-
1128
- SIMDE__VECTORIZE
1129
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
1130
- r_.i64[i] = a_.u8[i];
1131
- }
1132
-
1133
- return simde__m256i_from_private(r_);
1134
- #endif
1135
- }
1136
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
1137
- # define _mm256_cvtepu8_epi64(a) simde_mm256_cvtepu8_epi64(a)
1138
- #endif
1139
-
1140
- SIMDE__FUNCTION_ATTRIBUTES
1141
- simde__m256i
1142
- simde_mm256_cvtepu16_epi32 (simde__m128i a) {
1143
- #if defined(SIMDE_AVX2_NATIVE)
1144
- return _mm256_cvtepu16_epi32(a);
1145
- #else
1146
- simde__m256i_private r_;
1147
- simde__m128i_private a_ = simde__m128i_to_private(a);
1148
-
1149
- #if defined(SIMDE__CONVERT_VECTOR)
1150
- SIMDE__CONVERT_VECTOR(r_.i32, a_.u16);
1151
- #else
1152
- SIMDE__VECTORIZE
1153
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1154
- r_.i32[i] = a_.u16[i];
1155
- }
1156
- #endif
1157
-
1158
- return simde__m256i_from_private(r_);
1159
- #endif
1160
- }
1161
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
1162
- # define _mm256_cvtepu16_epi32(a) simde_mm256_cvtepu16_epi32(a)
1163
- #endif
1164
-
1165
- SIMDE__FUNCTION_ATTRIBUTES
1166
- simde__m256i
1167
- simde_mm256_cvtepu16_epi64 (simde__m128i a) {
1168
- #if defined(SIMDE_AVX2_NATIVE)
1169
- return _mm256_cvtepu16_epi64(a);
1170
- #else
1171
- simde__m256i_private r_;
1172
- simde__m128i_private a_ = simde__m128i_to_private(a);
1173
-
1174
- #if defined(SIMDE__CONVERT_VECTOR)
1175
- SIMDE__CONVERT_VECTOR(r_.i64, a_.m64_private[0].u16);
1176
- #else
1177
- SIMDE__VECTORIZE
1178
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
1179
- r_.i64[i] = a_.u16[i];
1180
- }
1181
- #endif
1182
-
1183
- return simde__m256i_from_private(r_);
1184
- #endif
1185
- }
1186
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
1187
- # define _mm256_cvtepu16_epi64(a) simde_mm256_cvtepu16_epi64(a)
1188
- #endif
1189
-
1190
- SIMDE__FUNCTION_ATTRIBUTES
1191
- simde__m256i
1192
- simde_mm256_cvtepu32_epi64 (simde__m128i a) {
1193
- #if defined(SIMDE_AVX2_NATIVE)
1194
- return _mm256_cvtepu32_epi64(a);
1195
- #else
1196
- simde__m256i_private r_;
1197
- simde__m128i_private a_ = simde__m128i_to_private(a);
1198
-
1199
- #if defined(SIMDE__CONVERT_VECTOR)
1200
- SIMDE__CONVERT_VECTOR(r_.i64, a_.u32);
1201
- #else
1202
- SIMDE__VECTORIZE
1203
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
1204
- r_.i64[i] = a_.u32[i];
1205
- }
1206
- #endif
1207
-
1208
- return simde__m256i_from_private(r_);
1209
- #endif
1210
- }
1211
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
1212
- # define _mm256_cvtepu32_epi64(a) simde_mm256_cvtepu32_epi64(a)
1213
- #endif
1214
-
1215
- SIMDE__FUNCTION_ATTRIBUTES
1216
- int
1217
- simde_mm256_extract_epi8 (simde__m256i a, const int index)
1218
- HEDLEY_REQUIRE_MSG((index & 31) == index, "index must be in range [0, 31]"){
1219
- simde__m256i_private a_ = simde__m256i_to_private(a);
1220
- return a_.i8[index];
1221
- }
1222
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
1223
- # define _mm256_extract_epi8(a, index) simde_mm256_extract_epi8(a, index)
1224
- #endif
1225
-
1226
- SIMDE__FUNCTION_ATTRIBUTES
1227
- int
1228
- simde_mm256_extract_epi16 (simde__m256i a, const int index)
1229
- HEDLEY_REQUIRE_MSG((index & 0xf) == index, "index must be in range [0, 15]") {
1230
- simde__m256i_private a_ = simde__m256i_to_private(a);
1231
- return a_.i16[index];
1232
- }
1233
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
1234
- # define _mm256_extract_epi16(a, index) simde_mm256_extract_epi16(a, index)
1235
- #endif
1236
-
1237
- SIMDE__FUNCTION_ATTRIBUTES
1238
- simde__m128i
1239
- simde_mm256_extracti128_si256 (simde__m256i a, const int imm8)
1240
- HEDLEY_REQUIRE_MSG((imm8 & 1) == imm8, "imm8 must be 0 or 1") {
1241
- simde__m256i_private a_ = simde__m256i_to_private(a);
1242
- return a_.m128i[imm8];
1243
- }
1244
- #if defined(SIMDE_AVX2_NATIVE)
1245
- # define simde_mm256_extracti128_si256(a, imm8) _mm256_extracti128_si256(a, imm8)
1246
- #endif
1247
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
1248
- # define _mm256_extracti128_si256(a, imm8) simde_mm256_extracti128_si256(a, imm8)
1249
- #endif
1250
-
1251
- SIMDE__FUNCTION_ATTRIBUTES
1252
- simde__m256i
1253
- simde_mm256_madd_epi16 (simde__m256i a, simde__m256i b) {
1254
- #if defined(SIMDE_AVX2_NATIVE)
1255
- return _mm256_madd_epi16(a, b);
1256
- #else
1257
- simde__m256i_private
1258
- r_,
1259
- a_ = simde__m256i_to_private(a),
1260
- b_ = simde__m256i_to_private(b);
1261
-
1262
- r_.m128i[0] = simde_mm_madd_epi16(a_.m128i[0], b_.m128i[0]);
1263
- r_.m128i[1] = simde_mm_madd_epi16(a_.m128i[1], b_.m128i[1]);
1264
-
1265
- return simde__m256i_from_private(r_);
1266
- #endif
1267
- }
1268
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
1269
- # define _mm256_add_epi16(a, b) simde_mm256_add_epi16(a, b)
1270
- #endif
1271
-
1272
- SIMDE__FUNCTION_ATTRIBUTES
1273
- simde__m256i
1274
- simde_mm256_max_epi8 (simde__m256i a, simde__m256i b) {
1275
- #if defined(SIMDE_AVX2_NATIVE) && !defined(__PGI)
1276
- return _mm256_max_epi8(a, b);
1277
- #else
1278
- simde__m256i_private
1279
- r_,
1280
- a_ = simde__m256i_to_private(a),
1281
- b_ = simde__m256i_to_private(b);
1282
-
1283
- #if defined(SIMDE_ARCH_X86_SSE4_1)
1284
- r_.m128i[0] = simde_mm_max_epi8(a_.m128i[0], b_.m128i[0]);
1285
- r_.m128i[1] = simde_mm_max_epi8(a_.m128i[1], b_.m128i[1]);
1286
- #else
1287
- SIMDE__VECTORIZE
1288
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
1289
- r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i];
1290
- }
1291
- #endif
1292
-
1293
- return simde__m256i_from_private(r_);
1294
- #endif
1295
- }
1296
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
1297
- # define _mm256_max_epi8(a, b) simde_mm256_max_epi8(a, b)
1298
- #endif
1299
-
1300
- SIMDE__FUNCTION_ATTRIBUTES
1301
- simde__m256i
1302
- simde_mm256_max_epu8 (simde__m256i a, simde__m256i b) {
1303
- #if defined(SIMDE_AVX2_NATIVE)
1304
- return _mm256_max_epu8(a, b);
1305
- #else
1306
- simde__m256i_private
1307
- r_,
1308
- a_ = simde__m256i_to_private(a),
1309
- b_ = simde__m256i_to_private(b);
1310
-
1311
- #if defined(SIMDE_ARCH_X86_SSE2) || defined(SIMDE_SSE2_NEON)
1312
- r_.m128i[0] = simde_mm_max_epu8(a_.m128i[0], b_.m128i[0]);
1313
- r_.m128i[1] = simde_mm_max_epu8(a_.m128i[1], b_.m128i[1]);
1314
- #else
1315
- SIMDE__VECTORIZE
1316
- for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
1317
- r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i];
1318
- }
1319
- #endif
1320
-
1321
- return simde__m256i_from_private(r_);
1322
- #endif
1323
- }
1324
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
1325
- # define _mm256_max_epu8(a, b) simde_mm256_max_epu8(a, b)
1326
- #endif
1327
-
1328
- SIMDE__FUNCTION_ATTRIBUTES
1329
- simde__m256i
1330
- simde_mm256_max_epu16 (simde__m256i a, simde__m256i b) {
1331
- #if defined(SIMDE_AVX2_NATIVE)
1332
- return _mm256_max_epu16(a, b);
1333
- #else
1334
- simde__m256i_private
1335
- r_,
1336
- a_ = simde__m256i_to_private(a),
1337
- b_ = simde__m256i_to_private(b);
1338
-
1339
- #if defined(SIMDE_ARCH_X86_SSE2) || defined(SIMDE_SSE2_NEON)
1340
- r_.m128i[0] = simde_mm_max_epu16(a_.m128i[0], b_.m128i[0]);
1341
- r_.m128i[1] = simde_mm_max_epu16(a_.m128i[1], b_.m128i[1]);
1342
- #else
1343
- SIMDE__VECTORIZE
1344
- for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
1345
- r_.u16[i] = (a_.u16[i] > b_.u16[i]) ? a_.u16[i] : b_.u16[i];
1346
- }
1347
- #endif
1348
-
1349
- return simde__m256i_from_private(r_);
1350
- #endif
1351
- }
1352
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
1353
- # define _mm256_max_epu16(a, b) simde_mm256_max_epu16(a, b)
1354
- #endif
1355
-
1356
- SIMDE__FUNCTION_ATTRIBUTES
1357
- simde__m256i
1358
- simde_mm256_max_epu32 (simde__m256i a, simde__m256i b) {
1359
- #if defined(SIMDE_AVX2_NATIVE)
1360
- return _mm256_max_epu32(a, b);
1361
- #else
1362
- simde__m256i_private
1363
- r_,
1364
- a_ = simde__m256i_to_private(a),
1365
- b_ = simde__m256i_to_private(b);
1366
-
1367
- #if defined(SIMDE_ARCH_X86_SSE2) || defined(SIMDE_SSE2_NEON)
1368
- r_.m128i[0] = simde_mm_max_epu32(a_.m128i[0], b_.m128i[0]);
1369
- r_.m128i[1] = simde_mm_max_epu32(a_.m128i[1], b_.m128i[1]);
1370
- #else
1371
- SIMDE__VECTORIZE
1372
- for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
1373
- r_.u32[i] = (a_.u32[i] > b_.u32[i]) ? a_.u32[i] : b_.u32[i];
1374
- }
1375
- #endif
1376
-
1377
- return simde__m256i_from_private(r_);
1378
- #endif
1379
- }
1380
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
1381
- # define _mm256_max_epu32(a, b) simde_mm256_max_epu32(a, b)
1382
- #endif
1383
-
1384
- SIMDE__FUNCTION_ATTRIBUTES
1385
- simde__m256i
1386
- simde_mm256_max_epi16 (simde__m256i a, simde__m256i b) {
1387
- #if defined(SIMDE_AVX2_NATIVE)
1388
- return _mm256_max_epi16(a, b);
1389
- #else
1390
- simde__m256i_private
1391
- r_,
1392
- a_ = simde__m256i_to_private(a),
1393
- b_ = simde__m256i_to_private(b);
1394
-
1395
- #if defined(SIMDE_ARCH_X86_SSE2)
1396
- r_.m128i[0] = simde_mm_max_epi16(a_.m128i[0], b_.m128i[0]);
1397
- r_.m128i[1] = simde_mm_max_epi16(a_.m128i[1], b_.m128i[1]);
1398
- #else
1399
- SIMDE__VECTORIZE
1400
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
1401
- r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i];
1402
- }
1403
- #endif
1404
-
1405
- return simde__m256i_from_private(r_);
1406
- #endif
1407
- }
1408
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
1409
- # define _mm256_max_epi16(a, b) simde_mm256_max_epi16(a, b)
1410
- #endif
1411
-
1412
- SIMDE__FUNCTION_ATTRIBUTES
1413
- simde__m256i
1414
- simde_mm256_max_epi32 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_AVX2_NATIVE)
-   return _mm256_max_epi32(a, b);
- #else
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a),
-     b_ = simde__m256i_to_private(b);
-
- #if defined(SIMDE_ARCH_X86_SSE4_1)
-   r_.m128i[0] = simde_mm_max_epi32(a_.m128i[0], b_.m128i[0]);
-   r_.m128i[1] = simde_mm_max_epi32(a_.m128i[1], b_.m128i[1]);
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
-     r_.i32[i] = a_.i32[i] > b_.i32[i] ? a_.i32[i] : b_.i32[i];
-   }
- #endif
-
-   return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_max_epi32(a, b) simde_mm256_max_epi32(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_min_epi8 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_AVX2_NATIVE) && !defined(__PGI)
-   return _mm256_min_epi8(a, b);
- #else
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a),
-     b_ = simde__m256i_to_private(b);
-
- #if defined(SIMDE_ARCH_X86_SSE4_1)
-   r_.m128i[0] = simde_mm_min_epi8(a_.m128i[0], b_.m128i[0]);
-   r_.m128i[1] = simde_mm_min_epi8(a_.m128i[1], b_.m128i[1]);
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
-     r_.i8[i] = a_.i8[i] < b_.i8[i] ? a_.i8[i] : b_.i8[i];
-   }
- #endif
-
-   return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_min_epi8(a, b) simde_mm256_min_epi8(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_min_epi16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_AVX2_NATIVE)
-   return _mm256_min_epi16(a, b);
- #else
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a),
-     b_ = simde__m256i_to_private(b);
-
- #if defined(SIMDE_ARCH_X86_SSE2)
-   r_.m128i[0] = simde_mm_min_epi16(a_.m128i[0], b_.m128i[0]);
-   r_.m128i[1] = simde_mm_min_epi16(a_.m128i[1], b_.m128i[1]);
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
-     r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i];
-   }
- #endif
-
-   return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_min_epi16(a, b) simde_mm256_min_epi16(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_min_epi32 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_AVX2_NATIVE)
-   return _mm256_min_epi32(a, b);
- #else
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a),
-     b_ = simde__m256i_to_private(b);
-
- #if defined(SIMDE_ARCH_X86_SSE4_1)
-   r_.m128i[0] = simde_mm_min_epi32(a_.m128i[0], b_.m128i[0]);
-   r_.m128i[1] = simde_mm_min_epi32(a_.m128i[1], b_.m128i[1]);
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
-     r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i];
-   }
- #endif
-
-   return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_min_epi32(a, b) simde_mm256_min_epi32(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_min_epu8 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_AVX2_NATIVE)
-   return _mm256_min_epu8(a, b);
- #else
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a),
-     b_ = simde__m256i_to_private(b);
-
- #if defined(SIMDE_ARCH_X86_SSE2)
-   r_.m128i[0] = simde_mm_min_epu8(a_.m128i[0], b_.m128i[0]);
-   r_.m128i[1] = simde_mm_min_epu8(a_.m128i[1], b_.m128i[1]);
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
-     r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i];
-   }
- #endif
-
-   return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_min_epu8(a, b) simde_mm256_min_epu8(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_min_epu16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_AVX2_NATIVE)
-   return _mm256_min_epu16(a, b);
- #else
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a),
-     b_ = simde__m256i_to_private(b);
-
- #if defined(SIMDE_ARCH_X86_SSE2)
-   r_.m128i[0] = simde_mm_min_epu16(a_.m128i[0], b_.m128i[0]);
-   r_.m128i[1] = simde_mm_min_epu16(a_.m128i[1], b_.m128i[1]);
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
-     r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? a_.u16[i] : b_.u16[i];
-   }
- #endif
-
-   return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_min_epu16(a, b) simde_mm256_min_epu16(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_min_epu32 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_AVX2_NATIVE)
-   return _mm256_min_epu32(a, b);
- #else
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a),
-     b_ = simde__m256i_to_private(b);
-
- #if defined(SIMDE_ARCH_X86_SSE2)
-   r_.m128i[0] = simde_mm_min_epu32(a_.m128i[0], b_.m128i[0]);
-   r_.m128i[1] = simde_mm_min_epu32(a_.m128i[1], b_.m128i[1]);
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
-     r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? a_.u32[i] : b_.u32[i];
-   }
- #endif
-
-   return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_min_epu32(a, b) simde_mm256_min_epu32(a, b)
- #endif
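The seven min/max shims above differ only in lane width and signedness, and the same bit pattern can order differently under the two views. A minimal standalone sketch (plain C, no SIMDe headers required) of the per-lane semantics of the scalar fallbacks:

    #include <stdint.h>
    #include <stdio.h>

    /* The same input bits, compared as signed vs. unsigned bytes,
       pick different winners -- hence the epi8/epu8 split above. */
    int main(void) {
        int8_t  sa = -1,   sb = 1;    /* 0xFF and 0x01 as signed bytes */
        uint8_t ua = 0xFF, ub = 0x01; /* identical bit patterns, unsigned */
        printf("min_epi8 lane: %d\n", (sa < sb) ? sa : sb); /* -1 */
        printf("min_epu8 lane: %u\n", (ua < ub) ? ua : ub); /*  1 */
        return 0;
    }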
-
- SIMDE__FUNCTION_ATTRIBUTES
- int32_t
- simde_mm256_movemask_epi8 (simde__m256i a) {
- #if defined(SIMDE_AVX2_NATIVE)
-   return _mm256_movemask_epi8(a);
- #else
-   simde__m256i_private a_ = simde__m256i_to_private(a);
-   int32_t r;
-
- #if defined(SIMDE_ARCH_X86_SSE2)
-   r = simde_mm_movemask_epi8(a_.m128i[1]);
-   r = (r << 16) | simde_mm_movemask_epi8(a_.m128i[0]);
- #else
-   r = 0;
-   SIMDE__VECTORIZE_REDUCTION(|:r)
-   for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) {
-     r |= (a_.u8[31 - i] >> 7) << (31 - i);
-   }
- #endif
-
-   return r;
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_movemask_epi8(a) simde_mm256_movemask_epi8(a)
- #endif
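The SSE2 path stitches two 16-bit lane masks together; the scalar fallback packs one sign bit per byte. A minimal sketch of that packing, assuming nothing beyond standard C:

    #include <stdint.h>
    #include <stdio.h>

    /* Pack the sign bit of each of 32 bytes into one 32-bit mask
       (bit i <- sign of byte i), mirroring the fallback loop above. */
    int main(void) {
        uint8_t v[32] = {0};
        v[0] = 0x80; v[31] = 0xFF;   /* sign bits set in lanes 0 and 31 */
        uint32_t r = 0;
        for (size_t i = 0 ; i < 32 ; i++)
            r |= (uint32_t)(v[i] >> 7) << i;
        printf("0x%08x\n", r);       /* prints 0x80000001 */
        return 0;
    }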
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_or_si256 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_AVX2_NATIVE)
-   return _mm256_or_si256(a, b);
- #else
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a),
-     b_ = simde__m256i_to_private(b);
-
- #if defined(SIMDE_ARCH_X86_SSE2)
-   r_.m128i[0] = simde_mm_or_si128(a_.m128i[0], b_.m128i[0]);
-   r_.m128i[1] = simde_mm_or_si128(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-   r_.i32f = a_.i32f | b_.i32f;
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
-     r_.i32f[i] = a_.i32f[i] | b_.i32f[i];
-   }
- #endif
-
-   return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_or_si256(a, b) simde_mm256_or_si256(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_packs_epi32 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_AVX2_NATIVE)
-   return _mm256_packs_epi32(a, b);
- #else
-   simde__m256i_private
-     r_,
-     v_[] = {
-       simde__m256i_to_private(a),
-       simde__m256i_to_private(b)
-     };
- #if defined(SIMDE_ARCH_X86_SSE2) || defined(SIMDE_SSE2_NEON)
-   r_.m128i_private[0] = simde__m128i_to_private(simde_mm_packs_epi32(simde__m128i_from_private(v_[0].m128i_private[0]), simde__m128i_from_private(v_[1].m128i_private[0])));
-   r_.m128i_private[1] = simde__m128i_to_private(simde_mm_packs_epi32(simde__m128i_from_private(v_[0].m128i_private[1]), simde__m128i_from_private(v_[1].m128i_private[1])));
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
-     const int32_t v = v_[(i >> 2) & 1].i32[(i & 11) - ((i & 8) >> 1)];
-     r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (v > INT16_MAX) ? INT16_MAX : ((v < INT16_MIN) ? INT16_MIN : v));
-   }
- #endif
-
-   return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_packs_epi32(a, b) simde_mm256_packs_epi32(a, b)
- #endif
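The scalar fallback clamps each 32-bit value into the int16_t range; note that the AVX2 result interleaves per 128-bit lane (a0..a3, b0..b3, a4..a7, b4..b7), which is what the index expression above encodes. A small sketch of the saturation step alone:

    #include <stdint.h>
    #include <stdio.h>

    /* Saturating 32->16 narrowing: the per-lane core of packs_epi32. */
    static int16_t sat16(int32_t v) {
        return (int16_t)((v > INT16_MAX) ? INT16_MAX
                                         : ((v < INT16_MIN) ? INT16_MIN : v));
    }

    int main(void) {
        printf("%d %d %d\n", sat16(100000), sat16(-100000), sat16(1234));
        /* prints: 32767 -32768 1234 */
        return 0;
    }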
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_permute2x128_si256 (simde__m256i a, simde__m256i b, const int imm8)
-     HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a),
-     b_ = simde__m256i_to_private(b);
-
-   r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8     ) & 1] : a_.m128i_private[(imm8     ) & 1]);
-   r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]);
-
-   return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_AVX2_NATIVE)
- # define simde_mm256_permute2x128_si256(a, b, imm8) _mm256_permute2x128_si256(a, b, imm8)
- #endif
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_permute2x128_si256(a, b, imm8) simde_mm256_permute2x128_si256(a, b, imm8)
- #endif
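The two assignments above decode imm8 as two independent 4-bit selector fields: bit 3 forces a zero half, bit 1 chooses b over a, and bit 0 picks the high or low 128-bit half of the chosen source. A small sketch of that decode (the helper name decode is illustrative only, not part of SIMDe):

    #include <stdio.h>

    /* Decode one 4-bit selector field of permute2x128's imm8. */
    static const char *decode(int sel) {
        static char buf[8];
        if (sel & 0x08) return "zero";
        snprintf(buf, sizeof(buf), "%c[%d]", (sel & 0x02) ? 'b' : 'a', sel & 1);
        return buf;
    }

    int main(void) {
        int imm8 = 0x31;
        printf("low  half <- %s\n", decode(imm8 & 0x0f)); /* a[1] */
        printf("high half <- %s\n", decode(imm8 >> 4));   /* b[1] */
        return 0;
    }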
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_permute4x64_epi64 (simde__m256i a, const int imm8)
-     HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a);
-
-   r_.i64[0] = (imm8 & 0x02) ? a_.i64[((imm8     ) & 1)+2] : a_.i64[(imm8     ) & 1];
-   r_.i64[1] = (imm8 & 0x08) ? a_.i64[((imm8 >> 2) & 1)+2] : a_.i64[(imm8 >> 2) & 1];
-   r_.i64[2] = (imm8 & 0x20) ? a_.i64[((imm8 >> 4) & 1)+2] : a_.i64[(imm8 >> 4) & 1];
-   r_.i64[3] = (imm8 & 0x80) ? a_.i64[((imm8 >> 6) & 1)+2] : a_.i64[(imm8 >> 6) & 1];
-
-   return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_AVX2_NATIVE)
- # define simde_mm256_permute4x64_epi64(a, imm8) _mm256_permute4x64_epi64(a, imm8)
- #endif
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_permute4x64_epi64(a, imm8) simde_mm256_permute4x64_epi64(a, imm8)
- #endif
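The four bit tests above are an unrolled decode of imm8 as four independent 2-bit source indices, one per 64-bit result lane. A sketch of the equivalent loop form:

    #include <stdio.h>

    /* permute4x64: result lane i takes source lane (imm8 >> 2i) & 3. */
    int main(void) {
        int imm8 = 0x4E;  /* 0b01001110: swaps the two 128-bit halves */
        for (int i = 0 ; i < 4 ; i++)
            printf("r.i64[%d] <- a.i64[%d]\n", i, (imm8 >> (i * 2)) & 3);
        return 0;
    }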
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_shuffle_epi8 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_AVX2_NATIVE)
-   return _mm256_shuffle_epi8(a, b);
- #else
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a),
-     b_ = simde__m256i_to_private(b);
-
- #if defined(SIMDE_ARCH_X86_SSSE3)
-   r_.m128i[0] = simde_mm_shuffle_epi8(a_.m128i[0], b_.m128i[0]);
-   r_.m128i[1] = simde_mm_shuffle_epi8(a_.m128i[1], b_.m128i[1]);
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; i++) {
-     r_.u8[ i     ] = (b_.u8[ i     ] & 0x80) ? 0 : a_.u8[(b_.u8[ i     ] & 0x0f)     ];
-     r_.u8[i + 16] = (b_.u8[i + 16] & 0x80) ? 0 : a_.u8[(b_.u8[i + 16] & 0x0f) + 16];
-   }
- #endif
-
-   return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_shuffle_epi8(a, b) simde_mm256_shuffle_epi8(a, b)
- #endif
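Each 128-bit lane shuffles independently in this pshufb-style operation: the low nibble of each control byte selects within the lane, and a set high bit forces zero, exactly as the fallback loop above does for both lanes at once. A one-lane sketch in plain C:

    #include <stdint.h>
    #include <stdio.h>

    /* One 128-bit lane of the byte shuffle: low nibble indexes within
       the lane; a set high bit in the control byte yields zero. */
    int main(void) {
        uint8_t a[16], b[16] = {15, 14, 0x80, 1}, r[16];
        for (int i = 0 ; i < 16 ; i++) a[i] = (uint8_t)(i * 10);
        for (int i = 0 ; i < 16 ; i++)
            r[i] = (b[i] & 0x80) ? 0 : a[b[i] & 0x0f];
        printf("%u %u %u %u\n", r[0], r[1], r[2], r[3]); /* 150 140 0 10 */
        return 0;
    }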
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_shuffle_epi32 (simde__m256i a, const int imm8) {
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a);
-
-   for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) {
-     r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3];
-   }
-   for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) {
-     r_.i32[i + 4] = a_.i32[((imm8 >> (i * 2)) & 3) + 4];
-   }
-
-   return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_AVX2_NATIVE)
- # define simde_mm256_shuffle_epi32(a, imm8) _mm256_shuffle_epi32(a, imm8)
- #elif defined(SIMDE_ARCH_X86_SSE2) && !defined(__PGI)
- # define simde_mm256_shuffle_epi32(a, imm8) \
-     simde_mm256_set_m128i( \
-       simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \
-       simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #elif defined(SIMDE__SHUFFLE_VECTOR)
- # define simde_mm256_shuffle_epi32(a, imm8) (__extension__ ({ \
-       const simde__m256i_private simde__tmp_a_ = simde__m256i_to_private(a); \
-       simde__m256i_from_private((simde__m256i_private) { .i32 = \
-         SIMDE__SHUFFLE_VECTOR(32, 32, \
-           (simde__tmp_a_).i32, \
-           (simde__tmp_a_).i32, \
-           ((imm8)     ) & 3, \
-           ((imm8) >> 2) & 3, \
-           ((imm8) >> 4) & 3, \
-           ((imm8) >> 6) & 3, \
-           (((imm8)     ) & 3) + 4, \
-           (((imm8) >> 2) & 3) + 4, \
-           (((imm8) >> 4) & 3) + 4, \
-           (((imm8) >> 6) & 3) + 4) }); }))
- #endif
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_shuffle_epi32(a, imm8) simde_mm256_shuffle_epi32(a, imm8)
- #endif
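shuffle_epi32 reads imm8 the same way as permute4x64 but applies the four 2-bit selectors lane-locally, once per 128-bit half, which is why the loop above is duplicated with a +4 offset. A sketch of the decode for one half:

    #include <stdio.h>

    /* imm8 packs four 2-bit source indices; the pattern repeats
       (with +4 offsets) for the upper 128-bit half. */
    int main(void) {
        int imm8 = 0x1B;  /* 0b00011011: reverses the four dwords in a half */
        for (int i = 0 ; i < 4 ; i++)
            printf("r[%d] <- a[%d]\n", i, (imm8 >> (i * 2)) & 3);
        return 0;
    }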
-
- #if defined(SIMDE_AVX2_NATIVE)
- # define simde_mm256_shufflelo_epi16(a, imm8) _mm256_shufflelo_epi16(a, imm8)
- #elif defined(SIMDE_ARCH_X86_SSE2)
- # define simde_mm256_shufflelo_epi16(a, imm8) \
-     simde_mm256_set_m128i( \
-       simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \
-       simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #elif defined(SIMDE__SHUFFLE_VECTOR)
- # define simde_mm256_shufflelo_epi16(a, imm8) (__extension__ ({ \
-       const simde__m256i_private simde__tmp_a_ = simde__m256i_to_private(a); \
-       simde__m256i_from_private((simde__m256i_private) { .i16 = \
-         SIMDE__SHUFFLE_VECTOR(16, 32, \
-           (simde__tmp_a_).i16, \
-           (simde__tmp_a_).i16, \
-           (((imm8)     ) & 3), \
-           (((imm8) >> 2) & 3), \
-           (((imm8) >> 4) & 3), \
-           (((imm8) >> 6) & 3), \
-           4, 5, 6, 7, \
-           ((((imm8)     ) & 3) + 8), \
-           ((((imm8) >> 2) & 3) + 8), \
-           ((((imm8) >> 4) & 3) + 8), \
-           ((((imm8) >> 6) & 3) + 8), \
-           12, 13, 14, 15) }); }))
- #else
- # define simde_mm256_shufflelo_epi16(a, imm8) \
-     simde_mm256_set_m128i( \
-       simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), imm8), \
-       simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), imm8))
- #endif
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_shufflelo_epi16(a, imm8) simde_mm256_shufflelo_epi16(a, imm8)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_slli_epi16 (simde__m256i a, const int imm8)
-     HEDLEY_REQUIRE_MSG((imm8 & 15) == imm8, "imm8 must be in range [0, 15]") {
-   /* Note: There is no consistency in how compilers handle values outside of
-      the expected range, hence the discrepancy between what we allow and what
-      Intel specifies. Some compilers will return 0, others seem to just mask
-      off everything outside of the range. */
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a);
-
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-   r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, imm8);
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
-     r_.i16[i] = a_.i16[i] << (imm8 & 0xff);
-   }
- #endif
-
-   return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_AVX2_NATIVE)
- # define simde_mm256_slli_epi16(a, imm8) _mm256_slli_epi16(a, imm8)
- #elif defined(SIMDE_ARCH_X86_SSE2)
- # define simde_mm256_slli_epi16(a, imm8) \
-     simde_mm256_set_m128i( \
-       simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \
-       simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #endif
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_slli_epi16(a, imm8) simde_mm256_slli_epi16(a, imm8)
- #endif
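As the comment in the function warns, shift counts outside [0, 15] are not portable here: Intel specifies a zero result while some compilers mask the count instead, which is why the range is asserted rather than handled. A sketch of the in-range per-lane behavior of the scalar fallback:

    #include <stdint.h>
    #include <stdio.h>

    /* Per-lane semantics of slli_epi16 for in-range counts; bits shifted
       past the top of each 16-bit lane are simply discarded. */
    int main(void) {
        int16_t a = 0x1234;
        for (int imm8 = 0 ; imm8 < 16 ; imm8 += 5)
            printf("0x1234 << %2d = 0x%04x\n", imm8, (uint16_t)(a << imm8));
        return 0;
    }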
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_slli_epi32 (simde__m256i a, const int imm8)
-     HEDLEY_REQUIRE_MSG((imm8 & 31) == imm8, "imm8 must be in range [0, 31]") {
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a);
-
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-   r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, imm8);
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
-     r_.i32[i] = a_.i32[i] << (imm8 & 0xff);
-   }
- #endif
-
-   return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_AVX2_NATIVE)
- # define simde_mm256_slli_epi32(a, imm8) _mm256_slli_epi32(a, imm8)
- #elif defined(SIMDE_ARCH_X86_SSE2)
- # define simde_mm256_slli_epi32(a, imm8) \
-     simde_mm256_set_m128i( \
-       simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \
-       simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #endif
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_slli_epi32(a, imm8) simde_mm256_slli_epi32(a, imm8)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_slli_epi64 (simde__m256i a, const int imm8)
-     HEDLEY_REQUIRE_MSG((imm8 & 63) == imm8, "imm8 must be in range [0, 63]") {
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a);
-
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-   r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, imm8);
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
-     r_.i64[i] = a_.i64[i] << (imm8 & 0xff);
-   }
- #endif
-
-   return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_AVX2_NATIVE)
- # define simde_mm256_slli_epi64(a, imm8) _mm256_slli_epi64(a, imm8)
- #elif defined(SIMDE_ARCH_X86_SSE2)
- # define simde_mm256_slli_epi64(a, imm8) \
-     simde_mm256_set_m128i( \
-       simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \
-       simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #endif
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_slli_epi64(a, imm8) simde_mm256_slli_epi64(a, imm8)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_sub_epi8 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_AVX2_NATIVE)
-   return _mm256_sub_epi8(a, b);
- #else
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a),
-     b_ = simde__m256i_to_private(b);
-
- #if defined(SIMDE_ARCH_X86_SSE2)
-   r_.m128i[0] = simde_mm_sub_epi8(a_.m128i[0], b_.m128i[0]);
-   r_.m128i[1] = simde_mm_sub_epi8(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-   r_.i8 = a_.i8 - b_.i8;
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
-     r_.i8[i] = a_.i8[i] - b_.i8[i];
-   }
- #endif
-
-   return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_sub_epi8(a, b) simde_mm256_sub_epi8(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_sub_epi16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_AVX2_NATIVE)
-   return _mm256_sub_epi16(a, b);
- #else
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a),
-     b_ = simde__m256i_to_private(b);
-
- #if defined(SIMDE_ARCH_X86_SSE2)
-   r_.m128i[0] = simde_mm_sub_epi16(a_.m128i[0], b_.m128i[0]);
-   r_.m128i[1] = simde_mm_sub_epi16(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-   r_.i16 = a_.i16 - b_.i16;
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
-     r_.i16[i] = a_.i16[i] - b_.i16[i];
-   }
- #endif
-
-   return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_sub_epi16(a, b) simde_mm256_sub_epi16(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_sub_epi32 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_AVX2_NATIVE)
-   return _mm256_sub_epi32(a, b);
- #else
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a),
-     b_ = simde__m256i_to_private(b);
-
- #if defined(SIMDE_ARCH_X86_SSE2)
-   r_.m128i[0] = simde_mm_sub_epi32(a_.m128i[0], b_.m128i[0]);
-   r_.m128i[1] = simde_mm_sub_epi32(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-   r_.i32 = a_.i32 - b_.i32;
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
-     r_.i32[i] = a_.i32[i] - b_.i32[i];
-   }
- #endif
-
-   return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_sub_epi32(a, b) simde_mm256_sub_epi32(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_sub_epi64 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_AVX2_NATIVE)
-   return _mm256_sub_epi64(a, b);
- #else
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a),
-     b_ = simde__m256i_to_private(b);
-
- #if defined(SIMDE_ARCH_X86_SSE2)
-   r_.m128i[0] = simde_mm_sub_epi64(a_.m128i[0], b_.m128i[0]);
-   r_.m128i[1] = simde_mm_sub_epi64(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-   r_.i64 = a_.i64 - b_.i64;
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
-     r_.i64[i] = a_.i64[i] - b_.i64[i];
-   }
- #endif
-
-   return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_sub_epi64(a, b) simde_mm256_sub_epi64(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_srli_epi64 (simde__m256i a, const int imm8) {
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a);
-
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-   r_.u64 = a_.u64 >> HEDLEY_STATIC_CAST(int32_t, imm8);
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
-     r_.u64[i] = a_.u64[i] >> imm8;
-   }
- #endif
-
-   return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_AVX2_NATIVE)
- # define simde_mm256_srli_epi64(a, imm8) _mm256_srli_epi64(a, imm8)
- #elif defined(SIMDE_ARCH_X86_SSE2)
- # define simde_mm256_srli_epi64(a, imm8) \
-     simde_mm256_set_m128i( \
-       simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \
-       simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #endif
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_srli_epi64(a, imm8) simde_mm256_srli_epi64(a, imm8)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_srli_si256 (simde__m256i a, const int imm8) {
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a);
-
-   for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) {
-     SIMDE__VECTORIZE
-     for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) {
-       const int e = imm8 + HEDLEY_STATIC_CAST(int, i);
-       r_.m128i_private[h].i8[i] = (e < 16) ? a_.m128i_private[h].i8[e] : 0;
-     }
-   }
-
-   return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_AVX2_NATIVE)
- # define simde_mm256_srli_si256(a, imm8) _mm256_srli_si256(a, imm8)
- #elif defined(SIMDE_ARCH_X86_SSE2) && !defined(__PGI)
- # define simde_mm256_srli_si256(a, imm8) \
-     simde_mm256_set_m128i( \
-       simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \
-       simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #elif defined(SIMDE_SSE2_NEON)
- # define simde_mm256_srli_si256(a, imm8) \
-     simde_mm256_set_m128i( \
-       simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \
-       simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #endif
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_srli_si256(a, imm8) simde_mm256_srli_si256(a, imm8)
- #endif
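The nested loops shift each 128-bit half right by whole bytes independently: vacated high bytes become zero, and no bytes cross between halves. A one-lane sketch:

    #include <stdint.h>
    #include <stdio.h>

    /* Byte-wise right shift of one 128-bit lane, as the inner loop
       above performs for each half of the 256-bit register. */
    int main(void) {
        uint8_t a[16], r[16];
        int imm8 = 3;
        for (int i = 0 ; i < 16 ; i++) a[i] = (uint8_t)(i + 1);
        for (int i = 0 ; i < 16 ; i++) {
            int e = imm8 + i;
            r[i] = (e < 16) ? a[e] : 0;
        }
        printf("%u %u ... %u %u\n", r[0], r[1], r[14], r[15]); /* 4 5 ... 0 0 */
        return 0;
    }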
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_unpacklo_epi8 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_AVX2_NATIVE)
-   return _mm256_unpacklo_epi8(a, b);
- #else
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a),
-     b_ = simde__m256i_to_private(b);
-
- #if defined(SIMDE__SHUFFLE_VECTOR)
-   r_.i8 = SIMDE__SHUFFLE_VECTOR(8, 32, a_.i8, b_.i8,
-       0, 32, 1, 33, 2, 34, 3, 35,
-       4, 36, 5, 37, 6, 38, 7, 39,
-       16, 48, 17, 49, 18, 50, 19, 51,
-       20, 52, 21, 53, 22, 54, 23, 55);
- #else
-   r_.m128i[0] = simde_mm_unpacklo_epi8(a_.m128i[0], b_.m128i[0]);
-   r_.m128i[1] = simde_mm_unpacklo_epi8(a_.m128i[1], b_.m128i[1]);
- #endif
-
-   return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_unpacklo_epi8(a, b) simde_mm256_unpacklo_epi8(a, b)
- #endif
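A recurring AVX2 surprise: unpacklo interleaves within each 128-bit lane rather than across the whole register, which is exactly what the shuffle indices above (0, 32, 1, 33, ..., 16, 48, 17, 49, ...) encode. A sketch of the low lane:

    #include <stdint.h>
    #include <stdio.h>

    /* Interleave the low 8 bytes of a and b, as unpacklo does
       independently for each 128-bit lane of the 256-bit register. */
    int main(void) {
        uint8_t a[16], b[16], r[16];
        for (int i = 0 ; i < 16 ; i++) { a[i] = (uint8_t)i; b[i] = (uint8_t)(i + 100); }
        for (int i = 0 ; i < 8 ; i++) {
            r[2 * i]     = a[i];
            r[2 * i + 1] = b[i];
        }
        printf("%u %u %u %u\n", r[0], r[1], r[2], r[3]); /* 0 100 1 101 */
        return 0;
    }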
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_unpacklo_epi16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_AVX2_NATIVE)
-   return _mm256_unpacklo_epi16(a, b);
- #else
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a),
-     b_ = simde__m256i_to_private(b);
-
- #if defined(SIMDE__SHUFFLE_VECTOR)
-   r_.i16 = SIMDE__SHUFFLE_VECTOR(16, 32, a_.i16, b_.i16,
-       0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27);
- #else
-   r_.m128i[0] = simde_mm_unpacklo_epi16(a_.m128i[0], b_.m128i[0]);
-   r_.m128i[1] = simde_mm_unpacklo_epi16(a_.m128i[1], b_.m128i[1]);
- #endif
-
-   return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_unpacklo_epi16(a, b) simde_mm256_unpacklo_epi16(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_unpacklo_epi32 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_AVX2_NATIVE)
-   return _mm256_unpacklo_epi32(a, b);
- #else
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a),
-     b_ = simde__m256i_to_private(b);
-
- #if defined(SIMDE__SHUFFLE_VECTOR)
-   r_.i32 = SIMDE__SHUFFLE_VECTOR(32, 32, a_.i32, b_.i32,
-       0, 8, 1, 9, 4, 12, 5, 13);
- #else
-   r_.m128i[0] = simde_mm_unpacklo_epi32(a_.m128i[0], b_.m128i[0]);
-   r_.m128i[1] = simde_mm_unpacklo_epi32(a_.m128i[1], b_.m128i[1]);
- #endif
-
-   return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_unpacklo_epi32(a, b) simde_mm256_unpacklo_epi32(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_unpacklo_epi64 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_AVX2_NATIVE)
-   return _mm256_unpacklo_epi64(a, b);
- #else
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a),
-     b_ = simde__m256i_to_private(b);
-
- #if defined(SIMDE__SHUFFLE_VECTOR)
-   r_.i64 = SIMDE__SHUFFLE_VECTOR(64, 32, a_.i64, b_.i64, 0, 4, 2, 6);
- #else
-   r_.m128i[0] = simde_mm_unpacklo_epi64(a_.m128i[0], b_.m128i[0]);
-   r_.m128i[1] = simde_mm_unpacklo_epi64(a_.m128i[1], b_.m128i[1]);
- #endif
-
-   return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_unpacklo_epi64(a, b) simde_mm256_unpacklo_epi64(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_unpackhi_epi8 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_AVX2_NATIVE)
-   return _mm256_unpackhi_epi8(a, b);
- #else
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a),
-     b_ = simde__m256i_to_private(b);
-
- #if defined(SIMDE__SHUFFLE_VECTOR)
-   r_.i8 = SIMDE__SHUFFLE_VECTOR(8, 32, a_.i8, b_.i8,
-       8, 40, 9, 41, 10, 42, 11, 43,
-       12, 44, 13, 45, 14, 46, 15, 47,
-       24, 56, 25, 57, 26, 58, 27, 59,
-       28, 60, 29, 61, 30, 62, 31, 63);
- #else
-   r_.m128i[0] = simde_mm_unpackhi_epi8(a_.m128i[0], b_.m128i[0]);
-   r_.m128i[1] = simde_mm_unpackhi_epi8(a_.m128i[1], b_.m128i[1]);
- #endif
-
-   return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_unpackhi_epi8(a, b) simde_mm256_unpackhi_epi8(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_unpackhi_epi16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_AVX2_NATIVE)
-   return _mm256_unpackhi_epi16(a, b);
- #else
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a),
-     b_ = simde__m256i_to_private(b);
-
- #if defined(SIMDE__SHUFFLE_VECTOR)
-   r_.i16 = SIMDE__SHUFFLE_VECTOR(16, 32, a_.i16, b_.i16,
-       4, 20, 5, 21, 6, 22, 7, 23,
-       12, 28, 13, 29, 14, 30, 15, 31);
- #else
-   r_.m128i[0] = simde_mm_unpackhi_epi16(a_.m128i[0], b_.m128i[0]);
-   r_.m128i[1] = simde_mm_unpackhi_epi16(a_.m128i[1], b_.m128i[1]);
- #endif
-
-   return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_unpackhi_epi16(a, b) simde_mm256_unpackhi_epi16(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_unpackhi_epi32 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_AVX2_NATIVE)
-   return _mm256_unpackhi_epi32(a, b);
- #else
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a),
-     b_ = simde__m256i_to_private(b);
-
- #if defined(SIMDE__SHUFFLE_VECTOR)
-   r_.i32 = SIMDE__SHUFFLE_VECTOR(32, 32, a_.i32, b_.i32,
-       2, 10, 3, 11, 6, 14, 7, 15);
- #else
-   r_.m128i[0] = simde_mm_unpackhi_epi32(a_.m128i[0], b_.m128i[0]);
-   r_.m128i[1] = simde_mm_unpackhi_epi32(a_.m128i[1], b_.m128i[1]);
- #endif
-
-   return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_unpackhi_epi32(a, b) simde_mm256_unpackhi_epi32(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_unpackhi_epi64 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_AVX2_NATIVE)
-   return _mm256_unpackhi_epi64(a, b);
- #else
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a),
-     b_ = simde__m256i_to_private(b);
-
- #if defined(SIMDE__SHUFFLE_VECTOR)
-   r_.i64 = SIMDE__SHUFFLE_VECTOR(64, 32, a_.i64, b_.i64, 1, 5, 3, 7);
- #else
-   r_.m128i[0] = simde_mm_unpackhi_epi64(a_.m128i[0], b_.m128i[0]);
-   r_.m128i[1] = simde_mm_unpackhi_epi64(a_.m128i[1], b_.m128i[1]);
- #endif
-
-   return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_unpackhi_epi64(a, b) simde_mm256_unpackhi_epi64(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_xor_si256 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_AVX2_NATIVE)
-   return _mm256_xor_si256(a, b);
- #else
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a),
-     b_ = simde__m256i_to_private(b);
-
- #if defined(SIMDE_ARCH_X86_SSE2)
-   r_.m128i[0] = simde_mm_xor_si128(a_.m128i[0], b_.m128i[0]);
-   r_.m128i[1] = simde_mm_xor_si128(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-   r_.i32f = a_.i32f ^ b_.i32f;
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
-     r_.i64[i] = a_.i64[i] ^ b_.i64[i];
-   }
- #endif
-
-   return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_xor_si256(a, b) simde_mm256_xor_si256(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_srli_epi32 (simde__m256i a, const int imm8) {
-   simde__m256i_private
-     r_,
-     a_ = simde__m256i_to_private(a);
-
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-   r_.u32 = a_.u32 >> HEDLEY_STATIC_CAST(int16_t, imm8);
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
-     r_.u32[i] = a_.u32[i] >> imm8;
-   }
- #endif
-
-   return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_AVX2_NATIVE)
- # define simde_mm256_srli_epi32(a, imm8) _mm256_srli_epi32(a, imm8)
- #elif defined(SIMDE_ARCH_X86_SSE2)
- # define simde_mm256_srli_epi32(a, imm8) \
-     simde_mm256_set_m128i( \
-       simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \
-       simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #endif
- #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
- # define _mm256_srli_epi32(a, imm8) simde_mm256_srli_epi32(a, imm8)
- #endif
-
- SIMDE__END_DECLS
-
- HEDLEY_DIAGNOSTIC_POP
-
- #endif /* !defined(SIMDE__AVX2_H) */