minimap2 0.2.25.1 → 0.2.25.2

Files changed (100)
  1. checksums.yaml +4 -4
  2. data/ext/Rakefile +2 -2
  3. data/lib/minimap2/version.rb +1 -1
  4. metadata +1 -97
  5. data/ext/minimap2/lib/simde/CONTRIBUTING.md +0 -114
  6. data/ext/minimap2/lib/simde/COPYING +0 -20
  7. data/ext/minimap2/lib/simde/README.md +0 -333
  8. data/ext/minimap2/lib/simde/amalgamate.py +0 -58
  9. data/ext/minimap2/lib/simde/meson.build +0 -33
  10. data/ext/minimap2/lib/simde/netlify.toml +0 -20
  11. data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +0 -140
  12. data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +0 -137
  13. data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +0 -142
  14. data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +0 -145
  15. data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +0 -140
  16. data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +0 -145
  17. data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +0 -140
  18. data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +0 -143
  19. data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +0 -137
  20. data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +0 -141
  21. data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +0 -147
  22. data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +0 -141
  23. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +0 -134
  24. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +0 -138
  25. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +0 -134
  26. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +0 -137
  27. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +0 -131
  28. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +0 -135
  29. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +0 -141
  30. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +0 -135
  31. data/ext/minimap2/lib/simde/simde/arm/neon.h +0 -97
  32. data/ext/minimap2/lib/simde/simde/check.h +0 -267
  33. data/ext/minimap2/lib/simde/simde/debug-trap.h +0 -83
  34. data/ext/minimap2/lib/simde/simde/hedley.h +0 -1899
  35. data/ext/minimap2/lib/simde/simde/simde-arch.h +0 -445
  36. data/ext/minimap2/lib/simde/simde/simde-common.h +0 -697
  37. data/ext/minimap2/lib/simde/simde/x86/avx.h +0 -5385
  38. data/ext/minimap2/lib/simde/simde/x86/avx2.h +0 -2402
  39. data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +0 -391
  40. data/ext/minimap2/lib/simde/simde/x86/avx512f.h +0 -3389
  41. data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +0 -112
  42. data/ext/minimap2/lib/simde/simde/x86/fma.h +0 -659
  43. data/ext/minimap2/lib/simde/simde/x86/mmx.h +0 -2210
  44. data/ext/minimap2/lib/simde/simde/x86/sse.h +0 -3696
  45. data/ext/minimap2/lib/simde/simde/x86/sse2.h +0 -5991
  46. data/ext/minimap2/lib/simde/simde/x86/sse3.h +0 -343
  47. data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +0 -1783
  48. data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +0 -105
  49. data/ext/minimap2/lib/simde/simde/x86/ssse3.h +0 -1053
  50. data/ext/minimap2/lib/simde/simde/x86/svml.h +0 -543
  51. data/ext/minimap2/lib/simde/test/CMakeLists.txt +0 -166
  52. data/ext/minimap2/lib/simde/test/arm/meson.build +0 -4
  53. data/ext/minimap2/lib/simde/test/arm/neon/meson.build +0 -23
  54. data/ext/minimap2/lib/simde/test/arm/neon/skel.c +0 -871
  55. data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +0 -134
  56. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +0 -39
  57. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +0 -10
  58. data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +0 -1260
  59. data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +0 -873
  60. data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +0 -1084
  61. data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +0 -1260
  62. data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +0 -18
  63. data/ext/minimap2/lib/simde/test/arm/test-arm.c +0 -20
  64. data/ext/minimap2/lib/simde/test/arm/test-arm.h +0 -8
  65. data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +0 -171
  66. data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +0 -68
  67. data/ext/minimap2/lib/simde/test/meson.build +0 -64
  68. data/ext/minimap2/lib/simde/test/munit/COPYING +0 -21
  69. data/ext/minimap2/lib/simde/test/munit/Makefile +0 -55
  70. data/ext/minimap2/lib/simde/test/munit/README.md +0 -54
  71. data/ext/minimap2/lib/simde/test/munit/example.c +0 -351
  72. data/ext/minimap2/lib/simde/test/munit/meson.build +0 -37
  73. data/ext/minimap2/lib/simde/test/munit/munit.c +0 -2055
  74. data/ext/minimap2/lib/simde/test/munit/munit.h +0 -535
  75. data/ext/minimap2/lib/simde/test/run-tests.c +0 -20
  76. data/ext/minimap2/lib/simde/test/run-tests.h +0 -260
  77. data/ext/minimap2/lib/simde/test/x86/avx.c +0 -13752
  78. data/ext/minimap2/lib/simde/test/x86/avx2.c +0 -9977
  79. data/ext/minimap2/lib/simde/test/x86/avx512bw.c +0 -2664
  80. data/ext/minimap2/lib/simde/test/x86/avx512f.c +0 -10416
  81. data/ext/minimap2/lib/simde/test/x86/avx512vl.c +0 -210
  82. data/ext/minimap2/lib/simde/test/x86/fma.c +0 -2557
  83. data/ext/minimap2/lib/simde/test/x86/meson.build +0 -33
  84. data/ext/minimap2/lib/simde/test/x86/mmx.c +0 -2878
  85. data/ext/minimap2/lib/simde/test/x86/skel.c +0 -2984
  86. data/ext/minimap2/lib/simde/test/x86/sse.c +0 -5121
  87. data/ext/minimap2/lib/simde/test/x86/sse2.c +0 -9860
  88. data/ext/minimap2/lib/simde/test/x86/sse3.c +0 -486
  89. data/ext/minimap2/lib/simde/test/x86/sse4.1.c +0 -3446
  90. data/ext/minimap2/lib/simde/test/x86/sse4.2.c +0 -101
  91. data/ext/minimap2/lib/simde/test/x86/ssse3.c +0 -2084
  92. data/ext/minimap2/lib/simde/test/x86/svml.c +0 -1545
  93. data/ext/minimap2/lib/simde/test/x86/test-avx.h +0 -16
  94. data/ext/minimap2/lib/simde/test/x86/test-avx512.h +0 -25
  95. data/ext/minimap2/lib/simde/test/x86/test-mmx.h +0 -13
  96. data/ext/minimap2/lib/simde/test/x86/test-sse.h +0 -13
  97. data/ext/minimap2/lib/simde/test/x86/test-sse2.h +0 -13
  98. data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +0 -196
  99. data/ext/minimap2/lib/simde/test/x86/test-x86.c +0 -48
  100. data/ext/minimap2/lib/simde/test/x86/test-x86.h +0 -8
data/ext/minimap2/lib/simde/simde/x86/ssse3.h
@@ -1,1053 +0,0 @@
- /* Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Copyright:
- * 2017-2020 Evan Nemerson <evan@nemerson.com>
- */
-
- #if !defined(SIMDE__SSSE3_H)
- # if !defined(SIMDE__SSSE3_H)
- # define SIMDE__SSSE3_H
- # endif
- # include "sse3.h"
-
- HEDLEY_DIAGNOSTIC_PUSH
- SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
-
- # if defined(SIMDE_SSSE3_NATIVE)
- # undef SIMDE_SSSE3_NATIVE
- # endif
- # if defined(SIMDE_ARCH_X86_SSSE3) && !defined(SIMDE_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
- # define SIMDE_SSSE3_NATIVE
- # elif defined(SIMDE_ARCH_ARM_NEON) && !defined(SIMDE_SSSE3_NO_NEON) && !defined(SIMDE_NO_NEON)
- # define SIMDE_SSSE3_NEON
- # elif defined(SIMDE_ARCH_POWER_ALTIVEC)
- # define SIMDE_SSSE3_POWER_ALTIVEC
- # endif
-
- # if defined(SIMDE_SSSE3_NATIVE) && !defined(SIMDE_SSE3_NATIVE)
- # if defined(SIMDE_SSSE3_FORCE_NATIVE)
- # error Native SSSE3 support requires native SSE3 support
- # else
- HEDLEY_WARNING("Native SSSE3 support requires native SSE3 support, disabling")
- # undef SIMDE_SSSE3_NATIVE
- # endif
- # elif defined(SIMDE_SSSE3_NEON) && !defined(SIMDE_SSE3_NEON)
- HEDLEY_WARNING("SSSE3 NEON support requires SSE3 NEON support, disabling")
- # undef SIMDE_SSSE3_NEON
- # endif
-
- # if defined(SIMDE_SSSE3_NATIVE)
- # include <tmmintrin.h>
- # else
- # if defined(SIMDE_SSSE3_NEON)
- # include <arm_neon.h>
- # endif
- # endif
-
- #if !defined(SIMDE_SSSE3_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
- # define SIMDE_SSSE3_ENABLE_NATIVE_ALIASES
- #endif
-
- SIMDE__BEGIN_DECLS
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_abs_epi8 (simde__m128i a) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_abs_epi8(a);
- #else
- simde__m128i_private
- r_,
- a_ = simde__m128i_to_private(a);
-
- #if defined(SIMDE_SSSE3_NEON)
- r_.neon_i8 = vabsq_s8(a_.neon_i8);
- #elif defined(SIMDE_SSSE3_POWER_ALTIVEC)
- r_.altivec_i8 = vec_abs(a_.altivec_i8);
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
- r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]);
- }
- #endif
-
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_abs_epi8(a) simde_mm_abs_epi8(a)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_abs_epi16 (simde__m128i a) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_abs_epi16(a);
- #else
- simde__m128i_private
- r_,
- a_ = simde__m128i_to_private(a);
-
- #if defined(SIMDE_SSSE3_NEON)
- r_.neon_i16 = vabsq_s16(a_.neon_i16);
- #elif defined(SIMDE_SSSE3_POWER_ALTIVEC)
- r_.altivec_i16 = vec_abs(a_.altivec_i16);
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]);
- }
- #endif
-
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_abs_epi16(a) simde_mm_abs_epi16(a)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_abs_epi32 (simde__m128i a) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_abs_epi32(a);
- #else
- simde__m128i_private
- r_,
- a_ = simde__m128i_to_private(a);
-
- #if defined(SIMDE_SSSE3_NEON)
- r_.neon_i32 = vabsq_s32(a_.neon_i32);
- #elif defined(SIMDE_SSSE3_POWER_ALTIVEC)
- r_.altivec_i32 = vec_abs(a_.altivec_i32);
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- #if defined(_MSC_VER)
- HEDLEY_DIAGNOSTIC_PUSH
- #pragma warning(disable:4146)
- #endif
- r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]);
- #if defined(_MSC_VER)
- HEDLEY_DIAGNOSTIC_POP
- #endif
- }
- #endif
-
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_abs_epi32(a) simde_mm_abs_epi32(a)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_abs_pi8 (simde__m64 a) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_abs_pi8(a);
- #else
- simde__m64_private
- r_,
- a_ = simde__m64_to_private(a);
-
- #if defined(SIMDE_SSSE3_NEON)
- r_.neon_i8 = vabs_s8(a_.neon_i8);
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
- r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]);
- }
- #endif
-
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_abs_pi8(a) simde_mm_abs_pi8(a)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_abs_pi16 (simde__m64 a) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_abs_pi16(a);
- #else
- simde__m64_private
- r_,
- a_ = simde__m64_to_private(a);
-
- #if defined(SIMDE_SSSE3_NEON)
- r_.neon_i16 = vabs_s16(a_.neon_i16);
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]);
- }
- #endif
-
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_abs_pi16(a) simde_mm_abs_pi16(a)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_abs_pi32 (simde__m64 a) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_abs_pi32(a);
- #else
- simde__m64_private
- r_,
- a_ = simde__m64_to_private(a);
-
- #if defined(SIMDE_SSSE3_NEON)
- r_.neon_i32 = vabs_s32(a_.neon_i32);
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) ? (- a_.i32[i]) : a_.i32[i]);
- }
- #endif
-
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_abs_pi32(a) simde_mm_abs_pi32(a)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count) {
- simde__m128i_private
- r_,
- a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
-
- #if 0 && defined(SIMDE_BYTE_ORDER_LE)
- const int bits = (8 * count) % 64;
- const int eo = count / 8;
-
- switch (eo) {
- case 0:
- r_.u64[0] = b_.u64[0] >> bits;
- r_.u64[0] |= b_.u64[1] << (64 - bits);
- r_.u64[1] = b_.u64[1] >> bits;
- r_.u64[1] |= a_.u64[0] << (64 - bits);
- break;
- case 1:
- r_.u64[0] = b_.u64[1] >> bits;
- r_.u64[0] |= a_.u64[0] << (64 - bits);
- r_.u64[1] = a_.u64[0] >> bits;
- r_.u64[1] |= a_.u64[1] << (64 - bits);
- break;
- case 2:
- r_.u64[0] = a_.u64[0] >> bits;
- r_.u64[0] |= a_.u64[1] << (64 - bits);
- r_.u64[1] = a_.u64[1] >> bits;
- break;
- case 3:
- r_.u64[0] = a_.u64[1] >> bits;
- r_.u64[1] = 0;
- break;
- default:
- HEDLEY_UNREACHABLE();
- break;
- }
- #else
- if (HEDLEY_UNLIKELY(count > 31))
- return simde_mm_setzero_si128();
-
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
- const int srcpos = count + HEDLEY_STATIC_CAST(int, i);
- if (srcpos > 31) {
- r_.i8[i] = 0;
- } else if (srcpos > 15) {
- r_.i8[i] = a_.i8[(srcpos) & 15];
- } else {
- r_.i8[i] = b_.i8[srcpos];
- }
- }
- #endif
-
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_SSSE3_NATIVE)
- # define simde_mm_alignr_epi8(a, b, count) _mm_alignr_epi8(a, b, count)
- #endif
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_alignr_epi8(a, b, count) simde_mm_alignr_epi8(a, b, count)
- #endif
-
- #if defined(simde_mm_alignr_pi8)
- # undef simde_mm_alignr_pi8
- #endif
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_alignr_pi8 (simde__m64 a, simde__m64 b, const int count) {
- simde__m64_private
- r_,
- a_ = simde__m64_to_private(a),
- b_ = simde__m64_to_private(b);
-
- if (HEDLEY_UNLIKELY(count > 15))
- return simde_mm_setzero_si64();
-
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
- const int srcpos = count + HEDLEY_STATIC_CAST(int, i);
- if (srcpos > 15) {
- r_.i8[i] = 0;
- } else if (srcpos > 7) {
- r_.i8[i] = a_.i8[(srcpos) & 7];
- } else {
- r_.i8[i] = b_.i8[srcpos];
- }
- }
-
- return simde__m64_from_private(r_);
- }
- #if defined(SIMDE_SSSE3_NATIVE)
- # define simde_mm_alignr_pi8(a, b, count) _mm_alignr_pi8(a, b, count)
- #endif
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_alignr_pi8(a, b, count) simde_mm_alignr_pi8(a, b, count)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_shuffle_epi8(a, b);
- #else
- simde__m128i_private
- r_,
- a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
-
- #if defined(SIMDE_SSSE3_NEON)
- /* Mask out the bits we're not interested in. vtbl will result in 0
- for any values outside of [0, 15], so if the high bit is set it
- will return 0, just like in SSSE3. */
- b_.neon_i8 = vandq_s8(b_.neon_i8, vdupq_n_s8((int8_t)((1 << 7) | 15)));
-
- /* Convert a from an int8x16_t to an int8x8x2_t */
- int8x8x2_t i = { .val = { vget_low_s8(a_.neon_i8), vget_high_s8(a_.neon_i8) } };
-
- /* Table lookups */
- int8x8_t l = vtbl2_s8(i, vget_low_s8(b_.neon_i8));
- int8x8_t h = vtbl2_s8(i, vget_high_s8(b_.neon_i8));
-
- r_.neon_i8 = vcombine_s8(l, h);
- #else
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
- r_.i8[i] = a_.i8[b_.i8[i] & 15] & (~(b_.i8[i]) >> 7);
- }
- #endif
-
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_shuffle_epi8(a, b) simde_mm_shuffle_epi8(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_shuffle_pi8 (simde__m64 a, simde__m64 b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_shuffle_pi8(a, b);
- #else
- simde__m64_private
- r_,
- a_ = simde__m64_to_private(a),
- b_ = simde__m64_to_private(b);
-
- #if defined(SIMDE_SSSE3_NEON)
- b_.neon_i8 = vand_s8(b_.neon_i8, vdup_n_s8((int8_t)((1 << 7) | 7)));
- r_.neon_i8 = vtbl1_s8(a_.neon_i8, b_.neon_i8);
- #else
- for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
- r_.i8[i] = a_.i8[b_.i8[i] & 7] & (~(b_.i8[i]) >> 7);
- }
- #endif
-
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_shuffle_pi8(a, b) simde_mm_shuffle_pi8(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_hadd_epi16 (simde__m128i a, simde__m128i b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_hadd_epi16(a, b);
- #else
- simde__m128i_private
- r_,
- a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
-
- #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
- r_.neon_i16 = vaddq_s16(vuzp1q_s16(a_.neon_i16, b_.neon_i16), vuzp2q_s16(a_.neon_i16, b_.neon_i16));
- #elif defined(SIMDE_ASSUME_VECTORIZATION) && defined(SIMDE__SHUFFLE_VECTOR)
- r_.i16 =
- SIMDE__SHUFFLE_VECTOR(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14) +
- SIMDE__SHUFFLE_VECTOR(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15);
- #else
- r_.i16[0] = a_.i16[0] + a_.i16[1];
- r_.i16[1] = a_.i16[2] + a_.i16[3];
- r_.i16[2] = a_.i16[4] + a_.i16[5];
- r_.i16[3] = a_.i16[6] + a_.i16[7];
- r_.i16[4] = b_.i16[0] + b_.i16[1];
- r_.i16[5] = b_.i16[2] + b_.i16[3];
- r_.i16[6] = b_.i16[4] + b_.i16[5];
- r_.i16[7] = b_.i16[6] + b_.i16[7];
- #endif
-
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_hadd_epi16(a, b) simde_mm_hadd_epi16(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_hadd_epi32 (simde__m128i a, simde__m128i b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_hadd_epi32(a, b);
- #else
- simde__m128i_private
- r_,
- a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
-
- #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
- r_.neon_i32 = vaddq_s32(vuzp1q_s32(a_.neon_i32, b_.neon_i32), vuzp2q_s32(a_.neon_i32, b_.neon_i32));
- #elif defined(SIMDE_ASSUME_VECTORIZATION) && defined(SIMDE__SHUFFLE_VECTOR)
- r_.i32 =
- SIMDE__SHUFFLE_VECTOR(32, 16, a_.i32, b_.i32, 0, 2, 4, 6) +
- SIMDE__SHUFFLE_VECTOR(32, 16, a_.i32, b_.i32, 1, 3, 5, 7);
- #else
- r_.i32[0] = a_.i32[0] + a_.i32[1];
- r_.i32[1] = a_.i32[2] + a_.i32[3];
- r_.i32[2] = b_.i32[0] + b_.i32[1];
- r_.i32[3] = b_.i32[2] + b_.i32[3];
- #endif
-
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_hadd_epi32(a, b) simde_mm_hadd_epi32(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_hadd_pi16 (simde__m64 a, simde__m64 b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_hadd_pi16(a, b);
- #else
- simde__m64_private
- r_,
- a_ = simde__m64_to_private(a),
- b_ = simde__m64_to_private(b);
-
- #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
- r_.neon_i16 = vadd_s16(vuzp1_s16(a_.neon_i16, b_.neon_i16), vuzp2_s16(a_.neon_i16, b_.neon_i16));
- #elif defined(SIMDE_ASSUME_VECTORIZATION) && defined(SIMDE__SHUFFLE_VECTOR)
- r_.i16 =
- SIMDE__SHUFFLE_VECTOR(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) +
- SIMDE__SHUFFLE_VECTOR(16, 8, a_.i16, b_.i16, 1, 3, 5, 7);
- #else
- r_.i16[0] = a_.i16[0] + a_.i16[1];
- r_.i16[1] = a_.i16[2] + a_.i16[3];
- r_.i16[2] = b_.i16[0] + b_.i16[1];
- r_.i16[3] = b_.i16[2] + b_.i16[3];
- #endif
-
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_hadd_pi16(a, b) simde_mm_hadd_pi16(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_hadd_pi32 (simde__m64 a, simde__m64 b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_hadd_pi32(a, b);
- #else
- simde__m64_private
- r_,
- a_ = simde__m64_to_private(a),
- b_ = simde__m64_to_private(b);
-
- #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
- r_.neon_i32 = vadd_s32(vuzp1_s32(a_.neon_i32, b_.neon_i32), vuzp2_s32(a_.neon_i32, b_.neon_i32));
- #elif defined(SIMDE_ASSUME_VECTORIZATION) && defined(SIMDE__SHUFFLE_VECTOR)
- r_.i32 =
- SIMDE__SHUFFLE_VECTOR(32, 8, a_.i32, b_.i32, 0, 2) +
- SIMDE__SHUFFLE_VECTOR(32, 8, a_.i32, b_.i32, 1, 3);
- #else
- r_.i32[0] = a_.i32[0] + a_.i32[1];
- r_.i32[1] = b_.i32[0] + b_.i32[1];
- #endif
-
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_hadd_pi32(a, b) simde_mm_hadd_pi32(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_hadds_epi16 (simde__m128i a, simde__m128i b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_hadds_epi16(a, b);
- #else
- simde__m128i_private
- r_,
- a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
-
- #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
- r_.neon_i16 = vqaddq_s16(vuzp1q_s16(a_.neon_i16, b_.neon_i16), vuzp2q_s16(a_.neon_i16, b_.neon_i16));
- #else
- for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) {
- int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]);
- r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN;
- int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]);
- r_.i16[i + 4] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN;
- }
- #endif
-
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_hadds_epi16(a, b) simde_mm_hadds_epi16(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_hadds_pi16 (simde__m64 a, simde__m64 b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_hadds_pi16(a, b);
- #else
- simde__m64_private
- r_,
- a_ = simde__m64_to_private(a),
- b_ = simde__m64_to_private(b);
-
- #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
- r_.neon_i16 = vqadd_s16(vuzp1_s16(a_.neon_i16, b_.neon_i16), vuzp2_s16(a_.neon_i16, b_.neon_i16));
- #else
- for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) {
- int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]);
- r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN;
- int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]);
- r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN;
- }
- #endif
-
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_hadds_pi16(a, b) simde_mm_hadds_pi16(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_hsub_epi16 (simde__m128i a, simde__m128i b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_hsub_epi16(a, b);
- #else
- simde__m128i_private
- r_,
- a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
-
- #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
- r_.neon_i16 = vsubq_s16(vuzp1q_s16(a_.neon_i16, b_.neon_i16), vuzp2q_s16(a_.neon_i16, b_.neon_i16));
- #elif defined(SIMDE_ASSUME_VECTORIZATION) && defined(SIMDE__SHUFFLE_VECTOR)
- r_.i16 =
- SIMDE__SHUFFLE_VECTOR(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14) -
- SIMDE__SHUFFLE_VECTOR(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15);
- #else
- r_.i16[0] = a_.i16[0] - a_.i16[1];
- r_.i16[1] = a_.i16[2] - a_.i16[3];
- r_.i16[2] = a_.i16[4] - a_.i16[5];
- r_.i16[3] = a_.i16[6] - a_.i16[7];
- r_.i16[4] = b_.i16[0] - b_.i16[1];
- r_.i16[5] = b_.i16[2] - b_.i16[3];
- r_.i16[6] = b_.i16[4] - b_.i16[5];
- r_.i16[7] = b_.i16[6] - b_.i16[7];
- #endif
-
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_hsub_epi16(a, b) simde_mm_hsub_epi16(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_hsub_epi32 (simde__m128i a, simde__m128i b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_hsub_epi32(a, b);
- #else
- simde__m128i_private
- r_,
- a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
-
- #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
- r_.neon_i32 = vsubq_s32(vuzp1q_s32(a_.neon_i32, b_.neon_i32), vuzp2q_s32(a_.neon_i32, b_.neon_i32));
- #elif defined(SIMDE_ASSUME_VECTORIZATION) && defined(SIMDE__SHUFFLE_VECTOR)
- r_.i32 =
- SIMDE__SHUFFLE_VECTOR(32, 16, a_.i32, b_.i32, 0, 2, 4, 6) -
- SIMDE__SHUFFLE_VECTOR(32, 16, a_.i32, b_.i32, 1, 3, 5, 7);
- #else
- r_.i32[0] = a_.i32[0] - a_.i32[1];
- r_.i32[1] = a_.i32[2] - a_.i32[3];
- r_.i32[2] = b_.i32[0] - b_.i32[1];
- r_.i32[3] = b_.i32[2] - b_.i32[3];
- #endif
-
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_hsub_epi32(a, b) simde_mm_hsub_epi32(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_hsub_pi16 (simde__m64 a, simde__m64 b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_hsub_pi16(a, b);
- #else
- simde__m64_private
- r_,
- a_ = simde__m64_to_private(a),
- b_ = simde__m64_to_private(b);
-
- #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
- r_.neon_i16 = vsub_s16(vuzp1_s16(a_.neon_i16, b_.neon_i16), vuzp2_s16(a_.neon_i16, b_.neon_i16));
- #elif defined(SIMDE_ASSUME_VECTORIZATION) && defined(SIMDE__SHUFFLE_VECTOR)
- r_.i16 =
- SIMDE__SHUFFLE_VECTOR(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) -
- SIMDE__SHUFFLE_VECTOR(16, 8, a_.i16, b_.i16, 1, 3, 5, 7);
- #else
- r_.i16[0] = a_.i16[0] - a_.i16[1];
- r_.i16[1] = a_.i16[2] - a_.i16[3];
- r_.i16[2] = b_.i16[0] - b_.i16[1];
- r_.i16[3] = b_.i16[2] - b_.i16[3];
- #endif
-
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_hsub_pi16(a, b) simde_mm_hsub_pi16(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_hsub_pi32 (simde__m64 a, simde__m64 b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_hsub_pi32(a, b);
- #else
- simde__m64_private
- r_,
- a_ = simde__m64_to_private(a),
- b_ = simde__m64_to_private(b);
-
- #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
- r_.neon_i32 = vsub_s32(vuzp1_s32(a_.neon_i32, b_.neon_i32), vuzp2_s32(a_.neon_i32, b_.neon_i32));
- #elif defined(SIMDE_ASSUME_VECTORIZATION) && defined(SIMDE__SHUFFLE_VECTOR)
- r_.i32 =
- SIMDE__SHUFFLE_VECTOR(32, 8, a_.i32, b_.i32, 0, 2) -
- SIMDE__SHUFFLE_VECTOR(32, 8, a_.i32, b_.i32, 1, 3);
- #else
- r_.i32[0] = a_.i32[0] - a_.i32[1];
- r_.i32[1] = b_.i32[0] - b_.i32[1];
- #endif
-
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_hsubs_epi16(a, b);
- #else
- simde__m128i_private
- r_,
- a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
-
- #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
- r_.neon_i16 = vqsubq_s16(vuzp1q_s16(a_.neon_i16, b_.neon_i16), vuzp2q_s16(a_.neon_i16, b_.neon_i16));
- #else
- for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) {
- int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) - HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]);
- r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN;
- int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) - HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]);
- r_.i16[i + 4] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN;
- }
- #endif
-
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_hsubs_epi16(a, b) simde_mm_hsubs_epi16(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_hsubs_pi16(a, b);
- #else
- simde__m64_private
- r_,
- a_ = simde__m64_to_private(a),
- b_ = simde__m64_to_private(b);
-
- #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
- r_.neon_i16 = vqsub_s16(vuzp1_s16(a_.neon_i16, b_.neon_i16), vuzp2_s16(a_.neon_i16, b_.neon_i16));
- #else
- for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) {
- int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) - HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]);
- r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN;
- int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) - HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]);
- r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN;
- }
- #endif
-
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_maddubs_epi16(a, b);
- #else
- simde__m128i_private
- r_,
- a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
-
- #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
- int16x8_t tl = vmulq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(a_.neon_u8))), vmovl_s8(vget_low_s8(b_.neon_i8)));
- int16x8_t th = vmulq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(a_.neon_u8))), vmovl_s8(vget_high_s8(b_.neon_i8)));
- r_.neon_i16 = vqaddq_s16(vuzp1q_s16(tl, th), vuzp2q_s16(tl, th));
- #else
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- const int idx = HEDLEY_STATIC_CAST(int, i) << 1;
- int32_t ts =
- (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) +
- (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1]));
- r_.i16[i] = HEDLEY_LIKELY(ts > INT16_MIN) ? (HEDLEY_LIKELY(ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN;
- }
- #endif
-
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_maddubs_pi16(a, b);
- #else
- simde__m64_private
- r_,
- a_ = simde__m64_to_private(a),
- b_ = simde__m64_to_private(b);
-
- #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
- int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8));
- int16x8_t bi = vmovl_s8(b_.neon_i8);
- int16x8_t p = vmulq_s16(ai, bi);
- int16x4_t l = vget_low_s16(p);
- int16x4_t h = vget_high_s16(p);
- r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h));
- #else
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- const int idx = HEDLEY_STATIC_CAST(int, i) << 1;
- int32_t ts =
- (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) +
- (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1]));
- r_.i16[i] = HEDLEY_LIKELY(ts > INT16_MIN) ? (HEDLEY_LIKELY(ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN;
- }
- #endif
-
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_mulhrs_epi16(a, b);
- #else
- simde__m128i_private
- r_,
- a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
-
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15));
- }
-
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_mulhrs_pi16(a, b);
- #else
- simde__m64_private
- r_,
- a_ = simde__m64_to_private(a),
- b_ = simde__m64_to_private(b);
-
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15));
- }
-
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_sign_epi8(a, b);
- #else
- simde__m128i_private
- r_,
- a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
-
- #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
- int8x16_t m = vreinterpretq_s8_u8(vcgezq_s8(b_.neon_i8));
- r_.neon_i8 = veorq_s8(vandq_s8(a_.neon_i8, m), vandq_s8(vnegq_s8(a_.neon_i8), vmvnq_s8(m)));
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
- r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] > 0) ? (a_.i8[i]) : INT8_C(0));
- }
- #endif
-
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_sign_epi16(a, b);
- #else
- simde__m128i_private
- r_,
- a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
-
- #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
- int16x8_t m = vreinterpretq_s16_u16(vcgezq_s16(b_.neon_i16));
- r_.neon_i16 = veorq_s16(vandq_s16(a_.neon_i16, m), vandq_s16(vnegq_s16(a_.neon_i16), vmvnq_s16(m)));
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0));
- }
- #endif
-
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_sign_epi32(a, b);
- #else
- simde__m128i_private
- r_,
- a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b);
-
- #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
- int32x4_t m = vreinterpretq_s32_u32(vcgezq_s32(b_.neon_i32));
- r_.neon_i32 = veorq_s32(vandq_s32(a_.neon_i32, m), vandq_s32(vnegq_s32(a_.neon_i32), vmvnq_s32(m)));
- #else
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0));
- }
- #endif
-
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_sign_pi8(a, b);
- #else
- simde__m64_private
- r_,
- a_ = simde__m64_to_private(a),
- b_ = simde__m64_to_private(b);
-
- #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
- int8x8_t m = vreinterpret_s8_u8(vcgez_s8(b_.neon_i8));
- r_.neon_i8 = veor_s8(vand_s8(a_.neon_i8, m), vand_s8(vneg_s8(a_.neon_i8), vmvn_s8(m)));
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
- r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] > 0) ? (a_.i8[i]) : INT8_C(0));
- }
- #endif
-
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_sign_pi16(a, b);
- #else
- simde__m64_private
- r_,
- a_ = simde__m64_to_private(a),
- b_ = simde__m64_to_private(b);
-
- #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
- int16x4_t m = vreinterpret_s16_u16(vcgez_s16(b_.neon_i16));
- r_.neon_i16 = veor_s16(vand_s16(a_.neon_i16, m), vand_s16(vneg_s16(a_.neon_i16), vmvn_s16(m)));
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0));
- }
- #endif
-
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) {
- #if defined(SIMDE_SSSE3_NATIVE)
- return _mm_sign_pi32(a, b);
- #else
- simde__m64_private
- r_,
- a_ = simde__m64_to_private(a),
- b_ = simde__m64_to_private(b);
-
- #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
- int32x2_t m = vreinterpret_s32_u32(vcgez_s32(b_.neon_i32));
- r_.neon_i32 = veor_s32(vand_s32(a_.neon_i32, m), vand_s32(vneg_s32(a_.neon_i32), vmvn_s32(m)));
- #else
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0));
- }
- #endif
-
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
- # define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b)
- #endif
-
- SIMDE__END_DECLS
-
- HEDLEY_DIAGNOSTIC_POP
-
- #endif /* !defined(SIMDE__SSSE3_H) */
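Note: the portable fallbacks deleted above are small enough to model in plain C. Below is a minimal standalone sketch (C99, not part of simde or this gem; the helper names shuffle_epi8_scalar and mulhrs_scalar are hypothetical, introduced only for illustration) of two semantics that recur in this file: the _mm_shuffle_epi8 byte table lookup, where a set high bit in the selector byte zeroes the output lane, and the _mm_mulhrs_epi16 Q15 rounding step (a*b + 0x4000) >> 15. Like the fallback above, the shuffle sketch relies on arithmetic right shift of negative integers to build its keep/zero mask.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Scalar model of the simde_mm_shuffle_epi8 fallback shown in the diff:
 * (~b[i]) >> 7 is all ones when b[i] >= 0 (keep the looked-up byte) and
 * all zeros when b[i] < 0 (zero the lane), assuming arithmetic shifts. */
static void shuffle_epi8_scalar(int8_t r[16], const int8_t a[16], const int8_t b[16]) {
  for (size_t i = 0 ; i < 16 ; i++) {
    r[i] = (int8_t)(a[b[i] & 15] & (~(b[i]) >> 7));
  }
}

/* Scalar model of the simde_mm_mulhrs_epi16 loop body: multiply two Q15
 * values in 32 bits, add the rounding constant 0x4000, keep the high half. */
static int16_t mulhrs_scalar(int16_t a, int16_t b) {
  return (int16_t)((((int32_t)a * (int32_t)b) + 0x4000) >> 15);
}

int main(void) {
  int8_t a[16], b[16], r[16];
  for (int i = 0 ; i < 16 ; i++) {
    a[i] = (int8_t)i;        /* table: 0..15 */
    b[i] = (int8_t)(15 - i); /* selector: reverse the table */
  }
  b[0] = (int8_t)(b[0] | 0x80); /* high bit set: lane 0 is forced to 0 */
  shuffle_epi8_scalar(r, a, b);
  printf("r[0]=%d r[1]=%d r[15]=%d\n", r[0], r[1], r[15]); /* 0, 14, 0 */

  /* Q15 fixed point: 0.5 * 0.5 rounds to 0.25, i.e. 16384 -> 8192 */
  printf("mulhrs(16384, 16384) = %d\n", mulhrs_scalar(16384, 16384));
  return 0;
}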