minimap2 0.2.25.0 → 0.2.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -3
  3. data/ext/minimap2/Makefile +6 -2
  4. data/ext/minimap2/NEWS.md +38 -0
  5. data/ext/minimap2/README.md +9 -3
  6. data/ext/minimap2/align.c +5 -3
  7. data/ext/minimap2/cookbook.md +2 -2
  8. data/ext/minimap2/format.c +7 -4
  9. data/ext/minimap2/kalloc.c +20 -1
  10. data/ext/minimap2/kalloc.h +13 -2
  11. data/ext/minimap2/ksw2.h +1 -0
  12. data/ext/minimap2/ksw2_extd2_sse.c +1 -1
  13. data/ext/minimap2/ksw2_exts2_sse.c +79 -40
  14. data/ext/minimap2/ksw2_extz2_sse.c +1 -1
  15. data/ext/minimap2/lchain.c +15 -16
  16. data/ext/minimap2/lib/simde/CONTRIBUTING.md +114 -0
  17. data/ext/minimap2/lib/simde/COPYING +20 -0
  18. data/ext/minimap2/lib/simde/README.md +333 -0
  19. data/ext/minimap2/lib/simde/amalgamate.py +58 -0
  20. data/ext/minimap2/lib/simde/meson.build +33 -0
  21. data/ext/minimap2/lib/simde/netlify.toml +20 -0
  22. data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +140 -0
  23. data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +137 -0
  24. data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +142 -0
  25. data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +145 -0
  26. data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +140 -0
  27. data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +145 -0
  28. data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +140 -0
  29. data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +143 -0
  30. data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +137 -0
  31. data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +141 -0
  32. data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +147 -0
  33. data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +141 -0
  34. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +134 -0
  35. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +138 -0
  36. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +134 -0
  37. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +137 -0
  38. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +131 -0
  39. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +135 -0
  40. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +141 -0
  41. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +135 -0
  42. data/ext/minimap2/lib/simde/simde/arm/neon.h +97 -0
  43. data/ext/minimap2/lib/simde/simde/check.h +267 -0
  44. data/ext/minimap2/lib/simde/simde/debug-trap.h +83 -0
  45. data/ext/minimap2/lib/simde/simde/hedley.h +1899 -0
  46. data/ext/minimap2/lib/simde/simde/simde-arch.h +445 -0
  47. data/ext/minimap2/lib/simde/simde/simde-common.h +697 -0
  48. data/ext/minimap2/lib/simde/simde/x86/avx.h +5385 -0
  49. data/ext/minimap2/lib/simde/simde/x86/avx2.h +2402 -0
  50. data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +391 -0
  51. data/ext/minimap2/lib/simde/simde/x86/avx512f.h +3389 -0
  52. data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +112 -0
  53. data/ext/minimap2/lib/simde/simde/x86/fma.h +659 -0
  54. data/ext/minimap2/lib/simde/simde/x86/mmx.h +2210 -0
  55. data/ext/minimap2/lib/simde/simde/x86/sse.h +3696 -0
  56. data/ext/minimap2/lib/simde/simde/x86/sse2.h +5991 -0
  57. data/ext/minimap2/lib/simde/simde/x86/sse3.h +343 -0
  58. data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +1783 -0
  59. data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +105 -0
  60. data/ext/minimap2/lib/simde/simde/x86/ssse3.h +1053 -0
  61. data/ext/minimap2/lib/simde/simde/x86/svml.h +543 -0
  62. data/ext/minimap2/lib/simde/test/CMakeLists.txt +166 -0
  63. data/ext/minimap2/lib/simde/test/arm/meson.build +4 -0
  64. data/ext/minimap2/lib/simde/test/arm/neon/meson.build +23 -0
  65. data/ext/minimap2/lib/simde/test/arm/neon/skel.c +871 -0
  66. data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +134 -0
  67. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +39 -0
  68. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +10 -0
  69. data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +1260 -0
  70. data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +873 -0
  71. data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +1084 -0
  72. data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +1260 -0
  73. data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +18 -0
  74. data/ext/minimap2/lib/simde/test/arm/test-arm.c +20 -0
  75. data/ext/minimap2/lib/simde/test/arm/test-arm.h +8 -0
  76. data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +171 -0
  77. data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +68 -0
  78. data/ext/minimap2/lib/simde/test/meson.build +64 -0
  79. data/ext/minimap2/lib/simde/test/munit/COPYING +21 -0
  80. data/ext/minimap2/lib/simde/test/munit/Makefile +55 -0
  81. data/ext/minimap2/lib/simde/test/munit/README.md +54 -0
  82. data/ext/minimap2/lib/simde/test/munit/example.c +351 -0
  83. data/ext/minimap2/lib/simde/test/munit/meson.build +37 -0
  84. data/ext/minimap2/lib/simde/test/munit/munit.c +2055 -0
  85. data/ext/minimap2/lib/simde/test/munit/munit.h +535 -0
  86. data/ext/minimap2/lib/simde/test/run-tests.c +20 -0
  87. data/ext/minimap2/lib/simde/test/run-tests.h +260 -0
  88. data/ext/minimap2/lib/simde/test/x86/avx.c +13752 -0
  89. data/ext/minimap2/lib/simde/test/x86/avx2.c +9977 -0
  90. data/ext/minimap2/lib/simde/test/x86/avx512bw.c +2664 -0
  91. data/ext/minimap2/lib/simde/test/x86/avx512f.c +10416 -0
  92. data/ext/minimap2/lib/simde/test/x86/avx512vl.c +210 -0
  93. data/ext/minimap2/lib/simde/test/x86/fma.c +2557 -0
  94. data/ext/minimap2/lib/simde/test/x86/meson.build +33 -0
  95. data/ext/minimap2/lib/simde/test/x86/mmx.c +2878 -0
  96. data/ext/minimap2/lib/simde/test/x86/skel.c +2984 -0
  97. data/ext/minimap2/lib/simde/test/x86/sse.c +5121 -0
  98. data/ext/minimap2/lib/simde/test/x86/sse2.c +9860 -0
  99. data/ext/minimap2/lib/simde/test/x86/sse3.c +486 -0
  100. data/ext/minimap2/lib/simde/test/x86/sse4.1.c +3446 -0
  101. data/ext/minimap2/lib/simde/test/x86/sse4.2.c +101 -0
  102. data/ext/minimap2/lib/simde/test/x86/ssse3.c +2084 -0
  103. data/ext/minimap2/lib/simde/test/x86/svml.c +1545 -0
  104. data/ext/minimap2/lib/simde/test/x86/test-avx.h +16 -0
  105. data/ext/minimap2/lib/simde/test/x86/test-avx512.h +25 -0
  106. data/ext/minimap2/lib/simde/test/x86/test-mmx.h +13 -0
  107. data/ext/minimap2/lib/simde/test/x86/test-sse.h +13 -0
  108. data/ext/minimap2/lib/simde/test/x86/test-sse2.h +13 -0
  109. data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +196 -0
  110. data/ext/minimap2/lib/simde/test/x86/test-x86.c +48 -0
  111. data/ext/minimap2/lib/simde/test/x86/test-x86.h +8 -0
  112. data/ext/minimap2/main.c +13 -6
  113. data/ext/minimap2/map.c +0 -5
  114. data/ext/minimap2/minimap.h +40 -31
  115. data/ext/minimap2/minimap2.1 +19 -5
  116. data/ext/minimap2/misc/paftools.js +545 -24
  117. data/ext/minimap2/options.c +1 -1
  118. data/ext/minimap2/pyproject.toml +2 -0
  119. data/ext/minimap2/python/mappy.pyx +3 -1
  120. data/ext/minimap2/seed.c +1 -1
  121. data/ext/minimap2/setup.py +32 -22
  122. data/lib/minimap2/version.rb +1 -1
  123. metadata +100 -3
@@ -0,0 +1,1053 @@
1
+ /* Permission is hereby granted, free of charge, to any person
2
+ * obtaining a copy of this software and associated documentation
3
+ * files (the "Software"), to deal in the Software without
4
+ * restriction, including without limitation the rights to use, copy,
5
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
6
+ * of the Software, and to permit persons to whom the Software is
7
+ * furnished to do so, subject to the following conditions:
8
+ *
9
+ * The above copyright notice and this permission notice shall be
10
+ * included in all copies or substantial portions of the Software.
11
+ *
12
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
14
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
16
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
17
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ * SOFTWARE.
20
+ *
21
+ * Copyright:
22
+ * 2017-2020 Evan Nemerson <evan@nemerson.com>
23
+ */
24
+
25
+ #if !defined(SIMDE__SSSE3_H)
26
+ # if !defined(SIMDE__SSSE3_H)
27
+ # define SIMDE__SSSE3_H
28
+ # endif
29
+ # include "sse3.h"
30
+
31
+ HEDLEY_DIAGNOSTIC_PUSH
32
+ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
33
+
34
+ # if defined(SIMDE_SSSE3_NATIVE)
35
+ # undef SIMDE_SSSE3_NATIVE
36
+ # endif
37
+ # if defined(SIMDE_ARCH_X86_SSSE3) && !defined(SIMDE_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
38
+ # define SIMDE_SSSE3_NATIVE
39
+ # elif defined(SIMDE_ARCH_ARM_NEON) && !defined(SIMDE_SSSE3_NO_NEON) && !defined(SIMDE_NO_NEON)
40
+ # define SIMDE_SSSE3_NEON
41
+ # elif defined(SIMDE_ARCH_POWER_ALTIVEC)
42
+ # define SIMDE_SSSE3_POWER_ALTIVEC
43
+ # endif
44
+
45
+ # if defined(SIMDE_SSSE3_NATIVE) && !defined(SIMDE_SSE3_NATIVE)
46
+ # if defined(SIMDE_SSSE3_FORCE_NATIVE)
47
+ # error Native SSSE3 support requires native SSE3 support
48
+ # else
49
+ HEDLEY_WARNING("Native SSSE3 support requires native SSE3 support, disabling")
50
+ # undef SIMDE_SSSE3_NATIVE
51
+ # endif
52
+ # elif defined(SIMDE_SSSE3_NEON) && !defined(SIMDE_SSE3_NEON)
53
+ HEDLEY_WARNING("SSSE3 NEON support requires SSE3 NEON support, disabling")
54
+ # undef SIMDE_SSSE3_NEON
55
+ # endif
56
+
57
+ # if defined(SIMDE_SSSE3_NATIVE)
58
+ # include <tmmintrin.h>
59
+ # else
60
+ # if defined(SIMDE_SSSE3_NEON)
61
+ # include <arm_neon.h>
62
+ # endif
63
+ # endif
64
+
65
+ #if !defined(SIMDE_SSSE3_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
66
+ # define SIMDE_SSSE3_ENABLE_NATIVE_ALIASES
67
+ #endif
68
+
69
+ SIMDE__BEGIN_DECLS
70
+
71
+ SIMDE__FUNCTION_ATTRIBUTES
72
+ simde__m128i
73
+ simde_mm_abs_epi8 (simde__m128i a) {
74
+ #if defined(SIMDE_SSSE3_NATIVE)
75
+ return _mm_abs_epi8(a);
76
+ #else
77
+ simde__m128i_private
78
+ r_,
79
+ a_ = simde__m128i_to_private(a);
80
+
81
+ #if defined(SIMDE_SSSE3_NEON)
82
+ r_.neon_i8 = vabsq_s8(a_.neon_i8);
83
+ #elif defined(SIMDE_SSSE3_POWER_ALTIVEC)
84
+ r_.altivec_i8 = vec_abs(a_.altivec_i8);
85
+ #else
86
+ SIMDE__VECTORIZE
87
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
88
+ r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]);
89
+ }
90
+ #endif
91
+
92
+ return simde__m128i_from_private(r_);
93
+ #endif
94
+ }
95
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
96
+ # define _mm_abs_epi8(a) simde_mm_abs_epi8(a)
97
+ #endif
98
+
99
+ SIMDE__FUNCTION_ATTRIBUTES
100
+ simde__m128i
101
+ simde_mm_abs_epi16 (simde__m128i a) {
102
+ #if defined(SIMDE_SSSE3_NATIVE)
103
+ return _mm_abs_epi16(a);
104
+ #else
105
+ simde__m128i_private
106
+ r_,
107
+ a_ = simde__m128i_to_private(a);
108
+
109
+ #if defined(SIMDE_SSSE3_NEON)
110
+ r_.neon_i16 = vabsq_s16(a_.neon_i16);
111
+ #elif defined(SIMDE_SSSE3_POWER_ALTIVEC)
112
+ r_.altivec_i16 = vec_abs(a_.altivec_i16);
113
+ #else
114
+ SIMDE__VECTORIZE
115
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
116
+ r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]);
117
+ }
118
+ #endif
119
+
120
+ return simde__m128i_from_private(r_);
121
+ #endif
122
+ }
123
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
124
+ # define _mm_abs_epi16(a) simde_mm_abs_epi16(a)
125
+ #endif
126
+
127
+ SIMDE__FUNCTION_ATTRIBUTES
128
+ simde__m128i
129
+ simde_mm_abs_epi32 (simde__m128i a) {
130
+ #if defined(SIMDE_SSSE3_NATIVE)
131
+ return _mm_abs_epi32(a);
132
+ #else
133
+ simde__m128i_private
134
+ r_,
135
+ a_ = simde__m128i_to_private(a);
136
+
137
+ #if defined(SIMDE_SSE3_NEON)
138
+ r_.neon_i32 = vabsq_s32(a_.neon_i32);
139
+ #elif defined(SIMDE_SSSE3_POWER_ALTIVEC)
140
+ r_.altivec_i32 = vec_abs(a_.altivec_i32);
141
+ #else
142
+ SIMDE__VECTORIZE
143
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
144
+ #if defined(_MSC_VER)
145
+ HEDLEY_DIAGNOSTIC_PUSH
146
+ #pragma warning(disable:4146)
147
+ #endif
148
+ r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]);
149
+ #if defined(_MSC_VER)
150
+ HEDLEY_DIAGNOSTIC_POP
151
+ #endif
152
+ }
153
+ #endif
154
+
155
+ return simde__m128i_from_private(r_);
156
+ #endif
157
+ }
158
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
159
+ # define _mm_abs_epi32(a) simde_mm_abs_epi32(a)
160
+ #endif
161
+
162
+ SIMDE__FUNCTION_ATTRIBUTES
163
+ simde__m64
164
+ simde_mm_abs_pi8 (simde__m64 a) {
165
+ #if defined(SIMDE_SSSE3_NATIVE)
166
+ return _mm_abs_pi8(a);
167
+ #else
168
+ simde__m64_private
169
+ r_,
170
+ a_ = simde__m64_to_private(a);
171
+
172
+ #if defined(SIMDE_SSSE3_NEON)
173
+ r_.neon_i8 = vabs_s8(a_.neon_i8);
174
+ #else
175
+ SIMDE__VECTORIZE
176
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
177
+ r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]);
178
+ }
179
+ #endif
180
+
181
+ return simde__m64_from_private(r_);
182
+ #endif
183
+ }
184
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
185
+ # define _mm_abs_pi8(a) simde_mm_abs_pi8(a)
186
+ #endif
187
+
188
+ SIMDE__FUNCTION_ATTRIBUTES
189
+ simde__m64
190
+ simde_mm_abs_pi16 (simde__m64 a) {
191
+ #if defined(SIMDE_SSSE3_NATIVE)
192
+ return _mm_abs_pi16(a);
193
+ #else
194
+ simde__m64_private
195
+ r_,
196
+ a_ = simde__m64_to_private(a);
197
+
198
+ #if defined(SIMDE_SSSE3_NEON)
199
+ r_.neon_i16 = vabs_s16(a_.neon_i16);
200
+ #else
201
+ SIMDE__VECTORIZE
202
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
203
+ r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]);
204
+ }
205
+ #endif
206
+
207
+ return simde__m64_from_private(r_);
208
+ #endif
209
+ }
210
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
211
+ # define _mm_abs_pi16(a) simde_mm_abs_pi16(a)
212
+ #endif
213
+
214
+ SIMDE__FUNCTION_ATTRIBUTES
215
+ simde__m64
216
+ simde_mm_abs_pi32 (simde__m64 a) {
217
+ #if defined(SIMDE_SSSE3_NATIVE)
218
+ return _mm_abs_pi32(a);
219
+ #else
220
+ simde__m64_private
221
+ r_,
222
+ a_ = simde__m64_to_private(a);
223
+
224
+ #if defined(SIMDE_SSSE3_NEON)
225
+ r_.neon_i32 = vabs_s32(a_.neon_i32);
226
+ #else
227
+ SIMDE__VECTORIZE
228
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
229
+ r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) ? (- a_.i32[i]) : a_.i32[i]);
230
+ }
231
+ #endif
232
+
233
+ return simde__m64_from_private(r_);
234
+ #endif
235
+ }
236
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
237
+ # define _mm_abs_pi32(a) simde_mm_abs_pi32(a)
238
+ #endif
239
+
240
+ SIMDE__FUNCTION_ATTRIBUTES
241
+ simde__m128i
242
+ simde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count) {
243
+ simde__m128i_private
244
+ r_,
245
+ a_ = simde__m128i_to_private(a),
246
+ b_ = simde__m128i_to_private(b);
247
+
248
+ #if 0 && defined(SIMDE_BYTE_ORDER_LE)
249
+ const int bits = (8 * count) % 64;
250
+ const int eo = count / 8;
251
+
252
+ switch (eo) {
253
+ case 0:
254
+ r_.u64[0] = b_.u64[0] >> bits;
255
+ r_.u64[0] |= b_.u64[1] << (64 - bits);
256
+ r_.u64[1] = b_.u64[1] >> bits;
257
+ r_.u64[1] |= a_.u64[0] << (64 - bits);
258
+ break;
259
+ case 1:
260
+ r_.u64[0] = b_.u64[1] >> bits;
261
+ r_.u64[0] |= a_.u64[0] << (64 - bits);
262
+ r_.u64[1] = a_.u64[0] >> bits;
263
+ r_.u64[1] |= a_.u64[1] << (64 - bits);
264
+ break;
265
+ case 2:
266
+ r_.u64[0] = a_.u64[0] >> bits;
267
+ r_.u64[0] |= a_.u64[1] << (64 - bits);
268
+ r_.u64[1] = a_.u64[1] >> bits;
269
+ break;
270
+ case 3:
271
+ r_.u64[0] = a_.u64[1] >> bits;
272
+ r_.u64[1] = 0;
273
+ break;
274
+ default:
275
+ HEDLEY_UNREACHABLE();
276
+ break;
277
+ }
278
+ #else
279
+ if (HEDLEY_UNLIKELY(count > 31))
280
+ return simde_mm_setzero_si128();
281
+
282
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
283
+ const int srcpos = count + HEDLEY_STATIC_CAST(int, i);
284
+ if (srcpos > 31) {
285
+ r_.i8[i] = 0;
286
+ } else if (srcpos > 15) {
287
+ r_.i8[i] = a_.i8[(srcpos) & 15];
288
+ } else {
289
+ r_.i8[i] = b_.i8[srcpos];
290
+ }
291
+ }
292
+ #endif
293
+
294
+ return simde__m128i_from_private(r_);
295
+ }
296
+ #if defined(SIMDE_SSSE3_NATIVE)
297
+ # define simde_mm_alignr_epi8(a, b, count) _mm_alignr_epi8(a, b, count)
298
+ #endif
299
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
300
+ # define _mm_alignr_epi8(a, b, count) simde_mm_alignr_epi8(a, b, count)
301
+ #endif
302
+
303
+ #if defined(simde_mm_alignr_pi8)
304
+ # undef simde_mm_alignr_pi8
305
+ #endif
306
+ SIMDE__FUNCTION_ATTRIBUTES
307
+ simde__m64
308
+ simde_mm_alignr_pi8 (simde__m64 a, simde__m64 b, const int count) {
309
+ simde__m64_private
310
+ r_,
311
+ a_ = simde__m64_to_private(a),
312
+ b_ = simde__m64_to_private(b);
313
+
314
+ if (HEDLEY_UNLIKELY(count > 15))
315
+ return simde_mm_setzero_si64();
316
+
317
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
318
+ const int srcpos = count + HEDLEY_STATIC_CAST(int, i);
319
+ if (srcpos > 15) {
320
+ r_.i8[i] = 0;
321
+ } else if (srcpos > 7) {
322
+ r_.i8[i] = a_.i8[(srcpos) & 7];
323
+ } else {
324
+ r_.i8[i] = b_.i8[srcpos];
325
+ }
326
+ }
327
+
328
+ return simde__m64_from_private(r_);
329
+ }
330
+ #if defined(SIMDE_SSSE3_NATIVE)
331
+ # define simde_mm_alignr_pi8(a, b, count) _mm_alignr_pi8(a, b, count)
332
+ #endif
333
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
334
+ # define _mm_alignr_pi8(a, b, count) simde_mm_alignr_pi8(a, b, count)
335
+ #endif
336
+
337
+ SIMDE__FUNCTION_ATTRIBUTES
338
+ simde__m128i
339
+ simde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) {
340
+ #if defined(SIMDE_SSSE3_NATIVE)
341
+ return _mm_shuffle_epi8(a, b);
342
+ #else
343
+ simde__m128i_private
344
+ r_,
345
+ a_ = simde__m128i_to_private(a),
346
+ b_ = simde__m128i_to_private(b);
347
+
348
+ #if defined(SIMDE_SSSE3_NEON)
349
+ /* Mask out the bits we're not interested in. vtbl will result in 0
350
+ for any values outside of [0, 15], so if the high bit is set it
351
+ will return 0, just like in SSSE3. */
352
+ b_.neon_i8 = vandq_s8(b_.neon_i8, vdupq_n_s8((int8_t)((1 << 7) | 15)));
353
+
354
+ /* Convert a from an int8x16_t to an int8x8x2_t */
355
+ int8x8x2_t i = { .val = { vget_low_s8(a_.neon_i8), vget_high_s8(a_.neon_i8) } };
356
+
357
+ /* Table lookups */
358
+ int8x8_t l = vtbl2_s8(i, vget_low_s8(b_.neon_i8));
359
+ int8x8_t h = vtbl2_s8(i, vget_high_s8(b_.neon_i8));
360
+
361
+ r_.neon_i8 = vcombine_s8(l, h);
362
+ #else
363
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
364
+ r_.i8[i] = a_.i8[b_.i8[i] & 15] & (~(b_.i8[i]) >> 7);
365
+ }
366
+ #endif
367
+
368
+ return simde__m128i_from_private(r_);
369
+ #endif
370
+ }
371
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
372
+ # define _mm_shuffle_epi8(a, b) simde_mm_shuffle_epi8(a, b)
373
+ #endif
374
+
375
+ SIMDE__FUNCTION_ATTRIBUTES
376
+ simde__m64
377
+ simde_mm_shuffle_pi8 (simde__m64 a, simde__m64 b) {
378
+ #if defined(SIMDE_SSSE3_NATIVE)
379
+ return _mm_shuffle_pi8(a, b);
380
+ #else
381
+ simde__m64_private
382
+ r_,
383
+ a_ = simde__m64_to_private(a),
384
+ b_ = simde__m64_to_private(b);
385
+
386
+ #if defined(SIMDE_SSSE3_NEON)
387
+ b_.neon_i8 = vand_s8(b_.neon_i8, vdup_n_s8((int8_t)((1 << 7) | 7)));
388
+ r_.neon_i8 = vtbl1_s8(a_.neon_i8, b_.neon_i8);
389
+ #else
390
+ for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
391
+ r_.i8[i] = a_.i8[b_.i8[i] & 7] & (~(b_.i8[i]) >> 7);
392
+ }
393
+ #endif
394
+
395
+ return simde__m64_from_private(r_);
396
+ #endif
397
+ }
398
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
399
+ # define _mm_shuffle_pi8(a, b) simde_mm_shuffle_pi8(a, b)
400
+ #endif
401
+
402
+ SIMDE__FUNCTION_ATTRIBUTES
403
+ simde__m128i
404
+ simde_mm_hadd_epi16 (simde__m128i a, simde__m128i b) {
405
+ #if defined(SIMDE_SSSE3_NATIVE)
406
+ return _mm_hadd_epi16(a, b);
407
+ #else
408
+ simde__m128i_private
409
+ r_,
410
+ a_ = simde__m128i_to_private(a),
411
+ b_ = simde__m128i_to_private(b);
412
+
413
+ #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
414
+ r_.neon_i16 = vaddq_s16(vuzp1q_s16(a_.neon_i16, b_.neon_i16), vuzp2q_s16(a_.neon_i16, b_.neon_i16));
415
+ #elif defined(SIMDE_ASSUME_VECTORIZATION) && defined(SIMDE__SHUFFLE_VECTOR)
416
+ r_.i16 =
417
+ SIMDE__SHUFFLE_VECTOR(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14) +
418
+ SIMDE__SHUFFLE_VECTOR(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15);
419
+ #else
420
+ r_.i16[0] = a_.i16[0] + a_.i16[1];
421
+ r_.i16[1] = a_.i16[2] + a_.i16[3];
422
+ r_.i16[2] = a_.i16[4] + a_.i16[5];
423
+ r_.i16[3] = a_.i16[6] + a_.i16[7];
424
+ r_.i16[4] = b_.i16[0] + b_.i16[1];
425
+ r_.i16[5] = b_.i16[2] + b_.i16[3];
426
+ r_.i16[6] = b_.i16[4] + b_.i16[5];
427
+ r_.i16[7] = b_.i16[6] + b_.i16[7];
428
+ #endif
429
+
430
+ return simde__m128i_from_private(r_);
431
+ #endif
432
+ }
433
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
434
+ # define _mm_hadd_epi16(a, b) simde_mm_hadd_epi16(a, b)
435
+ #endif
436
+
437
+ SIMDE__FUNCTION_ATTRIBUTES
438
+ simde__m128i
439
+ simde_mm_hadd_epi32 (simde__m128i a, simde__m128i b) {
440
+ #if defined(SIMDE_SSSE3_NATIVE)
441
+ return _mm_hadd_epi32(a, b);
442
+ #else
443
+ simde__m128i_private
444
+ r_,
445
+ a_ = simde__m128i_to_private(a),
446
+ b_ = simde__m128i_to_private(b);
447
+
448
+ #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
449
+ r_.neon_i32 = vaddq_s32(vuzp1q_s32(a_.neon_i32, b_.neon_i32), vuzp2q_s32(a_.neon_i32, b_.neon_i32));
450
+ #elif defined(SIMDE_ASSUME_VECTORIZATION) && defined(SIMDE__SHUFFLE_VECTOR)
451
+ r_.i32 =
452
+ SIMDE__SHUFFLE_VECTOR(32, 16, a_.i32, b_.i32, 0, 2, 4, 6) +
453
+ SIMDE__SHUFFLE_VECTOR(32, 16, a_.i32, b_.i32, 1, 3, 5, 7);
454
+ #else
455
+ r_.i32[0] = a_.i32[0] + a_.i32[1];
456
+ r_.i32[1] = a_.i32[2] + a_.i32[3];
457
+ r_.i32[2] = b_.i32[0] + b_.i32[1];
458
+ r_.i32[3] = b_.i32[2] + b_.i32[3];
459
+ #endif
460
+
461
+ return simde__m128i_from_private(r_);
462
+ #endif
463
+ }
464
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
465
+ # define _mm_hadd_epi32(a, b) simde_mm_hadd_epi32(a, b)
466
+ #endif
467
+
468
+ SIMDE__FUNCTION_ATTRIBUTES
469
+ simde__m64
470
+ simde_mm_hadd_pi16 (simde__m64 a, simde__m64 b) {
471
+ #if defined(SIMDE_SSSE3_NATIVE)
472
+ return _mm_hadd_pi16(a, b);
473
+ #else
474
+ simde__m64_private
475
+ r_,
476
+ a_ = simde__m64_to_private(a),
477
+ b_ = simde__m64_to_private(b);
478
+
479
+ #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
480
+ r_.neon_i16 = vadd_s16(vuzp1_s16(a_.neon_i16, b_.neon_i16), vuzp2_s16(a_.neon_i16, b_.neon_i16));
481
+ #elif defined(SIMDE_ASSUME_VECTORIZATION) && defined(SIMDE__SHUFFLE_VECTOR)
482
+ r_.i16 =
483
+ SIMDE__SHUFFLE_VECTOR(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) +
484
+ SIMDE__SHUFFLE_VECTOR(16, 8, a_.i16, b_.i16, 1, 3, 5, 7);
485
+ #else
486
+ r_.i16[0] = a_.i16[0] + a_.i16[1];
487
+ r_.i16[1] = a_.i16[2] + a_.i16[3];
488
+ r_.i16[2] = b_.i16[0] + b_.i16[1];
489
+ r_.i16[3] = b_.i16[2] + b_.i16[3];
490
+ #endif
491
+
492
+ return simde__m64_from_private(r_);
493
+ #endif
494
+ }
495
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
496
+ # define _mm_hadd_pi16(a, b) simde_mm_hadd_pi16(a, b)
497
+ #endif
498
+
499
+ SIMDE__FUNCTION_ATTRIBUTES
500
+ simde__m64
501
+ simde_mm_hadd_pi32 (simde__m64 a, simde__m64 b) {
502
+ #if defined(SIMDE_SSSE3_NATIVE)
503
+ return _mm_hadd_pi32(a, b);
504
+ #else
505
+ simde__m64_private
506
+ r_,
507
+ a_ = simde__m64_to_private(a),
508
+ b_ = simde__m64_to_private(b);
509
+
510
+ #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
511
+ r_.neon_i32 = vadd_s32(vuzp1_s32(a_.neon_i32, b_.neon_i32), vuzp2_s32(a_.neon_i32, b_.neon_i32));
512
+ #elif defined(SIMDE_ASSUME_VECTORIZATION) && defined(SIMDE__SHUFFLE_VECTOR)
513
+ r_.i32 =
514
+ SIMDE__SHUFFLE_VECTOR(32, 8, a_.i32, b_.i32, 0, 2) +
515
+ SIMDE__SHUFFLE_VECTOR(32, 8, a_.i32, b_.i32, 1, 3);
516
+ #else
517
+ r_.i32[0] = a_.i32[0] + a_.i32[1];
518
+ r_.i32[1] = b_.i32[0] + b_.i32[1];
519
+ #endif
520
+
521
+ return simde__m64_from_private(r_);
522
+ #endif
523
+ }
524
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
525
+ # define _mm_hadd_pi32(a, b) simde_mm_hadd_pi32(a, b)
526
+ #endif
527
+
528
+ SIMDE__FUNCTION_ATTRIBUTES
529
+ simde__m128i
530
+ simde_mm_hadds_epi16 (simde__m128i a, simde__m128i b) {
531
+ #if defined(SIMDE_SSSE3_NATIVE)
532
+ return _mm_hadds_epi16(a, b);
533
+ #else
534
+ simde__m128i_private
535
+ r_,
536
+ a_ = simde__m128i_to_private(a),
537
+ b_ = simde__m128i_to_private(b);
538
+
539
+ #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
540
+ r_.neon_i16 = vqaddq_s16(vuzp1q_s16(a_.neon_i16, b_.neon_i16), vuzp2q_s16(a_.neon_i16, b_.neon_i16));
541
+ #else
542
+ for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) {
543
+ int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]);
544
+ r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN;
545
+ int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]);
546
+ r_.i16[i + 4] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN;
547
+ }
548
+ #endif
549
+
550
+ return simde__m128i_from_private(r_);
551
+ #endif
552
+ }
553
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
554
+ # define _mm_hadds_epi16(a, b) simde_mm_hadds_epi16(a, b)
555
+ #endif
556
+
557
+ SIMDE__FUNCTION_ATTRIBUTES
558
+ simde__m64
559
+ simde_mm_hadds_pi16 (simde__m64 a, simde__m64 b) {
560
+ #if defined(SIMDE_SSSE3_NATIVE)
561
+ return _mm_hadds_pi16(a, b);
562
+ #else
563
+ simde__m64_private
564
+ r_,
565
+ a_ = simde__m64_to_private(a),
566
+ b_ = simde__m64_to_private(b);
567
+
568
+ #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
569
+ r_.neon_i16 = vqadd_s16(vuzp1_s16(a_.neon_i16, b_.neon_i16), vuzp2_s16(a_.neon_i16, b_.neon_i16));
570
+ #else
571
+ for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) {
572
+ int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]);
573
+ r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN;
574
+ int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]);
575
+ r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN;
576
+ }
577
+ #endif
578
+
579
+ return simde__m64_from_private(r_);
580
+ #endif
581
+ }
582
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
583
+ # define _mm_hadds_pi16(a, b) simde_mm_hadds_pi16(a, b)
584
+ #endif
585
+
586
+ SIMDE__FUNCTION_ATTRIBUTES
587
+ simde__m128i
588
+ simde_mm_hsub_epi16 (simde__m128i a, simde__m128i b) {
589
+ #if defined(SIMDE_SSSE3_NATIVE)
590
+ return _mm_hsub_epi16(a, b);
591
+ #else
592
+ simde__m128i_private
593
+ r_,
594
+ a_ = simde__m128i_to_private(a),
595
+ b_ = simde__m128i_to_private(b);
596
+
597
+ #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
598
+ r_.neon_i16 = vsubq_s16(vuzp1q_s16(a_.neon_i16, b_.neon_i16), vuzp2q_s16(a_.neon_i16, b_.neon_i16));
599
+ #elif defined(SIMDE_ASSUME_VECTORIZATION) && defined(SIMDE__SHUFFLE_VECTOR)
600
+ r_.i16 =
601
+ SIMDE__SHUFFLE_VECTOR(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14) -
602
+ SIMDE__SHUFFLE_VECTOR(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15);
603
+ #else
604
+ r_.i16[0] = a_.i16[0] - a_.i16[1];
605
+ r_.i16[1] = a_.i16[2] - a_.i16[3];
606
+ r_.i16[2] = a_.i16[4] - a_.i16[5];
607
+ r_.i16[3] = a_.i16[6] - a_.i16[7];
608
+ r_.i16[4] = b_.i16[0] - b_.i16[1];
609
+ r_.i16[5] = b_.i16[2] - b_.i16[3];
610
+ r_.i16[6] = b_.i16[4] - b_.i16[5];
611
+ r_.i16[7] = b_.i16[6] - b_.i16[7];
612
+ #endif
613
+
614
+ return simde__m128i_from_private(r_);
615
+ #endif
616
+ }
617
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
618
+ # define _mm_hsub_epi16(a, b) simde_mm_hsub_epi16(a, b)
619
+ #endif
620
+
621
+ SIMDE__FUNCTION_ATTRIBUTES
622
+ simde__m128i
623
+ simde_mm_hsub_epi32 (simde__m128i a, simde__m128i b) {
624
+ #if defined(SIMDE_SSSE3_NATIVE)
625
+ return _mm_hsub_epi32(a, b);
626
+ #else
627
+ simde__m128i_private
628
+ r_,
629
+ a_ = simde__m128i_to_private(a),
630
+ b_ = simde__m128i_to_private(b);
631
+
632
+ #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
633
+ r_.neon_i32 = vsubq_s32(vuzp1q_s32(a_.neon_i32, b_.neon_i32), vuzp2q_s32(a_.neon_i32, b_.neon_i32));
634
+ #elif defined(SIMDE_ASSUME_VECTORIZATION) && defined(SIMDE__SHUFFLE_VECTOR)
635
+ r_.i32 =
636
+ SIMDE__SHUFFLE_VECTOR(32, 16, a_.i32, b_.i32, 0, 2, 4, 6) -
637
+ SIMDE__SHUFFLE_VECTOR(32, 16, a_.i32, b_.i32, 1, 3, 5, 7);
638
+ #else
639
+ r_.i32[0] = a_.i32[0] - a_.i32[1];
640
+ r_.i32[1] = a_.i32[2] - a_.i32[3];
641
+ r_.i32[2] = b_.i32[0] - b_.i32[1];
642
+ r_.i32[3] = b_.i32[2] - b_.i32[3];
643
+ #endif
644
+
645
+ return simde__m128i_from_private(r_);
646
+ #endif
647
+ }
648
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
649
+ # define _mm_hsub_epi32(a, b) simde_mm_hsub_epi32(a, b)
650
+ #endif
651
+
652
+ SIMDE__FUNCTION_ATTRIBUTES
653
+ simde__m64
654
+ simde_mm_hsub_pi16 (simde__m64 a, simde__m64 b) {
655
+ #if defined(SIMDE_SSSE3_NATIVE)
656
+ return _mm_hsub_pi16(a, b);
657
+ #else
658
+ simde__m64_private
659
+ r_,
660
+ a_ = simde__m64_to_private(a),
661
+ b_ = simde__m64_to_private(b);
662
+
663
+ #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
664
+ r_.neon_i16 = vsub_s16(vuzp1_s16(a_.neon_i16, b_.neon_i16), vuzp2_s16(a_.neon_i16, b_.neon_i16));
665
+ #elif defined(SIMDE_ASSUME_VECTORIZATION) && defined(SIMDE__SHUFFLE_VECTOR)
666
+ r_.i16 =
667
+ SIMDE__SHUFFLE_VECTOR(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) -
668
+ SIMDE__SHUFFLE_VECTOR(16, 8, a_.i16, b_.i16, 1, 3, 5, 7);
669
+ #else
670
+ r_.i16[0] = a_.i16[0] - a_.i16[1];
671
+ r_.i16[1] = a_.i16[2] - a_.i16[3];
672
+ r_.i16[2] = b_.i16[0] - b_.i16[1];
673
+ r_.i16[3] = b_.i16[2] - b_.i16[3];
674
+ #endif
675
+
676
+ return simde__m64_from_private(r_);
677
+ #endif
678
+ }
679
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
680
+ # define _mm_hsub_pi16(a, b) simde_mm_hsub_pi16(a, b)
681
+ #endif
682
+
683
+ SIMDE__FUNCTION_ATTRIBUTES
684
+ simde__m64
685
+ simde_mm_hsub_pi32 (simde__m64 a, simde__m64 b) {
686
+ #if defined(SIMDE_SSSE3_NATIVE)
687
+ return _mm_hsub_pi32(a, b);
688
+ #else
689
+ simde__m64_private
690
+ r_,
691
+ a_ = simde__m64_to_private(a),
692
+ b_ = simde__m64_to_private(b);
693
+
694
+ #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
695
+ r_.neon_i32 = vsub_s32(vuzp1_s32(a_.neon_i32, b_.neon_i32), vuzp2_s32(a_.neon_i32, b_.neon_i32));
696
+ #elif defined(SIMDE_ASSUME_VECTORIZATION) && defined(SIMDE__SHUFFLE_VECTOR)
697
+ r_.i32 =
698
+ SIMDE__SHUFFLE_VECTOR(32, 8, a_.i32, b_.i32, 0, 2) -
699
+ SIMDE__SHUFFLE_VECTOR(32, 8, a_.i32, b_.i32, 1, 3);
700
+ #else
701
+ r_.i32[0] = a_.i32[0] - a_.i32[1];
702
+ r_.i32[1] = b_.i32[0] - b_.i32[1];
703
+ #endif
704
+
705
+ return simde__m64_from_private(r_);
706
+ #endif
707
+ }
708
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
709
+ # define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, b)
710
+ #endif
711
+
712
+ SIMDE__FUNCTION_ATTRIBUTES
713
+ simde__m128i
714
+ simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) {
715
+ #if defined(SIMDE_SSSE3_NATIVE)
716
+ return _mm_hsubs_epi16(a, b);
717
+ #else
718
+ simde__m128i_private
719
+ r_,
720
+ a_ = simde__m128i_to_private(a),
721
+ b_ = simde__m128i_to_private(b);
722
+
723
+ #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
724
+ r_.neon_i16 = vqsubq_s16(vuzp1q_s16(a_.neon_i16, b_.neon_i16), vuzp2q_s16(a_.neon_i16, b_.neon_i16));
725
+ #else
726
+ for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) {
727
+ int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) - HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]);
728
+ r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN;
729
+ int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) - HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]);
730
+ r_.i16[i + 4] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN;
731
+ }
732
+ #endif
733
+
734
+ return simde__m128i_from_private(r_);
735
+ #endif
736
+ }
737
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
738
+ # define _mm_hsubs_epi16(a, b) simde_mm_hsubs_epi16(a, b)
739
+ #endif
740
+
741
+ SIMDE__FUNCTION_ATTRIBUTES
742
+ simde__m64
743
+ simde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) {
744
+ #if defined(SIMDE_SSSE3_NATIVE)
745
+ return _mm_hsubs_pi16(a, b);
746
+ #else
747
+ simde__m64_private
748
+ r_,
749
+ a_ = simde__m64_to_private(a),
750
+ b_ = simde__m64_to_private(b);
751
+
752
+ #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
753
+ r_.neon_i16 = vqsub_s16(vuzp1_s16(a_.neon_i16, b_.neon_i16), vuzp2_s16(a_.neon_i16, b_.neon_i16));
754
+ #else
755
+ for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) {
756
+ int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) - HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]);
757
+ r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN;
758
+ int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) - HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]);
759
+ r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN;
760
+ }
761
+ #endif
762
+
763
+ return simde__m64_from_private(r_);
764
+ #endif
765
+ }
766
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
767
+ # define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b)
768
+ #endif
769
+
770
+ SIMDE__FUNCTION_ATTRIBUTES
771
+ simde__m128i
772
+ simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) {
773
+ #if defined(SIMDE_SSSE3_NATIVE)
774
+ return _mm_maddubs_epi16(a, b);
775
+ #else
776
+ simde__m128i_private
777
+ r_,
778
+ a_ = simde__m128i_to_private(a),
779
+ b_ = simde__m128i_to_private(b);
780
+
781
+ #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
782
+ int16x8_t tl = vmulq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(a_.neon_u8))), vmovl_s8(vget_low_s8(b_.neon_i8)));
783
+ int16x8_t th = vmulq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(a_.neon_u8))), vmovl_s8(vget_high_s8(b_.neon_i8)));
784
+ r_.neon_i16 = vqaddq_s16(vuzp1q_s16(tl, th), vuzp2q_s16(tl, th));
785
+ #else
786
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
787
+ const int idx = HEDLEY_STATIC_CAST(int, i) << 1;
788
+ int32_t ts =
789
+ (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) +
790
+ (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1]));
791
+ r_.i16[i] = HEDLEY_LIKELY(ts > INT16_MIN) ? (HEDLEY_LIKELY(ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN;
792
+ }
793
+ #endif
794
+
795
+ return simde__m128i_from_private(r_);
796
+ #endif
797
+ }
798
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
799
+ # define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b)
800
+ #endif
801
+
802
+ SIMDE__FUNCTION_ATTRIBUTES
803
+ simde__m64
804
+ simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) {
805
+ #if defined(SIMDE_SSSE3_NATIVE)
806
+ return _mm_maddubs_pi16(a, b);
807
+ #else
808
+ simde__m64_private
809
+ r_,
810
+ a_ = simde__m64_to_private(a),
811
+ b_ = simde__m64_to_private(b);
812
+
813
+ #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
814
+ int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8));
815
+ int16x8_t bi = vmovl_s8(b_.neon_i8);
816
+ int16x8_t p = vmulq_s16(ai, bi);
817
+ int16x4_t l = vget_low_s16(p);
818
+ int16x4_t h = vget_high_s16(p);
819
+ r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h));
820
+ #else
821
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
822
+ const int idx = HEDLEY_STATIC_CAST(int, i) << 1;
823
+ int32_t ts =
824
+ (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) +
825
+ (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1]));
826
+ r_.i16[i] = HEDLEY_LIKELY(ts > INT16_MIN) ? (HEDLEY_LIKELY(ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN;
827
+ }
828
+ #endif
829
+
830
+ return simde__m64_from_private(r_);
831
+ #endif
832
+ }
833
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
834
+ # define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b)
835
+ #endif
836
+
837
+ SIMDE__FUNCTION_ATTRIBUTES
838
+ simde__m128i
839
+ simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) {
840
+ #if defined(SIMDE_SSSE3_NATIVE)
841
+ return _mm_mulhrs_epi16(a, b);
842
+ #else
843
+ simde__m128i_private
844
+ r_,
845
+ a_ = simde__m128i_to_private(a),
846
+ b_ = simde__m128i_to_private(b);
847
+
848
+ SIMDE__VECTORIZE
849
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
850
+ r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15));
851
+ }
852
+
853
+ return simde__m128i_from_private(r_);
854
+ #endif
855
+ }
856
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
857
+ # define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b)
858
+ #endif
859
+
860
+ SIMDE__FUNCTION_ATTRIBUTES
861
+ simde__m64
862
+ simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) {
863
+ #if defined(SIMDE_SSSE3_NATIVE)
864
+ return _mm_mulhrs_pi16(a, b);
865
+ #else
866
+ simde__m64_private
867
+ r_,
868
+ a_ = simde__m64_to_private(a),
869
+ b_ = simde__m64_to_private(b);
870
+
871
+ SIMDE__VECTORIZE
872
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
873
+ r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15));
874
+ }
875
+
876
+ return simde__m64_from_private(r_);
877
+ #endif
878
+ }
879
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
880
+ # define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b)
881
+ #endif
882
+
883
+ SIMDE__FUNCTION_ATTRIBUTES
884
+ simde__m128i
885
+ simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) {
886
+ #if defined(SIMDE_SSSE3_NATIVE)
887
+ return _mm_sign_epi8(a, b);
888
+ #else
889
+ simde__m128i_private
890
+ r_,
891
+ a_ = simde__m128i_to_private(a),
892
+ b_ = simde__m128i_to_private(b);
893
+
894
+ #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
895
+ int8x16_t m = vreinterpretq_s8_u8(vcgezq_s8(b_.neon_i8));
896
+ r_.neon_i8 = veorq_s8(vandq_s8(a_.neon_i8, m), vandq_s8(vnegq_s8(a_.neon_i8), vmvnq_s8(m)));
897
+ #else
898
+ SIMDE__VECTORIZE
899
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
900
+ r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] > 0) ? (a_.i8[i]) : INT8_C(0));
901
+ }
902
+ #endif
903
+
904
+ return simde__m128i_from_private(r_);
905
+ #endif
906
+ }
907
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
908
+ # define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b)
909
+ #endif
910
+
911
+ SIMDE__FUNCTION_ATTRIBUTES
912
+ simde__m128i
913
+ simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) {
914
+ #if defined(SIMDE_SSSE3_NATIVE)
915
+ return _mm_sign_epi16(a, b);
916
+ #else
917
+ simde__m128i_private
918
+ r_,
919
+ a_ = simde__m128i_to_private(a),
920
+ b_ = simde__m128i_to_private(b);
921
+
922
+ #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
923
+ int16x8_t m = vreinterpretq_s16_u16(vcgezq_s16(b_.neon_i16));
924
+ r_.neon_i16 = veorq_s16(vandq_s16(a_.neon_i16, m), vandq_s16(vnegq_s16(a_.neon_i16), vmvnq_s16(m)));
925
+ #else
926
+ SIMDE__VECTORIZE
927
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
928
+ r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0));
929
+ }
930
+ #endif
931
+
932
+ return simde__m128i_from_private(r_);
933
+ #endif
934
+ }
935
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
936
+ # define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b)
937
+ #endif
938
+
939
+ SIMDE__FUNCTION_ATTRIBUTES
940
+ simde__m128i
941
+ simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) {
942
+ #if defined(SIMDE_SSSE3_NATIVE)
943
+ return _mm_sign_epi32(a, b);
944
+ #else
945
+ simde__m128i_private
946
+ r_,
947
+ a_ = simde__m128i_to_private(a),
948
+ b_ = simde__m128i_to_private(b);
949
+
950
+ #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
951
+ int32x4_t m = vreinterpretq_s32_u32(vcgezq_s32(b_.neon_i32));
952
+ r_.neon_i32 = veorq_s32(vandq_s32(a_.neon_i32, m), vandq_s32(vnegq_s32(a_.neon_i32), vmvnq_s32(m)));
953
+ #else
954
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
955
+ r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0));
956
+ }
957
+ #endif
958
+
959
+ return simde__m128i_from_private(r_);
960
+ #endif
961
+ }
962
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
963
+ # define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b)
964
+ #endif
965
+
966
+ SIMDE__FUNCTION_ATTRIBUTES
967
+ simde__m64
968
+ simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) {
969
+ #if defined(SIMDE_SSSE3_NATIVE)
970
+ return _mm_sign_pi8(a, b);
971
+ #else
972
+ simde__m64_private
973
+ r_,
974
+ a_ = simde__m64_to_private(a),
975
+ b_ = simde__m64_to_private(b);
976
+
977
+ #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
978
+ int8x8_t m = vreinterpret_s8_u8(vcgez_s8(b_.neon_i8));
979
+ r_.neon_i8 = veor_s8(vand_s8(a_.neon_i8, m), vand_s8(vneg_s8(a_.neon_i8), vmvn_s8(m)));
980
+ #else
981
+ SIMDE__VECTORIZE
982
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
983
+ r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] > 0) ? (a_.i8[i]) : INT8_C(0));
984
+ }
985
+ #endif
986
+
987
+ return simde__m64_from_private(r_);
988
+ #endif
989
+ }
990
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
991
+ # define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b)
992
+ #endif
993
+
994
+ SIMDE__FUNCTION_ATTRIBUTES
995
+ simde__m64
996
+ simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) {
997
+ #if defined(SIMDE_SSSE3_NATIVE)
998
+ return _mm_sign_pi16(a, b);
999
+ #else
1000
+ simde__m64_private
1001
+ r_,
1002
+ a_ = simde__m64_to_private(a),
1003
+ b_ = simde__m64_to_private(b);
1004
+
1005
+ #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
1006
+ int16x4_t m = vreinterpret_s16_u16(vcgez_s16(b_.neon_i16));
1007
+ r_.neon_i16 = veor_s16(vand_s16(a_.neon_i16, m), vand_s16(vneg_s16(a_.neon_i16), vmvn_s16(m)));
1008
+ #else
1009
+ SIMDE__VECTORIZE
1010
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
1011
+ r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0));
1012
+ }
1013
+ #endif
1014
+
1015
+ return simde__m64_from_private(r_);
1016
+ #endif
1017
+ }
1018
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
1019
+ # define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b)
1020
+ #endif
1021
+
1022
+ SIMDE__FUNCTION_ATTRIBUTES
1023
+ simde__m64
1024
+ simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) {
1025
+ #if defined(SIMDE_SSSE3_NATIVE)
1026
+ return _mm_sign_pi32(a, b);
1027
+ #else
1028
+ simde__m64_private
1029
+ r_,
1030
+ a_ = simde__m64_to_private(a),
1031
+ b_ = simde__m64_to_private(b);
1032
+
1033
+ #if defined(SIMDE_SSSE3_NEON) && defined(SIMDE_ARCH_AARCH64)
1034
+ int32x2_t m = vreinterpret_s32_u32(vcgez_s32(b_.neon_i32));
1035
+ r_.neon_i32 = veor_s32(vand_s32(a_.neon_i32, m), vand_s32(vneg_s32(a_.neon_i32), vmvn_s32(m)));
1036
+ #else
1037
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1038
+ r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0));
1039
+ }
1040
+ #endif
1041
+
1042
+ return simde__m64_from_private(r_);
1043
+ #endif
1044
+ }
1045
+ #if defined(SIMDE_SSSE3_ENABLE_NATIVE_ALIASES)
1046
+ # define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b)
1047
+ #endif
1048
+
1049
+ SIMDE__END_DECLS
1050
+
1051
+ HEDLEY_DIAGNOSTIC_POP
1052
+
1053
+ #endif /* !defined(SIMDE__SSE2_H) */