minimap2 0.2.25.1 → 0.2.26.0

Files changed (109)
  1. checksums.yaml +4 -4
  2. data/README.md +9 -0
  3. data/ext/Rakefile +2 -2
  4. data/ext/minimap2/NEWS.md +9 -0
  5. data/ext/minimap2/README.md +2 -2
  6. data/ext/minimap2/cookbook.md +2 -2
  7. data/ext/minimap2/minimap.h +1 -1
  8. data/ext/minimap2/minimap2.1 +1 -1
  9. data/ext/minimap2/misc/paftools.js +1 -1
  10. data/ext/minimap2/python/mappy.pyx +1 -1
  11. data/ext/minimap2/setup.py +22 -32
  12. data/lib/minimap2/version.rb +1 -1
  13. metadata +1 -97
  14. data/ext/minimap2/lib/simde/CONTRIBUTING.md +0 -114
  15. data/ext/minimap2/lib/simde/COPYING +0 -20
  16. data/ext/minimap2/lib/simde/README.md +0 -333
  17. data/ext/minimap2/lib/simde/amalgamate.py +0 -58
  18. data/ext/minimap2/lib/simde/meson.build +0 -33
  19. data/ext/minimap2/lib/simde/netlify.toml +0 -20
  20. data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +0 -140
  21. data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +0 -137
  22. data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +0 -142
  23. data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +0 -145
  24. data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +0 -140
  25. data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +0 -145
  26. data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +0 -140
  27. data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +0 -143
  28. data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +0 -137
  29. data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +0 -141
  30. data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +0 -147
  31. data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +0 -141
  32. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +0 -134
  33. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +0 -138
  34. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +0 -134
  35. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +0 -137
  36. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +0 -131
  37. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +0 -135
  38. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +0 -141
  39. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +0 -135
  40. data/ext/minimap2/lib/simde/simde/arm/neon.h +0 -97
  41. data/ext/minimap2/lib/simde/simde/check.h +0 -267
  42. data/ext/minimap2/lib/simde/simde/debug-trap.h +0 -83
  43. data/ext/minimap2/lib/simde/simde/hedley.h +0 -1899
  44. data/ext/minimap2/lib/simde/simde/simde-arch.h +0 -445
  45. data/ext/minimap2/lib/simde/simde/simde-common.h +0 -697
  46. data/ext/minimap2/lib/simde/simde/x86/avx.h +0 -5385
  47. data/ext/minimap2/lib/simde/simde/x86/avx2.h +0 -2402
  48. data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +0 -391
  49. data/ext/minimap2/lib/simde/simde/x86/avx512f.h +0 -3389
  50. data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +0 -112
  51. data/ext/minimap2/lib/simde/simde/x86/fma.h +0 -659
  52. data/ext/minimap2/lib/simde/simde/x86/mmx.h +0 -2210
  53. data/ext/minimap2/lib/simde/simde/x86/sse.h +0 -3696
  54. data/ext/minimap2/lib/simde/simde/x86/sse2.h +0 -5991
  55. data/ext/minimap2/lib/simde/simde/x86/sse3.h +0 -343
  56. data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +0 -1783
  57. data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +0 -105
  58. data/ext/minimap2/lib/simde/simde/x86/ssse3.h +0 -1053
  59. data/ext/minimap2/lib/simde/simde/x86/svml.h +0 -543
  60. data/ext/minimap2/lib/simde/test/CMakeLists.txt +0 -166
  61. data/ext/minimap2/lib/simde/test/arm/meson.build +0 -4
  62. data/ext/minimap2/lib/simde/test/arm/neon/meson.build +0 -23
  63. data/ext/minimap2/lib/simde/test/arm/neon/skel.c +0 -871
  64. data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +0 -134
  65. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +0 -39
  66. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +0 -10
  67. data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +0 -1260
  68. data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +0 -873
  69. data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +0 -1084
  70. data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +0 -1260
  71. data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +0 -18
  72. data/ext/minimap2/lib/simde/test/arm/test-arm.c +0 -20
  73. data/ext/minimap2/lib/simde/test/arm/test-arm.h +0 -8
  74. data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +0 -171
  75. data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +0 -68
  76. data/ext/minimap2/lib/simde/test/meson.build +0 -64
  77. data/ext/minimap2/lib/simde/test/munit/COPYING +0 -21
  78. data/ext/minimap2/lib/simde/test/munit/Makefile +0 -55
  79. data/ext/minimap2/lib/simde/test/munit/README.md +0 -54
  80. data/ext/minimap2/lib/simde/test/munit/example.c +0 -351
  81. data/ext/minimap2/lib/simde/test/munit/meson.build +0 -37
  82. data/ext/minimap2/lib/simde/test/munit/munit.c +0 -2055
  83. data/ext/minimap2/lib/simde/test/munit/munit.h +0 -535
  84. data/ext/minimap2/lib/simde/test/run-tests.c +0 -20
  85. data/ext/minimap2/lib/simde/test/run-tests.h +0 -260
  86. data/ext/minimap2/lib/simde/test/x86/avx.c +0 -13752
  87. data/ext/minimap2/lib/simde/test/x86/avx2.c +0 -9977
  88. data/ext/minimap2/lib/simde/test/x86/avx512bw.c +0 -2664
  89. data/ext/minimap2/lib/simde/test/x86/avx512f.c +0 -10416
  90. data/ext/minimap2/lib/simde/test/x86/avx512vl.c +0 -210
  91. data/ext/minimap2/lib/simde/test/x86/fma.c +0 -2557
  92. data/ext/minimap2/lib/simde/test/x86/meson.build +0 -33
  93. data/ext/minimap2/lib/simde/test/x86/mmx.c +0 -2878
  94. data/ext/minimap2/lib/simde/test/x86/skel.c +0 -2984
  95. data/ext/minimap2/lib/simde/test/x86/sse.c +0 -5121
  96. data/ext/minimap2/lib/simde/test/x86/sse2.c +0 -9860
  97. data/ext/minimap2/lib/simde/test/x86/sse3.c +0 -486
  98. data/ext/minimap2/lib/simde/test/x86/sse4.1.c +0 -3446
  99. data/ext/minimap2/lib/simde/test/x86/sse4.2.c +0 -101
  100. data/ext/minimap2/lib/simde/test/x86/ssse3.c +0 -2084
  101. data/ext/minimap2/lib/simde/test/x86/svml.c +0 -1545
  102. data/ext/minimap2/lib/simde/test/x86/test-avx.h +0 -16
  103. data/ext/minimap2/lib/simde/test/x86/test-avx512.h +0 -25
  104. data/ext/minimap2/lib/simde/test/x86/test-mmx.h +0 -13
  105. data/ext/minimap2/lib/simde/test/x86/test-sse.h +0 -13
  106. data/ext/minimap2/lib/simde/test/x86/test-sse2.h +0 -13
  107. data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +0 -196
  108. data/ext/minimap2/lib/simde/test/x86/test-x86.c +0 -48
  109. data/ext/minimap2/lib/simde/test/x86/test-x86.h +0 -8
@@ -1,1783 +0,0 @@
1
- /* Copyright (c) 2017-2020 Evan Nemerson <evan@nemerson.com>
2
- *
3
- * Permission is hereby granted, free of charge, to any person
4
- * obtaining a copy of this software and associated documentation
5
- * files (the "Software"), to deal in the Software without
6
- * restriction, including without limitation the rights to use, copy,
7
- * modify, merge, publish, distribute, sublicense, and/or sell copies
8
- * of the Software, and to permit persons to whom the Software is
9
- * furnished to do so, subject to the following conditions:
10
- *
11
- * The above copyright notice and this permission notice shall be
12
- * included in all copies or substantial portions of the Software.
13
- *
14
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- * SOFTWARE.
22
- */
23
-
24
- #if !defined(SIMDE__SSE4_1_H)
25
- # if !defined(SIMDE__SSE4_1_H)
26
- # define SIMDE__SSE4_1_H
27
- # endif
28
- # include "ssse3.h"
29
-
30
- HEDLEY_DIAGNOSTIC_PUSH
31
- SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
32
-
33
- # if defined(SIMDE_SSE4_1_NATIVE)
34
- # undef SIMDE_SSE4_1_NATIVE
35
- # endif
36
- # if defined(SIMDE_ARCH_X86_SSE4_1) && !defined(SIMDE_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
37
- # define SIMDE_SSE4_1_NATIVE
38
- # elif defined(__ARM_NEON) && !defined(SIMDE_SSE4_1_NO_NEON) && !defined(SIMDE_NO_NEON)
39
- # define SIMDE_SSE4_1_NEON
40
- # elif defined(SIMDE_ARCH_POWER_ALTIVEC)
41
- # define SIMDE_SSE4_1_POWER_ALTIVEC
42
- # endif
43
-
44
- # if defined(SIMDE_SSE4_1_NATIVE) && !defined(SIMDE_SSE3_NATIVE)
45
- # if defined(SIMDE_SSE4_1_FORCE_NATIVE)
46
- # error Native SSE4.1 support requires native SSE3 support
47
- # else
48
- HEDLEY_WARNING("Native SSE4.1 support requires native SSE3 support, disabling")
49
- # undef SIMDE_SSE4_1_NATIVE
50
- # endif
51
- # elif defined(SIMDE_SSE4_1_NEON) && !defined(SIMDE_SSE3_NEON)
52
- HEDLEY_WARNING("SSE4.1 NEON support requires SSE3 NEON support, disabling")
53
- # undef SIMDE_SSE4_1_NEON
54
- # endif
55
-
56
- # if defined(SIMDE_SSE4_1_NATIVE)
57
- # include <smmintrin.h>
58
- # else
59
- # if defined(SIMDE_SSE4_1_NEON)
60
- # include <arm_neon.h>
61
- # endif
62
- # endif
63
-
64
- SIMDE__BEGIN_DECLS
65
-
66
- #if !defined(SIMDE_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
67
- # define SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES
68
- #endif
69
-
70
- #if defined(SIMDE_SSE4_1_NATIVE)
71
- # define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT
72
- # define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF
73
- # define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF
74
- # define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO
75
- # define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION
76
-
77
- # define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC
78
- # define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC
79
- #else
80
- # define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00
81
- # define SIMDE_MM_FROUND_TO_NEG_INF 0x01
82
- # define SIMDE_MM_FROUND_TO_POS_INF 0x02
83
- # define SIMDE_MM_FROUND_TO_ZERO 0x03
84
- # define SIMDE_MM_FROUND_CUR_DIRECTION 0x04
85
-
86
- # define SIMDE_MM_FROUND_RAISE_EXC 0x00
87
- # define SIMDE_MM_FROUND_NO_EXC 0x08
88
- #endif
89
-
90
- #define SIMDE_MM_FROUND_NINT \
91
- (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC)
92
- #define SIMDE_MM_FROUND_FLOOR \
93
- (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC)
94
- #define SIMDE_MM_FROUND_CEIL \
95
- (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC)
96
- #define SIMDE_MM_FROUND_TRUNC \
97
- (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC)
98
- #define SIMDE_MM_FROUND_RINT \
99
- (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC)
100
- #define SIMDE_MM_FROUND_NEARBYINT \
101
- (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC)
102
-
103
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
104
- # define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT
105
- # define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF
106
- # define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF
107
- # define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO
108
- # define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION
109
- # define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC
110
- # define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT
111
- # define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR
112
- # define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL
113
- # define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC
114
- # define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT
115
- # define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT
116
- #endif
117
-
118
- SIMDE__FUNCTION_ATTRIBUTES
119
- simde__m128i
120
- simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8)
121
- HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
122
- simde__m128i_private
123
- r_,
124
- a_ = simde__m128i_to_private(a),
125
- b_ = simde__m128i_to_private(b);
126
-
127
- SIMDE__VECTORIZE
128
- for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
129
- r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i];
130
- }
131
-
132
- return simde__m128i_from_private(r_);
133
- }
134
- #if defined(SIMDE_SSE4_1_NATIVE)
135
- # define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8)
136
- #endif
137
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
138
- # define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8)
139
- #endif
140
-
141
- SIMDE__FUNCTION_ATTRIBUTES
142
- simde__m128d
143
- simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8)
144
- HEDLEY_REQUIRE_MSG((imm8 & 3) == imm8, "imm8 must be in range [0, 3]") {
145
- simde__m128d_private
146
- r_,
147
- a_ = simde__m128d_to_private(a),
148
- b_ = simde__m128d_to_private(b);
149
-
150
- SIMDE__VECTORIZE
151
- for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
152
- r_.f64[i] = ((imm8 >> i) & 1) ? b_.f64[i] : a_.f64[i];
153
- }
154
- return simde__m128d_from_private(r_);
155
- }
156
- #if defined(SIMDE_SSE4_1_NATIVE)
157
- # define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8)
158
- #endif
159
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
160
- # define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8)
161
- #endif
162
-
163
- SIMDE__FUNCTION_ATTRIBUTES
164
- simde__m128
165
- simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8)
166
- HEDLEY_REQUIRE_MSG((imm8 & 0xf) == imm8, "imm8 must be in range [0, 15]") {
167
- simde__m128_private
168
- r_,
169
- a_ = simde__m128_to_private(a),
170
- b_ = simde__m128_to_private(b);
171
-
172
- SIMDE__VECTORIZE
173
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
174
- r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i];
175
- }
176
- return simde__m128_from_private(r_);
177
- }
178
- #if defined(SIMDE_SSE4_1_NATIVE)
179
- # define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8)
180
- #endif
181
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
182
- # define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8)
183
- #endif
184
-
185
- SIMDE__FUNCTION_ATTRIBUTES
186
- simde__m128i
187
- simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) {
188
- #if defined(SIMDE_SSE4_1_NATIVE)
189
- return _mm_blendv_epi8(a, b, mask);
190
- #else
191
- simde__m128i_private
192
- r_,
193
- a_ = simde__m128i_to_private(a),
194
- b_ = simde__m128i_to_private(b),
195
- mask_ = simde__m128i_to_private(mask);
196
-
197
- #if defined(SIMDE_SSE4_1_NEON)
198
- mask_ = simde__m128i_to_private(simde_mm_cmplt_epi8(mask, simde_mm_setzero_si128()));
199
- r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8);
200
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
201
- /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */
202
- #if defined(HEDLEY_INTEL_VERSION_CHECK)
203
- __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
204
- mask_.i8 = HEDLEY_STATIC_CAST(__typeof__(mask_.i8), mask_.i8 < z);
205
- #else
206
- mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1;
207
- #endif
208
-
209
- r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8);
210
- #else
211
- SIMDE__VECTORIZE
212
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
213
- int8_t m = mask_.i8[i] >> 7;
214
- r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]);
215
- }
216
- #endif
217
-
218
- return simde__m128i_from_private(r_);
219
- #endif
220
- }
221
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
222
- # define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask)
223
- #endif
224
-
225
- SIMDE__FUNCTION_ATTRIBUTES
226
- simde__m128i
227
- simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) {
228
- #if defined(SIMDE_SSE2_NATIVE)
229
- mask = simde_mm_srai_epi16(mask, 15);
230
- return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a));
231
- #else
232
- simde__m128i_private
233
- r_,
234
- a_ = simde__m128i_to_private(a),
235
- b_ = simde__m128i_to_private(b),
236
- mask_ = simde__m128i_to_private(mask);
237
-
238
- #if defined(SIMDE_SSE4_1_NEON)
239
- mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128()));
240
- r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16);
241
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
242
- #if defined(HEDLEY_INTEL_VERSION_CHECK)
243
- __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 };
244
- mask_.i16 = mask_.i16 < z;
245
- #else
246
- mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1;
247
- #endif
248
-
249
- r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16);
250
- #else
251
- SIMDE__VECTORIZE
252
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
253
- int16_t m = mask_.i16[i] >> 15;
254
- r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]);
255
- }
256
- #endif
257
-
258
- return simde__m128i_from_private(r_);
259
- #endif
260
- }
261
-
262
- SIMDE__FUNCTION_ATTRIBUTES
263
- simde__m128i
264
- simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) {
265
- #if defined(SIMDE_SSE4_1_NATIVE)
266
- return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask)));
267
- #else
268
- simde__m128i_private
269
- r_,
270
- a_ = simde__m128i_to_private(a),
271
- b_ = simde__m128i_to_private(b),
272
- mask_ = simde__m128i_to_private(mask);
273
-
274
- #if defined(SIMDE_SSE4_1_NEON)
275
- mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128()));
276
- r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32);
277
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
278
- #if defined(HEDLEY_INTEL_VERSION_CHECK)
279
- __typeof__(mask_.i32) z = { 0, 0, 0, 0 };
280
- mask_.i32 = mask_.i32 < z;
281
- #else
282
- mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1;
283
- #endif
284
-
285
- r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32);
286
- #else
287
- SIMDE__VECTORIZE
288
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
289
- int32_t m = mask_.i32[i] >> 31;
290
- r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]);
291
- }
292
- #endif
293
-
294
- return simde__m128i_from_private(r_);
295
- #endif
296
- }
297
-
298
- SIMDE__FUNCTION_ATTRIBUTES
299
- simde__m128i
300
- simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) {
301
- #if defined(SIMDE_SSE4_1_NATIVE)
302
- return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask)));
303
- #else
304
- simde__m128i_private
305
- r_,
306
- a_ = simde__m128i_to_private(a),
307
- b_ = simde__m128i_to_private(b),
308
- mask_ = simde__m128i_to_private(mask);
309
-
310
- #if defined(SIMDE_SSE4_1_NEON) && defined(SIMDE_ARCH_AARCH64)
311
- mask_.i64 = vreinterpretq_s64_u64(vcltq_s64(mask_.i64, vdupq_n_s64(UINT64_C(0))));
312
- r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64);
313
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
314
- #if defined(HEDLEY_INTEL_VERSION_CHECK)
315
- __typeof__(mask_.i64) z = { 0, 0 };
316
- mask_.i64 = HEDLEY_STATIC_CAST(__typeof__(mask_.i64), mask_.i64 < z);
317
- #else
318
- mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1;
319
- #endif
320
-
321
- r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64);
322
- #else
323
- SIMDE__VECTORIZE
324
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
325
- int64_t m = mask_.i64[i] >> 63;
326
- r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]);
327
- }
328
- #endif
329
-
330
- return simde__m128i_from_private(r_);
331
- #endif
332
- }
333
-
334
- SIMDE__FUNCTION_ATTRIBUTES
335
- simde__m128d
336
- simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) {
337
- #if defined(SIMDE_SSE4_1_NATIVE)
338
- return _mm_blendv_pd(a, b, mask);
339
- #else
340
- return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask)));
341
- #endif
342
- }
343
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
344
- # define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask)
345
- #endif
346
-
347
- SIMDE__FUNCTION_ATTRIBUTES
348
- simde__m128
349
- simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) {
350
- #if defined(SIMDE_SSE4_1_NATIVE)
351
- return _mm_blendv_ps(a, b, mask);
352
- #else
353
- return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask)));
354
- #endif
355
- }
356
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
357
- # define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask)
358
- #endif
359
-
360
- SIMDE__FUNCTION_ATTRIBUTES
361
- simde__m128d
362
- simde_mm_ceil_pd (simde__m128d a) {
363
- #if defined(SIMDE_SSE4_1_NATIVE)
364
- return _mm_ceil_pd(a);
365
- #else
366
- simde__m128d_private
367
- r_,
368
- a_ = simde__m128d_to_private(a);
369
-
370
- #if defined(SIMDE_SSE4_1_NEON) && defined(SIMDE_ARCH_AARCH64)
371
- r_.neon_f64 = vrndpq_f64(a_.neon_f64);
372
- #elif defined(SIMDE_SSE_POWER_ALTIVEC)
373
- r_.altivec_f64 = vec_ceil(a_.altivec_f64);
374
- #elif defined(SIMDE_HAVE_MATH_H)
375
- SIMDE__VECTORIZE
376
- for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
377
- r_.f64[i] = ceil(a_.f64[i]);
378
- }
379
- #else
380
- HEDLEY_UNREACHABLE();
381
- #endif
382
-
383
- return simde__m128d_from_private(r_);
384
- #endif
385
- }
386
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
387
- # define _mm_ceil_pd(a) simde_mm_ceil_pd(a)
388
- #endif
389
-
390
- SIMDE__FUNCTION_ATTRIBUTES
391
- simde__m128
392
- simde_mm_ceil_ps (simde__m128 a) {
393
- #if defined(SIMDE_SSE4_1_NATIVE)
394
- return _mm_ceil_ps(a);
395
- #else
396
- simde__m128_private
397
- r_,
398
- a_ = simde__m128_to_private(a);
399
-
400
- #if defined(SIMDE_SSE4_1_NEON) && (SIMDE_ARCH_ARM >= 80) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0))
401
- r_.neon_f32 = vrndpq_f32(a_.neon_f32);
402
- #elif defined(SIMDE_SSE_POWER_ALTIVEC)
403
- r_.altivec_f32 = vec_ceil(a_.altivec_f32);
404
- #elif defined(SIMDE_HAVE_MATH_H)
405
- SIMDE__VECTORIZE
406
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
407
- r_.f32[i] = ceilf(a_.f32[i]);
408
- }
409
- #else
410
- HEDLEY_UNREACHABLE();
411
- #endif
412
-
413
- return simde__m128_from_private(r_);
414
- #endif
415
- }
416
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
417
- # define _mm_ceil_ps(a) simde_mm_ceil_ps(a)
418
- #endif
419
-
420
- SIMDE__FUNCTION_ATTRIBUTES
421
- simde__m128d
422
- simde_mm_ceil_sd (simde__m128d a, simde__m128d b) {
423
- #if defined(SIMDE_SSE4_1_NATIVE)
424
- return _mm_ceil_sd(a, b);
425
- #else
426
- simde__m128d_private
427
- r_,
428
- a_ = simde__m128d_to_private(a),
429
- b_ = simde__m128d_to_private(b);
430
-
431
- #if defined(SIMDE_HAVE_MATH_H)
432
- r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], ceil(b_.f64[0])));
433
- #else
434
- HEDLEY_UNREACHABLE();
435
- #endif
436
-
437
- return simde__m128d_from_private(r_);
438
- #endif
439
- }
440
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
441
- # define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b)
442
- #endif
443
-
444
- SIMDE__FUNCTION_ATTRIBUTES
445
- simde__m128
446
- simde_mm_ceil_ss (simde__m128 a, simde__m128 b) {
447
- #if defined(SIMDE_SSE4_1_NATIVE)
448
- return _mm_ceil_ss(a, b);
449
- #elif defined(SIMDE_ASSUME_VECTORIZATION)
450
- return simde_mm_move_ss(a, simde_mm_ceil_ps(b));
451
- #else
452
- simde__m128_private
453
- r_,
454
- a_ = simde__m128_to_private(a),
455
- b_ = simde__m128_to_private(b);
456
-
457
- #if defined(SIMDE_HAVE_MATH_H)
458
- r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], ceilf(b_.f32[0])));
459
- #else
460
- HEDLEY_UNREACHABLE();
461
- #endif
462
-
463
- return simde__m128_from_private(r_);
464
- #endif
465
- }
466
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
467
- # define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b)
468
- #endif
469
-
470
- SIMDE__FUNCTION_ATTRIBUTES
471
- simde__m128i
472
- simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) {
473
- #if defined(SIMDE_SSE4_1_NATIVE)
474
- return _mm_cmpeq_epi64(a, b);
475
- #else
476
- simde__m128i_private
477
- r_,
478
- a_ = simde__m128i_to_private(a),
479
- b_ = simde__m128i_to_private(b);
480
-
481
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
482
- r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), a_.i64 == b_.i64);
483
- #elif defined(SIMDE_SSE_POWER_ALTIVEC)
484
- r_.altivec_i64 = (vector signed long long) vec_cmpeq(a_.altivec_i64, b_.altivec_i64);
485
- #else
486
- SIMDE__VECTORIZE
487
- for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
488
- r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0);
489
- }
490
- #endif
491
-
492
- return simde__m128i_from_private(r_);
493
- #endif
494
- }
495
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
496
- # define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b)
497
- #endif
498
-
499
- SIMDE__FUNCTION_ATTRIBUTES
500
- simde__m128i
501
- simde_mm_cvtepi8_epi16 (simde__m128i a) {
502
- #if defined(SIMDE_SSE4_1_NATIVE)
503
- return _mm_cvtepi8_epi16(a);
504
- #else
505
- simde__m128i_private
506
- r_,
507
- a_ = simde__m128i_to_private(a);
508
-
509
- #if defined(SIMDE__CONVERT_VECTOR)
510
- SIMDE__CONVERT_VECTOR(r_.i16, a_.m64_private[0].i8);
511
- #else
512
- SIMDE__VECTORIZE
513
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
514
- r_.i16[i] = a_.i8[i];
515
- }
516
- #endif
517
-
518
- return simde__m128i_from_private(r_);
519
- #endif
520
- }
521
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
522
- # define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a)
523
- #endif
524
-
525
- SIMDE__FUNCTION_ATTRIBUTES
526
- simde__m128i
527
- simde_mm_cvtepi8_epi32 (simde__m128i a) {
528
- #if defined(SIMDE_SSE4_1_NATIVE)
529
- return _mm_cvtepi8_epi32(a);
530
- #else
531
- simde__m128i_private
532
- r_,
533
- a_ = simde__m128i_to_private(a);
534
-
535
- SIMDE__VECTORIZE
536
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
537
- r_.i32[i] = a_.i8[i];
538
- }
539
-
540
- return simde__m128i_from_private(r_);
541
- #endif
542
- }
543
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
544
- # define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a)
545
- #endif
546
-
547
- SIMDE__FUNCTION_ATTRIBUTES
548
- simde__m128i
549
- simde_mm_cvtepi8_epi64 (simde__m128i a) {
550
- #if defined(SIMDE_SSE4_1_NATIVE)
551
- return _mm_cvtepi8_epi64(a);
552
- #else
553
- simde__m128i_private
554
- r_,
555
- a_ = simde__m128i_to_private(a);
556
-
557
- SIMDE__VECTORIZE
558
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
559
- r_.i64[i] = a_.i8[i];
560
- }
561
-
562
- return simde__m128i_from_private(r_);
563
- #endif
564
- }
565
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
566
- # define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a)
567
- #endif
568
-
569
- SIMDE__FUNCTION_ATTRIBUTES
570
- simde__m128i
571
- simde_mm_cvtepu8_epi16 (simde__m128i a) {
572
- #if defined(SIMDE_SSE4_1_NATIVE)
573
- return _mm_cvtepu8_epi16(a);
574
- #else
575
- simde__m128i_private
576
- r_,
577
- a_ = simde__m128i_to_private(a);
578
-
579
- #if defined(SIMDE__CONVERT_VECTOR) && !defined(SIMDE_BUG_CLANG_45541)
580
- SIMDE__CONVERT_VECTOR(r_.i16, a_.m64_private[0].u8);
581
- #else
582
- SIMDE__VECTORIZE
583
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
584
- r_.i16[i] = a_.u8[i];
585
- }
586
- #endif
587
-
588
- return simde__m128i_from_private(r_);
589
- #endif
590
- }
591
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
592
- # define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a)
593
- #endif
594
-
595
- SIMDE__FUNCTION_ATTRIBUTES
596
- simde__m128i
597
- simde_mm_cvtepu8_epi32 (simde__m128i a) {
598
- #if defined(SIMDE_SSE4_1_NATIVE)
599
- return _mm_cvtepu8_epi32(a);
600
- #else
601
- simde__m128i_private
602
- r_,
603
- a_ = simde__m128i_to_private(a);
604
-
605
- #if defined(SIMDE_SSE4_1_NEON)
606
- uint8x16_t u8x16 = a_.neon_u8;
607
- uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16));
608
- r_.neon_u32 = vmovl_u16(vget_low_u16(u16x8));
609
- #else
610
- SIMDE__VECTORIZE
611
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
612
- r_.i32[i] = a_.u8[i];
613
- }
614
- #endif
615
-
616
- return simde__m128i_from_private(r_);
617
- #endif
618
- }
619
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
620
- # define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a)
621
- #endif
622
-
623
- SIMDE__FUNCTION_ATTRIBUTES
624
- simde__m128i
625
- simde_mm_cvtepu8_epi64 (simde__m128i a) {
626
- #if defined(SIMDE_SSE4_1_NATIVE)
627
- return _mm_cvtepu8_epi64(a);
628
- #else
629
- simde__m128i_private
630
- r_,
631
- a_ = simde__m128i_to_private(a);
632
-
633
- SIMDE__VECTORIZE
634
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
635
- r_.i64[i] = a_.u8[i];
636
- }
637
-
638
- return simde__m128i_from_private(r_);
639
- #endif
640
- }
641
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
642
- # define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a)
643
- #endif
644
-
645
- SIMDE__FUNCTION_ATTRIBUTES
646
- simde__m128i
647
- simde_mm_cvtepi16_epi32 (simde__m128i a) {
648
- #if defined(SIMDE_SSE4_1_NATIVE)
649
- return _mm_cvtepi16_epi32(a);
650
- #else
651
- simde__m128i_private
652
- r_,
653
- a_ = simde__m128i_to_private(a);
654
-
655
- #if defined(SIMDE_SSE4_1_NEON)
656
- r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16));
657
- #else
658
- SIMDE__VECTORIZE
659
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
660
- r_.i32[i] = a_.i16[i];
661
- }
662
- #endif
663
-
664
- return simde__m128i_from_private(r_);
665
- #endif
666
- }
667
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
668
- # define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a)
669
- #endif
670
-
671
- SIMDE__FUNCTION_ATTRIBUTES
672
- simde__m128i
673
- simde_mm_cvtepu16_epi32 (simde__m128i a) {
674
- #if defined(SIMDE_SSE4_1_NATIVE)
675
- return _mm_cvtepu16_epi32(a);
676
- #else
677
- simde__m128i_private
678
- r_,
679
- a_ = simde__m128i_to_private(a);
680
-
681
- #if defined(SIMDE__CONVERT_VECTOR) && !defined(SIMDE_BUG_CLANG_45541)
682
- SIMDE__CONVERT_VECTOR(r_.i32, a_.m64_private[0].u16);
683
- #else
684
- SIMDE__VECTORIZE
685
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
686
- r_.i32[i] = a_.u16[i];
687
- }
688
- #endif
689
-
690
- return simde__m128i_from_private(r_);
691
- #endif
692
- }
693
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
694
- # define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a)
695
- #endif
696
-
697
- SIMDE__FUNCTION_ATTRIBUTES
698
- simde__m128i
699
- simde_mm_cvtepu16_epi64 (simde__m128i a) {
700
- #if defined(SIMDE_SSE4_1_NATIVE)
701
- return _mm_cvtepu16_epi64(a);
702
- #else
703
- simde__m128i_private
704
- r_,
705
- a_ = simde__m128i_to_private(a);
706
-
707
- SIMDE__VECTORIZE
708
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
709
- r_.i64[i] = a_.u16[i];
710
- }
711
-
712
- return simde__m128i_from_private(r_);
713
- #endif
714
- }
715
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
716
- # define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a)
717
- #endif
718
-
719
- SIMDE__FUNCTION_ATTRIBUTES
720
- simde__m128i
721
- simde_mm_cvtepi16_epi64 (simde__m128i a) {
722
- #if defined(SIMDE_SSE4_1_NATIVE)
723
- return _mm_cvtepi16_epi64(a);
724
- #else
725
- simde__m128i_private
726
- r_,
727
- a_ = simde__m128i_to_private(a);
728
-
729
- SIMDE__VECTORIZE
730
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
731
- r_.i64[i] = a_.i16[i];
732
- }
733
-
734
- return simde__m128i_from_private(r_);
735
- #endif
736
- }
737
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
738
- # define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a)
739
- #endif
740
-
741
- SIMDE__FUNCTION_ATTRIBUTES
742
- simde__m128i
743
- simde_mm_cvtepi32_epi64 (simde__m128i a) {
744
- #if defined(SIMDE_SSE4_1_NATIVE)
745
- return _mm_cvtepi32_epi64(a);
746
- #else
747
- simde__m128i_private
748
- r_,
749
- a_ = simde__m128i_to_private(a);
750
-
751
- #if defined(SIMDE__CONVERT_VECTOR)
752
- SIMDE__CONVERT_VECTOR(r_.i64, a_.m64_private[0].i32);
753
- #else
754
- SIMDE__VECTORIZE
755
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
756
- r_.i64[i] = a_.i32[i];
757
- }
758
- #endif
759
-
760
- return simde__m128i_from_private(r_);
761
- #endif
762
- }
763
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
764
- # define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a)
765
- #endif
766
-
767
- SIMDE__FUNCTION_ATTRIBUTES
768
- simde__m128i
769
- simde_mm_cvtepu32_epi64 (simde__m128i a) {
770
- #if defined(SIMDE_SSE4_1_NATIVE)
771
- return _mm_cvtepu32_epi64(a);
772
- #else
773
- simde__m128i_private
774
- r_,
775
- a_ = simde__m128i_to_private(a);
776
-
777
- #if defined(SIMDE__CONVERT_VECTOR)
778
- SIMDE__CONVERT_VECTOR(r_.i64, a_.m64_private[0].u32);
779
- #else
780
- SIMDE__VECTORIZE
781
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
782
- r_.i64[i] = a_.u32[i];
783
- }
784
- #endif
785
-
786
- return simde__m128i_from_private(r_);
787
- #endif
788
- }
789
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
790
- # define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a)
791
- #endif
792
-
793
- SIMDE__FUNCTION_ATTRIBUTES
794
- simde__m128d
795
- simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8)
796
- HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
797
- simde__m128d_private
798
- r_,
799
- a_ = simde__m128d_to_private(a),
800
- b_ = simde__m128d_to_private(b);
801
-
802
- simde_float64 sum = SIMDE_FLOAT64_C(0.0);
803
-
804
- SIMDE__VECTORIZE_REDUCTION(+:sum)
805
- for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
806
- sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0;
807
- }
808
-
809
- SIMDE__VECTORIZE
810
- for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
811
- r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0;
812
- }
813
-
814
- return simde__m128d_from_private(r_);
815
- }
816
- #if defined(SIMDE_SSE4_1_NATIVE)
817
- # define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8)
818
- #endif
819
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
820
- # define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8)
821
- #endif
822
-
823
- SIMDE__FUNCTION_ATTRIBUTES
824
- simde__m128
825
- simde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8)
826
- HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
827
- simde__m128_private
828
- r_,
829
- a_ = simde__m128_to_private(a),
830
- b_ = simde__m128_to_private(b);
831
-
832
- simde_float32 sum = SIMDE_FLOAT32_C(0.0);
833
-
834
- SIMDE__VECTORIZE_REDUCTION(+:sum)
835
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
836
- sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0);
837
- }
838
-
839
- SIMDE__VECTORIZE
840
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
841
- r_.f32[i] = ((imm8 >> i) & 1) ? sum : SIMDE_FLOAT32_C(0.0);
842
- }
843
-
844
- return simde__m128_from_private(r_);
845
- }
846
- #if defined(SIMDE_SSE4_1_NATIVE)
847
- # define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8)
848
- #endif
849
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
850
- # define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8)
851
- #endif
852
-
853
- #if defined(simde_mm_extract_epi8)
854
- # undef simde_mm_extract_epi8
855
- #endif
856
- SIMDE__FUNCTION_ATTRIBUTES
857
- int8_t
858
- simde_mm_extract_epi8 (simde__m128i a, const int imm8)
859
- HEDLEY_REQUIRE_MSG((imm8 & 0xf) == imm8, "imm8 must be in range [0, 15]") {
860
- simde__m128i_private
861
- a_ = simde__m128i_to_private(a);
862
-
863
- return a_.i8[imm8&15];
864
- }
865
- #if defined(SIMDE_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8)
866
- # define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8))
867
- #elif defined(SIMDE_SSE4_1_NEON)
868
- # define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_private(a).neon_i8, imm8)
869
- #endif
870
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
871
- # define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8))
872
- #endif
873
-
874
- #if defined(simde_mm_extract_epi32)
875
- # undef simde_mm_extract_epi32
876
- #endif
877
- SIMDE__FUNCTION_ATTRIBUTES
878
- int32_t
879
- simde_mm_extract_epi32 (simde__m128i a, const int imm8)
880
- HEDLEY_REQUIRE_MSG((imm8 & 3) == imm8, "imm8 must be in range [0, 3]") {
881
- simde__m128i_private
882
- a_ = simde__m128i_to_private(a);
883
-
884
- return a_.i32[imm8&3];
885
- }
886
- #if defined(SIMDE_SSE4_1_NATIVE)
887
- # define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8)
888
- #elif defined(SIMDE_SSE4_1_NEON)
889
- # define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_private(a).neon_i32, imm8)
890
- #endif
891
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
892
- # define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8)
893
- #endif
894
-
895
- #if defined(simde_mm_extract_epi64)
896
- # undef simde_mm_extract_epi64
897
- #endif
898
- SIMDE__FUNCTION_ATTRIBUTES
899
- int64_t
900
- simde_mm_extract_epi64 (simde__m128i a, const int imm8)
901
- HEDLEY_REQUIRE_MSG((imm8 & 1) == imm8, "imm8 must be 0 or 1") {
902
- simde__m128i_private
903
- a_ = simde__m128i_to_private(a);
904
-
905
- return a_.i64[imm8&1];
906
- }
907
- #if defined(SIMDE_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64)
908
- # define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8)
909
- #elif defined(SIMDE_SSE4_1_NEON)
910
- # define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_private(a).neon_i64, imm8)
911
- #endif
912
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
913
- # define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8)
914
- #endif
915
-
916
- SIMDE__FUNCTION_ATTRIBUTES
917
- simde__m128d
918
- simde_mm_floor_pd (simde__m128d a) {
919
- #if defined(SIMDE_SSE4_1_NATIVE)
920
- return _mm_floor_pd(a);
921
- #else
922
- simde__m128d_private
923
- r_,
924
- a_ = simde__m128d_to_private(a);
925
-
926
- #if defined(SIMDE_HAVE_MATH_H)
927
- SIMDE__VECTORIZE
928
- for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
929
- r_.f64[i] = floor(a_.f64[i]);
930
- }
931
- #else
932
- HEDLEY_UNREACHABLE();
933
- #endif
934
-
935
- return simde__m128d_from_private(r_);
936
- #endif
937
- }
938
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
939
- # define _mm_floor_pd(a) simde_mm_floor_pd(a)
940
- #endif
941
-
942
- SIMDE__FUNCTION_ATTRIBUTES
943
- simde__m128
944
- simde_mm_floor_ps (simde__m128 a) {
945
- #if defined(SIMDE_SSE4_1_NATIVE)
946
- return _mm_floor_ps(a);
947
- #else
948
- simde__m128_private
949
- r_,
950
- a_ = simde__m128_to_private(a);
951
-
952
- #if defined(SIMDE_HAVE_MATH_H)
953
- SIMDE__VECTORIZE
954
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
955
- r_.f32[i] = floorf(a_.f32[i]);
956
- }
957
- #else
958
- HEDLEY_UNREACHABLE();
959
- #endif
960
-
961
- return simde__m128_from_private(r_);
962
- #endif
963
- }
964
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
965
- # define _mm_floor_ps(a) simde_mm_floor_ps(a)
966
- #endif
967
-
968
- SIMDE__FUNCTION_ATTRIBUTES
969
- simde__m128d
970
- simde_mm_floor_sd (simde__m128d a, simde__m128d b) {
971
- #if defined(SIMDE_SSE4_1_NATIVE)
972
- return _mm_floor_sd(a, b);
973
- #else
974
- simde__m128d_private
975
- r_,
976
- a_ = simde__m128d_to_private(a),
977
- b_ = simde__m128d_to_private(b);
978
-
979
- #if defined(SIMDE_HAVE_MATH_H)
980
- r_.f64[0] = floor(b_.f64[0]);
981
- r_.f64[1] = a_.f64[1];
982
- #else
983
- HEDLEY_UNREACHABLE();
984
- #endif
985
-
986
- return simde__m128d_from_private(r_);
987
- #endif
988
- }
989
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
990
- # define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b)
991
- #endif
992
-
993
- SIMDE__FUNCTION_ATTRIBUTES
994
- simde__m128
995
- simde_mm_floor_ss (simde__m128 a, simde__m128 b) {
996
- #if defined(SIMDE_SSE4_1_NATIVE)
997
- return _mm_floor_ss(a, b);
998
- #elif defined(SIMDE_ASSUME_VECTORIZATION)
999
- return simde_mm_move_ss(a, simde_mm_floor_ps(b));
1000
- #else
1001
- simde__m128_private
1002
- r_,
1003
- a_ = simde__m128_to_private(a),
1004
- b_ = simde__m128_to_private(b);
1005
-
1006
- #if defined(SIMDE_HAVE_MATH_H)
1007
- r_.f32[0] = floorf(b_.f32[0]);
1008
- for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1009
- r_.f32[i] = a_.f32[i];
1010
- }
1011
- #else
1012
- HEDLEY_UNREACHABLE();
1013
- #endif
1014
-
1015
- return simde__m128_from_private(r_);
1016
- #endif
1017
- }
1018
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1019
- # define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b)
1020
- #endif
1021
-
1022
- SIMDE__FUNCTION_ATTRIBUTES
1023
- simde__m128i
1024
- simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8)
1025
- HEDLEY_REQUIRE_MSG((imm8 & 0xf) == imm8, "imm8 must be in range [0, 15]") {
1026
- simde__m128i_private
1027
- r_ = simde__m128i_to_private(a);
1028
-
1029
- r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i);
1030
-
1031
- return simde__m128i_from_private(r_);
1032
- }
1033
- #if defined(SIMDE_SSE4_1_NATIVE)
1034
- # define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8)
1035
- #endif
1036
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1037
- # define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8)
1038
- #endif
1039
-
1040
- SIMDE__FUNCTION_ATTRIBUTES
1041
- simde__m128i
1042
- simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8)
1043
- HEDLEY_REQUIRE_MSG((imm8 & 3) == imm8, "imm8 must be in range [0, 3]") {
1044
- simde__m128i_private
1045
- r_ = simde__m128i_to_private(a);
1046
-
1047
- r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i);
1048
-
1049
- return simde__m128i_from_private(r_);
1050
- }
1051
- #if defined(SIMDE_SSE4_1_NATIVE)
1052
- # define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8)
1053
- #endif
1054
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1055
- # define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8)
1056
- #endif
1057
-
1058
- SIMDE__FUNCTION_ATTRIBUTES
1059
- simde__m128i
1060
- simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8)
1061
- HEDLEY_REQUIRE_MSG((imm8 & 1) == imm8, "imm8 must be 0 or 1") {
1062
- #if defined(SIMDE_BUG_GCC_94482)
1063
- simde__m128i_private
1064
- a_ = simde__m128i_to_private(a);
1065
-
1066
- switch(imm8) {
1067
- case 0:
1068
- return simde_mm_set_epi64x(a_.i64[1], i);
1069
- break;
1070
- case 1:
1071
- return simde_mm_set_epi64x(i, a_.i64[0]);
1072
- break;
1073
- default:
1074
- HEDLEY_UNREACHABLE();
1075
- break;
1076
- }
1077
- #else
1078
- simde__m128i_private
1079
- r_ = simde__m128i_to_private(a);
1080
-
1081
- r_.i64[imm8] = i;
1082
-
1083
- return simde__m128i_from_private(r_);
1084
- #endif
1085
- }
1086
- #if defined(SIMDE_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64)
1087
- # define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8)
1088
- #endif
1089
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1090
- # define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8)
1091
- #endif
1092
-
1093
- SIMDE__FUNCTION_ATTRIBUTES
1094
- simde__m128
1095
- simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8)
1096
- HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
1097
- simde__m128_private
1098
- r_,
1099
- a_ = simde__m128_to_private(a),
1100
- b_ = simde__m128_to_private(b);
1101
-
1102
- a_.f32[0] = b_.f32[(imm8 >> 6) & 3];
1103
- a_.f32[(imm8 >> 4) & 3] = a_.f32[0];
1104
-
1105
- SIMDE__VECTORIZE
1106
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1107
- r_.f32[i] = (imm8 >> i) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i];
1108
- }
1109
-
1110
- return simde__m128_from_private(r_);
1111
- }
1112
- #if defined(SIMDE_SSE4_1_NATIVE)
1113
- # define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8)
1114
- #endif
1115
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1116
- # define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8)
1117
- #endif
1118
-
1119
- SIMDE__FUNCTION_ATTRIBUTES
1120
- simde__m128i
1121
- simde_mm_max_epi8 (simde__m128i a, simde__m128i b) {
1122
- #if defined(SIMDE_SSE4_1_NATIVE) && !defined(__PGI)
1123
- return _mm_max_epi8(a, b);
1124
- #else
1125
- simde__m128i_private
1126
- r_,
1127
- a_ = simde__m128i_to_private(a),
1128
- b_ = simde__m128i_to_private(b);
1129
-
1130
- #if defined(SIMDE_SSE4_1_NEON)
1131
- r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8);
1132
- #else
1133
- SIMDE__VECTORIZE
1134
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
1135
- r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i];
1136
- }
1137
- #endif
1138
-
1139
- return simde__m128i_from_private(r_);
1140
- #endif
1141
- }
1142
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1143
- # define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b)
1144
- #endif
1145
-
1146
- SIMDE__FUNCTION_ATTRIBUTES
1147
- simde__m128i
1148
- simde_mm_max_epi32 (simde__m128i a, simde__m128i b) {
1149
- #if defined(SIMDE_SSE4_1_NATIVE) && !defined(__PGI)
1150
- return _mm_max_epi32(a, b);
1151
- #else
1152
- simde__m128i_private
1153
- r_,
1154
- a_ = simde__m128i_to_private(a),
1155
- b_ = simde__m128i_to_private(b);
1156
-
1157
- #if defined(SIMDE_SSE4_1_NEON)
1158
- r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32);
1159
- #else
1160
- SIMDE__VECTORIZE
1161
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1162
- r_.i32[i] = a_.i32[i] > b_.i32[i] ? a_.i32[i] : b_.i32[i];
1163
- }
1164
- #endif
1165
-
1166
- return simde__m128i_from_private(r_);
1167
- #endif
1168
- }
1169
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1170
- # define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b)
1171
- #endif
1172
-
1173
- SIMDE__FUNCTION_ATTRIBUTES
1174
- simde__m128i
1175
- simde_mm_max_epu16 (simde__m128i a, simde__m128i b) {
1176
- #if defined(SIMDE_SSE4_1_NATIVE)
1177
- return _mm_max_epu16(a, b);
1178
- #else
1179
- simde__m128i_private
1180
- r_,
1181
- a_ = simde__m128i_to_private(a),
1182
- b_ = simde__m128i_to_private(b);
1183
-
1184
- #if defined(SIMDE_SSE4_1_NEON)
1185
- r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16);
1186
- #else
1187
- SIMDE__VECTORIZE
1188
- for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
1189
- r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i];
1190
- }
1191
- #endif
1192
-
1193
- return simde__m128i_from_private(r_);
1194
- #endif
1195
- }
1196
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1197
- # define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b)
1198
- #endif
1199
-
1200
- SIMDE__FUNCTION_ATTRIBUTES
1201
- simde__m128i
1202
- simde_mm_max_epu32 (simde__m128i a, simde__m128i b) {
1203
- #if defined(SIMDE_SSE4_1_NATIVE)
1204
- return _mm_max_epu32(a, b);
1205
- #else
1206
- simde__m128i_private
1207
- r_,
1208
- a_ = simde__m128i_to_private(a),
1209
- b_ = simde__m128i_to_private(b);
1210
-
1211
- #if defined(SIMDE_SSE4_1_NEON)
1212
- r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32);
1213
- #else
1214
- SIMDE__VECTORIZE
1215
- for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
1216
- r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i];
1217
- }
1218
- #endif
1219
-
1220
- return simde__m128i_from_private(r_);
1221
- #endif
1222
- }
1223
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1224
- # define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b)
1225
- #endif
1226
-
1227
- SIMDE__FUNCTION_ATTRIBUTES
1228
- simde__m128i
1229
- simde_mm_min_epi8 (simde__m128i a, simde__m128i b) {
1230
- #if defined(SIMDE_SSE4_1_NATIVE) && !defined(__PGI)
1231
- return _mm_min_epi8(a, b);
1232
- #else
1233
- simde__m128i_private
1234
- r_,
1235
- a_ = simde__m128i_to_private(a),
1236
- b_ = simde__m128i_to_private(b);
1237
-
1238
- #if defined(SIMDE_SSE4_1_NEON)
1239
- r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8);
1240
- #else
1241
- SIMDE__VECTORIZE
1242
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
1243
- r_.i8[i] = a_.i8[i] < b_.i8[i] ? a_.i8[i] : b_.i8[i];
1244
- }
1245
- #endif
1246
-
1247
- return simde__m128i_from_private(r_);
1248
- #endif
1249
- }
1250
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1251
- # define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b)
1252
- #endif
1253
-
1254
- SIMDE__FUNCTION_ATTRIBUTES
1255
- simde__m128i
1256
- simde_mm_min_epi32 (simde__m128i a, simde__m128i b) {
1257
- #if defined(SIMDE_SSE4_1_NATIVE) && !defined(__PGI)
1258
- return _mm_min_epi32(a, b);
1259
- #else
1260
- simde__m128i_private
1261
- r_,
1262
- a_ = simde__m128i_to_private(a),
1263
- b_ = simde__m128i_to_private(b);
1264
-
1265
- #if defined(SIMDE_SSE4_1_NEON)
1266
- r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32);
1267
- #else
1268
- SIMDE__VECTORIZE
1269
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1270
- r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i];
1271
- }
1272
- #endif
1273
-
1274
- return simde__m128i_from_private(r_);
1275
- #endif
1276
- }
1277
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1278
- # define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b)
1279
- #endif
1280
-
1281
- SIMDE__FUNCTION_ATTRIBUTES
1282
- simde__m128i
1283
- simde_mm_min_epu16 (simde__m128i a, simde__m128i b) {
1284
- #if defined(SIMDE_SSE4_1_NATIVE)
1285
- return _mm_min_epu16(a, b);
1286
- #else
1287
- simde__m128i_private
1288
- r_,
1289
- a_ = simde__m128i_to_private(a),
1290
- b_ = simde__m128i_to_private(b);
1291
-
1292
- #if defined(SIMDE_SSE4_1_NEON)
1293
- r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16);
1294
- #else
1295
- SIMDE__VECTORIZE
1296
- for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
1297
- r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i];
1298
- }
1299
- #endif
1300
-
1301
- return simde__m128i_from_private(r_);
1302
- #endif
1303
- }
1304
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1305
- # define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b)
1306
- #endif
1307
-
1308
- SIMDE__FUNCTION_ATTRIBUTES
1309
- simde__m128i
1310
- simde_mm_min_epu32 (simde__m128i a, simde__m128i b) {
1311
- #if defined(SIMDE_SSE4_1_NATIVE)
1312
- return _mm_min_epu32(a, b);
1313
- #else
1314
- simde__m128i_private
1315
- r_,
1316
- a_ = simde__m128i_to_private(a),
1317
- b_ = simde__m128i_to_private(b);
1318
-
1319
- #if defined(SIMDE_SSE4_1_NEON)
1320
- r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32);
1321
- #else
1322
- SIMDE__VECTORIZE
1323
- for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
1324
- r_.u32[i] = a_.u32[i] < b_.u32[i] ? a_.u32[i] : b_.u32[i];
1325
- }
1326
- #endif
1327
-
1328
- return simde__m128i_from_private(r_);
1329
- #endif
1330
- }
1331
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1332
- # define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b)
1333
- #endif
1334
-
1335
- SIMDE__FUNCTION_ATTRIBUTES
1336
- simde__m128i
1337
- simde_mm_minpos_epu16 (simde__m128i a) {
1338
- #if defined(SIMDE_SSE4_1_NATIVE)
1339
- return _mm_minpos_epu16(a);
1340
- #else
1341
- simde__m128i_private
1342
- r_ = simde__m128i_to_private(simde_mm_setzero_si128()),
1343
- a_ = simde__m128i_to_private(a);
1344
-
1345
- r_.u16[0] = UINT16_MAX;
1346
- for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
1347
- if (a_.u16[i] < r_.u16[0]) {
1348
- r_.u16[0] = a_.u16[i];
1349
- r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i);
1350
- }
1351
- }
1352
-
1353
- return simde__m128i_from_private(r_);
1354
- #endif
1355
- }
1356
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1357
- # define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a)
1358
- #endif
1359
-
1360
- SIMDE__FUNCTION_ATTRIBUTES
1361
- simde__m128i
1362
- simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8)
1363
- HEDLEY_REQUIRE_MSG((imm8 & 7) == imm8, "imm8 must be in range [0, 7]") {
1364
- simde__m128i_private
1365
- r_,
1366
- a_ = simde__m128i_to_private(a),
1367
- b_ = simde__m128i_to_private(b);
1368
-
1369
- const int a_offset = imm8 & 4;
1370
- const int b_offset = (imm8 & 3) << 2;
1371
-
1372
- #if defined(SIMDE_HAVE_MATH_H)
1373
- for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) {
1374
- r_.u16[i] =
1375
- HEDLEY_STATIC_CAST(uint16_t, abs(a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0])) +
1376
- HEDLEY_STATIC_CAST(uint16_t, abs(a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1])) +
1377
- HEDLEY_STATIC_CAST(uint16_t, abs(a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2])) +
1378
- HEDLEY_STATIC_CAST(uint16_t, abs(a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]));
1379
- }
1380
- #else
1381
- HEDLEY_UNREACHABLE();
1382
- #endif
1383
-
1384
- return simde__m128i_from_private(r_);
1385
- }
1386
- #if defined(SIMDE_SSE4_1_NATIVE)
1387
- # define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8)
1388
- #endif
1389
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1390
- # define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8)
1391
- #endif
1392
-
1393
- SIMDE__FUNCTION_ATTRIBUTES
1394
- simde__m128i
1395
- simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) {
1396
- #if defined(SIMDE_SSE4_1_NATIVE)
1397
- return _mm_mul_epi32(a, b);
1398
- #else
1399
- simde__m128i_private
1400
- r_,
1401
- a_ = simde__m128i_to_private(a),
1402
- b_ = simde__m128i_to_private(b);
1403
-
1404
- SIMDE__VECTORIZE
1405
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
1406
- r_.i64[i] =
1407
- HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) *
1408
- HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]);
1409
- }
1410
-
1411
- return simde__m128i_from_private(r_);
1412
- #endif
1413
- }
1414
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1415
- # define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b)
1416
- #endif
1417
-
1418
- SIMDE__FUNCTION_ATTRIBUTES
1419
- simde__m128i
1420
- simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) {
1421
- #if defined(SIMDE_SSE4_1_NATIVE)
1422
- return _mm_mullo_epi32(a, b);
1423
- #else
1424
- simde__m128i_private
1425
- r_,
1426
- a_ = simde__m128i_to_private(a),
1427
- b_ = simde__m128i_to_private(b);
1428
-
1429
- #if defined(SIMDE_SSE4_1_NEON)
1430
- r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32);
1431
- #else
1432
- SIMDE__VECTORIZE
1433
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1434
- r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff));
1435
- }
1436
- #endif
1437
-
1438
- return simde__m128i_from_private(r_);
1439
- #endif
1440
- }
1441
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1442
- # define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b)
1443
- #endif
1444
-
1445
- SIMDE__FUNCTION_ATTRIBUTES
1446
- simde__m128i
1447
- simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) {
1448
- #if defined(SIMDE_SSE4_1_NATIVE)
1449
- return _mm_packus_epi32(a, b);
1450
- #else
1451
- simde__m128i_private
1452
- r_,
1453
- a_ = simde__m128i_to_private(a),
1454
- b_ = simde__m128i_to_private(b);
1455
-
1456
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1457
- r_.u16[i + 0] = (a_.i32[i] < 0) ? UINT16_C(0) : ((a_.i32[i] > UINT16_MAX) ? (UINT16_MAX) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[i]));
1458
- r_.u16[i + 4] = (b_.i32[i] < 0) ? UINT16_C(0) : ((b_.i32[i] > UINT16_MAX) ? (UINT16_MAX) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[i]));
1459
- }
1460
- return simde__m128i_from_private(r_);
1461
- #endif
1462
- }
1463
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1464
- # define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b)
1465
- #endif
1466
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128d
- simde_mm_round_pd (simde__m128d a, int rounding) {
-   simde__m128d_private
-     r_,
-     a_ = simde__m128d_to_private(a);
-
- #if defined(SIMDE_HAVE_MATH_H)
-   for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
-     switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {
-       case SIMDE_MM_FROUND_TO_NEAREST_INT:
-         r_.f64[i] = nearbyint(a_.f64[i]);
-         break;
-       case SIMDE_MM_FROUND_TO_NEG_INF:
-         r_.f64[i] = floor(a_.f64[i]);
-         break;
-       case SIMDE_MM_FROUND_TO_POS_INF:
-         r_.f64[i] = ceil(a_.f64[i]);
-         break;
-       case SIMDE_MM_FROUND_TO_ZERO:
-         r_.f64[i] = trunc(a_.f64[i]);
-         break;
-       case SIMDE_MM_FROUND_CUR_DIRECTION:
-         r_.f64[i] = nearbyint(a_.f64[i]);
-         break;
-       default:
-         HEDLEY_UNREACHABLE();
-         break;
-     }
-   }
- #else
-   HEDLEY_UNREACHABLE();
- #endif
-
-   return simde__m128d_from_private(r_);
- }
- #if defined(SIMDE_SSE4_1_NATIVE)
- # define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding)
- #endif
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
- # define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding)
- #endif
-
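The same rounding-mode dispatch recurs in the _ps, _sd and _ss variants below: mask off SIMDE_MM_FROUND_NO_EXC, then map each remaining mode onto a <math.h> function. A standalone sketch using the FROUND bit encodings from Intel's <smmintrin.h> (the local FROUND_* macros and round_mode are illustrative only):

/* Rounding-mode dispatch as in the _mm_round_pd fallback above.
 * The constants mirror Intel's _MM_FROUND_* values. */
#include <math.h>
#include <stdio.h>

#define FROUND_TO_NEAREST_INT 0x00
#define FROUND_TO_NEG_INF     0x01
#define FROUND_TO_POS_INF     0x02
#define FROUND_TO_ZERO        0x03
#define FROUND_NO_EXC         0x08

static double round_mode(double x, int rounding) {
  switch (rounding & ~FROUND_NO_EXC) {  /* NO_EXC is masked off first */
    case FROUND_TO_NEAREST_INT: return nearbyint(x);
    case FROUND_TO_NEG_INF:     return floor(x);
    case FROUND_TO_POS_INF:     return ceil(x);
    case FROUND_TO_ZERO:        return trunc(x);
    default:                    return nearbyint(x); /* CUR_DIRECTION */
  }
}

int main(void) {
  double x = -2.5;
  printf("%g %g %g %g\n",
         round_mode(x, FROUND_TO_NEAREST_INT),             /* -2 (ties to even)   */
         round_mode(x, FROUND_TO_NEG_INF),                 /* -3                  */
         round_mode(x, FROUND_TO_POS_INF | FROUND_NO_EXC), /* -2, NO_EXC ignored  */
         round_mode(x, FROUND_TO_ZERO));                   /* -2                  */
  return 0;
}

(Compile with -lm on most toolchains.)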
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_round_ps (simde__m128 a, int rounding) {
-   simde__m128_private
-     r_,
-     a_ = simde__m128_to_private(a);
-
- #if defined(SIMDE_HAVE_MATH_H)
-   for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
-     switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {
-       case SIMDE_MM_FROUND_TO_NEAREST_INT:
-         r_.f32[i] = nearbyintf(a_.f32[i]);
-         break;
-       case SIMDE_MM_FROUND_TO_NEG_INF:
-         r_.f32[i] = floorf(a_.f32[i]);
-         break;
-       case SIMDE_MM_FROUND_TO_POS_INF:
-         r_.f32[i] = ceilf(a_.f32[i]);
-         break;
-       case SIMDE_MM_FROUND_TO_ZERO:
-         r_.f32[i] = truncf(a_.f32[i]);
-         break;
-       case SIMDE_MM_FROUND_CUR_DIRECTION:
-         r_.f32[i] = nearbyintf(a_.f32[i]);
-         break;
-       default:
-         HEDLEY_UNREACHABLE();
-         break;
-     }
-   }
- #else
-   HEDLEY_UNREACHABLE();
- #endif
-
-   return simde__m128_from_private(r_);
- }
- #if defined(SIMDE_SSE4_1_NATIVE)
- # define simde_mm_round_ps(a, rounding) _mm_round_ps(a, rounding)
- #endif
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
- # define _mm_round_ps(a, rounding) simde_mm_round_ps(a, rounding)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128d
- simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) {
-   simde__m128d_private
-     r_ = simde__m128d_to_private(a),
-     b_ = simde__m128d_to_private(b);
-
- #if defined(SIMDE_HAVE_MATH_H)
-   switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {
-     case SIMDE_MM_FROUND_TO_NEAREST_INT:
-       r_.f64[0] = nearbyint(b_.f64[0]);
-       break;
-     case SIMDE_MM_FROUND_TO_NEG_INF:
-       r_.f64[0] = floor(b_.f64[0]);
-       break;
-     case SIMDE_MM_FROUND_TO_POS_INF:
-       r_.f64[0] = ceil(b_.f64[0]);
-       break;
-     case SIMDE_MM_FROUND_TO_ZERO:
-       r_.f64[0] = trunc(b_.f64[0]);
-       break;
-     case SIMDE_MM_FROUND_CUR_DIRECTION:
-       r_.f64[0] = nearbyint(b_.f64[0]);
-       break;
-     default:
-       HEDLEY_UNREACHABLE();
-       break;
-   }
- #else
-   HEDLEY_UNREACHABLE();
- #endif
-
-   return simde__m128d_from_private(r_);
- }
- #if defined(SIMDE_SSE4_1_NATIVE)
- # define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding)
- #endif
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
- # define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding)
- #endif
-
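One detail worth noting in the scalar variants: r_ starts as a copy of a, so only element 0 is replaced (with the rounded low element of b) while the upper element passes through from a. A minimal standalone illustration, hard-wired to the floor (TO_NEG_INF) case:

/* Merge behaviour of the _mm_round_sd fallback above: round b[0],
 * keep a[1]. */
#include <math.h>
#include <stdio.h>

int main(void) {
  double a[2] = { 1.25, 9.75 };   /* upper lane passes through */
  double b[2] = { 2.5,  3.5 };    /* only b[0] matters         */
  double r[2] = { a[0], a[1] };
  r[0] = floor(b[0]);             /* the TO_NEG_INF branch     */
  printf("%g %g\n", r[0], r[1]);  /* 2 9.75 */
  return 0;
}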
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding) {
-   simde__m128_private
-     r_ = simde__m128_to_private(a),
-     b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE_HAVE_MATH_H)
-   switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {
-     case SIMDE_MM_FROUND_TO_NEAREST_INT:
-       r_.f32[0] = nearbyintf(b_.f32[0]);
-       break;
-     case SIMDE_MM_FROUND_TO_NEG_INF:
-       r_.f32[0] = floorf(b_.f32[0]);
-       break;
-     case SIMDE_MM_FROUND_TO_POS_INF:
-       r_.f32[0] = ceilf(b_.f32[0]);
-       break;
-     case SIMDE_MM_FROUND_TO_ZERO:
-       r_.f32[0] = truncf(b_.f32[0]);
-       break;
-     case SIMDE_MM_FROUND_CUR_DIRECTION:
-       r_.f32[0] = nearbyintf(b_.f32[0]);
-       break;
-     default:
-       HEDLEY_UNREACHABLE();
-       break;
-   }
- #else
-   HEDLEY_UNREACHABLE();
- #endif
-
-   return simde__m128_from_private(r_);
- }
- #if defined(SIMDE_SSE4_1_NATIVE)
- # define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding)
- #endif
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
- # define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_stream_load_si128 (const simde__m128i* mem_addr) {
- #if defined(SIMDE_SSE4_1_NATIVE)
-   return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr));
- #else
-   return *mem_addr;
- #endif
- }
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
- # define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- int
- simde_mm_test_all_ones (simde__m128i a) {
- #if defined(SIMDE_SSE4_1_NATIVE)
-   return _mm_test_all_ones(a);
- #else
-   simde__m128i_private a_ = simde__m128i_to_private(a);
-
-   for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) {
-     if (a_.u64[i] != ~UINT64_C(0))
-       return 0;
-   }
-
-   return 1;
- #endif
- }
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
- # define _mm_test_all_ones(a) simde_mm_test_all_ones(a)
- #endif
-
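The predicate above returns 1 only when every bit of the 128-bit value is set, i.e. both u64 lanes equal ~UINT64_C(0). A standalone scalar model (test_all_ones here is a local illustrative function, not the intrinsic):

/* Scalar model of the _mm_test_all_ones fallback above. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static int test_all_ones(const uint64_t v[2]) {
  for (size_t i = 0; i < 2; i++) {
    if (v[i] != ~UINT64_C(0))
      return 0;
  }
  return 1;
}

int main(void) {
  const uint64_t ones[2]  = { ~UINT64_C(0), ~UINT64_C(0) };
  const uint64_t mixed[2] = { ~UINT64_C(0), 1 };
  printf("%d %d\n", test_all_ones(ones), test_all_ones(mixed)); /* 1 0 */
  return 0;
}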
- SIMDE__FUNCTION_ATTRIBUTES
- int
- simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) {
- #if defined(SIMDE_SSE4_1_NATIVE)
-   return _mm_test_all_zeros(a, mask);
- #else
-   simde__m128i_private
-     a_ = simde__m128i_to_private(a),
-     mask_ = simde__m128i_to_private(mask);
-
-   for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) {
-     if ((a_.u64[i] & mask_.u64[i]) != 0)
-       return 0;
-   }
-
-   return 1;
- #endif
- }
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
- # define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- int
- simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) {
- #if defined(SIMDE_SSE4_1_NATIVE)
-   return _mm_test_mix_ones_zeros(a, mask);
- #else
-   simde__m128i_private
-     a_ = simde__m128i_to_private(a),
-     mask_ = simde__m128i_to_private(mask);
-
-   for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++)
-     if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0))
-       return 1;
-
-   return 0;
- #endif
- }
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
- # define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- int
- simde_mm_testc_si128 (simde__m128i a, simde__m128i b) {
- #if defined(SIMDE_SSE4_1_NATIVE)
-   return _mm_testc_si128(a, b);
- #else
-   simde__m128i_private
-     a_ = simde__m128i_to_private(a),
-     b_ = simde__m128i_to_private(b);
-
-   int_fast32_t r = 0;
-
-   SIMDE__VECTORIZE_REDUCTION(|:r)
-   for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) {
-     r |= ~a_.i32f[i] & b_.i32f[i];
-   }
-
-   return HEDLEY_STATIC_CAST(int, !r);
- #endif
- }
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
- # define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b)
- #endif
-
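simde_mm_testc_si128 above ORs ~a & b across the lanes and returns 1 iff nothing survives, i.e. every bit set in b is also set in a (the carry-flag result of PTEST). A standalone sketch over two u64 lanes (the real code iterates the i32f view; the subset logic is identical):

/* Scalar model of the _mm_testc_si128 fallback above: returns 1
 * iff b's set bits are a subset of a's. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static int testc(const uint64_t a[2], const uint64_t b[2]) {
  uint64_t r = 0;
  for (size_t i = 0; i < 2; i++)
    r |= ~a[i] & b[i];   /* any bit of b missing from a survives here */
  return !r;
}

int main(void) {
  const uint64_t a[2]       = { 0xff00, 0xffff };
  const uint64_t sub[2]     = { 0x0f00, 0x00ff }; /* subset of a's bits */
  const uint64_t not_sub[2] = { 0x000f, 0 };      /* 0x000f not in a    */
  printf("%d %d\n", testc(a, sub), testc(a, not_sub)); /* 1 0 */
  return 0;
}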
- SIMDE__FUNCTION_ATTRIBUTES
- int
- simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) {
- #if defined(SIMDE_SSE4_1_NATIVE)
-   return _mm_testnzc_si128(a, b);
- #else
-   simde__m128i_private
-     a_ = simde__m128i_to_private(a),
-     b_ = simde__m128i_to_private(b);
-
-   for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) {
-     if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0))
-       return 1;
-   }
-
-   return 0;
- #endif
- }
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
- # define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b)
- #endif
-
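A caveat on the fallback above: Intel defines _mm_testnzc_si128 as returning 1 when a & b is non-zero somewhere in the register AND ~a & b is non-zero somewhere in the register, with the two conditions evaluated independently over the full 128 bits. The loop above (and the one in simde_mm_test_mix_ones_zeros earlier) requires both conditions to hold within the same 64-bit lane, which can disagree in cross-lane cases. A standalone sketch of the whole-register formulation:

/* _mm_testnzc_si128 semantics computed over the whole 128-bit
 * value, for comparison with the per-lane loop above. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static int testnzc(const uint64_t a[2], const uint64_t b[2]) {
  uint64_t and_ = 0, andn = 0;
  for (size_t i = 0; i < 2; i++) {
    and_ |= a[i] & b[i];   /* ZF == 0 iff this ends up non-zero */
    andn |= ~a[i] & b[i];  /* CF == 0 iff this ends up non-zero */
  }
  return (and_ != 0) && (andn != 0);
}

int main(void) {
  const uint64_t a[2] = { 1, 0 };
  const uint64_t b[2] = { 1, 2 };  /* a&b != 0 only in lane 0; ~a&b != 0 only in lane 1 */
  printf("%d\n", testnzc(a, b));   /* 1; the per-lane version above returns 0 */
  return 0;
}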
- SIMDE__FUNCTION_ATTRIBUTES
- int
- simde_mm_testz_si128 (simde__m128i a, simde__m128i b) {
- #if defined(SIMDE_SSE4_1_NATIVE)
-   return _mm_testz_si128(a, b);
- #else
-   simde__m128i_private
-     a_ = simde__m128i_to_private(a),
-     b_ = simde__m128i_to_private(b);
-
-   for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) {
-     if ((a_.u64[i] & b_.u64[i]) == 0)
-       return 1;
-   }
-
-   return 0;
- #endif
- }
- #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
- # define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b)
- #endif
-
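The fallback above looks inverted relative to Intel's definition: _mm_testz_si128 returns 1 (the ZF result) only when a & b is zero across the entire register, whereas the loop above returns 1 as soon as any single 64-bit lane of a & b is zero. A standalone sketch of the whole-register semantics, with an input on which the two formulations disagree:

/* _mm_testz_si128 per Intel's definition: 1 iff a & b == 0 over
 * all 128 bits.  With a = b = {1, 0}, the per-lane loop above
 * returns 1 (lane 1 ANDs to zero); this version returns 0. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static int testz(const uint64_t a[2], const uint64_t b[2]) {
  uint64_t and_ = 0;
  for (size_t i = 0; i < 2; i++)
    and_ |= a[i] & b[i];
  return and_ == 0;
}

int main(void) {
  const uint64_t a[2] = { 1, 0 };
  printf("%d\n", testz(a, a)); /* 0: lane 0 overlaps even though lane 1 is zero */
  return 0;
}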
- SIMDE__END_DECLS
-
- HEDLEY_DIAGNOSTIC_POP
-
- #endif /* !defined(SIMDE__SSE4_1_H) */