minimap2 0.2.25.0 → 0.2.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123):
  1. checksums.yaml +4 -4
  2. data/README.md +2 -3
  3. data/ext/minimap2/Makefile +6 -2
  4. data/ext/minimap2/NEWS.md +38 -0
  5. data/ext/minimap2/README.md +9 -3
  6. data/ext/minimap2/align.c +5 -3
  7. data/ext/minimap2/cookbook.md +2 -2
  8. data/ext/minimap2/format.c +7 -4
  9. data/ext/minimap2/kalloc.c +20 -1
  10. data/ext/minimap2/kalloc.h +13 -2
  11. data/ext/minimap2/ksw2.h +1 -0
  12. data/ext/minimap2/ksw2_extd2_sse.c +1 -1
  13. data/ext/minimap2/ksw2_exts2_sse.c +79 -40
  14. data/ext/minimap2/ksw2_extz2_sse.c +1 -1
  15. data/ext/minimap2/lchain.c +15 -16
  16. data/ext/minimap2/lib/simde/CONTRIBUTING.md +114 -0
  17. data/ext/minimap2/lib/simde/COPYING +20 -0
  18. data/ext/minimap2/lib/simde/README.md +333 -0
  19. data/ext/minimap2/lib/simde/amalgamate.py +58 -0
  20. data/ext/minimap2/lib/simde/meson.build +33 -0
  21. data/ext/minimap2/lib/simde/netlify.toml +20 -0
  22. data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +140 -0
  23. data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +137 -0
  24. data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +142 -0
  25. data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +145 -0
  26. data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +140 -0
  27. data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +145 -0
  28. data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +140 -0
  29. data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +143 -0
  30. data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +137 -0
  31. data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +141 -0
  32. data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +147 -0
  33. data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +141 -0
  34. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +134 -0
  35. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +138 -0
  36. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +134 -0
  37. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +137 -0
  38. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +131 -0
  39. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +135 -0
  40. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +141 -0
  41. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +135 -0
  42. data/ext/minimap2/lib/simde/simde/arm/neon.h +97 -0
  43. data/ext/minimap2/lib/simde/simde/check.h +267 -0
  44. data/ext/minimap2/lib/simde/simde/debug-trap.h +83 -0
  45. data/ext/minimap2/lib/simde/simde/hedley.h +1899 -0
  46. data/ext/minimap2/lib/simde/simde/simde-arch.h +445 -0
  47. data/ext/minimap2/lib/simde/simde/simde-common.h +697 -0
  48. data/ext/minimap2/lib/simde/simde/x86/avx.h +5385 -0
  49. data/ext/minimap2/lib/simde/simde/x86/avx2.h +2402 -0
  50. data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +391 -0
  51. data/ext/minimap2/lib/simde/simde/x86/avx512f.h +3389 -0
  52. data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +112 -0
  53. data/ext/minimap2/lib/simde/simde/x86/fma.h +659 -0
  54. data/ext/minimap2/lib/simde/simde/x86/mmx.h +2210 -0
  55. data/ext/minimap2/lib/simde/simde/x86/sse.h +3696 -0
  56. data/ext/minimap2/lib/simde/simde/x86/sse2.h +5991 -0
  57. data/ext/minimap2/lib/simde/simde/x86/sse3.h +343 -0
  58. data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +1783 -0
  59. data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +105 -0
  60. data/ext/minimap2/lib/simde/simde/x86/ssse3.h +1053 -0
  61. data/ext/minimap2/lib/simde/simde/x86/svml.h +543 -0
  62. data/ext/minimap2/lib/simde/test/CMakeLists.txt +166 -0
  63. data/ext/minimap2/lib/simde/test/arm/meson.build +4 -0
  64. data/ext/minimap2/lib/simde/test/arm/neon/meson.build +23 -0
  65. data/ext/minimap2/lib/simde/test/arm/neon/skel.c +871 -0
  66. data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +134 -0
  67. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +39 -0
  68. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +10 -0
  69. data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +1260 -0
  70. data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +873 -0
  71. data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +1084 -0
  72. data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +1260 -0
  73. data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +18 -0
  74. data/ext/minimap2/lib/simde/test/arm/test-arm.c +20 -0
  75. data/ext/minimap2/lib/simde/test/arm/test-arm.h +8 -0
  76. data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +171 -0
  77. data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +68 -0
  78. data/ext/minimap2/lib/simde/test/meson.build +64 -0
  79. data/ext/minimap2/lib/simde/test/munit/COPYING +21 -0
  80. data/ext/minimap2/lib/simde/test/munit/Makefile +55 -0
  81. data/ext/minimap2/lib/simde/test/munit/README.md +54 -0
  82. data/ext/minimap2/lib/simde/test/munit/example.c +351 -0
  83. data/ext/minimap2/lib/simde/test/munit/meson.build +37 -0
  84. data/ext/minimap2/lib/simde/test/munit/munit.c +2055 -0
  85. data/ext/minimap2/lib/simde/test/munit/munit.h +535 -0
  86. data/ext/minimap2/lib/simde/test/run-tests.c +20 -0
  87. data/ext/minimap2/lib/simde/test/run-tests.h +260 -0
  88. data/ext/minimap2/lib/simde/test/x86/avx.c +13752 -0
  89. data/ext/minimap2/lib/simde/test/x86/avx2.c +9977 -0
  90. data/ext/minimap2/lib/simde/test/x86/avx512bw.c +2664 -0
  91. data/ext/minimap2/lib/simde/test/x86/avx512f.c +10416 -0
  92. data/ext/minimap2/lib/simde/test/x86/avx512vl.c +210 -0
  93. data/ext/minimap2/lib/simde/test/x86/fma.c +2557 -0
  94. data/ext/minimap2/lib/simde/test/x86/meson.build +33 -0
  95. data/ext/minimap2/lib/simde/test/x86/mmx.c +2878 -0
  96. data/ext/minimap2/lib/simde/test/x86/skel.c +2984 -0
  97. data/ext/minimap2/lib/simde/test/x86/sse.c +5121 -0
  98. data/ext/minimap2/lib/simde/test/x86/sse2.c +9860 -0
  99. data/ext/minimap2/lib/simde/test/x86/sse3.c +486 -0
  100. data/ext/minimap2/lib/simde/test/x86/sse4.1.c +3446 -0
  101. data/ext/minimap2/lib/simde/test/x86/sse4.2.c +101 -0
  102. data/ext/minimap2/lib/simde/test/x86/ssse3.c +2084 -0
  103. data/ext/minimap2/lib/simde/test/x86/svml.c +1545 -0
  104. data/ext/minimap2/lib/simde/test/x86/test-avx.h +16 -0
  105. data/ext/minimap2/lib/simde/test/x86/test-avx512.h +25 -0
  106. data/ext/minimap2/lib/simde/test/x86/test-mmx.h +13 -0
  107. data/ext/minimap2/lib/simde/test/x86/test-sse.h +13 -0
  108. data/ext/minimap2/lib/simde/test/x86/test-sse2.h +13 -0
  109. data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +196 -0
  110. data/ext/minimap2/lib/simde/test/x86/test-x86.c +48 -0
  111. data/ext/minimap2/lib/simde/test/x86/test-x86.h +8 -0
  112. data/ext/minimap2/main.c +13 -6
  113. data/ext/minimap2/map.c +0 -5
  114. data/ext/minimap2/minimap.h +40 -31
  115. data/ext/minimap2/minimap2.1 +19 -5
  116. data/ext/minimap2/misc/paftools.js +545 -24
  117. data/ext/minimap2/options.c +1 -1
  118. data/ext/minimap2/pyproject.toml +2 -0
  119. data/ext/minimap2/python/mappy.pyx +3 -1
  120. data/ext/minimap2/seed.c +1 -1
  121. data/ext/minimap2/setup.py +32 -22
  122. data/lib/minimap2/version.rb +1 -1
  123. metadata +100 -3
@@ -0,0 +1,1783 @@
1
+ /* Copyright (c) 2017-2020 Evan Nemerson <evan@nemerson.com>
2
+ *
3
+ * Permission is hereby granted, free of charge, to any person
4
+ * obtaining a copy of this software and associated documentation
5
+ * files (the "Software"), to deal in the Software without
6
+ * restriction, including without limitation the rights to use, copy,
7
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
8
+ * of the Software, and to permit persons to whom the Software is
9
+ * furnished to do so, subject to the following conditions:
10
+ *
11
+ * The above copyright notice and this permission notice shall be
12
+ * included in all copies or substantial portions of the Software.
13
+ *
14
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ * SOFTWARE.
22
+ */
23
+
24
+ #if !defined(SIMDE__SSE4_1_H)
25
+ # if !defined(SIMDE__SSE4_1_H)
26
+ # define SIMDE__SSE4_1_H
27
+ # endif
28
+ # include "ssse3.h"
29
+
30
+ HEDLEY_DIAGNOSTIC_PUSH
31
+ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
32
+
33
/* Backend selection for the SSE4.1 layer.
 *
 * Any SIMDE_SSE4_1_NATIVE left over from the environment is dropped first,
 * then exactly one of SIMDE_SSE4_1_NATIVE, SIMDE_SSE4_1_NEON or
 * SIMDE_SSE4_1_POWER_ALTIVEC is defined (or none, for the portable path). */
#ifdef SIMDE_SSE4_1_NATIVE
#  undef SIMDE_SSE4_1_NATIVE
#endif
#if defined(SIMDE_ARCH_X86_SSE4_1) && !(defined(SIMDE_SSE4_1_NO_NATIVE) || defined(SIMDE_NO_NATIVE))
#  define SIMDE_SSE4_1_NATIVE
#elif defined(__ARM_NEON) && !(defined(SIMDE_SSE4_1_NO_NEON) || defined(SIMDE_NO_NEON))
#  define SIMDE_SSE4_1_NEON
#elif defined(SIMDE_ARCH_POWER_ALTIVEC)
#  define SIMDE_SSE4_1_POWER_ALTIVEC
#endif

/* A backend is only kept when the matching SSE3 backend is also active:
 * otherwise it is disabled with a warning, or with a hard error when the
 * user explicitly forced native support. */
#if defined(SIMDE_SSE4_1_NATIVE) && !defined(SIMDE_SSE3_NATIVE)
#  ifdef SIMDE_SSE4_1_FORCE_NATIVE
#    error Native SSE4.1 support requires native SSE3 support
#  else
HEDLEY_WARNING("Native SSE4.1 support requires native SSE3 support, disabling")
#    undef SIMDE_SSE4_1_NATIVE
#  endif
#elif defined(SIMDE_SSE4_1_NEON) && !defined(SIMDE_SSE3_NEON)
HEDLEY_WARNING("SSE4.1 NEON support requires SSE3 NEON support, disabling")
#  undef SIMDE_SSE4_1_NEON
#endif

/* Pull in the platform intrinsics header for the backend that survived. */
#if defined(SIMDE_SSE4_1_NATIVE)
#  include <smmintrin.h>
#elif defined(SIMDE_SSE4_1_NEON)
#  include <arm_neon.h>
#endif
63
+
64
+ SIMDE__BEGIN_DECLS
65
+
66
/* Native aliases (the Intel `_mm_*` / `_MM_*` spellings) are only provided
 * when the real SSE4.1 header is NOT in use and the user asked for them. */
#if !defined(SIMDE_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
#  define SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES
#endif

/* Rounding-direction and exception-control flags.
 *
 * With native SSE4.1 these forward to the definitions from <smmintrin.h>;
 * otherwise the numeric values are spelled out here. */
#if defined(SIMDE_SSE4_1_NATIVE)
#  define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT
#  define SIMDE_MM_FROUND_TO_NEG_INF     _MM_FROUND_TO_NEG_INF
#  define SIMDE_MM_FROUND_TO_POS_INF     _MM_FROUND_TO_POS_INF
#  define SIMDE_MM_FROUND_TO_ZERO        _MM_FROUND_TO_ZERO
#  define SIMDE_MM_FROUND_CUR_DIRECTION  _MM_FROUND_CUR_DIRECTION

#  define SIMDE_MM_FROUND_RAISE_EXC      _MM_FROUND_RAISE_EXC
#  define SIMDE_MM_FROUND_NO_EXC         _MM_FROUND_NO_EXC
#else
#  define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00
#  define SIMDE_MM_FROUND_TO_NEG_INF     0x01
#  define SIMDE_MM_FROUND_TO_POS_INF     0x02
#  define SIMDE_MM_FROUND_TO_ZERO        0x03
#  define SIMDE_MM_FROUND_CUR_DIRECTION  0x04

#  define SIMDE_MM_FROUND_RAISE_EXC      0x00
#  define SIMDE_MM_FROUND_NO_EXC         0x08
#endif

/* Convenience combinations of one rounding direction and one exception flag. */
#define SIMDE_MM_FROUND_NINT \
  (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC)
#define SIMDE_MM_FROUND_FLOOR \
  (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC)
#define SIMDE_MM_FROUND_CEIL \
  (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC)
#define SIMDE_MM_FROUND_TRUNC \
  (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC)
#define SIMDE_MM_FROUND_RINT \
  (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC)
#define SIMDE_MM_FROUND_NEARBYINT \
  (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC)

/* Intel-spelled aliases for every flag above.  _MM_FROUND_NO_EXC is included
 * so that the common `_MM_FROUND_TO_* | _MM_FROUND_NO_EXC` idiom compiles
 * when only the aliases (and not <smmintrin.h>) are available. */
#if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
#  define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT
#  define _MM_FROUND_TO_NEG_INF     SIMDE_MM_FROUND_TO_NEG_INF
#  define _MM_FROUND_TO_POS_INF     SIMDE_MM_FROUND_TO_POS_INF
#  define _MM_FROUND_TO_ZERO        SIMDE_MM_FROUND_TO_ZERO
#  define _MM_FROUND_CUR_DIRECTION  SIMDE_MM_FROUND_CUR_DIRECTION
#  define _MM_FROUND_RAISE_EXC      SIMDE_MM_FROUND_RAISE_EXC
#  define _MM_FROUND_NO_EXC         SIMDE_MM_FROUND_NO_EXC
#  define _MM_FROUND_NINT           SIMDE_MM_FROUND_NINT
#  define _MM_FROUND_FLOOR          SIMDE_MM_FROUND_FLOOR
#  define _MM_FROUND_CEIL           SIMDE_MM_FROUND_CEIL
#  define _MM_FROUND_TRUNC          SIMDE_MM_FROUND_TRUNC
#  define _MM_FROUND_RINT           SIMDE_MM_FROUND_RINT
#  define _MM_FROUND_NEARBYINT      SIMDE_MM_FROUND_NEARBYINT
#endif
117
+
118
+ SIMDE__FUNCTION_ATTRIBUTES
119
+ simde__m128i
120
+ simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8)
121
+ HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
122
+ simde__m128i_private
123
+ r_,
124
+ a_ = simde__m128i_to_private(a),
125
+ b_ = simde__m128i_to_private(b);
126
+
127
+ SIMDE__VECTORIZE
128
+ for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
129
+ r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i];
130
+ }
131
+
132
+ return simde__m128i_from_private(r_);
133
+ }
134
+ #if defined(SIMDE_SSE4_1_NATIVE)
135
+ # define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8)
136
+ #endif
137
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
138
+ # define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8)
139
+ #endif
140
+
141
+ SIMDE__FUNCTION_ATTRIBUTES
142
+ simde__m128d
143
+ simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8)
144
+ HEDLEY_REQUIRE_MSG((imm8 & 3) == imm8, "imm8 must be in range [0, 3]") {
145
+ simde__m128d_private
146
+ r_,
147
+ a_ = simde__m128d_to_private(a),
148
+ b_ = simde__m128d_to_private(b);
149
+
150
+ SIMDE__VECTORIZE
151
+ for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
152
+ r_.f64[i] = ((imm8 >> i) & 1) ? b_.f64[i] : a_.f64[i];
153
+ }
154
+ return simde__m128d_from_private(r_);
155
+ }
156
+ #if defined(SIMDE_SSE4_1_NATIVE)
157
+ # define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8)
158
+ #endif
159
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
160
+ # define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8)
161
+ #endif
162
+
163
+ SIMDE__FUNCTION_ATTRIBUTES
164
+ simde__m128
165
+ simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8)
166
+ HEDLEY_REQUIRE_MSG((imm8 & 0xf) == imm8, "imm8 must be in range [0, 15]") {
167
+ simde__m128_private
168
+ r_,
169
+ a_ = simde__m128_to_private(a),
170
+ b_ = simde__m128_to_private(b);
171
+
172
+ SIMDE__VECTORIZE
173
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
174
+ r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i];
175
+ }
176
+ return simde__m128_from_private(r_);
177
+ }
178
+ #if defined(SIMDE_SSE4_1_NATIVE)
179
+ # define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8)
180
+ #endif
181
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
182
+ # define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8)
183
+ #endif
184
+
185
+ SIMDE__FUNCTION_ATTRIBUTES
186
+ simde__m128i
187
+ simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) {
188
+ #if defined(SIMDE_SSE4_1_NATIVE)
189
+ return _mm_blendv_epi8(a, b, mask);
190
+ #else
191
+ simde__m128i_private
192
+ r_,
193
+ a_ = simde__m128i_to_private(a),
194
+ b_ = simde__m128i_to_private(b),
195
+ mask_ = simde__m128i_to_private(mask);
196
+
197
+ #if defined(SIMDE_SSE4_1_NEON)
198
+ mask_ = simde__m128i_to_private(simde_mm_cmplt_epi8(mask, simde_mm_setzero_si128()));
199
+ r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8);
200
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
201
+ /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */
202
+ #if defined(HEDLEY_INTEL_VERSION_CHECK)
203
+ __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
204
+ mask_.i8 = HEDLEY_STATIC_CAST(__typeof__(mask_.i8), mask_.i8 < z);
205
+ #else
206
+ mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1;
207
+ #endif
208
+
209
+ r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8);
210
+ #else
211
+ SIMDE__VECTORIZE
212
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
213
+ int8_t m = mask_.i8[i] >> 7;
214
+ r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]);
215
+ }
216
+ #endif
217
+
218
+ return simde__m128i_from_private(r_);
219
+ #endif
220
+ }
221
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
222
+ # define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask)
223
+ #endif
224
+
225
+ SIMDE__FUNCTION_ATTRIBUTES
226
+ simde__m128i
227
+ simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) {
228
+ #if defined(SIMDE_SSE2_NATIVE)
229
+ mask = simde_mm_srai_epi16(mask, 15);
230
+ return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a));
231
+ #else
232
+ simde__m128i_private
233
+ r_,
234
+ a_ = simde__m128i_to_private(a),
235
+ b_ = simde__m128i_to_private(b),
236
+ mask_ = simde__m128i_to_private(mask);
237
+
238
+ #if defined(SIMDE_SSE4_1_NEON)
239
+ mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128()));
240
+ r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16);
241
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
242
+ #if defined(HEDLEY_INTEL_VERSION_CHECK)
243
+ __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 };
244
+ mask_.i16 = mask_.i16 < z;
245
+ #else
246
+ mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1;
247
+ #endif
248
+
249
+ r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16);
250
+ #else
251
+ SIMDE__VECTORIZE
252
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
253
+ int16_t m = mask_.i16[i] >> 15;
254
+ r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]);
255
+ }
256
+ #endif
257
+
258
+ return simde__m128i_from_private(r_);
259
+ #endif
260
+ }
261
+
262
+ SIMDE__FUNCTION_ATTRIBUTES
263
+ simde__m128i
264
+ simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) {
265
+ #if defined(SIMDE_SSE4_1_NATIVE)
266
+ return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask)));
267
+ #else
268
+ simde__m128i_private
269
+ r_,
270
+ a_ = simde__m128i_to_private(a),
271
+ b_ = simde__m128i_to_private(b),
272
+ mask_ = simde__m128i_to_private(mask);
273
+
274
+ #if defined(SIMDE_SSE4_1_NEON)
275
+ mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128()));
276
+ r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32);
277
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
278
+ #if defined(HEDLEY_INTEL_VERSION_CHECK)
279
+ __typeof__(mask_.i32) z = { 0, 0, 0, 0 };
280
+ mask_.i32 = mask_.i32 < z;
281
+ #else
282
+ mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1;
283
+ #endif
284
+
285
+ r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32);
286
+ #else
287
+ SIMDE__VECTORIZE
288
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
289
+ int32_t m = mask_.i32[i] >> 31;
290
+ r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]);
291
+ }
292
+ #endif
293
+
294
+ return simde__m128i_from_private(r_);
295
+ #endif
296
+ }
297
+
298
+ SIMDE__FUNCTION_ATTRIBUTES
299
+ simde__m128i
300
+ simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) {
301
+ #if defined(SIMDE_SSE4_1_NATIVE)
302
+ return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask)));
303
+ #else
304
+ simde__m128i_private
305
+ r_,
306
+ a_ = simde__m128i_to_private(a),
307
+ b_ = simde__m128i_to_private(b),
308
+ mask_ = simde__m128i_to_private(mask);
309
+
310
+ #if defined(SIMDE_SSE4_1_NEON) && defined(SIMDE_ARCH_AARCH64)
311
+ mask_.i64 = vreinterpretq_s64_u64(vcltq_s64(mask_.i64, vdupq_n_s64(UINT64_C(0))));
312
+ r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64);
313
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
314
+ #if defined(HEDLEY_INTEL_VERSION_CHECK)
315
+ __typeof__(mask_.i64) z = { 0, 0 };
316
+ mask_.i64 = HEDLEY_STATIC_CAST(__typeof__(mask_.i64), mask_.i64 < z);
317
+ #else
318
+ mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1;
319
+ #endif
320
+
321
+ r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64);
322
+ #else
323
+ SIMDE__VECTORIZE
324
+ for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
325
+ int64_t m = mask_.i64[i] >> 63;
326
+ r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]);
327
+ }
328
+ #endif
329
+
330
+ return simde__m128i_from_private(r_);
331
+ #endif
332
+ }
333
+
334
+ SIMDE__FUNCTION_ATTRIBUTES
335
+ simde__m128d
336
+ simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) {
337
+ #if defined(SIMDE_SSE4_1_NATIVE)
338
+ return _mm_blendv_pd(a, b, mask);
339
+ #else
340
+ return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask)));
341
+ #endif
342
+ }
343
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
344
+ # define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask)
345
+ #endif
346
+
347
+ SIMDE__FUNCTION_ATTRIBUTES
348
+ simde__m128
349
+ simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) {
350
+ #if defined(SIMDE_SSE4_1_NATIVE)
351
+ return _mm_blendv_ps(a, b, mask);
352
+ #else
353
+ return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask)));
354
+ #endif
355
+ }
356
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
357
+ # define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask)
358
+ #endif
359
+
360
+ SIMDE__FUNCTION_ATTRIBUTES
361
+ simde__m128d
362
+ simde_mm_ceil_pd (simde__m128d a) {
363
+ #if defined(SIMDE_SSE4_1_NATIVE)
364
+ return _mm_ceil_pd(a);
365
+ #else
366
+ simde__m128d_private
367
+ r_,
368
+ a_ = simde__m128d_to_private(a);
369
+
370
+ #if defined(SIMDE_SSE4_1_NEON) && defined(SIMDE_ARCH_AARCH64)
371
+ r_.neon_f64 = vrndpq_f64(a_.neon_f64);
372
+ #elif defined(SIMDE_SSE_POWER_ALTIVEC)
373
+ r_.altivec_f64 = vec_ceil(a_.altivec_f64);
374
+ #elif defined(SIMDE_HAVE_MATH_H)
375
+ SIMDE__VECTORIZE
376
+ for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
377
+ r_.f64[i] = ceil(a_.f64[i]);
378
+ }
379
+ #else
380
+ HEDLEY_UNREACHABLE();
381
+ #endif
382
+
383
+ return simde__m128d_from_private(r_);
384
+ #endif
385
+ }
386
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
387
+ # define _mm_ceil_pd(a) simde_mm_ceil_pd(a)
388
+ #endif
389
+
390
+ SIMDE__FUNCTION_ATTRIBUTES
391
+ simde__m128
392
+ simde_mm_ceil_ps (simde__m128 a) {
393
+ #if defined(SIMDE_SSE4_1_NATIVE)
394
+ return _mm_ceil_ps(a);
395
+ #else
396
+ simde__m128_private
397
+ r_,
398
+ a_ = simde__m128_to_private(a);
399
+
400
+ #if defined(SIMDE_SSE4_1_NEON) && (SIMDE_ARCH_ARM >= 80) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0))
401
+ r_.neon_f32 = vrndpq_f32(a_.neon_f32);
402
+ #elif defined(SIMDE_SSE_POWER_ALTIVEC)
403
+ r_.altivec_f32 = vec_ceil(a_.altivec_f32);
404
+ #elif defined(SIMDE_HAVE_MATH_H)
405
+ SIMDE__VECTORIZE
406
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
407
+ r_.f32[i] = ceilf(a_.f32[i]);
408
+ }
409
+ #else
410
+ HEDLEY_UNREACHABLE();
411
+ #endif
412
+
413
+ return simde__m128_from_private(r_);
414
+ #endif
415
+ }
416
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
417
+ # define _mm_ceil_ps(a) simde_mm_ceil_ps(a)
418
+ #endif
419
+
420
+ SIMDE__FUNCTION_ATTRIBUTES
421
+ simde__m128d
422
+ simde_mm_ceil_sd (simde__m128d a, simde__m128d b) {
423
+ #if defined(SIMDE_SSE4_1_NATIVE)
424
+ return _mm_ceil_sd(a, b);
425
+ #else
426
+ simde__m128d_private
427
+ r_,
428
+ a_ = simde__m128d_to_private(a),
429
+ b_ = simde__m128d_to_private(b);
430
+
431
+ #if defined(SIMDE_HAVE_MATH_H)
432
+ r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], ceil(b_.f64[0])));
433
+ #else
434
+ HEDLEY_UNREACHABLE();
435
+ #endif
436
+
437
+ return simde__m128d_from_private(r_);
438
+ #endif
439
+ }
440
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
441
+ # define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b)
442
+ #endif
443
+
444
+ SIMDE__FUNCTION_ATTRIBUTES
445
+ simde__m128
446
+ simde_mm_ceil_ss (simde__m128 a, simde__m128 b) {
447
+ #if defined(SIMDE_SSE4_1_NATIVE)
448
+ return _mm_ceil_ss(a, b);
449
+ #elif defined(SIMDE_ASSUME_VECTORIZATION)
450
+ return simde_mm_move_ss(a, simde_mm_ceil_ps(b));
451
+ #else
452
+ simde__m128_private
453
+ r_,
454
+ a_ = simde__m128_to_private(a),
455
+ b_ = simde__m128_to_private(b);
456
+
457
+ #if defined(SIMDE_HAVE_MATH_H)
458
+ r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], ceilf(b_.f32[0])));
459
+ #else
460
+ HEDLEY_UNREACHABLE();
461
+ #endif
462
+
463
+ return simde__m128_from_private(r_);
464
+ #endif
465
+ }
466
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
467
+ # define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b)
468
+ #endif
469
+
470
+ SIMDE__FUNCTION_ATTRIBUTES
471
+ simde__m128i
472
+ simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) {
473
+ #if defined(SIMDE_SSE4_1_NATIVE)
474
+ return _mm_cmpeq_epi64(a, b);
475
+ #else
476
+ simde__m128i_private
477
+ r_,
478
+ a_ = simde__m128i_to_private(a),
479
+ b_ = simde__m128i_to_private(b);
480
+
481
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
482
+ r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), a_.i64 == b_.i64);
483
+ #elif defined(SIMDE_SSE_POWER_ALTIVEC)
484
+ r_.altivec_i64 = (vector signed long long) vec_cmpeq(a_.altivec_i64, b_.altivec_i64);
485
+ #else
486
+ SIMDE__VECTORIZE
487
+ for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
488
+ r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0);
489
+ }
490
+ #endif
491
+
492
+ return simde__m128i_from_private(r_);
493
+ #endif
494
+ }
495
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
496
+ # define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b)
497
+ #endif
498
+
499
+ SIMDE__FUNCTION_ATTRIBUTES
500
+ simde__m128i
501
+ simde_mm_cvtepi8_epi16 (simde__m128i a) {
502
+ #if defined(SIMDE_SSE4_1_NATIVE)
503
+ return _mm_cvtepi8_epi16(a);
504
+ #else
505
+ simde__m128i_private
506
+ r_,
507
+ a_ = simde__m128i_to_private(a);
508
+
509
+ #if defined(SIMDE__CONVERT_VECTOR)
510
+ SIMDE__CONVERT_VECTOR(r_.i16, a_.m64_private[0].i8);
511
+ #else
512
+ SIMDE__VECTORIZE
513
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
514
+ r_.i16[i] = a_.i8[i];
515
+ }
516
+ #endif
517
+
518
+ return simde__m128i_from_private(r_);
519
+ #endif
520
+ }
521
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
522
+ # define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a)
523
+ #endif
524
+
525
+ SIMDE__FUNCTION_ATTRIBUTES
526
+ simde__m128i
527
+ simde_mm_cvtepi8_epi32 (simde__m128i a) {
528
+ #if defined(SIMDE_SSE4_1_NATIVE)
529
+ return _mm_cvtepi8_epi32(a);
530
+ #else
531
+ simde__m128i_private
532
+ r_,
533
+ a_ = simde__m128i_to_private(a);
534
+
535
+ SIMDE__VECTORIZE
536
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
537
+ r_.i32[i] = a_.i8[i];
538
+ }
539
+
540
+ return simde__m128i_from_private(r_);
541
+ #endif
542
+ }
543
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
544
+ # define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a)
545
+ #endif
546
+
547
+ SIMDE__FUNCTION_ATTRIBUTES
548
+ simde__m128i
549
+ simde_mm_cvtepi8_epi64 (simde__m128i a) {
550
+ #if defined(SIMDE_SSE4_1_NATIVE)
551
+ return _mm_cvtepi8_epi64(a);
552
+ #else
553
+ simde__m128i_private
554
+ r_,
555
+ a_ = simde__m128i_to_private(a);
556
+
557
+ SIMDE__VECTORIZE
558
+ for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
559
+ r_.i64[i] = a_.i8[i];
560
+ }
561
+
562
+ return simde__m128i_from_private(r_);
563
+ #endif
564
+ }
565
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
566
+ # define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a)
567
+ #endif
568
+
569
+ SIMDE__FUNCTION_ATTRIBUTES
570
+ simde__m128i
571
+ simde_mm_cvtepu8_epi16 (simde__m128i a) {
572
+ #if defined(SIMDE_SSE4_1_NATIVE)
573
+ return _mm_cvtepu8_epi16(a);
574
+ #else
575
+ simde__m128i_private
576
+ r_,
577
+ a_ = simde__m128i_to_private(a);
578
+
579
+ #if defined(SIMDE__CONVERT_VECTOR) && !defined(SIMDE_BUG_CLANG_45541)
580
+ SIMDE__CONVERT_VECTOR(r_.i16, a_.m64_private[0].u8);
581
+ #else
582
+ SIMDE__VECTORIZE
583
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
584
+ r_.i16[i] = a_.u8[i];
585
+ }
586
+ #endif
587
+
588
+ return simde__m128i_from_private(r_);
589
+ #endif
590
+ }
591
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
592
+ # define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a)
593
+ #endif
594
+
595
+ SIMDE__FUNCTION_ATTRIBUTES
596
+ simde__m128i
597
+ simde_mm_cvtepu8_epi32 (simde__m128i a) {
598
+ #if defined(SIMDE_SSE4_1_NATIVE)
599
+ return _mm_cvtepu8_epi32(a);
600
+ #else
601
+ simde__m128i_private
602
+ r_,
603
+ a_ = simde__m128i_to_private(a);
604
+
605
+ #if defined(SIMDE_SSE4_1_NEON)
606
+ uint8x16_t u8x16 = a_.neon_u8;
607
+ uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16));
608
+ r_.neon_u32 = vmovl_u16(vget_low_u16(u16x8));
609
+ #else
610
+ SIMDE__VECTORIZE
611
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
612
+ r_.i32[i] = a_.u8[i];
613
+ }
614
+ #endif
615
+
616
+ return simde__m128i_from_private(r_);
617
+ #endif
618
+ }
619
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
620
+ # define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a)
621
+ #endif
622
+
623
+ SIMDE__FUNCTION_ATTRIBUTES
624
+ simde__m128i
625
+ simde_mm_cvtepu8_epi64 (simde__m128i a) {
626
+ #if defined(SIMDE_SSE4_1_NATIVE)
627
+ return _mm_cvtepu8_epi64(a);
628
+ #else
629
+ simde__m128i_private
630
+ r_,
631
+ a_ = simde__m128i_to_private(a);
632
+
633
+ SIMDE__VECTORIZE
634
+ for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
635
+ r_.i64[i] = a_.u8[i];
636
+ }
637
+
638
+ return simde__m128i_from_private(r_);
639
+ #endif
640
+ }
641
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
642
+ # define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a)
643
+ #endif
644
+
645
+ SIMDE__FUNCTION_ATTRIBUTES
646
+ simde__m128i
647
+ simde_mm_cvtepi16_epi32 (simde__m128i a) {
648
+ #if defined(SIMDE_SSE4_1_NATIVE)
649
+ return _mm_cvtepi16_epi32(a);
650
+ #else
651
+ simde__m128i_private
652
+ r_,
653
+ a_ = simde__m128i_to_private(a);
654
+
655
+ #if defined(SIMDE_SSE4_1_NEON)
656
+ r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16));
657
+ #else
658
+ SIMDE__VECTORIZE
659
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
660
+ r_.i32[i] = a_.i16[i];
661
+ }
662
+ #endif
663
+
664
+ return simde__m128i_from_private(r_);
665
+ #endif
666
+ }
667
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
668
+ # define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a)
669
+ #endif
670
+
671
+ SIMDE__FUNCTION_ATTRIBUTES
672
+ simde__m128i
673
+ simde_mm_cvtepu16_epi32 (simde__m128i a) {
674
+ #if defined(SIMDE_SSE4_1_NATIVE)
675
+ return _mm_cvtepu16_epi32(a);
676
+ #else
677
+ simde__m128i_private
678
+ r_,
679
+ a_ = simde__m128i_to_private(a);
680
+
681
+ #if defined(SIMDE__CONVERT_VECTOR) && !defined(SIMDE_BUG_CLANG_45541)
682
+ SIMDE__CONVERT_VECTOR(r_.i32, a_.m64_private[0].u16);
683
+ #else
684
+ SIMDE__VECTORIZE
685
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
686
+ r_.i32[i] = a_.u16[i];
687
+ }
688
+ #endif
689
+
690
+ return simde__m128i_from_private(r_);
691
+ #endif
692
+ }
693
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
694
+ # define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a)
695
+ #endif
696
+
697
+ SIMDE__FUNCTION_ATTRIBUTES
698
+ simde__m128i
699
+ simde_mm_cvtepu16_epi64 (simde__m128i a) {
700
+ #if defined(SIMDE_SSE4_1_NATIVE)
701
+ return _mm_cvtepu16_epi64(a);
702
+ #else
703
+ simde__m128i_private
704
+ r_,
705
+ a_ = simde__m128i_to_private(a);
706
+
707
+ SIMDE__VECTORIZE
708
+ for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
709
+ r_.i64[i] = a_.u16[i];
710
+ }
711
+
712
+ return simde__m128i_from_private(r_);
713
+ #endif
714
+ }
715
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
716
+ # define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a)
717
+ #endif
718
+
719
+ SIMDE__FUNCTION_ATTRIBUTES
720
+ simde__m128i
721
+ simde_mm_cvtepi16_epi64 (simde__m128i a) {
722
+ #if defined(SIMDE_SSE4_1_NATIVE)
723
+ return _mm_cvtepi16_epi64(a);
724
+ #else
725
+ simde__m128i_private
726
+ r_,
727
+ a_ = simde__m128i_to_private(a);
728
+
729
+ SIMDE__VECTORIZE
730
+ for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
731
+ r_.i64[i] = a_.i16[i];
732
+ }
733
+
734
+ return simde__m128i_from_private(r_);
735
+ #endif
736
+ }
737
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
738
+ # define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a)
739
+ #endif
740
+
741
+ SIMDE__FUNCTION_ATTRIBUTES
742
+ simde__m128i
743
+ simde_mm_cvtepi32_epi64 (simde__m128i a) {
744
+ #if defined(SIMDE_SSE4_1_NATIVE)
745
+ return _mm_cvtepi32_epi64(a);
746
+ #else
747
+ simde__m128i_private
748
+ r_,
749
+ a_ = simde__m128i_to_private(a);
750
+
751
+ #if defined(SIMDE__CONVERT_VECTOR)
752
+ SIMDE__CONVERT_VECTOR(r_.i64, a_.m64_private[0].i32);
753
+ #else
754
+ SIMDE__VECTORIZE
755
+ for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
756
+ r_.i64[i] = a_.i32[i];
757
+ }
758
+ #endif
759
+
760
+ return simde__m128i_from_private(r_);
761
+ #endif
762
+ }
763
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
764
+ # define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a)
765
+ #endif
766
+
767
+ SIMDE__FUNCTION_ATTRIBUTES
768
+ simde__m128i
769
+ simde_mm_cvtepu32_epi64 (simde__m128i a) {
770
+ #if defined(SIMDE_SSE4_1_NATIVE)
771
+ return _mm_cvtepu32_epi64(a);
772
+ #else
773
+ simde__m128i_private
774
+ r_,
775
+ a_ = simde__m128i_to_private(a);
776
+
777
+ #if defined(SIMDE__CONVERT_VECTOR)
778
+ SIMDE__CONVERT_VECTOR(r_.i64, a_.m64_private[0].u32);
779
+ #else
780
+ SIMDE__VECTORIZE
781
+ for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
782
+ r_.i64[i] = a_.u32[i];
783
+ }
784
+ #endif
785
+
786
+ return simde__m128i_from_private(r_);
787
+ #endif
788
+ }
789
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
790
+ # define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a)
791
+ #endif
792
+
793
+ SIMDE__FUNCTION_ATTRIBUTES
794
+ simde__m128d
795
+ simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8)
796
+ HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
797
+ simde__m128d_private
798
+ r_,
799
+ a_ = simde__m128d_to_private(a),
800
+ b_ = simde__m128d_to_private(b);
801
+
802
+ simde_float64 sum = SIMDE_FLOAT64_C(0.0);
803
+
804
+ SIMDE__VECTORIZE_REDUCTION(+:sum)
805
+ for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
806
+ sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0;
807
+ }
808
+
809
+ SIMDE__VECTORIZE
810
+ for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
811
+ r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0;
812
+ }
813
+
814
+ return simde__m128d_from_private(r_);
815
+ }
816
+ #if defined(SIMDE_SSE4_1_NATIVE)
817
+ # define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8)
818
+ #endif
819
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
820
+ # define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8)
821
+ #endif
822
+
823
+ SIMDE__FUNCTION_ATTRIBUTES
824
+ simde__m128
825
+ simde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8)
826
+ HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
827
+ simde__m128_private
828
+ r_,
829
+ a_ = simde__m128_to_private(a),
830
+ b_ = simde__m128_to_private(b);
831
+
832
+ simde_float32 sum = SIMDE_FLOAT32_C(0.0);
833
+
834
+ SIMDE__VECTORIZE_REDUCTION(+:sum)
835
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
836
+ sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0);
837
+ }
838
+
839
+ SIMDE__VECTORIZE
840
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
841
+ r_.f32[i] = ((imm8 >> i) & 1) ? sum : SIMDE_FLOAT32_C(0.0);
842
+ }
843
+
844
+ return simde__m128_from_private(r_);
845
+ }
846
+ #if defined(SIMDE_SSE4_1_NATIVE)
847
+ # define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8)
848
+ #endif
849
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
850
+ # define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8)
851
+ #endif
852
+
853
+ #if defined(simde_mm_extract_epi8)
854
+ # undef simde_mm_extract_epi8
855
+ #endif
856
+ SIMDE__FUNCTION_ATTRIBUTES
857
+ int8_t
858
+ simde_mm_extract_epi8 (simde__m128i a, const int imm8)
859
+ HEDLEY_REQUIRE_MSG((imm8 & 0xf) == imm8, "imm8 must be in range [0, 15]") {
860
+ simde__m128i_private
861
+ a_ = simde__m128i_to_private(a);
862
+
863
+ return a_.i8[imm8&15];
864
+ }
865
+ #if defined(SIMDE_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8)
866
+ # define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8))
867
+ #elif defined(SIMDE_SSE4_1_NEON)
868
+ # define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_private(a).neon_i8, imm8)
869
+ #endif
870
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
871
+ # define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8))
872
+ #endif
873
+
874
+ #if defined(simde_mm_extract_epi32)
875
+ # undef simde_mm_extract_epi32
876
+ #endif
877
+ SIMDE__FUNCTION_ATTRIBUTES
878
+ int32_t
879
+ simde_mm_extract_epi32 (simde__m128i a, const int imm8)
880
+ HEDLEY_REQUIRE_MSG((imm8 & 3) == imm8, "imm8 must be in range [0, 3]") {
881
+ simde__m128i_private
882
+ a_ = simde__m128i_to_private(a);
883
+
884
+ return a_.i32[imm8&3];
885
+ }
886
+ #if defined(SIMDE_SSE4_1_NATIVE)
887
+ # define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8)
888
+ #elif defined(SIMDE_SSE4_1_NEON)
889
+ # define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_private(a).neon_i32, imm8)
890
+ #endif
891
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
892
+ # define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8)
893
+ #endif
894
+
895
+ #if defined(simde_mm_extract_epi64)
896
+ # undef simde_mm_extract_epi64
897
+ #endif
898
+ SIMDE__FUNCTION_ATTRIBUTES
899
+ int64_t
900
+ simde_mm_extract_epi64 (simde__m128i a, const int imm8)
901
+ HEDLEY_REQUIRE_MSG((imm8 & 1) == imm8, "imm8 must be 0 or 1") {
902
+ simde__m128i_private
903
+ a_ = simde__m128i_to_private(a);
904
+
905
+ return a_.i64[imm8&1];
906
+ }
907
+ #if defined(SIMDE_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64)
908
+ # define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8)
909
+ #elif defined(SIMDE_SSE4_1_NEON)
910
+ # define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_private(a).neon_i64, imm8)
911
+ #endif
912
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
913
+ # define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8)
914
+ #endif
915
+
916
+ SIMDE__FUNCTION_ATTRIBUTES
917
+ simde__m128d
918
+ simde_mm_floor_pd (simde__m128d a) {
919
+ #if defined(SIMDE_SSE4_1_NATIVE)
920
+ return _mm_floor_pd(a);
921
+ #else
922
+ simde__m128d_private
923
+ r_,
924
+ a_ = simde__m128d_to_private(a);
925
+
926
+ #if defined(SIMDE_HAVE_MATH_H)
927
+ SIMDE__VECTORIZE
928
+ for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
929
+ r_.f64[i] = floor(a_.f64[i]);
930
+ }
931
+ #else
932
+ HEDLEY_UNREACHABLE();
933
+ #endif
934
+
935
+ return simde__m128d_from_private(r_);
936
+ #endif
937
+ }
938
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
939
+ # define _mm_floor_pd(a) simde_mm_floor_pd(a)
940
+ #endif
941
+
942
+ SIMDE__FUNCTION_ATTRIBUTES
943
+ simde__m128
944
+ simde_mm_floor_ps (simde__m128 a) {
945
+ #if defined(SIMDE_SSE4_1_NATIVE)
946
+ return _mm_floor_ps(a);
947
+ #else
948
+ simde__m128_private
949
+ r_,
950
+ a_ = simde__m128_to_private(a);
951
+
952
+ #if defined(SIMDE_HAVE_MATH_H)
953
+ SIMDE__VECTORIZE
954
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
955
+ r_.f32[i] = floorf(a_.f32[i]);
956
+ }
957
+ #else
958
+ HEDLEY_UNREACHABLE();
959
+ #endif
960
+
961
+ return simde__m128_from_private(r_);
962
+ #endif
963
+ }
964
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
965
+ # define _mm_floor_ps(a) simde_mm_floor_ps(a)
966
+ #endif
967
+
968
+ SIMDE__FUNCTION_ATTRIBUTES
969
+ simde__m128d
970
+ simde_mm_floor_sd (simde__m128d a, simde__m128d b) {
971
+ #if defined(SIMDE_SSE4_1_NATIVE)
972
+ return _mm_floor_sd(a, b);
973
+ #else
974
+ simde__m128d_private
975
+ r_,
976
+ a_ = simde__m128d_to_private(a),
977
+ b_ = simde__m128d_to_private(b);
978
+
979
+ #if defined(SIMDE_HAVE_MATH_H)
980
+ r_.f64[0] = floor(b_.f64[0]);
981
+ r_.f64[1] = a_.f64[1];
982
+ #else
983
+ HEDLEY_UNREACHABLE();
984
+ #endif
985
+
986
+ return simde__m128d_from_private(r_);
987
+ #endif
988
+ }
989
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
990
+ # define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b)
991
+ #endif
992
+
993
+ SIMDE__FUNCTION_ATTRIBUTES
994
+ simde__m128
995
+ simde_mm_floor_ss (simde__m128 a, simde__m128 b) {
996
+ #if defined(SIMDE_SSE4_1_NATIVE)
997
+ return _mm_floor_ss(a, b);
998
+ #elif defined(SIMDE_ASSUME_VECTORIZATION)
999
+ return simde_mm_move_ss(a, simde_mm_floor_ps(b));
1000
+ #else
1001
+ simde__m128_private
1002
+ r_,
1003
+ a_ = simde__m128_to_private(a),
1004
+ b_ = simde__m128_to_private(b);
1005
+
1006
+ #if defined(SIMDE_HAVE_MATH_H)
1007
+ r_.f32[0] = floorf(b_.f32[0]);
1008
+ for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1009
+ r_.f32[i] = a_.f32[i];
1010
+ }
1011
+ #else
1012
+ HEDLEY_UNREACHABLE();
1013
+ #endif
1014
+
1015
+ return simde__m128_from_private(r_);
1016
+ #endif
1017
+ }
1018
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1019
+ # define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b)
1020
+ #endif
1021
+
1022
+ SIMDE__FUNCTION_ATTRIBUTES
1023
+ simde__m128i
1024
+ simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8)
1025
+ HEDLEY_REQUIRE_MSG((imm8 & 0xf) == imm8, "imm8 must be in range [0, 15]") {
1026
+ simde__m128i_private
1027
+ r_ = simde__m128i_to_private(a);
1028
+
1029
+ r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i);
1030
+
1031
+ return simde__m128i_from_private(r_);
1032
+ }
1033
+ #if defined(SIMDE_SSE4_1_NATIVE)
1034
+ # define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8)
1035
+ #endif
1036
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1037
+ # define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8)
1038
+ #endif
1039
+
1040
+ SIMDE__FUNCTION_ATTRIBUTES
1041
+ simde__m128i
1042
+ simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8)
1043
+ HEDLEY_REQUIRE_MSG((imm8 & 3) == imm8, "imm8 must be in range [0, 3]") {
1044
+ simde__m128i_private
1045
+ r_ = simde__m128i_to_private(a);
1046
+
1047
+ r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i);
1048
+
1049
+ return simde__m128i_from_private(r_);
1050
+ }
1051
+ #if defined(SIMDE_SSE4_1_NATIVE)
1052
+ # define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8)
1053
+ #endif
1054
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1055
+ # define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8)
1056
+ #endif
1057
+
1058
+ SIMDE__FUNCTION_ATTRIBUTES
1059
+ simde__m128i
1060
+ simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8)
1061
+ HEDLEY_REQUIRE_MSG((imm8 & 1) == imm8, "imm8 must be 0 or 1") {
1062
+ #if defined(SIMDE_BUG_GCC_94482)
1063
+ simde__m128i_private
1064
+ a_ = simde__m128i_to_private(a);
1065
+
1066
+ switch(imm8) {
1067
+ case 0:
1068
+ return simde_mm_set_epi64x(a_.i64[1], i);
1069
+ break;
1070
+ case 1:
1071
+ return simde_mm_set_epi64x(i, a_.i64[0]);
1072
+ break;
1073
+ default:
1074
+ HEDLEY_UNREACHABLE();
1075
+ break;
1076
+ }
1077
+ #else
1078
+ simde__m128i_private
1079
+ r_ = simde__m128i_to_private(a);
1080
+
1081
+ r_.i64[imm8] = i;
1082
+
1083
+ return simde__m128i_from_private(r_);
1084
+ #endif
1085
+ }
1086
+ #if defined(SIMDE_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64)
1087
+ # define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8)
1088
+ #endif
1089
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1090
+ # define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8)
1091
+ #endif
1092
+
1093
+ SIMDE__FUNCTION_ATTRIBUTES
1094
+ simde__m128
1095
+ simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8)
1096
+ HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
1097
+ simde__m128_private
1098
+ r_,
1099
+ a_ = simde__m128_to_private(a),
1100
+ b_ = simde__m128_to_private(b);
1101
+
1102
+ a_.f32[0] = b_.f32[(imm8 >> 6) & 3];
1103
+ a_.f32[(imm8 >> 4) & 3] = a_.f32[0];
1104
+
1105
+ SIMDE__VECTORIZE
1106
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1107
+ r_.f32[i] = (imm8 >> i) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i];
1108
+ }
1109
+
1110
+ return simde__m128_from_private(r_);
1111
+ }
1112
+ #if defined(SIMDE_SSE4_1_NATIVE)
1113
+ # define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8)
1114
+ #endif
1115
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1116
+ # define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8)
1117
+ #endif
1118
+
1119
+ SIMDE__FUNCTION_ATTRIBUTES
1120
+ simde__m128i
1121
+ simde_mm_max_epi8 (simde__m128i a, simde__m128i b) {
1122
+ #if defined(SIMDE_SSE4_1_NATIVE) && !defined(__PGI)
1123
+ return _mm_max_epi8(a, b);
1124
+ #else
1125
+ simde__m128i_private
1126
+ r_,
1127
+ a_ = simde__m128i_to_private(a),
1128
+ b_ = simde__m128i_to_private(b);
1129
+
1130
+ #if defined(SIMDE_SSE4_1_NEON)
1131
+ r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8);
1132
+ #else
1133
+ SIMDE__VECTORIZE
1134
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
1135
+ r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i];
1136
+ }
1137
+ #endif
1138
+
1139
+ return simde__m128i_from_private(r_);
1140
+ #endif
1141
+ }
1142
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1143
+ # define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b)
1144
+ #endif
1145
+
1146
+ SIMDE__FUNCTION_ATTRIBUTES
1147
+ simde__m128i
1148
+ simde_mm_max_epi32 (simde__m128i a, simde__m128i b) {
1149
+ #if defined(SIMDE_SSE4_1_NATIVE) && !defined(__PGI)
1150
+ return _mm_max_epi32(a, b);
1151
+ #else
1152
+ simde__m128i_private
1153
+ r_,
1154
+ a_ = simde__m128i_to_private(a),
1155
+ b_ = simde__m128i_to_private(b);
1156
+
1157
+ #if defined(SIMDE_SSE4_1_NEON)
1158
+ r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32);
1159
+ #else
1160
+ SIMDE__VECTORIZE
1161
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1162
+ r_.i32[i] = a_.i32[i] > b_.i32[i] ? a_.i32[i] : b_.i32[i];
1163
+ }
1164
+ #endif
1165
+
1166
+ return simde__m128i_from_private(r_);
1167
+ #endif
1168
+ }
1169
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1170
+ # define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b)
1171
+ #endif
1172
+
1173
+ SIMDE__FUNCTION_ATTRIBUTES
1174
+ simde__m128i
1175
+ simde_mm_max_epu16 (simde__m128i a, simde__m128i b) {
1176
+ #if defined(SIMDE_SSE4_1_NATIVE)
1177
+ return _mm_max_epu16(a, b);
1178
+ #else
1179
+ simde__m128i_private
1180
+ r_,
1181
+ a_ = simde__m128i_to_private(a),
1182
+ b_ = simde__m128i_to_private(b);
1183
+
1184
+ #if defined(SIMDE_SSE4_1_NEON)
1185
+ r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16);
1186
+ #else
1187
+ SIMDE__VECTORIZE
1188
+ for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
1189
+ r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i];
1190
+ }
1191
+ #endif
1192
+
1193
+ return simde__m128i_from_private(r_);
1194
+ #endif
1195
+ }
1196
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1197
+ # define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b)
1198
+ #endif
1199
+
1200
+ SIMDE__FUNCTION_ATTRIBUTES
1201
+ simde__m128i
1202
+ simde_mm_max_epu32 (simde__m128i a, simde__m128i b) {
1203
+ #if defined(SIMDE_SSE4_1_NATIVE)
1204
+ return _mm_max_epu32(a, b);
1205
+ #else
1206
+ simde__m128i_private
1207
+ r_,
1208
+ a_ = simde__m128i_to_private(a),
1209
+ b_ = simde__m128i_to_private(b);
1210
+
1211
+ #if defined(SIMDE_SSE4_1_NEON)
1212
+ r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32);
1213
+ #else
1214
+ SIMDE__VECTORIZE
1215
+ for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
1216
+ r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i];
1217
+ }
1218
+ #endif
1219
+
1220
+ return simde__m128i_from_private(r_);
1221
+ #endif
1222
+ }
1223
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1224
+ # define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b)
1225
+ #endif
1226
+
1227
+ SIMDE__FUNCTION_ATTRIBUTES
1228
+ simde__m128i
1229
+ simde_mm_min_epi8 (simde__m128i a, simde__m128i b) {
1230
+ #if defined(SIMDE_SSE4_1_NATIVE) && !defined(__PGI)
1231
+ return _mm_min_epi8(a, b);
1232
+ #else
1233
+ simde__m128i_private
1234
+ r_,
1235
+ a_ = simde__m128i_to_private(a),
1236
+ b_ = simde__m128i_to_private(b);
1237
+
1238
+ #if defined(SIMDE_SSE4_1_NEON)
1239
+ r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8);
1240
+ #else
1241
+ SIMDE__VECTORIZE
1242
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
1243
+ r_.i8[i] = a_.i8[i] < b_.i8[i] ? a_.i8[i] : b_.i8[i];
1244
+ }
1245
+ #endif
1246
+
1247
+ return simde__m128i_from_private(r_);
1248
+ #endif
1249
+ }
1250
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1251
+ # define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b)
1252
+ #endif
1253
+
1254
+ SIMDE__FUNCTION_ATTRIBUTES
1255
+ simde__m128i
1256
+ simde_mm_min_epi32 (simde__m128i a, simde__m128i b) {
1257
+ #if defined(SIMDE_SSE4_1_NATIVE) && !defined(__PGI)
1258
+ return _mm_min_epi32(a, b);
1259
+ #else
1260
+ simde__m128i_private
1261
+ r_,
1262
+ a_ = simde__m128i_to_private(a),
1263
+ b_ = simde__m128i_to_private(b);
1264
+
1265
+ #if defined(SIMDE_SSE4_1_NEON)
1266
+ r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32);
1267
+ #else
1268
+ SIMDE__VECTORIZE
1269
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1270
+ r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i];
1271
+ }
1272
+ #endif
1273
+
1274
+ return simde__m128i_from_private(r_);
1275
+ #endif
1276
+ }
1277
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1278
+ # define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b)
1279
+ #endif
1280
+
1281
+ SIMDE__FUNCTION_ATTRIBUTES
1282
+ simde__m128i
1283
+ simde_mm_min_epu16 (simde__m128i a, simde__m128i b) {
1284
+ #if defined(SIMDE_SSE4_1_NATIVE)
1285
+ return _mm_min_epu16(a, b);
1286
+ #else
1287
+ simde__m128i_private
1288
+ r_,
1289
+ a_ = simde__m128i_to_private(a),
1290
+ b_ = simde__m128i_to_private(b);
1291
+
1292
+ #if defined(SIMDE_SSE4_1_NEON)
1293
+ r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16);
1294
+ #else
1295
+ SIMDE__VECTORIZE
1296
+ for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
1297
+ r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i];
1298
+ }
1299
+ #endif
1300
+
1301
+ return simde__m128i_from_private(r_);
1302
+ #endif
1303
+ }
1304
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1305
+ # define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b)
1306
+ #endif
1307
+
1308
+ SIMDE__FUNCTION_ATTRIBUTES
1309
+ simde__m128i
1310
+ simde_mm_min_epu32 (simde__m128i a, simde__m128i b) {
1311
+ #if defined(SIMDE_SSE4_1_NATIVE)
1312
+ return _mm_min_epu32(a, b);
1313
+ #else
1314
+ simde__m128i_private
1315
+ r_,
1316
+ a_ = simde__m128i_to_private(a),
1317
+ b_ = simde__m128i_to_private(b);
1318
+
1319
+ #if defined(SIMDE_SSE4_1_NEON)
1320
+ r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32);
1321
+ #else
1322
+ SIMDE__VECTORIZE
1323
+ for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
1324
+ r_.u32[i] = a_.u32[i] < b_.u32[i] ? a_.u32[i] : b_.u32[i];
1325
+ }
1326
+ #endif
1327
+
1328
+ return simde__m128i_from_private(r_);
1329
+ #endif
1330
+ }
1331
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1332
+ # define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b)
1333
+ #endif
1334
+
1335
+ SIMDE__FUNCTION_ATTRIBUTES
1336
+ simde__m128i
1337
+ simde_mm_minpos_epu16 (simde__m128i a) {
1338
+ #if defined(SIMDE_SSE4_1_NATIVE)
1339
+ return _mm_minpos_epu16(a);
1340
+ #else
1341
+ simde__m128i_private
1342
+ r_ = simde__m128i_to_private(simde_mm_setzero_si128()),
1343
+ a_ = simde__m128i_to_private(a);
1344
+
1345
+ r_.u16[0] = UINT16_MAX;
1346
+ for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
1347
+ if (a_.u16[i] < r_.u16[0]) {
1348
+ r_.u16[0] = a_.u16[i];
1349
+ r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i);
1350
+ }
1351
+ }
1352
+
1353
+ return simde__m128i_from_private(r_);
1354
+ #endif
1355
+ }
1356
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1357
+ # define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a)
1358
+ #endif
1359
+
1360
+ SIMDE__FUNCTION_ATTRIBUTES
1361
+ simde__m128i
1362
+ simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8)
1363
+ HEDLEY_REQUIRE_MSG((imm8 & 7) == imm8, "imm8 must be in range [0, 7]") {
1364
+ simde__m128i_private
1365
+ r_,
1366
+ a_ = simde__m128i_to_private(a),
1367
+ b_ = simde__m128i_to_private(b);
1368
+
1369
+ const int a_offset = imm8 & 4;
1370
+ const int b_offset = (imm8 & 3) << 2;
1371
+
1372
+ #if defined(SIMDE_HAVE_MATH_H)
1373
+ for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) {
1374
+ r_.u16[i] =
1375
+ HEDLEY_STATIC_CAST(uint16_t, abs(a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0])) +
1376
+ HEDLEY_STATIC_CAST(uint16_t, abs(a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1])) +
1377
+ HEDLEY_STATIC_CAST(uint16_t, abs(a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2])) +
1378
+ HEDLEY_STATIC_CAST(uint16_t, abs(a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]));
1379
+ }
1380
+ #else
1381
+ HEDLEY_UNREACHABLE();
1382
+ #endif
1383
+
1384
+ return simde__m128i_from_private(r_);
1385
+ }
1386
+ #if defined(SIMDE_SSE4_1_NATIVE)
1387
+ # define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8)
1388
+ #endif
1389
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1390
+ # define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8)
1391
+ #endif
1392
+
1393
+ SIMDE__FUNCTION_ATTRIBUTES
1394
+ simde__m128i
1395
+ simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) {
1396
+ #if defined(SIMDE_SSE4_1_NATIVE)
1397
+ return _mm_mul_epi32(a, b);
1398
+ #else
1399
+ simde__m128i_private
1400
+ r_,
1401
+ a_ = simde__m128i_to_private(a),
1402
+ b_ = simde__m128i_to_private(b);
1403
+
1404
+ SIMDE__VECTORIZE
1405
+ for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
1406
+ r_.i64[i] =
1407
+ HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) *
1408
+ HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]);
1409
+ }
1410
+
1411
+ return simde__m128i_from_private(r_);
1412
+ #endif
1413
+ }
1414
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1415
+ # define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b)
1416
+ #endif
1417
+
1418
+ SIMDE__FUNCTION_ATTRIBUTES
1419
+ simde__m128i
1420
+ simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) {
1421
+ #if defined(SIMDE_SSE4_1_NATIVE)
1422
+ return _mm_mullo_epi32(a, b);
1423
+ #else
1424
+ simde__m128i_private
1425
+ r_,
1426
+ a_ = simde__m128i_to_private(a),
1427
+ b_ = simde__m128i_to_private(b);
1428
+
1429
+ #if defined(SIMDE_SSE4_1_NEON)
1430
+ r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32);
1431
+ #else
1432
+ SIMDE__VECTORIZE
1433
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1434
+ r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff));
1435
+ }
1436
+ #endif
1437
+
1438
+ return simde__m128i_from_private(r_);
1439
+ #endif
1440
+ }
1441
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1442
+ # define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b)
1443
+ #endif
1444
+
1445
+ SIMDE__FUNCTION_ATTRIBUTES
1446
+ simde__m128i
1447
+ simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) {
1448
+ #if defined(SIMDE_SSE4_1_NATIVE)
1449
+ return _mm_packus_epi32(a, b);
1450
+ #else
1451
+ simde__m128i_private
1452
+ r_,
1453
+ a_ = simde__m128i_to_private(a),
1454
+ b_ = simde__m128i_to_private(b);
1455
+
1456
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1457
+ r_.u16[i + 0] = (a_.i32[i] < 0) ? UINT16_C(0) : ((a_.i32[i] > UINT16_MAX) ? (UINT16_MAX) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[i]));
1458
+ r_.u16[i + 4] = (b_.i32[i] < 0) ? UINT16_C(0) : ((b_.i32[i] > UINT16_MAX) ? (UINT16_MAX) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[i]));
1459
+ }
1460
+ return simde__m128i_from_private(r_);
1461
+ #endif
1462
+ }
1463
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1464
+ # define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b)
1465
+ #endif
1466
+
1467
+ SIMDE__FUNCTION_ATTRIBUTES
1468
+ simde__m128d
1469
+ simde_mm_round_pd (simde__m128d a, int rounding) {
1470
+ simde__m128d_private
1471
+ r_,
1472
+ a_ = simde__m128d_to_private(a);
1473
+
1474
+ #if defined(SIMDE_HAVE_MATH_H)
1475
+ for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1476
+ switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {
1477
+ case SIMDE_MM_FROUND_TO_NEAREST_INT:
1478
+ r_.f64[i] = nearbyint(a_.f64[i]);
1479
+ break;
1480
+ case SIMDE_MM_FROUND_TO_NEG_INF:
1481
+ r_.f64[i] = floor(a_.f64[i]);
1482
+ break;
1483
+ case SIMDE_MM_FROUND_TO_POS_INF:
1484
+ r_.f64[i] = ceil(a_.f64[i]);
1485
+ break;
1486
+ case SIMDE_MM_FROUND_TO_ZERO:
1487
+ r_.f64[i] = trunc(a_.f64[i]);
1488
+ break;
1489
+ case SIMDE_MM_FROUND_CUR_DIRECTION:
1490
+ r_.f64[i] = nearbyint(a_.f64[i]);
1491
+ break;
1492
+ default:
1493
+ HEDLEY_UNREACHABLE();
1494
+ break;
1495
+ }
1496
+ }
1497
+ #else
1498
+ HEDLEY_UNREACHABLE();
1499
+ #endif
1500
+
1501
+ return simde__m128d_from_private(r_);
1502
+ }
1503
+ #if defined(SIMDE_SSE4_1_NATIVE)
1504
+ # define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding)
1505
+ #endif
1506
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1507
+ # define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding)
1508
+ #endif
1509
+
1510
+ SIMDE__FUNCTION_ATTRIBUTES
1511
+ simde__m128
1512
+ simde_mm_round_ps (simde__m128 a, int rounding) {
1513
+ simde__m128_private
1514
+ r_,
1515
+ a_ = simde__m128_to_private(a);
1516
+
1517
+ #if defined(SIMDE_HAVE_MATH_H)
1518
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1519
+ switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {
1520
+ case SIMDE_MM_FROUND_TO_NEAREST_INT:
1521
+ r_.f32[i] = nearbyintf(a_.f32[i]);
1522
+ break;
1523
+ case SIMDE_MM_FROUND_TO_NEG_INF:
1524
+ r_.f32[i] = floorf(a_.f32[i]);
1525
+ break;
1526
+ case SIMDE_MM_FROUND_TO_POS_INF:
1527
+ r_.f32[i] = ceilf(a_.f32[i]);
1528
+ break;
1529
+ case SIMDE_MM_FROUND_TO_ZERO:
1530
+ r_.f32[i] = truncf(a_.f32[i]);
1531
+ break;
1532
+ case SIMDE_MM_FROUND_CUR_DIRECTION:
1533
+ r_.f32[i] = nearbyintf (a_.f32[i]);
1534
+ break;
1535
+ default:
1536
+ HEDLEY_UNREACHABLE();
1537
+ break;
1538
+ }
1539
+ }
1540
+ #else
1541
+ HEDLEY_UNREACHABLE();
1542
+ #endif
1543
+
1544
+ return simde__m128_from_private(r_);
1545
+ }
1546
+ #if defined(SIMDE_SSE4_1_NATIVE)
1547
+ # define simde_mm_round_ps(a, rounding) _mm_round_ps(a, rounding)
1548
+ #endif
1549
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1550
+ # define _mm_round_ps(a, rounding) simde_mm_round_ps(a, rounding)
1551
+ #endif
1552
+
1553
+ SIMDE__FUNCTION_ATTRIBUTES
1554
+ simde__m128d
1555
+ simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) {
1556
+ simde__m128d_private
1557
+ r_ = simde__m128d_to_private(a),
1558
+ b_ = simde__m128d_to_private(b);
1559
+
1560
+ #if defined(SIMDE_HAVE_MATH_H)
1561
+ switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {
1562
+ case SIMDE_MM_FROUND_TO_NEAREST_INT:
1563
+ r_.f64[0] = nearbyint(b_.f64[0]);
1564
+ break;
1565
+ case SIMDE_MM_FROUND_TO_NEG_INF:
1566
+ r_.f64[0] = floor(b_.f64[0]);
1567
+ break;
1568
+ case SIMDE_MM_FROUND_TO_POS_INF:
1569
+ r_.f64[0] = ceil(b_.f64[0]);
1570
+ break;
1571
+ case SIMDE_MM_FROUND_TO_ZERO:
1572
+ r_.f64[0] = trunc(b_.f64[0]);
1573
+ break;
1574
+ case SIMDE_MM_FROUND_CUR_DIRECTION:
1575
+ r_.f64[0] = nearbyint(b_.f64[0]);
1576
+ break;
1577
+ default:
1578
+ HEDLEY_UNREACHABLE();
1579
+ break;
1580
+ }
1581
+ #else
1582
+ HEDLEY_UNREACHABLE();
1583
+ #endif
1584
+
1585
+ return simde__m128d_from_private(r_);
1586
+ }
1587
+ #if defined(SIMDE_SSE4_1_NATIVE)
1588
+ # define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding)
1589
+ #endif
1590
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1591
+ # define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding)
1592
+ #endif
1593
+
1594
+ SIMDE__FUNCTION_ATTRIBUTES
1595
+ simde__m128
1596
+ simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding) {
1597
+ simde__m128_private
1598
+ r_ = simde__m128_to_private(a),
1599
+ b_ = simde__m128_to_private(b);
1600
+
1601
+ #if defined(SIMDE_HAVE_MATH_H)
1602
+ switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {
1603
+ case SIMDE_MM_FROUND_TO_NEAREST_INT:
1604
+ r_.f32[0] = nearbyintf(b_.f32[0]);
1605
+ break;
1606
+ case SIMDE_MM_FROUND_TO_NEG_INF:
1607
+ r_.f32[0] = floorf(b_.f32[0]);
1608
+ break;
1609
+ case SIMDE_MM_FROUND_TO_POS_INF:
1610
+ r_.f32[0] = ceilf(b_.f32[0]);
1611
+ break;
1612
+ case SIMDE_MM_FROUND_TO_ZERO:
1613
+ r_.f32[0] = truncf(b_.f32[0]);
1614
+ break;
1615
+ case SIMDE_MM_FROUND_CUR_DIRECTION:
1616
+ r_.f32[0] = nearbyintf (b_.f32[0]);
1617
+ break;
1618
+ default:
1619
+ HEDLEY_UNREACHABLE();
1620
+ break;
1621
+ }
1622
+ #else
1623
+ HEDLEY_UNREACHABLE();
1624
+ #endif
1625
+
1626
+ return simde__m128_from_private(r_);
1627
+ }
1628
+ #if defined(SIMDE_SSE4_1_NATIVE)
1629
+ # define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding)
1630
+ #endif
1631
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1632
+ # define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding)
1633
+ #endif
1634
+
1635
+ SIMDE__FUNCTION_ATTRIBUTES
1636
+ simde__m128i
1637
+ simde_mm_stream_load_si128 (const simde__m128i* mem_addr) {
1638
+ #if defined(SIMDE_SSE4_1_NATIVE)
1639
+ return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr));
1640
+ #else
1641
+ return *mem_addr;
1642
+ #endif
1643
+ }
1644
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1645
+ # define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr)
1646
+ #endif
1647
+
1648
+ SIMDE__FUNCTION_ATTRIBUTES
1649
+ int
1650
+ simde_mm_test_all_ones (simde__m128i a) {
1651
+ #if defined(SIMDE_SSE4_1_NATIVE)
1652
+ return _mm_test_all_ones(a);
1653
+ #else
1654
+ simde__m128i_private a_ = simde__m128i_to_private(a);
1655
+
1656
+ for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) {
1657
+ if (a_.u64[i] != ~UINT64_C(0))
1658
+ return 0;
1659
+ }
1660
+
1661
+ return 1;
1662
+ #endif
1663
+ }
1664
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1665
+ # define _mm_test_all_ones(a) simde_mm_test_all_ones(a)
1666
+ #endif
1667
+
1668
+ SIMDE__FUNCTION_ATTRIBUTES
1669
+ int
1670
+ simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) {
1671
+ #if defined(SIMDE_SSE4_1_NATIVE)
1672
+ return _mm_test_all_zeros(a, mask);
1673
+ #else
1674
+ simde__m128i_private
1675
+ a_ = simde__m128i_to_private(a),
1676
+ mask_ = simde__m128i_to_private(mask);
1677
+
1678
+ for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) {
1679
+ if ((a_.u64[i] & mask_.u64[i]) != 0)
1680
+ return 0;
1681
+ }
1682
+
1683
+ return 1;
1684
+ #endif
1685
+ }
1686
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1687
+ # define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask)
1688
+ #endif
1689
+
1690
+ SIMDE__FUNCTION_ATTRIBUTES
1691
+ int
1692
+ simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) {
1693
+ #if defined(SIMDE_SSE4_1_NATIVE)
1694
+ return _mm_test_mix_ones_zeros(a, mask);
1695
+ #else
1696
+ simde__m128i_private
1697
+ a_ = simde__m128i_to_private(a),
1698
+ mask_ = simde__m128i_to_private(mask);
1699
+
1700
+ for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++)
1701
+ if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0))
1702
+ return 1;
1703
+
1704
+ return 0;
1705
+ #endif
1706
+ }
1707
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1708
+ # define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask)
1709
+ #endif
1710
+
1711
+ SIMDE__FUNCTION_ATTRIBUTES
1712
+ int
1713
+ simde_mm_testc_si128 (simde__m128i a, simde__m128i b) {
1714
+ #if defined(SIMDE_SSE4_1_NATIVE)
1715
+ return _mm_testc_si128(a, b);
1716
+ #else
1717
+ simde__m128i_private
1718
+ a_ = simde__m128i_to_private(a),
1719
+ b_ = simde__m128i_to_private(b);
1720
+
1721
+ int_fast32_t r = 0;
1722
+
1723
+ SIMDE__VECTORIZE_REDUCTION(|:r)
1724
+ for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) {
1725
+ r |= ~a_.i32f[i] & b_.i32f[i];
1726
+ }
1727
+
1728
+ return HEDLEY_STATIC_CAST(int, !r);
1729
+ #endif
1730
+ }
1731
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1732
+ # define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b)
1733
+ #endif
1734
+
1735
+ SIMDE__FUNCTION_ATTRIBUTES
1736
+ int
1737
+ simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) {
1738
+ #if defined(SIMDE_SSE4_1_NATIVE)
1739
+ return _mm_testnzc_si128(a, b);
1740
+ #else
1741
+ simde__m128i_private
1742
+ a_ = simde__m128i_to_private(a),
1743
+ b_ = simde__m128i_to_private(b);
1744
+
1745
+ for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) {
1746
+ if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0))
1747
+ return 1;
1748
+ }
1749
+
1750
+ return 0;
1751
+ #endif
1752
+ }
1753
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1754
+ # define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b)
1755
+ #endif
1756
+
1757
+ SIMDE__FUNCTION_ATTRIBUTES
1758
+ int
1759
+ simde_mm_testz_si128 (simde__m128i a, simde__m128i b) {
1760
+ #if defined(SIMDE_SSE4_1_NATIVE)
1761
+ return _mm_testz_si128(a, b);
1762
+ #else
1763
+ simde__m128i_private
1764
+ a_ = simde__m128i_to_private(a),
1765
+ b_ = simde__m128i_to_private(b);
1766
+
1767
+ for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) {
1768
+ if ((a_.u64[i] & b_.u64[i]) == 0)
1769
+ return 1;
1770
+ }
1771
+
1772
+ return 0;
1773
+ #endif
1774
+ }
1775
+ #if defined(SIMDE_SSE4_1_ENABLE_NATIVE_ALIASES)
1776
+ # define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b)
1777
+ #endif
1778
+
1779
+ SIMDE__END_DECLS /* NOTE(review): presumably pairs with a SIMDE__BEGIN_DECLS earlier in this header (not visible in this chunk) - confirm */
1780
+
1781
+ HEDLEY_DIAGNOSTIC_POP /* NOTE(review): presumably restores diagnostic settings pushed earlier in this header - confirm */
1782
+
1783
+ #endif /* !defined(SIMDE__SSE4_1_H) */