minimap2 0.2.25.1 → 0.2.26.0

Files changed (109)
  1. checksums.yaml +4 -4
  2. data/README.md +9 -0
  3. data/ext/Rakefile +2 -2
  4. data/ext/minimap2/NEWS.md +9 -0
  5. data/ext/minimap2/README.md +2 -2
  6. data/ext/minimap2/cookbook.md +2 -2
  7. data/ext/minimap2/minimap.h +1 -1
  8. data/ext/minimap2/minimap2.1 +1 -1
  9. data/ext/minimap2/misc/paftools.js +1 -1
  10. data/ext/minimap2/python/mappy.pyx +1 -1
  11. data/ext/minimap2/setup.py +22 -32
  12. data/lib/minimap2/version.rb +1 -1
  13. metadata +1 -97
  14. data/ext/minimap2/lib/simde/CONTRIBUTING.md +0 -114
  15. data/ext/minimap2/lib/simde/COPYING +0 -20
  16. data/ext/minimap2/lib/simde/README.md +0 -333
  17. data/ext/minimap2/lib/simde/amalgamate.py +0 -58
  18. data/ext/minimap2/lib/simde/meson.build +0 -33
  19. data/ext/minimap2/lib/simde/netlify.toml +0 -20
  20. data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +0 -140
  21. data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +0 -137
  22. data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +0 -142
  23. data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +0 -145
  24. data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +0 -140
  25. data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +0 -145
  26. data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +0 -140
  27. data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +0 -143
  28. data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +0 -137
  29. data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +0 -141
  30. data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +0 -147
  31. data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +0 -141
  32. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +0 -134
  33. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +0 -138
  34. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +0 -134
  35. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +0 -137
  36. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +0 -131
  37. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +0 -135
  38. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +0 -141
  39. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +0 -135
  40. data/ext/minimap2/lib/simde/simde/arm/neon.h +0 -97
  41. data/ext/minimap2/lib/simde/simde/check.h +0 -267
  42. data/ext/minimap2/lib/simde/simde/debug-trap.h +0 -83
  43. data/ext/minimap2/lib/simde/simde/hedley.h +0 -1899
  44. data/ext/minimap2/lib/simde/simde/simde-arch.h +0 -445
  45. data/ext/minimap2/lib/simde/simde/simde-common.h +0 -697
  46. data/ext/minimap2/lib/simde/simde/x86/avx.h +0 -5385
  47. data/ext/minimap2/lib/simde/simde/x86/avx2.h +0 -2402
  48. data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +0 -391
  49. data/ext/minimap2/lib/simde/simde/x86/avx512f.h +0 -3389
  50. data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +0 -112
  51. data/ext/minimap2/lib/simde/simde/x86/fma.h +0 -659
  52. data/ext/minimap2/lib/simde/simde/x86/mmx.h +0 -2210
  53. data/ext/minimap2/lib/simde/simde/x86/sse.h +0 -3696
  54. data/ext/minimap2/lib/simde/simde/x86/sse2.h +0 -5991
  55. data/ext/minimap2/lib/simde/simde/x86/sse3.h +0 -343
  56. data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +0 -1783
  57. data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +0 -105
  58. data/ext/minimap2/lib/simde/simde/x86/ssse3.h +0 -1053
  59. data/ext/minimap2/lib/simde/simde/x86/svml.h +0 -543
  60. data/ext/minimap2/lib/simde/test/CMakeLists.txt +0 -166
  61. data/ext/minimap2/lib/simde/test/arm/meson.build +0 -4
  62. data/ext/minimap2/lib/simde/test/arm/neon/meson.build +0 -23
  63. data/ext/minimap2/lib/simde/test/arm/neon/skel.c +0 -871
  64. data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +0 -134
  65. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +0 -39
  66. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +0 -10
  67. data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +0 -1260
  68. data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +0 -873
  69. data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +0 -1084
  70. data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +0 -1260
  71. data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +0 -18
  72. data/ext/minimap2/lib/simde/test/arm/test-arm.c +0 -20
  73. data/ext/minimap2/lib/simde/test/arm/test-arm.h +0 -8
  74. data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +0 -171
  75. data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +0 -68
  76. data/ext/minimap2/lib/simde/test/meson.build +0 -64
  77. data/ext/minimap2/lib/simde/test/munit/COPYING +0 -21
  78. data/ext/minimap2/lib/simde/test/munit/Makefile +0 -55
  79. data/ext/minimap2/lib/simde/test/munit/README.md +0 -54
  80. data/ext/minimap2/lib/simde/test/munit/example.c +0 -351
  81. data/ext/minimap2/lib/simde/test/munit/meson.build +0 -37
  82. data/ext/minimap2/lib/simde/test/munit/munit.c +0 -2055
  83. data/ext/minimap2/lib/simde/test/munit/munit.h +0 -535
  84. data/ext/minimap2/lib/simde/test/run-tests.c +0 -20
  85. data/ext/minimap2/lib/simde/test/run-tests.h +0 -260
  86. data/ext/minimap2/lib/simde/test/x86/avx.c +0 -13752
  87. data/ext/minimap2/lib/simde/test/x86/avx2.c +0 -9977
  88. data/ext/minimap2/lib/simde/test/x86/avx512bw.c +0 -2664
  89. data/ext/minimap2/lib/simde/test/x86/avx512f.c +0 -10416
  90. data/ext/minimap2/lib/simde/test/x86/avx512vl.c +0 -210
  91. data/ext/minimap2/lib/simde/test/x86/fma.c +0 -2557
  92. data/ext/minimap2/lib/simde/test/x86/meson.build +0 -33
  93. data/ext/minimap2/lib/simde/test/x86/mmx.c +0 -2878
  94. data/ext/minimap2/lib/simde/test/x86/skel.c +0 -2984
  95. data/ext/minimap2/lib/simde/test/x86/sse.c +0 -5121
  96. data/ext/minimap2/lib/simde/test/x86/sse2.c +0 -9860
  97. data/ext/minimap2/lib/simde/test/x86/sse3.c +0 -486
  98. data/ext/minimap2/lib/simde/test/x86/sse4.1.c +0 -3446
  99. data/ext/minimap2/lib/simde/test/x86/sse4.2.c +0 -101
  100. data/ext/minimap2/lib/simde/test/x86/ssse3.c +0 -2084
  101. data/ext/minimap2/lib/simde/test/x86/svml.c +0 -1545
  102. data/ext/minimap2/lib/simde/test/x86/test-avx.h +0 -16
  103. data/ext/minimap2/lib/simde/test/x86/test-avx512.h +0 -25
  104. data/ext/minimap2/lib/simde/test/x86/test-mmx.h +0 -13
  105. data/ext/minimap2/lib/simde/test/x86/test-sse.h +0 -13
  106. data/ext/minimap2/lib/simde/test/x86/test-sse2.h +0 -13
  107. data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +0 -196
  108. data/ext/minimap2/lib/simde/test/x86/test-x86.c +0 -48
  109. data/ext/minimap2/lib/simde/test/x86/test-x86.h +0 -8
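The headline change in this release is dropping the vendored SIMDe portability library (items 14-109 above); the diff below shows the start of the removed x86/sse.h shim. For context, here is a minimal, self-contained sketch of the compile-time dispatch pattern that header uses throughout: prefer the native intrinsic when the target supports it, otherwise fall back to a portable scalar loop. The `example_*` names are hypothetical illustrations; only `__SSE__`, `xmmintrin.h`, and `_mm_add_ps` are real.

/* Illustrative sketch only -- not part of the gem or of SIMDe itself. */
#include <stddef.h>
#include <stdio.h>

#if defined(__SSE__)
#  include <xmmintrin.h>   /* native path, as in SIMDE_SSE_NATIVE below */
#endif

/* Union view of a 128-bit register, mirroring simde__m128_private. */
typedef union {
  float f32[4];
#if defined(__SSE__)
  __m128 n;                /* native register when SSE is available */
#endif
} example__m128;

static example__m128
example_mm_add_ps(example__m128 a, example__m128 b) {
  example__m128 r;
#if defined(__SSE__)
  r.n = _mm_add_ps(a.n, b.n);       /* native intrinsic */
#else
  for (size_t i = 0; i < 4; i++)    /* portable scalar fallback */
    r.f32[i] = a.f32[i] + b.f32[i];
#endif
  return r;
}

int main(void) {
  example__m128 a = { { 1.0f, 2.0f, 3.0f, 4.0f } };
  example__m128 b = { { 10.0f, 20.0f, 30.0f, 40.0f } };
  example__m128 r = example_mm_add_ps(a, b);
  printf("%g %g %g %g\n", r.f32[0], r.f32[1], r.f32[2], r.f32[3]);
  return 0;
}

Either way the program prints "11 22 33 44"; the removed header below applies the same idea per intrinsic, with additional NEON, WASM SIMD128, and AltiVec branches between the native and scalar paths.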
data/ext/minimap2/lib/simde/simde/x86/sse.h
@@ -1,3696 +0,0 @@
- /* Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Copyright:
- * 2017-2020 Evan Nemerson <evan@nemerson.com>
- * 2015-2017 John W. Ratcliff <jratcliffscarab@gmail.com>
- * 2015 Brandon Rowlett <browlett@nvidia.com>
- * 2015 Ken Fast <kfast@gdeb.com>
- */
-
- #if !defined(SIMDE__SSE_H)
- # if !defined(SIMDE__SSE_H)
- # define SIMDE__SSE_H
- # endif
- # include "mmx.h"
-
- HEDLEY_DIAGNOSTIC_PUSH
- SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
-
- # if defined(SIMDE_SSE_NATIVE)
- # undef SIMDE_SSE_NATIVE
- # endif
- # if defined(SIMDE_ARCH_X86_SSE) && !defined(SIMDE_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
- # define SIMDE_SSE_NATIVE
- # elif defined(SIMDE_ARCH_ARM_NEON) && !defined(SIMDE_SSE_NO_NEON) && !defined(SIMDE_NO_NEON)
- # define SIMDE_SSE_NEON
- # elif defined(SIMDE_ARCH_WASM_SIMD128)
- # define SIMDE_SSE_WASM_SIMD128
- # elif defined(SIMDE_ARCH_POWER_ALTIVEC)
- # define SIMDE_SSE_POWER_ALTIVEC
- # endif
-
- # if defined(SIMDE_SSE_NATIVE)
- # include <xmmintrin.h>
- # else
- # if defined(SIMDE_SSE_NEON)
- # include <arm_neon.h>
- # endif
- # if defined(SIMDE_SSE_WASM_SIMD128)
- # if !defined(__wasm_unimplemented_simd128__)
- # define __wasm_unimplemented_simd128__
- # endif
- # include <wasm_simd128.h>
- # endif
- # if defined(SIMDE_SSE_POWER_ALTIVEC)
- # include <altivec.h>
- # endif
-
- # if !defined(HEDLEY_INTEL_VERSION) && !defined(HEDLEY_EMSCRIPTEN_VERSION) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
- # include <stdatomic.h>
- # elif defined(_WIN32)
- # include <windows.h>
- # endif
- # endif
-
- SIMDE__BEGIN_DECLS
-
- typedef union {
- #if defined(SIMDE_VECTOR_SUBSCRIPT)
- SIMDE_ALIGN(16) int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN(16) int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN(16) int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN(16) int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN(16) uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN(16) uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN(16) uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN(16) uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- #if defined(SIMDE__HAVE_INT128)
- SIMDE_ALIGN(16) simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN(16) simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- #endif
- SIMDE_ALIGN(16) simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN(16) int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- SIMDE_ALIGN(16) uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- #else
- SIMDE_ALIGN(16) int8_t i8[16];
- SIMDE_ALIGN(16) int16_t i16[8];
- SIMDE_ALIGN(16) int32_t i32[4];
- SIMDE_ALIGN(16) int64_t i64[2];
- SIMDE_ALIGN(16) uint8_t u8[16];
- SIMDE_ALIGN(16) uint16_t u16[8];
- SIMDE_ALIGN(16) uint32_t u32[4];
- SIMDE_ALIGN(16) uint64_t u64[2];
- #if defined(SIMDE__HAVE_INT128)
- SIMDE_ALIGN(16) simde_int128 i128[1];
- SIMDE_ALIGN(16) simde_uint128 u128[1];
- #endif
- SIMDE_ALIGN(16) simde_float32 f32[4];
- SIMDE_ALIGN(16) int_fast32_t i32f[16 / sizeof(int_fast32_t)];
- SIMDE_ALIGN(16) uint_fast32_t u32f[16 / sizeof(uint_fast32_t)];
- #endif
-
- SIMDE_ALIGN(16) simde__m64_private m64_private[2];
- SIMDE_ALIGN(16) simde__m64 m64[2];
-
- #if defined(SIMDE_SSE_NATIVE)
- SIMDE_ALIGN(16) __m128 n;
- #elif defined(SIMDE_SSE_NEON)
- SIMDE_ALIGN(16) int8x16_t neon_i8;
- SIMDE_ALIGN(16) int16x8_t neon_i16;
- SIMDE_ALIGN(16) int32x4_t neon_i32;
- SIMDE_ALIGN(16) int64x2_t neon_i64;
- SIMDE_ALIGN(16) uint8x16_t neon_u8;
- SIMDE_ALIGN(16) uint16x8_t neon_u16;
- SIMDE_ALIGN(16) uint32x4_t neon_u32;
- SIMDE_ALIGN(16) uint64x2_t neon_u64;
- SIMDE_ALIGN(16) float32x4_t neon_f32;
- #if defined(SIMDE_ARCH_AARCH64)
- SIMDE_ALIGN(16) float64x2_t neon_f64;
- #endif
- #elif defined(SIMDE_SSE_WASM_SIMD128)
- SIMDE_ALIGN(16) v128_t wasm_v128;
- #elif defined(SIMDE_SSE_POWER_ALTIVEC)
- SIMDE_ALIGN(16) vector unsigned char altivec_u8;
- SIMDE_ALIGN(16) vector unsigned short altivec_u16;
- SIMDE_ALIGN(16) vector unsigned int altivec_u32;
- SIMDE_ALIGN(16) vector unsigned long long altivec_u64;
- SIMDE_ALIGN(16) vector signed char altivec_i8;
- SIMDE_ALIGN(16) vector signed short altivec_i16;
- SIMDE_ALIGN(16) vector signed int altivec_i32;
- SIMDE_ALIGN(16) vector signed long long altivec_i64;
- SIMDE_ALIGN(16) vector float altivec_f32;
- SIMDE_ALIGN(16) vector double altivec_f64;
- #endif
- } simde__m128_private;
-
- #if defined(SIMDE_SSE_NATIVE)
- typedef __m128 simde__m128;
- #elif defined(SIMDE_SSE_NEON)
- typedef float32x4_t simde__m128;
- #elif defined(SIMDE_SSE_WASM_SIMD128)
- typedef v128_t simde__m128;
- #elif defined(SIMDE_SSE_POWER_ALTIVEC)
- typedef vector float simde__m128;
- #elif defined(SIMDE_VECTOR_SUBSCRIPT)
- typedef simde_float32 simde__m128 SIMDE_ALIGN(16) SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
- #else
- typedef simde__m128_private simde__m128;
- #endif
-
- #if !defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
- #define SIMDE_SSE_ENABLE_NATIVE_ALIASES
- typedef simde__m128 __m128;
- #endif
-
- HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect");
- HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect");
- #if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)
- HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned");
- HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned");
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde__m128_from_private(simde__m128_private v) {
- simde__m128 r;
- simde_memcpy(&r, &v, sizeof(r));
- return r;
- }
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128_private
- simde__m128_to_private(simde__m128 v) {
- simde__m128_private r;
- simde_memcpy(&r, &v, sizeof(r));
- return r;
- }
-
- #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
- HEDLEY_DIAGNOSTIC_POP
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_set_ps(e3, e2, e1, e0);
- #else
- simde__m128_private r_;
-
- #if defined(SIMDE_SSE_NEON)
- SIMDE_ALIGN(16) simde_float32 data[4] = { e0, e1, e2, e3 };
- r_.neon_f32 = vld1q_f32(data);
- #elif defined(SIMDE_SSE_WASM_SIMD128)
- r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3);
- #else
- r_.f32[0] = e0;
- r_.f32[1] = e1;
- r_.f32[2] = e2;
- r_.f32[3] = e3;
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_set_ps1 (simde_float32 a) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_set_ps1(a);
- #elif defined(SIMDE_SSE_NEON)
- return vdupq_n_f32(a);
- #else
- return simde_mm_set_ps(a, a, a, a);
- #endif
- }
- #define simde_mm_set1_ps(a) simde_mm_set_ps1(a)
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_set_ps1(a) simde_mm_set_ps1(a)
- # define _mm_set1_ps(a) simde_mm_set1_ps(a)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_move_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_move_ss(a, b);
- #else
- simde__m128_private
- r_,
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
- r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0);
- #elif defined(SIMDE_SSE_POWER_ALTIVEC)
- vector unsigned char m = {
- 16, 17, 18, 19,
- 4, 5, 6, 7,
- 8, 9, 10, 11,
- 12, 13, 14, 15
- };
- r_.altivec_f32 = vec_perm(a_.altivec_f32, b_.altivec_f32, m);
- #elif defined(SIMDE__SHUFFLE_VECTOR)
- r_.f32 = SIMDE__SHUFFLE_VECTOR(32, 16, a_.f32, b_.f32, 4, 1, 2, 3);
- #else
- r_.f32[0] = b_.f32[0];
- r_.f32[1] = a_.f32[1];
- r_.f32[2] = a_.f32[2];
- r_.f32[3] = a_.f32[3];
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_move_ss(a, b) simde_mm_move_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_add_ps (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_add_ps(a, b);
- #else
- simde__m128_private
- r_,
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
- r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32);
- #elif defined(SIMDE_SSE_WASM_SIMD128)
- r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_SSE_POWER_ALTIVEC)
- r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.f32 = a_.f32 + b_.f32;
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
- r_.f32[i] = a_.f32[i] + b_.f32[i];
- }
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_add_ps(a, b) simde_mm_add_ps((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_add_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_add_ss(a, b);
- #elif defined(SIMDE_ASSUME_VECTORIZATION)
- return simde_mm_move_ss(a, simde_mm_add_ps(a, b));
- #else
- simde__m128_private
- r_,
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- r_.f32[0] = a_.f32[0] + b_.f32[0];
- r_.f32[1] = a_.f32[1];
- r_.f32[2] = a_.f32[2];
- r_.f32[3] = a_.f32[3];
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_add_ss(a, b) simde_mm_add_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_and_ps (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_and_ps(a, b);
- #else
- simde__m128_private
- r_,
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
- r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32);
- #elif defined(SIMDE_SSE_WASM_SIMD128)
- r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32 = a_.i32 & b_.i32;
- #elif defined(SIMDE_SSE_POWER_ALTIVEC)
- r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32);
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.i32[i] = a_.i32[i] & b_.i32[i];
- }
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_and_ps(a, b) simde_mm_and_ps((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_andnot_ps (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_andnot_ps(a, b);
- #else
- simde__m128_private
- r_,
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
- r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32);
- #elif defined(SIMDE_SSE_WASM_SIMD128)
- r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128);
- #elif defined(SIMDE_SSE_POWER_ALTIVEC)
- r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32 = ~a_.i32 & b_.i32;
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.i32[i] = ~(a_.i32[i]) & b_.i32[i];
- }
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
- return _mm_avg_pu16(a, b);
- #else
- simde__m64_private
- r_,
- a_ = simde__m64_to_private(a),
- b_ = simde__m64_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
- r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE__CONVERT_VECTOR)
- uint32_t wa SIMDE_VECTOR(16);
- uint32_t wb SIMDE_VECTOR(16);
- uint32_t wr SIMDE_VECTOR(16);
- SIMDE__CONVERT_VECTOR(wa, a_.u16);
- SIMDE__CONVERT_VECTOR(wb, b_.u16);
- wr = (wa + wb + 1) >> 1;
- SIMDE__CONVERT_VECTOR(r_.u16, wr);
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
- r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1;
- }
- #endif
-
- return simde__m64_from_private(r_);
- #endif
- }
- #define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b)
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b)
- # define _m_pavgw(a, b) simde_mm_avg_pu16(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
- return _mm_avg_pu8(a, b);
- #else
- simde__m64_private
- r_,
- a_ = simde__m64_to_private(a),
- b_ = simde__m64_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
- r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE__CONVERT_VECTOR)
- uint16_t wa SIMDE_VECTOR(16);
- uint16_t wb SIMDE_VECTOR(16);
- uint16_t wr SIMDE_VECTOR(16);
- SIMDE__CONVERT_VECTOR(wa, a_.u8);
- SIMDE__CONVERT_VECTOR(wb, b_.u8);
- wr = (wa + wb + 1) >> 1;
- SIMDE__CONVERT_VECTOR(r_.u8, wr);
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
- r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1;
- }
- #endif
-
- return simde__m64_from_private(r_);
- #endif
- }
- #define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b)
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b)
- # define _m_pavgb(a, b) simde_mm_avg_pu8(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_cmpeq_ps(a, b);
- #else
- simde__m128_private
- r_,
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
- r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32);
- #elif defined(SIMDE_SSE_WASM_SIMD128)
- r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_SSE_POWER_ALTIVEC)
- r_.altivec_f32 = (vector float) vec_cmpeq(a_.altivec_f32, b_.altivec_f32);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), a_.f32 == b_.f32);
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
- r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
- }
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_cmpeq_ss(a, b);
- #elif defined(SIMDE_ASSUME_VECTORIZATION)
- return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b));
- #else
- simde__m128_private
- r_,
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
- SIMDE__VECTORIZE
- for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
- r_.u32[i] = a_.u32[i];
- }
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_cmpge_ps(a, b);
- #else
- simde__m128_private
- r_,
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
- r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32);
- #elif defined(SIMDE_SSE_WASM_SIMD128)
- r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_SSE_POWER_ALTIVEC)
- r_.altivec_f32 = (vector float) vec_cmpge(a_.altivec_f32, b_.altivec_f32);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32 = (__typeof__(r_.i32)) (a_.f32 >= b_.f32);
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
- r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
- }
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE) && !defined(__PGI)
- return _mm_cmpge_ss(a, b);
- #elif defined(SIMDE_ASSUME_VECTORIZATION)
- return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b));
- #else
- simde__m128_private
- r_,
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
- SIMDE__VECTORIZE
- for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
- r_.u32[i] = a_.u32[i];
- }
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_cmpgt_ps(a, b);
- #else
- simde__m128_private
- r_,
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
- r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32);
- #elif defined(SIMDE_SSE_WASM_SIMD128)
- r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_SSE_POWER_ALTIVEC)
- r_.altivec_f32 = (vector float) vec_cmpgt(a_.altivec_f32, b_.altivec_f32);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32 = (__typeof__(r_.i32)) (a_.f32 > b_.f32);
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
- r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
- }
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE) && !defined(__PGI)
- return _mm_cmpgt_ss(a, b);
- #elif defined(SIMDE_ASSUME_VECTORIZATION)
- return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b));
- #else
- simde__m128_private
- r_,
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
- SIMDE__VECTORIZE
- for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
- r_.u32[i] = a_.u32[i];
- }
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmple_ps (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_cmple_ps(a, b);
- #else
- simde__m128_private
- r_,
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
- r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32);
- #elif defined(SIMDE_SSE_WASM_SIMD128)
- r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_SSE_POWER_ALTIVEC)
- r_.altivec_f32 = (vector float) vec_cmple(a_.altivec_f32, b_.altivec_f32);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32 = (__typeof__(r_.i32)) (a_.f32 <= b_.f32);
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
- r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
- }
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmple_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_cmple_ss(a, b);
- #elif defined(SIMDE_ASSUME_VECTORIZATION)
- return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b));
- #else
- simde__m128_private
- r_,
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
- SIMDE__VECTORIZE
- for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
- r_.u32[i] = a_.u32[i];
- }
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_cmplt_ps(a, b);
- #else
- simde__m128_private
- r_,
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
- r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32);
- #elif defined(SIMDE_SSE_WASM_SIMD128)
- r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_SSE_POWER_ALTIVEC)
- r_.altivec_f32 = (vector float) vec_cmplt(a_.altivec_f32, b_.altivec_f32);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32 = (__typeof__(r_.i32)) (a_.f32 < b_.f32);
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
- r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
- }
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_cmplt_ss(a, b);
- #elif defined(SIMDE_ASSUME_VECTORIZATION)
- return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b));
- #else
- simde__m128_private
- r_,
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
- SIMDE__VECTORIZE
- for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
- r_.u32[i] = a_.u32[i];
- }
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_cmpneq_ps(a, b);
- #else
- simde__m128_private
- r_,
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
- r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32));
- #elif defined(SIMDE_SSE_WASM_SIMD128)
- r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_SSE_POWER_ALTIVEC) && (SIMDE_ARCH_POWER >= 900) && !defined(HEDLEY_IBM_VERSION)
- /* vec_cmpne(vector float, vector float) is missing from XL C/C++ v16.1.1,
- though the documentation (table 89 on page 432 of the IBM XL C/C++ for
- Linux Compiler Reference, Version 16.1.1) shows that it should be
- present. Both GCC and clang support it. */
- r_.altivec_f32 = (vector float) vec_cmpne(a_.altivec_f32, b_.altivec_f32);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32 = (__typeof__(r_.i32)) (a_.f32 != b_.f32);
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
- r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
- }
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_cmpneq_ss(a, b);
- #elif defined(SIMDE_ASSUME_VECTORIZATION)
- return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b));
- #else
- simde__m128_private
- r_,
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
- SIMDE__VECTORIZE
- for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
- r_.u32[i] = a_.u32[i];
- }
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) {
- return simde_mm_cmplt_ps(a, b);
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) {
- return simde_mm_cmplt_ss(a, b);
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) {
- return simde_mm_cmple_ps(a, b);
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) {
- return simde_mm_cmple_ss(a, b);
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) {
- return simde_mm_cmpgt_ps(a, b);
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) {
- return simde_mm_cmpgt_ss(a, b);
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) {
- return simde_mm_cmpge_ps(a, b);
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) {
- return simde_mm_cmpge_ss(a, b);
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_cmpord_ps(a, b);
- #else
- simde__m128_private
- r_,
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
- /* Note: NEON does not have ordered compare builtin
- Need to compare a eq a and b eq b to check for NaN
- Do AND of results to get final */
- uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32);
- uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32);
- r_.neon_u32 = vandq_u32(ceqaa, ceqbb);
- #elif defined(simde_isnanf)
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
- r_.u32[i] = (simde_isnanf(a_.f32[i]) || simde_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0);
- }
- #else
- HEDLEY_UNREACHABLE();
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_cmpunord_ps(a, b);
- #else
- simde__m128_private
- r_,
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- #if defined(simde_isnanf)
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
- r_.u32[i] = (simde_isnanf(a_.f32[i]) || simde_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0);
- }
- #else
- HEDLEY_UNREACHABLE();
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE) && !defined(__PGI)
- return _mm_cmpunord_ss(a, b);
- #elif defined(SIMDE_ASSUME_VECTORIZATION)
- return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b));
- #else
- simde__m128_private
- r_,
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- #if defined(simde_isnanf)
- r_.u32[0] = (simde_isnanf(a_.f32[0]) || simde_isnanf(b_.f32[0])) ? ~UINT32_C(0) : UINT32_C(0);
- SIMDE__VECTORIZE
- for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
- r_.u32[i] = a_.u32[i];
- }
- #else
- HEDLEY_UNREACHABLE();
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- int
- simde_mm_comieq_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_comieq_ss(a, b);
- #else
- simde__m128_private
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
- uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
- uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
- uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
- uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32);
- return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0);
- #else
- return a_.f32[0] == b_.f32[0];
- #endif
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- int
- simde_mm_comige_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_comige_ss(a, b);
- #else
- simde__m128_private
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
- uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
- uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
- uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
- uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32);
- return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0);
- #else
- return a_.f32[0] >= b_.f32[0];
- #endif
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- int
- simde_mm_comigt_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_comigt_ss(a, b);
- #else
- simde__m128_private
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
- uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
- uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
- uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
- uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32);
- return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0);
- #else
- return a_.f32[0] > b_.f32[0];
- #endif
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- int
- simde_mm_comile_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_comile_ss(a, b);
- #else
- simde__m128_private
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
- uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
- uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
- uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
- uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32);
- return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0);
- #else
- return a_.f32[0] <= b_.f32[0];
- #endif
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- int
- simde_mm_comilt_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_comilt_ss(a, b);
- #else
- simde__m128_private
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
- uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
- uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
- uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
- uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32);
- return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0);
- #else
- return a_.f32[0] < b_.f32[0];
- #endif
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- int
- simde_mm_comineq_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_comineq_ss(a, b);
- #else
- simde__m128_private
- a_ = simde__m128_to_private(a),
- b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
- uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
- uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
- uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
- uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32));
- return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0);
- #else
- return a_.f32[0] != b_.f32[0];
- #endif
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
- return _mm_cvt_pi2ps(a, b);
- #else
- simde__m128_private
- r_,
- a_ = simde__m128_to_private(a);
- simde__m64_private b_ = simde__m64_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
- r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32));
- #elif defined(SIMDE__CONVERT_VECTOR)
- SIMDE__CONVERT_VECTOR(r_.m64_private[0].f32, b_.i32);
- r_.m64_private[1] = a_.m64_private[1];
-
- #else
- r_.f32[0] = (simde_float32) b_.i32[0];
- r_.f32[1] = (simde_float32) b_.i32[1];
- r_.i32[2] = a_.i32[2];
- r_.i32[3] = a_.i32[3];
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_cvt_ps2pi (simde__m128 a) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
- return _mm_cvt_ps2pi(a);
- #else
- simde__m64_private r_;
- simde__m128_private a_ = simde__m128_to_private(a);
-
- #if defined(SIMDE_SSE_NEON)
- r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32));
- #elif defined(SIMDE__CONVERT_VECTOR) && !defined(__clang__)
- SIMDE__CONVERT_VECTOR(r_.i32, a_.m64_private[0].f32);
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.i32[i] = (int32_t) a_.f32[i];
- }
- #endif
-
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cvt_si2ss (simde__m128 a, int32_t b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_cvt_si2ss(a, b);
- #else
- simde__m128_private
- r_,
- a_ = simde__m128_to_private(a);
-
- #if defined(SIMDE_SSE_NEON)
- r_.neon_f32 = vsetq_lane_f32((float) b, a_.neon_f32, 0);
- #else
- r_.f32[0] = (simde_float32) b;
- r_.i32[1] = a_.i32[1];
- r_.i32[2] = a_.i32[2];
- r_.i32[3] = a_.i32[3];
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- int32_t
- simde_mm_cvt_ss2si (simde__m128 a) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_cvt_ss2si(a);
- #else
- simde__m128_private a_ = simde__m128_to_private(a);
-
- #if defined(SIMDE_SSE_NEON)
- return SIMDE_CONVERT_FTOI(int32_t, nearbyintf(vgetq_lane_f32(a_.neon_f32, 0)));
- #elif defined(SIMDE_HAVE_MATH_H)
- return SIMDE_CONVERT_FTOI(int32_t, nearbyintf(a_.f32[0]));
- #else
- HEDLEY_UNREACHABLE();
- #endif
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cvtpi16_ps (simde__m64 a) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
- return _mm_cvtpi16_ps(a);
- #else
- simde__m128_private r_;
- simde__m64_private a_ = simde__m64_to_private(a);
-
- #if defined(SIMDE_SSE_NEON) && 0 /* TODO */
- r_.neon_f32 = vmovl_s16(vget_low_s16(vuzp1q_s16(a_.neon_i16, vmovq_n_s16(0))));
- #elif defined(SIMDE__CONVERT_VECTOR)
- SIMDE__CONVERT_VECTOR(r_.f32, a_.i16);
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
- simde_float32 v = a_.i16[i];
- r_.f32[i] = v;
- }
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
- return _mm_cvtpi32_ps(a, b);
- #else
- simde__m128_private
- r_,
- a_ = simde__m128_to_private(a);
- simde__m64_private b_ = simde__m64_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
- r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32));
- #elif defined(SIMDE__CONVERT_VECTOR)
- SIMDE__CONVERT_VECTOR(r_.m64_private[0].f32, b_.i32);
- r_.m64_private[1] = a_.m64_private[1];
- #else
- r_.f32[0] = (simde_float32) b_.i32[0];
- r_.f32[1] = (simde_float32) b_.i32[1];
- r_.i32[2] = a_.i32[2];
- r_.i32[3] = a_.i32[3];
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
- return _mm_cvtpi32x2_ps(a, b);
- #else
- simde__m128_private r_;
- simde__m64_private
- a_ = simde__m64_to_private(a),
- b_ = simde__m64_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
- r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32));
- #elif defined(SIMDE__CONVERT_VECTOR)
- SIMDE__CONVERT_VECTOR(r_.m64_private[0].f32, a_.i32);
- SIMDE__CONVERT_VECTOR(r_.m64_private[1].f32, b_.i32);
- #else
- r_.f32[0] = (simde_float32) a_.i32[0];
- r_.f32[1] = (simde_float32) a_.i32[1];
- r_.f32[2] = (simde_float32) b_.i32[0];
- r_.f32[3] = (simde_float32) b_.i32[1];
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cvtpi8_ps (simde__m64 a) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
- return _mm_cvtpi8_ps(a);
- #else
- simde__m128_private r_;
- simde__m64_private a_ = simde__m64_to_private(a);
-
- #if defined(SIMDE_SSE_NEON)
- r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8))));
- #else
- r_.f32[0] = (simde_float32) a_.i8[0];
- r_.f32[1] = (simde_float32) a_.i8[1];
- r_.f32[2] = (simde_float32) a_.i8[2];
- r_.f32[3] = (simde_float32) a_.i8[3];
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_cvtps_pi16 (simde__m128 a) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
- return _mm_cvtps_pi16(a);
- #else
- simde__m64_private r_;
- simde__m128_private a_ = simde__m128_to_private(a);
-
- #if defined(SIMDE__CONVERT_VECTOR)
- SIMDE__CONVERT_VECTOR(r_.i16, a_.f32);
- #elif defined(SIMDE_SSE_NEON)
- r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(a_.neon_f32));
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, a_.f32[i]);
- }
- #endif
-
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_cvtps_pi32 (simde__m128 a) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
- return _mm_cvtps_pi32(a);
- #else
- simde__m64_private r_;
- simde__m128_private a_ = simde__m128_to_private(a);
-
- #if defined(SIMDE_SSE_NEON)
- r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32));
- #elif defined(SIMDE__CONVERT_VECTOR)
- SIMDE__CONVERT_VECTOR(r_.i32, a_.m64_private[0].f32);
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, a_.f32[i]);
- }
- #endif
-
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_cvtps_pi8 (simde__m128 a) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
- return _mm_cvtps_pi8(a);
- #else
- simde__m64_private r_;
- simde__m128_private a_ = simde__m128_to_private(a);
-
- #if defined(SIMDE_SSE_NEON)
- int16x4_t b = vmovn_s32(vcvtq_s32_f32(a_.neon_f32));
- int16x8_t c = vcombine_s16(b, vmov_n_s16(0));
- r_.neon_i8 = vmovn_s16(c);
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {
- r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, a_.f32[i]);
- }
- /* Note: the upper half is undefined */
- #endif
-
- return simde__m64_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cvtpu16_ps (simde__m64 a) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
- return _mm_cvtpu16_ps(a);
- #else
- simde__m128_private r_;
- simde__m64_private a_ = simde__m64_to_private(a);
-
- #if defined(SIMDE_SSE_NEON)
- r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16));
- #elif defined(SIMDE__CONVERT_VECTOR)
- SIMDE__CONVERT_VECTOR(r_.f32, a_.u16);
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
- r_.f32[i] = (simde_float32) a_.u16[i];
- }
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cvtpu8_ps (simde__m64 a) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
- return _mm_cvtpu8_ps(a);
- #else
- simde__m128_private r_;
- simde__m64_private a_ = simde__m64_to_private(a);
-
- #if defined(SIMDE_SSE_NEON)
- r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8))));
- #else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
- r_.f32[i] = (simde_float32) a_.u8[i];
- }
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_cvtsi32_ss(a, b);
- #else
- simde__m128_private r_;
- simde__m128_private a_ = simde__m128_to_private(a);
-
- #if defined(SIMDE_SSE_NEON)
- r_.neon_f32 = vsetq_lane_f32((simde_float32) b, a_.neon_f32, 0);
- #else
- r_.f32[0] = (simde_float32) b;
- SIMDE__VECTORIZE
- for (size_t i = 1 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.i32[i] = a_.i32[i];
- }
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64)
- #if !defined(__PGI)
- return _mm_cvtsi64_ss(a, b);
- #else
- return _mm_cvtsi64x_ss(a, b);
- #endif
- #else
- simde__m128_private r_;
- simde__m128_private a_ = simde__m128_to_private(a);
-
- #if defined(SIMDE_SSE_NEON)
- r_.neon_f32 = vsetq_lane_f32((simde_float32) b, a_.neon_f32, 0);
- #else
- r_ = a_;
- r_.f32[0] = (simde_float32) b;
- #endif
-
- return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde_float32
- simde_mm_cvtss_f32 (simde__m128 a) {
- #if defined(SIMDE_SSE_NATIVE)
- return _mm_cvtss_f32(a);
- #else
- simde__m128_private a_ = simde__m128_to_private(a);
- #if defined(SIMDE_SSE_NEON)
- return vgetq_lane_f32(a_.neon_f32, 0);
- #else
- return a_.f32[0];
- #endif
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- int32_t
- simde_mm_cvtss_si32 (simde__m128 a) {
- return simde_mm_cvt_ss2si(a);
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- int64_t
- simde_mm_cvtss_si64 (simde__m128 a) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64)
1576
- #if !defined(__PGI)
1577
- return _mm_cvtss_si64(a);
1578
- #else
1579
- return _mm_cvtss_si64x(a);
1580
- #endif
1581
- #else
1582
- simde__m128_private a_ = simde__m128_to_private(a);
1583
- #if defined(SIMDE_SSE_NEON)
1584
- return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0));
1585
- #else
1586
- return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]);
1587
- #endif
1588
- #endif
1589
- }
1590
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
1591
- # define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a))
1592
- #endif
1593
-
1594
- SIMDE__FUNCTION_ATTRIBUTES
1595
- simde__m64
1596
- simde_mm_cvtt_ps2pi (simde__m128 a) {
1597
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
1598
- return _mm_cvtt_ps2pi(a);
1599
- #else
1600
- simde__m64_private r_;
1601
- simde__m128_private a_ = simde__m128_to_private(a);
1602
-
1603
- #if defined(SIMDE_SSE_NEON)
1604
- r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32));
1605
- #elif defined(SIMDE__CONVERT_VECTOR)
1606
- SIMDE__CONVERT_VECTOR(r_.i32, a_.m64_private[0].f32);
1607
- #else
1608
- SIMDE__VECTORIZE
1609
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1610
- r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, a_.f32[i]);
1611
- }
1612
- #endif
1613
-
1614
- return simde__m64_from_private(r_);
1615
- #endif
1616
- }
1617
- #define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a)
1618
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
1619
- # define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a))
1620
- # define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a))
1621
- #endif
1622
-
1623
- SIMDE__FUNCTION_ATTRIBUTES
1624
- int32_t
1625
- simde_mm_cvtt_ss2si (simde__m128 a) {
1626
- #if defined(SIMDE_SSE_NATIVE)
1627
- return _mm_cvtt_ss2si(a);
1628
- #else
1629
- simde__m128_private a_ = simde__m128_to_private(a);
1630
-
1631
- #if defined(SIMDE_SSE_NEON)
1632
- return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0));
1633
- #else
1634
- return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]);
1635
- #endif
1636
- #endif
1637
- }
- #define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a))
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a))
- # define _mm_cvttss_si32(a) simde_mm_cvttss_si32((a))
- #endif
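For reference: the `cvtt` variants truncate toward zero, while the plain `cvt` forms follow the current rounding mode. A minimal standalone sketch of the difference; the include path is an assumption (this gem release stops vendoring simde, so a separate simde checkout on the include path is presumed), and the portable fallback may truncate in both cases since it converts with a plain cast.

    #include <stdio.h>
    #include "simde/x86/sse.h"  /* assumed include path to a simde checkout */

    int main(void) {
      simde__m128 v = simde_mm_set_ss(1.7f);
      /* On the native SSE path this prints 2 then 1:
       * cvt honors the rounding mode (nearest by default), cvtt truncates. */
      printf("cvt:  %d\n", simde_mm_cvt_ss2si(v));
      printf("cvtt: %d\n", simde_mm_cvtt_ss2si(v));
      return 0;
    }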
-
- SIMDE__FUNCTION_ATTRIBUTES
- int64_t
- simde_mm_cvttss_si64 (simde__m128 a) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER)
- #if defined(__PGI)
-   return _mm_cvttss_si64x(a);
- #else
-   return _mm_cvttss_si64(a);
- #endif
- #else
-   simde__m128_private a_ = simde__m128_to_private(a);
-
- #if defined(SIMDE_SSE_NEON)
-   return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0));
- #else
-   return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]);
- #endif
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_cmpord_ss(a, b);
- #elif defined(SIMDE_ASSUME_VECTORIZATION)
-   return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b));
- #else
-   simde__m128_private
-     r_,
-     a_ = simde__m128_to_private(a);
-
- #if defined(simde_isnanf)
-   r_.u32[0] = (simde_isnanf(simde_mm_cvtss_f32(a)) || simde_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0);
-   SIMDE__VECTORIZE
-   for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
-     r_.u32[i] = a_.u32[i];
-   }
- #else
-   HEDLEY_UNREACHABLE();
- #endif
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_div_ps (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_div_ps(a, b);
- #else
-   simde__m128_private
-     r_,
-     a_ = simde__m128_to_private(a),
-     b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE_SSE_NEON) && defined(SIMDE_ARCH_AARCH64)
-   r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32);
- #elif defined(SIMDE_SSE_NEON)
-   float32x4_t recip0 = vrecpeq_f32(b_.neon_f32);
-   float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32));
-   r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1);
- #elif defined(SIMDE_SSE_WASM_SIMD128)
-   r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-   r_.f32 = a_.f32 / b_.f32;
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
-     r_.f32[i] = a_.f32[i] / b_.f32[i];
-   }
- #endif
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_div_ps(a, b) simde_mm_div_ps((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_div_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_div_ss(a, b);
- #elif defined(SIMDE_ASSUME_VECTORIZATION)
-   return simde_mm_move_ss(a, simde_mm_div_ps(a, b));
- #else
-   simde__m128_private
-     r_,
-     a_ = simde__m128_to_private(a),
-     b_ = simde__m128_to_private(b);
-
-   r_.f32[0] = a_.f32[0] / b_.f32[0];
-   SIMDE__VECTORIZE
-   for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
-     r_.f32[i] = a_.f32[i];
-   }
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_div_ss(a, b) simde_mm_div_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- int16_t
- simde_mm_extract_pi16 (simde__m64 a, const int imm8)
- HEDLEY_REQUIRE_MSG((imm8 & 3) == imm8, "imm8 must be in range [0, 3]") {
-   simde__m64_private a_ = simde__m64_to_private(a);
-   return a_.i16[imm8];
- }
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX) && !defined(HEDLEY_PGI_VERSION)
- # if HEDLEY_HAS_WARNING("-Wvector-conversion")
-   /* https://bugs.llvm.org/show_bug.cgi?id=44589 */
- # define simde_mm_extract_pi16(a, imm8) ( \
-     HEDLEY_DIAGNOSTIC_PUSH \
-     _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") \
-     HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16((a), (imm8))) \
-     HEDLEY_DIAGNOSTIC_POP \
-   )
- # else
- # define simde_mm_extract_pi16(a, imm8) ((int16_t) (_mm_extract_pi16(a, imm8)))
- # endif
- #elif defined(SIMDE_SSE_NEON)
- # define simde_mm_extract_pi16(a, imm8) ((int16_t) (vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8)))
- #endif
- #define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8)
-
- enum {
- #if defined(SIMDE_SSE_NATIVE)
-   SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST,
-   SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN,
-   SIMDE_MM_ROUND_UP = _MM_ROUND_UP,
-   SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO
- #else
-   SIMDE_MM_ROUND_NEAREST
- #if defined(FE_TONEAREST)
-     = FE_TONEAREST
- #endif
-   ,
-
-   SIMDE_MM_ROUND_DOWN
- #if defined(FE_DOWNWARD)
-     = FE_DOWNWARD
- #endif
-   ,
-
-   SIMDE_MM_ROUND_UP
- #if defined(FE_UPWARD)
-     = FE_UPWARD
- #endif
-   ,
-
-   SIMDE_MM_ROUND_TOWARD_ZERO
- #if defined(FE_TOWARDZERO)
-     = FE_TOWARDZERO
- #endif
- #endif
- };
-
- SIMDE__FUNCTION_ATTRIBUTES
- unsigned int
- SIMDE_MM_GET_ROUNDING_MODE(void) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _MM_GET_ROUNDING_MODE();
- #elif defined(SIMDE_HAVE_MATH_H)
-   return (unsigned int) fegetround();
- #else
-   HEDLEY_UNREACHABLE();
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE()
- # define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), imm8)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- void
- SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) {
- #if defined(SIMDE_SSE_NATIVE)
-   _MM_SET_ROUNDING_MODE(a);
- #else
-   fesetround((int) a);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8)
- HEDLEY_REQUIRE_MSG((imm8 & 3) == imm8, "imm8 must be in range [0, 3]") {
-   simde__m64_private
-     r_,
-     a_ = simde__m64_to_private(a);
-
-   r_.i64[0] = a_.i64[0];
-   r_.i16[imm8] = i;
-
-   return simde__m64_from_private(r_);
- }
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX) && !defined(__PGI)
- # if HEDLEY_HAS_WARNING("-Wvector-conversion")
-   /* https://bugs.llvm.org/show_bug.cgi?id=44589 */
- # define simde_mm_insert_pi16(a, i, imm8) ( \
-     HEDLEY_DIAGNOSTIC_PUSH \
-     _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") \
-     (_mm_insert_pi16((a), (i), (imm8))) \
-     HEDLEY_DIAGNOSTIC_POP \
-   )
- # else
- # define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8)
- # endif
- #elif defined(SIMDE_SSE_NEON)
- # define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_private((simde__m64_private) { .neon_i16 = vset_lane_s16(i, simde__m64_to_private(a).neon_i16, (imm8)) })
- #endif
- #define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8))
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8)
- # define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8)
- #endif
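A quick sketch of the insert/extract pair on a 64-bit vector; the include path is an assumption, and simde_mm_set_pi16 comes from the companion MMX shim that this header pulls in.

    #include <stdio.h>
    #include "simde/x86/sse.h"  /* assumed include path to a simde checkout */

    int main(void) {
      simde__m64 v = simde_mm_set_pi16(4, 3, 2, 1);      /* lanes: [0]=1 [1]=2 [2]=3 [3]=4 */
      v = simde_mm_insert_pi16(v, 99, 2);                /* overwrite lane 2 */
      printf("%d\n", (int) simde_mm_extract_pi16(v, 2)); /* prints 99 */
      return 0;
    }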
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) {
-   simde_assert_aligned(16, mem_addr);
-
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_load_ps(mem_addr);
- #else
-   simde__m128_private r_;
-
- #if defined(SIMDE_SSE_NEON)
-   r_.neon_f32 = vld1q_f32(mem_addr);
- #elif defined(SIMDE_SSE_POWER_ALTIVEC)
-   r_.altivec_f32 = vec_ld(0, mem_addr);
- #else
-   r_ = *SIMDE_CAST_ALIGN(16, simde__m128_private const*, mem_addr);
- #endif
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_load_ps1 (simde_float32 const* mem_addr) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_load_ps1(mem_addr);
- #else
-   simde__m128_private r_;
-
- #if defined(SIMDE_SSE_NEON)
-   r_.neon_f32 = vld1q_dup_f32(mem_addr);
- #else
-   r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr));
- #endif
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #define simde_mm_load1_ps(mem_addr) simde_mm_load_ps1(mem_addr)
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_load_ps1(mem_addr) simde_mm_load_ps1(mem_addr)
- # define _mm_load1_ps(mem_addr) simde_mm_load_ps1(mem_addr)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_load_ss (simde_float32 const* mem_addr) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_load_ss(mem_addr);
- #else
-   simde__m128_private r_;
-
- #if defined(SIMDE_SSE_NEON)
-   r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0);
- #else
-   r_.f32[0] = *mem_addr;
-   r_.i32[1] = 0;
-   r_.i32[2] = 0;
-   r_.i32[3] = 0;
- #endif
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
-   return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr));
- #else
-   simde__m128_private
-     r_,
-     a_ = simde__m128_to_private(a);
-
- #if defined(SIMDE_SSE_NEON)
-   r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)));
- #else
-   simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr);
-   r_.f32[0] = a_.f32[0];
-   r_.f32[1] = a_.f32[1];
-   r_.f32[2] = b_.f32[0];
-   r_.f32[3] = b_.f32[1];
- #endif
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr))
- #endif
-
- /* The SSE documentation says that there are no alignment requirements
-    for mem_addr. Unfortunately they used the __m64 type for the argument
-    which is supposed to be 8-byte aligned, so some compilers (like clang
-    with -Wcast-align) will generate a warning if you try to cast, say,
-    a simde_float32* to a simde__m64* for this function.
-
-    I think the choice of argument type is unfortunate, but I do think we
-    need to stick to it here. If there is demand I can always add something
-    like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr));
- #else
-   simde__m128_private
-     r_,
-     a_ = simde__m128_to_private(a);
-
- #if defined(SIMDE_SSE_NEON)
-   r_.neon_f32 = vcombine_f32(vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32));
- #else
-   simde__m64_private b_;
-   simde_memcpy(&b_, mem_addr, sizeof(b_));
-   r_.i32[0] = b_.i32[0];
-   r_.i32[1] = b_.i32[1];
-   r_.i32[2] = a_.i32[2];
-   r_.i32[3] = a_.i32[3];
- #endif
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr))
- #endif
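The comment above warns about the __m64 parameter's alignment implication; one way around it, sketched below under the same include-path assumption, is to memcpy each float pair into a properly typed simde__m64 before calling loadl/loadh to assemble a full vector.

    #include <stdio.h>
    #include <string.h>
    #include "simde/x86/sse.h"  /* assumed include path to a simde checkout */

    int main(void) {
      float lo[2] = { 1.0f, 2.0f };
      float hi[2] = { 3.0f, 4.0f };
      simde__m128 v = simde_mm_setzero_ps();
      simde__m64 tmp;
      /* memcpy sidesteps the -Wcast-align warning the comment describes. */
      memcpy(&tmp, lo, sizeof(tmp));
      v = simde_mm_loadl_pi(v, &tmp);
      memcpy(&tmp, hi, sizeof(tmp));
      v = simde_mm_loadh_pi(v, &tmp);
      float out[4];
      simde_mm_storeu_ps(out, v);
      printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 1 2 3 4 */
      return 0;
    }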
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) {
-   simde_assert_aligned(16, mem_addr);
-
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_loadr_ps(mem_addr);
- #else
-   simde__m128_private
-     r_,
-     v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr));
-
- #if defined(SIMDE_SSE_NEON)
-   r_.neon_f32 = vrev64q_f32(v_.neon_f32);
-   r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2);
- #elif defined(SIMDE__SHUFFLE_VECTOR)
-   r_.f32 = SIMDE__SHUFFLE_VECTOR(32, 16, v_.f32, v_.f32, 3, 2, 1, 0);
- #else
-   r_.f32[0] = v_.f32[3];
-   r_.f32[1] = v_.f32[2];
-   r_.f32[2] = v_.f32[1];
-   r_.f32[3] = v_.f32[0];
- #endif
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_loadu_ps(mem_addr);
- #else
-   simde__m128_private r_;
-
- #if defined(SIMDE_SSE_NEON)
-   r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr));
- #else
-   r_.f32[0] = mem_addr[0];
-   r_.f32[1] = mem_addr[1];
-   r_.f32[2] = mem_addr[2];
-   r_.f32[3] = mem_addr[3];
- #endif
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- void
- simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
-   _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr));
- #else
-   simde__m64_private
-     a_ = simde__m64_to_private(a),
-     mask_ = simde__m64_to_private(mask);
-
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++)
-     if (mask_.i8[i] < 0)
-       mem_addr[i] = a_.i8[i];
- #endif
- }
- #define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr)
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64(a, (mask), mem_addr)
- #endif
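As the loop above shows, only bytes whose mask byte has the sign bit set are stored. A small sketch, again assuming a simde checkout on the include path:

    #include <stdio.h>
    #include <stdint.h>
    #include "simde/x86/sse.h"  /* assumed include path to a simde checkout */

    int main(void) {
      int8_t dst[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
      simde__m64 data = simde_mm_set_pi8(8, 7, 6, 5, 4, 3, 2, 1);
      /* -1 has the high bit set, so lanes 0, 2, 4 and 6 get written. */
      simde__m64 mask = simde_mm_set_pi8(0, -1, 0, -1, 0, -1, 0, -1);
      simde_mm_maskmove_si64(data, mask, dst);
      for (int i = 0 ; i < 8 ; i++) printf("%d ", dst[i]); /* 1 0 3 0 5 0 7 0 */
      printf("\n");
      return 0;
    }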
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_max_pi16 (simde__m64 a, simde__m64 b) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
-   return _mm_max_pi16(a, b);
- #else
-   simde__m64_private
-     r_,
-     a_ = simde__m64_to_private(a),
-     b_ = simde__m64_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
-   r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16);
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
-     r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i];
-   }
- #endif
-
-   return simde__m64_from_private(r_);
- #endif
- }
- #define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b)
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b)
- # define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_max_ps (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_max_ps(a, b);
- #else
-   simde__m128_private
-     r_,
-     a_ = simde__m128_to_private(a),
-     b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
-   r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32);
- #elif defined(SIMDE_SSE_POWER_ALTIVEC)
-   r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32);
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
-     r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i];
-   }
- #endif
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_max_ps(a, b) simde_mm_max_ps((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_max_pu8 (simde__m64 a, simde__m64 b) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
-   return _mm_max_pu8(a, b);
- #else
-   simde__m64_private
-     r_,
-     a_ = simde__m64_to_private(a),
-     b_ = simde__m64_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
-   r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8);
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
-     r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i];
-   }
- #endif
-
-   return simde__m64_from_private(r_);
- #endif
- }
- #define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b)
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b)
- # define _m_pmaxub(a, b) simde_mm_max_pu8(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_max_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_max_ss(a, b);
- #elif defined(SIMDE_ASSUME_VECTORIZATION)
-   return simde_mm_move_ss(a, simde_mm_max_ps(a, b));
- #else
-   simde__m128_private
-     r_,
-     a_ = simde__m128_to_private(a),
-     b_ = simde__m128_to_private(b);
-
-   r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0];
-   r_.f32[1] = a_.f32[1];
-   r_.f32[2] = a_.f32[2];
-   r_.f32[3] = a_.f32[3];
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_max_ss(a, b) simde_mm_max_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_min_pi16 (simde__m64 a, simde__m64 b) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
-   return _mm_min_pi16(a, b);
- #else
-   simde__m64_private
-     r_,
-     a_ = simde__m64_to_private(a),
-     b_ = simde__m64_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
-   r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16);
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
-     r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i];
-   }
- #endif
-
-   return simde__m64_from_private(r_);
- #endif
- }
- #define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b)
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b)
- # define _m_pminsw(a, b) simde_mm_min_pi16(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_min_ps (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_min_ps(a, b);
- #else
-   simde__m128_private
-     r_,
-     a_ = simde__m128_to_private(a),
-     b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
-   r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32);
- #elif defined(SIMDE_SSE_POWER_ALTIVEC)
-   r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32);
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
-     r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i];
-   }
- #endif
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_min_ps(a, b) simde_mm_min_ps((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_min_pu8 (simde__m64 a, simde__m64 b) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
-   return _mm_min_pu8(a, b);
- #else
-   simde__m64_private
-     r_,
-     a_ = simde__m64_to_private(a),
-     b_ = simde__m64_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
-   r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8);
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
-     r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i];
-   }
- #endif
-
-   return simde__m64_from_private(r_);
- #endif
- }
- #define simde_m_pminub(a, b) simde_mm_min_pu8(a, b)
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b)
- # define _m_pminub(a, b) simde_mm_min_pu8(a, b)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_min_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_min_ss(a, b);
- #elif defined(SIMDE_ASSUME_VECTORIZATION)
-   return simde_mm_move_ss(a, simde_mm_min_ps(a, b));
- #else
-   simde__m128_private
-     r_,
-     a_ = simde__m128_to_private(a),
-     b_ = simde__m128_to_private(b);
-
-   r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0];
-   r_.f32[1] = a_.f32[1];
-   r_.f32[2] = a_.f32[2];
-   r_.f32[3] = a_.f32[3];
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_min_ss(a, b) simde_mm_min_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_movehl_ps (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_movehl_ps(a, b);
- #else
-   simde__m128_private
-     r_,
-     a_ = simde__m128_to_private(a),
-     b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE__SHUFFLE_VECTOR)
-   r_.f32 = SIMDE__SHUFFLE_VECTOR(32, 16, a_.f32, b_.f32, 6, 7, 2, 3);
- #else
-   r_.f32[0] = b_.f32[2];
-   r_.f32[1] = b_.f32[3];
-   r_.f32[2] = a_.f32[2];
-   r_.f32[3] = a_.f32[3];
- #endif
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_movelh_ps (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_movelh_ps(a, b);
- #else
-   simde__m128_private
-     r_,
-     a_ = simde__m128_to_private(a),
-     b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE__SHUFFLE_VECTOR)
-   r_.f32 = SIMDE__SHUFFLE_VECTOR(32, 16, a_.f32, b_.f32, 0, 1, 4, 5);
- #else
-   r_.f32[0] = a_.f32[0];
-   r_.f32[1] = a_.f32[1];
-   r_.f32[2] = b_.f32[0];
-   r_.f32[3] = b_.f32[1];
- #endif
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- int
- simde_mm_movemask_pi8 (simde__m64 a) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
-   return _mm_movemask_pi8(a);
- #else
-   simde__m64_private a_ = simde__m64_to_private(a);
-   int r = 0;
-   const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]);
-
-   SIMDE__VECTORIZE_REDUCTION(|:r)
-   for (size_t i = 0 ; i < nmemb ; i++) {
-     r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i);
-   }
-
-   return r;
- #endif
- }
- #define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a)
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- int
- simde_mm_movemask_ps (simde__m128 a) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
-   return _mm_movemask_ps(a);
- #else
-   int r = 0;
-   simde__m128_private a_ = simde__m128_to_private(a);
-
- #if defined(SIMDE_SSE_NEON)
-   /* TODO: check to see if NEON version is faster than the portable version */
-   static const uint32x4_t movemask = { 1, 2, 4, 8 };
-   static const uint32x4_t highbit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
-   uint32x4_t t0 = a_.neon_u32;
-   uint32x4_t t1 = vtstq_u32(t0, highbit);
-   uint32x4_t t2 = vandq_u32(t1, movemask);
-   uint32x2_t t3 = vorr_u32(vget_low_u32(t2), vget_high_u32(t2));
-   r = vget_lane_u32(t3, 0) | vget_lane_u32(t3, 1);
- #else
-   SIMDE__VECTORIZE_REDUCTION(|:r)
-   for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) {
-     r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i;
-   }
- #endif
-
-   return r;
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_movemask_ps(a) simde_mm_movemask_ps((a))
- #endif
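movemask packs the per-lane sign bits into the low bits of an int, which is the usual way to branch on a vector comparison. A sketch under the same include-path assumption:

    #include <stdio.h>
    #include "simde/x86/sse.h"  /* assumed include path to a simde checkout */

    int main(void) {
      simde__m128 a = simde_mm_setr_ps(1.0f, -2.0f, 3.0f, -4.0f);
      simde__m128 z = simde_mm_setzero_ps();
      /* cmplt sets a lane to all-ones where a < 0; movemask packs the sign bits. */
      int m = simde_mm_movemask_ps(simde_mm_cmplt_ps(a, z));
      printf("mask = 0x%x\n", m);              /* lanes 1 and 3 -> 0xa */
      if (m != 0) printf("some lane is negative\n");
      return 0;
    }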
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_mul_ps (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_mul_ps(a, b);
- #else
-   simde__m128_private
-     r_,
-     a_ = simde__m128_to_private(a),
-     b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
-   r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32);
- #elif defined(SIMDE_SSE_WASM_SIMD128)
-   r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-   r_.f32 = a_.f32 * b_.f32;
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
-     r_.f32[i] = a_.f32[i] * b_.f32[i];
-   }
- #endif
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_mul_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_mul_ss(a, b);
- #elif defined(SIMDE_ASSUME_VECTORIZATION)
-   return simde_mm_move_ss(a, simde_mm_mul_ps(a, b));
- #else
-   simde__m128_private
-     r_,
-     a_ = simde__m128_to_private(a),
-     b_ = simde__m128_to_private(b);
-
-   r_.f32[0] = a_.f32[0] * b_.f32[0];
-   r_.f32[1] = a_.f32[1];
-   r_.f32[2] = a_.f32[2];
-   r_.f32[3] = a_.f32[3];
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
-   return _mm_mulhi_pu16(a, b);
- #else
-   simde__m64_private
-     r_,
-     a_ = simde__m64_to_private(a),
-     b_ = simde__m64_to_private(b);
-
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
-     r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16)));
-   }
-
-   return simde__m64_from_private(r_);
- #endif
- }
- #define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b)
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b)
- #endif
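mulhi_pu16 widens each 16-bit product to 32 bits and keeps only the high half, e.g. 0x8000 * 0x8000 = 0x40000000, whose top 16 bits are 0x4000. A sketch under the same include-path assumption:

    #include <stdio.h>
    #include <stdint.h>
    #include "simde/x86/sse.h"  /* assumed include path to a simde checkout */

    int main(void) {
      simde__m64 a = simde_mm_set1_pi16((int16_t) 0x8000);
      simde__m64 r = simde_mm_mulhi_pu16(a, a);
      /* (0x8000 * 0x8000) >> 16 == 0x4000 in every lane */
      printf("0x%04x\n", (unsigned) (uint16_t) simde_mm_extract_pi16(r, 0));
      return 0;
    }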
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_or_ps (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_or_ps(a, b);
- #else
-   simde__m128_private
-     r_,
-     a_ = simde__m128_to_private(a),
-     b_ = simde__m128_to_private(b);
-
- #if defined(SIMDE_SSE_NEON)
-   r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-   r_.i32f = a_.i32f | b_.i32f;
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
-     r_.u32[i] = a_.u32[i] | b_.u32[i];
-   }
- #endif
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_or_ps(a, b) simde_mm_or_ps((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- void
- simde_mm_prefetch (char const* p, int i) {
-   (void) p;
-   (void) i;
- }
- #if defined(SIMDE_SSE_NATIVE)
- # define simde_mm_prefetch(p, i) _mm_prefetch(p, i)
- #endif
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_prefetch(p, i) simde_mm_prefetch(p, i)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_rcp_ps (simde__m128 a) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_rcp_ps(a);
- #else
-   simde__m128_private
-     r_,
-     a_ = simde__m128_to_private(a);
-
- #if defined(SIMDE_SSE_NEON)
-   float32x4_t recip = vrecpeq_f32(a_.neon_f32);
-
- # if !defined(SIMDE_MM_RCP_PS_ITERS)
- #  define SIMDE_MM_RCP_PS_ITERS SIMDE_ACCURACY_ITERS
- # endif
-
-   for (int i = 0; i < SIMDE_MM_RCP_PS_ITERS ; ++i) {
-     recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32));
-   }
-
-   r_.neon_f32 = recip;
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
-   r_.f32 = 1.0f / a_.f32;
- #else
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
-     r_.f32[i] = 1.0f / a_.f32[i];
-   }
- #endif
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_rcp_ps(a) simde_mm_rcp_ps((a))
- #endif
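The NEON branch above refines vrecpeq_f32's rough estimate with Newton-Raphson steps: for 1/a, each step computes x' = x * (2 - a*x), which is exactly the factor vrecpsq_f32 returns, and roughly doubles the number of correct bits. A self-contained scalar sketch of the same iteration (no simde needed):

    #include <stdio.h>

    /* One Newton-Raphson step for 1/a: x' = x * (2 - a*x). */
    static float refine_recip(float a, float x) {
      return x * (2.0f - a * x);
    }

    int main(void) {
      float a = 3.0f;
      float x = 0.3f;                         /* rough seed, like vrecpeq_f32 */
      for (int i = 0 ; i < 3 ; i++)
        x = refine_recip(a, x);
      printf("%.9f\n", x);                    /* converges toward 0.333333333 */
      return 0;
    }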
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_rcp_ss (simde__m128 a) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_rcp_ss(a);
- #elif defined(SIMDE_ASSUME_VECTORIZATION)
-   return simde_mm_move_ss(a, simde_mm_rcp_ps(a));
- #else
-   simde__m128_private
-     r_,
-     a_ = simde__m128_to_private(a);
-
-   r_.f32[0] = 1.0f / a_.f32[0];
-   r_.f32[1] = a_.f32[1];
-   r_.f32[2] = a_.f32[2];
-   r_.f32[3] = a_.f32[3];
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_rcp_ss(a) simde_mm_rcp_ss((a))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_rsqrt_ps (simde__m128 a) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_rsqrt_ps(a);
- #else
-   simde__m128_private
-     r_,
-     a_ = simde__m128_to_private(a);
-
- #if defined(SIMDE_SSE_NEON)
-   r_.neon_f32 = vrsqrteq_f32(a_.neon_f32);
- #elif defined(__STDC_IEC_559__)
-   /* http://h14s.p5r.org/2012/09/0x5f3759df.html?mwh=1 */
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
-     r_.i32[i] = INT32_C(0x5f3759df) - (a_.i32[i] >> 1);
-
- #if SIMDE_ACCURACY_ITERS > 2
-     const float half = SIMDE_FLOAT32_C(0.5) * a_.f32[i];
-     for (int ai = 2 ; ai < SIMDE_ACCURACY_ITERS ; ai++)
-       r_.f32[i] *= SIMDE_FLOAT32_C(1.5) - (half * r_.f32[i] * r_.f32[i]);
- #endif
-   }
- #elif defined(SIMDE_HAVE_MATH_H)
-   SIMDE__VECTORIZE
-   for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
-     r_.f32[i] = 1.0f / sqrtf(a_.f32[i]);
-   }
- #else
-   HEDLEY_UNREACHABLE();
- #endif
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a))
- #endif
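The __STDC_IEC_559__ branch is the classic "fast inverse square root" bit trick the linked article describes: subtract the shifted bits from the magic constant 0x5f3759df, then refine with Newton steps y' = y * (1.5 - 0.5*a*y*y). A self-contained scalar sketch (memcpy keeps the type pun well-defined C):

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    static float rsqrt_approx(float a) {
      int32_t i;
      float y;
      memcpy(&i, &a, sizeof(i));           /* reinterpret the float bits */
      i = INT32_C(0x5f3759df) - (i >> 1);  /* magic-constant initial guess */
      memcpy(&y, &i, sizeof(y));
      y *= 1.5f - (0.5f * a * y * y);      /* one Newton refinement step */
      return y;
    }

    int main(void) {
      printf("%f (exact %f)\n", rsqrt_approx(4.0f), 0.5f);
      return 0;
    }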
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_rsqrt_ss (simde__m128 a) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_rsqrt_ss(a);
- #elif defined(SIMDE_ASSUME_VECTORIZATION)
-   return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a));
- #else
-   simde__m128_private
-     r_,
-     a_ = simde__m128_to_private(a);
-
- #if defined(__STDC_IEC_559__)
-   {
-     r_.i32[0] = INT32_C(0x5f3759df) - (a_.i32[0] >> 1);
-
- #if SIMDE_ACCURACY_ITERS > 2
-     float half = SIMDE_FLOAT32_C(0.5) * a_.f32[0];
-     for (int ai = 2 ; ai < SIMDE_ACCURACY_ITERS ; ai++)
-       r_.f32[0] *= SIMDE_FLOAT32_C(1.5) - (half * r_.f32[0] * r_.f32[0]);
- #endif
-   }
-   /* Keep the bit-hack estimate (plus optional Newton refinement) as the
-      result, matching the simde_mm_rsqrt_ps branch above; _mm_rsqrt_ss is
-      only an approximation, and this path must not depend on sqrtf(). */
-   r_.f32[1] = a_.f32[1];
-   r_.f32[2] = a_.f32[2];
-   r_.f32[3] = a_.f32[3];
- #elif defined(SIMDE_HAVE_MATH_H)
-   r_.f32[0] = 1.0f / sqrtf(a_.f32[0]);
-   r_.f32[1] = a_.f32[1];
-   r_.f32[2] = a_.f32[2];
-   r_.f32[3] = a_.f32[3];
- #else
-   HEDLEY_UNREACHABLE();
- #endif
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m64
- simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
-   return _mm_sad_pu8(a, b);
- #else
-   simde__m64_private
-     r_,
-     a_ = simde__m64_to_private(a),
-     b_ = simde__m64_to_private(b);
-   uint16_t sum = 0;
-
- #if defined(SIMDE_HAVE_STDLIB_H)
-   SIMDE__VECTORIZE_REDUCTION(+:sum)
-   for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
-     sum += (uint8_t) abs(a_.u8[i] - b_.u8[i]);
-   }
-
-   r_.i16[0] = (int16_t) sum;
-   r_.i16[1] = 0;
-   r_.i16[2] = 0;
-   r_.i16[3] = 0;
- #else
-   HEDLEY_UNREACHABLE();
- #endif
-
-   return simde__m64_from_private(r_);
- #endif
- }
- #define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b)
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b)
- # define _m_psadbw(a, b) simde_mm_sad_pu8(a, b)
- #endif
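sad_pu8 sums the absolute differences of the eight byte lanes into the low 16-bit lane, a primitive commonly used for block matching. A sketch, with the usual include-path assumption:

    #include <stdio.h>
    #include "simde/x86/sse.h"  /* assumed include path to a simde checkout */

    int main(void) {
      simde__m64 a = simde_mm_set_pi8(10, 20, 30, 40, 50, 60, 70, 80);
      simde__m64 b = simde_mm_set_pi8(11, 18, 33, 36, 55, 54, 77, 72);
      /* 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 = 36 */
      simde__m64 r = simde_mm_sad_pu8(a, b);
      printf("%d\n", (int) simde_mm_extract_pi16(r, 0)); /* 36 */
      return 0;
    }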
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_set_ss (simde_float32 a) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_set_ss(a);
- #elif defined(SIMDE_SSE_NEON)
-   return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0);
- #else
-   return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_set_ss(a) simde_mm_set_ss(a)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_setr_ps(e3, e2, e1, e0);
- #elif defined(SIMDE_SSE_NEON)
-   SIMDE_ALIGN(16) simde_float32 data[4] = { e3, e2, e1, e0 };
-   return vld1q_f32(data);
- #else
-   return simde_mm_set_ps(e0, e1, e2, e3);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0)
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_setzero_ps (void) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_setzero_ps();
- #elif defined(SIMDE_SSE_NEON)
-   return vdupq_n_f32(SIMDE_FLOAT32_C(0.0));
- #else
-   simde__m128 r;
-   simde_memset(&r, 0, sizeof(r));
-   return r;
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_setzero_ps() simde_mm_setzero_ps()
- #endif
-
- #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
-   HEDLEY_DIAGNOSTIC_PUSH
-   SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_undefined_ps (void) {
-   simde__m128_private r_;
-
- #if defined(SIMDE__HAVE_UNDEFINED128)
-   r_.n = _mm_undefined_ps();
- #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
-   r_ = simde__m128_to_private(simde_mm_setzero_ps());
- #endif
-
-   return simde__m128_from_private(r_);
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_undefined_ps() simde_mm_undefined_ps()
- #endif
-
- #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
-   HEDLEY_DIAGNOSTIC_POP
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_setone_ps (void) {
-   simde__m128 t = simde_mm_setzero_ps();
-   return simde_mm_cmpeq_ps(t, t);
- }
-
- SIMDE__FUNCTION_ATTRIBUTES
- void
- simde_mm_sfence (void) {
-   /* TODO: Use Hedley. */
- #if defined(SIMDE_SSE_NATIVE)
-   _mm_sfence();
- #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))
-   __atomic_thread_fence(__ATOMIC_SEQ_CST);
- #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
- # if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9)
-   __atomic_thread_fence(__ATOMIC_SEQ_CST);
- # else
-   atomic_thread_fence(memory_order_seq_cst);
- # endif
- #elif defined(_MSC_VER)
-   MemoryBarrier();
- #elif HEDLEY_HAS_EXTENSION(c_atomic)
-   __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
- #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
-   __sync_synchronize();
- #elif defined(_OPENMP)
- # pragma omp critical(simde_mm_sfence_)
-   { }
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _mm_sfence() simde_mm_sfence()
- #endif
-
- #define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- # define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w)
- #endif
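SIMDE_MM_SHUFFLE packs four 2-bit lane selectors, high-to-low, into the imm8 consumed by the shuffle implementations defined just below. A sketch under the same include-path assumption:

    #include <stdio.h>
    #include "simde/x86/sse.h"  /* assumed include path to a simde checkout */

    int main(void) {
      simde__m128 a = simde_mm_setr_ps(0.0f, 1.0f, 2.0f, 3.0f);
      simde__m128 b = simde_mm_setr_ps(4.0f, 5.0f, 6.0f, 7.0f);
      /* r[0]=a[imm&3], r[1]=a[(imm>>2)&3], r[2]=b[(imm>>4)&3], r[3]=b[(imm>>6)&3];
       * note SIMDE_MM_SHUFFLE takes its selectors as (r3, r2, r1, r0). */
      simde__m128 r = simde_mm_shuffle_ps(a, b, SIMDE_MM_SHUFFLE(3, 2, 1, 0));
      float out[4];
      simde_mm_storeu_ps(out, r);
      printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 0 1 6 7 */
      return 0;
    }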
-
2835
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX) && !defined(__PGI)
2836
- # define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8)
2837
- #elif defined(SIMDE__SHUFFLE_VECTOR)
2838
- # define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \
2839
- const simde__m64_private simde__tmp_a_ = simde__m64_to_private(a); \
2840
- simde__m64_from_private((simde__m64_private) { .i16 = \
2841
- SIMDE__SHUFFLE_VECTOR(16, 8, \
2842
- (simde__tmp_a_).i16, \
2843
- (simde__tmp_a_).i16, \
2844
- (((imm8) ) & 3), \
2845
- (((imm8) >> 2) & 3), \
2846
- (((imm8) >> 4) & 3), \
2847
- (((imm8) >> 6) & 3)) }); }))
2848
- #else
2849
- SIMDE__FUNCTION_ATTRIBUTES
2850
- simde__m64
2851
- simde_mm_shuffle_pi16 (simde__m64 a, const int imm8)
2852
- HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
2853
- simde__m64_private r_;
2854
- simde__m64_private a_ = simde__m64_to_private(a);
2855
-
2856
- for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) {
2857
- r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3];
2858
- }
2859
-
2860
- HEDLEY_DIAGNOSTIC_PUSH
2861
- #if HEDLEY_HAS_WARNING("-Wconditional-uninitialized")
2862
- # pragma clang diagnostic ignored "-Wconditional-uninitialized"
2863
- #endif
2864
- return simde__m64_from_private(r_);
2865
- HEDLEY_DIAGNOSTIC_POP
2866
- }
2867
- #endif
2868
- #if defined(SIMDE_SSE_NATIVE) && !defined(__PGI)
2869
- # define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8)
2870
- #else
2871
- # define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8)
2872
- #endif
2873
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
2874
- # define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8)
2875
- # define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8)
2876
- #endif
2877
-
2878
- #if defined(SIMDE_SSE_NATIVE) && !defined(__PGI)
2879
- # define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8)
2880
- #elif defined(SIMDE__SHUFFLE_VECTOR)
2881
- # define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \
2882
- simde__m128_from_private((simde__m128_private) { .f32 = \
2883
- SIMDE__SHUFFLE_VECTOR(32, 16, \
2884
- simde__m128_to_private(a).f32, \
2885
- simde__m128_to_private(b).f32, \
2886
- (((imm8) ) & 3), \
2887
- (((imm8) >> 2) & 3), \
2888
- (((imm8) >> 4) & 3) + 4, \
2889
- (((imm8) >> 6) & 3) + 4) }); }))
2890
- #else
2891
- SIMDE__FUNCTION_ATTRIBUTES
2892
- simde__m128
2893
- simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8)
2894
- HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
2895
- simde__m128_private
2896
- r_,
2897
- a_ = simde__m128_to_private(a),
2898
- b_ = simde__m128_to_private(b);
2899
-
2900
- r_.f32[0] = a_.f32[(imm8 >> 0) & 3];
2901
- r_.f32[1] = a_.f32[(imm8 >> 2) & 3];
2902
- r_.f32[2] = b_.f32[(imm8 >> 4) & 3];
2903
- r_.f32[3] = b_.f32[(imm8 >> 6) & 3];
2904
-
2905
- return simde__m128_from_private(r_);
2906
- }
2907
- #endif
2908
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
2909
- # define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8)
2910
- #endif
2911
-
2912
- SIMDE__FUNCTION_ATTRIBUTES
2913
- simde__m128
2914
- simde_mm_sqrt_ps (simde__m128 a) {
2915
- #if defined(SIMDE_SSE_NATIVE)
2916
- return _mm_sqrt_ps(a);
2917
- #else
2918
- simde__m128_private
2919
- r_,
2920
- a_ = simde__m128_to_private(a);
2921
-
2922
- #if defined(SIMDE_SSE_NEON)
2923
- float32x4_t recipsq = vrsqrteq_f32(a_.neon_f32);
2924
- float32x4_t sq = vrecpeq_f32(recipsq);
2925
- /* ??? use step versions of both sqrt and recip for better accuracy? */
2926
- r_.neon_f32 = sq;
2927
- #elif defined(SIMDE_HAVE_MATH_H)
2928
- SIMDE__VECTORIZE
2929
- for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) {
2930
- r_.f32[i] = sqrtf(a_.f32[i]);
2931
- }
2932
- #else
2933
- HEDLEY_UNREACHABLE();
2934
- #endif
2935
-
2936
- return simde__m128_from_private(r_);
2937
- #endif
2938
- }
2939
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
2940
- # define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a))
2941
- #endif
2942
-
2943
- SIMDE__FUNCTION_ATTRIBUTES
2944
- simde__m128
2945
- simde_mm_sqrt_ss (simde__m128 a) {
2946
- #if defined(SIMDE_SSE_NATIVE)
2947
- return _mm_sqrt_ss(a);
2948
- #elif defined(SIMDE_ASSUME_VECTORIZATION)
2949
- return simde_mm_move_ss(a, simde_mm_sqrt_ps(a));
2950
- #else
2951
- simde__m128_private
2952
- r_,
2953
- a_ = simde__m128_to_private(a);
2954
-
2955
- #if defined(SIMDE_HAVE_MATH_H)
2956
- r_.f32[0] = sqrtf(a_.f32[0]);
2957
- r_.f32[1] = a_.f32[1];
2958
- r_.f32[2] = a_.f32[2];
2959
- r_.f32[3] = a_.f32[3];
2960
- #else
2961
- HEDLEY_UNREACHABLE();
2962
- #endif
2963
-
2964
- return simde__m128_from_private(r_);
2965
- #endif
2966
- }
2967
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
2968
- # define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a))
2969
- #endif
2970
-
2971
- SIMDE__FUNCTION_ATTRIBUTES
2972
- void
2973
- simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) {
2974
- simde_assert_aligned(16, mem_addr);
2975
-
2976
- #if defined(SIMDE_SSE_NATIVE)
2977
- _mm_store_ps(mem_addr, a);
2978
- #else
2979
- simde__m128_private a_ = simde__m128_to_private(a);
2980
-
2981
- #if defined(SIMDE_SSE_NEON)
2982
- vst1q_f32(mem_addr, a_.neon_f32);
2983
- #elif defined(SIMDE_SSE_WASM_SIMD128)
2984
- wasm_v128_store(mem_addr, a_.wasm_v128);
2985
- #else
2986
- SIMDE__VECTORIZE_ALIGNED(mem_addr:16)
2987
- for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) {
2988
- mem_addr[i] = a_.f32[i];
2989
- }
2990
- #endif
2991
- #endif
2992
- }
2993
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
2994
- # define _mm_store_ps(mem_addr, a) simde_mm_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
2995
- #endif
2996
-
2997
- SIMDE__FUNCTION_ATTRIBUTES
2998
- void
2999
- simde_mm_store_ps1 (simde_float32 mem_addr[4], simde__m128 a) {
3000
- simde_assert_aligned(16, mem_addr);
3001
-
3002
- #if defined(SIMDE_SSE_NATIVE)
3003
- _mm_store_ps1(mem_addr, a);
3004
- #else
3005
- simde__m128_private a_ = simde__m128_to_private(a);
3006
-
3007
- SIMDE__VECTORIZE_ALIGNED(mem_addr:16)
3008
- for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) {
3009
- mem_addr[i] = a_.f32[0];
3010
- }
3011
- #endif
3012
- }
3013
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
3014
- # define _mm_store_ps1(mem_addr, a) simde_mm_store_ps1(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
3015
- #endif
3016
-
3017
- SIMDE__FUNCTION_ATTRIBUTES
3018
- void
3019
- simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) {
3020
- #if defined(SIMDE_SSE_NATIVE)
3021
- _mm_store_ss(mem_addr, a);
3022
- #else
3023
- simde__m128_private a_ = simde__m128_to_private(a);
3024
-
3025
- #if defined(SIMDE_SSE_NEON)
3026
- vst1q_lane_f32(mem_addr, a_.neon_f32, 0);
3027
- #else
3028
- *mem_addr = a_.f32[0];
3029
- #endif
3030
- #endif
3031
- }
3032
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
3033
- # define _mm_store_ss(mem_addr, a) simde_mm_store_ss(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
3034
- #endif
3035
-
3036
- SIMDE__FUNCTION_ATTRIBUTES
3037
- void
3038
- simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) {
3039
- simde_assert_aligned(16, mem_addr);
3040
-
3041
- #if defined(SIMDE_SSE_NATIVE)
3042
- _mm_store1_ps(mem_addr, a);
3043
- #else
3044
- simde_mm_store_ps1(mem_addr, a);
3045
- #endif
3046
- }
3047
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
3048
- # define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
3049
- #endif
3050
-
3051
- SIMDE__FUNCTION_ATTRIBUTES
3052
- void
3053
- simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) {
3054
- #if defined(SIMDE_SSE_NATIVE)
3055
- _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a);
3056
- #else
3057
- simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr);
3058
- simde__m128_private a_ = simde__m128_to_private(a);
3059
-
3060
- dest_->f32[0] = a_.f32[2];
3061
- dest_->f32[1] = a_.f32[3];
3062
- #endif
3063
- }
3064
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
3065
- # define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a))
3066
- #endif
3067
-
3068
- SIMDE__FUNCTION_ATTRIBUTES
3069
- void
3070
- simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) {
3071
- #if defined(SIMDE_SSE_NATIVE)
3072
- _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a);
3073
- #else
3074
- simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr);
3075
- simde__m128_private a_ = simde__m128_to_private(a);
3076
-
3077
- dest_->f32[0] = a_.f32[0];
3078
- dest_->f32[1] = a_.f32[1];
3079
- #endif
3080
- }
3081
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
3082
- # define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a))
3083
- #endif
3084
-
3085
- SIMDE__FUNCTION_ATTRIBUTES
3086
- void
3087
- simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) {
3088
- simde_assert_aligned(16, mem_addr);
3089
-
3090
- #if defined(SIMDE_SSE_NATIVE)
3091
- _mm_storer_ps(mem_addr, a);
3092
- #else
3093
- simde__m128_private a_ = simde__m128_to_private(a);
3094
-
3095
- #if defined(SIMDE__SHUFFLE_VECTOR)
3096
- a_.f32 = SIMDE__SHUFFLE_VECTOR(32, 16, a_.f32, a_.f32, 3, 2, 1, 0);
3097
- simde_mm_store_ps(mem_addr, simde__m128_from_private(a_));
3098
- #else
3099
- SIMDE__VECTORIZE_ALIGNED(mem_addr:16)
3100
- for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) {
3101
- mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i];
3102
- }
3103
- #endif
3104
- #endif
3105
- }
3106
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
3107
- # define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
3108
- #endif
3109
-
3110
- SIMDE__FUNCTION_ATTRIBUTES
3111
- void
3112
- simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) {
3113
- #if defined(SIMDE_SSE_NATIVE)
3114
- _mm_storeu_ps(mem_addr, a);
3115
- #else
3116
- simde__m128_private a_ = simde__m128_to_private(a);
3117
-
3118
- #if defined(SIMDE_SSE_NEON)
3119
- vst1q_f32(mem_addr, a_.neon_f32);
3120
- #else
3121
- simde_memcpy(mem_addr, &a_, sizeof(a_));
3122
- #endif
3123
- #endif
3124
- }
3125
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
3126
- # define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
3127
- #endif
3128
-
3129
- SIMDE__FUNCTION_ATTRIBUTES
3130
- simde__m128
3131
- simde_mm_sub_ps (simde__m128 a, simde__m128 b) {
3132
- #if defined(SIMDE_SSE_NATIVE)
3133
- return _mm_sub_ps(a, b);
3134
- #else
3135
- simde__m128_private
3136
- r_,
3137
- a_ = simde__m128_to_private(a),
3138
- b_ = simde__m128_to_private(b);
3139
-
3140
- #if defined(SIMDE_SSE_NEON)
3141
- r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32);
3142
- #elif defined(SIMDE_SSE_WASM_SIMD128)
3143
- r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128);
3144
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3145
- r_.f32 = a_.f32 - b_.f32;
3146
- #else
3147
- SIMDE__VECTORIZE
3148
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
3149
- r_.f32[i] = a_.f32[i] - b_.f32[i];
3150
- }
3151
- #endif
3152
-
3153
- return simde__m128_from_private(r_);
3154
- #endif
3155
- }
3156
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
3157
- # define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b))
3158
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_sub_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_sub_ss(a, b);
- #elif defined(SIMDE_ASSUME_VECTORIZATION)
-   return simde_mm_move_ss(a, simde_mm_sub_ps(a, b));
- #else
-   simde__m128_private
-     r_,
-     a_ = simde__m128_to_private(a),
-     b_ = simde__m128_to_private(b);
-
-   r_.f32[0] = a_.f32[0] - b_.f32[0];
-   r_.f32[1] = a_.f32[1];
-   r_.f32[2] = a_.f32[2];
-   r_.f32[3] = a_.f32[3];
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- #  define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b))
- #endif
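
The packed/scalar split matters in practice: `_ps` subtracts all four lanes, while `_ss` touches only lane 0 and passes the upper lanes of `a` through unchanged, as the scalar fallback above makes explicit. A small sketch with hypothetical values (`simde_mm_set_ps` and `simde_mm_set1_ps` are defined earlier in this header):

    simde__m128 a = simde_mm_set_ps(40.0f, 30.0f, 20.0f, 10.0f);
    simde__m128 b = simde_mm_set1_ps(1.0f);

    simde__m128 p = simde_mm_sub_ps(a, b); /* {9, 19, 29, 39} */
    simde__m128 s = simde_mm_sub_ss(a, b); /* {9, 20, 30, 40}; only lane 0 changes */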
-
- SIMDE__FUNCTION_ATTRIBUTES
- int
- simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_ucomieq_ss(a, b);
- #else
-   simde__m128_private
-     a_ = simde__m128_to_private(a),
-     b_ = simde__m128_to_private(b);
-   int r;
-
-   #if defined(SIMDE_HAVE_FENV_H)
-     fenv_t envp;
-     int x = feholdexcept(&envp);
-     r = a_.f32[0] == b_.f32[0];
-     if (HEDLEY_LIKELY(x == 0))
-       fesetenv(&envp);
-   #else
-     HEDLEY_UNREACHABLE();
-   #endif
-
-   return r;
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- #  define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- int
- simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_ucomige_ss(a, b);
- #else
-   simde__m128_private
-     a_ = simde__m128_to_private(a),
-     b_ = simde__m128_to_private(b);
-   int r;
-
-   #if defined(SIMDE_HAVE_FENV_H)
-     fenv_t envp;
-     int x = feholdexcept(&envp);
-     r = a_.f32[0] >= b_.f32[0];
-     if (HEDLEY_LIKELY(x == 0))
-       fesetenv(&envp);
-   #else
-     HEDLEY_UNREACHABLE();
-   #endif
-
-   return r;
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- #  define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- int
- simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_ucomigt_ss(a, b);
- #else
-   simde__m128_private
-     a_ = simde__m128_to_private(a),
-     b_ = simde__m128_to_private(b);
-   int r;
-
-   #if defined(SIMDE_HAVE_FENV_H)
-     fenv_t envp;
-     int x = feholdexcept(&envp);
-     r = a_.f32[0] > b_.f32[0];
-     if (HEDLEY_LIKELY(x == 0))
-       fesetenv(&envp);
-   #else
-     HEDLEY_UNREACHABLE();
-   #endif
-
-   return r;
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- #  define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- int
- simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_ucomile_ss(a, b);
- #else
-   simde__m128_private
-     a_ = simde__m128_to_private(a),
-     b_ = simde__m128_to_private(b);
-   int r;
-
-   #if defined(SIMDE_HAVE_FENV_H)
-     fenv_t envp;
-     int x = feholdexcept(&envp);
-     r = a_.f32[0] <= b_.f32[0];
-     if (HEDLEY_LIKELY(x == 0))
-       fesetenv(&envp);
-   #else
-     HEDLEY_UNREACHABLE();
-   #endif
-
-   return r;
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- #  define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- int
- simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_ucomilt_ss(a, b);
- #else
-   simde__m128_private
-     a_ = simde__m128_to_private(a),
-     b_ = simde__m128_to_private(b);
-   int r;
-
-   #if defined(SIMDE_HAVE_FENV_H)
-     fenv_t envp;
-     int x = feholdexcept(&envp);
-     r = a_.f32[0] < b_.f32[0];
-     if (HEDLEY_LIKELY(x == 0))
-       fesetenv(&envp);
-   #else
-     HEDLEY_UNREACHABLE();
-   #endif
-
-   return r;
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- #  define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- int
- simde_mm_ucomineq_ss (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_ucomineq_ss(a, b);
- #else
-   simde__m128_private
-     a_ = simde__m128_to_private(a),
-     b_ = simde__m128_to_private(b);
-   int r;
-
-   #if defined(SIMDE_HAVE_FENV_H)
-     fenv_t envp;
-     int x = feholdexcept(&envp);
-     r = a_.f32[0] != b_.f32[0];
-     if (HEDLEY_LIKELY(x == 0))
-       fesetenv(&envp);
-   #else
-     HEDLEY_UNREACHABLE();
-   #endif
-
-   return r;
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- #  define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b))
- #endif
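
All six `ucomi*` fallbacks share one pattern: `feholdexcept` saves the floating-point environment and defers exception traps, the scalar comparison runs on lane 0, and `fesetenv` restores the saved environment so a quiet-NaN operand does not leave an invalid-operation flag behind. Usage sketch with hypothetical values (`simde_mm_set1_ps` is defined earlier in this header):

    simde__m128 a = simde_mm_set1_ps(1.0f);
    simde__m128 b = simde_mm_set1_ps(2.0f);

    int lt = simde_mm_ucomilt_ss(a, b); /* 1; compares lane 0 only */
    int eq = simde_mm_ucomieq_ss(a, a); /* 1 */
    /* With a NaN in lane 0, every ordered relation reports 0 and
       simde_mm_ucomineq_ss reports 1, without raising FE_INVALID. */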
-
- #if defined(SIMDE_SSE_NATIVE)
- #  if defined(__has_builtin)
- #    if __has_builtin(__builtin_ia32_undef128)
- #      define SIMDE__HAVE_UNDEFINED128
- #    endif
- #  elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER)
- #    define SIMDE__HAVE_UNDEFINED128
- #  endif
- #endif
-
- #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
- HEDLEY_DIAGNOSTIC_PUSH
- SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_unpackhi_ps(a, b);
- #else
-   simde__m128_private
-     r_,
-     a_ = simde__m128_to_private(a),
-     b_ = simde__m128_to_private(b);
-
-   #if defined(SIMDE_SSE_NEON)
-     float32x2_t a1 = vget_high_f32(a_.neon_f32);
-     float32x2_t b1 = vget_high_f32(b_.neon_f32);
-     float32x2x2_t result = vzip_f32(a1, b1);
-     r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]);
-   #elif defined(SIMDE__SHUFFLE_VECTOR)
-     r_.f32 = SIMDE__SHUFFLE_VECTOR(32, 16, a_.f32, b_.f32, 2, 6, 3, 7);
-   #else
-     r_.f32[0] = a_.f32[2];
-     r_.f32[1] = b_.f32[2];
-     r_.f32[2] = a_.f32[3];
-     r_.f32[3] = b_.f32[3];
-   #endif
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- #  define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_unpacklo_ps(a, b);
- #else
-   simde__m128_private
-     r_,
-     a_ = simde__m128_to_private(a),
-     b_ = simde__m128_to_private(b);
-
-   #if defined(SIMDE__SHUFFLE_VECTOR)
-     r_.f32 = SIMDE__SHUFFLE_VECTOR(32, 16, a_.f32, b_.f32, 0, 4, 1, 5);
-   #elif defined(SIMDE_SSE_NEON)
-     float32x2_t a1 = vget_low_f32(a_.neon_f32);
-     float32x2_t b1 = vget_low_f32(b_.neon_f32);
-     float32x2x2_t result = vzip_f32(a1, b1);
-     r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]);
-   #else
-     r_.f32[0] = a_.f32[0];
-     r_.f32[1] = b_.f32[0];
-     r_.f32[2] = a_.f32[1];
-     r_.f32[3] = b_.f32[1];
-   #endif
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- #  define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b))
- #endif
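
Both unpack variants interleave lanes from `a` and `b`, as the scalar fallbacks above spell out: `unpacklo` zips the low halves into {a0, b0, a1, b1}, and `unpackhi` zips the high halves into {a2, b2, a3, b3}. Sketch with hypothetical values:

    simde__m128 a = simde_mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f); /* lanes {0, 1, 2, 3} */
    simde__m128 b = simde_mm_set_ps(7.0f, 6.0f, 5.0f, 4.0f); /* lanes {4, 5, 6, 7} */

    simde__m128 lo = simde_mm_unpacklo_ps(a, b); /* {0, 4, 1, 5} */
    simde__m128 hi = simde_mm_unpackhi_ps(a, b); /* {2, 6, 3, 7} */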
-
- SIMDE__FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_xor_ps (simde__m128 a, simde__m128 b) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_xor_ps(a, b);
- #else
-   simde__m128_private
-     r_,
-     a_ = simde__m128_to_private(a),
-     b_ = simde__m128_to_private(b);
-
-   #if defined(SIMDE_SSE_NEON)
-     r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32);
-   #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
-     r_.i32f = a_.i32f ^ b_.i32f;
-   #else
-     SIMDE__VECTORIZE
-     for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
-       r_.u32[i] = a_.u32[i] ^ b_.u32[i];
-     }
-   #endif
-
-   return simde__m128_from_private(r_);
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- #  define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b))
- #endif
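
`xor_ps` operates on the raw bit patterns rather than on float values, which is most often exploited for the sign-flip trick: XORing with a vector of `-0.0f` (sign bit only) negates every lane without an arithmetic subtract. An illustrative sketch, not from the original source:

    simde__m128 v    = simde_mm_set_ps(4.0f, -3.0f, 2.0f, -1.0f);
    simde__m128 sign = simde_mm_set1_ps(-0.0f);  /* 0x80000000 in each lane */
    simde__m128 neg  = simde_mm_xor_ps(v, sign); /* lanes {1, -2, 3, -4} */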
-
- SIMDE__FUNCTION_ATTRIBUTES
- void
- simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) {
- #if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
-   _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a);
- #else
-   simde__m64_private* dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr);
-   simde__m64_private a_ = simde__m64_to_private(a);
-
-   #if defined(SIMDE_SSE_NEON)
-     dest->i64[0] = vget_lane_s64(a_.neon_i64, 0);
-   #else
-     dest->i64[0] = a_.i64[0];
-   #endif
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- #  define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a))
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- void
- simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) {
-   simde_assert_aligned(16, mem_addr);
-
- #if defined(SIMDE_SSE_NATIVE)
-   _mm_stream_ps(mem_addr, a);
- #else
-   simde__m128_private a_ = simde__m128_to_private(a);
-
-   #if defined(SIMDE_SSE_NEON)
-     vst1q_f32(mem_addr, a_.neon_f32);
-   #else
-     SIMDE__ASSUME_ALIGNED(mem_addr, 16);
-     simde_memcpy(mem_addr, &a_, sizeof(a_));
-   #endif
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- #  define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
- #endif
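
Only the native path preserves the non-temporal (cache-bypassing) hint of the stream stores; the fallbacks degrade to ordinary stores, which is semantically equivalent but without the cache behavior. Sketch (hypothetical buffer, C11 alignment):

    _Alignas(16) simde_float32 out[4];
    simde_mm_stream_ps(out, simde_mm_set1_ps(0.0f)); /* 16-byte alignment is asserted */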
-
- SIMDE__FUNCTION_ATTRIBUTES
- uint32_t
- simde_mm_getcsr (void) {
- #if defined(SIMDE_SSE_NATIVE)
-   return _mm_getcsr();
- #else
-   uint32_t r = 0;
-
-   #if defined(SIMDE_HAVE_FENV_H)
-     int rounding_mode = fegetround();
-
-     switch(rounding_mode) {
-       #if defined(FE_TONEAREST)
-         case FE_TONEAREST:
-           break;
-       #endif
-       #if defined(FE_UPWARD)
-         case FE_UPWARD:
-           r |= 2 << 13;
-           break;
-       #endif
-       #if defined(FE_DOWNWARD)
-         case FE_DOWNWARD:
-           r |= 1 << 13;
-           break;
-       #endif
-       #if defined(FE_TOWARDZERO)
-         case FE_TOWARDZERO:
-           r |= 3 << 13;
-           break;
-       #endif
-     }
-   #else
-     HEDLEY_UNREACHABLE();
-   #endif
-
-   return r;
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- #  define _mm_getcsr() simde_mm_getcsr()
- #endif
-
- SIMDE__FUNCTION_ATTRIBUTES
- void
- simde_mm_setcsr (uint32_t a) {
- #if defined(SIMDE_SSE_NATIVE)
-   _mm_setcsr(a);
- #else
-   /* Each break stays inside its own case's guard; placing it in the
-    * next case's #if block (as before) caused a fall-through whenever
-    * that FE_* macro was undefined. */
-   switch((a >> 13) & 3) {
-     #if defined(FE_TONEAREST)
-       case 0:
-         fesetround(FE_TONEAREST);
-         break;
-     #endif
-     #if defined(FE_DOWNWARD)
-       case 1:
-         fesetround(FE_DOWNWARD);
-         break;
-     #endif
-     #if defined(FE_UPWARD)
-       case 2:
-         fesetround(FE_UPWARD);
-         break;
-     #endif
-     #if defined(FE_TOWARDZERO)
-       case 3:
-         fesetround(FE_TOWARDZERO);
-         break;
-     #endif
-   }
- #endif
- }
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- #  define _mm_setcsr(a) simde_mm_setcsr(a)
- #endif
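
The portable fallback models only the rounding-control field of MXCSR (bits 13 and 14); exception flags, exception masks, and the flush-to-zero bit are not emulated. A round-trip sketch that switches to truncation and then restores the previous mode:

    uint32_t saved = simde_mm_getcsr();
    simde_mm_setcsr((saved & ~(3u << 13)) | (3u << 13)); /* round toward zero */
    /* ... code that relies on truncating rounding ... */
    simde_mm_setcsr(saved);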
-
- #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
-   do { \
-     simde__m128 tmp3, tmp2, tmp1, tmp0; \
-     tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \
-     tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \
-     tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \
-     tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \
-     row0 = simde_mm_movelh_ps(tmp0, tmp2); \
-     row1 = simde_mm_movehl_ps(tmp2, tmp0); \
-     row2 = simde_mm_movelh_ps(tmp1, tmp3); \
-     row3 = simde_mm_movehl_ps(tmp3, tmp1); \
-   } while (0)
-
- #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
- #  define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3)
- #endif
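
The macro transposes a 4x4 matrix in place: the unpacks build 2x2 interleaved tiles, and the movelh/movehl pair stitches those tiles into columns. Sketch with hypothetical row values:

    simde__m128 r0 = simde_mm_set_ps( 3.0f,  2.0f,  1.0f,  0.0f);
    simde__m128 r1 = simde_mm_set_ps( 7.0f,  6.0f,  5.0f,  4.0f);
    simde__m128 r2 = simde_mm_set_ps(11.0f, 10.0f,  9.0f,  8.0f);
    simde__m128 r3 = simde_mm_set_ps(15.0f, 14.0f, 13.0f, 12.0f);

    SIMDE_MM_TRANSPOSE4_PS(r0, r1, r2, r3);
    /* r0 = {0, 4, 8, 12}, r1 = {1, 5, 9, 13},
       r2 = {2, 6, 10, 14}, r3 = {3, 7, 11, 15} */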
-
- #if defined(_MM_EXCEPT_INVALID)
- #  define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID
- #else
- #  define SIMDE_MM_EXCEPT_INVALID (0x0001)
- #endif
- #if defined(_MM_EXCEPT_DENORM)
- #  define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM
- #else
- #  define SIMDE_MM_EXCEPT_DENORM (0x0002)
- #endif
- #if defined(_MM_EXCEPT_DIV_ZERO)
- #  define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO
- #else
- #  define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004)
- #endif
- #if defined(_MM_EXCEPT_OVERFLOW)
- #  define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW
- #else
- #  define SIMDE_MM_EXCEPT_OVERFLOW (0x0008)
- #endif
- #if defined(_MM_EXCEPT_UNDERFLOW)
- #  define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW
- #else
- #  define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010)
- #endif
- #if defined(_MM_EXCEPT_INEXACT)
- #  define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT
- #else
- #  define SIMDE_MM_EXCEPT_INEXACT (0x0020)
- #endif
- #if defined(_MM_EXCEPT_MASK)
- #  define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK
- #else
- #  define SIMDE_MM_EXCEPT_MASK \
-      (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \
-       SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \
-       SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT)
- #endif
-
- #if defined(_MM_MASK_INVALID)
- #  define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID
- #else
- #  define SIMDE_MM_MASK_INVALID (0x0080)
- #endif
- #if defined(_MM_MASK_DENORM)
- #  define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM
- #else
- #  define SIMDE_MM_MASK_DENORM (0x0100)
- #endif
- #if defined(_MM_MASK_DIV_ZERO)
- #  define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO
- #else
- #  define SIMDE_MM_MASK_DIV_ZERO (0x0200)
- #endif
- #if defined(_MM_MASK_OVERFLOW)
- #  define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW
- #else
- #  define SIMDE_MM_MASK_OVERFLOW (0x0400)
- #endif
- #if defined(_MM_MASK_UNDERFLOW)
- #  define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW
- #else
- #  define SIMDE_MM_MASK_UNDERFLOW (0x0800)
- #endif
- #if defined(_MM_MASK_INEXACT)
- #  define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT
- #else
- #  define SIMDE_MM_MASK_INEXACT (0x1000)
- #endif
- #if defined(_MM_MASK_MASK)
- #  define SIMDE_MM_MASK_MASK _MM_MASK_MASK
- #else
- #  define SIMDE_MM_MASK_MASK \
-      (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \
-       SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \
-       SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT)
- #endif
-
- #if defined(_MM_FLUSH_ZERO_MASK)
- #  define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK
- #else
- #  define SIMDE_MM_FLUSH_ZERO_MASK (0x8000)
- #endif
- #if defined(_MM_FLUSH_ZERO_ON)
- #  define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON
- #else
- #  define SIMDE_MM_FLUSH_ZERO_ON (0x8000)
- #endif
- #if defined(_MM_FLUSH_ZERO_OFF)
- #  define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF
- #else
- #  define SIMDE_MM_FLUSH_ZERO_OFF (0x0000)
- #endif
-
- SIMDE__END_DECLS
-
- HEDLEY_DIAGNOSTIC_POP
-
- #endif /* !defined(SIMDE__SSE_H) */