minimap2 0.2.25.0 → 0.2.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -3
- data/ext/minimap2/Makefile +6 -2
- data/ext/minimap2/NEWS.md +38 -0
- data/ext/minimap2/README.md +9 -3
- data/ext/minimap2/align.c +5 -3
- data/ext/minimap2/cookbook.md +2 -2
- data/ext/minimap2/format.c +7 -4
- data/ext/minimap2/kalloc.c +20 -1
- data/ext/minimap2/kalloc.h +13 -2
- data/ext/minimap2/ksw2.h +1 -0
- data/ext/minimap2/ksw2_extd2_sse.c +1 -1
- data/ext/minimap2/ksw2_exts2_sse.c +79 -40
- data/ext/minimap2/ksw2_extz2_sse.c +1 -1
- data/ext/minimap2/lchain.c +15 -16
- data/ext/minimap2/lib/simde/CONTRIBUTING.md +114 -0
- data/ext/minimap2/lib/simde/COPYING +20 -0
- data/ext/minimap2/lib/simde/README.md +333 -0
- data/ext/minimap2/lib/simde/amalgamate.py +58 -0
- data/ext/minimap2/lib/simde/meson.build +33 -0
- data/ext/minimap2/lib/simde/netlify.toml +20 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +140 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +137 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +142 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +145 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +140 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +145 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +140 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +143 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +137 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +141 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +147 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +141 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +134 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +138 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +134 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +137 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +131 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +135 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +141 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +135 -0
- data/ext/minimap2/lib/simde/simde/arm/neon.h +97 -0
- data/ext/minimap2/lib/simde/simde/check.h +267 -0
- data/ext/minimap2/lib/simde/simde/debug-trap.h +83 -0
- data/ext/minimap2/lib/simde/simde/hedley.h +1899 -0
- data/ext/minimap2/lib/simde/simde/simde-arch.h +445 -0
- data/ext/minimap2/lib/simde/simde/simde-common.h +697 -0
- data/ext/minimap2/lib/simde/simde/x86/avx.h +5385 -0
- data/ext/minimap2/lib/simde/simde/x86/avx2.h +2402 -0
- data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +391 -0
- data/ext/minimap2/lib/simde/simde/x86/avx512f.h +3389 -0
- data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +112 -0
- data/ext/minimap2/lib/simde/simde/x86/fma.h +659 -0
- data/ext/minimap2/lib/simde/simde/x86/mmx.h +2210 -0
- data/ext/minimap2/lib/simde/simde/x86/sse.h +3696 -0
- data/ext/minimap2/lib/simde/simde/x86/sse2.h +5991 -0
- data/ext/minimap2/lib/simde/simde/x86/sse3.h +343 -0
- data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +1783 -0
- data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +105 -0
- data/ext/minimap2/lib/simde/simde/x86/ssse3.h +1053 -0
- data/ext/minimap2/lib/simde/simde/x86/svml.h +543 -0
- data/ext/minimap2/lib/simde/test/CMakeLists.txt +166 -0
- data/ext/minimap2/lib/simde/test/arm/meson.build +4 -0
- data/ext/minimap2/lib/simde/test/arm/neon/meson.build +23 -0
- data/ext/minimap2/lib/simde/test/arm/neon/skel.c +871 -0
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +134 -0
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +39 -0
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +10 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +1260 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +873 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +1084 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +1260 -0
- data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +18 -0
- data/ext/minimap2/lib/simde/test/arm/test-arm.c +20 -0
- data/ext/minimap2/lib/simde/test/arm/test-arm.h +8 -0
- data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +171 -0
- data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +68 -0
- data/ext/minimap2/lib/simde/test/meson.build +64 -0
- data/ext/minimap2/lib/simde/test/munit/COPYING +21 -0
- data/ext/minimap2/lib/simde/test/munit/Makefile +55 -0
- data/ext/minimap2/lib/simde/test/munit/README.md +54 -0
- data/ext/minimap2/lib/simde/test/munit/example.c +351 -0
- data/ext/minimap2/lib/simde/test/munit/meson.build +37 -0
- data/ext/minimap2/lib/simde/test/munit/munit.c +2055 -0
- data/ext/minimap2/lib/simde/test/munit/munit.h +535 -0
- data/ext/minimap2/lib/simde/test/run-tests.c +20 -0
- data/ext/minimap2/lib/simde/test/run-tests.h +260 -0
- data/ext/minimap2/lib/simde/test/x86/avx.c +13752 -0
- data/ext/minimap2/lib/simde/test/x86/avx2.c +9977 -0
- data/ext/minimap2/lib/simde/test/x86/avx512bw.c +2664 -0
- data/ext/minimap2/lib/simde/test/x86/avx512f.c +10416 -0
- data/ext/minimap2/lib/simde/test/x86/avx512vl.c +210 -0
- data/ext/minimap2/lib/simde/test/x86/fma.c +2557 -0
- data/ext/minimap2/lib/simde/test/x86/meson.build +33 -0
- data/ext/minimap2/lib/simde/test/x86/mmx.c +2878 -0
- data/ext/minimap2/lib/simde/test/x86/skel.c +2984 -0
- data/ext/minimap2/lib/simde/test/x86/sse.c +5121 -0
- data/ext/minimap2/lib/simde/test/x86/sse2.c +9860 -0
- data/ext/minimap2/lib/simde/test/x86/sse3.c +486 -0
- data/ext/minimap2/lib/simde/test/x86/sse4.1.c +3446 -0
- data/ext/minimap2/lib/simde/test/x86/sse4.2.c +101 -0
- data/ext/minimap2/lib/simde/test/x86/ssse3.c +2084 -0
- data/ext/minimap2/lib/simde/test/x86/svml.c +1545 -0
- data/ext/minimap2/lib/simde/test/x86/test-avx.h +16 -0
- data/ext/minimap2/lib/simde/test/x86/test-avx512.h +25 -0
- data/ext/minimap2/lib/simde/test/x86/test-mmx.h +13 -0
- data/ext/minimap2/lib/simde/test/x86/test-sse.h +13 -0
- data/ext/minimap2/lib/simde/test/x86/test-sse2.h +13 -0
- data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +196 -0
- data/ext/minimap2/lib/simde/test/x86/test-x86.c +48 -0
- data/ext/minimap2/lib/simde/test/x86/test-x86.h +8 -0
- data/ext/minimap2/main.c +13 -6
- data/ext/minimap2/map.c +0 -5
- data/ext/minimap2/minimap.h +40 -31
- data/ext/minimap2/minimap2.1 +19 -5
- data/ext/minimap2/misc/paftools.js +545 -24
- data/ext/minimap2/options.c +1 -1
- data/ext/minimap2/pyproject.toml +2 -0
- data/ext/minimap2/python/mappy.pyx +3 -1
- data/ext/minimap2/seed.c +1 -1
- data/ext/minimap2/setup.py +32 -22
- data/lib/minimap2/version.rb +1 -1
- metadata +100 -3
|
@@ -0,0 +1,3696 @@
|
|
|
1
|
+
/* Permission is hereby granted, free of charge, to any person
|
|
2
|
+
* obtaining a copy of this software and associated documentation
|
|
3
|
+
* files (the "Software"), to deal in the Software without
|
|
4
|
+
* restriction, including without limitation the rights to use, copy,
|
|
5
|
+
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
|
6
|
+
* of the Software, and to permit persons to whom the Software is
|
|
7
|
+
* furnished to do so, subject to the following conditions:
|
|
8
|
+
*
|
|
9
|
+
* The above copyright notice and this permission notice shall be
|
|
10
|
+
* included in all copies or substantial portions of the Software.
|
|
11
|
+
*
|
|
12
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
13
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
14
|
+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
15
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
16
|
+
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
17
|
+
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
18
|
+
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
19
|
+
* SOFTWARE.
|
|
20
|
+
*
|
|
21
|
+
* Copyright:
|
|
22
|
+
* 2017-2020 Evan Nemerson <evan@nemerson.com>
|
|
23
|
+
* 2015-2017 John W. Ratcliff <jratcliffscarab@gmail.com>
|
|
24
|
+
* 2015 Brandon Rowlett <browlett@nvidia.com>
|
|
25
|
+
* 2015 Ken Fast <kfast@gdeb.com>
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
#if !defined(SIMDE__SSE_H)
|
|
29
|
+
# if !defined(SIMDE__SSE_H)
|
|
30
|
+
# define SIMDE__SSE_H
|
|
31
|
+
# endif
|
|
32
|
+
# include "mmx.h"
|
|
33
|
+
|
|
34
|
+
HEDLEY_DIAGNOSTIC_PUSH
|
|
35
|
+
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
|
36
|
+
|
|
37
|
+
# if defined(SIMDE_SSE_NATIVE)
|
|
38
|
+
# undef SIMDE_SSE_NATIVE
|
|
39
|
+
# endif
|
|
40
|
+
# if defined(SIMDE_ARCH_X86_SSE) && !defined(SIMDE_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
|
|
41
|
+
# define SIMDE_SSE_NATIVE
|
|
42
|
+
# elif defined(SIMDE_ARCH_ARM_NEON) && !defined(SIMDE_SSE_NO_NEON) && !defined(SIMDE_NO_NEON)
|
|
43
|
+
# define SIMDE_SSE_NEON
|
|
44
|
+
# elif defined(SIMDE_ARCH_WASM_SIMD128)
|
|
45
|
+
# define SIMDE_SSE_WASM_SIMD128
|
|
46
|
+
# elif defined(SIMDE_ARCH_POWER_ALTIVEC)
|
|
47
|
+
# define SIMDE_SSE_POWER_ALTIVEC
|
|
48
|
+
# endif
|
|
49
|
+
|
|
50
|
+
# if defined(SIMDE_SSE_NATIVE)
|
|
51
|
+
# include <xmmintrin.h>
|
|
52
|
+
# else
|
|
53
|
+
# if defined(SIMDE_SSE_NEON)
|
|
54
|
+
# include <arm_neon.h>
|
|
55
|
+
# endif
|
|
56
|
+
# if defined(SIMDE_SSE_WASM_SIMD128)
|
|
57
|
+
# if !defined(__wasm_unimplemented_simd128__)
|
|
58
|
+
# define __wasm_unimplemented_simd128__
|
|
59
|
+
# endif
|
|
60
|
+
# include <wasm_simd128.h>
|
|
61
|
+
# endif
|
|
62
|
+
# if defined(SIMDE_SSE_POWER_ALTIVEC)
|
|
63
|
+
# include <altivec.h>
|
|
64
|
+
# endif
|
|
65
|
+
|
|
66
|
+
# if !defined(HEDLEY_INTEL_VERSION) && !defined(HEDLEY_EMSCRIPTEN_VERSION) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
|
|
67
|
+
# include <stdatomic.h>
|
|
68
|
+
# elif defined(_WIN32)
|
|
69
|
+
# include <windows.h>
|
|
70
|
+
# endif
|
|
71
|
+
# endif
|
|
72
|
+
|
|
73
|
+
SIMDE__BEGIN_DECLS
|
|
74
|
+
|
|
75
|
+
typedef union {
|
|
76
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT)
|
|
77
|
+
SIMDE_ALIGN(16) int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
|
78
|
+
SIMDE_ALIGN(16) int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
|
79
|
+
SIMDE_ALIGN(16) int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
|
80
|
+
SIMDE_ALIGN(16) int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
|
81
|
+
SIMDE_ALIGN(16) uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
|
82
|
+
SIMDE_ALIGN(16) uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
|
83
|
+
SIMDE_ALIGN(16) uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
|
84
|
+
SIMDE_ALIGN(16) uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
|
85
|
+
#if defined(SIMDE__HAVE_INT128)
|
|
86
|
+
SIMDE_ALIGN(16) simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
|
87
|
+
SIMDE_ALIGN(16) simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
|
88
|
+
#endif
|
|
89
|
+
SIMDE_ALIGN(16) simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
|
90
|
+
SIMDE_ALIGN(16) int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
|
91
|
+
SIMDE_ALIGN(16) uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
|
92
|
+
#else
|
|
93
|
+
SIMDE_ALIGN(16) int8_t i8[16];
|
|
94
|
+
SIMDE_ALIGN(16) int16_t i16[8];
|
|
95
|
+
SIMDE_ALIGN(16) int32_t i32[4];
|
|
96
|
+
SIMDE_ALIGN(16) int64_t i64[2];
|
|
97
|
+
SIMDE_ALIGN(16) uint8_t u8[16];
|
|
98
|
+
SIMDE_ALIGN(16) uint16_t u16[8];
|
|
99
|
+
SIMDE_ALIGN(16) uint32_t u32[4];
|
|
100
|
+
SIMDE_ALIGN(16) uint64_t u64[2];
|
|
101
|
+
#if defined(SIMDE__HAVE_INT128)
|
|
102
|
+
SIMDE_ALIGN(16) simde_int128 i128[1];
|
|
103
|
+
SIMDE_ALIGN(16) simde_uint128 u128[1];
|
|
104
|
+
#endif
|
|
105
|
+
SIMDE_ALIGN(16) simde_float32 f32[4];
|
|
106
|
+
SIMDE_ALIGN(16) int_fast32_t i32f[16 / sizeof(int_fast32_t)];
|
|
107
|
+
SIMDE_ALIGN(16) uint_fast32_t u32f[16 / sizeof(uint_fast32_t)];
|
|
108
|
+
#endif
|
|
109
|
+
|
|
110
|
+
SIMDE_ALIGN(16) simde__m64_private m64_private[2];
|
|
111
|
+
SIMDE_ALIGN(16) simde__m64 m64[2];
|
|
112
|
+
|
|
113
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
114
|
+
SIMDE_ALIGN(16) __m128 n;
|
|
115
|
+
#elif defined(SIMDE_SSE_NEON)
|
|
116
|
+
SIMDE_ALIGN(16) int8x16_t neon_i8;
|
|
117
|
+
SIMDE_ALIGN(16) int16x8_t neon_i16;
|
|
118
|
+
SIMDE_ALIGN(16) int32x4_t neon_i32;
|
|
119
|
+
SIMDE_ALIGN(16) int64x2_t neon_i64;
|
|
120
|
+
SIMDE_ALIGN(16) uint8x16_t neon_u8;
|
|
121
|
+
SIMDE_ALIGN(16) uint16x8_t neon_u16;
|
|
122
|
+
SIMDE_ALIGN(16) uint32x4_t neon_u32;
|
|
123
|
+
SIMDE_ALIGN(16) uint64x2_t neon_u64;
|
|
124
|
+
SIMDE_ALIGN(16) float32x4_t neon_f32;
|
|
125
|
+
#if defined(SIMDE_ARCH_AARCH64)
|
|
126
|
+
SIMDE_ALIGN(16) float64x2_t neon_f64;
|
|
127
|
+
#endif
|
|
128
|
+
#elif defined(SIMDE_SSE_WASM_SIMD128)
|
|
129
|
+
SIMDE_ALIGN(16) v128_t wasm_v128;
|
|
130
|
+
#elif defined(SIMDE_SSE_POWER_ALTIVEC)
|
|
131
|
+
SIMDE_ALIGN(16) vector unsigned char altivec_u8;
|
|
132
|
+
SIMDE_ALIGN(16) vector unsigned short altivec_u16;
|
|
133
|
+
SIMDE_ALIGN(16) vector unsigned int altivec_u32;
|
|
134
|
+
SIMDE_ALIGN(16) vector unsigned long long altivec_u64;
|
|
135
|
+
SIMDE_ALIGN(16) vector signed char altivec_i8;
|
|
136
|
+
SIMDE_ALIGN(16) vector signed short altivec_i16;
|
|
137
|
+
SIMDE_ALIGN(16) vector signed int altivec_i32;
|
|
138
|
+
SIMDE_ALIGN(16) vector signed long long altivec_i64;
|
|
139
|
+
SIMDE_ALIGN(16) vector float altivec_f32;
|
|
140
|
+
SIMDE_ALIGN(16) vector double altivec_f64;
|
|
141
|
+
#endif
|
|
142
|
+
} simde__m128_private;
|
|
143
|
+
|
|
144
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
145
|
+
typedef __m128 simde__m128;
|
|
146
|
+
#elif defined(SIMDE_SSE_NEON)
|
|
147
|
+
typedef float32x4_t simde__m128;
|
|
148
|
+
#elif defined(SIMDE_SSE_WASM_SIMD128)
|
|
149
|
+
typedef v128_t simde__m128;
|
|
150
|
+
#elif defined(SIMDE_SSE_POWER_ALTIVEC)
|
|
151
|
+
typedef vector float simde__m128;
|
|
152
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT)
|
|
153
|
+
typedef simde_float32 simde__m128 SIMDE_ALIGN(16) SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
|
|
154
|
+
#else
|
|
155
|
+
typedef simde__m128_private simde__m128;
|
|
156
|
+
#endif
|
|
157
|
+
|
|
158
|
+
#if !defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
|
|
159
|
+
#define SIMDE_SSE_ENABLE_NATIVE_ALIASES
|
|
160
|
+
typedef simde__m128 __m128;
|
|
161
|
+
#endif
|
|
162
|
+
|
|
163
|
+
HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect");
|
|
164
|
+
HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect");
|
|
165
|
+
#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)
|
|
166
|
+
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned");
|
|
167
|
+
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned");
|
|
168
|
+
#endif
|
|
169
|
+
|
|
170
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
171
|
+
simde__m128
|
|
172
|
+
simde__m128_from_private(simde__m128_private v) {
|
|
173
|
+
simde__m128 r;
|
|
174
|
+
simde_memcpy(&r, &v, sizeof(r));
|
|
175
|
+
return r;
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
179
|
+
simde__m128_private
|
|
180
|
+
simde__m128_to_private(simde__m128 v) {
|
|
181
|
+
simde__m128_private r;
|
|
182
|
+
simde_memcpy(&r, &v, sizeof(r));
|
|
183
|
+
return r;
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
|
|
187
|
+
HEDLEY_DIAGNOSTIC_POP
|
|
188
|
+
#endif
|
|
189
|
+
|
|
190
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
191
|
+
simde__m128
|
|
192
|
+
simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) {
|
|
193
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
194
|
+
return _mm_set_ps(e3, e2, e1, e0);
|
|
195
|
+
#else
|
|
196
|
+
simde__m128_private r_;
|
|
197
|
+
|
|
198
|
+
#if defined(SIMDE_SSE_NEON)
|
|
199
|
+
SIMDE_ALIGN(16) simde_float32 data[4] = { e0, e1, e2, e3 };
|
|
200
|
+
r_.neon_f32 = vld1q_f32(data);
|
|
201
|
+
#elif defined(SIMDE_SSE_WASM_SIMD128)
|
|
202
|
+
r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3);
|
|
203
|
+
#else
|
|
204
|
+
r_.f32[0] = e0;
|
|
205
|
+
r_.f32[1] = e1;
|
|
206
|
+
r_.f32[2] = e2;
|
|
207
|
+
r_.f32[3] = e3;
|
|
208
|
+
#endif
|
|
209
|
+
|
|
210
|
+
return simde__m128_from_private(r_);
|
|
211
|
+
#endif
|
|
212
|
+
}
|
|
213
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
214
|
+
# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0)
|
|
215
|
+
#endif
|
|
216
|
+
|
|
217
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
218
|
+
simde__m128
|
|
219
|
+
simde_mm_set_ps1 (simde_float32 a) {
|
|
220
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
221
|
+
return _mm_set_ps1(a);
|
|
222
|
+
#elif defined(SIMDE_SSE_NEON)
|
|
223
|
+
return vdupq_n_f32(a);
|
|
224
|
+
#else
|
|
225
|
+
return simde_mm_set_ps(a, a, a, a);
|
|
226
|
+
#endif
|
|
227
|
+
}
|
|
228
|
+
#define simde_mm_set1_ps(a) simde_mm_set_ps1(a)
|
|
229
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
230
|
+
# define _mm_set_ps1(a) simde_mm_set_ps1(a)
|
|
231
|
+
# define _mm_set1_ps(a) simde_mm_set1_ps(a)
|
|
232
|
+
#endif
|
|
233
|
+
|
|
234
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
235
|
+
simde__m128
|
|
236
|
+
simde_mm_move_ss (simde__m128 a, simde__m128 b) {
|
|
237
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
238
|
+
return _mm_move_ss(a, b);
|
|
239
|
+
#else
|
|
240
|
+
simde__m128_private
|
|
241
|
+
r_,
|
|
242
|
+
a_ = simde__m128_to_private(a),
|
|
243
|
+
b_ = simde__m128_to_private(b);
|
|
244
|
+
|
|
245
|
+
#if defined(SIMDE_SSE_NEON)
|
|
246
|
+
r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0);
|
|
247
|
+
#elif defined(SIMDE_SSE_POWER_ALTIVEC)
|
|
248
|
+
vector unsigned char m = {
|
|
249
|
+
16, 17, 18, 19,
|
|
250
|
+
4, 5, 6, 7,
|
|
251
|
+
8, 9, 10, 11,
|
|
252
|
+
12, 13, 14, 15
|
|
253
|
+
};
|
|
254
|
+
r_.altivec_f32 = vec_perm(a_.altivec_f32, b_.altivec_f32, m);
|
|
255
|
+
#elif defined(SIMDE__SHUFFLE_VECTOR)
|
|
256
|
+
r_.f32 = SIMDE__SHUFFLE_VECTOR(32, 16, a_.f32, b_.f32, 4, 1, 2, 3);
|
|
257
|
+
#else
|
|
258
|
+
r_.f32[0] = b_.f32[0];
|
|
259
|
+
r_.f32[1] = a_.f32[1];
|
|
260
|
+
r_.f32[2] = a_.f32[2];
|
|
261
|
+
r_.f32[3] = a_.f32[3];
|
|
262
|
+
#endif
|
|
263
|
+
|
|
264
|
+
return simde__m128_from_private(r_);
|
|
265
|
+
#endif
|
|
266
|
+
}
|
|
267
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
268
|
+
# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b))
|
|
269
|
+
#endif
|
|
270
|
+
|
|
271
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
272
|
+
simde__m128
|
|
273
|
+
simde_mm_add_ps (simde__m128 a, simde__m128 b) {
|
|
274
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
275
|
+
return _mm_add_ps(a, b);
|
|
276
|
+
#else
|
|
277
|
+
simde__m128_private
|
|
278
|
+
r_,
|
|
279
|
+
a_ = simde__m128_to_private(a),
|
|
280
|
+
b_ = simde__m128_to_private(b);
|
|
281
|
+
|
|
282
|
+
#if defined(SIMDE_SSE_NEON)
|
|
283
|
+
r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32);
|
|
284
|
+
#elif defined(SIMDE_SSE_WASM_SIMD128)
|
|
285
|
+
r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128);
|
|
286
|
+
#elif defined(SIMDE_SSE_POWER_ALTIVEC)
|
|
287
|
+
r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32);
|
|
288
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
289
|
+
r_.f32 = a_.f32 + b_.f32;
|
|
290
|
+
#else
|
|
291
|
+
SIMDE__VECTORIZE
|
|
292
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
293
|
+
r_.f32[i] = a_.f32[i] + b_.f32[i];
|
|
294
|
+
}
|
|
295
|
+
#endif
|
|
296
|
+
|
|
297
|
+
return simde__m128_from_private(r_);
|
|
298
|
+
#endif
|
|
299
|
+
}
|
|
300
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
301
|
+
# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b))
|
|
302
|
+
#endif
|
|
303
|
+
|
|
304
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
305
|
+
simde__m128
|
|
306
|
+
simde_mm_add_ss (simde__m128 a, simde__m128 b) {
|
|
307
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
308
|
+
return _mm_add_ss(a, b);
|
|
309
|
+
#elif defined(SIMDE_ASSUME_VECTORIZATION)
|
|
310
|
+
return simde_mm_move_ss(a, simde_mm_add_ps(a, b));
|
|
311
|
+
#else
|
|
312
|
+
simde__m128_private
|
|
313
|
+
r_,
|
|
314
|
+
a_ = simde__m128_to_private(a),
|
|
315
|
+
b_ = simde__m128_to_private(b);
|
|
316
|
+
|
|
317
|
+
r_.f32[0] = a_.f32[0] + b_.f32[0];
|
|
318
|
+
r_.f32[1] = a_.f32[1];
|
|
319
|
+
r_.f32[2] = a_.f32[2];
|
|
320
|
+
r_.f32[3] = a_.f32[3];
|
|
321
|
+
|
|
322
|
+
return simde__m128_from_private(r_);
|
|
323
|
+
#endif
|
|
324
|
+
}
|
|
325
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
326
|
+
# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b))
|
|
327
|
+
#endif
|
|
328
|
+
|
|
329
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
330
|
+
simde__m128
|
|
331
|
+
simde_mm_and_ps (simde__m128 a, simde__m128 b) {
|
|
332
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
333
|
+
return _mm_and_ps(a, b);
|
|
334
|
+
#else
|
|
335
|
+
simde__m128_private
|
|
336
|
+
r_,
|
|
337
|
+
a_ = simde__m128_to_private(a),
|
|
338
|
+
b_ = simde__m128_to_private(b);
|
|
339
|
+
|
|
340
|
+
#if defined(SIMDE_SSE_NEON)
|
|
341
|
+
r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32);
|
|
342
|
+
#elif defined(SIMDE_SSE_WASM_SIMD128)
|
|
343
|
+
r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128);
|
|
344
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
345
|
+
r_.i32 = a_.i32 & b_.i32;
|
|
346
|
+
#elif defined(SIMDE_SSE_POWER_ALTIVEC)
|
|
347
|
+
r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32);
|
|
348
|
+
#else
|
|
349
|
+
SIMDE__VECTORIZE
|
|
350
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
351
|
+
r_.i32[i] = a_.i32[i] & b_.i32[i];
|
|
352
|
+
}
|
|
353
|
+
#endif
|
|
354
|
+
|
|
355
|
+
return simde__m128_from_private(r_);
|
|
356
|
+
#endif
|
|
357
|
+
}
|
|
358
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
359
|
+
# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b))
|
|
360
|
+
#endif
|
|
361
|
+
|
|
362
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
363
|
+
simde__m128
|
|
364
|
+
simde_mm_andnot_ps (simde__m128 a, simde__m128 b) {
|
|
365
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
366
|
+
return _mm_andnot_ps(a, b);
|
|
367
|
+
#else
|
|
368
|
+
simde__m128_private
|
|
369
|
+
r_,
|
|
370
|
+
a_ = simde__m128_to_private(a),
|
|
371
|
+
b_ = simde__m128_to_private(b);
|
|
372
|
+
|
|
373
|
+
#if defined(SIMDE_SSE_NEON)
|
|
374
|
+
r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32);
|
|
375
|
+
#elif defined(SIMDE_SSE_WASM_SIMD128)
|
|
376
|
+
r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128);
|
|
377
|
+
#elif defined(SIMDE_SSE_POWER_ALTIVEC)
|
|
378
|
+
r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32);
|
|
379
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
380
|
+
r_.i32 = ~a_.i32 & b_.i32;
|
|
381
|
+
#else
|
|
382
|
+
SIMDE__VECTORIZE
|
|
383
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
384
|
+
r_.i32[i] = ~(a_.i32[i]) & b_.i32[i];
|
|
385
|
+
}
|
|
386
|
+
#endif
|
|
387
|
+
|
|
388
|
+
return simde__m128_from_private(r_);
|
|
389
|
+
#endif
|
|
390
|
+
}
|
|
391
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
392
|
+
# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b))
|
|
393
|
+
#endif
|
|
394
|
+
|
|
395
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
396
|
+
simde__m64
|
|
397
|
+
simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) {
|
|
398
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
399
|
+
return _mm_avg_pu16(a, b);
|
|
400
|
+
#else
|
|
401
|
+
simde__m64_private
|
|
402
|
+
r_,
|
|
403
|
+
a_ = simde__m64_to_private(a),
|
|
404
|
+
b_ = simde__m64_to_private(b);
|
|
405
|
+
|
|
406
|
+
#if defined(SIMDE_SSE_NEON)
|
|
407
|
+
r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16);
|
|
408
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE__CONVERT_VECTOR)
|
|
409
|
+
uint32_t wa SIMDE_VECTOR(16);
|
|
410
|
+
uint32_t wb SIMDE_VECTOR(16);
|
|
411
|
+
uint32_t wr SIMDE_VECTOR(16);
|
|
412
|
+
SIMDE__CONVERT_VECTOR(wa, a_.u16);
|
|
413
|
+
SIMDE__CONVERT_VECTOR(wb, b_.u16);
|
|
414
|
+
wr = (wa + wb + 1) >> 1;
|
|
415
|
+
SIMDE__CONVERT_VECTOR(r_.u16, wr);
|
|
416
|
+
#else
|
|
417
|
+
SIMDE__VECTORIZE
|
|
418
|
+
for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
|
|
419
|
+
r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1;
|
|
420
|
+
}
|
|
421
|
+
#endif
|
|
422
|
+
|
|
423
|
+
return simde__m64_from_private(r_);
|
|
424
|
+
#endif
|
|
425
|
+
}
|
|
426
|
+
#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b)
|
|
427
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
428
|
+
# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b)
|
|
429
|
+
# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b)
|
|
430
|
+
#endif
|
|
431
|
+
|
|
432
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
433
|
+
simde__m64
|
|
434
|
+
simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) {
|
|
435
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
436
|
+
return _mm_avg_pu8(a, b);
|
|
437
|
+
#else
|
|
438
|
+
simde__m64_private
|
|
439
|
+
r_,
|
|
440
|
+
a_ = simde__m64_to_private(a),
|
|
441
|
+
b_ = simde__m64_to_private(b);
|
|
442
|
+
|
|
443
|
+
#if defined(SIMDE_SSE_NEON)
|
|
444
|
+
r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8);
|
|
445
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE__CONVERT_VECTOR)
|
|
446
|
+
uint16_t wa SIMDE_VECTOR(16);
|
|
447
|
+
uint16_t wb SIMDE_VECTOR(16);
|
|
448
|
+
uint16_t wr SIMDE_VECTOR(16);
|
|
449
|
+
SIMDE__CONVERT_VECTOR(wa, a_.u8);
|
|
450
|
+
SIMDE__CONVERT_VECTOR(wb, b_.u8);
|
|
451
|
+
wr = (wa + wb + 1) >> 1;
|
|
452
|
+
SIMDE__CONVERT_VECTOR(r_.u8, wr);
|
|
453
|
+
#else
|
|
454
|
+
SIMDE__VECTORIZE
|
|
455
|
+
for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
|
|
456
|
+
r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1;
|
|
457
|
+
}
|
|
458
|
+
#endif
|
|
459
|
+
|
|
460
|
+
return simde__m64_from_private(r_);
|
|
461
|
+
#endif
|
|
462
|
+
}
|
|
463
|
+
#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b)
|
|
464
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
465
|
+
# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b)
|
|
466
|
+
# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b)
|
|
467
|
+
#endif
|
|
468
|
+
|
|
469
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
470
|
+
simde__m128
|
|
471
|
+
simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) {
|
|
472
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
473
|
+
return _mm_cmpeq_ps(a, b);
|
|
474
|
+
#else
|
|
475
|
+
simde__m128_private
|
|
476
|
+
r_,
|
|
477
|
+
a_ = simde__m128_to_private(a),
|
|
478
|
+
b_ = simde__m128_to_private(b);
|
|
479
|
+
|
|
480
|
+
#if defined(SIMDE_SSE_NEON)
|
|
481
|
+
r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32);
|
|
482
|
+
#elif defined(SIMDE_SSE_WASM_SIMD128)
|
|
483
|
+
r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128);
|
|
484
|
+
#elif defined(SIMDE_SSE_POWER_ALTIVEC)
|
|
485
|
+
r_.altivec_f32 = (vector float) vec_cmpeq(a_.altivec_f32, b_.altivec_f32);
|
|
486
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
487
|
+
r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), a_.f32 == b_.f32);
|
|
488
|
+
#else
|
|
489
|
+
SIMDE__VECTORIZE
|
|
490
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
491
|
+
r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
|
|
492
|
+
}
|
|
493
|
+
#endif
|
|
494
|
+
|
|
495
|
+
return simde__m128_from_private(r_);
|
|
496
|
+
#endif
|
|
497
|
+
}
|
|
498
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
499
|
+
# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b))
|
|
500
|
+
#endif
|
|
501
|
+
|
|
502
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
503
|
+
simde__m128
|
|
504
|
+
simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) {
|
|
505
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
506
|
+
return _mm_cmpeq_ss(a, b);
|
|
507
|
+
#elif defined(SIMDE_ASSUME_VECTORIZATION)
|
|
508
|
+
return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b));
|
|
509
|
+
#else
|
|
510
|
+
simde__m128_private
|
|
511
|
+
r_,
|
|
512
|
+
a_ = simde__m128_to_private(a),
|
|
513
|
+
b_ = simde__m128_to_private(b);
|
|
514
|
+
|
|
515
|
+
r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
|
|
516
|
+
SIMDE__VECTORIZE
|
|
517
|
+
for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
518
|
+
r_.u32[i] = a_.u32[i];
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
return simde__m128_from_private(r_);
|
|
522
|
+
#endif
|
|
523
|
+
}
|
|
524
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
525
|
+
# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b))
|
|
526
|
+
#endif
|
|
527
|
+
|
|
528
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
529
|
+
simde__m128
|
|
530
|
+
simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) {
|
|
531
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
532
|
+
return _mm_cmpge_ps(a, b);
|
|
533
|
+
#else
|
|
534
|
+
simde__m128_private
|
|
535
|
+
r_,
|
|
536
|
+
a_ = simde__m128_to_private(a),
|
|
537
|
+
b_ = simde__m128_to_private(b);
|
|
538
|
+
|
|
539
|
+
#if defined(SIMDE_SSE_NEON)
|
|
540
|
+
r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32);
|
|
541
|
+
#elif defined(SIMDE_SSE_WASM_SIMD128)
|
|
542
|
+
r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128);
|
|
543
|
+
#elif defined(SIMDE_SSE_POWER_ALTIVEC)
|
|
544
|
+
r_.altivec_f32 = (vector float) vec_cmpge(a_.altivec_f32, b_.altivec_f32);
|
|
545
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
546
|
+
r_.i32 = (__typeof__(r_.i32)) (a_.f32 >= b_.f32);
|
|
547
|
+
#else
|
|
548
|
+
SIMDE__VECTORIZE
|
|
549
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
550
|
+
r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
|
|
551
|
+
}
|
|
552
|
+
#endif
|
|
553
|
+
|
|
554
|
+
return simde__m128_from_private(r_);
|
|
555
|
+
#endif
|
|
556
|
+
}
|
|
557
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
558
|
+
# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b))
|
|
559
|
+
#endif
|
|
560
|
+
|
|
561
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
562
|
+
simde__m128
|
|
563
|
+
simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) {
|
|
564
|
+
#if defined(SIMDE_SSE_NATIVE) && !defined(__PGI)
|
|
565
|
+
return _mm_cmpge_ss(a, b);
|
|
566
|
+
#elif defined(SIMDE_ASSUME_VECTORIZATION)
|
|
567
|
+
return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b));
|
|
568
|
+
#else
|
|
569
|
+
simde__m128_private
|
|
570
|
+
r_,
|
|
571
|
+
a_ = simde__m128_to_private(a),
|
|
572
|
+
b_ = simde__m128_to_private(b);
|
|
573
|
+
|
|
574
|
+
r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
|
|
575
|
+
SIMDE__VECTORIZE
|
|
576
|
+
for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
577
|
+
r_.u32[i] = a_.u32[i];
|
|
578
|
+
}
|
|
579
|
+
|
|
580
|
+
return simde__m128_from_private(r_);
|
|
581
|
+
#endif
|
|
582
|
+
}
|
|
583
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
584
|
+
# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b))
|
|
585
|
+
#endif
|
|
586
|
+
|
|
587
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
588
|
+
simde__m128
|
|
589
|
+
simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) {
|
|
590
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
591
|
+
return _mm_cmpgt_ps(a, b);
|
|
592
|
+
#else
|
|
593
|
+
simde__m128_private
|
|
594
|
+
r_,
|
|
595
|
+
a_ = simde__m128_to_private(a),
|
|
596
|
+
b_ = simde__m128_to_private(b);
|
|
597
|
+
|
|
598
|
+
#if defined(SIMDE_SSE_NEON)
|
|
599
|
+
r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32);
|
|
600
|
+
#elif defined(SIMDE_SSE_WASM_SIMD128)
|
|
601
|
+
r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128);
|
|
602
|
+
#elif defined(SIMDE_SSE_POWER_ALTIVEC)
|
|
603
|
+
r_.altivec_f32 = (vector float) vec_cmpgt(a_.altivec_f32, b_.altivec_f32);
|
|
604
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
605
|
+
r_.i32 = (__typeof__(r_.i32)) (a_.f32 > b_.f32);
|
|
606
|
+
#else
|
|
607
|
+
SIMDE__VECTORIZE
|
|
608
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
609
|
+
r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
|
|
610
|
+
}
|
|
611
|
+
#endif
|
|
612
|
+
|
|
613
|
+
return simde__m128_from_private(r_);
|
|
614
|
+
#endif
|
|
615
|
+
}
|
|
616
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
617
|
+
# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b))
|
|
618
|
+
#endif
|
|
619
|
+
|
|
620
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
621
|
+
simde__m128
|
|
622
|
+
simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) {
|
|
623
|
+
#if defined(SIMDE_SSE_NATIVE) && !defined(__PGI)
|
|
624
|
+
return _mm_cmpgt_ss(a, b);
|
|
625
|
+
#elif defined(SIMDE_ASSUME_VECTORIZATION)
|
|
626
|
+
return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b));
|
|
627
|
+
#else
|
|
628
|
+
simde__m128_private
|
|
629
|
+
r_,
|
|
630
|
+
a_ = simde__m128_to_private(a),
|
|
631
|
+
b_ = simde__m128_to_private(b);
|
|
632
|
+
|
|
633
|
+
r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
|
|
634
|
+
SIMDE__VECTORIZE
|
|
635
|
+
for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
636
|
+
r_.u32[i] = a_.u32[i];
|
|
637
|
+
}
|
|
638
|
+
|
|
639
|
+
return simde__m128_from_private(r_);
|
|
640
|
+
#endif
|
|
641
|
+
}
|
|
642
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
643
|
+
# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b))
|
|
644
|
+
#endif
|
|
645
|
+
|
|
646
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
647
|
+
simde__m128
|
|
648
|
+
simde_mm_cmple_ps (simde__m128 a, simde__m128 b) {
|
|
649
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
650
|
+
return _mm_cmple_ps(a, b);
|
|
651
|
+
#else
|
|
652
|
+
simde__m128_private
|
|
653
|
+
r_,
|
|
654
|
+
a_ = simde__m128_to_private(a),
|
|
655
|
+
b_ = simde__m128_to_private(b);
|
|
656
|
+
|
|
657
|
+
#if defined(SIMDE_SSE_NEON)
|
|
658
|
+
r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32);
|
|
659
|
+
#elif defined(SIMDE_SSE_WASM_SIMD128)
|
|
660
|
+
r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128);
|
|
661
|
+
#elif defined(SIMDE_SSE_POWER_ALTIVEC)
|
|
662
|
+
r_.altivec_f32 = (vector float) vec_cmple(a_.altivec_f32, b_.altivec_f32);
|
|
663
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
664
|
+
r_.i32 = (__typeof__(r_.i32)) (a_.f32 <= b_.f32);
|
|
665
|
+
#else
|
|
666
|
+
SIMDE__VECTORIZE
|
|
667
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
668
|
+
r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
|
|
669
|
+
}
|
|
670
|
+
#endif
|
|
671
|
+
|
|
672
|
+
return simde__m128_from_private(r_);
|
|
673
|
+
#endif
|
|
674
|
+
}
|
|
675
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
676
|
+
# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b))
|
|
677
|
+
#endif
|
|
678
|
+
|
|
679
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
680
|
+
simde__m128
|
|
681
|
+
simde_mm_cmple_ss (simde__m128 a, simde__m128 b) {
|
|
682
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
683
|
+
return _mm_cmple_ss(a, b);
|
|
684
|
+
#elif defined(SIMDE_ASSUME_VECTORIZATION)
|
|
685
|
+
return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b));
|
|
686
|
+
#else
|
|
687
|
+
simde__m128_private
|
|
688
|
+
r_,
|
|
689
|
+
a_ = simde__m128_to_private(a),
|
|
690
|
+
b_ = simde__m128_to_private(b);
|
|
691
|
+
|
|
692
|
+
r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
|
|
693
|
+
SIMDE__VECTORIZE
|
|
694
|
+
for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
695
|
+
r_.u32[i] = a_.u32[i];
|
|
696
|
+
}
|
|
697
|
+
|
|
698
|
+
return simde__m128_from_private(r_);
|
|
699
|
+
#endif
|
|
700
|
+
}
|
|
701
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
702
|
+
# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b))
|
|
703
|
+
#endif
|
|
704
|
+
|
|
705
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
706
|
+
simde__m128
|
|
707
|
+
simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) {
|
|
708
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
709
|
+
return _mm_cmplt_ps(a, b);
|
|
710
|
+
#else
|
|
711
|
+
simde__m128_private
|
|
712
|
+
r_,
|
|
713
|
+
a_ = simde__m128_to_private(a),
|
|
714
|
+
b_ = simde__m128_to_private(b);
|
|
715
|
+
|
|
716
|
+
#if defined(SIMDE_SSE_NEON)
|
|
717
|
+
r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32);
|
|
718
|
+
#elif defined(SIMDE_SSE_WASM_SIMD128)
|
|
719
|
+
r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128);
|
|
720
|
+
#elif defined(SIMDE_SSE_POWER_ALTIVEC)
|
|
721
|
+
r_.altivec_f32 = (vector float) vec_cmplt(a_.altivec_f32, b_.altivec_f32);
|
|
722
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
723
|
+
r_.i32 = (__typeof__(r_.i32)) (a_.f32 < b_.f32);
|
|
724
|
+
#else
|
|
725
|
+
SIMDE__VECTORIZE
|
|
726
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
727
|
+
r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
|
|
728
|
+
}
|
|
729
|
+
#endif
|
|
730
|
+
|
|
731
|
+
return simde__m128_from_private(r_);
|
|
732
|
+
#endif
|
|
733
|
+
}
|
|
734
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
735
|
+
# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b))
|
|
736
|
+
#endif
|
|
737
|
+
|
|
738
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
739
|
+
simde__m128
|
|
740
|
+
simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) {
|
|
741
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
742
|
+
return _mm_cmplt_ss(a, b);
|
|
743
|
+
#elif defined(SIMDE_ASSUME_VECTORIZATION)
|
|
744
|
+
return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b));
|
|
745
|
+
#else
|
|
746
|
+
simde__m128_private
|
|
747
|
+
r_,
|
|
748
|
+
a_ = simde__m128_to_private(a),
|
|
749
|
+
b_ = simde__m128_to_private(b);
|
|
750
|
+
|
|
751
|
+
r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
|
|
752
|
+
SIMDE__VECTORIZE
|
|
753
|
+
for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
754
|
+
r_.u32[i] = a_.u32[i];
|
|
755
|
+
}
|
|
756
|
+
|
|
757
|
+
return simde__m128_from_private(r_);
|
|
758
|
+
#endif
|
|
759
|
+
}
|
|
760
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
761
|
+
# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b))
|
|
762
|
+
#endif
|
|
763
|
+
|
|
764
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
765
|
+
simde__m128
|
|
766
|
+
simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) {
|
|
767
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
768
|
+
return _mm_cmpneq_ps(a, b);
|
|
769
|
+
#else
|
|
770
|
+
simde__m128_private
|
|
771
|
+
r_,
|
|
772
|
+
a_ = simde__m128_to_private(a),
|
|
773
|
+
b_ = simde__m128_to_private(b);
|
|
774
|
+
|
|
775
|
+
#if defined(SIMDE_SSE_NEON)
|
|
776
|
+
r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32));
|
|
777
|
+
#elif defined(SIMDE_SSE_WASM_SIMD128)
|
|
778
|
+
r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128);
|
|
779
|
+
#elif defined(SIMDE_SSE_POWER_ALTIVEC) && (SIMDE_ARCH_POWER >= 900) && !defined(HEDLEY_IBM_VERSION)
|
|
780
|
+
/* vec_cmpne(vector float, vector float) is missing from XL C/C++ v16.1.1,
|
|
781
|
+
though the documentation (table 89 on page 432 of the IBM XL C/C++ for
|
|
782
|
+
Linux Compiler Reference, Version 16.1.1) shows that it should be
|
|
783
|
+
present. Both GCC and clang support it. */
|
|
784
|
+
r_.altivec_f32 = (vector float) vec_cmpne(a_.altivec_f32, b_.altivec_f32);
|
|
785
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
786
|
+
r_.i32 = (__typeof__(r_.i32)) (a_.f32 != b_.f32);
|
|
787
|
+
#else
|
|
788
|
+
SIMDE__VECTORIZE
|
|
789
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
790
|
+
r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
|
|
791
|
+
}
|
|
792
|
+
#endif
|
|
793
|
+
|
|
794
|
+
return simde__m128_from_private(r_);
|
|
795
|
+
#endif
|
|
796
|
+
}
|
|
797
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
798
|
+
# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b))
|
|
799
|
+
#endif
|
|
800
|
+
|
|
801
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
802
|
+
simde__m128
|
|
803
|
+
simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) {
|
|
804
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
805
|
+
return _mm_cmpneq_ss(a, b);
|
|
806
|
+
#elif defined(SIMDE_ASSUME_VECTORIZATION)
|
|
807
|
+
return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b));
|
|
808
|
+
#else
|
|
809
|
+
simde__m128_private
|
|
810
|
+
r_,
|
|
811
|
+
a_ = simde__m128_to_private(a),
|
|
812
|
+
b_ = simde__m128_to_private(b);
|
|
813
|
+
|
|
814
|
+
r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0);
|
|
815
|
+
SIMDE__VECTORIZE
|
|
816
|
+
for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
817
|
+
r_.u32[i] = a_.u32[i];
|
|
818
|
+
}
|
|
819
|
+
|
|
820
|
+
return simde__m128_from_private(r_);
|
|
821
|
+
#endif
|
|
822
|
+
}
|
|
823
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
824
|
+
# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b))
|
|
825
|
+
#endif
|
|
826
|
+
|
|
827
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
828
|
+
simde__m128
|
|
829
|
+
simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) {
|
|
830
|
+
return simde_mm_cmplt_ps(a, b);
|
|
831
|
+
}
|
|
832
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
833
|
+
# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b))
|
|
834
|
+
#endif
|
|
835
|
+
|
|
836
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
837
|
+
simde__m128
|
|
838
|
+
simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) {
|
|
839
|
+
return simde_mm_cmplt_ss(a, b);
|
|
840
|
+
}
|
|
841
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
842
|
+
# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b))
|
|
843
|
+
#endif
|
|
844
|
+
|
|
845
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
846
|
+
simde__m128
|
|
847
|
+
simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) {
|
|
848
|
+
return simde_mm_cmple_ps(a, b);
|
|
849
|
+
}
|
|
850
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
851
|
+
# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b))
|
|
852
|
+
#endif
|
|
853
|
+
|
|
854
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
855
|
+
simde__m128
|
|
856
|
+
simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) {
|
|
857
|
+
return simde_mm_cmple_ss(a, b);
|
|
858
|
+
}
|
|
859
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
860
|
+
# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b))
|
|
861
|
+
#endif
|
|
862
|
+
|
|
863
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
864
|
+
simde__m128
|
|
865
|
+
simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) {
|
|
866
|
+
return simde_mm_cmpgt_ps(a, b);
|
|
867
|
+
}
|
|
868
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
869
|
+
# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b))
|
|
870
|
+
#endif
|
|
871
|
+
|
|
872
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
873
|
+
simde__m128
|
|
874
|
+
simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) {
|
|
875
|
+
return simde_mm_cmpgt_ss(a, b);
|
|
876
|
+
}
|
|
877
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
878
|
+
# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b))
|
|
879
|
+
#endif
|
|
880
|
+
|
|
881
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
882
|
+
simde__m128
|
|
883
|
+
simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) {
|
|
884
|
+
return simde_mm_cmpge_ps(a, b);
|
|
885
|
+
}
|
|
886
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
887
|
+
# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b))
|
|
888
|
+
#endif
|
|
889
|
+
|
|
890
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
891
|
+
simde__m128
|
|
892
|
+
simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) {
|
|
893
|
+
return simde_mm_cmpge_ss(a, b);
|
|
894
|
+
}
|
|
895
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
896
|
+
# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b))
|
|
897
|
+
#endif
|
|
898
|
+
|
|
899
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
900
|
+
simde__m128
|
|
901
|
+
simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) {
|
|
902
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
903
|
+
return _mm_cmpord_ps(a, b);
|
|
904
|
+
#else
|
|
905
|
+
simde__m128_private
|
|
906
|
+
r_,
|
|
907
|
+
a_ = simde__m128_to_private(a),
|
|
908
|
+
b_ = simde__m128_to_private(b);
|
|
909
|
+
|
|
910
|
+
#if defined(SIMDE_SSE_NEON)
|
|
911
|
+
/* Note: NEON does not have ordered compare builtin
|
|
912
|
+
Need to compare a eq a and b eq b to check for NaN
|
|
913
|
+
Do AND of results to get final */
|
|
914
|
+
uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32);
|
|
915
|
+
uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32);
|
|
916
|
+
r_.neon_u32 = vandq_u32(ceqaa, ceqbb);
|
|
917
|
+
#elif defined(simde_isnanf)
|
|
918
|
+
SIMDE__VECTORIZE
|
|
919
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
920
|
+
r_.u32[i] = (simde_isnanf(a_.f32[i]) || simde_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0);
|
|
921
|
+
}
|
|
922
|
+
#else
|
|
923
|
+
HEDLEY_UNREACHABLE();
|
|
924
|
+
#endif
|
|
925
|
+
|
|
926
|
+
return simde__m128_from_private(r_);
|
|
927
|
+
#endif
|
|
928
|
+
}
|
|
929
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
930
|
+
# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b))
|
|
931
|
+
#endif
|
|
932
|
+
|
|
933
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
934
|
+
simde__m128
|
|
935
|
+
simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) {
|
|
936
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
937
|
+
return _mm_cmpunord_ps(a, b);
|
|
938
|
+
#else
|
|
939
|
+
simde__m128_private
|
|
940
|
+
r_,
|
|
941
|
+
a_ = simde__m128_to_private(a),
|
|
942
|
+
b_ = simde__m128_to_private(b);
|
|
943
|
+
|
|
944
|
+
#if defined(simde_isnanf)
|
|
945
|
+
SIMDE__VECTORIZE
|
|
946
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
947
|
+
r_.u32[i] = (simde_isnanf(a_.f32[i]) || simde_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0);
|
|
948
|
+
}
|
|
949
|
+
#else
|
|
950
|
+
HEDLEY_UNREACHABLE();
|
|
951
|
+
#endif
|
|
952
|
+
|
|
953
|
+
return simde__m128_from_private(r_);
|
|
954
|
+
#endif
|
|
955
|
+
}
|
|
956
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
957
|
+
# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b))
|
|
958
|
+
#endif
|
|
959
|
+
|
|
960
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
961
|
+
simde__m128
|
|
962
|
+
simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) {
|
|
963
|
+
#if defined(SIMDE_SSE_NATIVE) && !defined(__PGI)
|
|
964
|
+
return _mm_cmpunord_ss(a, b);
|
|
965
|
+
#elif defined(SIMDE_ASSUME_VECTORIZATION)
|
|
966
|
+
return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b));
|
|
967
|
+
#else
|
|
968
|
+
simde__m128_private
|
|
969
|
+
r_,
|
|
970
|
+
a_ = simde__m128_to_private(a),
|
|
971
|
+
b_ = simde__m128_to_private(b);
|
|
972
|
+
|
|
973
|
+
#if defined(simde_isnanf)
|
|
974
|
+
r_.u32[0] = (simde_isnanf(a_.f32[0]) || simde_isnanf(b_.f32[0])) ? ~UINT32_C(0) : UINT32_C(0);
|
|
975
|
+
SIMDE__VECTORIZE
|
|
976
|
+
for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
|
|
977
|
+
r_.u32[i] = a_.u32[i];
|
|
978
|
+
}
|
|
979
|
+
#else
|
|
980
|
+
HEDLEY_UNREACHABLE();
|
|
981
|
+
#endif
|
|
982
|
+
|
|
983
|
+
return simde__m128_from_private(r_);
|
|
984
|
+
#endif
|
|
985
|
+
}
|
|
986
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
987
|
+
# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b))
|
|
988
|
+
#endif
|
|
989
|
+
|
|
990
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
991
|
+
int
|
|
992
|
+
simde_mm_comieq_ss (simde__m128 a, simde__m128 b) {
|
|
993
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
994
|
+
return _mm_comieq_ss(a, b);
|
|
995
|
+
#else
|
|
996
|
+
simde__m128_private
|
|
997
|
+
a_ = simde__m128_to_private(a),
|
|
998
|
+
b_ = simde__m128_to_private(b);
|
|
999
|
+
|
|
1000
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1001
|
+
uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
|
|
1002
|
+
uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
|
|
1003
|
+
uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
|
|
1004
|
+
uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32);
|
|
1005
|
+
return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0);
|
|
1006
|
+
#else
|
|
1007
|
+
return a_.f32[0] == b_.f32[0];
|
|
1008
|
+
#endif
|
|
1009
|
+
#endif
|
|
1010
|
+
}
|
|
1011
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1012
|
+
# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b))
|
|
1013
|
+
#endif
|
|
1014
|
+
|
|
1015
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1016
|
+
int
|
|
1017
|
+
simde_mm_comige_ss (simde__m128 a, simde__m128 b) {
|
|
1018
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
1019
|
+
return _mm_comige_ss(a, b);
|
|
1020
|
+
#else
|
|
1021
|
+
simde__m128_private
|
|
1022
|
+
a_ = simde__m128_to_private(a),
|
|
1023
|
+
b_ = simde__m128_to_private(b);
|
|
1024
|
+
|
|
1025
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1026
|
+
uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
|
|
1027
|
+
uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
|
|
1028
|
+
uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
|
|
1029
|
+
uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32);
|
|
1030
|
+
return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0);
|
|
1031
|
+
#else
|
|
1032
|
+
return a_.f32[0] >= b_.f32[0];
|
|
1033
|
+
#endif
|
|
1034
|
+
#endif
|
|
1035
|
+
}
|
|
1036
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1037
|
+
# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b))
|
|
1038
|
+
#endif
|
|
1039
|
+
|
|
1040
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1041
|
+
int
|
|
1042
|
+
simde_mm_comigt_ss (simde__m128 a, simde__m128 b) {
|
|
1043
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
1044
|
+
return _mm_comigt_ss(a, b);
|
|
1045
|
+
#else
|
|
1046
|
+
simde__m128_private
|
|
1047
|
+
a_ = simde__m128_to_private(a),
|
|
1048
|
+
b_ = simde__m128_to_private(b);
|
|
1049
|
+
|
|
1050
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1051
|
+
uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
|
|
1052
|
+
uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
|
|
1053
|
+
uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
|
|
1054
|
+
uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32);
|
|
1055
|
+
return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0);
|
|
1056
|
+
#else
|
|
1057
|
+
return a_.f32[0] > b_.f32[0];
|
|
1058
|
+
#endif
|
|
1059
|
+
#endif
|
|
1060
|
+
}
|
|
1061
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1062
|
+
# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b))
|
|
1063
|
+
#endif
|
|
1064
|
+
|
|
1065
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1066
|
+
int
|
|
1067
|
+
simde_mm_comile_ss (simde__m128 a, simde__m128 b) {
|
|
1068
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
1069
|
+
return _mm_comile_ss(a, b);
|
|
1070
|
+
#else
|
|
1071
|
+
simde__m128_private
|
|
1072
|
+
a_ = simde__m128_to_private(a),
|
|
1073
|
+
b_ = simde__m128_to_private(b);
|
|
1074
|
+
|
|
1075
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1076
|
+
uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
|
|
1077
|
+
uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
|
|
1078
|
+
uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
|
|
1079
|
+
uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32);
|
|
1080
|
+
return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0);
|
|
1081
|
+
#else
|
|
1082
|
+
return a_.f32[0] <= b_.f32[0];
|
|
1083
|
+
#endif
|
|
1084
|
+
#endif
|
|
1085
|
+
}
|
|
1086
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1087
|
+
# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b))
|
|
1088
|
+
#endif
|
|
1089
|
+
|
|
1090
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1091
|
+
int
|
|
1092
|
+
simde_mm_comilt_ss (simde__m128 a, simde__m128 b) {
|
|
1093
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
1094
|
+
return _mm_comilt_ss(a, b);
|
|
1095
|
+
#else
|
|
1096
|
+
simde__m128_private
|
|
1097
|
+
a_ = simde__m128_to_private(a),
|
|
1098
|
+
b_ = simde__m128_to_private(b);
|
|
1099
|
+
|
|
1100
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1101
|
+
uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
|
|
1102
|
+
uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
|
|
1103
|
+
uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
|
|
1104
|
+
uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32);
|
|
1105
|
+
return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0);
|
|
1106
|
+
#else
|
|
1107
|
+
return a_.f32[0] < b_.f32[0];
|
|
1108
|
+
#endif
|
|
1109
|
+
#endif
|
|
1110
|
+
}
|
|
1111
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1112
|
+
# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b))
|
|
1113
|
+
#endif
|
|
1114
|
+
|
|
1115
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1116
|
+
int
|
|
1117
|
+
simde_mm_comineq_ss (simde__m128 a, simde__m128 b) {
|
|
1118
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
1119
|
+
return _mm_comineq_ss(a, b);
|
|
1120
|
+
#else
|
|
1121
|
+
simde__m128_private
|
|
1122
|
+
a_ = simde__m128_to_private(a),
|
|
1123
|
+
b_ = simde__m128_to_private(b);
|
|
1124
|
+
|
|
1125
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1126
|
+
uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32);
|
|
1127
|
+
uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32);
|
|
1128
|
+
uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
|
|
1129
|
+
uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32));
|
|
1130
|
+
return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0);
|
|
1131
|
+
#else
|
|
1132
|
+
return a_.f32[0] != b_.f32[0];
|
|
1133
|
+
#endif
|
|
1134
|
+
#endif
|
|
1135
|
+
}
|
|
1136
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1137
|
+
# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b))
|
|
1138
|
+
#endif
|
|
1139
|
+
|
|
1140
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1141
|
+
simde__m128
|
|
1142
|
+
simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) {
|
|
1143
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
1144
|
+
return _mm_cvt_pi2ps(a, b);
|
|
1145
|
+
#else
|
|
1146
|
+
simde__m128_private
|
|
1147
|
+
r_,
|
|
1148
|
+
a_ = simde__m128_to_private(a);
|
|
1149
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
1150
|
+
|
|
1151
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1152
|
+
r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32));
|
|
1153
|
+
#elif defined(SIMDE__CONVERT_VECTOR)
|
|
1154
|
+
SIMDE__CONVERT_VECTOR(r_.m64_private[0].f32, b_.i32);
|
|
1155
|
+
r_.m64_private[1] = a_.m64_private[1];
|
|
1156
|
+
|
|
1157
|
+
#else
|
|
1158
|
+
r_.f32[0] = (simde_float32) b_.i32[0];
|
|
1159
|
+
r_.f32[1] = (simde_float32) b_.i32[1];
|
|
1160
|
+
r_.i32[2] = a_.i32[2];
|
|
1161
|
+
r_.i32[3] = a_.i32[3];
|
|
1162
|
+
#endif
|
|
1163
|
+
|
|
1164
|
+
return simde__m128_from_private(r_);
|
|
1165
|
+
#endif
|
|
1166
|
+
}
|
|
1167
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1168
|
+
# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), b)
|
|
1169
|
+
#endif
|
|
1170
|
+
|
|
1171
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1172
|
+
simde__m64
|
|
1173
|
+
simde_mm_cvt_ps2pi (simde__m128 a) {
|
|
1174
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
1175
|
+
return _mm_cvt_ps2pi(a);
|
|
1176
|
+
#else
|
|
1177
|
+
simde__m64_private r_;
|
|
1178
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
|
1179
|
+
|
|
1180
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1181
|
+
r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32));
|
|
1182
|
+
#elif defined(SIMDE__CONVERT_VECTOR) && !defined(__clang__)
|
|
1183
|
+
SIMDE__CONVERT_VECTOR(r_.i32, a_.m64_private[0].f32);
|
|
1184
|
+
#else
|
|
1185
|
+
SIMDE__VECTORIZE
|
|
1186
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
1187
|
+
r_.i32[i] = (int32_t) a_.f32[i];
|
|
1188
|
+
}
|
|
1189
|
+
#endif
|
|
1190
|
+
|
|
1191
|
+
return simde__m64_from_private(r_);
|
|
1192
|
+
#endif
|
|
1193
|
+
}
|
|
1194
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1195
|
+
# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a))
|
|
1196
|
+
#endif
|
|
1197
|
+
|
|
1198
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1199
|
+
simde__m128
|
|
1200
|
+
simde_mm_cvt_si2ss (simde__m128 a, int32_t b) {
|
|
1201
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
1202
|
+
return _mm_cvt_si2ss(a, b);
|
|
1203
|
+
#else
|
|
1204
|
+
simde__m128_private
|
|
1205
|
+
r_,
|
|
1206
|
+
a_ = simde__m128_to_private(a);
|
|
1207
|
+
|
|
1208
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1209
|
+
r_.neon_f32 = vsetq_lane_f32((float) b, a_.neon_f32, 0);
|
|
1210
|
+
#else
|
|
1211
|
+
r_.f32[0] = (simde_float32) b;
|
|
1212
|
+
r_.i32[1] = a_.i32[1];
|
|
1213
|
+
r_.i32[2] = a_.i32[2];
|
|
1214
|
+
r_.i32[3] = a_.i32[3];
|
|
1215
|
+
#endif
|
|
1216
|
+
|
|
1217
|
+
return simde__m128_from_private(r_);
|
|
1218
|
+
#endif
|
|
1219
|
+
}
|
|
1220
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1221
|
+
# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b)
|
|
1222
|
+
#endif
|
|
1223
|
+
|
|
1224
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1225
|
+
int32_t
|
|
1226
|
+
simde_mm_cvt_ss2si (simde__m128 a) {
|
|
1227
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
1228
|
+
return _mm_cvt_ss2si(a);
|
|
1229
|
+
#else
|
|
1230
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
|
1231
|
+
|
|
1232
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1233
|
+
return SIMDE_CONVERT_FTOI(int32_t, nearbyintf(vgetq_lane_f32(a_.neon_f32, 0)));
|
|
1234
|
+
#elif defined(SIMDE_HAVE_MATH_H)
|
|
1235
|
+
return SIMDE_CONVERT_FTOI(int32_t, nearbyintf(a_.f32[0]));
|
|
1236
|
+
#else
|
|
1237
|
+
HEDLEY_UNREACHABLE();
|
|
1238
|
+
#endif
|
|
1239
|
+
#endif
|
|
1240
|
+
}
|
|
1241
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1242
|
+
# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a))
|
|
1243
|
+
#endif
|
|
1244
|
+
|
|
1245
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1246
|
+
simde__m128
|
|
1247
|
+
simde_mm_cvtpi16_ps (simde__m64 a) {
|
|
1248
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
1249
|
+
return _mm_cvtpi16_ps(a);
|
|
1250
|
+
#else
|
|
1251
|
+
simde__m128_private r_;
|
|
1252
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1253
|
+
|
|
1254
|
+
#if defined(SIMDE_SSE_NEON) && 0 /* TODO */
|
|
1255
|
+
r_.neon_f32 = vmovl_s16(vget_low_s16(vuzp1q_s16(a_.neon_i16, vmovq_n_s16(0))));
|
|
1256
|
+
#elif defined(SIMDE__CONVERT_VECTOR)
|
|
1257
|
+
SIMDE__CONVERT_VECTOR(r_.f32, a_.i16);
|
|
1258
|
+
#else
|
|
1259
|
+
SIMDE__VECTORIZE
|
|
1260
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
1261
|
+
simde_float32 v = a_.i16[i];
|
|
1262
|
+
r_.f32[i] = v;
|
|
1263
|
+
}
|
|
1264
|
+
#endif
|
|
1265
|
+
|
|
1266
|
+
return simde__m128_from_private(r_);
|
|
1267
|
+
#endif
|
|
1268
|
+
}
|
|
1269
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1270
|
+
# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a)
|
|
1271
|
+
#endif
|
|
1272
|
+
|
|
1273
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1274
|
+
simde__m128
|
|
1275
|
+
simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) {
|
|
1276
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
1277
|
+
return _mm_cvtpi32_ps(a, b);
|
|
1278
|
+
#else
|
|
1279
|
+
simde__m128_private
|
|
1280
|
+
r_,
|
|
1281
|
+
a_ = simde__m128_to_private(a);
|
|
1282
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
1283
|
+
|
|
1284
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1285
|
+
r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32));
|
|
1286
|
+
#elif defined(SIMDE__CONVERT_VECTOR)
|
|
1287
|
+
SIMDE__CONVERT_VECTOR(r_.m64_private[0].f32, b_.i32);
|
|
1288
|
+
r_.m64_private[1] = a_.m64_private[1];
|
|
1289
|
+
#else
|
|
1290
|
+
r_.f32[0] = (simde_float32) b_.i32[0];
|
|
1291
|
+
r_.f32[1] = (simde_float32) b_.i32[1];
|
|
1292
|
+
r_.i32[2] = a_.i32[2];
|
|
1293
|
+
r_.i32[3] = a_.i32[3];
|
|
1294
|
+
#endif
|
|
1295
|
+
|
|
1296
|
+
return simde__m128_from_private(r_);
|
|
1297
|
+
#endif
|
|
1298
|
+
}
|
|
1299
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1300
|
+
# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b)
|
|
1301
|
+
#endif
|
|
1302
|
+
|
|
1303
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1304
|
+
simde__m128
|
|
1305
|
+
simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) {
|
|
1306
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
1307
|
+
return _mm_cvtpi32x2_ps(a, b);
|
|
1308
|
+
#else
|
|
1309
|
+
simde__m128_private r_;
|
|
1310
|
+
simde__m64_private
|
|
1311
|
+
a_ = simde__m64_to_private(a),
|
|
1312
|
+
b_ = simde__m64_to_private(b);
|
|
1313
|
+
|
|
1314
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1315
|
+
r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32));
|
|
1316
|
+
#elif defined(SIMDE__CONVERT_VECTOR)
|
|
1317
|
+
SIMDE__CONVERT_VECTOR(r_.m64_private[0].f32, a_.i32);
|
|
1318
|
+
SIMDE__CONVERT_VECTOR(r_.m64_private[1].f32, b_.i32);
|
|
1319
|
+
#else
|
|
1320
|
+
r_.f32[0] = (simde_float32) a_.i32[0];
|
|
1321
|
+
r_.f32[1] = (simde_float32) a_.i32[1];
|
|
1322
|
+
r_.f32[2] = (simde_float32) b_.i32[0];
|
|
1323
|
+
r_.f32[3] = (simde_float32) b_.i32[1];
|
|
1324
|
+
#endif
|
|
1325
|
+
|
|
1326
|
+
return simde__m128_from_private(r_);
|
|
1327
|
+
#endif
|
|
1328
|
+
}
|
|
1329
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1330
|
+
# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b)
|
|
1331
|
+
#endif
|
|
1332
|
+
|
|
1333
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1334
|
+
simde__m128
|
|
1335
|
+
simde_mm_cvtpi8_ps (simde__m64 a) {
|
|
1336
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
1337
|
+
return _mm_cvtpi8_ps(a);
|
|
1338
|
+
#else
|
|
1339
|
+
simde__m128_private r_;
|
|
1340
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1341
|
+
|
|
1342
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1343
|
+
r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8))));
|
|
1344
|
+
#else
|
|
1345
|
+
r_.f32[0] = (simde_float32) a_.i8[0];
|
|
1346
|
+
r_.f32[1] = (simde_float32) a_.i8[1];
|
|
1347
|
+
r_.f32[2] = (simde_float32) a_.i8[2];
|
|
1348
|
+
r_.f32[3] = (simde_float32) a_.i8[3];
|
|
1349
|
+
#endif
|
|
1350
|
+
|
|
1351
|
+
return simde__m128_from_private(r_);
|
|
1352
|
+
#endif
|
|
1353
|
+
}
|
|
1354
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1355
|
+
# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a)
|
|
1356
|
+
#endif
|
|
1357
|
+
|
|
1358
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1359
|
+
simde__m64
|
|
1360
|
+
simde_mm_cvtps_pi16 (simde__m128 a) {
|
|
1361
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
1362
|
+
return _mm_cvtps_pi16(a);
|
|
1363
|
+
#else
|
|
1364
|
+
simde__m64_private r_;
|
|
1365
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
|
1366
|
+
|
|
1367
|
+
#if defined(SIMDE__CONVERT_VECTOR)
|
|
1368
|
+
SIMDE__CONVERT_VECTOR(r_.i16, a_.f32);
|
|
1369
|
+
#elif defined(SIMDE_SSE_NEON)
|
|
1370
|
+
r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(a_.neon_f32));
|
|
1371
|
+
#else
|
|
1372
|
+
SIMDE__VECTORIZE
|
|
1373
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
1374
|
+
r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, a_.f32[i]);
|
|
1375
|
+
}
|
|
1376
|
+
#endif
|
|
1377
|
+
|
|
1378
|
+
return simde__m64_from_private(r_);
|
|
1379
|
+
#endif
|
|
1380
|
+
}
|
|
1381
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1382
|
+
# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a))
|
|
1383
|
+
#endif
|
|
1384
|
+
|
|
1385
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1386
|
+
simde__m64
|
|
1387
|
+
simde_mm_cvtps_pi32 (simde__m128 a) {
|
|
1388
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
1389
|
+
return _mm_cvtps_pi32(a);
|
|
1390
|
+
#else
|
|
1391
|
+
simde__m64_private r_;
|
|
1392
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
|
1393
|
+
|
|
1394
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1395
|
+
r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32));
|
|
1396
|
+
#elif defined(SIMDE__CONVERT_VECTOR)
|
|
1397
|
+
SIMDE__CONVERT_VECTOR(r_.i32, a_.m64_private[0].f32);
|
|
1398
|
+
#else
|
|
1399
|
+
SIMDE__VECTORIZE
|
|
1400
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
1401
|
+
r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, a_.f32[i]);
|
|
1402
|
+
}
|
|
1403
|
+
#endif
|
|
1404
|
+
|
|
1405
|
+
return simde__m64_from_private(r_);
|
|
1406
|
+
#endif
|
|
1407
|
+
}
|
|
1408
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1409
|
+
# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a))
|
|
1410
|
+
#endif
|
|
1411
|
+
|
|
1412
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1413
|
+
simde__m64
|
|
1414
|
+
simde_mm_cvtps_pi8 (simde__m128 a) {
|
|
1415
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
1416
|
+
return _mm_cvtps_pi8(a);
|
|
1417
|
+
#else
|
|
1418
|
+
simde__m64_private r_;
|
|
1419
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
|
1420
|
+
|
|
1421
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1422
|
+
int16x4_t b = vmovn_s32(vcvtq_s32_f32(a_.neon_f32));
|
|
1423
|
+
int16x8_t c = vcombine_s16(b, vmov_n_s16(0));
|
|
1424
|
+
r_.neon_i8 = vmovn_s16(c);
|
|
1425
|
+
#else
|
|
1426
|
+
SIMDE__VECTORIZE
|
|
1427
|
+
for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {
|
|
1428
|
+
r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, a_.f32[i]);
|
|
1429
|
+
}
|
|
1430
|
+
/* Note: the upper half is undefined */
|
|
1431
|
+
#endif
|
|
1432
|
+
|
|
1433
|
+
return simde__m64_from_private(r_);
|
|
1434
|
+
#endif
|
|
1435
|
+
}
|
|
1436
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1437
|
+
# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a))
|
|
1438
|
+
#endif
|
|
1439
|
+
|
|
1440
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1441
|
+
simde__m128
|
|
1442
|
+
simde_mm_cvtpu16_ps (simde__m64 a) {
|
|
1443
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
1444
|
+
return _mm_cvtpu16_ps(a);
|
|
1445
|
+
#else
|
|
1446
|
+
simde__m128_private r_;
|
|
1447
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1448
|
+
|
|
1449
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1450
|
+
r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16));
|
|
1451
|
+
#elif defined(SIMDE__CONVERT_VECTOR)
|
|
1452
|
+
SIMDE__CONVERT_VECTOR(r_.f32, a_.u16);
|
|
1453
|
+
#else
|
|
1454
|
+
SIMDE__VECTORIZE
|
|
1455
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
1456
|
+
r_.f32[i] = (simde_float32) a_.u16[i];
|
|
1457
|
+
}
|
|
1458
|
+
#endif
|
|
1459
|
+
|
|
1460
|
+
return simde__m128_from_private(r_);
|
|
1461
|
+
#endif
|
|
1462
|
+
}
|
|
1463
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1464
|
+
# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a)
|
|
1465
|
+
#endif
|
|
1466
|
+
|
|
1467
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1468
|
+
simde__m128
|
|
1469
|
+
simde_mm_cvtpu8_ps (simde__m64 a) {
|
|
1470
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
1471
|
+
return _mm_cvtpu8_ps(a);
|
|
1472
|
+
#else
|
|
1473
|
+
simde__m128_private r_;
|
|
1474
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1475
|
+
|
|
1476
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1477
|
+
r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8))));
|
|
1478
|
+
#else
|
|
1479
|
+
SIMDE__VECTORIZE
|
|
1480
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
1481
|
+
r_.f32[i] = (simde_float32) a_.u8[i];
|
|
1482
|
+
}
|
|
1483
|
+
#endif
|
|
1484
|
+
|
|
1485
|
+
return simde__m128_from_private(r_);
|
|
1486
|
+
#endif
|
|
1487
|
+
}
|
|
1488
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1489
|
+
# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a)
|
|
1490
|
+
#endif
|
|
1491
|
+
|
|
1492
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1493
|
+
simde__m128
|
|
1494
|
+
simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) {
|
|
1495
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
1496
|
+
return _mm_cvtsi32_ss(a, b);
|
|
1497
|
+
#else
|
|
1498
|
+
simde__m128_private r_;
|
|
1499
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
|
1500
|
+
|
|
1501
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1502
|
+
r_.neon_f32 = vsetq_lane_f32((simde_float32) b, a_.neon_f32, 0);
|
|
1503
|
+
#else
|
|
1504
|
+
r_.f32[0] = (simde_float32) b;
|
|
1505
|
+
SIMDE__VECTORIZE
|
|
1506
|
+
for (size_t i = 1 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
1507
|
+
r_.i32[i] = a_.i32[i];
|
|
1508
|
+
}
|
|
1509
|
+
#endif
|
|
1510
|
+
|
|
1511
|
+
return simde__m128_from_private(r_);
|
|
1512
|
+
#endif
|
|
1513
|
+
}
|
|
1514
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1515
|
+
# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b)
|
|
1516
|
+
#endif
|
|
1517
|
+
|
|
1518
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1519
|
+
simde__m128
|
|
1520
|
+
simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) {
|
|
1521
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64)
|
|
1522
|
+
#if !defined(__PGI)
|
|
1523
|
+
return _mm_cvtsi64_ss(a, b);
|
|
1524
|
+
#else
|
|
1525
|
+
return _mm_cvtsi64x_ss(a, b);
|
|
1526
|
+
#endif
|
|
1527
|
+
#else
|
|
1528
|
+
simde__m128_private r_;
|
|
1529
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
|
1530
|
+
|
|
1531
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1532
|
+
r_.neon_f32 = vsetq_lane_f32((simde_float32) b, a_.neon_f32, 0);
|
|
1533
|
+
#else
|
|
1534
|
+
r_ = a_;
|
|
1535
|
+
r_.f32[0] = (simde_float32) b;
|
|
1536
|
+
#endif
|
|
1537
|
+
|
|
1538
|
+
return simde__m128_from_private(r_);
|
|
1539
|
+
#endif
|
|
1540
|
+
}
|
|
1541
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1542
|
+
# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b)
|
|
1543
|
+
#endif
|
|
1544
|
+
|
|
1545
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1546
|
+
simde_float32
|
|
1547
|
+
simde_mm_cvtss_f32 (simde__m128 a) {
|
|
1548
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
1549
|
+
return _mm_cvtss_f32(a);
|
|
1550
|
+
#else
|
|
1551
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
|
1552
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1553
|
+
return vgetq_lane_f32(a_.neon_f32, 0);
|
|
1554
|
+
#else
|
|
1555
|
+
return a_.f32[0];
|
|
1556
|
+
#endif
|
|
1557
|
+
#endif
|
|
1558
|
+
}
|
|
1559
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1560
|
+
# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a))
|
|
1561
|
+
#endif
|
|
1562
|
+
|
|
1563
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1564
|
+
int32_t
|
|
1565
|
+
simde_mm_cvtss_si32 (simde__m128 a) {
|
|
1566
|
+
return simde_mm_cvt_ss2si(a);
|
|
1567
|
+
}
|
|
1568
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1569
|
+
# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a))
|
|
1570
|
+
#endif
|
|
1571
|
+
|
|
1572
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1573
|
+
int64_t
|
|
1574
|
+
simde_mm_cvtss_si64 (simde__m128 a) {
|
|
1575
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64)
|
|
1576
|
+
#if !defined(__PGI)
|
|
1577
|
+
return _mm_cvtss_si64(a);
|
|
1578
|
+
#else
|
|
1579
|
+
return _mm_cvtss_si64x(a);
|
|
1580
|
+
#endif
|
|
1581
|
+
#else
|
|
1582
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
|
1583
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1584
|
+
return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0));
|
|
1585
|
+
#else
|
|
1586
|
+
return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]);
|
|
1587
|
+
#endif
|
|
1588
|
+
#endif
|
|
1589
|
+
}
|
|
1590
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1591
|
+
# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a))
|
|
1592
|
+
#endif
|
|
1593
|
+
|
|
1594
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1595
|
+
simde__m64
|
|
1596
|
+
simde_mm_cvtt_ps2pi (simde__m128 a) {
|
|
1597
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
1598
|
+
return _mm_cvtt_ps2pi(a);
|
|
1599
|
+
#else
|
|
1600
|
+
simde__m64_private r_;
|
|
1601
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
|
1602
|
+
|
|
1603
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1604
|
+
r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32));
|
|
1605
|
+
#elif defined(SIMDE__CONVERT_VECTOR)
|
|
1606
|
+
SIMDE__CONVERT_VECTOR(r_.i32, a_.m64_private[0].f32);
|
|
1607
|
+
#else
|
|
1608
|
+
SIMDE__VECTORIZE
|
|
1609
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
1610
|
+
r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, a_.f32[i]);
|
|
1611
|
+
}
|
|
1612
|
+
#endif
|
|
1613
|
+
|
|
1614
|
+
return simde__m64_from_private(r_);
|
|
1615
|
+
#endif
|
|
1616
|
+
}
|
|
1617
|
+
#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a)
|
|
1618
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1619
|
+
# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a))
|
|
1620
|
+
# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a))
|
|
1621
|
+
#endif
|
|
1622
|
+
|
|
1623
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1624
|
+
int32_t
|
|
1625
|
+
simde_mm_cvtt_ss2si (simde__m128 a) {
|
|
1626
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
1627
|
+
return _mm_cvtt_ss2si(a);
|
|
1628
|
+
#else
|
|
1629
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
|
1630
|
+
|
|
1631
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1632
|
+
return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0));
|
|
1633
|
+
#else
|
|
1634
|
+
return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]);
|
|
1635
|
+
#endif
|
|
1636
|
+
#endif
|
|
1637
|
+
}
|
|
1638
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1639
|
+
# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a))
|
|
1640
|
+
# define _mm_cvttss_si32(a) simde_mm_cvttss_si32((a))
|
|
1641
|
+
#endif
|
|
1642
|
+
|
|
1643
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1644
|
+
int64_t
|
|
1645
|
+
simde_mm_cvttss_si64 (simde__m128 a) {
|
|
1646
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER)
|
|
1647
|
+
#if defined(__PGI)
|
|
1648
|
+
return _mm_cvttss_si64x(a);
|
|
1649
|
+
#else
|
|
1650
|
+
return _mm_cvttss_si64(a);
|
|
1651
|
+
#endif
|
|
1652
|
+
#else
|
|
1653
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
|
1654
|
+
|
|
1655
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1656
|
+
return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0));
|
|
1657
|
+
#else
|
|
1658
|
+
return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]);
|
|
1659
|
+
#endif
|
|
1660
|
+
#endif
|
|
1661
|
+
}
|
|
1662
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1663
|
+
# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a))
|
|
1664
|
+
#endif
|
|
1665
|
+
|
|
1666
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1667
|
+
simde__m128
|
|
1668
|
+
simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) {
|
|
1669
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
1670
|
+
return _mm_cmpord_ss(a, b);
|
|
1671
|
+
#elif defined(SIMDE_ASSUME_VECTORIZATION)
|
|
1672
|
+
return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b));
|
|
1673
|
+
#else
|
|
1674
|
+
simde__m128_private
|
|
1675
|
+
r_,
|
|
1676
|
+
a_ = simde__m128_to_private(a);
|
|
1677
|
+
|
|
1678
|
+
#if defined(simde_isnanf)
|
|
1679
|
+
r_.u32[0] = (simde_isnanf(simde_mm_cvtss_f32(a)) || simde_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0);
|
|
1680
|
+
SIMDE__VECTORIZE
|
|
1681
|
+
for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
1682
|
+
r_.u32[i] = a_.u32[i];
|
|
1683
|
+
}
|
|
1684
|
+
#else
|
|
1685
|
+
HEDLEY_UNREACHABLE();
|
|
1686
|
+
#endif
|
|
1687
|
+
|
|
1688
|
+
return simde__m128_from_private(r_);
|
|
1689
|
+
#endif
|
|
1690
|
+
}
|
|
1691
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1692
|
+
# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b))
|
|
1693
|
+
#endif
|
|
1694
|
+
|
|
1695
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1696
|
+
simde__m128
|
|
1697
|
+
simde_mm_div_ps (simde__m128 a, simde__m128 b) {
|
|
1698
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
1699
|
+
return _mm_div_ps(a, b);
|
|
1700
|
+
#else
|
|
1701
|
+
simde__m128_private
|
|
1702
|
+
r_,
|
|
1703
|
+
a_ = simde__m128_to_private(a),
|
|
1704
|
+
b_ = simde__m128_to_private(b);
|
|
1705
|
+
|
|
1706
|
+
#if defined(SIMDE_SSE_NEON) && defined(SIMDE_ARCH_AARCH64)
|
|
1707
|
+
r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32);
|
|
1708
|
+
#elif defined(SIMDE_SSE_NEON)
|
|
1709
|
+
float32x4_t recip0 = vrecpeq_f32(b_.neon_f32);
|
|
1710
|
+
float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32));
|
|
1711
|
+
r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1);
|
|
1712
|
+
#elif defined(SIMDE_SSE_WASM_SIMD128)
|
|
1713
|
+
r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128);
|
|
1714
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
1715
|
+
r_.f32 = a_.f32 / b_.f32;
|
|
1716
|
+
#else
|
|
1717
|
+
SIMDE__VECTORIZE
|
|
1718
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
1719
|
+
r_.f32[i] = a_.f32[i] / b_.f32[i];
|
|
1720
|
+
}
|
|
1721
|
+
#endif
|
|
1722
|
+
|
|
1723
|
+
return simde__m128_from_private(r_);
|
|
1724
|
+
#endif
|
|
1725
|
+
}
|
|
1726
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1727
|
+
# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b))
|
|
1728
|
+
#endif
|
|
1729
|
+
|
|
1730
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1731
|
+
simde__m128
|
|
1732
|
+
simde_mm_div_ss (simde__m128 a, simde__m128 b) {
|
|
1733
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
1734
|
+
return _mm_div_ss(a, b);
|
|
1735
|
+
#elif defined(SIMDE_ASSUME_VECTORIZATION)
|
|
1736
|
+
return simde_mm_move_ss(a, simde_mm_div_ps(a, b));
|
|
1737
|
+
#else
|
|
1738
|
+
simde__m128_private
|
|
1739
|
+
r_,
|
|
1740
|
+
a_ = simde__m128_to_private(a),
|
|
1741
|
+
b_ = simde__m128_to_private(b);
|
|
1742
|
+
|
|
1743
|
+
r_.f32[0] = a_.f32[0] / b_.f32[0];
|
|
1744
|
+
SIMDE__VECTORIZE
|
|
1745
|
+
for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
1746
|
+
r_.f32[i] = a_.f32[i];
|
|
1747
|
+
}
|
|
1748
|
+
|
|
1749
|
+
return simde__m128_from_private(r_);
|
|
1750
|
+
#endif
|
|
1751
|
+
}
|
|
1752
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1753
|
+
# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b))
|
|
1754
|
+
#endif
|
|
1755
|
+
|
|
1756
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1757
|
+
int16_t
|
|
1758
|
+
simde_mm_extract_pi16 (simde__m64 a, const int imm8)
|
|
1759
|
+
HEDLEY_REQUIRE_MSG((imm8 & 3) == imm8, "imm8 must be in range [0, 3]") {
|
|
1760
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1761
|
+
return a_.i16[imm8];
|
|
1762
|
+
}
|
|
1763
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX) && !defined(HEDLEY_PGI_VERSION)
|
|
1764
|
+
# if HEDLEY_HAS_WARNING("-Wvector-conversion")
|
|
1765
|
+
/* https://bugs.llvm.org/show_bug.cgi?id=44589 */
|
|
1766
|
+
# define simde_mm_extract_pi16(a, imm8) ( \
|
|
1767
|
+
HEDLEY_DIAGNOSTIC_PUSH \
|
|
1768
|
+
_Pragma("clang diagnostic ignored \"-Wvector-conversion\"") \
|
|
1769
|
+
HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16((a), (imm8))) \
|
|
1770
|
+
HEDLEY_DIAGNOSTIC_POP \
|
|
1771
|
+
)
|
|
1772
|
+
# else
|
|
1773
|
+
# define simde_mm_extract_pi16(a, imm8) ((int16_t) (_mm_extract_pi16(a, imm8)))
|
|
1774
|
+
# endif
|
|
1775
|
+
#elif defined(SIMDE_SSE_NEON)
|
|
1776
|
+
# define simde_mm_extract_pi16(a, imm8) ((int16_t) (vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8)))
|
|
1777
|
+
#endif
|
|
1778
|
+
#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8)
|
|
1779
|
+
|
|
1780
|
+
/* Rounding-mode constants. With native SSE they mirror the _MM_ROUND_*
 * values; otherwise each one takes the matching <fenv.h> FE_* value when that
 * macro is defined, and falls back to the default enumerator value when it
 * is not. */
enum {
#if defined(SIMDE_SSE_NATIVE)
  SIMDE_MM_ROUND_NEAREST     = _MM_ROUND_NEAREST,
  SIMDE_MM_ROUND_DOWN        = _MM_ROUND_DOWN,
  SIMDE_MM_ROUND_UP          = _MM_ROUND_UP,
  SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO
#else
  #if defined(FE_TONEAREST)
    SIMDE_MM_ROUND_NEAREST = FE_TONEAREST,
  #else
    SIMDE_MM_ROUND_NEAREST,
  #endif

  #if defined(FE_DOWNWARD)
    SIMDE_MM_ROUND_DOWN = FE_DOWNWARD,
  #else
    SIMDE_MM_ROUND_DOWN,
  #endif

  #if defined(FE_UPWARD)
    SIMDE_MM_ROUND_UP = FE_UPWARD,
  #else
    SIMDE_MM_ROUND_UP,
  #endif

  #if defined(FE_TOWARDZERO)
    SIMDE_MM_ROUND_TOWARD_ZERO = FE_TOWARDZERO
  #else
    SIMDE_MM_ROUND_TOWARD_ZERO
  #endif
#endif
};
|
|
1811
|
+
|
|
1812
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1813
|
+
unsigned int
|
|
1814
|
+
SIMDE_MM_GET_ROUNDING_MODE(void) {
|
|
1815
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
1816
|
+
return _MM_GET_ROUNDING_MODE();
|
|
1817
|
+
#elif defined(SIMDE_HAVE_MATH_H)
|
|
1818
|
+
return (unsigned int) fegetround();
|
|
1819
|
+
#else
|
|
1820
|
+
HEDLEY_UNREACHABLE();
|
|
1821
|
+
#endif
|
|
1822
|
+
}
|
|
1823
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1824
|
+
# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), imm8)
|
|
1825
|
+
#endif
|
|
1826
|
+
|
|
1827
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1828
|
+
void
|
|
1829
|
+
SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) {
|
|
1830
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
1831
|
+
_MM_SET_ROUNDING_MODE(a);
|
|
1832
|
+
#else
|
|
1833
|
+
fesetround((int) a);
|
|
1834
|
+
#endif
|
|
1835
|
+
}
|
|
1836
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1837
|
+
# define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a)
|
|
1838
|
+
#endif
|
|
1839
|
+
|
|
1840
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1841
|
+
simde__m64
|
|
1842
|
+
simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8)
|
|
1843
|
+
HEDLEY_REQUIRE_MSG((imm8 & 3) == imm8, "imm8 must be in range [0, 3]") {
|
|
1844
|
+
simde__m64_private
|
|
1845
|
+
r_,
|
|
1846
|
+
a_ = simde__m64_to_private(a);
|
|
1847
|
+
|
|
1848
|
+
r_.i64[0] = a_.i64[0];
|
|
1849
|
+
r_.i16[imm8] = i;
|
|
1850
|
+
|
|
1851
|
+
return simde__m64_from_private(r_);
|
|
1852
|
+
}
|
|
1853
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX) && !defined(__PGI)
|
|
1854
|
+
# if HEDLEY_HAS_WARNING("-Wvector-conversion")
|
|
1855
|
+
/* https://bugs.llvm.org/show_bug.cgi?id=44589 */
|
|
1856
|
+
# define ssimde_mm_insert_pi16(a, i, imm8) ( \
|
|
1857
|
+
HEDLEY_DIAGNOSTIC_PUSH \
|
|
1858
|
+
_Pragma("clang diagnostic ignored \"-Wvector-conversion\"") \
|
|
1859
|
+
(_mm_insert_pi16((a), (i), (imm8))) \
|
|
1860
|
+
HEDLEY_DIAGNOSTIC_POP \
|
|
1861
|
+
)
|
|
1862
|
+
# else
|
|
1863
|
+
# define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8)
|
|
1864
|
+
# endif
|
|
1865
|
+
#elif defined(SIMDE_SSE_NEON)
|
|
1866
|
+
# define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_private((simde__m64_private) { .neon_i16 = vset_lane_s16(i, simde__m64_to_private(a).neon_i16, (imm8)) })
|
|
1867
|
+
#endif
|
|
1868
|
+
#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8))
|
|
1869
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1870
|
+
# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8)
|
|
1871
|
+
# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8)
|
|
1872
|
+
#endif
|
|
1873
|
+
|
|
1874
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1875
|
+
simde__m128
|
|
1876
|
+
simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) {
|
|
1877
|
+
simde_assert_aligned(16, mem_addr);
|
|
1878
|
+
|
|
1879
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
1880
|
+
return _mm_load_ps(mem_addr);
|
|
1881
|
+
#else
|
|
1882
|
+
simde__m128_private r_;
|
|
1883
|
+
|
|
1884
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1885
|
+
r_.neon_f32 = vld1q_f32(mem_addr);
|
|
1886
|
+
#elif defined(SIMDE_SSE_POWER_ALTIVEC)
|
|
1887
|
+
r_.altivec_f32 = vec_ld(0, mem_addr);
|
|
1888
|
+
#else
|
|
1889
|
+
r_ = *SIMDE_CAST_ALIGN(16, simde__m128_private const*, mem_addr);
|
|
1890
|
+
#endif
|
|
1891
|
+
|
|
1892
|
+
return simde__m128_from_private(r_);
|
|
1893
|
+
#endif
|
|
1894
|
+
}
|
|
1895
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1896
|
+
# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr)
|
|
1897
|
+
#endif
|
|
1898
|
+
|
|
1899
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1900
|
+
simde__m128
|
|
1901
|
+
simde_mm_load_ps1 (simde_float32 const* mem_addr) {
|
|
1902
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
1903
|
+
return _mm_load_ps1(mem_addr);
|
|
1904
|
+
#else
|
|
1905
|
+
simde__m128_private r_;
|
|
1906
|
+
|
|
1907
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1908
|
+
r_.neon_f32 = vld1q_dup_f32(mem_addr);
|
|
1909
|
+
#else
|
|
1910
|
+
r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr));
|
|
1911
|
+
#endif
|
|
1912
|
+
|
|
1913
|
+
return simde__m128_from_private(r_);
|
|
1914
|
+
#endif
|
|
1915
|
+
}
|
|
1916
|
+
#define simde_mm_load1_ps(mem_addr) simde_mm_load_ps1(mem_addr)
|
|
1917
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1918
|
+
# define _mm_load_ps1(mem_addr) simde_mm_load_ps1(mem_addr)
|
|
1919
|
+
# define _mm_load1_ps(mem_addr) simde_mm_load_ps1(mem_addr)
|
|
1920
|
+
#endif
|
|
1921
|
+
|
|
1922
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1923
|
+
simde__m128
|
|
1924
|
+
simde_mm_load_ss (simde_float32 const* mem_addr) {
|
|
1925
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
1926
|
+
return _mm_load_ss(mem_addr);
|
|
1927
|
+
#else
|
|
1928
|
+
simde__m128_private r_;
|
|
1929
|
+
|
|
1930
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1931
|
+
r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0);
|
|
1932
|
+
#else
|
|
1933
|
+
r_.f32[0] = *mem_addr;
|
|
1934
|
+
r_.i32[1] = 0;
|
|
1935
|
+
r_.i32[2] = 0;
|
|
1936
|
+
r_.i32[3] = 0;
|
|
1937
|
+
#endif
|
|
1938
|
+
|
|
1939
|
+
return simde__m128_from_private(r_);
|
|
1940
|
+
#endif
|
|
1941
|
+
}
|
|
1942
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1943
|
+
# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr)
|
|
1944
|
+
#endif
|
|
1945
|
+
|
|
1946
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1947
|
+
simde__m128
|
|
1948
|
+
simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) {
|
|
1949
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
1950
|
+
return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr));
|
|
1951
|
+
#else
|
|
1952
|
+
simde__m128_private
|
|
1953
|
+
r_,
|
|
1954
|
+
a_ = simde__m128_to_private(a);
|
|
1955
|
+
|
|
1956
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1957
|
+
r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)));
|
|
1958
|
+
#else
|
|
1959
|
+
simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr);
|
|
1960
|
+
r_.f32[0] = a_.f32[0];
|
|
1961
|
+
r_.f32[1] = a_.f32[1];
|
|
1962
|
+
r_.f32[2] = b_.f32[0];
|
|
1963
|
+
r_.f32[3] = b_.f32[1];
|
|
1964
|
+
#endif
|
|
1965
|
+
|
|
1966
|
+
return simde__m128_from_private(r_);
|
|
1967
|
+
#endif
|
|
1968
|
+
}
|
|
1969
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
1970
|
+
# define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr))
|
|
1971
|
+
#endif
|
|
1972
|
+
|
|
1973
|
+
/* The SSE documentation says that there are no alignment requirements
|
|
1974
|
+
for mem_addr. Unfortunately they used the __m64 type for the argument
|
|
1975
|
+
which is supposed to be 8-byte aligned, so some compilers (like clang
|
|
1976
|
+
with -Wcast-align) will generate a warning if you try to cast, say,
|
|
1977
|
+
a simde_float32* to a simde__m64* for this function.
|
|
1978
|
+
|
|
1979
|
+
I think the choice of argument type is unfortunate, but I do think we
|
|
1980
|
+
need to stick to it here. If there is demand I can always add something
|
|
1981
|
+
like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */
|
|
1982
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1983
|
+
simde__m128
|
|
1984
|
+
simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) {
|
|
1985
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
1986
|
+
return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr));
|
|
1987
|
+
#else
|
|
1988
|
+
simde__m128_private
|
|
1989
|
+
r_,
|
|
1990
|
+
a_ = simde__m128_to_private(a);
|
|
1991
|
+
|
|
1992
|
+
#if defined(SIMDE_SSE_NEON)
|
|
1993
|
+
r_.neon_f32 = vcombine_f32(vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32));
|
|
1994
|
+
#else
|
|
1995
|
+
simde__m64_private b_;
|
|
1996
|
+
simde_memcpy(&b_, mem_addr, sizeof(b_));
|
|
1997
|
+
r_.i32[0] = b_.i32[0];
|
|
1998
|
+
r_.i32[1] = b_.i32[1];
|
|
1999
|
+
r_.i32[2] = a_.i32[2];
|
|
2000
|
+
r_.i32[3] = a_.i32[3];
|
|
2001
|
+
#endif
|
|
2002
|
+
|
|
2003
|
+
return simde__m128_from_private(r_);
|
|
2004
|
+
#endif
|
|
2005
|
+
}
|
|
2006
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2007
|
+
# define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr))
|
|
2008
|
+
#endif
|
|
2009
|
+
|
|
2010
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2011
|
+
simde__m128
|
|
2012
|
+
simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) {
|
|
2013
|
+
simde_assert_aligned(16, mem_addr);
|
|
2014
|
+
|
|
2015
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
2016
|
+
return _mm_loadr_ps(mem_addr);
|
|
2017
|
+
#else
|
|
2018
|
+
simde__m128_private
|
|
2019
|
+
r_,
|
|
2020
|
+
v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr));
|
|
2021
|
+
|
|
2022
|
+
#if defined(SIMDE_SSE_NEON)
|
|
2023
|
+
r_.neon_f32 = vrev64q_f32(v_.neon_f32);
|
|
2024
|
+
r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2);
|
|
2025
|
+
#elif defined(SIMDE__SHUFFLE_VECTOR)
|
|
2026
|
+
r_.f32 = SIMDE__SHUFFLE_VECTOR(32, 16, v_.f32, v_.f32, 3, 2, 1, 0);
|
|
2027
|
+
#else
|
|
2028
|
+
r_.f32[0] = v_.f32[3];
|
|
2029
|
+
r_.f32[1] = v_.f32[2];
|
|
2030
|
+
r_.f32[2] = v_.f32[1];
|
|
2031
|
+
r_.f32[3] = v_.f32[0];
|
|
2032
|
+
#endif
|
|
2033
|
+
|
|
2034
|
+
return simde__m128_from_private(r_);
|
|
2035
|
+
#endif
|
|
2036
|
+
}
|
|
2037
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2038
|
+
# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr)
|
|
2039
|
+
#endif
|
|
2040
|
+
|
|
2041
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2042
|
+
simde__m128
|
|
2043
|
+
simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) {
|
|
2044
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
2045
|
+
return _mm_loadu_ps(mem_addr);
|
|
2046
|
+
#else
|
|
2047
|
+
simde__m128_private r_;
|
|
2048
|
+
|
|
2049
|
+
#if defined(SIMDE_SSE_NEON)
|
|
2050
|
+
r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr));
|
|
2051
|
+
#else
|
|
2052
|
+
r_.f32[0] = mem_addr[0];
|
|
2053
|
+
r_.f32[1] = mem_addr[1];
|
|
2054
|
+
r_.f32[2] = mem_addr[2];
|
|
2055
|
+
r_.f32[3] = mem_addr[3];
|
|
2056
|
+
#endif
|
|
2057
|
+
|
|
2058
|
+
return simde__m128_from_private(r_);
|
|
2059
|
+
#endif
|
|
2060
|
+
}
|
|
2061
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2062
|
+
# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr)
|
|
2063
|
+
#endif
|
|
2064
|
+
|
|
2065
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2066
|
+
void
|
|
2067
|
+
simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) {
|
|
2068
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
2069
|
+
_mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr));
|
|
2070
|
+
#else
|
|
2071
|
+
simde__m64_private
|
|
2072
|
+
a_ = simde__m64_to_private(a),
|
|
2073
|
+
mask_ = simde__m64_to_private(mask);
|
|
2074
|
+
|
|
2075
|
+
SIMDE__VECTORIZE
|
|
2076
|
+
for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++)
|
|
2077
|
+
if (mask_.i8[i] < 0)
|
|
2078
|
+
mem_addr[i] = a_.i8[i];
|
|
2079
|
+
#endif
|
|
2080
|
+
}
|
|
2081
|
+
#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr)
|
|
2082
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2083
|
+
# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64(a, (mask), mem_addr)
|
|
2084
|
+
#endif
|
|
2085
|
+
|
|
2086
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2087
|
+
simde__m64
|
|
2088
|
+
simde_mm_max_pi16 (simde__m64 a, simde__m64 b) {
|
|
2089
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
2090
|
+
return _mm_max_pi16(a, b);
|
|
2091
|
+
#else
|
|
2092
|
+
simde__m64_private
|
|
2093
|
+
r_,
|
|
2094
|
+
a_ = simde__m64_to_private(a),
|
|
2095
|
+
b_ = simde__m64_to_private(b);
|
|
2096
|
+
|
|
2097
|
+
#if defined(SIMDE_SSE_NEON)
|
|
2098
|
+
r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16);
|
|
2099
|
+
#else
|
|
2100
|
+
SIMDE__VECTORIZE
|
|
2101
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
2102
|
+
r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i];
|
|
2103
|
+
}
|
|
2104
|
+
#endif
|
|
2105
|
+
|
|
2106
|
+
return simde__m64_from_private(r_);
|
|
2107
|
+
#endif
|
|
2108
|
+
}
|
|
2109
|
+
#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b)
|
|
2110
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2111
|
+
# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b)
|
|
2112
|
+
# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b)
|
|
2113
|
+
#endif
|
|
2114
|
+
|
|
2115
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2116
|
+
simde__m128
|
|
2117
|
+
simde_mm_max_ps (simde__m128 a, simde__m128 b) {
|
|
2118
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
2119
|
+
return _mm_max_ps(a, b);
|
|
2120
|
+
#else
|
|
2121
|
+
simde__m128_private
|
|
2122
|
+
r_,
|
|
2123
|
+
a_ = simde__m128_to_private(a),
|
|
2124
|
+
b_ = simde__m128_to_private(b);
|
|
2125
|
+
|
|
2126
|
+
#if defined(SIMDE_SSE_NEON)
|
|
2127
|
+
r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32);
|
|
2128
|
+
#elif defined(SIMDE_SSE_POWER_ALTIVEC)
|
|
2129
|
+
r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32);
|
|
2130
|
+
#else
|
|
2131
|
+
SIMDE__VECTORIZE
|
|
2132
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
2133
|
+
r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i];
|
|
2134
|
+
}
|
|
2135
|
+
#endif
|
|
2136
|
+
|
|
2137
|
+
return simde__m128_from_private(r_);
|
|
2138
|
+
#endif
|
|
2139
|
+
}
|
|
2140
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2141
|
+
# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b))
|
|
2142
|
+
#endif
|
|
2143
|
+
|
|
2144
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2145
|
+
simde__m64
|
|
2146
|
+
simde_mm_max_pu8 (simde__m64 a, simde__m64 b) {
|
|
2147
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
2148
|
+
return _mm_max_pu8(a, b);
|
|
2149
|
+
#else
|
|
2150
|
+
simde__m64_private
|
|
2151
|
+
r_,
|
|
2152
|
+
a_ = simde__m64_to_private(a),
|
|
2153
|
+
b_ = simde__m64_to_private(b);
|
|
2154
|
+
|
|
2155
|
+
#if defined(SIMDE_SSE_NEON)
|
|
2156
|
+
r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8);
|
|
2157
|
+
#else
|
|
2158
|
+
SIMDE__VECTORIZE
|
|
2159
|
+
for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
|
|
2160
|
+
r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i];
|
|
2161
|
+
}
|
|
2162
|
+
#endif
|
|
2163
|
+
|
|
2164
|
+
return simde__m64_from_private(r_);
|
|
2165
|
+
#endif
|
|
2166
|
+
}
|
|
2167
|
+
#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b)
|
|
2168
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2169
|
+
# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b)
|
|
2170
|
+
# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b)
|
|
2171
|
+
#endif
|
|
2172
|
+
|
|
2173
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2174
|
+
simde__m128
|
|
2175
|
+
simde_mm_max_ss (simde__m128 a, simde__m128 b) {
|
|
2176
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
2177
|
+
return _mm_max_ss(a, b);
|
|
2178
|
+
#elif defined(SIMDE_ASSUME_VECTORIZATION)
|
|
2179
|
+
return simde_mm_move_ss(a, simde_mm_max_ps(a, b));
|
|
2180
|
+
#else
|
|
2181
|
+
simde__m128_private
|
|
2182
|
+
r_,
|
|
2183
|
+
a_ = simde__m128_to_private(a),
|
|
2184
|
+
b_ = simde__m128_to_private(b);
|
|
2185
|
+
|
|
2186
|
+
r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0];
|
|
2187
|
+
r_.f32[1] = a_.f32[1];
|
|
2188
|
+
r_.f32[2] = a_.f32[2];
|
|
2189
|
+
r_.f32[3] = a_.f32[3];
|
|
2190
|
+
|
|
2191
|
+
return simde__m128_from_private(r_);
|
|
2192
|
+
#endif
|
|
2193
|
+
}
|
|
2194
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2195
|
+
# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b))
|
|
2196
|
+
#endif
|
|
2197
|
+
|
|
2198
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2199
|
+
simde__m64
|
|
2200
|
+
simde_mm_min_pi16 (simde__m64 a, simde__m64 b) {
|
|
2201
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
2202
|
+
return _mm_min_pi16(a, b);
|
|
2203
|
+
#else
|
|
2204
|
+
simde__m64_private
|
|
2205
|
+
r_,
|
|
2206
|
+
a_ = simde__m64_to_private(a),
|
|
2207
|
+
b_ = simde__m64_to_private(b);
|
|
2208
|
+
|
|
2209
|
+
#if defined(SIMDE_SSE_NEON)
|
|
2210
|
+
r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16);
|
|
2211
|
+
#else
|
|
2212
|
+
SIMDE__VECTORIZE
|
|
2213
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
2214
|
+
r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i];
|
|
2215
|
+
}
|
|
2216
|
+
#endif
|
|
2217
|
+
|
|
2218
|
+
return simde__m64_from_private(r_);
|
|
2219
|
+
#endif
|
|
2220
|
+
}
|
|
2221
|
+
#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b)
|
|
2222
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2223
|
+
# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b)
|
|
2224
|
+
# define _m_pminsw(a, b) simde_mm_min_pi16(a, b)
|
|
2225
|
+
#endif
|
|
2226
|
+
|
|
2227
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2228
|
+
simde__m128
|
|
2229
|
+
simde_mm_min_ps (simde__m128 a, simde__m128 b) {
|
|
2230
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
2231
|
+
return _mm_min_ps(a, b);
|
|
2232
|
+
#else
|
|
2233
|
+
simde__m128_private
|
|
2234
|
+
r_,
|
|
2235
|
+
a_ = simde__m128_to_private(a),
|
|
2236
|
+
b_ = simde__m128_to_private(b);
|
|
2237
|
+
|
|
2238
|
+
#if defined(SIMDE_SSE_NEON)
|
|
2239
|
+
r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32);
|
|
2240
|
+
#elif defined(SIMDE_SSE_POWER_ALTIVEC)
|
|
2241
|
+
r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32);
|
|
2242
|
+
#else
|
|
2243
|
+
SIMDE__VECTORIZE
|
|
2244
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
2245
|
+
r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i];
|
|
2246
|
+
}
|
|
2247
|
+
#endif
|
|
2248
|
+
|
|
2249
|
+
return simde__m128_from_private(r_);
|
|
2250
|
+
#endif
|
|
2251
|
+
}
|
|
2252
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2253
|
+
# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b))
|
|
2254
|
+
#endif
|
|
2255
|
+
|
|
2256
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2257
|
+
simde__m64
|
|
2258
|
+
simde_mm_min_pu8 (simde__m64 a, simde__m64 b) {
|
|
2259
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
2260
|
+
return _mm_min_pu8(a, b);
|
|
2261
|
+
#else
|
|
2262
|
+
simde__m64_private
|
|
2263
|
+
r_,
|
|
2264
|
+
a_ = simde__m64_to_private(a),
|
|
2265
|
+
b_ = simde__m64_to_private(b);
|
|
2266
|
+
|
|
2267
|
+
#if defined(SIMDE_SSE_NEON)
|
|
2268
|
+
r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8);
|
|
2269
|
+
#else
|
|
2270
|
+
SIMDE__VECTORIZE
|
|
2271
|
+
for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
|
|
2272
|
+
r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i];
|
|
2273
|
+
}
|
|
2274
|
+
#endif
|
|
2275
|
+
|
|
2276
|
+
return simde__m64_from_private(r_);
|
|
2277
|
+
#endif
|
|
2278
|
+
}
|
|
2279
|
+
#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b)
|
|
2280
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2281
|
+
# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b)
|
|
2282
|
+
# define _m_pminub(a, b) simde_mm_min_pu8(a, b)
|
|
2283
|
+
#endif
|
|
2284
|
+
|
|
2285
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2286
|
+
simde__m128
|
|
2287
|
+
simde_mm_min_ss (simde__m128 a, simde__m128 b) {
|
|
2288
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
2289
|
+
return _mm_min_ss(a, b);
|
|
2290
|
+
#elif defined(SIMDE_ASSUME_VECTORIZATION)
|
|
2291
|
+
return simde_mm_move_ss(a, simde_mm_min_ps(a, b));
|
|
2292
|
+
#else
|
|
2293
|
+
simde__m128_private
|
|
2294
|
+
r_,
|
|
2295
|
+
a_ = simde__m128_to_private(a),
|
|
2296
|
+
b_ = simde__m128_to_private(b);
|
|
2297
|
+
|
|
2298
|
+
r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0];
|
|
2299
|
+
r_.f32[1] = a_.f32[1];
|
|
2300
|
+
r_.f32[2] = a_.f32[2];
|
|
2301
|
+
r_.f32[3] = a_.f32[3];
|
|
2302
|
+
|
|
2303
|
+
return simde__m128_from_private(r_);
|
|
2304
|
+
#endif
|
|
2305
|
+
}
|
|
2306
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2307
|
+
# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b))
|
|
2308
|
+
#endif
|
|
2309
|
+
|
|
2310
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2311
|
+
simde__m128
|
|
2312
|
+
simde_mm_movehl_ps (simde__m128 a, simde__m128 b) {
|
|
2313
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
2314
|
+
return _mm_movehl_ps(a, b);
|
|
2315
|
+
#else
|
|
2316
|
+
simde__m128_private
|
|
2317
|
+
r_,
|
|
2318
|
+
a_ = simde__m128_to_private(a),
|
|
2319
|
+
b_ = simde__m128_to_private(b);
|
|
2320
|
+
|
|
2321
|
+
#if defined(SIMDE__SHUFFLE_VECTOR)
|
|
2322
|
+
r_.f32 = SIMDE__SHUFFLE_VECTOR(32, 16, a_.f32, b_.f32, 6, 7, 2, 3);
|
|
2323
|
+
#else
|
|
2324
|
+
r_.f32[0] = b_.f32[2];
|
|
2325
|
+
r_.f32[1] = b_.f32[3];
|
|
2326
|
+
r_.f32[2] = a_.f32[2];
|
|
2327
|
+
r_.f32[3] = a_.f32[3];
|
|
2328
|
+
#endif
|
|
2329
|
+
|
|
2330
|
+
return simde__m128_from_private(r_);
|
|
2331
|
+
#endif
|
|
2332
|
+
}
|
|
2333
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2334
|
+
# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b))
|
|
2335
|
+
#endif
|
|
2336
|
+
|
|
2337
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2338
|
+
simde__m128
|
|
2339
|
+
simde_mm_movelh_ps (simde__m128 a, simde__m128 b) {
|
|
2340
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
2341
|
+
return _mm_movelh_ps(a, b);
|
|
2342
|
+
#else
|
|
2343
|
+
simde__m128_private
|
|
2344
|
+
r_,
|
|
2345
|
+
a_ = simde__m128_to_private(a),
|
|
2346
|
+
b_ = simde__m128_to_private(b);
|
|
2347
|
+
|
|
2348
|
+
#if defined(SIMDE__SHUFFLE_VECTOR)
|
|
2349
|
+
r_.f32 = SIMDE__SHUFFLE_VECTOR(32, 16, a_.f32, b_.f32, 0, 1, 4, 5);
|
|
2350
|
+
#else
|
|
2351
|
+
r_.f32[0] = a_.f32[0];
|
|
2352
|
+
r_.f32[1] = a_.f32[1];
|
|
2353
|
+
r_.f32[2] = b_.f32[0];
|
|
2354
|
+
r_.f32[3] = b_.f32[1];
|
|
2355
|
+
#endif
|
|
2356
|
+
|
|
2357
|
+
return simde__m128_from_private(r_);
|
|
2358
|
+
#endif
|
|
2359
|
+
}
|
|
2360
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2361
|
+
# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b))
|
|
2362
|
+
#endif
|
|
2363
|
+
|
|
2364
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2365
|
+
int
|
|
2366
|
+
simde_mm_movemask_pi8 (simde__m64 a) {
|
|
2367
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
2368
|
+
return _mm_movemask_pi8(a);
|
|
2369
|
+
#else
|
|
2370
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
2371
|
+
int r = 0;
|
|
2372
|
+
const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]);
|
|
2373
|
+
|
|
2374
|
+
SIMDE__VECTORIZE_REDUCTION(|:r)
|
|
2375
|
+
for (size_t i = 0 ; i < nmemb ; i++) {
|
|
2376
|
+
r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i);
|
|
2377
|
+
}
|
|
2378
|
+
|
|
2379
|
+
return r;
|
|
2380
|
+
#endif
|
|
2381
|
+
}
|
|
2382
|
+
/* MMX-style mnemonic name for simde_mm_movemask_pi8. The operation takes a
 * single vector argument, so the macro takes exactly one parameter. */
#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a)
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
#  define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a)
#  define _m_pmovmskb(a) simde_mm_movemask_pi8(a)
#endif
|
|
2386
|
+
|
|
2387
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2388
|
+
int
|
|
2389
|
+
simde_mm_movemask_ps (simde__m128 a) {
|
|
2390
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
2391
|
+
return _mm_movemask_ps(a);
|
|
2392
|
+
#else
|
|
2393
|
+
int r = 0;
|
|
2394
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
|
2395
|
+
|
|
2396
|
+
#if defined(SIMDE_SSE_NEON)
|
|
2397
|
+
/* TODO: check to see if NEON version is faster than the portable version */
|
|
2398
|
+
static const uint32x4_t movemask = { 1, 2, 4, 8 };
|
|
2399
|
+
static const uint32x4_t highbit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
|
|
2400
|
+
uint32x4_t t0 = a_.neon_u32;
|
|
2401
|
+
uint32x4_t t1 = vtstq_u32(t0, highbit);
|
|
2402
|
+
uint32x4_t t2 = vandq_u32(t1, movemask);
|
|
2403
|
+
uint32x2_t t3 = vorr_u32(vget_low_u32(t2), vget_high_u32(t2));
|
|
2404
|
+
r = vget_lane_u32(t3, 0) | vget_lane_u32(t3, 1);
|
|
2405
|
+
#else
|
|
2406
|
+
SIMDE__VECTORIZE_REDUCTION(|:r)
|
|
2407
|
+
for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) {
|
|
2408
|
+
r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i;
|
|
2409
|
+
}
|
|
2410
|
+
#endif
|
|
2411
|
+
|
|
2412
|
+
return r;
|
|
2413
|
+
#endif
|
|
2414
|
+
}
|
|
2415
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2416
|
+
# define _mm_movemask_ps(a) simde_mm_movemask_ps((a))
|
|
2417
|
+
#endif
|
|
2418
|
+
|
|
2419
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2420
|
+
simde__m128
|
|
2421
|
+
simde_mm_mul_ps (simde__m128 a, simde__m128 b) {
|
|
2422
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
2423
|
+
return _mm_mul_ps(a, b);
|
|
2424
|
+
#else
|
|
2425
|
+
simde__m128_private
|
|
2426
|
+
r_,
|
|
2427
|
+
a_ = simde__m128_to_private(a),
|
|
2428
|
+
b_ = simde__m128_to_private(b);
|
|
2429
|
+
|
|
2430
|
+
#if defined(SIMDE_SSE_NEON)
|
|
2431
|
+
r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32);
|
|
2432
|
+
#elif defined(SIMDE_SSE_WASM_SIMD128)
|
|
2433
|
+
r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128);
|
|
2434
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
2435
|
+
r_.f32 = a_.f32 * b_.f32;
|
|
2436
|
+
#else
|
|
2437
|
+
SIMDE__VECTORIZE
|
|
2438
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
2439
|
+
r_.f32[i] = a_.f32[i] * b_.f32[i];
|
|
2440
|
+
}
|
|
2441
|
+
#endif
|
|
2442
|
+
|
|
2443
|
+
return simde__m128_from_private(r_);
|
|
2444
|
+
#endif
|
|
2445
|
+
}
|
|
2446
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2447
|
+
# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b))
|
|
2448
|
+
#endif
|
|
2449
|
+
|
|
2450
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2451
|
+
simde__m128
|
|
2452
|
+
simde_mm_mul_ss (simde__m128 a, simde__m128 b) {
|
|
2453
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
2454
|
+
return _mm_mul_ss(a, b);
|
|
2455
|
+
#elif defined(SIMDE_ASSUME_VECTORIZATION)
|
|
2456
|
+
return simde_mm_move_ss(a, simde_mm_mul_ps(a, b));
|
|
2457
|
+
#else
|
|
2458
|
+
simde__m128_private
|
|
2459
|
+
r_,
|
|
2460
|
+
a_ = simde__m128_to_private(a),
|
|
2461
|
+
b_ = simde__m128_to_private(b);
|
|
2462
|
+
|
|
2463
|
+
r_.f32[0] = a_.f32[0] * b_.f32[0];
|
|
2464
|
+
r_.f32[1] = a_.f32[1];
|
|
2465
|
+
r_.f32[2] = a_.f32[2];
|
|
2466
|
+
r_.f32[3] = a_.f32[3];
|
|
2467
|
+
|
|
2468
|
+
return simde__m128_from_private(r_);
|
|
2469
|
+
#endif
|
|
2470
|
+
}
|
|
2471
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2472
|
+
# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b))
|
|
2473
|
+
#endif
|
|
2474
|
+
|
|
2475
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2476
|
+
simde__m64
|
|
2477
|
+
simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) {
|
|
2478
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
2479
|
+
return _mm_mulhi_pu16(a, b);
|
|
2480
|
+
#else
|
|
2481
|
+
simde__m64_private
|
|
2482
|
+
r_,
|
|
2483
|
+
a_ = simde__m64_to_private(a),
|
|
2484
|
+
b_ = simde__m64_to_private(b);
|
|
2485
|
+
|
|
2486
|
+
SIMDE__VECTORIZE
|
|
2487
|
+
for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
|
|
2488
|
+
r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16)));
|
|
2489
|
+
}
|
|
2490
|
+
|
|
2491
|
+
return simde__m64_from_private(r_);
|
|
2492
|
+
#endif
|
|
2493
|
+
}
|
|
2494
|
+
#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b)
|
|
2495
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2496
|
+
# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b)
|
|
2497
|
+
#endif
|
|
2498
|
+
|
|
2499
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2500
|
+
simde__m128
|
|
2501
|
+
simde_mm_or_ps (simde__m128 a, simde__m128 b) {
|
|
2502
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
2503
|
+
return _mm_or_ps(a, b);
|
|
2504
|
+
#else
|
|
2505
|
+
simde__m128_private
|
|
2506
|
+
r_,
|
|
2507
|
+
a_ = simde__m128_to_private(a),
|
|
2508
|
+
b_ = simde__m128_to_private(b);
|
|
2509
|
+
|
|
2510
|
+
#if defined(SIMDE_SSE_NEON)
|
|
2511
|
+
r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32);
|
|
2512
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
2513
|
+
r_.i32f = a_.i32f | b_.i32f;
|
|
2514
|
+
#else
|
|
2515
|
+
SIMDE__VECTORIZE
|
|
2516
|
+
for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
|
|
2517
|
+
r_.u32[i] = a_.u32[i] | b_.u32[i];
|
|
2518
|
+
}
|
|
2519
|
+
#endif
|
|
2520
|
+
|
|
2521
|
+
return simde__m128_from_private(r_);
|
|
2522
|
+
#endif
|
|
2523
|
+
}
|
|
2524
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2525
|
+
# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b))
|
|
2526
|
+
#endif
|
|
2527
|
+
|
|
2528
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2529
|
+
void
|
|
2530
|
+
simde_mm_prefetch (char const* p, int i) {
|
|
2531
|
+
(void) p;
|
|
2532
|
+
(void) i;
|
|
2533
|
+
}
|
|
2534
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
2535
|
+
# define simde_mm_prefetch(p, i) _mm_prefetch(p, i)
|
|
2536
|
+
#endif
|
|
2537
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2538
|
+
# define _mm_prefetch(p, i) simde_mm_prefetch(p, i)
|
|
2539
|
+
#endif
|
|
2540
|
+
|
|
2541
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2542
|
+
simde__m128
|
|
2543
|
+
simde_mm_rcp_ps (simde__m128 a) {
|
|
2544
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
2545
|
+
return _mm_rcp_ps(a);
|
|
2546
|
+
#else
|
|
2547
|
+
simde__m128_private
|
|
2548
|
+
r_,
|
|
2549
|
+
a_ = simde__m128_to_private(a);
|
|
2550
|
+
|
|
2551
|
+
#if defined(SIMDE_SSE_NEON)
|
|
2552
|
+
float32x4_t recip = vrecpeq_f32(a_.neon_f32);
|
|
2553
|
+
|
|
2554
|
+
# if !defined(SIMDE_MM_RCP_PS_ITERS)
|
|
2555
|
+
# define SIMDE_MM_RCP_PS_ITERS SIMDE_ACCURACY_ITERS
|
|
2556
|
+
# endif
|
|
2557
|
+
|
|
2558
|
+
for (int i = 0; i < SIMDE_MM_RCP_PS_ITERS ; ++i) {
|
|
2559
|
+
recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32));
|
|
2560
|
+
}
|
|
2561
|
+
|
|
2562
|
+
r_.neon_f32 = recip;
|
|
2563
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
2564
|
+
r_.f32 = 1.0f / a_.f32;
|
|
2565
|
+
#else
|
|
2566
|
+
SIMDE__VECTORIZE
|
|
2567
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
2568
|
+
r_.f32[i] = 1.0f / a_.f32[i];
|
|
2569
|
+
}
|
|
2570
|
+
#endif
|
|
2571
|
+
|
|
2572
|
+
return simde__m128_from_private(r_);
|
|
2573
|
+
#endif
|
|
2574
|
+
}
|
|
2575
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2576
|
+
# define _mm_rcp_ps(a) simde_mm_rcp_ps((a))
|
|
2577
|
+
#endif
|
|
2578
|
+
|
|
2579
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2580
|
+
simde__m128
|
|
2581
|
+
simde_mm_rcp_ss (simde__m128 a) {
|
|
2582
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
2583
|
+
return _mm_rcp_ss(a);
|
|
2584
|
+
#elif defined(SIMDE_ASSUME_VECTORIZATION)
|
|
2585
|
+
return simde_mm_move_ss(a, simde_mm_rcp_ps(a));
|
|
2586
|
+
#else
|
|
2587
|
+
simde__m128_private
|
|
2588
|
+
r_,
|
|
2589
|
+
a_ = simde__m128_to_private(a);
|
|
2590
|
+
|
|
2591
|
+
r_.f32[0] = 1.0f / a_.f32[0];
|
|
2592
|
+
r_.f32[1] = a_.f32[1];
|
|
2593
|
+
r_.f32[2] = a_.f32[2];
|
|
2594
|
+
r_.f32[3] = a_.f32[3];
|
|
2595
|
+
|
|
2596
|
+
return simde__m128_from_private(r_);
|
|
2597
|
+
#endif
|
|
2598
|
+
}
|
|
2599
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2600
|
+
# define _mm_rcp_ss(a) simde_mm_rcp_ss((a))
|
|
2601
|
+
#endif
|
|
2602
|
+
|
|
2603
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2604
|
+
simde__m128
|
|
2605
|
+
simde_mm_rsqrt_ps (simde__m128 a) {
|
|
2606
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
2607
|
+
return _mm_rsqrt_ps(a);
|
|
2608
|
+
#else
|
|
2609
|
+
simde__m128_private
|
|
2610
|
+
r_,
|
|
2611
|
+
a_ = simde__m128_to_private(a);
|
|
2612
|
+
|
|
2613
|
+
#if defined(SIMDE_SSE_NEON)
|
|
2614
|
+
r_.neon_f32 = vrsqrteq_f32(a_.neon_f32);
|
|
2615
|
+
#elif defined(__STDC_IEC_559__)
|
|
2616
|
+
/* http://h14s.p5r.org/2012/09/0x5f3759df.html?mwh=1 */
|
|
2617
|
+
SIMDE__VECTORIZE
|
|
2618
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
2619
|
+
r_.i32[i] = INT32_C(0x5f3759df) - (a_.i32[i] >> 1);
|
|
2620
|
+
|
|
2621
|
+
#if SIMDE_ACCURACY_ITERS > 2
|
|
2622
|
+
const float half = SIMDE_FLOAT32_C(0.5) * a_.f32[i];
|
|
2623
|
+
for (int ai = 2 ; ai < SIMDE_ACCURACY_ITERS ; ai++)
|
|
2624
|
+
r_.f32[i] *= SIMDE_FLOAT32_C(1.5) - (half * r_.f32[i] * r_.f32[i]);
|
|
2625
|
+
#endif
|
|
2626
|
+
}
|
|
2627
|
+
#elif defined(SIMDE_HAVE_MATH_H)
|
|
2628
|
+
SIMDE__VECTORIZE
|
|
2629
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
2630
|
+
r_.f32[i] = 1.0f / sqrtf(a_.f32[i]);
|
|
2631
|
+
}
|
|
2632
|
+
#else
|
|
2633
|
+
HEDLEY_UNREACHABLE();
|
|
2634
|
+
#endif
|
|
2635
|
+
|
|
2636
|
+
return simde__m128_from_private(r_);
|
|
2637
|
+
#endif
|
|
2638
|
+
}
|
|
2639
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2640
|
+
# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a))
|
|
2641
|
+
#endif
|
|
2642
|
+
|
|
2643
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2644
|
+
simde__m128
|
|
2645
|
+
simde_mm_rsqrt_ss (simde__m128 a) {
|
|
2646
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
2647
|
+
return _mm_rsqrt_ss(a);
|
|
2648
|
+
#elif defined(SIMDE_ASSUME_VECTORIZATION)
|
|
2649
|
+
return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a));
|
|
2650
|
+
#else
|
|
2651
|
+
simde__m128_private
|
|
2652
|
+
r_,
|
|
2653
|
+
a_ = simde__m128_to_private(a);
|
|
2654
|
+
|
|
2655
|
+
#if defined(__STDC_IEC_559__)
|
|
2656
|
+
{
|
|
2657
|
+
r_.i32[0] = INT32_C(0x5f3759df) - (a_.i32[0] >> 1);
|
|
2658
|
+
|
|
2659
|
+
#if SIMDE_ACCURACY_ITERS > 2
|
|
2660
|
+
float half = SIMDE_FLOAT32_C(0.5) * a_.f32[0];
|
|
2661
|
+
for (int ai = 2 ; ai < SIMDE_ACCURACY_ITERS ; ai++)
|
|
2662
|
+
r_.f32[0] *= SIMDE_FLOAT32_C(1.5) - (half * r_.f32[0] * r_.f32[0]);
|
|
2663
|
+
#endif
|
|
2664
|
+
}
|
|
2665
|
+
r_.f32[0] = 1.0f / sqrtf(a_.f32[0]);
|
|
2666
|
+
r_.f32[1] = a_.f32[1];
|
|
2667
|
+
r_.f32[2] = a_.f32[2];
|
|
2668
|
+
r_.f32[3] = a_.f32[3];
|
|
2669
|
+
#elif defined(SIMDE_HAVE_MATH_H)
|
|
2670
|
+
r_.f32[0] = 1.0f / sqrtf(a_.f32[0]);
|
|
2671
|
+
r_.f32[1] = a_.f32[1];
|
|
2672
|
+
r_.f32[2] = a_.f32[2];
|
|
2673
|
+
r_.f32[3] = a_.f32[3];
|
|
2674
|
+
#else
|
|
2675
|
+
HEDLEY_UNREACHABLE();
|
|
2676
|
+
#endif
|
|
2677
|
+
|
|
2678
|
+
return simde__m128_from_private(r_);
|
|
2679
|
+
#endif
|
|
2680
|
+
}
|
|
2681
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2682
|
+
# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a))
|
|
2683
|
+
#endif
|
|
2684
|
+
|
|
2685
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2686
|
+
simde__m64
|
|
2687
|
+
simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) {
|
|
2688
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
2689
|
+
return _mm_sad_pu8(a, b);
|
|
2690
|
+
#else
|
|
2691
|
+
simde__m64_private
|
|
2692
|
+
r_,
|
|
2693
|
+
a_ = simde__m64_to_private(a),
|
|
2694
|
+
b_ = simde__m64_to_private(b);
|
|
2695
|
+
uint16_t sum = 0;
|
|
2696
|
+
|
|
2697
|
+
#if defined(SIMDE_HAVE_STDLIB_H)
|
|
2698
|
+
SIMDE__VECTORIZE_REDUCTION(+:sum)
|
|
2699
|
+
for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
|
|
2700
|
+
sum += (uint8_t) abs(a_.u8[i] - b_.u8[i]);
|
|
2701
|
+
}
|
|
2702
|
+
|
|
2703
|
+
r_.i16[0] = (int16_t) sum;
|
|
2704
|
+
r_.i16[1] = 0;
|
|
2705
|
+
r_.i16[2] = 0;
|
|
2706
|
+
r_.i16[3] = 0;
|
|
2707
|
+
#else
|
|
2708
|
+
HEDLEY_UNREACHABLE();
|
|
2709
|
+
#endif
|
|
2710
|
+
|
|
2711
|
+
return simde__m64_from_private(r_);
|
|
2712
|
+
#endif
|
|
2713
|
+
}
|
|
2714
|
+
#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b)
|
|
2715
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2716
|
+
# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b)
|
|
2717
|
+
# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b)
|
|
2718
|
+
#endif
|
|
2719
|
+
|
|
2720
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2721
|
+
simde__m128
|
|
2722
|
+
simde_mm_set_ss (simde_float32 a) {
|
|
2723
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
2724
|
+
return _mm_set_ss(a);
|
|
2725
|
+
#elif defined(SIMDE_SSE_NEON)
|
|
2726
|
+
return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0);
|
|
2727
|
+
#else
|
|
2728
|
+
return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a);
|
|
2729
|
+
#endif
|
|
2730
|
+
}
|
|
2731
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2732
|
+
# define _mm_set_ss(a) simde_mm_set_ss(a)
|
|
2733
|
+
#endif
|
|
2734
|
+
|
|
2735
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2736
|
+
simde__m128
|
|
2737
|
+
simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) {
|
|
2738
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
2739
|
+
return _mm_setr_ps(e3, e2, e1, e0);
|
|
2740
|
+
#elif defined(SIMDE_SSE_NEON)
|
|
2741
|
+
SIMDE_ALIGN(16) simde_float32 data[4] = { e3, e2, e1, e0 };
|
|
2742
|
+
return vld1q_f32(data);
|
|
2743
|
+
#else
|
|
2744
|
+
return simde_mm_set_ps(e0, e1, e2, e3);
|
|
2745
|
+
#endif
|
|
2746
|
+
}
|
|
2747
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2748
|
+
# define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0)
|
|
2749
|
+
#endif
|
|
2750
|
+
|
|
2751
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2752
|
+
simde__m128
|
|
2753
|
+
simde_mm_setzero_ps (void) {
|
|
2754
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
2755
|
+
return _mm_setzero_ps();
|
|
2756
|
+
#elif defined(SIMDE_SSE_NEON)
|
|
2757
|
+
return vdupq_n_f32(SIMDE_FLOAT32_C(0.0));
|
|
2758
|
+
#else
|
|
2759
|
+
simde__m128 r;
|
|
2760
|
+
simde_memset(&r, 0, sizeof(r));
|
|
2761
|
+
return r;
|
|
2762
|
+
#endif
|
|
2763
|
+
}
|
|
2764
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2765
|
+
# define _mm_setzero_ps() simde_mm_setzero_ps()
|
|
2766
|
+
#endif
|
|
2767
|
+
|
|
2768
|
+
#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
|
|
2769
|
+
HEDLEY_DIAGNOSTIC_PUSH
|
|
2770
|
+
SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_
|
|
2771
|
+
#endif
|
|
2772
|
+
|
|
2773
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2774
|
+
simde__m128
|
|
2775
|
+
simde_mm_undefined_ps (void) {
|
|
2776
|
+
simde__m128_private r_;
|
|
2777
|
+
|
|
2778
|
+
#if defined(SIMDE__HAVE_UNDEFINED128)
|
|
2779
|
+
r_.n = _mm_undefined_ps();
|
|
2780
|
+
#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
|
|
2781
|
+
r_ = simde__m128_to_private(simde_mm_setzero_ps());
|
|
2782
|
+
#endif
|
|
2783
|
+
|
|
2784
|
+
return simde__m128_from_private(r_);
|
|
2785
|
+
}
|
|
2786
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2787
|
+
# define _mm_undefined_ps() simde_mm_undefined_ps()
|
|
2788
|
+
#endif
|
|
2789
|
+
|
|
2790
|
+
#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
|
|
2791
|
+
HEDLEY_DIAGNOSTIC_POP
|
|
2792
|
+
#endif
|
|
2793
|
+
|
|
2794
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2795
|
+
simde__m128
|
|
2796
|
+
simde_mm_setone_ps (void) {
|
|
2797
|
+
simde__m128 t = simde_mm_setzero_ps();
|
|
2798
|
+
return simde_mm_cmpeq_ps(t, t);
|
|
2799
|
+
}
|
|
2800
|
+
|
|
2801
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2802
|
+
void
|
|
2803
|
+
simde_mm_sfence (void) {
|
|
2804
|
+
/* TODO: Use Hedley. */
|
|
2805
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
2806
|
+
_mm_sfence();
|
|
2807
|
+
#elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))
|
|
2808
|
+
__atomic_thread_fence(__ATOMIC_SEQ_CST);
|
|
2809
|
+
#elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
|
|
2810
|
+
# if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9)
|
|
2811
|
+
__atomic_thread_fence(__ATOMIC_SEQ_CST);
|
|
2812
|
+
# else
|
|
2813
|
+
atomic_thread_fence(memory_order_seq_cst);
|
|
2814
|
+
# endif
|
|
2815
|
+
#elif defined(_MSC_VER)
|
|
2816
|
+
MemoryBarrier();
|
|
2817
|
+
#elif HEDLEY_HAS_EXTENSION(c_atomic)
|
|
2818
|
+
__c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
|
|
2819
|
+
#elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
|
|
2820
|
+
__sync_synchronize();
|
|
2821
|
+
#elif defined(_OPENMP)
|
|
2822
|
+
# pragma omp critical(simde_mm_sfence_)
|
|
2823
|
+
{ }
|
|
2824
|
+
#endif
|
|
2825
|
+
}
|
|
2826
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2827
|
+
# define _mm_sfence() simde_mm_sfence()
|
|
2828
|
+
#endif
|
|
2829
|
+
|
|
2830
|
+
/* Packs four 2-bit selectors into one 8-bit shuffle immediate:
 * w occupies bits 0-1, x bits 2-3, y bits 4-5 and z bits 6-7. */
#define SIMDE_MM_SHUFFLE(z, y, x, w) ((w) | ((x) << 2) | ((y) << 4) | ((z) << 6))
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
#  define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w)
#endif
|
|
2834
|
+
|
|
2835
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX) && !defined(__PGI)
|
|
2836
|
+
# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8)
|
|
2837
|
+
#elif defined(SIMDE__SHUFFLE_VECTOR)
|
|
2838
|
+
# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \
|
|
2839
|
+
const simde__m64_private simde__tmp_a_ = simde__m64_to_private(a); \
|
|
2840
|
+
simde__m64_from_private((simde__m64_private) { .i16 = \
|
|
2841
|
+
SIMDE__SHUFFLE_VECTOR(16, 8, \
|
|
2842
|
+
(simde__tmp_a_).i16, \
|
|
2843
|
+
(simde__tmp_a_).i16, \
|
|
2844
|
+
(((imm8) ) & 3), \
|
|
2845
|
+
(((imm8) >> 2) & 3), \
|
|
2846
|
+
(((imm8) >> 4) & 3), \
|
|
2847
|
+
(((imm8) >> 6) & 3)) }); }))
|
|
2848
|
+
#else
|
|
2849
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2850
|
+
simde__m64
|
|
2851
|
+
simde_mm_shuffle_pi16 (simde__m64 a, const int imm8)
|
|
2852
|
+
HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
|
|
2853
|
+
simde__m64_private r_;
|
|
2854
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
2855
|
+
|
|
2856
|
+
for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) {
|
|
2857
|
+
r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3];
|
|
2858
|
+
}
|
|
2859
|
+
|
|
2860
|
+
HEDLEY_DIAGNOSTIC_PUSH
|
|
2861
|
+
#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized")
|
|
2862
|
+
# pragma clang diagnostic ignored "-Wconditional-uninitialized"
|
|
2863
|
+
#endif
|
|
2864
|
+
return simde__m64_from_private(r_);
|
|
2865
|
+
HEDLEY_DIAGNOSTIC_POP
|
|
2866
|
+
}
|
|
2867
|
+
#endif
|
|
2868
|
+
/* simde_m_pshufw: MMX-style mnemonic name for simde_mm_shuffle_pi16.
 * The native _m_pshufw operates on 64-bit MMX vectors, so it is only used
 * when MMX is available -- the same guard that selects the native
 * _mm_shuffle_pi16. Otherwise it forwards to the portable shuffle. */
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX) && !defined(__PGI)
#  define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8)
#else
#  define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8)
#endif
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
#  define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8)
#  define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8)
#endif
|
|
2877
|
+
|
|
2878
|
+
#if defined(SIMDE_SSE_NATIVE) && !defined(__PGI)
|
|
2879
|
+
# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8)
|
|
2880
|
+
#elif defined(SIMDE__SHUFFLE_VECTOR)
|
|
2881
|
+
# define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \
|
|
2882
|
+
simde__m128_from_private((simde__m128_private) { .f32 = \
|
|
2883
|
+
SIMDE__SHUFFLE_VECTOR(32, 16, \
|
|
2884
|
+
simde__m128_to_private(a).f32, \
|
|
2885
|
+
simde__m128_to_private(b).f32, \
|
|
2886
|
+
(((imm8) ) & 3), \
|
|
2887
|
+
(((imm8) >> 2) & 3), \
|
|
2888
|
+
(((imm8) >> 4) & 3) + 4, \
|
|
2889
|
+
(((imm8) >> 6) & 3) + 4) }); }))
|
|
2890
|
+
#else
|
|
2891
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2892
|
+
simde__m128
|
|
2893
|
+
simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8)
|
|
2894
|
+
HEDLEY_REQUIRE_MSG((imm8 & 0xff) == imm8, "imm8 must be in range [0, 255]") {
|
|
2895
|
+
simde__m128_private
|
|
2896
|
+
r_,
|
|
2897
|
+
a_ = simde__m128_to_private(a),
|
|
2898
|
+
b_ = simde__m128_to_private(b);
|
|
2899
|
+
|
|
2900
|
+
r_.f32[0] = a_.f32[(imm8 >> 0) & 3];
|
|
2901
|
+
r_.f32[1] = a_.f32[(imm8 >> 2) & 3];
|
|
2902
|
+
r_.f32[2] = b_.f32[(imm8 >> 4) & 3];
|
|
2903
|
+
r_.f32[3] = b_.f32[(imm8 >> 6) & 3];
|
|
2904
|
+
|
|
2905
|
+
return simde__m128_from_private(r_);
|
|
2906
|
+
}
|
|
2907
|
+
#endif
|
|
2908
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2909
|
+
# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8)
|
|
2910
|
+
#endif
|
|
2911
|
+
|
|
2912
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2913
|
+
simde__m128
|
|
2914
|
+
simde_mm_sqrt_ps (simde__m128 a) {
|
|
2915
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
2916
|
+
return _mm_sqrt_ps(a);
|
|
2917
|
+
#else
|
|
2918
|
+
simde__m128_private
|
|
2919
|
+
r_,
|
|
2920
|
+
a_ = simde__m128_to_private(a);
|
|
2921
|
+
|
|
2922
|
+
#if defined(SIMDE_SSE_NEON)
|
|
2923
|
+
float32x4_t recipsq = vrsqrteq_f32(a_.neon_f32);
|
|
2924
|
+
float32x4_t sq = vrecpeq_f32(recipsq);
|
|
2925
|
+
/* ??? use step versions of both sqrt and recip for better accuracy? */
|
|
2926
|
+
r_.neon_f32 = sq;
|
|
2927
|
+
#elif defined(SIMDE_HAVE_MATH_H)
|
|
2928
|
+
SIMDE__VECTORIZE
|
|
2929
|
+
for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) {
|
|
2930
|
+
r_.f32[i] = sqrtf(a_.f32[i]);
|
|
2931
|
+
}
|
|
2932
|
+
#else
|
|
2933
|
+
HEDLEY_UNREACHABLE();
|
|
2934
|
+
#endif
|
|
2935
|
+
|
|
2936
|
+
return simde__m128_from_private(r_);
|
|
2937
|
+
#endif
|
|
2938
|
+
}
|
|
2939
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2940
|
+
# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a))
|
|
2941
|
+
#endif
|
|
2942
|
+
|
|
2943
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2944
|
+
simde__m128
|
|
2945
|
+
simde_mm_sqrt_ss (simde__m128 a) {
|
|
2946
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
2947
|
+
return _mm_sqrt_ss(a);
|
|
2948
|
+
#elif defined(SIMDE_ASSUME_VECTORIZATION)
|
|
2949
|
+
return simde_mm_move_ss(a, simde_mm_sqrt_ps(a));
|
|
2950
|
+
#else
|
|
2951
|
+
simde__m128_private
|
|
2952
|
+
r_,
|
|
2953
|
+
a_ = simde__m128_to_private(a);
|
|
2954
|
+
|
|
2955
|
+
#if defined(SIMDE_HAVE_MATH_H)
|
|
2956
|
+
r_.f32[0] = sqrtf(a_.f32[0]);
|
|
2957
|
+
r_.f32[1] = a_.f32[1];
|
|
2958
|
+
r_.f32[2] = a_.f32[2];
|
|
2959
|
+
r_.f32[3] = a_.f32[3];
|
|
2960
|
+
#else
|
|
2961
|
+
HEDLEY_UNREACHABLE();
|
|
2962
|
+
#endif
|
|
2963
|
+
|
|
2964
|
+
return simde__m128_from_private(r_);
|
|
2965
|
+
#endif
|
|
2966
|
+
}
|
|
2967
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2968
|
+
# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a))
|
|
2969
|
+
#endif
|
|
2970
|
+
|
|
2971
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2972
|
+
void
|
|
2973
|
+
simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) {
|
|
2974
|
+
simde_assert_aligned(16, mem_addr);
|
|
2975
|
+
|
|
2976
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
2977
|
+
_mm_store_ps(mem_addr, a);
|
|
2978
|
+
#else
|
|
2979
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
|
2980
|
+
|
|
2981
|
+
#if defined(SIMDE_SSE_NEON)
|
|
2982
|
+
vst1q_f32(mem_addr, a_.neon_f32);
|
|
2983
|
+
#elif defined(SIMDE_SSE_WASM_SIMD128)
|
|
2984
|
+
wasm_v128_store(mem_addr, a_.wasm_v128);
|
|
2985
|
+
#else
|
|
2986
|
+
SIMDE__VECTORIZE_ALIGNED(mem_addr:16)
|
|
2987
|
+
for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) {
|
|
2988
|
+
mem_addr[i] = a_.f32[i];
|
|
2989
|
+
}
|
|
2990
|
+
#endif
|
|
2991
|
+
#endif
|
|
2992
|
+
}
|
|
2993
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
2994
|
+
# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
|
|
2995
|
+
#endif
|
|
2996
|
+
|
|
2997
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2998
|
+
void
|
|
2999
|
+
simde_mm_store_ps1 (simde_float32 mem_addr[4], simde__m128 a) {
|
|
3000
|
+
simde_assert_aligned(16, mem_addr);
|
|
3001
|
+
|
|
3002
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
3003
|
+
_mm_store_ps1(mem_addr, a);
|
|
3004
|
+
#else
|
|
3005
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
|
3006
|
+
|
|
3007
|
+
SIMDE__VECTORIZE_ALIGNED(mem_addr:16)
|
|
3008
|
+
for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) {
|
|
3009
|
+
mem_addr[i] = a_.f32[0];
|
|
3010
|
+
}
|
|
3011
|
+
#endif
|
|
3012
|
+
}
|
|
3013
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
3014
|
+
# define _mm_store_ps1(mem_addr, a) simde_mm_store_ps1(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
|
|
3015
|
+
#endif
|
|
3016
|
+
|
|
3017
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3018
|
+
void
|
|
3019
|
+
simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) {
|
|
3020
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
3021
|
+
_mm_store_ss(mem_addr, a);
|
|
3022
|
+
#else
|
|
3023
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
|
3024
|
+
|
|
3025
|
+
#if defined(SIMDE_SSE_NEON)
|
|
3026
|
+
vst1q_lane_f32(mem_addr, a_.neon_f32, 0);
|
|
3027
|
+
#else
|
|
3028
|
+
*mem_addr = a_.f32[0];
|
|
3029
|
+
#endif
|
|
3030
|
+
#endif
|
|
3031
|
+
}
|
|
3032
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
3033
|
+
# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
|
|
3034
|
+
#endif
|
|
3035
|
+
|
|
3036
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3037
|
+
void
|
|
3038
|
+
simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) {
|
|
3039
|
+
simde_assert_aligned(16, mem_addr);
|
|
3040
|
+
|
|
3041
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
3042
|
+
_mm_store1_ps(mem_addr, a);
|
|
3043
|
+
#else
|
|
3044
|
+
simde_mm_store_ps1(mem_addr, a);
|
|
3045
|
+
#endif
|
|
3046
|
+
}
|
|
3047
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
3048
|
+
# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
|
|
3049
|
+
#endif
|
|
3050
|
+
|
|
3051
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3052
|
+
void
|
|
3053
|
+
simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) {
|
|
3054
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
3055
|
+
_mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a);
|
|
3056
|
+
#else
|
|
3057
|
+
simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr);
|
|
3058
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
|
3059
|
+
|
|
3060
|
+
dest_->f32[0] = a_.f32[2];
|
|
3061
|
+
dest_->f32[1] = a_.f32[3];
|
|
3062
|
+
#endif
|
|
3063
|
+
}
|
|
3064
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
3065
|
+
# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a))
|
|
3066
|
+
#endif
|
|
3067
|
+
|
|
3068
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3069
|
+
void
|
|
3070
|
+
simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) {
|
|
3071
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
3072
|
+
_mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a);
|
|
3073
|
+
#else
|
|
3074
|
+
simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr);
|
|
3075
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
|
3076
|
+
|
|
3077
|
+
dest_->f32[0] = a_.f32[0];
|
|
3078
|
+
dest_->f32[1] = a_.f32[1];
|
|
3079
|
+
#endif
|
|
3080
|
+
}
|
|
3081
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
3082
|
+
# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a))
|
|
3083
|
+
#endif
|
|
3084
|
+
|
|
3085
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3086
|
+
void
|
|
3087
|
+
simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) {
|
|
3088
|
+
simde_assert_aligned(16, mem_addr);
|
|
3089
|
+
|
|
3090
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
3091
|
+
_mm_storer_ps(mem_addr, a);
|
|
3092
|
+
#else
|
|
3093
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
|
3094
|
+
|
|
3095
|
+
#if defined(SIMDE__SHUFFLE_VECTOR)
|
|
3096
|
+
a_.f32 = SIMDE__SHUFFLE_VECTOR(32, 16, a_.f32, a_.f32, 3, 2, 1, 0);
|
|
3097
|
+
simde_mm_store_ps(mem_addr, simde__m128_from_private(a_));
|
|
3098
|
+
#else
|
|
3099
|
+
SIMDE__VECTORIZE_ALIGNED(mem_addr:16)
|
|
3100
|
+
for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) {
|
|
3101
|
+
mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i];
|
|
3102
|
+
}
|
|
3103
|
+
#endif
|
|
3104
|
+
#endif
|
|
3105
|
+
}
|
|
3106
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
3107
|
+
# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
|
|
3108
|
+
#endif
|
|
3109
|
+
|
|
3110
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3111
|
+
void
|
|
3112
|
+
simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) {
|
|
3113
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
3114
|
+
_mm_storeu_ps(mem_addr, a);
|
|
3115
|
+
#else
|
|
3116
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
|
3117
|
+
|
|
3118
|
+
#if defined(SIMDE_SSE_NEON)
|
|
3119
|
+
vst1q_f32(mem_addr, a_.neon_f32);
|
|
3120
|
+
#else
|
|
3121
|
+
simde_memcpy(mem_addr, &a_, sizeof(a_));
|
|
3122
|
+
#endif
|
|
3123
|
+
#endif
|
|
3124
|
+
}
|
|
3125
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
3126
|
+
# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
|
|
3127
|
+
#endif
|
|
3128
|
+
|
|
3129
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3130
|
+
simde__m128
|
|
3131
|
+
simde_mm_sub_ps (simde__m128 a, simde__m128 b) {
|
|
3132
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
3133
|
+
return _mm_sub_ps(a, b);
|
|
3134
|
+
#else
|
|
3135
|
+
simde__m128_private
|
|
3136
|
+
r_,
|
|
3137
|
+
a_ = simde__m128_to_private(a),
|
|
3138
|
+
b_ = simde__m128_to_private(b);
|
|
3139
|
+
|
|
3140
|
+
#if defined(SIMDE_SSE_NEON)
|
|
3141
|
+
r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32);
|
|
3142
|
+
#elif defined(SIMDE_SSE_WASM_SIMD128)
|
|
3143
|
+
r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128);
|
|
3144
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
3145
|
+
r_.f32 = a_.f32 - b_.f32;
|
|
3146
|
+
#else
|
|
3147
|
+
SIMDE__VECTORIZE
|
|
3148
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
3149
|
+
r_.f32[i] = a_.f32[i] - b_.f32[i];
|
|
3150
|
+
}
|
|
3151
|
+
#endif
|
|
3152
|
+
|
|
3153
|
+
return simde__m128_from_private(r_);
|
|
3154
|
+
#endif
|
|
3155
|
+
}
|
|
3156
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
3157
|
+
# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b))
|
|
3158
|
+
#endif
|
|
3159
|
+
|
|
3160
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3161
|
+
simde__m128
|
|
3162
|
+
simde_mm_sub_ss (simde__m128 a, simde__m128 b) {
|
|
3163
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
3164
|
+
return _mm_sub_ss(a, b);
|
|
3165
|
+
#elif defined(SIMDE_ASSUME_VECTORIZATION)
|
|
3166
|
+
return simde_mm_move_ss(a, simde_mm_sub_ps(a, b));
|
|
3167
|
+
#else
|
|
3168
|
+
simde__m128_private
|
|
3169
|
+
r_,
|
|
3170
|
+
a_ = simde__m128_to_private(a),
|
|
3171
|
+
b_ = simde__m128_to_private(b);
|
|
3172
|
+
|
|
3173
|
+
r_.f32[0] = a_.f32[0] - b_.f32[0];
|
|
3174
|
+
r_.f32[1] = a_.f32[1];
|
|
3175
|
+
r_.f32[2] = a_.f32[2];
|
|
3176
|
+
r_.f32[3] = a_.f32[3];
|
|
3177
|
+
|
|
3178
|
+
return simde__m128_from_private(r_);
|
|
3179
|
+
#endif
|
|
3180
|
+
}
|
|
3181
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
3182
|
+
# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b))
|
|
3183
|
+
#endif
|
|
3184
|
+
|
|
3185
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3186
|
+
int
|
|
3187
|
+
simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) {
|
|
3188
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
3189
|
+
return _mm_ucomieq_ss(a, b);
|
|
3190
|
+
#else
|
|
3191
|
+
simde__m128_private
|
|
3192
|
+
a_ = simde__m128_to_private(a),
|
|
3193
|
+
b_ = simde__m128_to_private(b);
|
|
3194
|
+
int r;
|
|
3195
|
+
|
|
3196
|
+
#if defined(SIMDE_HAVE_FENV_H)
|
|
3197
|
+
fenv_t envp;
|
|
3198
|
+
int x = feholdexcept(&envp);
|
|
3199
|
+
r = a_.f32[0] == b_.f32[0];
|
|
3200
|
+
if (HEDLEY_LIKELY(x == 0))
|
|
3201
|
+
fesetenv(&envp);
|
|
3202
|
+
#else
|
|
3203
|
+
HEDLEY_UNREACHABLE();
|
|
3204
|
+
#endif
|
|
3205
|
+
|
|
3206
|
+
return r;
|
|
3207
|
+
#endif
|
|
3208
|
+
}
|
|
3209
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
3210
|
+
# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b))
|
|
3211
|
+
#endif
|
|
3212
|
+
|
|
3213
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3214
|
+
int
|
|
3215
|
+
simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) {
|
|
3216
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
3217
|
+
return _mm_ucomige_ss(a, b);
|
|
3218
|
+
#else
|
|
3219
|
+
simde__m128_private
|
|
3220
|
+
a_ = simde__m128_to_private(a),
|
|
3221
|
+
b_ = simde__m128_to_private(b);
|
|
3222
|
+
int r;
|
|
3223
|
+
|
|
3224
|
+
#if defined(SIMDE_HAVE_FENV_H)
|
|
3225
|
+
fenv_t envp;
|
|
3226
|
+
int x = feholdexcept(&envp);
|
|
3227
|
+
r = a_.f32[0] >= b_.f32[0];
|
|
3228
|
+
if (HEDLEY_LIKELY(x == 0))
|
|
3229
|
+
fesetenv(&envp);
|
|
3230
|
+
#else
|
|
3231
|
+
HEDLEY_UNREACHABLE();
|
|
3232
|
+
#endif
|
|
3233
|
+
|
|
3234
|
+
return r;
|
|
3235
|
+
#endif
|
|
3236
|
+
}
|
|
3237
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
3238
|
+
# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b))
|
|
3239
|
+
#endif
|
|
3240
|
+
|
|
3241
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3242
|
+
int
|
|
3243
|
+
simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) {
|
|
3244
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
3245
|
+
return _mm_ucomigt_ss(a, b);
|
|
3246
|
+
#else
|
|
3247
|
+
simde__m128_private
|
|
3248
|
+
a_ = simde__m128_to_private(a),
|
|
3249
|
+
b_ = simde__m128_to_private(b);
|
|
3250
|
+
int r;
|
|
3251
|
+
|
|
3252
|
+
#if defined(SIMDE_HAVE_FENV_H)
|
|
3253
|
+
fenv_t envp;
|
|
3254
|
+
int x = feholdexcept(&envp);
|
|
3255
|
+
r = a_.f32[0] > b_.f32[0];
|
|
3256
|
+
if (HEDLEY_LIKELY(x == 0))
|
|
3257
|
+
fesetenv(&envp);
|
|
3258
|
+
#else
|
|
3259
|
+
HEDLEY_UNREACHABLE();
|
|
3260
|
+
#endif
|
|
3261
|
+
|
|
3262
|
+
return r;
|
|
3263
|
+
#endif
|
|
3264
|
+
}
|
|
3265
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
3266
|
+
# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b))
|
|
3267
|
+
#endif
|
|
3268
|
+
|
|
3269
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3270
|
+
int
|
|
3271
|
+
simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) {
|
|
3272
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
3273
|
+
return _mm_ucomile_ss(a, b);
|
|
3274
|
+
#else
|
|
3275
|
+
simde__m128_private
|
|
3276
|
+
a_ = simde__m128_to_private(a),
|
|
3277
|
+
b_ = simde__m128_to_private(b);
|
|
3278
|
+
int r;
|
|
3279
|
+
|
|
3280
|
+
#if defined(SIMDE_HAVE_FENV_H)
|
|
3281
|
+
fenv_t envp;
|
|
3282
|
+
int x = feholdexcept(&envp);
|
|
3283
|
+
r = a_.f32[0] <= b_.f32[0];
|
|
3284
|
+
if (HEDLEY_LIKELY(x == 0))
|
|
3285
|
+
fesetenv(&envp);
|
|
3286
|
+
#else
|
|
3287
|
+
HEDLEY_UNREACHABLE();
|
|
3288
|
+
#endif
|
|
3289
|
+
|
|
3290
|
+
return r;
|
|
3291
|
+
#endif
|
|
3292
|
+
}
|
|
3293
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
3294
|
+
# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b))
|
|
3295
|
+
#endif
|
|
3296
|
+
|
|
3297
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3298
|
+
int
|
|
3299
|
+
simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) {
|
|
3300
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
3301
|
+
return _mm_ucomilt_ss(a, b);
|
|
3302
|
+
#else
|
|
3303
|
+
simde__m128_private
|
|
3304
|
+
a_ = simde__m128_to_private(a),
|
|
3305
|
+
b_ = simde__m128_to_private(b);
|
|
3306
|
+
int r;
|
|
3307
|
+
|
|
3308
|
+
#if defined(SIMDE_HAVE_FENV_H)
|
|
3309
|
+
fenv_t envp;
|
|
3310
|
+
int x = feholdexcept(&envp);
|
|
3311
|
+
r = a_.f32[0] < b_.f32[0];
|
|
3312
|
+
if (HEDLEY_LIKELY(x == 0))
|
|
3313
|
+
fesetenv(&envp);
|
|
3314
|
+
#else
|
|
3315
|
+
HEDLEY_UNREACHABLE();
|
|
3316
|
+
#endif
|
|
3317
|
+
|
|
3318
|
+
return r;
|
|
3319
|
+
#endif
|
|
3320
|
+
}
|
|
3321
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
3322
|
+
# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b))
|
|
3323
|
+
#endif
|
|
3324
|
+
|
|
3325
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3326
|
+
int
|
|
3327
|
+
simde_mm_ucomineq_ss (simde__m128 a, simde__m128 b) {
|
|
3328
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
3329
|
+
return _mm_ucomineq_ss(a, b);
|
|
3330
|
+
#else
|
|
3331
|
+
simde__m128_private
|
|
3332
|
+
a_ = simde__m128_to_private(a),
|
|
3333
|
+
b_ = simde__m128_to_private(b);
|
|
3334
|
+
int r;
|
|
3335
|
+
|
|
3336
|
+
#if defined(SIMDE_HAVE_FENV_H)
|
|
3337
|
+
fenv_t envp;
|
|
3338
|
+
int x = feholdexcept(&envp);
|
|
3339
|
+
r = a_.f32[0] != b_.f32[0];
|
|
3340
|
+
if (HEDLEY_LIKELY(x == 0))
|
|
3341
|
+
fesetenv(&envp);
|
|
3342
|
+
#else
|
|
3343
|
+
HEDLEY_UNREACHABLE();
|
|
3344
|
+
#endif
|
|
3345
|
+
|
|
3346
|
+
return r;
|
|
3347
|
+
#endif
|
|
3348
|
+
}
|
|
3349
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
3350
|
+
# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b))
|
|
3351
|
+
#endif
|
|
3352
|
+
|
|
3353
|
+
/* SIMDE__HAVE_UNDEFINED128 is defined only on native SSE builds, and only
 * when either
 *   - the compiler provides __has_builtin and reports
 *     __builtin_ia32_undef128, or
 *   - the compiler has no __has_builtin and is not PGI, not MSVC, and not
 *     flagged with SIMDE_BUG_GCC_REV_208793.
 * The __has_builtin(...) test stays in its own nested #if so compilers
 * without __has_builtin never have to parse it. */
#ifdef SIMDE_SSE_NATIVE
#  ifdef __has_builtin
#    if __has_builtin(__builtin_ia32_undef128)
#      define SIMDE__HAVE_UNDEFINED128
#    endif
#  elif !(defined(__PGI) || defined(SIMDE_BUG_GCC_REV_208793) || defined(_MSC_VER))
#    define SIMDE__HAVE_UNDEFINED128
#  endif
#endif

/* When the project supplies SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_, save
 * the current diagnostic state and apply it.
 * NOTE(review): the matching HEDLEY_DIAGNOSTIC_POP is not visible in this
 * part of the file -- confirm the push is balanced. */
#ifdef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_
#endif
|
|
3367
|
+
|
|
3368
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3369
|
+
simde__m128
|
|
3370
|
+
simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) {
|
|
3371
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
3372
|
+
return _mm_unpackhi_ps(a, b);
|
|
3373
|
+
#else
|
|
3374
|
+
simde__m128_private
|
|
3375
|
+
r_,
|
|
3376
|
+
a_ = simde__m128_to_private(a),
|
|
3377
|
+
b_ = simde__m128_to_private(b);
|
|
3378
|
+
|
|
3379
|
+
#if defined(SIMDE_SSE_NEON)
|
|
3380
|
+
float32x2_t a1 = vget_high_f32(a_.neon_f32);
|
|
3381
|
+
float32x2_t b1 = vget_high_f32(b_.neon_f32);
|
|
3382
|
+
float32x2x2_t result = vzip_f32(a1, b1);
|
|
3383
|
+
r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]);
|
|
3384
|
+
#elif defined(SIMDE__SHUFFLE_VECTOR)
|
|
3385
|
+
r_.f32 = SIMDE__SHUFFLE_VECTOR(32, 16, a_.f32, b_.f32, 2, 6, 3, 7);
|
|
3386
|
+
#else
|
|
3387
|
+
r_.f32[0] = a_.f32[2];
|
|
3388
|
+
r_.f32[1] = b_.f32[2];
|
|
3389
|
+
r_.f32[2] = a_.f32[3];
|
|
3390
|
+
r_.f32[3] = b_.f32[3];
|
|
3391
|
+
#endif
|
|
3392
|
+
|
|
3393
|
+
return simde__m128_from_private(r_);
|
|
3394
|
+
#endif
|
|
3395
|
+
}
|
|
3396
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
3397
|
+
# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b))
|
|
3398
|
+
#endif
|
|
3399
|
+
|
|
3400
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3401
|
+
simde__m128
|
|
3402
|
+
simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) {
|
|
3403
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
3404
|
+
return _mm_unpacklo_ps(a, b);
|
|
3405
|
+
#else
|
|
3406
|
+
simde__m128_private
|
|
3407
|
+
r_,
|
|
3408
|
+
a_ = simde__m128_to_private(a),
|
|
3409
|
+
b_ = simde__m128_to_private(b);
|
|
3410
|
+
|
|
3411
|
+
#if defined(SIMDE__SHUFFLE_VECTOR)
|
|
3412
|
+
r_.f32 = SIMDE__SHUFFLE_VECTOR(32, 16, a_.f32, b_.f32, 0, 4, 1, 5);
|
|
3413
|
+
#elif defined(SIMDE_SSE_NEON)
|
|
3414
|
+
float32x2_t a1 = vget_low_f32(a_.neon_f32);
|
|
3415
|
+
float32x2_t b1 = vget_low_f32(b_.neon_f32);
|
|
3416
|
+
float32x2x2_t result = vzip_f32(a1, b1);
|
|
3417
|
+
r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]);
|
|
3418
|
+
#else
|
|
3419
|
+
r_.f32[0] = a_.f32[0];
|
|
3420
|
+
r_.f32[1] = b_.f32[0];
|
|
3421
|
+
r_.f32[2] = a_.f32[1];
|
|
3422
|
+
r_.f32[3] = b_.f32[1];
|
|
3423
|
+
#endif
|
|
3424
|
+
|
|
3425
|
+
return simde__m128_from_private(r_);
|
|
3426
|
+
#endif
|
|
3427
|
+
}
|
|
3428
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
3429
|
+
# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b))
|
|
3430
|
+
#endif
|
|
3431
|
+
|
|
3432
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3433
|
+
simde__m128
|
|
3434
|
+
simde_mm_xor_ps (simde__m128 a, simde__m128 b) {
|
|
3435
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
3436
|
+
return _mm_xor_ps(a, b);
|
|
3437
|
+
#else
|
|
3438
|
+
simde__m128_private
|
|
3439
|
+
r_,
|
|
3440
|
+
a_ = simde__m128_to_private(a),
|
|
3441
|
+
b_ = simde__m128_to_private(b);
|
|
3442
|
+
|
|
3443
|
+
#if defined(SIMDE_SSE_NEON)
|
|
3444
|
+
r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32);
|
|
3445
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
3446
|
+
r_.i32f = a_.i32f ^ b_.i32f;
|
|
3447
|
+
#else
|
|
3448
|
+
SIMDE__VECTORIZE
|
|
3449
|
+
for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
|
|
3450
|
+
r_.u32[i] = a_.u32[i] ^ b_.u32[i];
|
|
3451
|
+
}
|
|
3452
|
+
#endif
|
|
3453
|
+
|
|
3454
|
+
return simde__m128_from_private(r_);
|
|
3455
|
+
#endif
|
|
3456
|
+
}
|
|
3457
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
3458
|
+
# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b))
|
|
3459
|
+
#endif
|
|
3460
|
+
|
|
3461
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3462
|
+
void
|
|
3463
|
+
simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) {
|
|
3464
|
+
#if defined(SIMDE_SSE_NATIVE) && defined(SIMDE_ARCH_X86_MMX)
|
|
3465
|
+
_mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a);
|
|
3466
|
+
#else
|
|
3467
|
+
simde__m64_private*
|
|
3468
|
+
dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr),
|
|
3469
|
+
a_ = simde__m64_to_private(a);
|
|
3470
|
+
|
|
3471
|
+
#if defined(SIMDE_SSE_NEON)
|
|
3472
|
+
dest->i64[0] = vget_lane_s64(a_.neon_i64, 0);
|
|
3473
|
+
#else
|
|
3474
|
+
dest->i64[0] = a_.i64[0];
|
|
3475
|
+
#endif
|
|
3476
|
+
#endif
|
|
3477
|
+
}
|
|
3478
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
3479
|
+
# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a))
|
|
3480
|
+
#endif
|
|
3481
|
+
|
|
3482
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3483
|
+
void
|
|
3484
|
+
simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) {
|
|
3485
|
+
simde_assert_aligned(16, mem_addr);
|
|
3486
|
+
|
|
3487
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
3488
|
+
_mm_stream_ps(mem_addr, a);
|
|
3489
|
+
#else
|
|
3490
|
+
simde__m128_private a_ = simde__m128_to_private(a);
|
|
3491
|
+
|
|
3492
|
+
#if defined(SIMDE_SSE_NEON)
|
|
3493
|
+
vst1q_f32(mem_addr, a_.neon_f32);
|
|
3494
|
+
#else
|
|
3495
|
+
SIMDE__ASSUME_ALIGNED(mem_addr, 16);
|
|
3496
|
+
simde_memcpy(mem_addr, &a_, sizeof(a_));
|
|
3497
|
+
#endif
|
|
3498
|
+
#endif
|
|
3499
|
+
}
|
|
3500
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
3501
|
+
# define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
|
|
3502
|
+
#endif
|
|
3503
|
+
|
|
3504
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3505
|
+
uint32_t
|
|
3506
|
+
simde_mm_getcsr (void) {
|
|
3507
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
3508
|
+
return _mm_getcsr();
|
|
3509
|
+
#else
|
|
3510
|
+
uint32_t r = 0;
|
|
3511
|
+
|
|
3512
|
+
#if defined(SIMDE_HAVE_FENV_H)
|
|
3513
|
+
int rounding_mode = fegetround();
|
|
3514
|
+
|
|
3515
|
+
switch(rounding_mode) {
|
|
3516
|
+
#if defined(FE_TONEAREST)
|
|
3517
|
+
case FE_TONEAREST:
|
|
3518
|
+
break;
|
|
3519
|
+
#endif
|
|
3520
|
+
#if defined(FE_UPWARD)
|
|
3521
|
+
case FE_UPWARD:
|
|
3522
|
+
r |= 2 << 13;
|
|
3523
|
+
break;
|
|
3524
|
+
#endif
|
|
3525
|
+
#if defined(FE_DOWNWARD)
|
|
3526
|
+
case FE_DOWNWARD:
|
|
3527
|
+
r |= 1 << 13;
|
|
3528
|
+
break;
|
|
3529
|
+
#endif
|
|
3530
|
+
#if defined(FE_TOWARDZERO)
|
|
3531
|
+
case FE_TOWARDZERO:
|
|
3532
|
+
r = 3 << 13;
|
|
3533
|
+
break;
|
|
3534
|
+
#endif
|
|
3535
|
+
}
|
|
3536
|
+
#else
|
|
3537
|
+
HEDLEY_UNREACHABLE();
|
|
3538
|
+
#endif
|
|
3539
|
+
|
|
3540
|
+
return r;
|
|
3541
|
+
#endif
|
|
3542
|
+
}
|
|
3543
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
3544
|
+
# define _mm_getcsr() simde_mm_getcsr()
|
|
3545
|
+
#endif
|
|
3546
|
+
|
|
3547
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3548
|
+
void
|
|
3549
|
+
simde_mm_setcsr (uint32_t a) {
|
|
3550
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
3551
|
+
_mm_setcsr(a);
|
|
3552
|
+
#else
|
|
3553
|
+
switch((a >> 13) & 3) {
|
|
3554
|
+
#if defined(FE_TONEAREST)
|
|
3555
|
+
case 0:
|
|
3556
|
+
fesetround(FE_TONEAREST);
|
|
3557
|
+
#endif
|
|
3558
|
+
#if defined(FE_DOWNWARD)
|
|
3559
|
+
break;
|
|
3560
|
+
case 1:
|
|
3561
|
+
fesetround(FE_DOWNWARD);
|
|
3562
|
+
#endif
|
|
3563
|
+
#if defined(FE_UPWARD)
|
|
3564
|
+
break;
|
|
3565
|
+
case 2:
|
|
3566
|
+
fesetround(FE_UPWARD);
|
|
3567
|
+
#endif
|
|
3568
|
+
#if defined(FE_TOWARDZERO)
|
|
3569
|
+
break;
|
|
3570
|
+
case 3:
|
|
3571
|
+
fesetround(FE_TOWARDZERO);
|
|
3572
|
+
break;
|
|
3573
|
+
#endif
|
|
3574
|
+
}
|
|
3575
|
+
#endif
|
|
3576
|
+
}
|
|
3577
|
+
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
|
|
3578
|
+
# define _mm_setcsr(a) simde_mm_setcsr(a)
|
|
3579
|
+
#endif
|
|
3580
|
+
|
|
3581
|
+
/* In-place transpose of the 4x4 float matrix whose rows are row0..row3.
 * Each row argument is both read and assigned, so it must be a modifiable
 * lvalue, and it is evaluated more than once.  The temporaries are named
 * tmp0..tmp3, so arguments must not use those names. */
#define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
  do { \
    simde__m128 tmp3; \
    simde__m128 tmp2; \
    simde__m128 tmp1; \
    simde__m128 tmp0; \
    /* Interleave row pairs: low halves, then high halves. */ \
    tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \
    tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \
    tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \
    tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \
    /* Recombine the 64-bit halves into the transposed rows. */ \
    row0 = simde_mm_movelh_ps(tmp0, tmp2); \
    row1 = simde_mm_movehl_ps(tmp2, tmp0); \
    row2 = simde_mm_movelh_ps(tmp1, tmp3); \
    row3 = simde_mm_movehl_ps(tmp3, tmp1); \
  } while (0)

#ifdef SIMDE_SSE_ENABLE_NATIVE_ALIASES
#  define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3)
#endif
|
|
3597
|
+
|
|
3598
|
+
/* Exception-flag constants.  Each SIMDE_MM_EXCEPT_* mirrors the matching
 * _MM_EXCEPT_* macro when that macro is already defined, and otherwise
 * falls back to the literal value given here. */
#ifndef _MM_EXCEPT_INVALID
#  define SIMDE_MM_EXCEPT_INVALID (0x0001)
#else
#  define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID
#endif
#ifndef _MM_EXCEPT_DENORM
#  define SIMDE_MM_EXCEPT_DENORM (0x0002)
#else
#  define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM
#endif
#ifndef _MM_EXCEPT_DIV_ZERO
#  define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004)
#else
#  define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO
#endif
#ifndef _MM_EXCEPT_OVERFLOW
#  define SIMDE_MM_EXCEPT_OVERFLOW (0x0008)
#else
#  define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW
#endif
#ifndef _MM_EXCEPT_UNDERFLOW
#  define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010)
#else
#  define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW
#endif
#ifndef _MM_EXCEPT_INEXACT
#  define SIMDE_MM_EXCEPT_INEXACT (0x0020)
#else
#  define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT
#endif
/* Union of all six exception flags. */
#ifndef _MM_EXCEPT_MASK
#  define SIMDE_MM_EXCEPT_MASK \
     (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \
      SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \
      SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT)
#else
#  define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK
#endif
|
|
3636
|
+
|
|
3637
|
+
/* Exception-mask constants.  Each SIMDE_MM_MASK_* mirrors the matching
 * _MM_MASK_* macro when that macro is already defined, and otherwise falls
 * back to the literal value given here. */
#ifndef _MM_MASK_INVALID
#  define SIMDE_MM_MASK_INVALID (0x0080)
#else
#  define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID
#endif
#ifndef _MM_MASK_DENORM
#  define SIMDE_MM_MASK_DENORM (0x0100)
#else
#  define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM
#endif
#ifndef _MM_MASK_DIV_ZERO
#  define SIMDE_MM_MASK_DIV_ZERO (0x0200)
#else
#  define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO
#endif
#ifndef _MM_MASK_OVERFLOW
#  define SIMDE_MM_MASK_OVERFLOW (0x0400)
#else
#  define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW
#endif
#ifndef _MM_MASK_UNDERFLOW
#  define SIMDE_MM_MASK_UNDERFLOW (0x0800)
#else
#  define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW
#endif
#ifndef _MM_MASK_INEXACT
#  define SIMDE_MM_MASK_INEXACT (0x1000)
#else
#  define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT
#endif
/* Union of all six mask bits. */
#ifndef _MM_MASK_MASK
#  define SIMDE_MM_MASK_MASK \
     (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \
      SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \
      SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT)
#else
#  define SIMDE_MM_MASK_MASK _MM_MASK_MASK
#endif
|
|
3675
|
+
|
|
3676
|
+
/* Flush-to-zero constants.  Each SIMDE_MM_FLUSH_ZERO_* mirrors the
 * matching _MM_FLUSH_ZERO_* macro when that macro is already defined, and
 * otherwise falls back to the literal value given here. */
#ifndef _MM_FLUSH_ZERO_MASK
#  define SIMDE_MM_FLUSH_ZERO_MASK (0x8000)
#else
#  define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK
#endif
#ifndef _MM_FLUSH_ZERO_ON
#  define SIMDE_MM_FLUSH_ZERO_ON (0x8000)
#else
#  define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON
#endif
#ifndef _MM_FLUSH_ZERO_OFF
#  define SIMDE_MM_FLUSH_ZERO_OFF (0x0000)
#else
#  define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF
#endif
|
|
3691
|
+
|
|
3692
|
+
SIMDE__END_DECLS
|
|
3693
|
+
|
|
3694
|
+
HEDLEY_DIAGNOSTIC_POP
|
|
3695
|
+
|
|
3696
|
+
#endif /* !defined(SIMDE__SSE_H) */
|