minimap2 0.2.25.0 → 0.2.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -3
- data/ext/minimap2/Makefile +6 -2
- data/ext/minimap2/NEWS.md +38 -0
- data/ext/minimap2/README.md +9 -3
- data/ext/minimap2/align.c +5 -3
- data/ext/minimap2/cookbook.md +2 -2
- data/ext/minimap2/format.c +7 -4
- data/ext/minimap2/kalloc.c +20 -1
- data/ext/minimap2/kalloc.h +13 -2
- data/ext/minimap2/ksw2.h +1 -0
- data/ext/minimap2/ksw2_extd2_sse.c +1 -1
- data/ext/minimap2/ksw2_exts2_sse.c +79 -40
- data/ext/minimap2/ksw2_extz2_sse.c +1 -1
- data/ext/minimap2/lchain.c +15 -16
- data/ext/minimap2/lib/simde/CONTRIBUTING.md +114 -0
- data/ext/minimap2/lib/simde/COPYING +20 -0
- data/ext/minimap2/lib/simde/README.md +333 -0
- data/ext/minimap2/lib/simde/amalgamate.py +58 -0
- data/ext/minimap2/lib/simde/meson.build +33 -0
- data/ext/minimap2/lib/simde/netlify.toml +20 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +140 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +137 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +142 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +145 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +140 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +145 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +140 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +143 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +137 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +141 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +147 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +141 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +134 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +138 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +134 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +137 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +131 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +135 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +141 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +135 -0
- data/ext/minimap2/lib/simde/simde/arm/neon.h +97 -0
- data/ext/minimap2/lib/simde/simde/check.h +267 -0
- data/ext/minimap2/lib/simde/simde/debug-trap.h +83 -0
- data/ext/minimap2/lib/simde/simde/hedley.h +1899 -0
- data/ext/minimap2/lib/simde/simde/simde-arch.h +445 -0
- data/ext/minimap2/lib/simde/simde/simde-common.h +697 -0
- data/ext/minimap2/lib/simde/simde/x86/avx.h +5385 -0
- data/ext/minimap2/lib/simde/simde/x86/avx2.h +2402 -0
- data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +391 -0
- data/ext/minimap2/lib/simde/simde/x86/avx512f.h +3389 -0
- data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +112 -0
- data/ext/minimap2/lib/simde/simde/x86/fma.h +659 -0
- data/ext/minimap2/lib/simde/simde/x86/mmx.h +2210 -0
- data/ext/minimap2/lib/simde/simde/x86/sse.h +3696 -0
- data/ext/minimap2/lib/simde/simde/x86/sse2.h +5991 -0
- data/ext/minimap2/lib/simde/simde/x86/sse3.h +343 -0
- data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +1783 -0
- data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +105 -0
- data/ext/minimap2/lib/simde/simde/x86/ssse3.h +1053 -0
- data/ext/minimap2/lib/simde/simde/x86/svml.h +543 -0
- data/ext/minimap2/lib/simde/test/CMakeLists.txt +166 -0
- data/ext/minimap2/lib/simde/test/arm/meson.build +4 -0
- data/ext/minimap2/lib/simde/test/arm/neon/meson.build +23 -0
- data/ext/minimap2/lib/simde/test/arm/neon/skel.c +871 -0
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +134 -0
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +39 -0
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +10 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +1260 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +873 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +1084 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +1260 -0
- data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +18 -0
- data/ext/minimap2/lib/simde/test/arm/test-arm.c +20 -0
- data/ext/minimap2/lib/simde/test/arm/test-arm.h +8 -0
- data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +171 -0
- data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +68 -0
- data/ext/minimap2/lib/simde/test/meson.build +64 -0
- data/ext/minimap2/lib/simde/test/munit/COPYING +21 -0
- data/ext/minimap2/lib/simde/test/munit/Makefile +55 -0
- data/ext/minimap2/lib/simde/test/munit/README.md +54 -0
- data/ext/minimap2/lib/simde/test/munit/example.c +351 -0
- data/ext/minimap2/lib/simde/test/munit/meson.build +37 -0
- data/ext/minimap2/lib/simde/test/munit/munit.c +2055 -0
- data/ext/minimap2/lib/simde/test/munit/munit.h +535 -0
- data/ext/minimap2/lib/simde/test/run-tests.c +20 -0
- data/ext/minimap2/lib/simde/test/run-tests.h +260 -0
- data/ext/minimap2/lib/simde/test/x86/avx.c +13752 -0
- data/ext/minimap2/lib/simde/test/x86/avx2.c +9977 -0
- data/ext/minimap2/lib/simde/test/x86/avx512bw.c +2664 -0
- data/ext/minimap2/lib/simde/test/x86/avx512f.c +10416 -0
- data/ext/minimap2/lib/simde/test/x86/avx512vl.c +210 -0
- data/ext/minimap2/lib/simde/test/x86/fma.c +2557 -0
- data/ext/minimap2/lib/simde/test/x86/meson.build +33 -0
- data/ext/minimap2/lib/simde/test/x86/mmx.c +2878 -0
- data/ext/minimap2/lib/simde/test/x86/skel.c +2984 -0
- data/ext/minimap2/lib/simde/test/x86/sse.c +5121 -0
- data/ext/minimap2/lib/simde/test/x86/sse2.c +9860 -0
- data/ext/minimap2/lib/simde/test/x86/sse3.c +486 -0
- data/ext/minimap2/lib/simde/test/x86/sse4.1.c +3446 -0
- data/ext/minimap2/lib/simde/test/x86/sse4.2.c +101 -0
- data/ext/minimap2/lib/simde/test/x86/ssse3.c +2084 -0
- data/ext/minimap2/lib/simde/test/x86/svml.c +1545 -0
- data/ext/minimap2/lib/simde/test/x86/test-avx.h +16 -0
- data/ext/minimap2/lib/simde/test/x86/test-avx512.h +25 -0
- data/ext/minimap2/lib/simde/test/x86/test-mmx.h +13 -0
- data/ext/minimap2/lib/simde/test/x86/test-sse.h +13 -0
- data/ext/minimap2/lib/simde/test/x86/test-sse2.h +13 -0
- data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +196 -0
- data/ext/minimap2/lib/simde/test/x86/test-x86.c +48 -0
- data/ext/minimap2/lib/simde/test/x86/test-x86.h +8 -0
- data/ext/minimap2/main.c +13 -6
- data/ext/minimap2/map.c +0 -5
- data/ext/minimap2/minimap.h +40 -31
- data/ext/minimap2/minimap2.1 +19 -5
- data/ext/minimap2/misc/paftools.js +545 -24
- data/ext/minimap2/options.c +1 -1
- data/ext/minimap2/pyproject.toml +2 -0
- data/ext/minimap2/python/mappy.pyx +3 -1
- data/ext/minimap2/seed.c +1 -1
- data/ext/minimap2/setup.py +32 -22
- data/lib/minimap2/version.rb +1 -1
- metadata +100 -3
|
@@ -0,0 +1,2210 @@
|
|
|
1
|
+
/* Copyright (c) 2017-2020 Evan Nemerson <evan@nemerson.com>
|
|
2
|
+
*
|
|
3
|
+
* Permission is hereby granted, free of charge, to any person
|
|
4
|
+
* obtaining a copy of this software and associated documentation
|
|
5
|
+
* files (the "Software"), to deal in the Software without
|
|
6
|
+
* restriction, including without limitation the rights to use, copy,
|
|
7
|
+
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
|
8
|
+
* of the Software, and to permit persons to whom the Software is
|
|
9
|
+
* furnished to do so, subject to the following conditions:
|
|
10
|
+
*
|
|
11
|
+
* The above copyright notice and this permission notice shall be
|
|
12
|
+
* included in all copies or substantial portions of the Software.
|
|
13
|
+
*
|
|
14
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
15
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
16
|
+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
17
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
18
|
+
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
19
|
+
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
20
|
+
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
* SOFTWARE.
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
#if !defined(SIMDE__MMX_H)
|
|
25
|
+
# if !defined(SIMDE__MMX_H)
|
|
26
|
+
# define SIMDE__MMX_H
|
|
27
|
+
# endif
|
|
28
|
+
# include "../simde-common.h"
|
|
29
|
+
|
|
30
|
+
HEDLEY_DIAGNOSTIC_PUSH
|
|
31
|
+
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
|
32
|
+
|
|
33
|
+
# if defined(SIMDE_MMX_FORCE_NATIVE)
|
|
34
|
+
# define SIMDE_MMX_NATIVE
|
|
35
|
+
# elif defined(SIMDE_ARCH_X86_MMX) && !defined(SIMDE_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
|
|
36
|
+
# define SIMDE_MMX_NATIVE
|
|
37
|
+
# elif defined(SIMDE_ARCH_ARM_NEON) && !defined(SIMDE_MMX_NO_NEON) && !defined(SIMDE_NO_NEON)
|
|
38
|
+
# define SIMDE_MMX_NEON
|
|
39
|
+
# endif
|
|
40
|
+
|
|
41
|
+
# if defined(SIMDE_MMX_NATIVE)
|
|
42
|
+
# define SIMDE_MMX_USE_NATIVE_TYPE
|
|
43
|
+
# elif defined(SIMDE_ARCH_X86_SSE)
|
|
44
|
+
# define SIMDE_MMX_USE_NATIVE_TYPE
|
|
45
|
+
# endif
|
|
46
|
+
|
|
47
|
+
# if defined(SIMDE_MMX_USE_NATIVE_TYPE)
|
|
48
|
+
# include <mmintrin.h>
|
|
49
|
+
# else
|
|
50
|
+
# if defined(SIMDE_MMX_NEON)
|
|
51
|
+
# include <arm_neon.h>
|
|
52
|
+
# endif
|
|
53
|
+
# endif
|
|
54
|
+
# include <stdint.h>
|
|
55
|
+
# include <limits.h>
|
|
56
|
+
|
|
57
|
+
SIMDE__BEGIN_DECLS
|
|
58
|
+
|
|
59
|
+
typedef union {
|
|
60
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT)
|
|
61
|
+
SIMDE_ALIGN(8) int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
62
|
+
SIMDE_ALIGN(8) int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
63
|
+
SIMDE_ALIGN(8) int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
64
|
+
SIMDE_ALIGN(8) int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
65
|
+
SIMDE_ALIGN(8) uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
66
|
+
SIMDE_ALIGN(8) uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
67
|
+
SIMDE_ALIGN(8) uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
68
|
+
SIMDE_ALIGN(8) uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
69
|
+
SIMDE_ALIGN(8) simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
70
|
+
SIMDE_ALIGN(8) int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
71
|
+
SIMDE_ALIGN(8) uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
72
|
+
#else
|
|
73
|
+
SIMDE_ALIGN(8) int8_t i8[8];
|
|
74
|
+
SIMDE_ALIGN(8) int16_t i16[4];
|
|
75
|
+
SIMDE_ALIGN(8) int32_t i32[2];
|
|
76
|
+
SIMDE_ALIGN(8) int64_t i64[1];
|
|
77
|
+
SIMDE_ALIGN(8) uint8_t u8[8];
|
|
78
|
+
SIMDE_ALIGN(8) uint16_t u16[4];
|
|
79
|
+
SIMDE_ALIGN(8) uint32_t u32[2];
|
|
80
|
+
SIMDE_ALIGN(8) uint64_t u64[1];
|
|
81
|
+
SIMDE_ALIGN(8) simde_float32 f32[2];
|
|
82
|
+
SIMDE_ALIGN(8) int_fast32_t i32f[8 / sizeof(int_fast32_t)];
|
|
83
|
+
SIMDE_ALIGN(8) uint_fast32_t u32f[8 / sizeof(uint_fast32_t)];
|
|
84
|
+
#endif
|
|
85
|
+
|
|
86
|
+
#if defined(SIMDE_MMX_USE_NATIVE_TYPE)
|
|
87
|
+
__m64 n;
|
|
88
|
+
#endif
|
|
89
|
+
#if defined(SIMDE_MMX_NEON)
|
|
90
|
+
int8x8_t neon_i8;
|
|
91
|
+
int16x4_t neon_i16;
|
|
92
|
+
int32x2_t neon_i32;
|
|
93
|
+
int64x1_t neon_i64;
|
|
94
|
+
uint8x8_t neon_u8;
|
|
95
|
+
uint16x4_t neon_u16;
|
|
96
|
+
uint32x2_t neon_u32;
|
|
97
|
+
uint64x1_t neon_u64;
|
|
98
|
+
float32x2_t neon_f32;
|
|
99
|
+
#endif
|
|
100
|
+
} simde__m64_private;
|
|
101
|
+
|
|
102
|
+
#if defined(SIMDE_MMX_USE_NATIVE_TYPE)
|
|
103
|
+
typedef __m64 simde__m64;
|
|
104
|
+
#elif defined(SIMDE_MMX_NEON)
|
|
105
|
+
typedef int32x2_t simde__m64;
|
|
106
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT)
|
|
107
|
+
typedef int32_t simde__m64 SIMDE_ALIGN(8) SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
108
|
+
#else
|
|
109
|
+
typedef simde__m64_private simde__m64;
|
|
110
|
+
#endif
|
|
111
|
+
|
|
112
|
+
#if !defined(SIMDE_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
|
|
113
|
+
#define SIMDE_MMX_ENABLE_NATIVE_ALIASES
|
|
114
|
+
typedef simde__m64 __m64;
|
|
115
|
+
#endif
|
|
116
|
+
|
|
117
|
+
HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect");
|
|
118
|
+
HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect");
|
|
119
|
+
#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)
|
|
120
|
+
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned");
|
|
121
|
+
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned");
|
|
122
|
+
#endif
|
|
123
|
+
|
|
124
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
125
|
+
simde__m64
|
|
126
|
+
simde__m64_from_private(simde__m64_private v) {
|
|
127
|
+
simde__m64 r;
|
|
128
|
+
simde_memcpy(&r, &v, sizeof(r));
|
|
129
|
+
return r;
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
133
|
+
simde__m64_private
|
|
134
|
+
simde__m64_to_private(simde__m64 v) {
|
|
135
|
+
simde__m64_private r;
|
|
136
|
+
simde_memcpy(&r, &v, sizeof(r));
|
|
137
|
+
return r;
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
141
|
+
simde__m64
|
|
142
|
+
simde_mm_add_pi8 (simde__m64 a, simde__m64 b) {
|
|
143
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
144
|
+
return _mm_add_pi8(a, b);
|
|
145
|
+
#else
|
|
146
|
+
simde__m64_private r_;
|
|
147
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
148
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
149
|
+
|
|
150
|
+
#if defined(SIMDE_MMX_NEON)
|
|
151
|
+
r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8);
|
|
152
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
153
|
+
r_.i8 = a_.i8 + b_.i8;
|
|
154
|
+
#else
|
|
155
|
+
SIMDE__VECTORIZE
|
|
156
|
+
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
|
|
157
|
+
r_.i8[i] = a_.i8[i] + b_.i8[i];
|
|
158
|
+
}
|
|
159
|
+
#endif
|
|
160
|
+
|
|
161
|
+
return simde__m64_from_private(r_);
|
|
162
|
+
#endif
|
|
163
|
+
}
|
|
164
|
+
#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b)
|
|
165
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
166
|
+
# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b)
|
|
167
|
+
# define _m_paddb(a, b) simde_m_paddb(a, b)
|
|
168
|
+
#endif
|
|
169
|
+
|
|
170
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
171
|
+
simde__m64
|
|
172
|
+
simde_mm_add_pi16 (simde__m64 a, simde__m64 b) {
|
|
173
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
174
|
+
return _mm_add_pi16(a, b);
|
|
175
|
+
#else
|
|
176
|
+
simde__m64_private r_;
|
|
177
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
178
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
179
|
+
|
|
180
|
+
#if defined(SIMDE_MMX_NEON)
|
|
181
|
+
r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16);
|
|
182
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
183
|
+
r_.i16 = a_.i16 + b_.i16;
|
|
184
|
+
#else
|
|
185
|
+
SIMDE__VECTORIZE
|
|
186
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
187
|
+
r_.i16[i] = a_.i16[i] + b_.i16[i];
|
|
188
|
+
}
|
|
189
|
+
#endif
|
|
190
|
+
|
|
191
|
+
return simde__m64_from_private(r_);
|
|
192
|
+
#endif
|
|
193
|
+
}
|
|
194
|
+
#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b)
|
|
195
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
196
|
+
# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b)
|
|
197
|
+
# define _m_paddw(a, b) simde_mm_add_pi16(a, b) /* native alias for the PADDW mnemonic form */
|
|
198
|
+
#endif
|
|
199
|
+
|
|
200
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
201
|
+
simde__m64
|
|
202
|
+
simde_mm_add_pi32 (simde__m64 a, simde__m64 b) {
|
|
203
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
204
|
+
return _mm_add_pi32(a, b);
|
|
205
|
+
#else
|
|
206
|
+
simde__m64_private r_;
|
|
207
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
208
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
209
|
+
|
|
210
|
+
#if defined(SIMDE_MMX_NEON)
|
|
211
|
+
r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32);
|
|
212
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
213
|
+
r_.i32 = a_.i32 + b_.i32;
|
|
214
|
+
#else
|
|
215
|
+
SIMDE__VECTORIZE
|
|
216
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
217
|
+
r_.i32[i] = a_.i32[i] + b_.i32[i];
|
|
218
|
+
}
|
|
219
|
+
#endif
|
|
220
|
+
|
|
221
|
+
return simde__m64_from_private(r_);
|
|
222
|
+
#endif
|
|
223
|
+
}
|
|
224
|
+
#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b)
|
|
225
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
226
|
+
# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b)
|
|
227
|
+
# define _m_paddd(a, b) simde_mm_add_pi32(a, b) /* native alias for the PADDD mnemonic form */
|
|
228
|
+
#endif
|
|
229
|
+
|
|
230
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
231
|
+
simde__m64
|
|
232
|
+
simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) {
|
|
233
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
234
|
+
return _mm_adds_pi8(a, b);
|
|
235
|
+
#else
|
|
236
|
+
simde__m64_private r_;
|
|
237
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
238
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
239
|
+
|
|
240
|
+
#if defined(SIMDE_MMX_NEON)
|
|
241
|
+
r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8);
|
|
242
|
+
#else
|
|
243
|
+
SIMDE__VECTORIZE
|
|
244
|
+
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
|
|
245
|
+
if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) {
|
|
246
|
+
r_.i8[i] = INT8_MAX;
|
|
247
|
+
} else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) {
|
|
248
|
+
r_.i8[i] = INT8_MIN;
|
|
249
|
+
} else {
|
|
250
|
+
r_.i8[i] = (a_.i8[i]) + (b_.i8[i]);
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
#endif
|
|
254
|
+
|
|
255
|
+
return simde__m64_from_private(r_);
|
|
256
|
+
#endif
|
|
257
|
+
}
|
|
258
|
+
#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b)
|
|
259
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
260
|
+
# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b)
|
|
261
|
+
# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) /* native alias for the PADDSB mnemonic form */
|
|
262
|
+
#endif
|
|
263
|
+
|
|
264
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
265
|
+
simde__m64
|
|
266
|
+
simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) {
|
|
267
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
268
|
+
return _mm_adds_pu8(a, b);
|
|
269
|
+
#else
|
|
270
|
+
simde__m64_private r_;
|
|
271
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
272
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
273
|
+
|
|
274
|
+
#if defined(SIMDE_MMX_NEON)
|
|
275
|
+
r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8);
|
|
276
|
+
#else
|
|
277
|
+
SIMDE__VECTORIZE
|
|
278
|
+
for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
|
|
279
|
+
const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]);
|
|
280
|
+
if (x > UINT8_MAX)
|
|
281
|
+
r_.u8[i] = UINT8_MAX;
|
|
282
|
+
else
|
|
283
|
+
r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x);
|
|
284
|
+
}
|
|
285
|
+
#endif
|
|
286
|
+
|
|
287
|
+
return simde__m64_from_private(r_);
|
|
288
|
+
#endif
|
|
289
|
+
}
|
|
290
|
+
#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b)
|
|
291
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
292
|
+
# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b)
|
|
293
|
+
# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b)
|
|
294
|
+
#endif
|
|
295
|
+
|
|
296
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
297
|
+
simde__m64
|
|
298
|
+
simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) {
|
|
299
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
300
|
+
return _mm_adds_pi16(a, b);
|
|
301
|
+
#else
|
|
302
|
+
simde__m64_private r_;
|
|
303
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
304
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
305
|
+
|
|
306
|
+
#if defined(SIMDE_MMX_NEON)
|
|
307
|
+
r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16);
|
|
308
|
+
#else
|
|
309
|
+
SIMDE__VECTORIZE
|
|
310
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
311
|
+
if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) {
|
|
312
|
+
r_.i16[i] = INT16_MAX;
|
|
313
|
+
} else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) {
|
|
314
|
+
r_.i16[i] = SHRT_MIN;
|
|
315
|
+
} else {
|
|
316
|
+
r_.i16[i] = (a_.i16[i]) + (b_.i16[i]);
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
#endif
|
|
320
|
+
|
|
321
|
+
return simde__m64_from_private(r_);
|
|
322
|
+
#endif
|
|
323
|
+
}
|
|
324
|
+
#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b)
|
|
325
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
326
|
+
# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b)
|
|
327
|
+
# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b)
|
|
328
|
+
#endif
|
|
329
|
+
|
|
330
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
331
|
+
simde__m64
|
|
332
|
+
simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) {
|
|
333
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
334
|
+
return _mm_adds_pu16(a, b);
|
|
335
|
+
#else
|
|
336
|
+
simde__m64_private r_;
|
|
337
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
338
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
339
|
+
|
|
340
|
+
#if defined(SIMDE_MMX_NEON)
|
|
341
|
+
r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16);
|
|
342
|
+
#else
|
|
343
|
+
SIMDE__VECTORIZE
|
|
344
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
345
|
+
const uint32_t x = a_.u16[i] + b_.u16[i];
|
|
346
|
+
if (x > UINT16_MAX)
|
|
347
|
+
r_.u16[i] = UINT16_MAX;
|
|
348
|
+
else
|
|
349
|
+
r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x);
|
|
350
|
+
}
|
|
351
|
+
#endif
|
|
352
|
+
|
|
353
|
+
return simde__m64_from_private(r_);
|
|
354
|
+
#endif
|
|
355
|
+
}
|
|
356
|
+
#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b)
|
|
357
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
358
|
+
# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b)
|
|
359
|
+
# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b)
|
|
360
|
+
#endif
|
|
361
|
+
|
|
362
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
363
|
+
simde__m64
|
|
364
|
+
simde_mm_and_si64 (simde__m64 a, simde__m64 b) {
|
|
365
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
366
|
+
return _mm_and_si64(a, b);
|
|
367
|
+
#else
|
|
368
|
+
simde__m64_private r_;
|
|
369
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
370
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
371
|
+
|
|
372
|
+
#if defined(SIMDE_MMX_NEON)
|
|
373
|
+
r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32);
|
|
374
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
375
|
+
r_.i64 = a_.i64 & b_.i64;
|
|
376
|
+
#else
|
|
377
|
+
r_.i64[0] = a_.i64[0] & b_.i64[0];
|
|
378
|
+
#endif
|
|
379
|
+
|
|
380
|
+
return simde__m64_from_private(r_);
|
|
381
|
+
#endif
|
|
382
|
+
}
|
|
383
|
+
#define simde_m_pand(a, b) simde_mm_and_si64(a, b)
|
|
384
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
385
|
+
# define _mm_and_si64(a, b) simde_mm_and_si64(a, b)
|
|
386
|
+
# define _m_pand(a, b) simde_mm_and_si64(a, b)
|
|
387
|
+
#endif
|
|
388
|
+
|
|
389
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
390
|
+
simde__m64
|
|
391
|
+
simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) {
|
|
392
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
393
|
+
return _mm_andnot_si64(a, b);
|
|
394
|
+
#else
|
|
395
|
+
simde__m64_private r_;
|
|
396
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
397
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
398
|
+
|
|
399
|
+
#if defined(SIMDE_MMX_NEON)
|
|
400
|
+
r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32);
|
|
401
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
402
|
+
r_.i32f = ~a_.i32f & b_.i32f;
|
|
403
|
+
#else
|
|
404
|
+
r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]);
|
|
405
|
+
#endif
|
|
406
|
+
|
|
407
|
+
return simde__m64_from_private(r_);
|
|
408
|
+
#endif
|
|
409
|
+
}
|
|
410
|
+
#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b)
|
|
411
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
412
|
+
# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b)
|
|
413
|
+
# define _m_pandn(a, b) simde_mm_andnot_si64(a, b)
|
|
414
|
+
#endif
|
|
415
|
+
|
|
416
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
417
|
+
simde__m64
|
|
418
|
+
simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) {
|
|
419
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
420
|
+
return _mm_cmpeq_pi8(a, b);
|
|
421
|
+
#else
|
|
422
|
+
simde__m64_private r_;
|
|
423
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
424
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
425
|
+
|
|
426
|
+
#if defined(SIMDE_MMX_NEON)
|
|
427
|
+
r_.neon_i8 = vreinterpret_s8_u8(vceq_s8(a_.neon_i8, b_.neon_i8));
|
|
428
|
+
#else
|
|
429
|
+
SIMDE__VECTORIZE
|
|
430
|
+
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
|
|
431
|
+
r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
|
|
432
|
+
}
|
|
433
|
+
#endif
|
|
434
|
+
|
|
435
|
+
return simde__m64_from_private(r_);
|
|
436
|
+
#endif
|
|
437
|
+
}
|
|
438
|
+
#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b)
|
|
439
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
440
|
+
# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b)
|
|
441
|
+
# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b)
|
|
442
|
+
#endif
|
|
443
|
+
|
|
444
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
445
|
+
simde__m64
|
|
446
|
+
simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) {
|
|
447
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
448
|
+
return _mm_cmpeq_pi16(a, b);
|
|
449
|
+
#else
|
|
450
|
+
simde__m64_private r_;
|
|
451
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
452
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
453
|
+
|
|
454
|
+
#if defined(SIMDE_MMX_NEON)
|
|
455
|
+
r_.neon_i16 = vreinterpret_s16_u16(vceq_s16(a_.neon_i16, b_.neon_i16));
|
|
456
|
+
#else
|
|
457
|
+
SIMDE__VECTORIZE
|
|
458
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
459
|
+
r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
|
|
460
|
+
}
|
|
461
|
+
#endif
|
|
462
|
+
|
|
463
|
+
return simde__m64_from_private(r_);
|
|
464
|
+
#endif
|
|
465
|
+
}
|
|
466
|
+
#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b)
|
|
467
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
468
|
+
# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b)
|
|
469
|
+
# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b)
|
|
470
|
+
#endif
|
|
471
|
+
|
|
472
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
473
|
+
simde__m64
|
|
474
|
+
simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) {
|
|
475
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
476
|
+
return _mm_cmpeq_pi32(a, b);
|
|
477
|
+
#else
|
|
478
|
+
simde__m64_private r_;
|
|
479
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
480
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
481
|
+
|
|
482
|
+
#if defined(SIMDE_MMX_NEON)
|
|
483
|
+
r_.neon_i32 = vreinterpret_s32_u32(vceq_s32(a_.neon_i32, b_.neon_i32));
|
|
484
|
+
#else
|
|
485
|
+
SIMDE__VECTORIZE
|
|
486
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
487
|
+
r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
|
|
488
|
+
}
|
|
489
|
+
#endif
|
|
490
|
+
|
|
491
|
+
return simde__m64_from_private(r_);
|
|
492
|
+
#endif
|
|
493
|
+
}
|
|
494
|
+
#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b)
|
|
495
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
496
|
+
# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b)
|
|
497
|
+
# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b)
|
|
498
|
+
#endif
|
|
499
|
+
|
|
500
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
501
|
+
simde__m64
|
|
502
|
+
simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) {
|
|
503
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
504
|
+
return _mm_cmpgt_pi8(a, b);
|
|
505
|
+
#else
|
|
506
|
+
simde__m64_private r_;
|
|
507
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
508
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
509
|
+
|
|
510
|
+
#if defined(SIMDE_MMX_NEON)
|
|
511
|
+
r_.neon_i8 = vreinterpret_s8_u8(vcgt_s8(a_.neon_i8, b_.neon_i8));
|
|
512
|
+
#else
|
|
513
|
+
SIMDE__VECTORIZE
|
|
514
|
+
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
|
|
515
|
+
r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
|
|
516
|
+
}
|
|
517
|
+
#endif
|
|
518
|
+
|
|
519
|
+
return simde__m64_from_private(r_);
|
|
520
|
+
#endif
|
|
521
|
+
}
|
|
522
|
+
#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b)
|
|
523
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
524
|
+
# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b)
|
|
525
|
+
# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b)
|
|
526
|
+
#endif
|
|
527
|
+
|
|
528
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
529
|
+
simde__m64
|
|
530
|
+
simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) {
|
|
531
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
532
|
+
return _mm_cmpgt_pi16(a, b);
|
|
533
|
+
#else
|
|
534
|
+
simde__m64_private r_;
|
|
535
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
536
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
537
|
+
|
|
538
|
+
#if defined(SIMDE_MMX_NEON)
|
|
539
|
+
r_.neon_i16 = vreinterpret_s16_u16(vcgt_s16(a_.neon_i16, b_.neon_i16));
|
|
540
|
+
#else
|
|
541
|
+
SIMDE__VECTORIZE
|
|
542
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
543
|
+
r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
|
|
544
|
+
}
|
|
545
|
+
#endif
|
|
546
|
+
|
|
547
|
+
return simde__m64_from_private(r_);
|
|
548
|
+
#endif
|
|
549
|
+
}
|
|
550
|
+
#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b)
|
|
551
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
552
|
+
# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b)
|
|
553
|
+
# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b)
|
|
554
|
+
#endif
|
|
555
|
+
|
|
556
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
557
|
+
simde__m64
|
|
558
|
+
simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) {
|
|
559
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
560
|
+
return _mm_cmpgt_pi32(a, b);
|
|
561
|
+
#else
|
|
562
|
+
simde__m64_private r_;
|
|
563
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
564
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
565
|
+
|
|
566
|
+
#if defined(SIMDE_MMX_NEON)
|
|
567
|
+
r_.neon_i32 = vreinterpret_s32_u32(vcgt_s32(a_.neon_i32, b_.neon_i32));
|
|
568
|
+
#else
|
|
569
|
+
SIMDE__VECTORIZE
|
|
570
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
571
|
+
r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
|
|
572
|
+
}
|
|
573
|
+
#endif
|
|
574
|
+
|
|
575
|
+
return simde__m64_from_private(r_);
|
|
576
|
+
#endif
|
|
577
|
+
}
|
|
578
|
+
#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b)
|
|
579
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
580
|
+
# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b)
|
|
581
|
+
# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b)
|
|
582
|
+
#endif
|
|
583
|
+
|
|
584
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
585
|
+
int64_t
|
|
586
|
+
simde_mm_cvtm64_si64 (simde__m64 a) {
|
|
587
|
+
#if defined(SIMDE_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI)
|
|
588
|
+
return _mm_cvtm64_si64(a);
|
|
589
|
+
#else
|
|
590
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
591
|
+
|
|
592
|
+
#if defined(SIMDE_MMX_NEON)
|
|
593
|
+
return vget_lane_s64(a_.neon_i64, 0);
|
|
594
|
+
#else
|
|
595
|
+
return a_.i64[0];
|
|
596
|
+
#endif
|
|
597
|
+
#endif
|
|
598
|
+
}
|
|
599
|
+
#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a)
|
|
600
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
601
|
+
# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a)
|
|
602
|
+
# define _m_to_int64(a) simde_mm_cvtm64_si64(a)
|
|
603
|
+
#endif
|
|
604
|
+
|
|
605
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
606
|
+
simde__m64
|
|
607
|
+
simde_mm_cvtsi32_si64 (int32_t a) {
|
|
608
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
609
|
+
return _mm_cvtsi32_si64(a);
|
|
610
|
+
#else
|
|
611
|
+
simde__m64_private r_;
|
|
612
|
+
|
|
613
|
+
#if defined(SIMDE_MMX_NEON)
|
|
614
|
+
const int32_t av[sizeof(r_.neon_i32) / sizeof(r_.neon_i32[0])] = { a, 0 };
|
|
615
|
+
r_.neon_i32 = vld1_s32(av);
|
|
616
|
+
#else
|
|
617
|
+
r_.i32[0] = a;
|
|
618
|
+
r_.i32[1] = 0;
|
|
619
|
+
#endif
|
|
620
|
+
|
|
621
|
+
return simde__m64_from_private(r_);
|
|
622
|
+
#endif
|
|
623
|
+
}
|
|
624
|
+
#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a)
|
|
625
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
626
|
+
# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a)
|
|
627
|
+
# define _m_from_int(a) simde_mm_cvtsi32_si64(a)
|
|
628
|
+
#endif
|
|
629
|
+
|
|
630
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
631
|
+
simde__m64
|
|
632
|
+
simde_mm_cvtsi64_m64 (int64_t a) {
|
|
633
|
+
#if defined(SIMDE_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI)
|
|
634
|
+
return _mm_cvtsi64_m64(a);
|
|
635
|
+
#else
|
|
636
|
+
simde__m64_private r_;
|
|
637
|
+
|
|
638
|
+
#if defined(SIMDE_MMX_NEON)
|
|
639
|
+
r_.neon_i64 = vld1_s64(&a);
|
|
640
|
+
#else
|
|
641
|
+
r_.i64[0] = a;
|
|
642
|
+
#endif
|
|
643
|
+
|
|
644
|
+
return simde__m64_from_private(r_);
|
|
645
|
+
#endif
|
|
646
|
+
}
|
|
647
|
+
#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a)
|
|
648
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
649
|
+
# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a)
|
|
650
|
+
# define _m_from_int64(a) simde_mm_cvtsi64_m64(a)
|
|
651
|
+
#endif
|
|
652
|
+
|
|
653
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
654
|
+
int32_t
|
|
655
|
+
simde_mm_cvtsi64_si32 (simde__m64 a) {
|
|
656
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
657
|
+
return _mm_cvtsi64_si32(a);
|
|
658
|
+
#else
|
|
659
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
660
|
+
|
|
661
|
+
#if defined(SIMDE_MMX_NEON)
|
|
662
|
+
return vget_lane_s32(a_.neon_i32, 0);
|
|
663
|
+
#else
|
|
664
|
+
return a_.i32[0];
|
|
665
|
+
#endif
|
|
666
|
+
#endif
|
|
667
|
+
}
|
|
668
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
669
|
+
# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a)
|
|
670
|
+
#endif
|
|
671
|
+
|
|
672
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
673
|
+
void
|
|
674
|
+
simde_mm_empty (void) {
|
|
675
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
676
|
+
_mm_empty();
|
|
677
|
+
#else
|
|
678
|
+
#endif
|
|
679
|
+
}
|
|
680
|
+
#define simde_m_empty() simde_mm_empty()
|
|
681
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
682
|
+
# define _mm_empty() simde_mm_empty()
|
|
683
|
+
# define _m_empty() simde_mm_empty()
|
|
684
|
+
#endif
|
|
685
|
+
|
|
686
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
687
|
+
simde__m64
|
|
688
|
+
simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) {
|
|
689
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
690
|
+
return _mm_madd_pi16(a, b);
|
|
691
|
+
#else
|
|
692
|
+
simde__m64_private r_;
|
|
693
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
694
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
695
|
+
|
|
696
|
+
#if defined(SIMDE_MMX_NEON)
|
|
697
|
+
int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16);
|
|
698
|
+
r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1));
|
|
699
|
+
#else
|
|
700
|
+
SIMDE__VECTORIZE
|
|
701
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) {
|
|
702
|
+
r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]);
|
|
703
|
+
}
|
|
704
|
+
#endif
|
|
705
|
+
|
|
706
|
+
return simde__m64_from_private(r_);
|
|
707
|
+
#endif
|
|
708
|
+
}
|
|
709
|
+
#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b)
|
|
710
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
711
|
+
# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b)
|
|
712
|
+
# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b)
|
|
713
|
+
#endif
|
|
714
|
+
|
|
715
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
716
|
+
simde__m64
|
|
717
|
+
simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) {
|
|
718
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
719
|
+
return _mm_mulhi_pi16(a, b);
|
|
720
|
+
#else
|
|
721
|
+
simde__m64_private r_;
|
|
722
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
723
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
724
|
+
|
|
725
|
+
#if defined(SIMDE_MMX_NEON)
|
|
726
|
+
const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16);
|
|
727
|
+
const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16);
|
|
728
|
+
const uint16x4_t t3 = vmovn_u32(t2);
|
|
729
|
+
r_.neon_i16 = vreinterpret_s16_u16(t3);
|
|
730
|
+
#else
|
|
731
|
+
SIMDE__VECTORIZE
|
|
732
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
733
|
+
r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16));
|
|
734
|
+
}
|
|
735
|
+
#endif
|
|
736
|
+
|
|
737
|
+
return simde__m64_from_private(r_);
|
|
738
|
+
#endif
|
|
739
|
+
}
|
|
740
|
+
#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b)
|
|
741
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
742
|
+
# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b)
|
|
743
|
+
# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b)
|
|
744
|
+
#endif
|
|
745
|
+
|
|
746
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
747
|
+
simde__m64
|
|
748
|
+
simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) {
|
|
749
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
750
|
+
return _mm_mullo_pi16(a, b);
|
|
751
|
+
#else
|
|
752
|
+
simde__m64_private r_;
|
|
753
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
754
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
755
|
+
|
|
756
|
+
#if defined(SIMDE_MMX_NEON)
|
|
757
|
+
const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16);
|
|
758
|
+
const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1));
|
|
759
|
+
r_.neon_i16 = vreinterpret_s16_u16(t2);
|
|
760
|
+
#else
|
|
761
|
+
SIMDE__VECTORIZE
|
|
762
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
763
|
+
r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff));
|
|
764
|
+
}
|
|
765
|
+
#endif
|
|
766
|
+
|
|
767
|
+
return simde__m64_from_private(r_);
|
|
768
|
+
#endif
|
|
769
|
+
}
|
|
770
|
+
#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b)
|
|
771
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
772
|
+
# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b)
|
|
773
|
+
# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b)
|
|
774
|
+
#endif
|
|
775
|
+
|
|
776
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
777
|
+
simde__m64
|
|
778
|
+
simde_mm_or_si64 (simde__m64 a, simde__m64 b) {
|
|
779
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
780
|
+
return _mm_or_si64(a, b);
|
|
781
|
+
#else
|
|
782
|
+
simde__m64_private r_;
|
|
783
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
784
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
785
|
+
|
|
786
|
+
#if defined(SIMDE_MMX_NEON)
|
|
787
|
+
r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32);
|
|
788
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
789
|
+
r_.i64 = a_.i64 | b_.i64;
|
|
790
|
+
#else
|
|
791
|
+
r_.i64[0] = a_.i64[0] | b_.i64[0];
|
|
792
|
+
#endif
|
|
793
|
+
|
|
794
|
+
return simde__m64_from_private(r_);
|
|
795
|
+
#endif
|
|
796
|
+
}
|
|
797
|
+
#define simde_m_por(a, b) simde_mm_or_si64(a, b)
|
|
798
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
799
|
+
# define _mm_or_si64(a, b) simde_mm_or_si64(a, b)
|
|
800
|
+
# define _m_por(a, b) simde_mm_or_si64(a, b)
|
|
801
|
+
#endif
|
|
802
|
+
|
|
803
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
804
|
+
simde__m64
|
|
805
|
+
simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) {
|
|
806
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
807
|
+
return _mm_packs_pi16(a, b);
|
|
808
|
+
#else
|
|
809
|
+
simde__m64_private r_;
|
|
810
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
811
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
812
|
+
|
|
813
|
+
#if defined(SIMDE_MMX_NEON)
|
|
814
|
+
r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16));
|
|
815
|
+
#else
|
|
816
|
+
SIMDE__VECTORIZE
|
|
817
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
818
|
+
if (a_.i16[i] < INT8_MIN) {
|
|
819
|
+
r_.i8[i] = INT8_MIN;
|
|
820
|
+
} else if (a_.i16[i] > INT8_MAX) {
|
|
821
|
+
r_.i8[i] = INT8_MAX;
|
|
822
|
+
} else {
|
|
823
|
+
r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]);
|
|
824
|
+
}
|
|
825
|
+
}
|
|
826
|
+
|
|
827
|
+
SIMDE__VECTORIZE
|
|
828
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
829
|
+
if (b_.i16[i] < INT8_MIN) {
|
|
830
|
+
r_.i8[i + 4] = INT8_MIN;
|
|
831
|
+
} else if (b_.i16[i] > INT8_MAX) {
|
|
832
|
+
r_.i8[i + 4] = INT8_MAX;
|
|
833
|
+
} else {
|
|
834
|
+
r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]);
|
|
835
|
+
}
|
|
836
|
+
}
|
|
837
|
+
#endif
|
|
838
|
+
|
|
839
|
+
return simde__m64_from_private(r_);
|
|
840
|
+
#endif
|
|
841
|
+
}
|
|
842
|
+
#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b)
|
|
843
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
844
|
+
# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b)
|
|
845
|
+
/* Native-alias spelling of packsswb; it must forward to the simde_-prefixed
 * implementation (an unprefixed mm_packs_pi16 is not defined anywhere here). */
# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b)
|
|
846
|
+
#endif
|
|
847
|
+
|
|
848
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
849
|
+
simde__m64
|
|
850
|
+
simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) {
|
|
851
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
852
|
+
return _mm_packs_pi32(a, b);
|
|
853
|
+
#else
|
|
854
|
+
simde__m64_private r_;
|
|
855
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
856
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
857
|
+
|
|
858
|
+
#if defined(SIMDE_MMX_NEON)
|
|
859
|
+
r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32));
|
|
860
|
+
#else
|
|
861
|
+
SIMDE__VECTORIZE
|
|
862
|
+
for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) {
|
|
863
|
+
if (a_.i32[i] < SHRT_MIN) {
|
|
864
|
+
r_.i16[i] = SHRT_MIN;
|
|
865
|
+
} else if (a_.i32[i] > INT16_MAX) {
|
|
866
|
+
r_.i16[i] = INT16_MAX;
|
|
867
|
+
} else {
|
|
868
|
+
r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]);
|
|
869
|
+
}
|
|
870
|
+
}
|
|
871
|
+
|
|
872
|
+
SIMDE__VECTORIZE
|
|
873
|
+
for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) {
|
|
874
|
+
if (b_.i32[i] < SHRT_MIN) {
|
|
875
|
+
r_.i16[i + 2] = SHRT_MIN;
|
|
876
|
+
} else if (b_.i32[i] > INT16_MAX) {
|
|
877
|
+
r_.i16[i + 2] = INT16_MAX;
|
|
878
|
+
} else {
|
|
879
|
+
r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]);
|
|
880
|
+
}
|
|
881
|
+
}
|
|
882
|
+
#endif
|
|
883
|
+
|
|
884
|
+
return simde__m64_from_private(r_);
|
|
885
|
+
#endif
|
|
886
|
+
}
|
|
887
|
+
#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b)
|
|
888
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
889
|
+
# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b)
|
|
890
|
+
# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b)
|
|
891
|
+
#endif
|
|
892
|
+
|
|
893
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
894
|
+
simde__m64
|
|
895
|
+
simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) {
|
|
896
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
897
|
+
return _mm_packs_pu16(a, b);
|
|
898
|
+
#else
|
|
899
|
+
simde__m64_private r_;
|
|
900
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
901
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
902
|
+
|
|
903
|
+
#if defined(SIMDE_MMX_NEON) && defined(SIMDE_ARCH_AARCH64)
|
|
904
|
+
const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16);
|
|
905
|
+
|
|
906
|
+
/* Set elements which are < 0 to 0 */
|
|
907
|
+
const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1)));
|
|
908
|
+
|
|
909
|
+
/* Vector with all s16 elements set to UINT8_MAX */
|
|
910
|
+
const int16x8_t vmax = vmovq_n_s16((int16_t) UINT8_MAX);
|
|
911
|
+
|
|
912
|
+
/* Elements which are within the acceptable range */
|
|
913
|
+
const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax)));
|
|
914
|
+
const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax)));
|
|
915
|
+
|
|
916
|
+
/* Final values as 16-bit integers */
|
|
917
|
+
const int16x8_t values = vorrq_s16(le_max, gt_max);
|
|
918
|
+
|
|
919
|
+
r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values));
|
|
920
|
+
#else
|
|
921
|
+
SIMDE__VECTORIZE
|
|
922
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
923
|
+
if (a_.i16[i] > UINT8_MAX) {
|
|
924
|
+
r_.u8[i] = UINT8_MAX;
|
|
925
|
+
} else if (a_.i16[i] < 0) {
|
|
926
|
+
r_.u8[i] = 0;
|
|
927
|
+
} else {
|
|
928
|
+
r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]);
|
|
929
|
+
}
|
|
930
|
+
}
|
|
931
|
+
|
|
932
|
+
SIMDE__VECTORIZE
|
|
933
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
934
|
+
if (b_.i16[i] > UINT8_MAX) {
|
|
935
|
+
r_.u8[i + 4] = UINT8_MAX;
|
|
936
|
+
} else if (b_.i16[i] < 0) {
|
|
937
|
+
r_.u8[i + 4] = 0;
|
|
938
|
+
} else {
|
|
939
|
+
r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]);
|
|
940
|
+
}
|
|
941
|
+
}
|
|
942
|
+
#endif
|
|
943
|
+
|
|
944
|
+
return simde__m64_from_private(r_);
|
|
945
|
+
#endif
|
|
946
|
+
}
|
|
947
|
+
#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b)
|
|
948
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
949
|
+
# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b)
|
|
950
|
+
# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b)
|
|
951
|
+
#endif
|
|
952
|
+
|
|
953
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
954
|
+
simde__m64
|
|
955
|
+
simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) {
|
|
956
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
957
|
+
return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0);
|
|
958
|
+
#else
|
|
959
|
+
simde__m64_private r_;
|
|
960
|
+
|
|
961
|
+
#if defined(SIMDE_MMX_NEON)
|
|
962
|
+
const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 };
|
|
963
|
+
r_.neon_i8 = vld1_s8(v);
|
|
964
|
+
#else
|
|
965
|
+
r_.i8[0] = e0;
|
|
966
|
+
r_.i8[1] = e1;
|
|
967
|
+
r_.i8[2] = e2;
|
|
968
|
+
r_.i8[3] = e3;
|
|
969
|
+
r_.i8[4] = e4;
|
|
970
|
+
r_.i8[5] = e5;
|
|
971
|
+
r_.i8[6] = e6;
|
|
972
|
+
r_.i8[7] = e7;
|
|
973
|
+
#endif
|
|
974
|
+
|
|
975
|
+
return simde__m64_from_private(r_);
|
|
976
|
+
#endif
|
|
977
|
+
}
|
|
978
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
979
|
+
# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0)
|
|
980
|
+
#endif
|
|
981
|
+
|
|
982
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
983
|
+
simde__m64
|
|
984
|
+
simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) {
|
|
985
|
+
simde__m64_private r_;
|
|
986
|
+
|
|
987
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
988
|
+
r_.n = _mm_set_pi8(
|
|
989
|
+
HEDLEY_STATIC_CAST(int8_t, e7),
|
|
990
|
+
HEDLEY_STATIC_CAST(int8_t, e6),
|
|
991
|
+
HEDLEY_STATIC_CAST(int8_t, e5),
|
|
992
|
+
HEDLEY_STATIC_CAST(int8_t, e4),
|
|
993
|
+
HEDLEY_STATIC_CAST(int8_t, e3),
|
|
994
|
+
HEDLEY_STATIC_CAST(int8_t, e2),
|
|
995
|
+
HEDLEY_STATIC_CAST(int8_t, e1),
|
|
996
|
+
HEDLEY_STATIC_CAST(int8_t, e0));
|
|
997
|
+
#elif defined(SIMDE_MMX_NEON)
|
|
998
|
+
const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 };
|
|
999
|
+
r_.neon_u8 = vld1_u8(v);
|
|
1000
|
+
#else
|
|
1001
|
+
r_.u8[0] = e0;
|
|
1002
|
+
r_.u8[1] = e1;
|
|
1003
|
+
r_.u8[2] = e2;
|
|
1004
|
+
r_.u8[3] = e3;
|
|
1005
|
+
r_.u8[4] = e4;
|
|
1006
|
+
r_.u8[5] = e5;
|
|
1007
|
+
r_.u8[6] = e6;
|
|
1008
|
+
r_.u8[7] = e7;
|
|
1009
|
+
#endif
|
|
1010
|
+
|
|
1011
|
+
return simde__m64_from_private(r_);
|
|
1012
|
+
}
|
|
1013
|
+
|
|
1014
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1015
|
+
simde__m64
|
|
1016
|
+
simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) {
|
|
1017
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1018
|
+
return _mm_set_pi16(e3, e2, e1, e0);
|
|
1019
|
+
#else
|
|
1020
|
+
simde__m64_private r_;
|
|
1021
|
+
|
|
1022
|
+
#if defined(SIMDE_MMX_NEON)
|
|
1023
|
+
const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 };
|
|
1024
|
+
r_.neon_i16 = vld1_s16(v);
|
|
1025
|
+
#else
|
|
1026
|
+
r_.i16[0] = e0;
|
|
1027
|
+
r_.i16[1] = e1;
|
|
1028
|
+
r_.i16[2] = e2;
|
|
1029
|
+
r_.i16[3] = e3;
|
|
1030
|
+
#endif
|
|
1031
|
+
return simde__m64_from_private(r_);
|
|
1032
|
+
#endif
|
|
1033
|
+
}
|
|
1034
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1035
|
+
# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0)
|
|
1036
|
+
#endif
|
|
1037
|
+
|
|
1038
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1039
|
+
simde__m64
|
|
1040
|
+
simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) {
|
|
1041
|
+
simde__m64_private r_;
|
|
1042
|
+
|
|
1043
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1044
|
+
r_.n = _mm_set_pi16(
|
|
1045
|
+
HEDLEY_STATIC_CAST(int16_t, e3),
|
|
1046
|
+
HEDLEY_STATIC_CAST(int16_t, e2),
|
|
1047
|
+
HEDLEY_STATIC_CAST(int16_t, e1),
|
|
1048
|
+
HEDLEY_STATIC_CAST(int16_t, e0)
|
|
1049
|
+
);
|
|
1050
|
+
#elif defined(SIMDE_MMX_NEON)
|
|
1051
|
+
const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 };
|
|
1052
|
+
r_.neon_u16 = vld1_u16(v);
|
|
1053
|
+
#else
|
|
1054
|
+
r_.u16[0] = e0;
|
|
1055
|
+
r_.u16[1] = e1;
|
|
1056
|
+
r_.u16[2] = e2;
|
|
1057
|
+
r_.u16[3] = e3;
|
|
1058
|
+
#endif
|
|
1059
|
+
|
|
1060
|
+
return simde__m64_from_private(r_);
|
|
1061
|
+
}
|
|
1062
|
+
|
|
1063
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1064
|
+
simde__m64
|
|
1065
|
+
simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) {
|
|
1066
|
+
simde__m64_private r_;
|
|
1067
|
+
|
|
1068
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1069
|
+
r_.n = _mm_set_pi32(
|
|
1070
|
+
HEDLEY_STATIC_CAST(int32_t, e1),
|
|
1071
|
+
HEDLEY_STATIC_CAST(int32_t, e0));
|
|
1072
|
+
#elif defined(SIMDE_MMX_NEON)
|
|
1073
|
+
const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 };
|
|
1074
|
+
r_.neon_u32 = vld1_u32(v);
|
|
1075
|
+
#else
|
|
1076
|
+
r_.u32[0] = e0;
|
|
1077
|
+
r_.u32[1] = e1;
|
|
1078
|
+
#endif
|
|
1079
|
+
|
|
1080
|
+
return simde__m64_from_private(r_);
|
|
1081
|
+
}
|
|
1082
|
+
|
|
1083
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1084
|
+
simde__m64
|
|
1085
|
+
simde_mm_set_pi32 (int32_t e1, int32_t e0) {
|
|
1086
|
+
simde__m64_private r_;
|
|
1087
|
+
|
|
1088
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1089
|
+
r_.n = _mm_set_pi32(e1, e0);
|
|
1090
|
+
#elif defined(SIMDE_MMX_NEON)
|
|
1091
|
+
const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 };
|
|
1092
|
+
r_.neon_i32 = vld1_s32(v);
|
|
1093
|
+
#else
|
|
1094
|
+
r_.i32[0] = e0;
|
|
1095
|
+
r_.i32[1] = e1;
|
|
1096
|
+
#endif
|
|
1097
|
+
|
|
1098
|
+
return simde__m64_from_private(r_);
|
|
1099
|
+
}
|
|
1100
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1101
|
+
# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0)
|
|
1102
|
+
#endif
|
|
1103
|
+
|
|
1104
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1105
|
+
simde__m64
|
|
1106
|
+
simde_x_mm_set_pi64 (int64_t e0) {
|
|
1107
|
+
simde__m64_private r_;
|
|
1108
|
+
|
|
1109
|
+
#if defined(SIMDE_MMX_NEON)
|
|
1110
|
+
const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 };
|
|
1111
|
+
r_.neon_i64 = vld1_s64(v);
|
|
1112
|
+
#else
|
|
1113
|
+
r_.i64[0] = e0;
|
|
1114
|
+
#endif
|
|
1115
|
+
|
|
1116
|
+
return simde__m64_from_private(r_);
|
|
1117
|
+
}
|
|
1118
|
+
|
|
1119
|
+
|
|
1120
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1121
|
+
simde__m64
|
|
1122
|
+
simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) {
|
|
1123
|
+
simde__m64_private r_;
|
|
1124
|
+
|
|
1125
|
+
#if defined(SIMDE_MMX_NEON)
|
|
1126
|
+
const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 };
|
|
1127
|
+
r_.neon_f32 = vld1_f32(v);
|
|
1128
|
+
#else
|
|
1129
|
+
r_.f32[0] = e0;
|
|
1130
|
+
r_.f32[1] = e1;
|
|
1131
|
+
#endif
|
|
1132
|
+
|
|
1133
|
+
return simde__m64_from_private(r_);
|
|
1134
|
+
}
|
|
1135
|
+
|
|
1136
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1137
|
+
simde__m64
|
|
1138
|
+
simde_mm_set1_pi8 (int8_t a) {
|
|
1139
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1140
|
+
return _mm_set1_pi8(a);
|
|
1141
|
+
#elif defined(SIMDE_MMX_NEON)
|
|
1142
|
+
simde__m64_private r_;
|
|
1143
|
+
r_.neon_i8 = vmov_n_s8(a);
|
|
1144
|
+
return simde__m64_from_private(r_);
|
|
1145
|
+
#else
|
|
1146
|
+
return simde_mm_set_pi8(a, a, a, a, a, a, a, a);
|
|
1147
|
+
#endif
|
|
1148
|
+
}
|
|
1149
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1150
|
+
# define _mm_set1_pi8(a) simde_mm_set1_pi8(a)
|
|
1151
|
+
#endif
|
|
1152
|
+
|
|
1153
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1154
|
+
simde__m64
|
|
1155
|
+
simde_mm_set1_pi16 (int16_t a) {
|
|
1156
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1157
|
+
return _mm_set1_pi16(a);
|
|
1158
|
+
#elif defined(SIMDE_MMX_NEON)
|
|
1159
|
+
simde__m64_private r_;
|
|
1160
|
+
r_.neon_i16 = vmov_n_s16(a);
|
|
1161
|
+
return simde__m64_from_private(r_);
|
|
1162
|
+
#else
|
|
1163
|
+
return simde_mm_set_pi16(a, a, a, a);
|
|
1164
|
+
#endif
|
|
1165
|
+
}
|
|
1166
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1167
|
+
# define _mm_set1_pi16(a) simde_mm_set1_pi16(a)
|
|
1168
|
+
#endif
|
|
1169
|
+
|
|
1170
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1171
|
+
simde__m64
|
|
1172
|
+
simde_mm_set1_pi32 (int32_t a) {
|
|
1173
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1174
|
+
return _mm_set1_pi32(a);
|
|
1175
|
+
#elif defined(SIMDE_MMX_NEON)
|
|
1176
|
+
simde__m64_private r_;
|
|
1177
|
+
r_.neon_i32 = vmov_n_s32(a);
|
|
1178
|
+
return simde__m64_from_private(r_);
|
|
1179
|
+
#else
|
|
1180
|
+
return simde_mm_set_pi32(a, a);
|
|
1181
|
+
#endif
|
|
1182
|
+
}
|
|
1183
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1184
|
+
# define _mm_set1_pi32(a) simde_mm_set1_pi32(a)
|
|
1185
|
+
#endif
|
|
1186
|
+
|
|
1187
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1188
|
+
simde__m64
|
|
1189
|
+
simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) {
|
|
1190
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1191
|
+
return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0);
|
|
1192
|
+
#else
|
|
1193
|
+
return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7);
|
|
1194
|
+
#endif
|
|
1195
|
+
}
|
|
1196
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1197
|
+
# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0)
|
|
1198
|
+
#endif
|
|
1199
|
+
|
|
1200
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1201
|
+
simde__m64
|
|
1202
|
+
simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) {
|
|
1203
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1204
|
+
return _mm_setr_pi16(e3, e2, e1, e0);
|
|
1205
|
+
#else
|
|
1206
|
+
return simde_mm_set_pi16(e0, e1, e2, e3);
|
|
1207
|
+
#endif
|
|
1208
|
+
}
|
|
1209
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1210
|
+
# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0)
|
|
1211
|
+
#endif
|
|
1212
|
+
|
|
1213
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1214
|
+
simde__m64
|
|
1215
|
+
simde_mm_setr_pi32 (int32_t e1, int32_t e0) {
|
|
1216
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1217
|
+
return _mm_setr_pi32(e1, e0);
|
|
1218
|
+
#else
|
|
1219
|
+
return simde_mm_set_pi32(e0, e1);
|
|
1220
|
+
#endif
|
|
1221
|
+
}
|
|
1222
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1223
|
+
# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0)
|
|
1224
|
+
#endif
|
|
1225
|
+
|
|
1226
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1227
|
+
simde__m64
|
|
1228
|
+
simde_mm_setzero_si64 (void) {
|
|
1229
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1230
|
+
return _mm_setzero_si64();
|
|
1231
|
+
#elif defined(SIMDE_MMX_NEON)
|
|
1232
|
+
simde__m64_private r_;
|
|
1233
|
+
r_.neon_u32 = vmov_n_u32(0);
|
|
1234
|
+
return simde__m64_from_private(r_);
|
|
1235
|
+
#else
|
|
1236
|
+
return simde_mm_set_pi32(0, 0);
|
|
1237
|
+
#endif
|
|
1238
|
+
}
|
|
1239
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1240
|
+
# define _mm_setzero_si64() simde_mm_setzero_si64()
|
|
1241
|
+
#endif
|
|
1242
|
+
|
|
1243
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1244
|
+
simde__m64
|
|
1245
|
+
simde_mm_setone_si64 (void) {
|
|
1246
|
+
#if defined(SIMDE_SSE_NATIVE)
|
|
1247
|
+
__m64 t = _mm_undefined_ps();
|
|
1248
|
+
return _mm_andnot_ps(t, t);
|
|
1249
|
+
#else
|
|
1250
|
+
simde__m64 r;
|
|
1251
|
+
simde_memset(&r, ~0, sizeof(r));
|
|
1252
|
+
return r;
|
|
1253
|
+
#endif
|
|
1254
|
+
}
|
|
1255
|
+
|
|
1256
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1257
|
+
simde__m64
|
|
1258
|
+
simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) {
|
|
1259
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1260
|
+
return _mm_sll_pi16(a, count);
|
|
1261
|
+
#else
|
|
1262
|
+
simde__m64_private r_;
|
|
1263
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1264
|
+
simde__m64_private count_ = simde__m64_to_private(count);
|
|
1265
|
+
|
|
1266
|
+
#if defined(SIMDE_MMX_NEON)
|
|
1267
|
+
r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) vget_lane_u64(count_.neon_u64, 0)));
|
|
1268
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1269
|
+
r_.i16 = a_.i16 << count_.u64[0];
|
|
1270
|
+
#else
|
|
1271
|
+
if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) {
|
|
1272
|
+
simde_memset(&r_, 0, sizeof(r_));
|
|
1273
|
+
return simde__m64_from_private(r_);
|
|
1274
|
+
}
|
|
1275
|
+
|
|
1276
|
+
SIMDE__VECTORIZE
|
|
1277
|
+
for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
|
|
1278
|
+
r_.u16[i] = (uint16_t) (a_.u16[i] << count_.u64[0]);
|
|
1279
|
+
}
|
|
1280
|
+
#endif
|
|
1281
|
+
|
|
1282
|
+
return simde__m64_from_private(r_);
|
|
1283
|
+
#endif
|
|
1284
|
+
}
|
|
1285
|
+
#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count)
|
|
1286
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1287
|
+
# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count)
|
|
1288
|
+
# define _m_psllw(a, count) simde_mm_sll_pi16(a, count)
|
|
1289
|
+
#endif
|
|
1290
|
+
|
|
1291
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1292
|
+
simde__m64
|
|
1293
|
+
simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) {
|
|
1294
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1295
|
+
return _mm_sll_pi32(a, count);
|
|
1296
|
+
#else
|
|
1297
|
+
simde__m64_private r_;
|
|
1298
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1299
|
+
simde__m64_private count_ = simde__m64_to_private(count);
|
|
1300
|
+
|
|
1301
|
+
#if defined(SIMDE_MMX_NEON)
|
|
1302
|
+
r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) vget_lane_u64(count_.neon_u64, 0)));
|
|
1303
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1304
|
+
r_.i32 = a_.i32 << count_.u64[0];
|
|
1305
|
+
#else
|
|
1306
|
+
if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) {
|
|
1307
|
+
simde_memset(&r_, 0, sizeof(r_));
|
|
1308
|
+
return simde__m64_from_private(r_);
|
|
1309
|
+
}
|
|
1310
|
+
|
|
1311
|
+
SIMDE__VECTORIZE
|
|
1312
|
+
for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
|
|
1313
|
+
r_.u32[i] = a_.u32[i] << count_.u64[0];
|
|
1314
|
+
}
|
|
1315
|
+
#endif
|
|
1316
|
+
|
|
1317
|
+
return simde__m64_from_private(r_);
|
|
1318
|
+
#endif
|
|
1319
|
+
}
|
|
1320
|
+
#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count)
|
|
1321
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1322
|
+
# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count)
|
|
1323
|
+
# define _m_pslld(a, count) simde_mm_sll_pi32(a, count)
|
|
1324
|
+
#endif
|
|
1325
|
+
|
|
1326
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1327
|
+
simde__m64
|
|
1328
|
+
simde_mm_slli_pi16 (simde__m64 a, int count) {
|
|
1329
|
+
#if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
|
|
1330
|
+
return _mm_slli_pi16(a, count);
|
|
1331
|
+
#else
|
|
1332
|
+
simde__m64_private r_;
|
|
1333
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1334
|
+
|
|
1335
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1336
|
+
r_.i16 = a_.i16 << count;
|
|
1337
|
+
#elif defined(SIMDE_MMX_NEON)
|
|
1338
|
+
r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count));
|
|
1339
|
+
#else
|
|
1340
|
+
SIMDE__VECTORIZE
|
|
1341
|
+
for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
|
|
1342
|
+
r_.u16[i] = (uint16_t) (a_.u16[i] << count);
|
|
1343
|
+
}
|
|
1344
|
+
#endif
|
|
1345
|
+
|
|
1346
|
+
return simde__m64_from_private(r_);
|
|
1347
|
+
#endif
|
|
1348
|
+
}
|
|
1349
|
+
#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count)
|
|
1350
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1351
|
+
# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count)
|
|
1352
|
+
# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count)
|
|
1353
|
+
#endif
|
|
1354
|
+
|
|
1355
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1356
|
+
simde__m64
|
|
1357
|
+
simde_mm_slli_pi32 (simde__m64 a, int count) {
|
|
1358
|
+
#if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
|
|
1359
|
+
return _mm_slli_pi32(a, count);
|
|
1360
|
+
#else
|
|
1361
|
+
simde__m64_private r_;
|
|
1362
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1363
|
+
|
|
1364
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1365
|
+
r_.i32 = a_.i32 << count;
|
|
1366
|
+
#elif defined(SIMDE_MMX_NEON)
|
|
1367
|
+
r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count));
|
|
1368
|
+
#else
|
|
1369
|
+
SIMDE__VECTORIZE
|
|
1370
|
+
for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
|
|
1371
|
+
r_.u32[i] = a_.u32[i] << count;
|
|
1372
|
+
}
|
|
1373
|
+
#endif
|
|
1374
|
+
|
|
1375
|
+
return simde__m64_from_private(r_);
|
|
1376
|
+
#endif
|
|
1377
|
+
}
|
|
1378
|
+
#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b)
|
|
1379
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1380
|
+
# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count)
|
|
1381
|
+
# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count)
|
|
1382
|
+
#endif
|
|
1383
|
+
|
|
1384
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1385
|
+
simde__m64
|
|
1386
|
+
simde_mm_slli_si64 (simde__m64 a, int count) {
|
|
1387
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1388
|
+
return _mm_slli_si64(a, count);
|
|
1389
|
+
#else
|
|
1390
|
+
simde__m64_private r_;
|
|
1391
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1392
|
+
|
|
1393
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1394
|
+
r_.i64 = a_.i64 << count;
|
|
1395
|
+
#elif defined(SIMDE_MMX_NEON)
|
|
1396
|
+
r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count));
|
|
1397
|
+
#else
|
|
1398
|
+
r_.u64[0] = a_.u64[0] << count;
|
|
1399
|
+
#endif
|
|
1400
|
+
|
|
1401
|
+
return simde__m64_from_private(r_);
|
|
1402
|
+
#endif
|
|
1403
|
+
}
|
|
1404
|
+
#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count)
|
|
1405
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1406
|
+
# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count)
|
|
1407
|
+
# define _m_psllqi(a, count) simde_mm_slli_si64(a, count)
|
|
1408
|
+
#endif
|
|
1409
|
+
|
|
1410
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1411
|
+
simde__m64
|
|
1412
|
+
simde_mm_sll_si64 (simde__m64 a, simde__m64 count) {
|
|
1413
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1414
|
+
return _mm_sll_si64(a, count);
|
|
1415
|
+
#else
|
|
1416
|
+
simde__m64_private r_;
|
|
1417
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1418
|
+
simde__m64_private count_ = simde__m64_to_private(count);
|
|
1419
|
+
|
|
1420
|
+
#if defined(SIMDE_MMX_NEON)
|
|
1421
|
+
r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64);
|
|
1422
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
1423
|
+
r_.i64 = a_.i64 << count_.i64;
|
|
1424
|
+
#else
|
|
1425
|
+
if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) {
|
|
1426
|
+
simde_memset(&r_, 0, sizeof(r_));
|
|
1427
|
+
return simde__m64_from_private(r_);
|
|
1428
|
+
}
|
|
1429
|
+
|
|
1430
|
+
r_.u64[0] = a_.u64[0] << count_.u64[0];
|
|
1431
|
+
#endif
|
|
1432
|
+
|
|
1433
|
+
return simde__m64_from_private(r_);
|
|
1434
|
+
#endif
|
|
1435
|
+
}
|
|
1436
|
+
#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count)
|
|
1437
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1438
|
+
# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count)
|
|
1439
|
+
# define _m_psllq(a, count) simde_mm_sll_si64(a, count)
|
|
1440
|
+
#endif
|
|
1441
|
+
|
|
1442
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1443
|
+
simde__m64
|
|
1444
|
+
simde_mm_srl_pi16 (simde__m64 a, simde__m64 count) {
|
|
1445
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1446
|
+
return _mm_srl_pi16(a, count);
|
|
1447
|
+
#else
|
|
1448
|
+
simde__m64_private r_;
|
|
1449
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1450
|
+
simde__m64_private count_ = simde__m64_to_private(count);
|
|
1451
|
+
|
|
1452
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1453
|
+
r_.u16 = a_.u16 >> count_.u64[0];
|
|
1454
|
+
#elif defined(SIMDE_MMX_NEON)
|
|
1455
|
+
r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0))));
|
|
1456
|
+
#else
|
|
1457
|
+
if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) {
|
|
1458
|
+
simde_memset(&r_, 0, sizeof(r_));
|
|
1459
|
+
return simde__m64_from_private(r_);
|
|
1460
|
+
}
|
|
1461
|
+
|
|
1462
|
+
SIMDE__VECTORIZE
|
|
1463
|
+
for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) {
|
|
1464
|
+
r_.u16[i] = a_.u16[i] >> count_.u64[0];
|
|
1465
|
+
}
|
|
1466
|
+
#endif
|
|
1467
|
+
|
|
1468
|
+
return simde__m64_from_private(r_);
|
|
1469
|
+
#endif
|
|
1470
|
+
}
|
|
1471
|
+
#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count)
|
|
1472
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1473
|
+
# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count)
|
|
1474
|
+
# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count)
|
|
1475
|
+
#endif
|
|
1476
|
+
|
|
1477
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1478
|
+
simde__m64
|
|
1479
|
+
simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) {
|
|
1480
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1481
|
+
return _mm_srl_pi32(a, count);
|
|
1482
|
+
#else
|
|
1483
|
+
simde__m64_private r_;
|
|
1484
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1485
|
+
simde__m64_private count_ = simde__m64_to_private(count);
|
|
1486
|
+
|
|
1487
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1488
|
+
r_.u32 = a_.u32 >> count_.u64[0];
|
|
1489
|
+
#elif defined(SIMDE_MMX_NEON)
|
|
1490
|
+
r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0))));
|
|
1491
|
+
#else
|
|
1492
|
+
if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) {
|
|
1493
|
+
simde_memset(&r_, 0, sizeof(r_));
|
|
1494
|
+
return simde__m64_from_private(r_);
|
|
1495
|
+
}
|
|
1496
|
+
|
|
1497
|
+
SIMDE__VECTORIZE
|
|
1498
|
+
for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) {
|
|
1499
|
+
r_.u32[i] = a_.u32[i] >> count_.u64[0];
|
|
1500
|
+
}
|
|
1501
|
+
#endif
|
|
1502
|
+
|
|
1503
|
+
return simde__m64_from_private(r_);
|
|
1504
|
+
#endif
|
|
1505
|
+
}
|
|
1506
|
+
#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count)
|
|
1507
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1508
|
+
# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count)
|
|
1509
|
+
# define _m_psrld(a, count) simde_mm_srl_pi32(a, count)
|
|
1510
|
+
#endif
|
|
1511
|
+
|
|
1512
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1513
|
+
simde__m64
|
|
1514
|
+
simde_mm_srli_pi16 (simde__m64 a, int count) {
|
|
1515
|
+
#if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
|
|
1516
|
+
return _mm_srli_pi16(a, count);
|
|
1517
|
+
#else
|
|
1518
|
+
simde__m64_private r_;
|
|
1519
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1520
|
+
|
|
1521
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1522
|
+
r_.u16 = a_.u16 >> count;
|
|
1523
|
+
#elif defined(SIMDE_MMX_NEON)
|
|
1524
|
+
r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count)));
|
|
1525
|
+
#else
|
|
1526
|
+
SIMDE__VECTORIZE
|
|
1527
|
+
for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
|
|
1528
|
+
r_.u16[i] = a_.u16[i] >> count;
|
|
1529
|
+
}
|
|
1530
|
+
#endif
|
|
1531
|
+
|
|
1532
|
+
return simde__m64_from_private(r_);
|
|
1533
|
+
#endif
|
|
1534
|
+
}
|
|
1535
|
+
#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count)
|
|
1536
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1537
|
+
# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count)
|
|
1538
|
+
# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count)
|
|
1539
|
+
#endif
|
|
1540
|
+
|
|
1541
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1542
|
+
simde__m64
|
|
1543
|
+
simde_mm_srli_pi32 (simde__m64 a, int count) {
|
|
1544
|
+
#if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
|
|
1545
|
+
return _mm_srli_pi32(a, count);
|
|
1546
|
+
#else
|
|
1547
|
+
simde__m64_private r_;
|
|
1548
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1549
|
+
|
|
1550
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1551
|
+
r_.u32 = a_.u32 >> count;
|
|
1552
|
+
#elif defined(SIMDE_MMX_NEON)
|
|
1553
|
+
r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count)));
|
|
1554
|
+
#else
|
|
1555
|
+
SIMDE__VECTORIZE
|
|
1556
|
+
for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
|
|
1557
|
+
r_.u32[i] = a_.u32[i] >> count;
|
|
1558
|
+
}
|
|
1559
|
+
#endif
|
|
1560
|
+
|
|
1561
|
+
return simde__m64_from_private(r_);
|
|
1562
|
+
#endif
|
|
1563
|
+
}
|
|
1564
|
+
#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count)
|
|
1565
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1566
|
+
# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count)
|
|
1567
|
+
# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count)
|
|
1568
|
+
#endif
|
|
1569
|
+
|
|
1570
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1571
|
+
simde__m64
|
|
1572
|
+
simde_mm_srli_si64 (simde__m64 a, int count) {
|
|
1573
|
+
#if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
|
|
1574
|
+
return _mm_srli_si64(a, count);
|
|
1575
|
+
#else
|
|
1576
|
+
simde__m64_private r_;
|
|
1577
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1578
|
+
|
|
1579
|
+
#if defined(SIMDE_MMX_NEON)
|
|
1580
|
+
r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count));
|
|
1581
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1582
|
+
r_.u64 = a_.u64 >> count;
|
|
1583
|
+
#else
|
|
1584
|
+
r_.u64[0] = a_.u64[0] >> count;
|
|
1585
|
+
#endif
|
|
1586
|
+
|
|
1587
|
+
return simde__m64_from_private(r_);
|
|
1588
|
+
#endif
|
|
1589
|
+
}
|
|
1590
|
+
#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count)
|
|
1591
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1592
|
+
# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count)
|
|
1593
|
+
# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count)
|
|
1594
|
+
#endif
|
|
1595
|
+
|
|
1596
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1597
|
+
simde__m64
|
|
1598
|
+
simde_mm_srl_si64 (simde__m64 a, simde__m64 count) {
|
|
1599
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1600
|
+
return _mm_srl_si64(a, count);
|
|
1601
|
+
#else
|
|
1602
|
+
simde__m64_private r_;
|
|
1603
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1604
|
+
simde__m64_private count_ = simde__m64_to_private(count);
|
|
1605
|
+
|
|
1606
|
+
#if defined(SIMDE_MMX_NEON) && defined(SIMDE_ARCH_AARCH64)
|
|
1607
|
+
r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64));
|
|
1608
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
1609
|
+
r_.u64 = a_.u64 >> count_.u64;
|
|
1610
|
+
#else
|
|
1611
|
+
if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) {
|
|
1612
|
+
simde_memset(&r_, 0, sizeof(r_));
|
|
1613
|
+
return simde__m64_from_private(r_);
|
|
1614
|
+
}
|
|
1615
|
+
|
|
1616
|
+
r_.u64[0] = a_.u64[0] >> count_.u64[0];
|
|
1617
|
+
#endif
|
|
1618
|
+
|
|
1619
|
+
return simde__m64_from_private(r_);
|
|
1620
|
+
#endif
|
|
1621
|
+
}
|
|
1622
|
+
#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count)
|
|
1623
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1624
|
+
# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count)
|
|
1625
|
+
# define _m_psrlq(a, count) simde_mm_srl_si64(a, count)
|
|
1626
|
+
#endif
|
|
1627
|
+
|
|
1628
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1629
|
+
simde__m64
|
|
1630
|
+
simde_mm_srai_pi16 (simde__m64 a, int count) {
|
|
1631
|
+
#if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
|
|
1632
|
+
return _mm_srai_pi16(a, count);
|
|
1633
|
+
#else
|
|
1634
|
+
simde__m64_private r_;
|
|
1635
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1636
|
+
|
|
1637
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1638
|
+
r_.i16 = a_.i16 >> (count & 0xff);
|
|
1639
|
+
#elif defined(SIMDE_MMX_NEON)
|
|
1640
|
+
r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count));
|
|
1641
|
+
#else
|
|
1642
|
+
SIMDE__VECTORIZE
|
|
1643
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
1644
|
+
r_.i16[i] = a_.i16[i] >> (count & 0xff);
|
|
1645
|
+
}
|
|
1646
|
+
#endif
|
|
1647
|
+
|
|
1648
|
+
return simde__m64_from_private(r_);
|
|
1649
|
+
#endif
|
|
1650
|
+
}
|
|
1651
|
+
#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count)
|
|
1652
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1653
|
+
# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count)
|
|
1654
|
+
# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count)
|
|
1655
|
+
#endif
|
|
1656
|
+
|
|
1657
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1658
|
+
simde__m64
|
|
1659
|
+
simde_mm_srai_pi32 (simde__m64 a, int count) {
|
|
1660
|
+
#if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
|
|
1661
|
+
return _mm_srai_pi32(a, count);
|
|
1662
|
+
#else
|
|
1663
|
+
simde__m64_private r_;
|
|
1664
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1665
|
+
|
|
1666
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1667
|
+
r_.i32 = a_.i32 >> (count & 0xff);
|
|
1668
|
+
#elif defined(SIMDE_MMX_NEON)
|
|
1669
|
+
r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count)));
|
|
1670
|
+
#else
|
|
1671
|
+
SIMDE__VECTORIZE
|
|
1672
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
1673
|
+
r_.i32[i] = a_.i32[i] >> (count & 0xff);
|
|
1674
|
+
}
|
|
1675
|
+
#endif
|
|
1676
|
+
|
|
1677
|
+
return simde__m64_from_private(r_);
|
|
1678
|
+
#endif
|
|
1679
|
+
}
|
|
1680
|
+
#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count)
|
|
1681
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1682
|
+
# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count)
|
|
1683
|
+
# define _m_srai_pi32(a, count) simde_mm_srai_pi32(a, count)
|
|
1684
|
+
#endif
|
|
1685
|
+
|
|
1686
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1687
|
+
simde__m64
|
|
1688
|
+
simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) {
|
|
1689
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1690
|
+
return _mm_sra_pi16(a, count);
|
|
1691
|
+
#else
|
|
1692
|
+
simde__m64_private r_;
|
|
1693
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1694
|
+
simde__m64_private count_ = simde__m64_to_private(count);
|
|
1695
|
+
const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0]));
|
|
1696
|
+
|
|
1697
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1698
|
+
r_.i16 = a_.i16 >> cnt;
|
|
1699
|
+
#elif defined(SIMDE_MMX_NEON)
|
|
1700
|
+
r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0))));
|
|
1701
|
+
#else
|
|
1702
|
+
SIMDE__VECTORIZE
|
|
1703
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
1704
|
+
r_.i16[i] = a_.i16[i] >> cnt;
|
|
1705
|
+
}
|
|
1706
|
+
#endif
|
|
1707
|
+
|
|
1708
|
+
return simde__m64_from_private(r_);
|
|
1709
|
+
#endif
|
|
1710
|
+
}
|
|
1711
|
+
#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count)
|
|
1712
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1713
|
+
# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count)
|
|
1714
|
+
# define _m_psraw(a, count) simde_mm_sra_pi16(a, count)
|
|
1715
|
+
#endif
|
|
1716
|
+
|
|
1717
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1718
|
+
simde__m64
|
|
1719
|
+
simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) {
|
|
1720
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1721
|
+
return _mm_sra_pi32(a, count);
|
|
1722
|
+
#else
|
|
1723
|
+
simde__m64_private r_;
|
|
1724
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1725
|
+
simde__m64_private count_ = simde__m64_to_private(count);
|
|
1726
|
+
const int32_t cnt = (count_.u64[0] > 31) ? 31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]);
|
|
1727
|
+
|
|
1728
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1729
|
+
r_.i32 = a_.i32 >> cnt;
|
|
1730
|
+
#elif defined(SIMDE_MMX_NEON)
|
|
1731
|
+
r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0))));
|
|
1732
|
+
#else
|
|
1733
|
+
SIMDE__VECTORIZE
|
|
1734
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
1735
|
+
r_.i32[i] = a_.i32[i] >> cnt;
|
|
1736
|
+
}
|
|
1737
|
+
#endif
|
|
1738
|
+
|
|
1739
|
+
return simde__m64_from_private(r_);
|
|
1740
|
+
#endif
|
|
1741
|
+
}
|
|
1742
|
+
#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b)
|
|
1743
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1744
|
+
# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count)
|
|
1745
|
+
# define _m_psrad(a, count) simde_mm_sra_pi32(a, count)
|
|
1746
|
+
#endif
|
|
1747
|
+
|
|
1748
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1749
|
+
simde__m64
|
|
1750
|
+
simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) {
|
|
1751
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1752
|
+
return _mm_sub_pi8(a, b);
|
|
1753
|
+
#else
|
|
1754
|
+
simde__m64_private r_;
|
|
1755
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1756
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
1757
|
+
|
|
1758
|
+
#if defined(SIMDE_MMX_NEON)
|
|
1759
|
+
r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8);
|
|
1760
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
1761
|
+
r_.i8 = a_.i8 - b_.i8;
|
|
1762
|
+
#else
|
|
1763
|
+
SIMDE__VECTORIZE
|
|
1764
|
+
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
|
|
1765
|
+
r_.i8[i] = a_.i8[i] - b_.i8[i];
|
|
1766
|
+
}
|
|
1767
|
+
#endif
|
|
1768
|
+
|
|
1769
|
+
return simde__m64_from_private(r_);
|
|
1770
|
+
#endif
|
|
1771
|
+
}
|
|
1772
|
+
#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b)
|
|
1773
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1774
|
+
# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b)
|
|
1775
|
+
# define _m_psubb(a, b) simde_mm_sub_pi8(a, b)
|
|
1776
|
+
#endif
|
|
1777
|
+
|
|
1778
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1779
|
+
simde__m64
|
|
1780
|
+
simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) {
|
|
1781
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1782
|
+
return _mm_sub_pi16(a, b);
|
|
1783
|
+
#else
|
|
1784
|
+
simde__m64_private r_;
|
|
1785
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1786
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
1787
|
+
|
|
1788
|
+
#if defined(SIMDE_MMX_NEON)
|
|
1789
|
+
r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16);
|
|
1790
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
1791
|
+
r_.i16 = a_.i16 - b_.i16;
|
|
1792
|
+
#else
|
|
1793
|
+
SIMDE__VECTORIZE
|
|
1794
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
1795
|
+
r_.i16[i] = a_.i16[i] - b_.i16[i];
|
|
1796
|
+
}
|
|
1797
|
+
#endif
|
|
1798
|
+
|
|
1799
|
+
return simde__m64_from_private(r_);
|
|
1800
|
+
#endif
|
|
1801
|
+
}
|
|
1802
|
+
#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b)
|
|
1803
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1804
|
+
# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b)
|
|
1805
|
+
# define _m_psubw(a, b) simde_mm_sub_pi16(a, b)
|
|
1806
|
+
#endif
|
|
1807
|
+
|
|
1808
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1809
|
+
simde__m64
|
|
1810
|
+
simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) {
|
|
1811
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1812
|
+
return _mm_sub_pi32(a, b);
|
|
1813
|
+
#else
|
|
1814
|
+
simde__m64_private r_;
|
|
1815
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1816
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
1817
|
+
|
|
1818
|
+
#if defined(SIMDE_MMX_NEON)
|
|
1819
|
+
r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32);
|
|
1820
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
1821
|
+
r_.i32 = a_.i32 - b_.i32;
|
|
1822
|
+
#else
|
|
1823
|
+
SIMDE__VECTORIZE
|
|
1824
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
1825
|
+
r_.i32[i] = a_.i32[i] - b_.i32[i];
|
|
1826
|
+
}
|
|
1827
|
+
#endif
|
|
1828
|
+
|
|
1829
|
+
return simde__m64_from_private(r_);
|
|
1830
|
+
#endif
|
|
1831
|
+
}
|
|
1832
|
+
#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b)
|
|
1833
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1834
|
+
# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b)
|
|
1835
|
+
# define _m_psubd(a, b) simde_mm_sub_pi32(a, b)
|
|
1836
|
+
#endif
|
|
1837
|
+
|
|
1838
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1839
|
+
simde__m64
|
|
1840
|
+
simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) {
|
|
1841
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1842
|
+
return _mm_subs_pi8(a, b);
|
|
1843
|
+
#else
|
|
1844
|
+
simde__m64_private r_;
|
|
1845
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1846
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
1847
|
+
|
|
1848
|
+
#if defined(SIMDE_MMX_NEON)
|
|
1849
|
+
r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8);
|
|
1850
|
+
#else
|
|
1851
|
+
SIMDE__VECTORIZE
|
|
1852
|
+
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
|
|
1853
|
+
if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) {
|
|
1854
|
+
r_.i8[i] = INT8_MIN;
|
|
1855
|
+
} else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) {
|
|
1856
|
+
r_.i8[i] = INT8_MAX;
|
|
1857
|
+
} else {
|
|
1858
|
+
r_.i8[i] = (a_.i8[i]) - (b_.i8[i]);
|
|
1859
|
+
}
|
|
1860
|
+
}
|
|
1861
|
+
#endif
|
|
1862
|
+
|
|
1863
|
+
return simde__m64_from_private(r_);
|
|
1864
|
+
#endif
|
|
1865
|
+
}
|
|
1866
|
+
#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b)
|
|
1867
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1868
|
+
# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b)
|
|
1869
|
+
# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b)
|
|
1870
|
+
#endif
|
|
1871
|
+
|
|
1872
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1873
|
+
simde__m64
|
|
1874
|
+
simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) {
|
|
1875
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1876
|
+
return _mm_subs_pu8(a, b);
|
|
1877
|
+
#else
|
|
1878
|
+
simde__m64_private r_;
|
|
1879
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1880
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
1881
|
+
|
|
1882
|
+
#if defined(SIMDE_MMX_NEON)
|
|
1883
|
+
r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8);
|
|
1884
|
+
#else
|
|
1885
|
+
SIMDE__VECTORIZE
|
|
1886
|
+
for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
|
|
1887
|
+
const int32_t x = a_.u8[i] - b_.u8[i];
|
|
1888
|
+
if (x < 0) {
|
|
1889
|
+
r_.u8[i] = 0;
|
|
1890
|
+
} else if (x > UINT8_MAX) {
|
|
1891
|
+
r_.u8[i] = UINT8_MAX;
|
|
1892
|
+
} else {
|
|
1893
|
+
r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x);
|
|
1894
|
+
}
|
|
1895
|
+
}
|
|
1896
|
+
#endif
|
|
1897
|
+
|
|
1898
|
+
return simde__m64_from_private(r_);
|
|
1899
|
+
#endif
|
|
1900
|
+
}
|
|
1901
|
+
#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b)
|
|
1902
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1903
|
+
# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b)
|
|
1904
|
+
# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b)
|
|
1905
|
+
#endif
|
|
1906
|
+
|
|
1907
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1908
|
+
simde__m64
|
|
1909
|
+
simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) {
|
|
1910
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1911
|
+
return _mm_subs_pi16(a, b);
|
|
1912
|
+
#else
|
|
1913
|
+
simde__m64_private r_;
|
|
1914
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1915
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
1916
|
+
|
|
1917
|
+
#if defined(SIMDE_MMX_NEON)
|
|
1918
|
+
r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16);
|
|
1919
|
+
#else
|
|
1920
|
+
SIMDE__VECTORIZE
|
|
1921
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
1922
|
+
if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) {
|
|
1923
|
+
r_.i16[i] = SHRT_MIN;
|
|
1924
|
+
} else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) {
|
|
1925
|
+
r_.i16[i] = INT16_MAX;
|
|
1926
|
+
} else {
|
|
1927
|
+
r_.i16[i] = (a_.i16[i]) - (b_.i16[i]);
|
|
1928
|
+
}
|
|
1929
|
+
}
|
|
1930
|
+
#endif
|
|
1931
|
+
|
|
1932
|
+
return simde__m64_from_private(r_);
|
|
1933
|
+
#endif
|
|
1934
|
+
}
|
|
1935
|
+
#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b)
|
|
1936
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1937
|
+
# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b)
|
|
1938
|
+
# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b)
|
|
1939
|
+
#endif
|
|
1940
|
+
|
|
1941
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1942
|
+
simde__m64
|
|
1943
|
+
simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) {
|
|
1944
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1945
|
+
return _mm_subs_pu16(a, b);
|
|
1946
|
+
#else
|
|
1947
|
+
simde__m64_private r_;
|
|
1948
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1949
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
1950
|
+
|
|
1951
|
+
#if defined(SIMDE_MMX_NEON)
|
|
1952
|
+
r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16);
|
|
1953
|
+
#else
|
|
1954
|
+
SIMDE__VECTORIZE
|
|
1955
|
+
for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
|
|
1956
|
+
const int x = a_.u16[i] - b_.u16[i];
|
|
1957
|
+
if (x < 0) {
|
|
1958
|
+
r_.u16[i] = 0;
|
|
1959
|
+
} else if (x > UINT16_MAX) {
|
|
1960
|
+
r_.u16[i] = UINT16_MAX;
|
|
1961
|
+
} else {
|
|
1962
|
+
r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x);
|
|
1963
|
+
}
|
|
1964
|
+
}
|
|
1965
|
+
#endif
|
|
1966
|
+
|
|
1967
|
+
return simde__m64_from_private(r_);
|
|
1968
|
+
#endif
|
|
1969
|
+
}
|
|
1970
|
+
#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b)
|
|
1971
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
1972
|
+
# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b)
|
|
1973
|
+
# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b)
|
|
1974
|
+
#endif
|
|
1975
|
+
|
|
1976
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1977
|
+
simde__m64
|
|
1978
|
+
simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) {
|
|
1979
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
1980
|
+
return _mm_unpackhi_pi8(a, b);
|
|
1981
|
+
#else
|
|
1982
|
+
simde__m64_private r_;
|
|
1983
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1984
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
1985
|
+
|
|
1986
|
+
#if defined(SIMDE_MMX_NEON) && defined(SIMDE_ARCH_AARCH64)
|
|
1987
|
+
r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8);
|
|
1988
|
+
#elif defined(SIMDE__SHUFFLE_VECTOR)
|
|
1989
|
+
r_.i8 = SIMDE__SHUFFLE_VECTOR(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15);
|
|
1990
|
+
#else
|
|
1991
|
+
r_.i8[0] = a_.i8[4];
|
|
1992
|
+
r_.i8[1] = b_.i8[4];
|
|
1993
|
+
r_.i8[2] = a_.i8[5];
|
|
1994
|
+
r_.i8[3] = b_.i8[5];
|
|
1995
|
+
r_.i8[4] = a_.i8[6];
|
|
1996
|
+
r_.i8[5] = b_.i8[6];
|
|
1997
|
+
r_.i8[6] = a_.i8[7];
|
|
1998
|
+
r_.i8[7] = b_.i8[7];
|
|
1999
|
+
#endif
|
|
2000
|
+
|
|
2001
|
+
return simde__m64_from_private(r_);
|
|
2002
|
+
#endif
|
|
2003
|
+
}
|
|
2004
|
+
#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b)
|
|
2005
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
2006
|
+
# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b)
|
|
2007
|
+
# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b)
|
|
2008
|
+
#endif
|
|
2009
|
+
|
|
2010
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2011
|
+
simde__m64
|
|
2012
|
+
simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) {
|
|
2013
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
2014
|
+
return _mm_unpackhi_pi16(a, b);
|
|
2015
|
+
#else
|
|
2016
|
+
simde__m64_private r_;
|
|
2017
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
2018
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
2019
|
+
|
|
2020
|
+
#if defined(SIMDE_MMX_NEON) && defined(SIMDE_ARCH_AARCH64)
|
|
2021
|
+
r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16);
|
|
2022
|
+
#elif defined(SIMDE__SHUFFLE_VECTOR)
|
|
2023
|
+
r_.i16 = SIMDE__SHUFFLE_VECTOR(16, 8, a_.i16, b_.i16, 2, 6, 3, 7);
|
|
2024
|
+
#else
|
|
2025
|
+
r_.i16[0] = a_.i16[2];
|
|
2026
|
+
r_.i16[1] = b_.i16[2];
|
|
2027
|
+
r_.i16[2] = a_.i16[3];
|
|
2028
|
+
r_.i16[3] = b_.i16[3];
|
|
2029
|
+
#endif
|
|
2030
|
+
|
|
2031
|
+
return simde__m64_from_private(r_);
|
|
2032
|
+
#endif
|
|
2033
|
+
}
|
|
2034
|
+
#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b)
|
|
2035
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
2036
|
+
# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b)
|
|
2037
|
+
# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b)
|
|
2038
|
+
#endif
|
|
2039
|
+
|
|
2040
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2041
|
+
simde__m64
|
|
2042
|
+
simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) {
|
|
2043
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
2044
|
+
return _mm_unpackhi_pi32(a, b);
|
|
2045
|
+
#else
|
|
2046
|
+
simde__m64_private r_;
|
|
2047
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
2048
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
2049
|
+
|
|
2050
|
+
#if defined(SIMDE_MMX_NEON) && defined(SIMDE_ARCH_AARCH64)
|
|
2051
|
+
r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32);
|
|
2052
|
+
#elif defined(SIMDE__SHUFFLE_VECTOR)
|
|
2053
|
+
r_.i32 = SIMDE__SHUFFLE_VECTOR(32, 8, a_.i32, b_.i32, 1, 3);
|
|
2054
|
+
#else
|
|
2055
|
+
r_.i32[0] = a_.i32[1];
|
|
2056
|
+
r_.i32[1] = b_.i32[1];
|
|
2057
|
+
#endif
|
|
2058
|
+
|
|
2059
|
+
return simde__m64_from_private(r_);
|
|
2060
|
+
#endif
|
|
2061
|
+
}
|
|
2062
|
+
#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b)
|
|
2063
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
2064
|
+
# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b)
|
|
2065
|
+
# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b)
|
|
2066
|
+
#endif
|
|
2067
|
+
|
|
2068
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2069
|
+
simde__m64
|
|
2070
|
+
simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) {
|
|
2071
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
2072
|
+
return _mm_unpacklo_pi8(a, b);
|
|
2073
|
+
#else
|
|
2074
|
+
simde__m64_private r_;
|
|
2075
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
2076
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
2077
|
+
|
|
2078
|
+
#if defined(SIMDE_MMX_NEON) && defined(SIMDE_ARCH_AARCH64)
|
|
2079
|
+
r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8);
|
|
2080
|
+
#elif defined(SIMDE__SHUFFLE_VECTOR)
|
|
2081
|
+
r_.i8 = SIMDE__SHUFFLE_VECTOR(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11);
|
|
2082
|
+
#else
|
|
2083
|
+
r_.i8[0] = a_.i8[0];
|
|
2084
|
+
r_.i8[1] = b_.i8[0];
|
|
2085
|
+
r_.i8[2] = a_.i8[1];
|
|
2086
|
+
r_.i8[3] = b_.i8[1];
|
|
2087
|
+
r_.i8[4] = a_.i8[2];
|
|
2088
|
+
r_.i8[5] = b_.i8[2];
|
|
2089
|
+
r_.i8[6] = a_.i8[3];
|
|
2090
|
+
r_.i8[7] = b_.i8[3];
|
|
2091
|
+
#endif
|
|
2092
|
+
|
|
2093
|
+
return simde__m64_from_private(r_);
|
|
2094
|
+
#endif
|
|
2095
|
+
}
|
|
2096
|
+
#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b)
|
|
2097
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
2098
|
+
# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b)
|
|
2099
|
+
# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b)
|
|
2100
|
+
#endif
|
|
2101
|
+
|
|
2102
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2103
|
+
simde__m64
|
|
2104
|
+
simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) {
|
|
2105
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
2106
|
+
return _mm_unpacklo_pi16(a, b);
|
|
2107
|
+
#else
|
|
2108
|
+
simde__m64_private r_;
|
|
2109
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
2110
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
2111
|
+
|
|
2112
|
+
#if defined(SIMDE_MMX_NEON) && defined(SIMDE_ARCH_AARCH64)
|
|
2113
|
+
r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16);
|
|
2114
|
+
#elif defined(SIMDE__SHUFFLE_VECTOR)
|
|
2115
|
+
r_.i16 = SIMDE__SHUFFLE_VECTOR(16, 8, a_.i16, b_.i16, 0, 4, 1, 5);
|
|
2116
|
+
#else
|
|
2117
|
+
r_.i16[0] = a_.i16[0];
|
|
2118
|
+
r_.i16[1] = b_.i16[0];
|
|
2119
|
+
r_.i16[2] = a_.i16[1];
|
|
2120
|
+
r_.i16[3] = b_.i16[1];
|
|
2121
|
+
#endif
|
|
2122
|
+
|
|
2123
|
+
return simde__m64_from_private(r_);
|
|
2124
|
+
#endif
|
|
2125
|
+
}
|
|
2126
|
+
#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b)
|
|
2127
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
2128
|
+
# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b)
|
|
2129
|
+
# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b)
|
|
2130
|
+
#endif
|
|
2131
|
+
|
|
2132
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2133
|
+
simde__m64
|
|
2134
|
+
simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) {
|
|
2135
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
2136
|
+
return _mm_unpacklo_pi32(a, b);
|
|
2137
|
+
#else
|
|
2138
|
+
simde__m64_private r_;
|
|
2139
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
2140
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
2141
|
+
|
|
2142
|
+
#if defined(SIMDE_MMX_NEON) && defined(SIMDE_ARCH_AARCH64)
|
|
2143
|
+
r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32);
|
|
2144
|
+
#elif defined(SIMDE__SHUFFLE_VECTOR)
|
|
2145
|
+
r_.i32 = SIMDE__SHUFFLE_VECTOR(32, 8, a_.i32, b_.i32, 0, 2);
|
|
2146
|
+
#else
|
|
2147
|
+
r_.i32[0] = a_.i32[0];
|
|
2148
|
+
r_.i32[1] = b_.i32[0];
|
|
2149
|
+
#endif
|
|
2150
|
+
|
|
2151
|
+
return simde__m64_from_private(r_);
|
|
2152
|
+
#endif
|
|
2153
|
+
}
|
|
2154
|
+
#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b)
|
|
2155
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
2156
|
+
# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b)
|
|
2157
|
+
# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b)
|
|
2158
|
+
#endif
|
|
2159
|
+
|
|
2160
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2161
|
+
simde__m64
|
|
2162
|
+
simde_mm_xor_si64 (simde__m64 a, simde__m64 b) {
|
|
2163
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
2164
|
+
return _mm_xor_si64(a, b);
|
|
2165
|
+
#else
|
|
2166
|
+
simde__m64_private r_;
|
|
2167
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
2168
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
2169
|
+
|
|
2170
|
+
#if defined(SIMDE_MMX_NEON)
|
|
2171
|
+
r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32);
|
|
2172
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
2173
|
+
r_.i32f = a_.i32f ^ b_.i32f;
|
|
2174
|
+
#else
|
|
2175
|
+
r_.u64[0] = a_.u64[0] ^ b_.u64[0];
|
|
2176
|
+
#endif
|
|
2177
|
+
|
|
2178
|
+
return simde__m64_from_private(r_);
|
|
2179
|
+
#endif
|
|
2180
|
+
}
|
|
2181
|
+
#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b)
|
|
2182
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
2183
|
+
# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b)
|
|
2184
|
+
# define _m_pxor(a, b) simde_mm_xor_si64(a, b)
|
|
2185
|
+
#endif
|
|
2186
|
+
|
|
2187
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2188
|
+
int32_t
|
|
2189
|
+
simde_m_to_int (simde__m64 a) {
|
|
2190
|
+
#if defined(SIMDE_MMX_NATIVE)
|
|
2191
|
+
return _m_to_int(a);
|
|
2192
|
+
#else
|
|
2193
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
2194
|
+
|
|
2195
|
+
#if defined(SIMDE_MMX_NEON)
|
|
2196
|
+
return vget_lane_s32(a_.neon_i32, 0);
|
|
2197
|
+
#else
|
|
2198
|
+
return a_.i32[0];
|
|
2199
|
+
#endif
|
|
2200
|
+
#endif
|
|
2201
|
+
}
|
|
2202
|
+
#if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
|
|
2203
|
+
# define _m_to_int(a) simde_m_to_int(a)
|
|
2204
|
+
#endif
|
|
2205
|
+
|
|
2206
|
+
SIMDE__END_DECLS
|
|
2207
|
+
|
|
2208
|
+
HEDLEY_DIAGNOSTIC_POP
|
|
2209
|
+
|
|
2210
|
+
#endif /* !defined(SIMDE__MMX_H) */
|