minimap2 0.2.25.0 → 0.2.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -3
- data/ext/minimap2/Makefile +6 -2
- data/ext/minimap2/NEWS.md +38 -0
- data/ext/minimap2/README.md +9 -3
- data/ext/minimap2/align.c +5 -3
- data/ext/minimap2/cookbook.md +2 -2
- data/ext/minimap2/format.c +7 -4
- data/ext/minimap2/kalloc.c +20 -1
- data/ext/minimap2/kalloc.h +13 -2
- data/ext/minimap2/ksw2.h +1 -0
- data/ext/minimap2/ksw2_extd2_sse.c +1 -1
- data/ext/minimap2/ksw2_exts2_sse.c +79 -40
- data/ext/minimap2/ksw2_extz2_sse.c +1 -1
- data/ext/minimap2/lchain.c +15 -16
- data/ext/minimap2/lib/simde/CONTRIBUTING.md +114 -0
- data/ext/minimap2/lib/simde/COPYING +20 -0
- data/ext/minimap2/lib/simde/README.md +333 -0
- data/ext/minimap2/lib/simde/amalgamate.py +58 -0
- data/ext/minimap2/lib/simde/meson.build +33 -0
- data/ext/minimap2/lib/simde/netlify.toml +20 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +140 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +137 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +142 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +145 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +140 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +145 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +140 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +143 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +137 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +141 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +147 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +141 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +134 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +138 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +134 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +137 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +131 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +135 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +141 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +135 -0
- data/ext/minimap2/lib/simde/simde/arm/neon.h +97 -0
- data/ext/minimap2/lib/simde/simde/check.h +267 -0
- data/ext/minimap2/lib/simde/simde/debug-trap.h +83 -0
- data/ext/minimap2/lib/simde/simde/hedley.h +1899 -0
- data/ext/minimap2/lib/simde/simde/simde-arch.h +445 -0
- data/ext/minimap2/lib/simde/simde/simde-common.h +697 -0
- data/ext/minimap2/lib/simde/simde/x86/avx.h +5385 -0
- data/ext/minimap2/lib/simde/simde/x86/avx2.h +2402 -0
- data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +391 -0
- data/ext/minimap2/lib/simde/simde/x86/avx512f.h +3389 -0
- data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +112 -0
- data/ext/minimap2/lib/simde/simde/x86/fma.h +659 -0
- data/ext/minimap2/lib/simde/simde/x86/mmx.h +2210 -0
- data/ext/minimap2/lib/simde/simde/x86/sse.h +3696 -0
- data/ext/minimap2/lib/simde/simde/x86/sse2.h +5991 -0
- data/ext/minimap2/lib/simde/simde/x86/sse3.h +343 -0
- data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +1783 -0
- data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +105 -0
- data/ext/minimap2/lib/simde/simde/x86/ssse3.h +1053 -0
- data/ext/minimap2/lib/simde/simde/x86/svml.h +543 -0
- data/ext/minimap2/lib/simde/test/CMakeLists.txt +166 -0
- data/ext/minimap2/lib/simde/test/arm/meson.build +4 -0
- data/ext/minimap2/lib/simde/test/arm/neon/meson.build +23 -0
- data/ext/minimap2/lib/simde/test/arm/neon/skel.c +871 -0
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +134 -0
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +39 -0
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +10 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +1260 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +873 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +1084 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +1260 -0
- data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +18 -0
- data/ext/minimap2/lib/simde/test/arm/test-arm.c +20 -0
- data/ext/minimap2/lib/simde/test/arm/test-arm.h +8 -0
- data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +171 -0
- data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +68 -0
- data/ext/minimap2/lib/simde/test/meson.build +64 -0
- data/ext/minimap2/lib/simde/test/munit/COPYING +21 -0
- data/ext/minimap2/lib/simde/test/munit/Makefile +55 -0
- data/ext/minimap2/lib/simde/test/munit/README.md +54 -0
- data/ext/minimap2/lib/simde/test/munit/example.c +351 -0
- data/ext/minimap2/lib/simde/test/munit/meson.build +37 -0
- data/ext/minimap2/lib/simde/test/munit/munit.c +2055 -0
- data/ext/minimap2/lib/simde/test/munit/munit.h +535 -0
- data/ext/minimap2/lib/simde/test/run-tests.c +20 -0
- data/ext/minimap2/lib/simde/test/run-tests.h +260 -0
- data/ext/minimap2/lib/simde/test/x86/avx.c +13752 -0
- data/ext/minimap2/lib/simde/test/x86/avx2.c +9977 -0
- data/ext/minimap2/lib/simde/test/x86/avx512bw.c +2664 -0
- data/ext/minimap2/lib/simde/test/x86/avx512f.c +10416 -0
- data/ext/minimap2/lib/simde/test/x86/avx512vl.c +210 -0
- data/ext/minimap2/lib/simde/test/x86/fma.c +2557 -0
- data/ext/minimap2/lib/simde/test/x86/meson.build +33 -0
- data/ext/minimap2/lib/simde/test/x86/mmx.c +2878 -0
- data/ext/minimap2/lib/simde/test/x86/skel.c +2984 -0
- data/ext/minimap2/lib/simde/test/x86/sse.c +5121 -0
- data/ext/minimap2/lib/simde/test/x86/sse2.c +9860 -0
- data/ext/minimap2/lib/simde/test/x86/sse3.c +486 -0
- data/ext/minimap2/lib/simde/test/x86/sse4.1.c +3446 -0
- data/ext/minimap2/lib/simde/test/x86/sse4.2.c +101 -0
- data/ext/minimap2/lib/simde/test/x86/ssse3.c +2084 -0
- data/ext/minimap2/lib/simde/test/x86/svml.c +1545 -0
- data/ext/minimap2/lib/simde/test/x86/test-avx.h +16 -0
- data/ext/minimap2/lib/simde/test/x86/test-avx512.h +25 -0
- data/ext/minimap2/lib/simde/test/x86/test-mmx.h +13 -0
- data/ext/minimap2/lib/simde/test/x86/test-sse.h +13 -0
- data/ext/minimap2/lib/simde/test/x86/test-sse2.h +13 -0
- data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +196 -0
- data/ext/minimap2/lib/simde/test/x86/test-x86.c +48 -0
- data/ext/minimap2/lib/simde/test/x86/test-x86.h +8 -0
- data/ext/minimap2/main.c +13 -6
- data/ext/minimap2/map.c +0 -5
- data/ext/minimap2/minimap.h +40 -31
- data/ext/minimap2/minimap2.1 +19 -5
- data/ext/minimap2/misc/paftools.js +545 -24
- data/ext/minimap2/options.c +1 -1
- data/ext/minimap2/pyproject.toml +2 -0
- data/ext/minimap2/python/mappy.pyx +3 -1
- data/ext/minimap2/seed.c +1 -1
- data/ext/minimap2/setup.py +32 -22
- data/lib/minimap2/version.rb +1 -1
- metadata +100 -3
|
@@ -0,0 +1,3389 @@
|
|
|
1
|
+
/* Permission is hereby granted, free of charge, to any person
|
|
2
|
+
* obtaining a copy of this software and associated documentation
|
|
3
|
+
* files (the "Software"), to deal in the Software without
|
|
4
|
+
* restriction, including without limitation the rights to use, copy,
|
|
5
|
+
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
|
6
|
+
* of the Software, and to permit persons to whom the Software is
|
|
7
|
+
* furnished to do so, subject to the following conditions:
|
|
8
|
+
*
|
|
9
|
+
* The above copyright notice and this permission notice shall be
|
|
10
|
+
* included in all copies or substantial portions of the Software.
|
|
11
|
+
*
|
|
12
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
13
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
14
|
+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
15
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
16
|
+
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
17
|
+
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
18
|
+
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
19
|
+
* SOFTWARE.
|
|
20
|
+
*
|
|
21
|
+
* Copyright:
|
|
22
|
+
* 2020 Evan Nemerson <evan@nemerson.com>
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
#if !defined(SIMDE__AVX512F_H)
|
|
26
|
+
# if !defined(SIMDE__AVX512F_H)
|
|
27
|
+
# define SIMDE__AVX512F_H
|
|
28
|
+
# endif
|
|
29
|
+
# include "avx2.h"
|
|
30
|
+
|
|
31
|
+
HEDLEY_DIAGNOSTIC_PUSH
|
|
32
|
+
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
|
33
|
+
|
|
34
|
+
# if defined(SIMDE_ARCH_X86_AVX512F) && !defined(SIMDE_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
|
|
35
|
+
# define SIMDE_AVX512F_NATIVE
|
|
36
|
+
# elif defined(SIMDE_ARCH_ARM_NEON) && !defined(SIMDE_AVX512F_NO_NEON) && !defined(SIMDE_NO_NEON)
|
|
37
|
+
# define SIMDE_AVX512F_NEON
|
|
38
|
+
# elif defined(SIMDE_ARCH_POWER_ALTIVEC)
|
|
39
|
+
# define SIMDE_AVX512F_POWER_ALTIVEC
|
|
40
|
+
# endif
|
|
41
|
+
|
|
42
|
+
/* The problem is that Microsoft doesn't support 64-byte aligned parameters, except for
|
|
43
|
+
__m512/__m512i/__m512d. Since our private union has an __m512 member it will be 64-byte
|
|
44
|
+
aligned even if we reduce the alignment requirements of other members.
|
|
45
|
+
|
|
46
|
+
Even if we're on x86 and use the native AVX-512 types for arguments/return values, the
|
|
47
|
+
to/from private functions will break, and I'm not willing to change their APIs to use
|
|
48
|
+
pointers (which would also require more verbose code on the caller side) just to make
|
|
49
|
+
MSVC happy.
|
|
50
|
+
|
|
51
|
+
If you want to use AVX-512 in SIMDe, you'll need to either upgrade to MSVC 2017 or later,
|
|
52
|
+
or upgrade to a different compiler (clang-cl, perhaps?). If you have an idea of how to
|
|
53
|
+
fix this without requiring API changes (except transparently through macros), patches
|
|
54
|
+
are welcome. */
|
|
55
|
+
# if defined(HEDLEY_MSVC_VERSION) && !HEDLEY_MSVC_VERSION_CHECK(19,10,0)
|
|
56
|
+
# if defined(SIMDE_AVX512F_NATIVE)
|
|
57
|
+
# undef SIMDE_AVX512F_NATIVE
|
|
58
|
+
# pragma message("Native AVX-512 support requires MSVC 2017 or later. See comment above (in code) for details.")
|
|
59
|
+
# endif
|
|
60
|
+
# define SIMDE_AVX512_ALIGN SIMDE_ALIGN(32)
|
|
61
|
+
# else
|
|
62
|
+
# define SIMDE_AVX512_ALIGN SIMDE_ALIGN(64)
|
|
63
|
+
# endif
|
|
64
|
+
|
|
65
|
+
# if defined(SIMDE_AVX512F_NATIVE)
|
|
66
|
+
# include <immintrin.h>
|
|
67
|
+
# endif
|
|
68
|
+
|
|
69
|
+
# if defined(SIMDE_AVX512F_POWER_ALTIVEC)
|
|
70
|
+
# include <altivec.h>
|
|
71
|
+
# endif
|
|
72
|
+
|
|
73
|
+
SIMDE__BEGIN_DECLS
|
|
74
|
+
|
|
75
|
+
typedef union {
|
|
76
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT)
|
|
77
|
+
SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
78
|
+
SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
79
|
+
SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
80
|
+
SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
81
|
+
SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
82
|
+
SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
83
|
+
SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
84
|
+
SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
85
|
+
#if defined(SIMDE__HAVE_INT128)
|
|
86
|
+
SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
87
|
+
SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
88
|
+
#endif
|
|
89
|
+
SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
90
|
+
SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
91
|
+
SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
92
|
+
SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
93
|
+
#else
|
|
94
|
+
SIMDE_AVX512_ALIGN int8_t i8[64];
|
|
95
|
+
SIMDE_AVX512_ALIGN int16_t i16[32];
|
|
96
|
+
SIMDE_AVX512_ALIGN int32_t i32[16];
|
|
97
|
+
SIMDE_AVX512_ALIGN int64_t i64[8];
|
|
98
|
+
SIMDE_AVX512_ALIGN uint8_t u8[64];
|
|
99
|
+
SIMDE_AVX512_ALIGN uint16_t u16[32];
|
|
100
|
+
SIMDE_AVX512_ALIGN uint32_t u32[16];
|
|
101
|
+
SIMDE_AVX512_ALIGN uint64_t u64[8];
|
|
102
|
+
SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)];
|
|
103
|
+
SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)];
|
|
104
|
+
#if defined(SIMDE__HAVE_INT128)
|
|
105
|
+
SIMDE_AVX512_ALIGN simde_int128 i128[4];
|
|
106
|
+
SIMDE_AVX512_ALIGN simde_uint128 u128[4];
|
|
107
|
+
#endif
|
|
108
|
+
SIMDE_AVX512_ALIGN simde_float32 f32[16];
|
|
109
|
+
SIMDE_AVX512_ALIGN simde_float64 f64[8];
|
|
110
|
+
#endif
|
|
111
|
+
|
|
112
|
+
SIMDE_AVX512_ALIGN simde__m128_private m128_private[4];
|
|
113
|
+
SIMDE_AVX512_ALIGN simde__m128 m128[4];
|
|
114
|
+
SIMDE_AVX512_ALIGN simde__m256_private m256_private[2];
|
|
115
|
+
SIMDE_AVX512_ALIGN simde__m256 m256[2];
|
|
116
|
+
|
|
117
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
118
|
+
SIMDE_AVX512_ALIGN __m512 n;
|
|
119
|
+
#elif defined(SIMDE_ARCH_POWER_ALTIVEC)
|
|
120
|
+
SIMDE_ALIGN(16) vector unsigned char altivec_u8[4];
|
|
121
|
+
SIMDE_ALIGN(16) vector unsigned short altivec_u16[4];
|
|
122
|
+
SIMDE_ALIGN(16) vector unsigned int altivec_u32[4];
|
|
123
|
+
SIMDE_ALIGN(16) vector unsigned long long altivec_u64[4];
|
|
124
|
+
SIMDE_ALIGN(16) vector signed char altivec_i8[4];
|
|
125
|
+
SIMDE_ALIGN(16) vector signed short altivec_i16[4];
|
|
126
|
+
SIMDE_ALIGN(16) vector signed int altivec_i32[4];
|
|
127
|
+
SIMDE_ALIGN(16) vector signed long long altivec_i64[4];
|
|
128
|
+
SIMDE_ALIGN(16) vector float altivec_f32[4];
|
|
129
|
+
SIMDE_ALIGN(16) vector double altivec_f64[4];
|
|
130
|
+
#endif
|
|
131
|
+
} simde__m512_private;
|
|
132
|
+
|
|
133
|
+
typedef union {
|
|
134
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT)
|
|
135
|
+
SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
136
|
+
SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
137
|
+
SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
138
|
+
SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
139
|
+
SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
140
|
+
SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
141
|
+
SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
142
|
+
SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
143
|
+
#if defined(SIMDE__HAVE_INT128)
|
|
144
|
+
SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
145
|
+
SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
146
|
+
#endif
|
|
147
|
+
SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
148
|
+
SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
149
|
+
SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
150
|
+
SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
151
|
+
#else
|
|
152
|
+
SIMDE_AVX512_ALIGN int8_t i8[64];
|
|
153
|
+
SIMDE_AVX512_ALIGN int16_t i16[32];
|
|
154
|
+
SIMDE_AVX512_ALIGN int32_t i32[16];
|
|
155
|
+
SIMDE_AVX512_ALIGN int64_t i64[8];
|
|
156
|
+
SIMDE_AVX512_ALIGN uint8_t u8[64];
|
|
157
|
+
SIMDE_AVX512_ALIGN uint16_t u16[32];
|
|
158
|
+
SIMDE_AVX512_ALIGN uint32_t u32[16];
|
|
159
|
+
SIMDE_AVX512_ALIGN uint64_t u64[8];
|
|
160
|
+
#if defined(SIMDE__HAVE_INT128)
|
|
161
|
+
SIMDE_AVX512_ALIGN simde_int128 i128[4];
|
|
162
|
+
SIMDE_AVX512_ALIGN simde_uint128 u128[4];
|
|
163
|
+
#endif
|
|
164
|
+
SIMDE_AVX512_ALIGN simde_float32 f32[16];
|
|
165
|
+
SIMDE_AVX512_ALIGN simde_float64 f64[8];
|
|
166
|
+
SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)];
|
|
167
|
+
SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)];
|
|
168
|
+
#endif
|
|
169
|
+
|
|
170
|
+
SIMDE_AVX512_ALIGN simde__m128d_private m128d_private[4];
|
|
171
|
+
SIMDE_AVX512_ALIGN simde__m128d m128d[4];
|
|
172
|
+
SIMDE_AVX512_ALIGN simde__m256d_private m256d_private[2];
|
|
173
|
+
SIMDE_AVX512_ALIGN simde__m256d m256d[2];
|
|
174
|
+
|
|
175
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
176
|
+
SIMDE_AVX512_ALIGN __m512d n;
|
|
177
|
+
#elif defined(SIMDE_ARCH_POWER_ALTIVEC)
|
|
178
|
+
SIMDE_ALIGN(16) vector unsigned char altivec_u8[4];
|
|
179
|
+
SIMDE_ALIGN(16) vector unsigned short altivec_u16[4];
|
|
180
|
+
SIMDE_ALIGN(16) vector unsigned int altivec_u32[4];
|
|
181
|
+
SIMDE_ALIGN(16) vector unsigned long long altivec_u64[4];
|
|
182
|
+
SIMDE_ALIGN(16) vector signed char altivec_i8[4];
|
|
183
|
+
SIMDE_ALIGN(16) vector signed short altivec_i16[4];
|
|
184
|
+
SIMDE_ALIGN(16) vector signed int altivec_i32[4];
|
|
185
|
+
SIMDE_ALIGN(16) vector signed long long altivec_i64[4];
|
|
186
|
+
SIMDE_ALIGN(16) vector float altivec_f32[4];
|
|
187
|
+
SIMDE_ALIGN(16) vector double altivec_f64[4];
|
|
188
|
+
#endif
|
|
189
|
+
} simde__m512d_private;
|
|
190
|
+
|
|
191
|
+
typedef union {
|
|
192
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT)
|
|
193
|
+
SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
194
|
+
SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
195
|
+
SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
196
|
+
SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
197
|
+
SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
198
|
+
SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
199
|
+
SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
200
|
+
SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
201
|
+
#if defined(SIMDE__HAVE_INT128)
|
|
202
|
+
SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
203
|
+
SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
204
|
+
#endif
|
|
205
|
+
SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
206
|
+
SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
207
|
+
SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
208
|
+
SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
209
|
+
#else
|
|
210
|
+
SIMDE_AVX512_ALIGN int8_t i8[64];
|
|
211
|
+
SIMDE_AVX512_ALIGN int16_t i16[32];
|
|
212
|
+
SIMDE_AVX512_ALIGN int32_t i32[16];
|
|
213
|
+
SIMDE_AVX512_ALIGN int64_t i64[8];
|
|
214
|
+
SIMDE_AVX512_ALIGN uint8_t u8[64];
|
|
215
|
+
SIMDE_AVX512_ALIGN uint16_t u16[32];
|
|
216
|
+
SIMDE_AVX512_ALIGN uint32_t u32[16];
|
|
217
|
+
SIMDE_AVX512_ALIGN uint64_t u64[8];
|
|
218
|
+
SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)];
|
|
219
|
+
SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)];
|
|
220
|
+
#if defined(SIMDE__HAVE_INT128)
|
|
221
|
+
SIMDE_AVX512_ALIGN simde_int128 i128[4];
|
|
222
|
+
SIMDE_AVX512_ALIGN simde_uint128 u128[4];
|
|
223
|
+
#endif
|
|
224
|
+
SIMDE_AVX512_ALIGN simde_float32 f32[16];
|
|
225
|
+
SIMDE_AVX512_ALIGN simde_float64 f64[8];
|
|
226
|
+
#endif
|
|
227
|
+
|
|
228
|
+
SIMDE_AVX512_ALIGN simde__m128i_private m128i_private[4];
|
|
229
|
+
SIMDE_AVX512_ALIGN simde__m128i m128i[4];
|
|
230
|
+
SIMDE_AVX512_ALIGN simde__m256i_private m256i_private[2];
|
|
231
|
+
SIMDE_AVX512_ALIGN simde__m256i m256i[2];
|
|
232
|
+
|
|
233
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
234
|
+
SIMDE_AVX512_ALIGN __m512i n;
|
|
235
|
+
#elif defined(SIMDE_ARCH_POWER_ALTIVEC)
|
|
236
|
+
SIMDE_ALIGN(16) vector unsigned char altivec_u8[4];
|
|
237
|
+
SIMDE_ALIGN(16) vector unsigned short altivec_u16[4];
|
|
238
|
+
SIMDE_ALIGN(16) vector unsigned int altivec_u32[4];
|
|
239
|
+
SIMDE_ALIGN(16) vector unsigned long long altivec_u64[4];
|
|
240
|
+
SIMDE_ALIGN(16) vector signed char altivec_i8[4];
|
|
241
|
+
SIMDE_ALIGN(16) vector signed short altivec_i16[4];
|
|
242
|
+
SIMDE_ALIGN(16) vector signed int altivec_i32[4];
|
|
243
|
+
SIMDE_ALIGN(16) vector signed long long altivec_i64[4];
|
|
244
|
+
SIMDE_ALIGN(16) vector float altivec_f32[4];
|
|
245
|
+
SIMDE_ALIGN(16) vector double altivec_f64[4];
|
|
246
|
+
#endif
|
|
247
|
+
} simde__m512i_private;
|
|
248
|
+
|
|
249
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
250
|
+
typedef __m512 simde__m512;
|
|
251
|
+
typedef __m512i simde__m512i;
|
|
252
|
+
typedef __m512d simde__m512d;
|
|
253
|
+
typedef __mmask8 simde__mmask8;
|
|
254
|
+
typedef __mmask16 simde__mmask16;
|
|
255
|
+
typedef __mmask32 simde__mmask32;
|
|
256
|
+
typedef __mmask64 simde__mmask64;
|
|
257
|
+
#else
|
|
258
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT)
|
|
259
|
+
typedef simde_float32 simde__m512 SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
260
|
+
typedef int_fast32_t simde__m512i SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
261
|
+
typedef simde_float64 simde__m512d SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
|
|
262
|
+
#else
|
|
263
|
+
typedef simde__m512_private simde__m512;
|
|
264
|
+
typedef simde__m512i_private simde__m512i;
|
|
265
|
+
typedef simde__m512d_private simde__m512d;
|
|
266
|
+
#endif
|
|
267
|
+
|
|
268
|
+
typedef uint_fast8_t simde__mmask8;
|
|
269
|
+
typedef uint_fast16_t simde__mmask16;
|
|
270
|
+
typedef uint_fast32_t simde__mmask32;
|
|
271
|
+
typedef uint_fast64_t simde__mmask64;
|
|
272
|
+
#endif
|
|
273
|
+
|
|
274
|
+
/* When the caller asked for native aliases (SIMDE_ENABLE_NATIVE_ALIASES)
 * and the real AVX-512F types are not in use, expose the SIMDe types under
 * the Intel names and record that the _mm512_* function aliases should be
 * defined as well. */
#if defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_AVX512F_NATIVE)
  #define SIMDE_AVX512F_ENABLE_NATIVE_ALIASES
  typedef simde__m512  __m512;
  typedef simde__m512i __m512i;
  typedef simde__m512d __m512d;
#endif
|
|
280
|
+
|
|
281
|
+
/* Every public and private 512-bit type must occupy exactly 64 bytes. */
HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512), "simde__m512 size incorrect");
HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512_private), "simde__m512_private size incorrect");
HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512i), "simde__m512i size incorrect");
HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512i_private), "simde__m512i_private size incorrect");
HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512d), "simde__m512d size incorrect");
HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512d_private), "simde__m512d_private size incorrect");
#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)
  /* The expected alignment must track SIMDE_AVX512_ALIGN, which this header
   * defines as SIMDE_ALIGN(32) only for MSVC older than 19.10 and as
   * SIMDE_ALIGN(64) everywhere else.  Hard-coding 32 here made the checks
   * fail on every configuration that actually gets 64-byte alignment. */
  #if defined(HEDLEY_MSVC_VERSION) && !HEDLEY_MSVC_VERSION_CHECK(19,10,0)
    #define SIMDE__AVX512_EXPECTED_ALIGNMENT 32
  #else
    #define SIMDE__AVX512_EXPECTED_ALIGNMENT 64
  #endif
  HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512) == SIMDE__AVX512_EXPECTED_ALIGNMENT, "simde__m512 is not aligned as requested by SIMDE_AVX512_ALIGN");
  HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512_private) == SIMDE__AVX512_EXPECTED_ALIGNMENT, "simde__m512_private is not aligned as requested by SIMDE_AVX512_ALIGN");
  HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512i) == SIMDE__AVX512_EXPECTED_ALIGNMENT, "simde__m512i is not aligned as requested by SIMDE_AVX512_ALIGN");
  HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512i_private) == SIMDE__AVX512_EXPECTED_ALIGNMENT, "simde__m512i_private is not aligned as requested by SIMDE_AVX512_ALIGN");
  HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512d) == SIMDE__AVX512_EXPECTED_ALIGNMENT, "simde__m512d is not aligned as requested by SIMDE_AVX512_ALIGN");
  HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512d_private) == SIMDE__AVX512_EXPECTED_ALIGNMENT, "simde__m512d_private is not aligned as requested by SIMDE_AVX512_ALIGN");
  #undef SIMDE__AVX512_EXPECTED_ALIGNMENT
#endif
|
|
295
|
+
|
|
296
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
297
|
+
simde__m512
|
|
298
|
+
simde__m512_from_private(simde__m512_private v) {
|
|
299
|
+
simde__m512 r;
|
|
300
|
+
simde_memcpy(&r, &v, sizeof(r));
|
|
301
|
+
return r;
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
305
|
+
simde__m512_private
|
|
306
|
+
simde__m512_to_private(simde__m512 v) {
|
|
307
|
+
simde__m512_private r;
|
|
308
|
+
simde_memcpy(&r, &v, sizeof(r));
|
|
309
|
+
return r;
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
313
|
+
simde__m512i
|
|
314
|
+
simde__m512i_from_private(simde__m512i_private v) {
|
|
315
|
+
simde__m512i r;
|
|
316
|
+
simde_memcpy(&r, &v, sizeof(r));
|
|
317
|
+
return r;
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
321
|
+
simde__m512i_private
|
|
322
|
+
simde__m512i_to_private(simde__m512i v) {
|
|
323
|
+
simde__m512i_private r;
|
|
324
|
+
simde_memcpy(&r, &v, sizeof(r));
|
|
325
|
+
return r;
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
329
|
+
simde__m512d
|
|
330
|
+
simde__m512d_from_private(simde__m512d_private v) {
|
|
331
|
+
simde__m512d r;
|
|
332
|
+
simde_memcpy(&r, &v, sizeof(r));
|
|
333
|
+
return r;
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
337
|
+
simde__m512d_private
|
|
338
|
+
simde__m512d_to_private(simde__m512d v) {
|
|
339
|
+
simde__m512d_private r;
|
|
340
|
+
simde_memcpy(&r, &v, sizeof(r));
|
|
341
|
+
return r;
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
345
|
+
simde__mmask16
|
|
346
|
+
simde__m512i_private_to_mmask16 (simde__m512i_private a) {
|
|
347
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
348
|
+
HEDLEY_UNREACHABLE_RETURN(0);
|
|
349
|
+
#else
|
|
350
|
+
simde__mmask16 r = 0;
|
|
351
|
+
|
|
352
|
+
/* Note: using addition instead of a bitwise or for the reduction
|
|
353
|
+
seems like it should improve things since hardware support for
|
|
354
|
+
horizontal addition is better than bitwise or. However, GCC
|
|
355
|
+
generates the same code, and clang is actually a bit slower.
|
|
356
|
+
I suspect this can be optimized quite a bit, and this function
|
|
357
|
+
is probably going to be pretty hot. */
|
|
358
|
+
SIMDE__VECTORIZE_REDUCTION(|:r)
|
|
359
|
+
for (size_t i = 0 ; i < (sizeof(a.i32) / sizeof(a.i32[0])) ; i++) {
|
|
360
|
+
r |= !!(a.i32[i]) << i;
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
return r;
|
|
364
|
+
#endif
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
368
|
+
simde__mmask8
|
|
369
|
+
simde__m512i_private_to_mmask8 (simde__m512i_private a) {
|
|
370
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
371
|
+
HEDLEY_UNREACHABLE_RETURN(0);
|
|
372
|
+
#else
|
|
373
|
+
simde__mmask8 r = 0;
|
|
374
|
+
SIMDE__VECTORIZE_REDUCTION(|:r)
|
|
375
|
+
for (size_t i = 0 ; i < (sizeof(a.i64) / sizeof(a.i64[0])) ; i++) {
|
|
376
|
+
r |= !!(a.i64[i]) << i;
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
return r;
|
|
380
|
+
#endif
|
|
381
|
+
}
|
|
382
|
+
|
|
383
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
384
|
+
simde__m512i
|
|
385
|
+
simde__m512i_from_mmask16 (simde__mmask16 k) {
|
|
386
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
387
|
+
/* Should never be reached. */
|
|
388
|
+
return _mm512_mask_mov_epi32(_mm512_setzero_epi32(), k, _mm512_set1_epi32(~INT32_C(0)));
|
|
389
|
+
#else
|
|
390
|
+
simde__m512i_private r_;
|
|
391
|
+
|
|
392
|
+
SIMDE__VECTORIZE
|
|
393
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
394
|
+
r_.i32[i] = (k & (1 << i)) ? ~INT32_C(0) : INT32_C(0);
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
return simde__m512i_from_private(r_);
|
|
398
|
+
#endif
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
402
|
+
simde__m512
|
|
403
|
+
simde_mm512_castpd_ps (simde__m512d a) {
|
|
404
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
405
|
+
return _mm512_castpd_ps(a);
|
|
406
|
+
#else
|
|
407
|
+
simde__m512 r;
|
|
408
|
+
memcpy(&r, &a, sizeof(r));
|
|
409
|
+
return r;
|
|
410
|
+
#endif
|
|
411
|
+
}
|
|
412
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
413
|
+
#define _mm512_castpd_ps(a) simde_mm512_castpd_ps(a)
|
|
414
|
+
#endif
|
|
415
|
+
|
|
416
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
417
|
+
simde__m512i
|
|
418
|
+
simde_mm512_castpd_si512 (simde__m512d a) {
|
|
419
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
420
|
+
return _mm512_castpd_si512(a);
|
|
421
|
+
#else
|
|
422
|
+
simde__m512i r;
|
|
423
|
+
memcpy(&r, &a, sizeof(r));
|
|
424
|
+
return r;
|
|
425
|
+
#endif
|
|
426
|
+
}
|
|
427
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
428
|
+
#define _mm512_castpd_si512(a) simde_mm512_castpd_si512(a)
|
|
429
|
+
#endif
|
|
430
|
+
|
|
431
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
432
|
+
simde__m512d
|
|
433
|
+
simde_mm512_castps_pd (simde__m512 a) {
|
|
434
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
435
|
+
return _mm512_castps_pd(a);
|
|
436
|
+
#else
|
|
437
|
+
simde__m512d r;
|
|
438
|
+
memcpy(&r, &a, sizeof(r));
|
|
439
|
+
return r;
|
|
440
|
+
#endif
|
|
441
|
+
}
|
|
442
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
443
|
+
#define _mm512_castps_pd(a) simde_mm512_castps_pd(a)
|
|
444
|
+
#endif
|
|
445
|
+
|
|
446
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
447
|
+
simde__m512i
|
|
448
|
+
simde_mm512_castps_si512 (simde__m512 a) {
|
|
449
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
450
|
+
return _mm512_castps_si512(a);
|
|
451
|
+
#else
|
|
452
|
+
simde__m512i r;
|
|
453
|
+
memcpy(&r, &a, sizeof(r));
|
|
454
|
+
return r;
|
|
455
|
+
#endif
|
|
456
|
+
}
|
|
457
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
458
|
+
#define _mm512_castps_si512(a) simde_mm512_castps_si512(a)
|
|
459
|
+
#endif
|
|
460
|
+
|
|
461
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
462
|
+
simde__m512
|
|
463
|
+
simde_mm512_castsi512_ps (simde__m512i a) {
|
|
464
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
465
|
+
return _mm512_castsi512_ps(a);
|
|
466
|
+
#else
|
|
467
|
+
simde__m512 r;
|
|
468
|
+
memcpy(&r, &a, sizeof(r));
|
|
469
|
+
return r;
|
|
470
|
+
#endif
|
|
471
|
+
}
|
|
472
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
473
|
+
#define _mm512_castsi512_ps(a) simde_mm512_castsi512_ps(a)
|
|
474
|
+
#endif
|
|
475
|
+
|
|
476
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
477
|
+
simde__m512d
|
|
478
|
+
simde_mm512_castsi512_pd (simde__m512i a) {
|
|
479
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
480
|
+
return _mm512_castsi512_pd(a);
|
|
481
|
+
#else
|
|
482
|
+
simde__m512d r;
|
|
483
|
+
memcpy(&r, &a, sizeof(r));
|
|
484
|
+
return r;
|
|
485
|
+
#endif
|
|
486
|
+
}
|
|
487
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
488
|
+
#define _mm512_castsi512_pd(a) simde_mm512_castsi512_pd(a)
|
|
489
|
+
#endif
|
|
490
|
+
|
|
491
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
492
|
+
simde__m512d
|
|
493
|
+
simde_mm512_castpd128_pd512 (simde__m128d a) {
|
|
494
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
495
|
+
return _mm512_castpd128_pd512(a);
|
|
496
|
+
#else
|
|
497
|
+
simde__m512d_private r_;
|
|
498
|
+
r_.m128d[0] = a;
|
|
499
|
+
return simde__m512d_from_private(r_);
|
|
500
|
+
#endif
|
|
501
|
+
}
|
|
502
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
503
|
+
#define _mm512_castpd128_pd512(a) simde_mm512_castpd128_pd512(a)
|
|
504
|
+
#endif
|
|
505
|
+
|
|
506
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
507
|
+
simde__m512d
|
|
508
|
+
simde_mm512_castpd256_pd512 (simde__m256d a) {
|
|
509
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
510
|
+
return _mm512_castpd256_pd512(a);
|
|
511
|
+
#else
|
|
512
|
+
simde__m512d_private r_;
|
|
513
|
+
r_.m256d[0] = a;
|
|
514
|
+
return simde__m512d_from_private(r_);
|
|
515
|
+
#endif
|
|
516
|
+
}
|
|
517
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
518
|
+
#define _mm512_castpd256_pd512(a) simde_mm512_castpd256_pd512(a)
|
|
519
|
+
#endif
|
|
520
|
+
|
|
521
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
522
|
+
simde__m128d
|
|
523
|
+
simde_mm512_castpd512_pd128 (simde__m512d a) {
|
|
524
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
525
|
+
return _mm512_castpd512_pd128(a);
|
|
526
|
+
#else
|
|
527
|
+
simde__m512d_private a_ = simde__m512d_to_private(a);
|
|
528
|
+
return a_.m128d[0];
|
|
529
|
+
#endif
|
|
530
|
+
}
|
|
531
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
532
|
+
#define _mm512_castpd512_pd128(a) simde_mm512_castpd512_pd128(a)
|
|
533
|
+
#endif
|
|
534
|
+
|
|
535
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
536
|
+
simde__m256d
|
|
537
|
+
simde_mm512_castpd512_pd256 (simde__m512d a) {
|
|
538
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
539
|
+
return _mm512_castpd512_pd256(a);
|
|
540
|
+
#else
|
|
541
|
+
simde__m512d_private a_ = simde__m512d_to_private(a);
|
|
542
|
+
return a_.m256d[0];
|
|
543
|
+
#endif
|
|
544
|
+
}
|
|
545
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
546
|
+
#define _mm512_castpd512_pd256(a) simde_mm512_castpd512_pd256(a)
|
|
547
|
+
#endif
|
|
548
|
+
|
|
549
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
550
|
+
simde__m512
|
|
551
|
+
simde_mm512_castps128_ps512 (simde__m128 a) {
|
|
552
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
553
|
+
return _mm512_castps128_ps512(a);
|
|
554
|
+
#else
|
|
555
|
+
simde__m512_private r_;
|
|
556
|
+
r_.m128[0] = a;
|
|
557
|
+
return simde__m512_from_private(r_);
|
|
558
|
+
#endif
|
|
559
|
+
}
|
|
560
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
561
|
+
#define _mm512_castps128_ps512(a) simde_mm512_castps128_ps512(a)
|
|
562
|
+
#endif
|
|
563
|
+
|
|
564
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
565
|
+
simde__m512
|
|
566
|
+
simde_mm512_castps256_ps512 (simde__m256 a) {
|
|
567
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
568
|
+
return _mm512_castps256_ps512(a);
|
|
569
|
+
#else
|
|
570
|
+
simde__m512_private r_;
|
|
571
|
+
r_.m256[0] = a;
|
|
572
|
+
return simde__m512_from_private(r_);
|
|
573
|
+
#endif
|
|
574
|
+
}
|
|
575
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
576
|
+
#define _mm512_castps256_ps512(a) simde_mm512_castps256_ps512(a)
|
|
577
|
+
#endif
|
|
578
|
+
|
|
579
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
580
|
+
simde__m128
|
|
581
|
+
simde_mm512_castps512_ps128 (simde__m512 a) {
|
|
582
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
583
|
+
return _mm512_castps512_ps128(a);
|
|
584
|
+
#else
|
|
585
|
+
simde__m512_private a_ = simde__m512_to_private(a);
|
|
586
|
+
return a_.m128[0];
|
|
587
|
+
#endif
|
|
588
|
+
}
|
|
589
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
590
|
+
#define _mm512_castps512_ps128(a) simde_mm512_castps512_ps128(a)
|
|
591
|
+
#endif
|
|
592
|
+
|
|
593
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
594
|
+
simde__m256
|
|
595
|
+
simde_mm512_castps512_ps256 (simde__m512 a) {
|
|
596
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
597
|
+
return _mm512_castps512_ps256(a);
|
|
598
|
+
#else
|
|
599
|
+
simde__m512_private a_ = simde__m512_to_private(a);
|
|
600
|
+
return a_.m256[0];
|
|
601
|
+
#endif
|
|
602
|
+
}
|
|
603
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
604
|
+
#define _mm512_castps512_ps256(a) simde_mm512_castps512_ps256(a)
|
|
605
|
+
#endif
|
|
606
|
+
|
|
607
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
608
|
+
simde__m512i
|
|
609
|
+
simde_mm512_castsi128_si512 (simde__m128i a) {
|
|
610
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
611
|
+
return _mm512_castsi128_si512(a);
|
|
612
|
+
#else
|
|
613
|
+
simde__m512i_private r_;
|
|
614
|
+
r_.m128i[0] = a;
|
|
615
|
+
return simde__m512i_from_private(r_);
|
|
616
|
+
#endif
|
|
617
|
+
}
|
|
618
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
619
|
+
#define _mm512_castsi128_si512(a) simde_mm512_castsi128_si512(a)
|
|
620
|
+
#endif
|
|
621
|
+
|
|
622
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
623
|
+
simde__m512i
|
|
624
|
+
simde_mm512_castsi256_si512 (simde__m256i a) {
|
|
625
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
626
|
+
return _mm512_castsi256_si512(a);
|
|
627
|
+
#else
|
|
628
|
+
simde__m512i_private r_;
|
|
629
|
+
r_.m256i[0] = a;
|
|
630
|
+
return simde__m512i_from_private(r_);
|
|
631
|
+
#endif
|
|
632
|
+
}
|
|
633
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
634
|
+
#define _mm512_castsi256_si512(a) simde_mm512_castsi256_si512(a)
|
|
635
|
+
#endif
|
|
636
|
+
|
|
637
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
638
|
+
simde__m128i
|
|
639
|
+
simde_mm512_castsi512_si128 (simde__m512i a) {
|
|
640
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
641
|
+
return _mm512_castsi512_si128(a);
|
|
642
|
+
#else
|
|
643
|
+
simde__m512i_private a_ = simde__m512i_to_private(a);
|
|
644
|
+
return a_.m128i[0];
|
|
645
|
+
#endif
|
|
646
|
+
}
|
|
647
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
648
|
+
#define _mm512_castsi512_si128(a) simde_mm512_castsi512_si128(a)
|
|
649
|
+
#endif
|
|
650
|
+
|
|
651
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
652
|
+
simde__m256i
|
|
653
|
+
simde_mm512_castsi512_si256 (simde__m512i a) {
|
|
654
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
655
|
+
return _mm512_castsi512_si256(a);
|
|
656
|
+
#else
|
|
657
|
+
simde__m512i_private a_ = simde__m512i_to_private(a);
|
|
658
|
+
return a_.m256i[0];
|
|
659
|
+
#endif
|
|
660
|
+
}
|
|
661
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
662
|
+
#define _mm512_castsi512_si256(a) simde_mm512_castsi512_si256(a)
|
|
663
|
+
#endif
|
|
664
|
+
|
|
665
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
666
|
+
simde__m512i
|
|
667
|
+
simde_mm512_set_epi8 (int8_t e63, int8_t e62, int8_t e61, int8_t e60, int8_t e59, int8_t e58, int8_t e57, int8_t e56,
|
|
668
|
+
int8_t e55, int8_t e54, int8_t e53, int8_t e52, int8_t e51, int8_t e50, int8_t e49, int8_t e48,
|
|
669
|
+
int8_t e47, int8_t e46, int8_t e45, int8_t e44, int8_t e43, int8_t e42, int8_t e41, int8_t e40,
|
|
670
|
+
int8_t e39, int8_t e38, int8_t e37, int8_t e36, int8_t e35, int8_t e34, int8_t e33, int8_t e32,
|
|
671
|
+
int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24,
|
|
672
|
+
int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16,
|
|
673
|
+
int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8,
|
|
674
|
+
int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) {
|
|
675
|
+
simde__m512i_private r_;
|
|
676
|
+
|
|
677
|
+
r_.i8[ 0] = e0;
|
|
678
|
+
r_.i8[ 1] = e1;
|
|
679
|
+
r_.i8[ 2] = e2;
|
|
680
|
+
r_.i8[ 3] = e3;
|
|
681
|
+
r_.i8[ 4] = e4;
|
|
682
|
+
r_.i8[ 5] = e5;
|
|
683
|
+
r_.i8[ 6] = e6;
|
|
684
|
+
r_.i8[ 7] = e7;
|
|
685
|
+
r_.i8[ 8] = e8;
|
|
686
|
+
r_.i8[ 9] = e9;
|
|
687
|
+
r_.i8[10] = e10;
|
|
688
|
+
r_.i8[11] = e11;
|
|
689
|
+
r_.i8[12] = e12;
|
|
690
|
+
r_.i8[13] = e13;
|
|
691
|
+
r_.i8[14] = e14;
|
|
692
|
+
r_.i8[15] = e15;
|
|
693
|
+
r_.i8[16] = e16;
|
|
694
|
+
r_.i8[17] = e17;
|
|
695
|
+
r_.i8[18] = e18;
|
|
696
|
+
r_.i8[19] = e19;
|
|
697
|
+
r_.i8[20] = e20;
|
|
698
|
+
r_.i8[21] = e21;
|
|
699
|
+
r_.i8[22] = e22;
|
|
700
|
+
r_.i8[23] = e23;
|
|
701
|
+
r_.i8[24] = e24;
|
|
702
|
+
r_.i8[25] = e25;
|
|
703
|
+
r_.i8[26] = e26;
|
|
704
|
+
r_.i8[27] = e27;
|
|
705
|
+
r_.i8[28] = e28;
|
|
706
|
+
r_.i8[29] = e29;
|
|
707
|
+
r_.i8[30] = e30;
|
|
708
|
+
r_.i8[31] = e31;
|
|
709
|
+
r_.i8[32] = e32;
|
|
710
|
+
r_.i8[33] = e33;
|
|
711
|
+
r_.i8[34] = e34;
|
|
712
|
+
r_.i8[35] = e35;
|
|
713
|
+
r_.i8[36] = e36;
|
|
714
|
+
r_.i8[37] = e37;
|
|
715
|
+
r_.i8[38] = e38;
|
|
716
|
+
r_.i8[39] = e39;
|
|
717
|
+
r_.i8[40] = e40;
|
|
718
|
+
r_.i8[41] = e41;
|
|
719
|
+
r_.i8[42] = e42;
|
|
720
|
+
r_.i8[43] = e43;
|
|
721
|
+
r_.i8[44] = e44;
|
|
722
|
+
r_.i8[45] = e45;
|
|
723
|
+
r_.i8[46] = e46;
|
|
724
|
+
r_.i8[47] = e47;
|
|
725
|
+
r_.i8[48] = e48;
|
|
726
|
+
r_.i8[49] = e49;
|
|
727
|
+
r_.i8[50] = e50;
|
|
728
|
+
r_.i8[51] = e51;
|
|
729
|
+
r_.i8[52] = e52;
|
|
730
|
+
r_.i8[53] = e53;
|
|
731
|
+
r_.i8[54] = e54;
|
|
732
|
+
r_.i8[55] = e55;
|
|
733
|
+
r_.i8[56] = e56;
|
|
734
|
+
r_.i8[57] = e57;
|
|
735
|
+
r_.i8[58] = e58;
|
|
736
|
+
r_.i8[59] = e59;
|
|
737
|
+
r_.i8[60] = e60;
|
|
738
|
+
r_.i8[61] = e61;
|
|
739
|
+
r_.i8[62] = e62;
|
|
740
|
+
r_.i8[63] = e63;
|
|
741
|
+
|
|
742
|
+
return simde__m512i_from_private(r_);
|
|
743
|
+
}
|
|
744
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
745
|
+
#define _mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
|
|
746
|
+
#endif
|
|
747
|
+
|
|
748
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
749
|
+
simde__m512i
|
|
750
|
+
simde_mm512_set_epi16 (int16_t e31, int16_t e30, int16_t e29, int16_t e28, int16_t e27, int16_t e26, int16_t e25, int16_t e24,
|
|
751
|
+
int16_t e23, int16_t e22, int16_t e21, int16_t e20, int16_t e19, int16_t e18, int16_t e17, int16_t e16,
|
|
752
|
+
int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8,
|
|
753
|
+
int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) {
|
|
754
|
+
simde__m512i_private r_;
|
|
755
|
+
|
|
756
|
+
r_.i16[ 0] = e0;
|
|
757
|
+
r_.i16[ 1] = e1;
|
|
758
|
+
r_.i16[ 2] = e2;
|
|
759
|
+
r_.i16[ 3] = e3;
|
|
760
|
+
r_.i16[ 4] = e4;
|
|
761
|
+
r_.i16[ 5] = e5;
|
|
762
|
+
r_.i16[ 6] = e6;
|
|
763
|
+
r_.i16[ 7] = e7;
|
|
764
|
+
r_.i16[ 8] = e8;
|
|
765
|
+
r_.i16[ 9] = e9;
|
|
766
|
+
r_.i16[10] = e10;
|
|
767
|
+
r_.i16[11] = e11;
|
|
768
|
+
r_.i16[12] = e12;
|
|
769
|
+
r_.i16[13] = e13;
|
|
770
|
+
r_.i16[14] = e14;
|
|
771
|
+
r_.i16[15] = e15;
|
|
772
|
+
r_.i16[16] = e16;
|
|
773
|
+
r_.i16[17] = e17;
|
|
774
|
+
r_.i16[18] = e18;
|
|
775
|
+
r_.i16[19] = e19;
|
|
776
|
+
r_.i16[20] = e20;
|
|
777
|
+
r_.i16[21] = e21;
|
|
778
|
+
r_.i16[22] = e22;
|
|
779
|
+
r_.i16[23] = e23;
|
|
780
|
+
r_.i16[24] = e24;
|
|
781
|
+
r_.i16[25] = e25;
|
|
782
|
+
r_.i16[26] = e26;
|
|
783
|
+
r_.i16[27] = e27;
|
|
784
|
+
r_.i16[28] = e28;
|
|
785
|
+
r_.i16[29] = e29;
|
|
786
|
+
r_.i16[30] = e30;
|
|
787
|
+
r_.i16[31] = e31;
|
|
788
|
+
|
|
789
|
+
return simde__m512i_from_private(r_);
|
|
790
|
+
}
|
|
791
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
792
|
+
#define _mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
|
|
793
|
+
#endif
|
|
794
|
+
|
|
795
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
796
|
+
simde__m512i
|
|
797
|
+
simde_mm512_set_epi32 (int32_t e15, int32_t e14, int32_t e13, int32_t e12, int32_t e11, int32_t e10, int32_t e9, int32_t e8,
|
|
798
|
+
int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) {
|
|
799
|
+
simde__m512i_private r_;
|
|
800
|
+
|
|
801
|
+
r_.i32[ 0] = e0;
|
|
802
|
+
r_.i32[ 1] = e1;
|
|
803
|
+
r_.i32[ 2] = e2;
|
|
804
|
+
r_.i32[ 3] = e3;
|
|
805
|
+
r_.i32[ 4] = e4;
|
|
806
|
+
r_.i32[ 5] = e5;
|
|
807
|
+
r_.i32[ 6] = e6;
|
|
808
|
+
r_.i32[ 7] = e7;
|
|
809
|
+
r_.i32[ 8] = e8;
|
|
810
|
+
r_.i32[ 9] = e9;
|
|
811
|
+
r_.i32[10] = e10;
|
|
812
|
+
r_.i32[11] = e11;
|
|
813
|
+
r_.i32[12] = e12;
|
|
814
|
+
r_.i32[13] = e13;
|
|
815
|
+
r_.i32[14] = e14;
|
|
816
|
+
r_.i32[15] = e15;
|
|
817
|
+
|
|
818
|
+
return simde__m512i_from_private(r_);
|
|
819
|
+
}
|
|
820
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
821
|
+
#define _mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
|
|
822
|
+
#endif
|
|
823
|
+
|
|
824
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
825
|
+
simde__m512i
|
|
826
|
+
simde_mm512_set_epi64 (int64_t e7, int64_t e6, int64_t e5, int64_t e4, int64_t e3, int64_t e2, int64_t e1, int64_t e0) {
|
|
827
|
+
simde__m512i_private r_;
|
|
828
|
+
|
|
829
|
+
r_.i64[0] = e0;
|
|
830
|
+
r_.i64[1] = e1;
|
|
831
|
+
r_.i64[2] = e2;
|
|
832
|
+
r_.i64[3] = e3;
|
|
833
|
+
r_.i64[4] = e4;
|
|
834
|
+
r_.i64[5] = e5;
|
|
835
|
+
r_.i64[6] = e6;
|
|
836
|
+
r_.i64[7] = e7;
|
|
837
|
+
|
|
838
|
+
return simde__m512i_from_private(r_);
|
|
839
|
+
}
|
|
840
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
841
|
+
#define _mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
|
|
842
|
+
#endif
|
|
843
|
+
|
|
844
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
845
|
+
simde__m512i
|
|
846
|
+
simde_x_mm512_set_epu8 (uint8_t e63, uint8_t e62, uint8_t e61, uint8_t e60, uint8_t e59, uint8_t e58, uint8_t e57, uint8_t e56,
|
|
847
|
+
uint8_t e55, uint8_t e54, uint8_t e53, uint8_t e52, uint8_t e51, uint8_t e50, uint8_t e49, uint8_t e48,
|
|
848
|
+
uint8_t e47, uint8_t e46, uint8_t e45, uint8_t e44, uint8_t e43, uint8_t e42, uint8_t e41, uint8_t e40,
|
|
849
|
+
uint8_t e39, uint8_t e38, uint8_t e37, uint8_t e36, uint8_t e35, uint8_t e34, uint8_t e33, uint8_t e32,
|
|
850
|
+
uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24,
|
|
851
|
+
uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16,
|
|
852
|
+
uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8,
|
|
853
|
+
uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) {
|
|
854
|
+
simde__m512i_private r_;
|
|
855
|
+
|
|
856
|
+
r_.u8[ 0] = e0;
|
|
857
|
+
r_.u8[ 1] = e1;
|
|
858
|
+
r_.u8[ 2] = e2;
|
|
859
|
+
r_.u8[ 3] = e3;
|
|
860
|
+
r_.u8[ 4] = e4;
|
|
861
|
+
r_.u8[ 5] = e5;
|
|
862
|
+
r_.u8[ 6] = e6;
|
|
863
|
+
r_.u8[ 7] = e7;
|
|
864
|
+
r_.u8[ 8] = e8;
|
|
865
|
+
r_.u8[ 9] = e9;
|
|
866
|
+
r_.u8[10] = e10;
|
|
867
|
+
r_.u8[11] = e11;
|
|
868
|
+
r_.u8[12] = e12;
|
|
869
|
+
r_.u8[13] = e13;
|
|
870
|
+
r_.u8[14] = e14;
|
|
871
|
+
r_.u8[15] = e15;
|
|
872
|
+
r_.u8[16] = e16;
|
|
873
|
+
r_.u8[17] = e17;
|
|
874
|
+
r_.u8[18] = e18;
|
|
875
|
+
r_.u8[19] = e19;
|
|
876
|
+
r_.u8[20] = e20;
|
|
877
|
+
r_.u8[21] = e21;
|
|
878
|
+
r_.u8[22] = e22;
|
|
879
|
+
r_.u8[23] = e23;
|
|
880
|
+
r_.u8[24] = e24;
|
|
881
|
+
r_.u8[25] = e25;
|
|
882
|
+
r_.u8[26] = e26;
|
|
883
|
+
r_.u8[27] = e27;
|
|
884
|
+
r_.u8[28] = e28;
|
|
885
|
+
r_.u8[29] = e29;
|
|
886
|
+
r_.u8[30] = e30;
|
|
887
|
+
r_.u8[31] = e31;
|
|
888
|
+
r_.u8[32] = e32;
|
|
889
|
+
r_.u8[33] = e33;
|
|
890
|
+
r_.u8[34] = e34;
|
|
891
|
+
r_.u8[35] = e35;
|
|
892
|
+
r_.u8[36] = e36;
|
|
893
|
+
r_.u8[37] = e37;
|
|
894
|
+
r_.u8[38] = e38;
|
|
895
|
+
r_.u8[39] = e39;
|
|
896
|
+
r_.u8[40] = e40;
|
|
897
|
+
r_.u8[41] = e41;
|
|
898
|
+
r_.u8[42] = e42;
|
|
899
|
+
r_.u8[43] = e43;
|
|
900
|
+
r_.u8[44] = e44;
|
|
901
|
+
r_.u8[45] = e45;
|
|
902
|
+
r_.u8[46] = e46;
|
|
903
|
+
r_.u8[47] = e47;
|
|
904
|
+
r_.u8[48] = e48;
|
|
905
|
+
r_.u8[49] = e49;
|
|
906
|
+
r_.u8[50] = e50;
|
|
907
|
+
r_.u8[51] = e51;
|
|
908
|
+
r_.u8[52] = e52;
|
|
909
|
+
r_.u8[53] = e53;
|
|
910
|
+
r_.u8[54] = e54;
|
|
911
|
+
r_.u8[55] = e55;
|
|
912
|
+
r_.u8[56] = e56;
|
|
913
|
+
r_.u8[57] = e57;
|
|
914
|
+
r_.u8[58] = e58;
|
|
915
|
+
r_.u8[59] = e59;
|
|
916
|
+
r_.u8[60] = e60;
|
|
917
|
+
r_.u8[61] = e61;
|
|
918
|
+
r_.u8[62] = e62;
|
|
919
|
+
r_.u8[63] = e63;
|
|
920
|
+
|
|
921
|
+
return simde__m512i_from_private(r_);
|
|
922
|
+
}
|
|
923
|
+
|
|
924
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
925
|
+
simde__m512i
|
|
926
|
+
simde_x_mm512_set_epu16 (uint16_t e31, uint16_t e30, uint16_t e29, uint16_t e28, uint16_t e27, uint16_t e26, uint16_t e25, uint16_t e24,
|
|
927
|
+
uint16_t e23, uint16_t e22, uint16_t e21, uint16_t e20, uint16_t e19, uint16_t e18, uint16_t e17, uint16_t e16,
|
|
928
|
+
uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8,
|
|
929
|
+
uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) {
|
|
930
|
+
simde__m512i_private r_;
|
|
931
|
+
|
|
932
|
+
r_.u16[ 0] = e0;
|
|
933
|
+
r_.u16[ 1] = e1;
|
|
934
|
+
r_.u16[ 2] = e2;
|
|
935
|
+
r_.u16[ 3] = e3;
|
|
936
|
+
r_.u16[ 4] = e4;
|
|
937
|
+
r_.u16[ 5] = e5;
|
|
938
|
+
r_.u16[ 6] = e6;
|
|
939
|
+
r_.u16[ 7] = e7;
|
|
940
|
+
r_.u16[ 8] = e8;
|
|
941
|
+
r_.u16[ 9] = e9;
|
|
942
|
+
r_.u16[10] = e10;
|
|
943
|
+
r_.u16[11] = e11;
|
|
944
|
+
r_.u16[12] = e12;
|
|
945
|
+
r_.u16[13] = e13;
|
|
946
|
+
r_.u16[14] = e14;
|
|
947
|
+
r_.u16[15] = e15;
|
|
948
|
+
r_.u16[16] = e16;
|
|
949
|
+
r_.u16[17] = e17;
|
|
950
|
+
r_.u16[18] = e18;
|
|
951
|
+
r_.u16[19] = e19;
|
|
952
|
+
r_.u16[20] = e20;
|
|
953
|
+
r_.u16[21] = e21;
|
|
954
|
+
r_.u16[22] = e22;
|
|
955
|
+
r_.u16[23] = e23;
|
|
956
|
+
r_.u16[24] = e24;
|
|
957
|
+
r_.u16[25] = e25;
|
|
958
|
+
r_.u16[26] = e26;
|
|
959
|
+
r_.u16[27] = e27;
|
|
960
|
+
r_.u16[28] = e28;
|
|
961
|
+
r_.u16[29] = e29;
|
|
962
|
+
r_.u16[30] = e30;
|
|
963
|
+
r_.u16[31] = e31;
|
|
964
|
+
|
|
965
|
+
return simde__m512i_from_private(r_);
|
|
966
|
+
}
|
|
967
|
+
|
|
968
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
969
|
+
simde__m512i
|
|
970
|
+
simde_x_mm512_set_epu32 (uint32_t e15, uint32_t e14, uint32_t e13, uint32_t e12, uint32_t e11, uint32_t e10, uint32_t e9, uint32_t e8,
|
|
971
|
+
uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) {
|
|
972
|
+
simde__m512i_private r_;
|
|
973
|
+
|
|
974
|
+
r_.u32[ 0] = e0;
|
|
975
|
+
r_.u32[ 1] = e1;
|
|
976
|
+
r_.u32[ 2] = e2;
|
|
977
|
+
r_.u32[ 3] = e3;
|
|
978
|
+
r_.u32[ 4] = e4;
|
|
979
|
+
r_.u32[ 5] = e5;
|
|
980
|
+
r_.u32[ 6] = e6;
|
|
981
|
+
r_.u32[ 7] = e7;
|
|
982
|
+
r_.u32[ 8] = e8;
|
|
983
|
+
r_.u32[ 9] = e9;
|
|
984
|
+
r_.u32[10] = e10;
|
|
985
|
+
r_.u32[11] = e11;
|
|
986
|
+
r_.u32[12] = e12;
|
|
987
|
+
r_.u32[13] = e13;
|
|
988
|
+
r_.u32[14] = e14;
|
|
989
|
+
r_.u32[15] = e15;
|
|
990
|
+
|
|
991
|
+
return simde__m512i_from_private(r_);
|
|
992
|
+
}
|
|
993
|
+
|
|
994
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
995
|
+
simde__m512i
|
|
996
|
+
simde_x_mm512_set_epu64 (uint64_t e7, uint64_t e6, uint64_t e5, uint64_t e4, uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) {
|
|
997
|
+
simde__m512i_private r_;
|
|
998
|
+
|
|
999
|
+
r_.u64[ 0] = e0;
|
|
1000
|
+
r_.u64[ 1] = e1;
|
|
1001
|
+
r_.u64[ 2] = e2;
|
|
1002
|
+
r_.u64[ 3] = e3;
|
|
1003
|
+
r_.u64[ 4] = e4;
|
|
1004
|
+
r_.u64[ 5] = e5;
|
|
1005
|
+
r_.u64[ 6] = e6;
|
|
1006
|
+
r_.u64[ 7] = e7;
|
|
1007
|
+
|
|
1008
|
+
return simde__m512i_from_private(r_);
|
|
1009
|
+
}
|
|
1010
|
+
|
|
1011
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1012
|
+
simde__m512
|
|
1013
|
+
simde_mm512_set_ps (simde_float32 e15, simde_float32 e14, simde_float32 e13, simde_float32 e12,
|
|
1014
|
+
simde_float32 e11, simde_float32 e10, simde_float32 e9, simde_float32 e8,
|
|
1015
|
+
simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4,
|
|
1016
|
+
simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) {
|
|
1017
|
+
simde__m512_private r_;
|
|
1018
|
+
|
|
1019
|
+
r_.f32[ 0] = e0;
|
|
1020
|
+
r_.f32[ 1] = e1;
|
|
1021
|
+
r_.f32[ 2] = e2;
|
|
1022
|
+
r_.f32[ 3] = e3;
|
|
1023
|
+
r_.f32[ 4] = e4;
|
|
1024
|
+
r_.f32[ 5] = e5;
|
|
1025
|
+
r_.f32[ 6] = e6;
|
|
1026
|
+
r_.f32[ 7] = e7;
|
|
1027
|
+
r_.f32[ 8] = e8;
|
|
1028
|
+
r_.f32[ 9] = e9;
|
|
1029
|
+
r_.f32[10] = e10;
|
|
1030
|
+
r_.f32[11] = e11;
|
|
1031
|
+
r_.f32[12] = e12;
|
|
1032
|
+
r_.f32[13] = e13;
|
|
1033
|
+
r_.f32[14] = e14;
|
|
1034
|
+
r_.f32[15] = e15;
|
|
1035
|
+
|
|
1036
|
+
return simde__m512_from_private(r_);
|
|
1037
|
+
}
|
|
1038
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1039
|
+
#define _mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
|
|
1040
|
+
#endif
|
|
1041
|
+
|
|
1042
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1043
|
+
simde__m512d
|
|
1044
|
+
simde_mm512_set_pd (simde_float64 e7, simde_float64 e6, simde_float64 e5, simde_float64 e4, simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) {
|
|
1045
|
+
simde__m512d_private r_;
|
|
1046
|
+
|
|
1047
|
+
r_.f64[0] = e0;
|
|
1048
|
+
r_.f64[1] = e1;
|
|
1049
|
+
r_.f64[2] = e2;
|
|
1050
|
+
r_.f64[3] = e3;
|
|
1051
|
+
r_.f64[4] = e4;
|
|
1052
|
+
r_.f64[5] = e5;
|
|
1053
|
+
r_.f64[6] = e6;
|
|
1054
|
+
r_.f64[7] = e7;
|
|
1055
|
+
|
|
1056
|
+
return simde__m512d_from_private(r_);
|
|
1057
|
+
}
|
|
1058
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1059
|
+
#define _mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0)
|
|
1060
|
+
#endif
|
|
1061
|
+
|
|
1062
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1063
|
+
simde__m512i
|
|
1064
|
+
simde_mm512_set1_epi8 (int8_t a) {
|
|
1065
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1066
|
+
return _mm512_set1_epi8(a);
|
|
1067
|
+
#else
|
|
1068
|
+
simde__m512i_private r_;
|
|
1069
|
+
|
|
1070
|
+
SIMDE__VECTORIZE
|
|
1071
|
+
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
|
|
1072
|
+
r_.i8[i] = a;
|
|
1073
|
+
}
|
|
1074
|
+
|
|
1075
|
+
return simde__m512i_from_private(r_);
|
|
1076
|
+
#endif
|
|
1077
|
+
}
|
|
1078
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1079
|
+
#define _mm512_set1_epi8(a) simde_mm512_set1_epi8(a)
|
|
1080
|
+
#endif
|
|
1081
|
+
|
|
1082
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1083
|
+
simde__m512i
|
|
1084
|
+
simde_mm512_set1_epi16 (int16_t a) {
|
|
1085
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1086
|
+
return _mm512_set1_epi16(a);
|
|
1087
|
+
#else
|
|
1088
|
+
simde__m512i_private r_;
|
|
1089
|
+
|
|
1090
|
+
SIMDE__VECTORIZE
|
|
1091
|
+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
|
|
1092
|
+
r_.i16[i] = a;
|
|
1093
|
+
}
|
|
1094
|
+
|
|
1095
|
+
return simde__m512i_from_private(r_);
|
|
1096
|
+
#endif
|
|
1097
|
+
}
|
|
1098
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1099
|
+
#define _mm512_set1_epi16(a) simde_mm512_set1_epi16(a)
|
|
1100
|
+
#endif
|
|
1101
|
+
|
|
1102
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1103
|
+
simde__m512i
|
|
1104
|
+
simde_mm512_set1_epi32 (int32_t a) {
|
|
1105
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1106
|
+
return _mm512_set1_epi32(a);
|
|
1107
|
+
#else
|
|
1108
|
+
simde__m512i_private r_;
|
|
1109
|
+
|
|
1110
|
+
SIMDE__VECTORIZE
|
|
1111
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
1112
|
+
r_.i32[i] = a;
|
|
1113
|
+
}
|
|
1114
|
+
|
|
1115
|
+
return simde__m512i_from_private(r_);
|
|
1116
|
+
#endif
|
|
1117
|
+
}
|
|
1118
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1119
|
+
#define _mm512_set1_epi32(a) simde_mm512_set1_epi32(a)
|
|
1120
|
+
#endif
|
|
1121
|
+
|
|
1122
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1123
|
+
simde__m512i
|
|
1124
|
+
simde_mm512_set1_epi64 (int64_t a) {
|
|
1125
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1126
|
+
return _mm512_set1_epi64(a);
|
|
1127
|
+
#else
|
|
1128
|
+
simde__m512i_private r_;
|
|
1129
|
+
|
|
1130
|
+
SIMDE__VECTORIZE
|
|
1131
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
1132
|
+
r_.i64[i] = a;
|
|
1133
|
+
}
|
|
1134
|
+
|
|
1135
|
+
return simde__m512i_from_private(r_);
|
|
1136
|
+
#endif
|
|
1137
|
+
}
|
|
1138
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1139
|
+
#define _mm512_set1_epi64(a) simde_mm512_set1_epi64(a)
|
|
1140
|
+
#endif
|
|
1141
|
+
|
|
1142
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1143
|
+
simde__m512i
|
|
1144
|
+
simde_x_mm512_set1_epu8 (uint8_t a) {
|
|
1145
|
+
simde__m512i_private r_;
|
|
1146
|
+
|
|
1147
|
+
SIMDE__VECTORIZE
|
|
1148
|
+
for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
|
|
1149
|
+
r_.u8[i] = a;
|
|
1150
|
+
}
|
|
1151
|
+
|
|
1152
|
+
return simde__m512i_from_private(r_);
|
|
1153
|
+
}
|
|
1154
|
+
|
|
1155
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1156
|
+
simde__m512i
|
|
1157
|
+
simde_x_mm512_set1_epu16 (uint16_t a) {
|
|
1158
|
+
simde__m512i_private r_;
|
|
1159
|
+
|
|
1160
|
+
SIMDE__VECTORIZE
|
|
1161
|
+
for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
|
|
1162
|
+
r_.u16[i] = a;
|
|
1163
|
+
}
|
|
1164
|
+
|
|
1165
|
+
return simde__m512i_from_private(r_);
|
|
1166
|
+
}
|
|
1167
|
+
|
|
1168
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1169
|
+
simde__m512i
|
|
1170
|
+
simde_x_mm512_set1_epu32 (uint32_t a) {
|
|
1171
|
+
simde__m512i_private r_;
|
|
1172
|
+
|
|
1173
|
+
SIMDE__VECTORIZE
|
|
1174
|
+
for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
|
|
1175
|
+
r_.u32[i] = a;
|
|
1176
|
+
}
|
|
1177
|
+
|
|
1178
|
+
return simde__m512i_from_private(r_);
|
|
1179
|
+
}
|
|
1180
|
+
|
|
1181
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1182
|
+
simde__m512i
|
|
1183
|
+
simde_x_mm512_set1_epu64 (uint64_t a) {
|
|
1184
|
+
simde__m512i_private r_;
|
|
1185
|
+
|
|
1186
|
+
SIMDE__VECTORIZE
|
|
1187
|
+
for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
|
|
1188
|
+
r_.u64[i] = a;
|
|
1189
|
+
}
|
|
1190
|
+
|
|
1191
|
+
return simde__m512i_from_private(r_);
|
|
1192
|
+
}
|
|
1193
|
+
|
|
1194
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1195
|
+
simde__m512
|
|
1196
|
+
simde_mm512_set1_ps (simde_float32 a) {
|
|
1197
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1198
|
+
return _mm512_set1_ps(a);
|
|
1199
|
+
#else
|
|
1200
|
+
simde__m512_private r_;
|
|
1201
|
+
|
|
1202
|
+
SIMDE__VECTORIZE
|
|
1203
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
1204
|
+
r_.f32[i] = a;
|
|
1205
|
+
}
|
|
1206
|
+
|
|
1207
|
+
return simde__m512_from_private(r_);
|
|
1208
|
+
#endif
|
|
1209
|
+
}
|
|
1210
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1211
|
+
#define _mm512_set1_ps(a) simde_mm512_set1_ps(a)
|
|
1212
|
+
#endif
|
|
1213
|
+
|
|
1214
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1215
|
+
simde__m512d
|
|
1216
|
+
simde_mm512_set1_pd (simde_float64 a) {
|
|
1217
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1218
|
+
return _mm512_set1_pd(a);
|
|
1219
|
+
#else
|
|
1220
|
+
simde__m512d_private r_;
|
|
1221
|
+
|
|
1222
|
+
SIMDE__VECTORIZE
|
|
1223
|
+
for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
|
|
1224
|
+
r_.f64[i] = a;
|
|
1225
|
+
}
|
|
1226
|
+
|
|
1227
|
+
return simde__m512d_from_private(r_);
|
|
1228
|
+
#endif
|
|
1229
|
+
}
|
|
1230
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1231
|
+
#define _mm512_set1_pd(a) simde_mm512_set1_pd(a)
|
|
1232
|
+
#endif
|
|
1233
|
+
|
|
1234
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1235
|
+
simde__m512i
|
|
1236
|
+
simde_mm512_set4_epi32 (int32_t d, int32_t c, int32_t b, int32_t a) {
|
|
1237
|
+
simde__m512i_private r_;
|
|
1238
|
+
|
|
1239
|
+
r_.i32[ 0] = a;
|
|
1240
|
+
r_.i32[ 1] = b;
|
|
1241
|
+
r_.i32[ 2] = c;
|
|
1242
|
+
r_.i32[ 3] = d;
|
|
1243
|
+
r_.i32[ 4] = a;
|
|
1244
|
+
r_.i32[ 5] = b;
|
|
1245
|
+
r_.i32[ 6] = c;
|
|
1246
|
+
r_.i32[ 7] = d;
|
|
1247
|
+
r_.i32[ 8] = a;
|
|
1248
|
+
r_.i32[ 9] = b;
|
|
1249
|
+
r_.i32[10] = c;
|
|
1250
|
+
r_.i32[11] = d;
|
|
1251
|
+
r_.i32[12] = a;
|
|
1252
|
+
r_.i32[13] = b;
|
|
1253
|
+
r_.i32[14] = c;
|
|
1254
|
+
r_.i32[15] = d;
|
|
1255
|
+
|
|
1256
|
+
return simde__m512i_from_private(r_);
|
|
1257
|
+
}
|
|
1258
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1259
|
+
#define _mm512_set4_epi32(d,c,b,a) simde_mm512_set4_epi32(d,c,b,a)
|
|
1260
|
+
#endif
|
|
1261
|
+
|
|
1262
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1263
|
+
simde__m512i
|
|
1264
|
+
simde_mm512_set4_epi64 (int64_t d, int64_t c, int64_t b, int64_t a) {
|
|
1265
|
+
simde__m512i_private r_;
|
|
1266
|
+
|
|
1267
|
+
r_.i64[0] = a;
|
|
1268
|
+
r_.i64[1] = b;
|
|
1269
|
+
r_.i64[2] = c;
|
|
1270
|
+
r_.i64[3] = d;
|
|
1271
|
+
r_.i64[4] = a;
|
|
1272
|
+
r_.i64[5] = b;
|
|
1273
|
+
r_.i64[6] = c;
|
|
1274
|
+
r_.i64[7] = d;
|
|
1275
|
+
|
|
1276
|
+
return simde__m512i_from_private(r_);
|
|
1277
|
+
}
|
|
1278
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1279
|
+
#define _mm512_set4_epi64(d,c,b,a) simde_mm512_set4_epi64(d,c,b,a)
|
|
1280
|
+
#endif
|
|
1281
|
+
|
|
1282
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1283
|
+
simde__m512
|
|
1284
|
+
simde_mm512_set4_ps (simde_float32 d, simde_float32 c, simde_float32 b, simde_float32 a) {
|
|
1285
|
+
simde__m512_private r_;
|
|
1286
|
+
|
|
1287
|
+
r_.f32[ 0] = a;
|
|
1288
|
+
r_.f32[ 1] = b;
|
|
1289
|
+
r_.f32[ 2] = c;
|
|
1290
|
+
r_.f32[ 3] = d;
|
|
1291
|
+
r_.f32[ 4] = a;
|
|
1292
|
+
r_.f32[ 5] = b;
|
|
1293
|
+
r_.f32[ 6] = c;
|
|
1294
|
+
r_.f32[ 7] = d;
|
|
1295
|
+
r_.f32[ 8] = a;
|
|
1296
|
+
r_.f32[ 9] = b;
|
|
1297
|
+
r_.f32[10] = c;
|
|
1298
|
+
r_.f32[11] = d;
|
|
1299
|
+
r_.f32[12] = a;
|
|
1300
|
+
r_.f32[13] = b;
|
|
1301
|
+
r_.f32[14] = c;
|
|
1302
|
+
r_.f32[15] = d;
|
|
1303
|
+
|
|
1304
|
+
return simde__m512_from_private(r_);
|
|
1305
|
+
}
|
|
1306
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1307
|
+
#define _mm512_set4_ps(d,c,b,a) simde_mm512_set4_ps(d,c,b,a)
|
|
1308
|
+
#endif
|
|
1309
|
+
|
|
1310
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1311
|
+
simde__m512d
|
|
1312
|
+
simde_mm512_set4_pd (simde_float64 d, simde_float64 c, simde_float64 b, simde_float64 a) {
|
|
1313
|
+
simde__m512d_private r_;
|
|
1314
|
+
|
|
1315
|
+
r_.f64[0] = a;
|
|
1316
|
+
r_.f64[1] = b;
|
|
1317
|
+
r_.f64[2] = c;
|
|
1318
|
+
r_.f64[3] = d;
|
|
1319
|
+
r_.f64[4] = a;
|
|
1320
|
+
r_.f64[5] = b;
|
|
1321
|
+
r_.f64[6] = c;
|
|
1322
|
+
r_.f64[7] = d;
|
|
1323
|
+
|
|
1324
|
+
return simde__m512d_from_private(r_);
|
|
1325
|
+
}
|
|
1326
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1327
|
+
#define _mm512_set4_pd(d,c,b,a) simde_mm512_set4_pd(d,c,b,a)
|
|
1328
|
+
#endif
|
|
1329
|
+
|
|
1330
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1331
|
+
simde__m512i
|
|
1332
|
+
simde_mm512_setr_epi32 (int32_t e15, int32_t e14, int32_t e13, int32_t e12, int32_t e11, int32_t e10, int32_t e9, int32_t e8,
|
|
1333
|
+
int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) {
|
|
1334
|
+
simde__m512i_private r_;
|
|
1335
|
+
|
|
1336
|
+
r_.i32[ 0] = e15;
|
|
1337
|
+
r_.i32[ 1] = e14;
|
|
1338
|
+
r_.i32[ 2] = e13;
|
|
1339
|
+
r_.i32[ 3] = e12;
|
|
1340
|
+
r_.i32[ 4] = e11;
|
|
1341
|
+
r_.i32[ 5] = e10;
|
|
1342
|
+
r_.i32[ 6] = e9;
|
|
1343
|
+
r_.i32[ 7] = e8;
|
|
1344
|
+
r_.i32[ 8] = e7;
|
|
1345
|
+
r_.i32[ 9] = e6;
|
|
1346
|
+
r_.i32[10] = e5;
|
|
1347
|
+
r_.i32[11] = e4;
|
|
1348
|
+
r_.i32[12] = e3;
|
|
1349
|
+
r_.i32[13] = e2;
|
|
1350
|
+
r_.i32[14] = e1;
|
|
1351
|
+
r_.i32[15] = e0;
|
|
1352
|
+
|
|
1353
|
+
return simde__m512i_from_private(r_);
|
|
1354
|
+
}
|
|
1355
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1356
|
+
#define _mm512_setr_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
|
|
1357
|
+
#endif
|
|
1358
|
+
|
|
1359
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1360
|
+
simde__m512i
|
|
1361
|
+
simde_mm512_setr_epi64 (int64_t e7, int64_t e6, int64_t e5, int64_t e4, int64_t e3, int64_t e2, int64_t e1, int64_t e0) {
|
|
1362
|
+
simde__m512i_private r_;
|
|
1363
|
+
|
|
1364
|
+
r_.i64[0] = e7;
|
|
1365
|
+
r_.i64[1] = e6;
|
|
1366
|
+
r_.i64[2] = e5;
|
|
1367
|
+
r_.i64[3] = e4;
|
|
1368
|
+
r_.i64[4] = e3;
|
|
1369
|
+
r_.i64[5] = e2;
|
|
1370
|
+
r_.i64[6] = e1;
|
|
1371
|
+
r_.i64[7] = e0;
|
|
1372
|
+
|
|
1373
|
+
return simde__m512i_from_private(r_);
|
|
1374
|
+
}
|
|
1375
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1376
|
+
#define _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
|
|
1377
|
+
#endif
|
|
1378
|
+
|
|
1379
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1380
|
+
simde__m512
|
|
1381
|
+
simde_mm512_setr_ps (simde_float32 e15, simde_float32 e14, simde_float32 e13, simde_float32 e12,
|
|
1382
|
+
simde_float32 e11, simde_float32 e10, simde_float32 e9, simde_float32 e8,
|
|
1383
|
+
simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4,
|
|
1384
|
+
simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) {
|
|
1385
|
+
simde__m512_private r_;
|
|
1386
|
+
|
|
1387
|
+
r_.f32[ 0] = e15;
|
|
1388
|
+
r_.f32[ 1] = e14;
|
|
1389
|
+
r_.f32[ 2] = e13;
|
|
1390
|
+
r_.f32[ 3] = e12;
|
|
1391
|
+
r_.f32[ 4] = e11;
|
|
1392
|
+
r_.f32[ 5] = e10;
|
|
1393
|
+
r_.f32[ 6] = e9;
|
|
1394
|
+
r_.f32[ 7] = e8;
|
|
1395
|
+
r_.f32[ 8] = e7;
|
|
1396
|
+
r_.f32[ 9] = e6;
|
|
1397
|
+
r_.f32[10] = e5;
|
|
1398
|
+
r_.f32[11] = e4;
|
|
1399
|
+
r_.f32[12] = e3;
|
|
1400
|
+
r_.f32[13] = e2;
|
|
1401
|
+
r_.f32[14] = e1;
|
|
1402
|
+
r_.f32[15] = e0;
|
|
1403
|
+
|
|
1404
|
+
return simde__m512_from_private(r_);
|
|
1405
|
+
}
|
|
1406
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1407
|
+
#define _mm512_setr_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
|
|
1408
|
+
#endif
|
|
1409
|
+
|
|
1410
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1411
|
+
simde__m512d
|
|
1412
|
+
simde_mm512_setr_pd (simde_float64 e7, simde_float64 e6, simde_float64 e5, simde_float64 e4, simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) {
|
|
1413
|
+
simde__m512d_private r_;
|
|
1414
|
+
|
|
1415
|
+
r_.f64[0] = e7;
|
|
1416
|
+
r_.f64[1] = e6;
|
|
1417
|
+
r_.f64[2] = e5;
|
|
1418
|
+
r_.f64[3] = e4;
|
|
1419
|
+
r_.f64[4] = e3;
|
|
1420
|
+
r_.f64[5] = e2;
|
|
1421
|
+
r_.f64[6] = e1;
|
|
1422
|
+
r_.f64[7] = e0;
|
|
1423
|
+
|
|
1424
|
+
return simde__m512d_from_private(r_);
|
|
1425
|
+
}
|
|
1426
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1427
|
+
#define _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
|
|
1428
|
+
#endif
|
|
1429
|
+
|
|
1430
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1431
|
+
simde__m512i
|
|
1432
|
+
simde_mm512_setr4_epi32 (int32_t d, int32_t c, int32_t b, int32_t a) {
|
|
1433
|
+
simde__m512i_private r_;
|
|
1434
|
+
|
|
1435
|
+
r_.i32[ 0] = d;
|
|
1436
|
+
r_.i32[ 1] = c;
|
|
1437
|
+
r_.i32[ 2] = b;
|
|
1438
|
+
r_.i32[ 3] = a;
|
|
1439
|
+
r_.i32[ 4] = d;
|
|
1440
|
+
r_.i32[ 5] = c;
|
|
1441
|
+
r_.i32[ 6] = b;
|
|
1442
|
+
r_.i32[ 7] = a;
|
|
1443
|
+
r_.i32[ 8] = d;
|
|
1444
|
+
r_.i32[ 9] = c;
|
|
1445
|
+
r_.i32[10] = b;
|
|
1446
|
+
r_.i32[11] = a;
|
|
1447
|
+
r_.i32[12] = d;
|
|
1448
|
+
r_.i32[13] = c;
|
|
1449
|
+
r_.i32[14] = b;
|
|
1450
|
+
r_.i32[15] = a;
|
|
1451
|
+
|
|
1452
|
+
return simde__m512i_from_private(r_);
|
|
1453
|
+
}
|
|
1454
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1455
|
+
#define _mm512_setr4_epi32(d,c,b,a) simde_mm512_setr4_epi32(d,c,b,a)
|
|
1456
|
+
#endif
|
|
1457
|
+
|
|
1458
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1459
|
+
simde__m512i
|
|
1460
|
+
simde_mm512_setr4_epi64 (int64_t d, int64_t c, int64_t b, int64_t a) {
|
|
1461
|
+
simde__m512i_private r_;
|
|
1462
|
+
|
|
1463
|
+
r_.i64[0] = d;
|
|
1464
|
+
r_.i64[1] = c;
|
|
1465
|
+
r_.i64[2] = b;
|
|
1466
|
+
r_.i64[3] = a;
|
|
1467
|
+
r_.i64[4] = d;
|
|
1468
|
+
r_.i64[5] = c;
|
|
1469
|
+
r_.i64[6] = b;
|
|
1470
|
+
r_.i64[7] = a;
|
|
1471
|
+
|
|
1472
|
+
return simde__m512i_from_private(r_);
|
|
1473
|
+
}
|
|
1474
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1475
|
+
#define _mm512_setr4_epi64(d,c,b,a) simde_mm512_setr4_epi64(d,c,b,a)
|
|
1476
|
+
#endif
|
|
1477
|
+
|
|
1478
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1479
|
+
simde__m512
|
|
1480
|
+
simde_mm512_setr4_ps (simde_float32 d, simde_float32 c, simde_float32 b, simde_float32 a) {
|
|
1481
|
+
simde__m512_private r_;
|
|
1482
|
+
|
|
1483
|
+
r_.f32[ 0] = d;
|
|
1484
|
+
r_.f32[ 1] = c;
|
|
1485
|
+
r_.f32[ 2] = b;
|
|
1486
|
+
r_.f32[ 3] = a;
|
|
1487
|
+
r_.f32[ 4] = d;
|
|
1488
|
+
r_.f32[ 5] = c;
|
|
1489
|
+
r_.f32[ 6] = b;
|
|
1490
|
+
r_.f32[ 7] = a;
|
|
1491
|
+
r_.f32[ 8] = d;
|
|
1492
|
+
r_.f32[ 9] = c;
|
|
1493
|
+
r_.f32[10] = b;
|
|
1494
|
+
r_.f32[11] = a;
|
|
1495
|
+
r_.f32[12] = d;
|
|
1496
|
+
r_.f32[13] = c;
|
|
1497
|
+
r_.f32[14] = b;
|
|
1498
|
+
r_.f32[15] = a;
|
|
1499
|
+
|
|
1500
|
+
return simde__m512_from_private(r_);
|
|
1501
|
+
}
|
|
1502
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1503
|
+
#define _mm512_setr4_ps(d,c,b,a) simde_mm512_setr4_ps(d,c,b,a)
|
|
1504
|
+
#endif
|
|
1505
|
+
|
|
1506
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1507
|
+
simde__m512d
|
|
1508
|
+
simde_mm512_setr4_pd (simde_float64 d, simde_float64 c, simde_float64 b, simde_float64 a) {
|
|
1509
|
+
simde__m512d_private r_;
|
|
1510
|
+
|
|
1511
|
+
r_.f64[0] = d;
|
|
1512
|
+
r_.f64[1] = c;
|
|
1513
|
+
r_.f64[2] = b;
|
|
1514
|
+
r_.f64[3] = a;
|
|
1515
|
+
r_.f64[4] = d;
|
|
1516
|
+
r_.f64[5] = c;
|
|
1517
|
+
r_.f64[6] = b;
|
|
1518
|
+
r_.f64[7] = a;
|
|
1519
|
+
|
|
1520
|
+
return simde__m512d_from_private(r_);
|
|
1521
|
+
}
|
|
1522
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1523
|
+
#define _mm512_setr4_pd(d,c,b,a) simde_mm512_setr4_pd(d,c,b,a)
|
|
1524
|
+
#endif
|
|
1525
|
+
|
|
1526
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1527
|
+
simde__m512i
|
|
1528
|
+
simde_mm512_setzero_si512(void) {
|
|
1529
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1530
|
+
return _mm512_setzero_si512();
|
|
1531
|
+
#else
|
|
1532
|
+
simde__m512i r;
|
|
1533
|
+
simde_memset(&r, 0, sizeof(r));
|
|
1534
|
+
return r;
|
|
1535
|
+
#endif
|
|
1536
|
+
}
|
|
1537
|
+
#define simde_mm512_setzero_epi32() simde_mm512_setzero_si512()
|
|
1538
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1539
|
+
#define _mm512_setzero_si512() simde_mm512_setzero_si512()
|
|
1540
|
+
#define _mm512_setzero_epi32() simde_mm512_setzero_si512()
|
|
1541
|
+
#endif
|
|
1542
|
+
|
|
1543
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1544
|
+
simde__m512i
|
|
1545
|
+
simde_mm512_setone_si512(void) {
|
|
1546
|
+
simde__m512i_private r_;
|
|
1547
|
+
|
|
1548
|
+
SIMDE__VECTORIZE
|
|
1549
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
|
|
1550
|
+
r_.i32f[i] = ~((int_fast32_t) 0);
|
|
1551
|
+
}
|
|
1552
|
+
|
|
1553
|
+
return simde__m512i_from_private(r_);
|
|
1554
|
+
}
|
|
1555
|
+
#define simde_mm512_setone_epi32() simde_mm512_setone_si512()
|
|
1556
|
+
|
|
1557
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1558
|
+
simde__m512
|
|
1559
|
+
simde_mm512_setzero_ps(void) {
|
|
1560
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1561
|
+
return _mm512_setzero_ps();
|
|
1562
|
+
#else
|
|
1563
|
+
return simde_mm512_castsi512_ps(simde_mm512_setzero_si512());
|
|
1564
|
+
#endif
|
|
1565
|
+
}
|
|
1566
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1567
|
+
#define _mm512_setzero_si512() simde_mm512_setzero_si512()
|
|
1568
|
+
#endif
|
|
1569
|
+
|
|
1570
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1571
|
+
simde__m512
|
|
1572
|
+
simde_mm512_setone_ps(void) {
|
|
1573
|
+
return simde_mm512_castsi512_ps(simde_mm512_setone_si512());
|
|
1574
|
+
}
|
|
1575
|
+
|
|
1576
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1577
|
+
simde__m512d
|
|
1578
|
+
simde_mm512_setzero_pd(void) {
|
|
1579
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1580
|
+
return _mm512_setzero_pd();
|
|
1581
|
+
#else
|
|
1582
|
+
return simde_mm512_castsi512_pd(simde_mm512_setzero_si512());
|
|
1583
|
+
#endif
|
|
1584
|
+
}
|
|
1585
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1586
|
+
#define _mm512_setzero_si512() simde_mm512_setzero_si512()
|
|
1587
|
+
#endif
|
|
1588
|
+
|
|
1589
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1590
|
+
simde__m512d
|
|
1591
|
+
simde_mm512_setone_pd(void) {
|
|
1592
|
+
return simde_mm512_castsi512_pd(simde_mm512_setone_si512());
|
|
1593
|
+
}
|
|
1594
|
+
|
|
1595
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1596
|
+
simde__m512i
|
|
1597
|
+
simde_mm512_mask_mov_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a) {
|
|
1598
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1599
|
+
return _mm512_mask_mov_epi32(src, k, a);
|
|
1600
|
+
#else
|
|
1601
|
+
simde__m512i_private
|
|
1602
|
+
src_ = simde__m512i_to_private(src),
|
|
1603
|
+
a_ = simde__m512i_to_private(a),
|
|
1604
|
+
r_;
|
|
1605
|
+
|
|
1606
|
+
SIMDE__VECTORIZE
|
|
1607
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
1608
|
+
r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i];
|
|
1609
|
+
}
|
|
1610
|
+
|
|
1611
|
+
return simde__m512i_from_private(r_);
|
|
1612
|
+
#endif
|
|
1613
|
+
}
|
|
1614
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1615
|
+
#define _mm512_mask_mov_epi32(src, k, a) simde_mm512_mask_mov_epi32(src, k, a)
|
|
1616
|
+
#endif
|
|
1617
|
+
|
|
1618
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1619
|
+
simde__m512i
|
|
1620
|
+
simde_mm512_mask_mov_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a) {
|
|
1621
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1622
|
+
return _mm512_mask_mov_epi64(src, k, a);
|
|
1623
|
+
#else
|
|
1624
|
+
simde__m512i_private
|
|
1625
|
+
src_ = simde__m512i_to_private(src),
|
|
1626
|
+
a_ = simde__m512i_to_private(a),
|
|
1627
|
+
r_;
|
|
1628
|
+
|
|
1629
|
+
SIMDE__VECTORIZE
|
|
1630
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
1631
|
+
r_.i64[i] = ((k >> i) & 1) ? a_.i64[i] : src_.i64[i];
|
|
1632
|
+
}
|
|
1633
|
+
|
|
1634
|
+
return simde__m512i_from_private(r_);
|
|
1635
|
+
#endif
|
|
1636
|
+
}
|
|
1637
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1638
|
+
#define _mm512_mask_mov_epi64(src, k, a) simde_mm512_mask_mov_epi64(src, k, a)
|
|
1639
|
+
#endif
|
|
1640
|
+
|
|
1641
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1642
|
+
simde__m512
|
|
1643
|
+
simde_mm512_mask_mov_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
|
|
1644
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1645
|
+
return _mm512_mask_mov_ps(src, k, a);
|
|
1646
|
+
#else
|
|
1647
|
+
simde__m512_private
|
|
1648
|
+
src_ = simde__m512_to_private(src),
|
|
1649
|
+
a_ = simde__m512_to_private(a),
|
|
1650
|
+
r_;
|
|
1651
|
+
|
|
1652
|
+
SIMDE__VECTORIZE
|
|
1653
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
1654
|
+
r_.f32[i] = ((k >> i) & 1) ? a_.f32[i] : src_.f32[i];
|
|
1655
|
+
}
|
|
1656
|
+
|
|
1657
|
+
return simde__m512_from_private(r_);
|
|
1658
|
+
#endif
|
|
1659
|
+
}
|
|
1660
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1661
|
+
#define _mm512_mask_mov_ps(src, k, a) simde_mm512_mask_mov_ps(src, k, a)
|
|
1662
|
+
#endif
|
|
1663
|
+
|
|
1664
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1665
|
+
simde__m512d
|
|
1666
|
+
simde_mm512_mask_mov_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
|
|
1667
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1668
|
+
return _mm512_mask_mov_pd(src, k, a);
|
|
1669
|
+
#else
|
|
1670
|
+
simde__m512d_private
|
|
1671
|
+
src_ = simde__m512d_to_private(src),
|
|
1672
|
+
a_ = simde__m512d_to_private(a),
|
|
1673
|
+
r_;
|
|
1674
|
+
|
|
1675
|
+
SIMDE__VECTORIZE
|
|
1676
|
+
for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
|
|
1677
|
+
r_.f64[i] = ((k >> i) & 1) ? a_.f64[i] : src_.f64[i];
|
|
1678
|
+
}
|
|
1679
|
+
|
|
1680
|
+
return simde__m512d_from_private(r_);
|
|
1681
|
+
#endif
|
|
1682
|
+
}
|
|
1683
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1684
|
+
#define _mm512_mask_mov_pd(src, k, a) simde_mm512_mask_mov_pd(src, k, a)
|
|
1685
|
+
#endif
|
|
1686
|
+
|
|
1687
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1688
|
+
simde__m512i
|
|
1689
|
+
simde_mm512_maskz_mov_epi32(simde__mmask16 k, simde__m512i a) {
|
|
1690
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1691
|
+
return _mm512_maskz_mov_epi32(k, a);
|
|
1692
|
+
#else
|
|
1693
|
+
simde__m512i_private
|
|
1694
|
+
a_ = simde__m512i_to_private(a),
|
|
1695
|
+
r_;
|
|
1696
|
+
|
|
1697
|
+
SIMDE__VECTORIZE
|
|
1698
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
1699
|
+
r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : INT32_C(0);
|
|
1700
|
+
}
|
|
1701
|
+
|
|
1702
|
+
return simde__m512i_from_private(r_);
|
|
1703
|
+
#endif
|
|
1704
|
+
}
|
|
1705
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1706
|
+
#define _mm512_maskz_mov_epi32(k, a) simde_mm512_maskz_mov_epi32(k, a)
|
|
1707
|
+
#endif
|
|
1708
|
+
|
|
1709
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1710
|
+
simde__m512i
|
|
1711
|
+
simde_mm512_maskz_mov_epi64(simde__mmask8 k, simde__m512i a) {
|
|
1712
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1713
|
+
return _mm512_maskz_mov_epi64(k, a);
|
|
1714
|
+
#else
|
|
1715
|
+
simde__m512i_private
|
|
1716
|
+
a_ = simde__m512i_to_private(a),
|
|
1717
|
+
r_;
|
|
1718
|
+
|
|
1719
|
+
SIMDE__VECTORIZE
|
|
1720
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
1721
|
+
r_.i64[i] = ((k >> i) & 1) ? a_.i64[i] : INT64_C(0);
|
|
1722
|
+
}
|
|
1723
|
+
|
|
1724
|
+
return simde__m512i_from_private(r_);
|
|
1725
|
+
#endif
|
|
1726
|
+
}
|
|
1727
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1728
|
+
#define _mm512_maskz_mov_epi64(k, a) simde_mm512_maskz_mov_epi64(k, a)
|
|
1729
|
+
#endif
|
|
1730
|
+
|
|
1731
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1732
|
+
simde__m512
|
|
1733
|
+
simde_mm512_maskz_mov_ps(simde__mmask16 k, simde__m512 a) {
|
|
1734
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1735
|
+
return _mm512_maskz_mov_ps(k, a);
|
|
1736
|
+
#else
|
|
1737
|
+
simde__m512_private
|
|
1738
|
+
a_ = simde__m512_to_private(a),
|
|
1739
|
+
r_;
|
|
1740
|
+
|
|
1741
|
+
SIMDE__VECTORIZE
|
|
1742
|
+
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
|
|
1743
|
+
r_.f32[i] = ((k >> i) & 1) ? a_.f32[i] : SIMDE_FLOAT32_C(0.0);
|
|
1744
|
+
}
|
|
1745
|
+
|
|
1746
|
+
return simde__m512_from_private(r_);
|
|
1747
|
+
#endif
|
|
1748
|
+
}
|
|
1749
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1750
|
+
#define _mm512_maskz_mov_ps(k, a) simde_mm512_maskz_mov_ps(k, a)
|
|
1751
|
+
#endif
|
|
1752
|
+
|
|
1753
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1754
|
+
simde__m512d
|
|
1755
|
+
simde_mm512_maskz_mov_pd(simde__mmask8 k, simde__m512d a) {
|
|
1756
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1757
|
+
return _mm512_maskz_mov_pd(k, a);
|
|
1758
|
+
#else
|
|
1759
|
+
simde__m512d_private
|
|
1760
|
+
a_ = simde__m512d_to_private(a),
|
|
1761
|
+
r_;
|
|
1762
|
+
|
|
1763
|
+
SIMDE__VECTORIZE
|
|
1764
|
+
for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
|
|
1765
|
+
r_.f64[i] = ((k >> i) & 1) ? a_.f64[i] : SIMDE_FLOAT64_C(0.0);
|
|
1766
|
+
}
|
|
1767
|
+
|
|
1768
|
+
return simde__m512d_from_private(r_);
|
|
1769
|
+
#endif
|
|
1770
|
+
}
|
|
1771
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1772
|
+
#define _mm512_maskz_mov_pd(k, a) simde_mm512_maskz_mov_pd(k, a)
|
|
1773
|
+
#endif
|
|
1774
|
+
|
|
1775
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1776
|
+
simde__m512i
|
|
1777
|
+
simde_mm512_abs_epi32(simde__m512i a) {
|
|
1778
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1779
|
+
return _mm512_abs_epi32(a);
|
|
1780
|
+
#else
|
|
1781
|
+
simde__m512i_private
|
|
1782
|
+
r_,
|
|
1783
|
+
a_ = simde__m512i_to_private(a);
|
|
1784
|
+
|
|
1785
|
+
SIMDE__VECTORIZE
|
|
1786
|
+
for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
|
|
1787
|
+
r_.i32[i] = (a_.i32[i] < INT64_C(0)) ? -a_.i32[i] : a_.i32[i];
|
|
1788
|
+
}
|
|
1789
|
+
|
|
1790
|
+
return simde__m512i_from_private(r_);
|
|
1791
|
+
#endif
|
|
1792
|
+
}
|
|
1793
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1794
|
+
# define _mm512_abs_epi32(a) simde_mm512_abs_epi32(a)
|
|
1795
|
+
#endif
|
|
1796
|
+
|
|
1797
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1798
|
+
simde__m512i
|
|
1799
|
+
simde_mm512_mask_abs_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a) {
|
|
1800
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1801
|
+
return _mm512_mask_abs_epi32(src, k, a);
|
|
1802
|
+
#else
|
|
1803
|
+
return simde_mm512_mask_mov_epi32(src, k, simde_mm512_abs_epi32(a));
|
|
1804
|
+
#endif
|
|
1805
|
+
}
|
|
1806
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1807
|
+
#define _mm512_mask_abs_epi32(src, k, a) simde_mm512_mask_abs_epi32(src, k, a)
|
|
1808
|
+
#endif
|
|
1809
|
+
|
|
1810
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1811
|
+
simde__m512i
|
|
1812
|
+
simde_mm512_maskz_abs_epi32(simde__mmask16 k, simde__m512i a) {
|
|
1813
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1814
|
+
return _mm512_maskz_abs_epi32(k, a);
|
|
1815
|
+
#else
|
|
1816
|
+
return simde_mm512_maskz_mov_epi32(k, simde_mm512_abs_epi32(a));
|
|
1817
|
+
#endif
|
|
1818
|
+
}
|
|
1819
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1820
|
+
#define _mm512_maskz_abs_epi32(k, a) simde_mm512_maskz_abs_epi32(k, a)
|
|
1821
|
+
#endif
|
|
1822
|
+
|
|
1823
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1824
|
+
simde__m512i
|
|
1825
|
+
simde_mm512_abs_epi64(simde__m512i a) {
|
|
1826
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1827
|
+
return _mm512_abs_epi64(a);
|
|
1828
|
+
#else
|
|
1829
|
+
simde__m512i_private
|
|
1830
|
+
r_,
|
|
1831
|
+
a_ = simde__m512i_to_private(a);
|
|
1832
|
+
|
|
1833
|
+
SIMDE__VECTORIZE
|
|
1834
|
+
for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
|
|
1835
|
+
r_.i64[i] = (a_.i64[i] < INT64_C(0)) ? -a_.i64[i] : a_.i64[i];
|
|
1836
|
+
}
|
|
1837
|
+
|
|
1838
|
+
return simde__m512i_from_private(r_);
|
|
1839
|
+
#endif
|
|
1840
|
+
}
|
|
1841
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1842
|
+
# define _mm512_abs_epi64(a) simde_mm512_abs_epi64(a)
|
|
1843
|
+
#endif
|
|
1844
|
+
|
|
1845
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1846
|
+
simde__m512i
|
|
1847
|
+
simde_mm512_mask_abs_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a) {
|
|
1848
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1849
|
+
return _mm512_mask_abs_epi64(src, k, a);
|
|
1850
|
+
#else
|
|
1851
|
+
return simde_mm512_mask_mov_epi64(src, k, simde_mm512_abs_epi64(a));
|
|
1852
|
+
#endif
|
|
1853
|
+
}
|
|
1854
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1855
|
+
#define _mm512_mask_abs_epi64(src, k, a) simde_mm512_mask_abs_epi64(src, k, a)
|
|
1856
|
+
#endif
|
|
1857
|
+
|
|
1858
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1859
|
+
simde__m512i
|
|
1860
|
+
simde_mm512_maskz_abs_epi64(simde__mmask8 k, simde__m512i a) {
|
|
1861
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1862
|
+
return _mm512_maskz_abs_epi64(k, a);
|
|
1863
|
+
#else
|
|
1864
|
+
return simde_mm512_maskz_mov_epi64(k, simde_mm512_abs_epi64(a));
|
|
1865
|
+
#endif
|
|
1866
|
+
}
|
|
1867
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1868
|
+
#define _mm512_maskz_abs_epi64(k, a) simde_mm512_maskz_abs_epi64(k, a)
|
|
1869
|
+
#endif
|
|
1870
|
+
|
|
1871
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1872
|
+
simde__m512i
|
|
1873
|
+
simde_mm512_add_epi32 (simde__m512i a, simde__m512i b) {
|
|
1874
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1875
|
+
return _mm512_add_epi32(a, b);
|
|
1876
|
+
#else
|
|
1877
|
+
simde__m512i_private
|
|
1878
|
+
r_,
|
|
1879
|
+
a_ = simde__m512i_to_private(a),
|
|
1880
|
+
b_ = simde__m512i_to_private(b);
|
|
1881
|
+
|
|
1882
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
1883
|
+
r_.i32 = a_.i32 + b_.i32;
|
|
1884
|
+
#else
|
|
1885
|
+
SIMDE__VECTORIZE
|
|
1886
|
+
for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
|
|
1887
|
+
r_.m256i[i] = simde_mm256_add_epi32(a_.m256i[i], b_.m256i[i]);
|
|
1888
|
+
}
|
|
1889
|
+
#endif
|
|
1890
|
+
|
|
1891
|
+
return simde__m512i_from_private(r_);
|
|
1892
|
+
#endif
|
|
1893
|
+
}
|
|
1894
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1895
|
+
#define _mm512_add_epi32(a, b) simde_mm512_add_epi32(a, b)
|
|
1896
|
+
#endif
|
|
1897
|
+
|
|
1898
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1899
|
+
simde__m512i
|
|
1900
|
+
simde_mm512_mask_add_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
|
|
1901
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1902
|
+
return _mm512_mask_add_epi32(src, k, a, b);
|
|
1903
|
+
#else
|
|
1904
|
+
return simde_mm512_mask_mov_epi32(src, k, simde_mm512_add_epi32(a, b));
|
|
1905
|
+
#endif
|
|
1906
|
+
}
|
|
1907
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1908
|
+
#define _mm512_mask_add_epi32(src, k, a, b) simde_mm512_mask_add_epi32(src, k, a, b)
|
|
1909
|
+
#endif
|
|
1910
|
+
|
|
1911
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1912
|
+
simde__m512i
|
|
1913
|
+
simde_mm512_maskz_add_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) {
|
|
1914
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1915
|
+
return _mm512_maskz_add_epi32(k, a, b);
|
|
1916
|
+
#else
|
|
1917
|
+
return simde_mm512_maskz_mov_epi32(k, simde_mm512_add_epi32(a, b));
|
|
1918
|
+
#endif
|
|
1919
|
+
}
|
|
1920
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1921
|
+
#define _mm512_maskz_add_epi32(k, a, b) simde_mm512_maskz_add_epi32(k, a, b)
|
|
1922
|
+
#endif
|
|
1923
|
+
|
|
1924
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1925
|
+
simde__m512i
|
|
1926
|
+
simde_mm512_add_epi64 (simde__m512i a, simde__m512i b) {
|
|
1927
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1928
|
+
return _mm512_add_epi64(a, b);
|
|
1929
|
+
#else
|
|
1930
|
+
simde__m512i_private
|
|
1931
|
+
r_,
|
|
1932
|
+
a_ = simde__m512i_to_private(a),
|
|
1933
|
+
b_ = simde__m512i_to_private(b);
|
|
1934
|
+
|
|
1935
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
1936
|
+
r_.i64 = a_.i64 + b_.i64;
|
|
1937
|
+
#else
|
|
1938
|
+
SIMDE__VECTORIZE
|
|
1939
|
+
for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
|
|
1940
|
+
r_.m256i[i] = simde_mm256_add_epi64(a_.m256i[i], b_.m256i[i]);
|
|
1941
|
+
}
|
|
1942
|
+
#endif
|
|
1943
|
+
|
|
1944
|
+
return simde__m512i_from_private(r_);
|
|
1945
|
+
#endif
|
|
1946
|
+
}
|
|
1947
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1948
|
+
#define _mm512_add_epi64(a, b) simde_mm512_add_epi64(a, b)
|
|
1949
|
+
#endif
|
|
1950
|
+
|
|
1951
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1952
|
+
simde__m512i
|
|
1953
|
+
simde_mm512_mask_add_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
|
|
1954
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1955
|
+
return _mm512_mask_add_epi64(src, k, a, b);
|
|
1956
|
+
#else
|
|
1957
|
+
return simde_mm512_mask_mov_epi64(src, k, simde_mm512_add_epi64(a, b));
|
|
1958
|
+
#endif
|
|
1959
|
+
}
|
|
1960
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1961
|
+
#define _mm512_mask_add_epi64(src, k, a, b) simde_mm512_mask_add_epi64(src, k, a, b)
|
|
1962
|
+
#endif
|
|
1963
|
+
|
|
1964
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1965
|
+
simde__m512i
|
|
1966
|
+
simde_mm512_maskz_add_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) {
|
|
1967
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1968
|
+
return _mm512_maskz_add_epi64(k, a, b);
|
|
1969
|
+
#else
|
|
1970
|
+
return simde_mm512_maskz_mov_epi64(k, simde_mm512_add_epi64(a, b));
|
|
1971
|
+
#endif
|
|
1972
|
+
}
|
|
1973
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
1974
|
+
#define _mm512_maskz_add_epi64(k, a, b) simde_mm512_maskz_add_epi64(k, a, b)
|
|
1975
|
+
#endif
|
|
1976
|
+
|
|
1977
|
+
|
|
1978
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
1979
|
+
simde__m512
|
|
1980
|
+
simde_mm512_add_ps (simde__m512 a, simde__m512 b) {
|
|
1981
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
1982
|
+
return _mm512_add_ps(a, b);
|
|
1983
|
+
#else
|
|
1984
|
+
simde__m512_private
|
|
1985
|
+
r_,
|
|
1986
|
+
a_ = simde__m512_to_private(a),
|
|
1987
|
+
b_ = simde__m512_to_private(b);
|
|
1988
|
+
|
|
1989
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
1990
|
+
r_.f32 = a_.f32 + b_.f32;
|
|
1991
|
+
#else
|
|
1992
|
+
SIMDE__VECTORIZE
|
|
1993
|
+
for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
|
|
1994
|
+
r_.m256[i] = simde_mm256_add_ps(a_.m256[i], b_.m256[i]);
|
|
1995
|
+
}
|
|
1996
|
+
#endif
|
|
1997
|
+
|
|
1998
|
+
return simde__m512_from_private(r_);
|
|
1999
|
+
#endif
|
|
2000
|
+
}
|
|
2001
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2002
|
+
#define _mm512_add_ps(a, b) simde_mm512_add_ps(a, b)
|
|
2003
|
+
#endif
|
|
2004
|
+
|
|
2005
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2006
|
+
simde__m512
|
|
2007
|
+
simde_mm512_mask_add_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
|
|
2008
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2009
|
+
return _mm512_mask_add_ps(src, k, a, b);
|
|
2010
|
+
#else
|
|
2011
|
+
return simde_mm512_mask_mov_ps(src, k, simde_mm512_add_ps(a, b));
|
|
2012
|
+
#endif
|
|
2013
|
+
}
|
|
2014
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2015
|
+
#define _mm512_mask_add_ps(src, k, a, b) simde_mm512_mask_add_ps(src, k, a, b)
|
|
2016
|
+
#endif
|
|
2017
|
+
|
|
2018
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2019
|
+
simde__m512
|
|
2020
|
+
simde_mm512_maskz_add_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
|
|
2021
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2022
|
+
return _mm512_maskz_add_ps(k, a, b);
|
|
2023
|
+
#else
|
|
2024
|
+
return simde_mm512_maskz_mov_ps(k, simde_mm512_add_ps(a, b));
|
|
2025
|
+
#endif
|
|
2026
|
+
}
|
|
2027
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2028
|
+
#define _mm512_maskz_add_ps(k, a, b) simde_mm512_maskz_add_ps(k, a, b)
|
|
2029
|
+
#endif
|
|
2030
|
+
|
|
2031
|
+
|
|
2032
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2033
|
+
simde__m512d
|
|
2034
|
+
simde_mm512_add_pd (simde__m512d a, simde__m512d b) {
|
|
2035
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2036
|
+
return _mm512_add_pd(a, b);
|
|
2037
|
+
#else
|
|
2038
|
+
simde__m512d_private
|
|
2039
|
+
r_,
|
|
2040
|
+
a_ = simde__m512d_to_private(a),
|
|
2041
|
+
b_ = simde__m512d_to_private(b);
|
|
2042
|
+
|
|
2043
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
2044
|
+
r_.f64 = a_.f64 + b_.f64;
|
|
2045
|
+
#else
|
|
2046
|
+
SIMDE__VECTORIZE
|
|
2047
|
+
for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
|
|
2048
|
+
r_.m256d[i] = simde_mm256_add_pd(a_.m256d[i], b_.m256d[i]);
|
|
2049
|
+
}
|
|
2050
|
+
#endif
|
|
2051
|
+
|
|
2052
|
+
return simde__m512d_from_private(r_);
|
|
2053
|
+
#endif
|
|
2054
|
+
}
|
|
2055
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2056
|
+
#define _mm512_add_pd(a, b) simde_mm512_add_pd(a, b)
|
|
2057
|
+
#endif
|
|
2058
|
+
|
|
2059
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2060
|
+
simde__m512d
|
|
2061
|
+
simde_mm512_mask_add_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
|
|
2062
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2063
|
+
return _mm512_mask_add_pd(src, k, a, b);
|
|
2064
|
+
#else
|
|
2065
|
+
return simde_mm512_mask_mov_pd(src, k, simde_mm512_add_pd(a, b));
|
|
2066
|
+
#endif
|
|
2067
|
+
}
|
|
2068
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2069
|
+
#define _mm512_mask_add_pd(src, k, a, b) simde_mm512_mask_add_pd(src, k, a, b)
|
|
2070
|
+
#endif
|
|
2071
|
+
|
|
2072
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2073
|
+
simde__m512d
|
|
2074
|
+
simde_mm512_maskz_add_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
|
|
2075
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2076
|
+
return _mm512_maskz_add_pd(k, a, b);
|
|
2077
|
+
#else
|
|
2078
|
+
return simde_mm512_maskz_mov_pd(k, simde_mm512_add_pd(a, b));
|
|
2079
|
+
#endif
|
|
2080
|
+
}
|
|
2081
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2082
|
+
#define _mm512_maskz_add_pd(k, a, b) simde_mm512_maskz_add_pd(k, a, b)
|
|
2083
|
+
#endif
|
|
2084
|
+
|
|
2085
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2086
|
+
simde__m512i
|
|
2087
|
+
simde_mm512_and_si512 (simde__m512i a, simde__m512i b) {
|
|
2088
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2089
|
+
return _mm512_and_si512(a, b);
|
|
2090
|
+
#else
|
|
2091
|
+
simde__m512i_private
|
|
2092
|
+
r_,
|
|
2093
|
+
a_ = simde__m512i_to_private(a),
|
|
2094
|
+
b_ = simde__m512i_to_private(b);
|
|
2095
|
+
|
|
2096
|
+
#if defined(SIMDE_ARCH_X86_AVX2)
|
|
2097
|
+
r_.m256i[0] = simde_mm256_and_si256(a_.m256i[0], b_.m256i[0]);
|
|
2098
|
+
r_.m256i[1] = simde_mm256_and_si256(a_.m256i[1], b_.m256i[1]);
|
|
2099
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
2100
|
+
r_.i32f = a_.i32f & b_.i32f;
|
|
2101
|
+
#else
|
|
2102
|
+
SIMDE__VECTORIZE
|
|
2103
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
2104
|
+
r_.i32[i] = a_.i32[i] & b_.i32[i];
|
|
2105
|
+
}
|
|
2106
|
+
#endif
|
|
2107
|
+
|
|
2108
|
+
return simde__m512i_from_private(r_);
|
|
2109
|
+
#endif
|
|
2110
|
+
}
|
|
2111
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2112
|
+
#define _mm512_and_si512(a, b) simde_mm512_and_si512(a, b)
|
|
2113
|
+
#endif
|
|
2114
|
+
|
|
2115
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2116
|
+
simde__m512i
|
|
2117
|
+
simde_mm512_andnot_si512 (simde__m512i a, simde__m512i b) {
|
|
2118
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2119
|
+
return _mm512_andnot_si512(a, b);
|
|
2120
|
+
#else
|
|
2121
|
+
simde__m512i_private
|
|
2122
|
+
r_,
|
|
2123
|
+
a_ = simde__m512i_to_private(a),
|
|
2124
|
+
b_ = simde__m512i_to_private(b);
|
|
2125
|
+
|
|
2126
|
+
#if defined(SIMDE_ARCH_X86_AVX2)
|
|
2127
|
+
r_.m256i[0] = simde_mm256_andnot_si256(a_.m256i[0], b_.m256i[0]);
|
|
2128
|
+
r_.m256i[1] = simde_mm256_andnot_si256(a_.m256i[1], b_.m256i[1]);
|
|
2129
|
+
#else
|
|
2130
|
+
SIMDE__VECTORIZE
|
|
2131
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
|
|
2132
|
+
r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i];
|
|
2133
|
+
}
|
|
2134
|
+
#endif
|
|
2135
|
+
|
|
2136
|
+
return simde__m512i_from_private(r_);
|
|
2137
|
+
#endif
|
|
2138
|
+
}
|
|
2139
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2140
|
+
#define _mm512_andnot_si512(a, b) simde_mm512_andnot_si512(a, b)
|
|
2141
|
+
#endif
|
|
2142
|
+
|
|
2143
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2144
|
+
simde__m512i
|
|
2145
|
+
simde_mm512_broadcast_i32x4 (simde__m128i a) {
|
|
2146
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2147
|
+
return _mm512_broadcast_i32x4(a);
|
|
2148
|
+
#else
|
|
2149
|
+
simde__m512i_private r_;
|
|
2150
|
+
|
|
2151
|
+
#if defined(SIMDE_ARCH_X86_AVX2)
|
|
2152
|
+
r_.m256i[1] = r_.m256i[0] = simde_mm256_broadcastsi128_si256(a);
|
|
2153
|
+
#elif defined(SIMDE_ARCH_X86_SSE2)
|
|
2154
|
+
r_.m128i[3] = r_.m128i[2] = r_.m128i[1] = r_.m128i[0] = a;
|
|
2155
|
+
#else
|
|
2156
|
+
SIMDE__VECTORIZE
|
|
2157
|
+
for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
|
|
2158
|
+
r_.m128i[i] = a;
|
|
2159
|
+
}
|
|
2160
|
+
#endif
|
|
2161
|
+
|
|
2162
|
+
return simde__m512i_from_private(r_);
|
|
2163
|
+
#endif
|
|
2164
|
+
}
|
|
2165
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2166
|
+
#define _mm512_broadcast_i32x4(a) simde_mm512_broadcast_i32x4(a)
|
|
2167
|
+
#endif
|
|
2168
|
+
|
|
2169
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2170
|
+
simde__mmask16
|
|
2171
|
+
simde_mm512_cmpeq_epi32_mask (simde__m512i a, simde__m512i b) {
|
|
2172
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2173
|
+
return _mm512_cmpeq_epi32_mask(a, b);
|
|
2174
|
+
#else
|
|
2175
|
+
simde__m512i_private
|
|
2176
|
+
r_,
|
|
2177
|
+
a_ = simde__m512i_to_private(a),
|
|
2178
|
+
b_ = simde__m512i_to_private(b);
|
|
2179
|
+
|
|
2180
|
+
for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
|
|
2181
|
+
r_.m256i[i] = simde_mm256_cmpeq_epi32(a_.m256i[i], b_.m256i[i]);
|
|
2182
|
+
}
|
|
2183
|
+
|
|
2184
|
+
return simde__m512i_private_to_mmask16(r_);
|
|
2185
|
+
#endif
|
|
2186
|
+
}
|
|
2187
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2188
|
+
#define _mm512_cmpeq_epi32_mask(a, b) simde_mm512_cmpeq_epi32_mask(a, b)
|
|
2189
|
+
#endif
|
|
2190
|
+
|
|
2191
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2192
|
+
simde__mmask16
|
|
2193
|
+
simde_mm512_mask_cmpeq_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) {
|
|
2194
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2195
|
+
return _mm512_mask_cmpeq_epi32_mask(k1, a, b);
|
|
2196
|
+
#else
|
|
2197
|
+
return simde_mm512_cmpeq_epi32_mask(a, b) & k1;
|
|
2198
|
+
#endif
|
|
2199
|
+
}
|
|
2200
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2201
|
+
#define _mm512_mask_cmpeq_epi32_mask(k1, a, b) simde_mm512_mask_cmpeq_epi32_mask(k1, a, b)
|
|
2202
|
+
#endif
|
|
2203
|
+
|
|
2204
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2205
|
+
simde__mmask8
|
|
2206
|
+
simde_mm512_cmpeq_epi64_mask (simde__m512i a, simde__m512i b) {
|
|
2207
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2208
|
+
return _mm512_cmpeq_epi64_mask(a, b);
|
|
2209
|
+
#else
|
|
2210
|
+
simde__m512i_private
|
|
2211
|
+
r_,
|
|
2212
|
+
a_ = simde__m512i_to_private(a),
|
|
2213
|
+
b_ = simde__m512i_to_private(b);
|
|
2214
|
+
|
|
2215
|
+
for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
|
|
2216
|
+
r_.m256i[i] = simde_mm256_cmpeq_epi64(a_.m256i[i], b_.m256i[i]);
|
|
2217
|
+
}
|
|
2218
|
+
|
|
2219
|
+
return simde__m512i_private_to_mmask8(r_);
|
|
2220
|
+
#endif
|
|
2221
|
+
}
|
|
2222
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2223
|
+
#define _mm512_cmpeq_epi64_mask(a, b) simde_mm512_cmpeq_epi64_mask(a, b)
|
|
2224
|
+
#endif
|
|
2225
|
+
|
|
2226
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2227
|
+
simde__mmask8
|
|
2228
|
+
simde_mm512_mask_cmpeq_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) {
|
|
2229
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2230
|
+
return _mm512_mask_cmpeq_epi64_mask(k1, a, b);
|
|
2231
|
+
#else
|
|
2232
|
+
return simde_mm512_cmpeq_epi64_mask(a, b) & k1;
|
|
2233
|
+
#endif
|
|
2234
|
+
}
|
|
2235
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2236
|
+
#define _mm512_mask_cmpeq_epi64_mask(k1, a, b) simde_mm512_mask_cmpeq_epi64_mask(k1, a, b)
|
|
2237
|
+
#endif
|
|
2238
|
+
|
|
2239
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2240
|
+
simde__mmask16
|
|
2241
|
+
simde_mm512_cmpgt_epi32_mask (simde__m512i a, simde__m512i b) {
|
|
2242
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2243
|
+
return _mm512_cmpgt_epi32_mask(a, b);
|
|
2244
|
+
#else
|
|
2245
|
+
simde__m512i_private
|
|
2246
|
+
r_,
|
|
2247
|
+
a_ = simde__m512i_to_private(a),
|
|
2248
|
+
b_ = simde__m512i_to_private(b);
|
|
2249
|
+
|
|
2250
|
+
for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
|
|
2251
|
+
r_.m256i[i] = simde_mm256_cmpgt_epi32(a_.m256i[i], b_.m256i[i]);
|
|
2252
|
+
}
|
|
2253
|
+
|
|
2254
|
+
return simde__m512i_private_to_mmask16(r_);
|
|
2255
|
+
#endif
|
|
2256
|
+
}
|
|
2257
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2258
|
+
#define _mm512_cmpgt_epi32_mask(a, b) simde_mm512_cmpgt_epi32_mask(a, b)
|
|
2259
|
+
#endif
|
|
2260
|
+
|
|
2261
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2262
|
+
simde__mmask16
|
|
2263
|
+
simde_mm512_mask_cmpgt_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) {
|
|
2264
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2265
|
+
return _mm512_mask_cmpgt_epi32_mask(k1, a, b);
|
|
2266
|
+
#else
|
|
2267
|
+
return simde_mm512_cmpgt_epi32_mask(a, b) & k1;
|
|
2268
|
+
#endif
|
|
2269
|
+
}
|
|
2270
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2271
|
+
#define _mm512_mask_cmpgt_epi32_mask(k1, a, b) simde_mm512_mask_cmpgt_epi32_mask(k1, a, b)
|
|
2272
|
+
#endif
|
|
2273
|
+
|
|
2274
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2275
|
+
simde__mmask8
|
|
2276
|
+
simde_mm512_cmpgt_epi64_mask (simde__m512i a, simde__m512i b) {
|
|
2277
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2278
|
+
return _mm512_cmpgt_epi64_mask(a, b);
|
|
2279
|
+
#else
|
|
2280
|
+
simde__m512i_private
|
|
2281
|
+
r_,
|
|
2282
|
+
a_ = simde__m512i_to_private(a),
|
|
2283
|
+
b_ = simde__m512i_to_private(b);
|
|
2284
|
+
|
|
2285
|
+
for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
|
|
2286
|
+
r_.m256i[i] = simde_mm256_cmpgt_epi64(a_.m256i[i], b_.m256i[i]);
|
|
2287
|
+
}
|
|
2288
|
+
|
|
2289
|
+
return simde__m512i_private_to_mmask8(r_);
|
|
2290
|
+
#endif
|
|
2291
|
+
}
|
|
2292
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2293
|
+
#define _mm512_cmpgt_epi64_mask(a, b) simde_mm512_cmpgt_epi64_mask(a, b)
|
|
2294
|
+
#endif
|
|
2295
|
+
|
|
2296
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2297
|
+
simde__mmask8
|
|
2298
|
+
simde_mm512_mask_cmpgt_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) {
|
|
2299
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2300
|
+
return _mm512_mask_cmpgt_epi64_mask(k1, a, b);
|
|
2301
|
+
#else
|
|
2302
|
+
return simde_mm512_cmpgt_epi64_mask(a, b) & k1;
|
|
2303
|
+
#endif
|
|
2304
|
+
}
|
|
2305
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2306
|
+
#define _mm512_mask_cmpgt_epi64_mask(k1, a, b) simde_mm512_mask_cmpgt_epi64_mask(k1, a, b)
|
|
2307
|
+
#endif
|
|
2308
|
+
|
|
2309
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2310
|
+
simde__m512i
|
|
2311
|
+
simde_mm512_cvtepi8_epi32 (simde__m128i a) {
|
|
2312
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2313
|
+
return _mm512_cvtepi8_epi32(a);
|
|
2314
|
+
#else
|
|
2315
|
+
simde__m512i_private r_;
|
|
2316
|
+
simde__m128i_private a_ = simde__m128i_to_private(a);
|
|
2317
|
+
|
|
2318
|
+
#if defined(SIMDE__CONVERT_VECTOR)
|
|
2319
|
+
SIMDE__CONVERT_VECTOR(r_.i32, a_.i8);
|
|
2320
|
+
#else
|
|
2321
|
+
SIMDE__VECTORIZE
|
|
2322
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
2323
|
+
r_.i32[i] = a_.i8[i];
|
|
2324
|
+
}
|
|
2325
|
+
#endif
|
|
2326
|
+
|
|
2327
|
+
return simde__m512i_from_private(r_);
|
|
2328
|
+
#endif
|
|
2329
|
+
}
|
|
2330
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2331
|
+
#define _mm512_cvtepi8_epi32(a) simde_mm512_cvtepi8_epi32(a)
|
|
2332
|
+
#endif
|
|
2333
|
+
|
|
2334
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2335
|
+
simde__m512i
|
|
2336
|
+
simde_mm512_cvtepi8_epi64 (simde__m128i a) {
|
|
2337
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2338
|
+
return _mm512_cvtepi8_epi64(a);
|
|
2339
|
+
#else
|
|
2340
|
+
simde__m512i_private r_;
|
|
2341
|
+
simde__m128i_private a_ = simde__m128i_to_private(a);
|
|
2342
|
+
|
|
2343
|
+
#if defined(SIMDE__CONVERT_VECTOR)
|
|
2344
|
+
SIMDE__CONVERT_VECTOR(r_.i64, a_.m64_private[0].i8);
|
|
2345
|
+
#else
|
|
2346
|
+
SIMDE__VECTORIZE
|
|
2347
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
2348
|
+
r_.i64[i] = a_.i8[i];
|
|
2349
|
+
}
|
|
2350
|
+
#endif
|
|
2351
|
+
|
|
2352
|
+
return simde__m512i_from_private(r_);
|
|
2353
|
+
#endif
|
|
2354
|
+
}
|
|
2355
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2356
|
+
#define _mm512_cvtepi8_epi64(a) simde_mm512_cvtepi8_epi64(a)
|
|
2357
|
+
#endif
|
|
2358
|
+
|
|
2359
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2360
|
+
simde__m128i
|
|
2361
|
+
simde_mm512_cvtepi32_epi8 (simde__m512i a) {
|
|
2362
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2363
|
+
return _mm512_cvtepi32_epi8(a);
|
|
2364
|
+
#else
|
|
2365
|
+
simde__m128i_private r_;
|
|
2366
|
+
simde__m512i_private a_ = simde__m512i_to_private(a);
|
|
2367
|
+
|
|
2368
|
+
#if defined(SIMDE__CONVERT_VECTOR)
|
|
2369
|
+
SIMDE__CONVERT_VECTOR(r_.i8, a_.i32);
|
|
2370
|
+
#else
|
|
2371
|
+
SIMDE__VECTORIZE
|
|
2372
|
+
for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
|
|
2373
|
+
r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i32[i]);
|
|
2374
|
+
}
|
|
2375
|
+
#endif
|
|
2376
|
+
|
|
2377
|
+
return simde__m128i_from_private(r_);
|
|
2378
|
+
#endif
|
|
2379
|
+
}
|
|
2380
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2381
|
+
#define _mm512_cvtepi32_epi8(a) simde_mm512_cvtepi32_epi8(a)
|
|
2382
|
+
#endif
|
|
2383
|
+
|
|
2384
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2385
|
+
simde__m256i
|
|
2386
|
+
simde_mm512_cvtepi32_epi16 (simde__m512i a) {
|
|
2387
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2388
|
+
return _mm512_cvtepi32_epi16(a);
|
|
2389
|
+
#else
|
|
2390
|
+
simde__m256i_private r_;
|
|
2391
|
+
simde__m512i_private a_ = simde__m512i_to_private(a);
|
|
2392
|
+
|
|
2393
|
+
#if defined(SIMDE__CONVERT_VECTOR)
|
|
2394
|
+
SIMDE__CONVERT_VECTOR(r_.i16, a_.i32);
|
|
2395
|
+
#else
|
|
2396
|
+
SIMDE__VECTORIZE
|
|
2397
|
+
for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
|
|
2398
|
+
r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]);
|
|
2399
|
+
}
|
|
2400
|
+
#endif
|
|
2401
|
+
|
|
2402
|
+
return simde__m256i_from_private(r_);
|
|
2403
|
+
#endif
|
|
2404
|
+
}
|
|
2405
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2406
|
+
#define _mm512_cvtepi32_epi16(a) simde_mm512_cvtepi32_epi16(a)
|
|
2407
|
+
#endif
|
|
2408
|
+
|
|
2409
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2410
|
+
simde__m128i
|
|
2411
|
+
simde_mm512_cvtepi64_epi8 (simde__m512i a) {
|
|
2412
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2413
|
+
return _mm512_cvtepi64_epi8(a);
|
|
2414
|
+
#else
|
|
2415
|
+
simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128());
|
|
2416
|
+
simde__m512i_private a_ = simde__m512i_to_private(a);
|
|
2417
|
+
|
|
2418
|
+
#if defined(SIMDE__CONVERT_VECTOR)
|
|
2419
|
+
SIMDE__CONVERT_VECTOR(r_.m64_private[0].i8, a_.i64);
|
|
2420
|
+
#else
|
|
2421
|
+
SIMDE__VECTORIZE
|
|
2422
|
+
for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
|
|
2423
|
+
r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i64[i]);
|
|
2424
|
+
}
|
|
2425
|
+
#endif
|
|
2426
|
+
|
|
2427
|
+
return simde__m128i_from_private(r_);
|
|
2428
|
+
#endif
|
|
2429
|
+
}
|
|
2430
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2431
|
+
#define _mm512_cvtepi64_epi8(a) simde_mm512_cvtepi64_epi8(a)
|
|
2432
|
+
#endif
|
|
2433
|
+
|
|
2434
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2435
|
+
simde__m128i
|
|
2436
|
+
simde_mm512_cvtepi64_epi16 (simde__m512i a) {
|
|
2437
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2438
|
+
return _mm512_cvtepi64_epi16(a);
|
|
2439
|
+
#else
|
|
2440
|
+
simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128());
|
|
2441
|
+
simde__m512i_private a_ = simde__m512i_to_private(a);
|
|
2442
|
+
|
|
2443
|
+
#if defined(SIMDE__CONVERT_VECTOR)
|
|
2444
|
+
SIMDE__CONVERT_VECTOR(r_.i16, a_.i64);
|
|
2445
|
+
#else
|
|
2446
|
+
SIMDE__VECTORIZE
|
|
2447
|
+
for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
|
|
2448
|
+
r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i64[i]);
|
|
2449
|
+
}
|
|
2450
|
+
#endif
|
|
2451
|
+
|
|
2452
|
+
return simde__m128i_from_private(r_);
|
|
2453
|
+
#endif
|
|
2454
|
+
}
|
|
2455
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2456
|
+
#define _mm512_cvtepi64_epi16(a) simde_mm512_cvtepi64_epi16(a)
|
|
2457
|
+
#endif
|
|
2458
|
+
|
|
2459
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2460
|
+
simde__m256i
|
|
2461
|
+
simde_mm512_cvtepi64_epi32 (simde__m512i a) {
|
|
2462
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2463
|
+
return _mm512_cvtepi64_epi32(a);
|
|
2464
|
+
#else
|
|
2465
|
+
simde__m256i_private r_;
|
|
2466
|
+
simde__m512i_private a_ = simde__m512i_to_private(a);
|
|
2467
|
+
|
|
2468
|
+
#if defined(SIMDE__CONVERT_VECTOR)
|
|
2469
|
+
SIMDE__CONVERT_VECTOR(r_.i32, a_.i64);
|
|
2470
|
+
#else
|
|
2471
|
+
SIMDE__VECTORIZE
|
|
2472
|
+
for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
|
|
2473
|
+
r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i64[i]);
|
|
2474
|
+
}
|
|
2475
|
+
#endif
|
|
2476
|
+
|
|
2477
|
+
return simde__m256i_from_private(r_);
|
|
2478
|
+
#endif
|
|
2479
|
+
}
|
|
2480
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2481
|
+
#define _mm512_cvtepi64_epi32(a) simde_mm512_cvtepi64_epi32(a)
|
|
2482
|
+
#endif
|
|
2483
|
+
|
|
2484
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2485
|
+
simde__m128i
|
|
2486
|
+
simde_mm512_cvtsepi32_epi8 (simde__m512i a) {
|
|
2487
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2488
|
+
return _mm512_cvtsepi32_epi8(a);
|
|
2489
|
+
#else
|
|
2490
|
+
simde__m128i_private r_;
|
|
2491
|
+
simde__m512i_private a_ = simde__m512i_to_private(a);
|
|
2492
|
+
|
|
2493
|
+
SIMDE__VECTORIZE
|
|
2494
|
+
for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
|
|
2495
|
+
r_.i8[i] =
|
|
2496
|
+
(a_.i32[i] < INT8_MIN)
|
|
2497
|
+
? (INT8_MIN)
|
|
2498
|
+
: ((a_.i32[i] > INT8_MAX)
|
|
2499
|
+
? (INT8_MAX)
|
|
2500
|
+
: HEDLEY_STATIC_CAST(int8_t, a_.i32[i]));
|
|
2501
|
+
}
|
|
2502
|
+
|
|
2503
|
+
return simde__m128i_from_private(r_);
|
|
2504
|
+
#endif
|
|
2505
|
+
}
|
|
2506
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2507
|
+
#define _mm512_cvtsepi32_epi8(a) simde_mm512_cvtsepi32_epi8(a)
|
|
2508
|
+
#endif
|
|
2509
|
+
|
|
2510
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2511
|
+
simde__m256i
|
|
2512
|
+
simde_mm512_cvtsepi32_epi16 (simde__m512i a) {
|
|
2513
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2514
|
+
return _mm512_cvtsepi32_epi16(a);
|
|
2515
|
+
#else
|
|
2516
|
+
simde__m256i_private r_;
|
|
2517
|
+
simde__m512i_private a_ = simde__m512i_to_private(a);
|
|
2518
|
+
|
|
2519
|
+
SIMDE__VECTORIZE
|
|
2520
|
+
for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
|
|
2521
|
+
r_.i16[i] =
|
|
2522
|
+
(a_.i32[i] < INT16_MIN)
|
|
2523
|
+
? (INT16_MIN)
|
|
2524
|
+
: ((a_.i32[i] > INT16_MAX)
|
|
2525
|
+
? (INT16_MAX)
|
|
2526
|
+
: HEDLEY_STATIC_CAST(int16_t, a_.i32[i]));
|
|
2527
|
+
}
|
|
2528
|
+
|
|
2529
|
+
return simde__m256i_from_private(r_);
|
|
2530
|
+
#endif
|
|
2531
|
+
}
|
|
2532
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2533
|
+
#define _mm512_cvtsepi32_epi16(a) simde_mm512_cvtsepi32_epi16(a)
|
|
2534
|
+
#endif
|
|
2535
|
+
|
|
2536
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2537
|
+
simde__m128i
|
|
2538
|
+
simde_mm512_cvtsepi64_epi8 (simde__m512i a) {
|
|
2539
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2540
|
+
return _mm512_cvtsepi64_epi8(a);
|
|
2541
|
+
#else
|
|
2542
|
+
simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128());
|
|
2543
|
+
simde__m512i_private a_ = simde__m512i_to_private(a);
|
|
2544
|
+
|
|
2545
|
+
SIMDE__VECTORIZE
|
|
2546
|
+
for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
|
|
2547
|
+
r_.i8[i] =
|
|
2548
|
+
(a_.i64[i] < INT8_MIN)
|
|
2549
|
+
? (INT8_MIN)
|
|
2550
|
+
: ((a_.i64[i] > INT8_MAX)
|
|
2551
|
+
? (INT8_MAX)
|
|
2552
|
+
: HEDLEY_STATIC_CAST(int8_t, a_.i64[i]));
|
|
2553
|
+
}
|
|
2554
|
+
|
|
2555
|
+
return simde__m128i_from_private(r_);
|
|
2556
|
+
#endif
|
|
2557
|
+
}
|
|
2558
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2559
|
+
#define _mm512_cvtsepi64_epi8(a) simde_mm512_cvtsepi64_epi8(a)
|
|
2560
|
+
#endif
|
|
2561
|
+
|
|
2562
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2563
|
+
simde__m128i
|
|
2564
|
+
simde_mm512_cvtsepi64_epi16 (simde__m512i a) {
|
|
2565
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2566
|
+
return _mm512_cvtsepi64_epi16(a);
|
|
2567
|
+
#else
|
|
2568
|
+
simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128());
|
|
2569
|
+
simde__m512i_private a_ = simde__m512i_to_private(a);
|
|
2570
|
+
|
|
2571
|
+
SIMDE__VECTORIZE
|
|
2572
|
+
for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
|
|
2573
|
+
r_.i16[i] =
|
|
2574
|
+
(a_.i64[i] < INT16_MIN)
|
|
2575
|
+
? (INT16_MIN)
|
|
2576
|
+
: ((a_.i64[i] > INT16_MAX)
|
|
2577
|
+
? (INT16_MAX)
|
|
2578
|
+
: HEDLEY_STATIC_CAST(int16_t, a_.i64[i]));
|
|
2579
|
+
}
|
|
2580
|
+
|
|
2581
|
+
return simde__m128i_from_private(r_);
|
|
2582
|
+
#endif
|
|
2583
|
+
}
|
|
2584
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2585
|
+
#define _mm512_cvtsepi64_epi16(a) simde_mm512_cvtsepi64_epi16(a)
|
|
2586
|
+
#endif
|
|
2587
|
+
|
|
2588
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2589
|
+
simde__m256i
|
|
2590
|
+
simde_mm512_cvtsepi64_epi32 (simde__m512i a) {
|
|
2591
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2592
|
+
return _mm512_cvtsepi64_epi32(a);
|
|
2593
|
+
#else
|
|
2594
|
+
simde__m256i_private r_;
|
|
2595
|
+
simde__m512i_private a_ = simde__m512i_to_private(a);
|
|
2596
|
+
|
|
2597
|
+
SIMDE__VECTORIZE
|
|
2598
|
+
for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
|
|
2599
|
+
r_.i32[i] =
|
|
2600
|
+
(a_.i64[i] < INT32_MIN)
|
|
2601
|
+
? (INT32_MIN)
|
|
2602
|
+
: ((a_.i64[i] > INT32_MAX)
|
|
2603
|
+
? (INT32_MAX)
|
|
2604
|
+
: HEDLEY_STATIC_CAST(int32_t, a_.i64[i]));
|
|
2605
|
+
}
|
|
2606
|
+
|
|
2607
|
+
return simde__m256i_from_private(r_);
|
|
2608
|
+
#endif
|
|
2609
|
+
}
|
|
2610
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2611
|
+
#define _mm512_cvtsepi64_epi32(a) simde_mm512_cvtsepi64_epi32(a)
|
|
2612
|
+
#endif
|
|
2613
|
+
|
|
2614
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2615
|
+
simde__m512
|
|
2616
|
+
simde_mm512_div_ps (simde__m512 a, simde__m512 b) {
|
|
2617
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2618
|
+
return _mm512_div_ps(a, b);
|
|
2619
|
+
#else
|
|
2620
|
+
simde__m512_private
|
|
2621
|
+
r_,
|
|
2622
|
+
a_ = simde__m512_to_private(a),
|
|
2623
|
+
b_ = simde__m512_to_private(b);
|
|
2624
|
+
|
|
2625
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
2626
|
+
r_.f32 = a_.f32 / b_.f32;
|
|
2627
|
+
#else
|
|
2628
|
+
SIMDE__VECTORIZE
|
|
2629
|
+
for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
|
|
2630
|
+
r_.m256[i] = simde_mm256_div_ps(a_.m256[i], b_.m256[i]);
|
|
2631
|
+
}
|
|
2632
|
+
#endif
|
|
2633
|
+
|
|
2634
|
+
return simde__m512_from_private(r_);
|
|
2635
|
+
#endif
|
|
2636
|
+
}
|
|
2637
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2638
|
+
#define _mm512_div_ps(a, b) simde_mm512_div_ps(a, b)
|
|
2639
|
+
#endif
|
|
2640
|
+
|
|
2641
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2642
|
+
simde__m512
|
|
2643
|
+
simde_mm512_mask_div_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
|
|
2644
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2645
|
+
return _mm512_mask_div_ps(src, k, a, b);
|
|
2646
|
+
#else
|
|
2647
|
+
return simde_mm512_mask_mov_ps(src, k, simde_mm512_div_ps(a, b));
|
|
2648
|
+
#endif
|
|
2649
|
+
}
|
|
2650
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2651
|
+
#define _mm512_mask_div_ps(src, k, a, b) simde_mm512_mask_div_ps(src, k, a, b)
|
|
2652
|
+
#endif
|
|
2653
|
+
|
|
2654
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2655
|
+
simde__m512
|
|
2656
|
+
simde_mm512_maskz_div_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
|
|
2657
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2658
|
+
return _mm512_maskz_div_ps(k, a, b);
|
|
2659
|
+
#else
|
|
2660
|
+
return simde_mm512_maskz_mov_ps(k, simde_mm512_div_ps(a, b));
|
|
2661
|
+
#endif
|
|
2662
|
+
}
|
|
2663
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2664
|
+
#define _mm512_maskz_div_ps(k, a, b) simde_mm512_maskz_div_ps(k, a, b)
|
|
2665
|
+
#endif
|
|
2666
|
+
|
|
2667
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2668
|
+
simde__m512d
|
|
2669
|
+
simde_mm512_div_pd (simde__m512d a, simde__m512d b) {
|
|
2670
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2671
|
+
return _mm512_div_pd(a, b);
|
|
2672
|
+
#else
|
|
2673
|
+
simde__m512d_private
|
|
2674
|
+
r_,
|
|
2675
|
+
a_ = simde__m512d_to_private(a),
|
|
2676
|
+
b_ = simde__m512d_to_private(b);
|
|
2677
|
+
|
|
2678
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
2679
|
+
r_.f64 = a_.f64 / b_.f64;
|
|
2680
|
+
#else
|
|
2681
|
+
SIMDE__VECTORIZE
|
|
2682
|
+
for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
|
|
2683
|
+
r_.m256d[i] = simde_mm256_div_pd(a_.m256d[i], b_.m256d[i]);
|
|
2684
|
+
}
|
|
2685
|
+
#endif
|
|
2686
|
+
|
|
2687
|
+
return simde__m512d_from_private(r_);
|
|
2688
|
+
#endif
|
|
2689
|
+
}
|
|
2690
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2691
|
+
#define _mm512_div_pd(a, b) simde_mm512_div_pd(a, b)
|
|
2692
|
+
#endif
|
|
2693
|
+
|
|
2694
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2695
|
+
simde__m512d
|
|
2696
|
+
simde_mm512_mask_div_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
|
|
2697
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2698
|
+
return _mm512_mask_div_pd(src, k, a, b);
|
|
2699
|
+
#else
|
|
2700
|
+
return simde_mm512_mask_mov_pd(src, k, simde_mm512_div_pd(a, b));
|
|
2701
|
+
#endif
|
|
2702
|
+
}
|
|
2703
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2704
|
+
#define _mm512_mask_div_pd(src, k, a, b) simde_mm512_mask_div_pd(src, k, a, b)
|
|
2705
|
+
#endif
|
|
2706
|
+
|
|
2707
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2708
|
+
simde__m512d
|
|
2709
|
+
simde_mm512_maskz_div_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
|
|
2710
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2711
|
+
return _mm512_maskz_div_pd(k, a, b);
|
|
2712
|
+
#else
|
|
2713
|
+
return simde_mm512_maskz_mov_pd(k, simde_mm512_div_pd(a, b));
|
|
2714
|
+
#endif
|
|
2715
|
+
}
|
|
2716
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2717
|
+
#define _mm512_maskz_div_pd(k, a, b) simde_mm512_maskz_div_pd(k, a, b)
|
|
2718
|
+
#endif
|
|
2719
|
+
|
|
2720
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2721
|
+
simde__m512i
|
|
2722
|
+
simde_mm512_load_si512 (simde__m512i const * mem_addr) {
|
|
2723
|
+
simde_assert_aligned(64, mem_addr);
|
|
2724
|
+
|
|
2725
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2726
|
+
return _mm512_load_si512((__m512i const*) mem_addr);
|
|
2727
|
+
#elif defined(SIMDE_ARCH_AARCH64) && (defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(8,0,0))
|
|
2728
|
+
simde__m512i r;
|
|
2729
|
+
memcpy(&r, mem_addr, sizeof(r));
|
|
2730
|
+
return r;
|
|
2731
|
+
#else
|
|
2732
|
+
return *mem_addr;
|
|
2733
|
+
#endif
|
|
2734
|
+
}
|
|
2735
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2736
|
+
#define _mm512_load_si512(a) simde_mm512_load_si512(a)
|
|
2737
|
+
#endif
|
|
2738
|
+
|
|
2739
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2740
|
+
simde__m512i
|
|
2741
|
+
simde_mm512_loadu_si512 (simde__m512i const * mem_addr) {
|
|
2742
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2743
|
+
return _mm512_loadu_si512((__m512i const*) mem_addr);
|
|
2744
|
+
#else
|
|
2745
|
+
simde__m512i r;
|
|
2746
|
+
simde_memcpy(&r, mem_addr, sizeof(r));
|
|
2747
|
+
return r;
|
|
2748
|
+
#endif
|
|
2749
|
+
}
|
|
2750
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2751
|
+
#define _mm512_loadu_si512(a) simde_mm512_loadu_si512(a)
|
|
2752
|
+
#endif
|
|
2753
|
+
|
|
2754
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2755
|
+
simde__m512
|
|
2756
|
+
simde_mm512_mul_ps (simde__m512 a, simde__m512 b) {
|
|
2757
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2758
|
+
return _mm512_mul_ps(a, b);
|
|
2759
|
+
#else
|
|
2760
|
+
simde__m512_private
|
|
2761
|
+
r_,
|
|
2762
|
+
a_ = simde__m512_to_private(a),
|
|
2763
|
+
b_ = simde__m512_to_private(b);
|
|
2764
|
+
|
|
2765
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
2766
|
+
r_.f32 = a_.f32 * b_.f32;
|
|
2767
|
+
#else
|
|
2768
|
+
SIMDE__VECTORIZE
|
|
2769
|
+
for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
|
|
2770
|
+
r_.m256[i] = simde_mm256_mul_ps(a_.m256[i], b_.m256[i]);
|
|
2771
|
+
}
|
|
2772
|
+
#endif
|
|
2773
|
+
|
|
2774
|
+
return simde__m512_from_private(r_);
|
|
2775
|
+
#endif
|
|
2776
|
+
}
|
|
2777
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2778
|
+
#define _mm512_mul_ps(a, b) simde_mm512_mul_ps(a, b)
|
|
2779
|
+
#endif
|
|
2780
|
+
|
|
2781
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2782
|
+
simde__m512
|
|
2783
|
+
simde_mm512_mask_mul_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
|
|
2784
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2785
|
+
return _mm512_mask_mul_ps(src, k, a, b);
|
|
2786
|
+
#else
|
|
2787
|
+
return simde_mm512_mask_mov_ps(src, k, simde_mm512_mul_ps(a, b));
|
|
2788
|
+
#endif
|
|
2789
|
+
}
|
|
2790
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2791
|
+
#define _mm512_mask_mul_ps(src, k, a, b) simde_mm512_mask_mul_ps(src, k, a, b)
|
|
2792
|
+
#endif
|
|
2793
|
+
|
|
2794
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2795
|
+
simde__m512
|
|
2796
|
+
simde_mm512_maskz_mul_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
|
|
2797
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2798
|
+
return _mm512_maskz_mul_ps(k, a, b);
|
|
2799
|
+
#else
|
|
2800
|
+
return simde_mm512_maskz_mov_ps(k, simde_mm512_mul_ps(a, b));
|
|
2801
|
+
#endif
|
|
2802
|
+
}
|
|
2803
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2804
|
+
#define _mm512_maskz_mul_ps(k, a, b) simde_mm512_maskz_mul_ps(k, a, b)
|
|
2805
|
+
#endif
|
|
2806
|
+
|
|
2807
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2808
|
+
simde__m512d
|
|
2809
|
+
simde_mm512_mul_pd (simde__m512d a, simde__m512d b) {
|
|
2810
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2811
|
+
return _mm512_mul_pd(a, b);
|
|
2812
|
+
#else
|
|
2813
|
+
simde__m512d_private
|
|
2814
|
+
r_,
|
|
2815
|
+
a_ = simde__m512d_to_private(a),
|
|
2816
|
+
b_ = simde__m512d_to_private(b);
|
|
2817
|
+
|
|
2818
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
2819
|
+
r_.f64 = a_.f64 * b_.f64;
|
|
2820
|
+
#else
|
|
2821
|
+
SIMDE__VECTORIZE
|
|
2822
|
+
for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
|
|
2823
|
+
r_.m256d[i] = simde_mm256_mul_pd(a_.m256d[i], b_.m256d[i]);
|
|
2824
|
+
}
|
|
2825
|
+
#endif
|
|
2826
|
+
|
|
2827
|
+
return simde__m512d_from_private(r_);
|
|
2828
|
+
#endif
|
|
2829
|
+
}
|
|
2830
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2831
|
+
#define _mm512_mul_pd(a, b) simde_mm512_mul_pd(a, b)
|
|
2832
|
+
#endif
|
|
2833
|
+
|
|
2834
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2835
|
+
simde__m512d
|
|
2836
|
+
simde_mm512_mask_mul_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
|
|
2837
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2838
|
+
return _mm512_mask_mul_pd(src, k, a, b);
|
|
2839
|
+
#else
|
|
2840
|
+
return simde_mm512_mask_mov_pd(src, k, simde_mm512_mul_pd(a, b));
|
|
2841
|
+
#endif
|
|
2842
|
+
}
|
|
2843
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2844
|
+
#define _mm512_mask_mul_pd(src, k, a, b) simde_mm512_mask_mul_pd(src, k, a, b)
|
|
2845
|
+
#endif
|
|
2846
|
+
|
|
2847
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2848
|
+
simde__m512d
|
|
2849
|
+
simde_mm512_maskz_mul_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
|
|
2850
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2851
|
+
return _mm512_maskz_mul_pd(k, a, b);
|
|
2852
|
+
#else
|
|
2853
|
+
return simde_mm512_maskz_mov_pd(k, simde_mm512_mul_pd(a, b));
|
|
2854
|
+
#endif
|
|
2855
|
+
}
|
|
2856
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2857
|
+
#define _mm512_maskz_mul_pd(k, a, b) simde_mm512_maskz_mul_pd(k, a, b)
|
|
2858
|
+
#endif
|
|
2859
|
+
|
|
2860
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2861
|
+
simde__m512i
|
|
2862
|
+
simde_mm512_mul_epi32 (simde__m512i a, simde__m512i b) {
|
|
2863
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2864
|
+
return _mm512_mul_epi32(a, b);
|
|
2865
|
+
#else
|
|
2866
|
+
simde__m512i_private
|
|
2867
|
+
r_,
|
|
2868
|
+
a_ = simde__m512i_to_private(a),
|
|
2869
|
+
b_ = simde__m512i_to_private(b);
|
|
2870
|
+
|
|
2871
|
+
#if defined(SIMDE__CONVERT_VECTOR) && defined(SIMDE__SHUFFLE_VECTOR)
|
|
2872
|
+
simde__m512i_private x;
|
|
2873
|
+
__typeof__(r_.i64) ta, tb;
|
|
2874
|
+
|
|
2875
|
+
/* Get even numbered 32-bit values */
|
|
2876
|
+
x.i32 = SIMDE__SHUFFLE_VECTOR(32, 64, a_.i32, b_.i32, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
|
|
2877
|
+
/* Cast to 64 bits */
|
|
2878
|
+
SIMDE__CONVERT_VECTOR(ta, x.m256i_private[0].i32);
|
|
2879
|
+
SIMDE__CONVERT_VECTOR(tb, x.m256i_private[1].i32);
|
|
2880
|
+
r_.i64 = ta * tb;
|
|
2881
|
+
#else
|
|
2882
|
+
SIMDE__VECTORIZE
|
|
2883
|
+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
|
|
2884
|
+
r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[i << 1]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i << 1]);
|
|
2885
|
+
}
|
|
2886
|
+
#endif
|
|
2887
|
+
return simde__m512i_from_private(r_);
|
|
2888
|
+
#endif
|
|
2889
|
+
}
|
|
2890
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2891
|
+
#define _mm512_mul_epi32(a, b) simde_mm512_mul_epi32(a, b)
|
|
2892
|
+
#endif
|
|
2893
|
+
|
|
2894
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2895
|
+
simde__m512i
|
|
2896
|
+
simde_mm512_mask_mul_epi32(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
|
|
2897
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2898
|
+
return _mm512_mask_mul_epi32(src, k, a, b);
|
|
2899
|
+
#else
|
|
2900
|
+
return simde_mm512_mask_mov_epi64(src, k, simde_mm512_mul_epi32(a, b));
|
|
2901
|
+
#endif
|
|
2902
|
+
}
|
|
2903
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2904
|
+
#define _mm512_mask_mul_epi32(src, k, a, b) simde_mm512_mask_mul_epi32(src, k, a, b)
|
|
2905
|
+
#endif
|
|
2906
|
+
|
|
2907
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2908
|
+
simde__m512i
|
|
2909
|
+
simde_mm512_maskz_mul_epi32(simde__mmask8 k, simde__m512i a, simde__m512i b) {
|
|
2910
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2911
|
+
return _mm512_maskz_mul_epi32(k, a, b);
|
|
2912
|
+
#else
|
|
2913
|
+
return simde_mm512_maskz_mov_epi64(k, simde_mm512_mul_epi32(a, b));
|
|
2914
|
+
#endif
|
|
2915
|
+
}
|
|
2916
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2917
|
+
#define _mm512_maskz_mul_epi32(k, a, b) simde_mm512_maskz_mul_epi32(k, a, b)
|
|
2918
|
+
#endif
|
|
2919
|
+
|
|
2920
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2921
|
+
simde__m512i
|
|
2922
|
+
simde_mm512_mul_epu32 (simde__m512i a, simde__m512i b) {
|
|
2923
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2924
|
+
return _mm512_mul_epu32(a, b);
|
|
2925
|
+
#else
|
|
2926
|
+
simde__m512i_private
|
|
2927
|
+
r_,
|
|
2928
|
+
a_ = simde__m512i_to_private(a),
|
|
2929
|
+
b_ = simde__m512i_to_private(b);
|
|
2930
|
+
|
|
2931
|
+
#if defined(SIMDE__CONVERT_VECTOR) && defined(SIMDE__SHUFFLE_VECTOR)
|
|
2932
|
+
simde__m512i_private x;
|
|
2933
|
+
__typeof__(r_.u64) ta, tb;
|
|
2934
|
+
|
|
2935
|
+
x.u32 = SIMDE__SHUFFLE_VECTOR(32, 64, a_.u32, b_.u32, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
|
|
2936
|
+
SIMDE__CONVERT_VECTOR(ta, x.m256i_private[0].u32);
|
|
2937
|
+
SIMDE__CONVERT_VECTOR(tb, x.m256i_private[1].u32);
|
|
2938
|
+
r_.u64 = ta * tb;
|
|
2939
|
+
#else
|
|
2940
|
+
SIMDE__VECTORIZE
|
|
2941
|
+
for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
|
|
2942
|
+
r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i << 1]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i << 1]);
|
|
2943
|
+
}
|
|
2944
|
+
#endif
|
|
2945
|
+
|
|
2946
|
+
return simde__m512i_from_private(r_);
|
|
2947
|
+
#endif
|
|
2948
|
+
}
|
|
2949
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2950
|
+
#define _mm512_mul_epu32(a, b) simde_mm512_mul_epu32(a, b)
|
|
2951
|
+
#endif
|
|
2952
|
+
|
|
2953
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2954
|
+
simde__m512i
|
|
2955
|
+
simde_mm512_mask_mul_epu32(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
|
|
2956
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2957
|
+
return _mm512_mask_mul_epu32(src, k, a, b);
|
|
2958
|
+
#else
|
|
2959
|
+
return simde_mm512_mask_mov_epi64(src, k, simde_mm512_mul_epu32(a, b));
|
|
2960
|
+
#endif
|
|
2961
|
+
}
|
|
2962
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2963
|
+
#define _mm512_mask_mul_epu32(src, k, a, b) simde_mm512_mask_mul_epu32(src, k, a, b)
|
|
2964
|
+
#endif
|
|
2965
|
+
|
|
2966
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2967
|
+
simde__m512i
|
|
2968
|
+
simde_mm512_maskz_mul_epu32(simde__mmask8 k, simde__m512i a, simde__m512i b) {
|
|
2969
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2970
|
+
return _mm512_maskz_mul_epu32(k, a, b);
|
|
2971
|
+
#else
|
|
2972
|
+
return simde_mm512_maskz_mov_epi64(k, simde_mm512_mul_epu32(a, b));
|
|
2973
|
+
#endif
|
|
2974
|
+
}
|
|
2975
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
2976
|
+
#define _mm512_maskz_mul_epu32(k, a, b) simde_mm512_maskz_mul_epu32(k, a, b)
|
|
2977
|
+
#endif
|
|
2978
|
+
|
|
2979
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
2980
|
+
simde__m512i
|
|
2981
|
+
simde_mm512_or_si512 (simde__m512i a, simde__m512i b) {
|
|
2982
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
2983
|
+
return _mm512_or_si512(a, b);
|
|
2984
|
+
#else
|
|
2985
|
+
simde__m512i_private
|
|
2986
|
+
r_,
|
|
2987
|
+
a_ = simde__m512i_to_private(a),
|
|
2988
|
+
b_ = simde__m512i_to_private(b);
|
|
2989
|
+
|
|
2990
|
+
#if defined(SIMDE_ARCH_X86_AVX2)
|
|
2991
|
+
r_.m256i[0] = simde_mm256_or_si256(a_.m256i[0], b_.m256i[0]);
|
|
2992
|
+
r_.m256i[1] = simde_mm256_or_si256(a_.m256i[1], b_.m256i[1]);
|
|
2993
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
2994
|
+
r_.i32f = a_.i32f | b_.i32f;
|
|
2995
|
+
#else
|
|
2996
|
+
SIMDE__VECTORIZE
|
|
2997
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
|
|
2998
|
+
r_.i32f[i] = a_.i32f[i] | b_.i32f[i];
|
|
2999
|
+
}
|
|
3000
|
+
#endif
|
|
3001
|
+
|
|
3002
|
+
return simde__m512i_from_private(r_);
|
|
3003
|
+
#endif
|
|
3004
|
+
}
|
|
3005
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
3006
|
+
#define _mm512_or_si512(a, b) simde_mm512_or_si512(a, b)
|
|
3007
|
+
#endif
|
|
3008
|
+
|
|
3009
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3010
|
+
simde__m512i
|
|
3011
|
+
simde_mm512_sub_epi32 (simde__m512i a, simde__m512i b) {
|
|
3012
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
3013
|
+
return _mm512_sub_epi32(a, b);
|
|
3014
|
+
#else
|
|
3015
|
+
simde__m512i_private
|
|
3016
|
+
r_,
|
|
3017
|
+
a_ = simde__m512i_to_private(a),
|
|
3018
|
+
b_ = simde__m512i_to_private(b);
|
|
3019
|
+
|
|
3020
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
3021
|
+
r_.i32 = a_.i32 - b_.i32;
|
|
3022
|
+
#else
|
|
3023
|
+
SIMDE__VECTORIZE
|
|
3024
|
+
for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
|
|
3025
|
+
r_.m256i[i] = simde_mm256_sub_epi32(a_.m256i[i], b_.m256i[i]);
|
|
3026
|
+
}
|
|
3027
|
+
#endif
|
|
3028
|
+
|
|
3029
|
+
return simde__m512i_from_private(r_);
|
|
3030
|
+
#endif
|
|
3031
|
+
}
|
|
3032
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
3033
|
+
#define _mm512_sub_epi32(a, b) simde_mm512_sub_epi32(a, b)
|
|
3034
|
+
#endif
|
|
3035
|
+
|
|
3036
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3037
|
+
simde__m512i
|
|
3038
|
+
simde_mm512_mask_sub_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
|
|
3039
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
3040
|
+
return _mm512_mask_sub_epi32(src, k, a, b);
|
|
3041
|
+
#else
|
|
3042
|
+
return simde_mm512_mask_mov_epi32(src, k, simde_mm512_sub_epi32(a, b));
|
|
3043
|
+
#endif
|
|
3044
|
+
}
|
|
3045
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
3046
|
+
#define _mm512_mask_sub_epi32(src, k, a, b) simde_mm512_mask_sub_epi32(src, k, a, b)
|
|
3047
|
+
#endif
|
|
3048
|
+
|
|
3049
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3050
|
+
simde__m512i
|
|
3051
|
+
simde_mm512_maskz_sub_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) {
|
|
3052
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
3053
|
+
return _mm512_maskz_sub_epi32(k, a, b);
|
|
3054
|
+
#else
|
|
3055
|
+
return simde_mm512_maskz_mov_epi32(k, simde_mm512_sub_epi32(a, b));
|
|
3056
|
+
#endif
|
|
3057
|
+
}
|
|
3058
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
3059
|
+
#define _mm512_maskz_sub_epi32(k, a, b) simde_mm512_maskz_sub_epi32(k, a, b)
|
|
3060
|
+
#endif
|
|
3061
|
+
|
|
3062
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3063
|
+
simde__m512i
|
|
3064
|
+
simde_mm512_sub_epi64 (simde__m512i a, simde__m512i b) {
|
|
3065
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
3066
|
+
return _mm512_sub_epi64(a, b);
|
|
3067
|
+
#else
|
|
3068
|
+
simde__m512i_private
|
|
3069
|
+
r_,
|
|
3070
|
+
a_ = simde__m512i_to_private(a),
|
|
3071
|
+
b_ = simde__m512i_to_private(b);
|
|
3072
|
+
|
|
3073
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
3074
|
+
r_.i64 = a_.i64 - b_.i64;
|
|
3075
|
+
#else
|
|
3076
|
+
SIMDE__VECTORIZE
|
|
3077
|
+
for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
|
|
3078
|
+
r_.m256i[i] = simde_mm256_sub_epi64(a_.m256i[i], b_.m256i[i]);
|
|
3079
|
+
}
|
|
3080
|
+
#endif
|
|
3081
|
+
|
|
3082
|
+
return simde__m512i_from_private(r_);
|
|
3083
|
+
#endif
|
|
3084
|
+
}
|
|
3085
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
3086
|
+
#define _mm512_sub_epi64(a, b) simde_mm512_sub_epi64(a, b)
|
|
3087
|
+
#endif
|
|
3088
|
+
|
|
3089
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3090
|
+
simde__m512i
|
|
3091
|
+
simde_mm512_mask_sub_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
|
|
3092
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
3093
|
+
return _mm512_mask_sub_epi64(src, k, a, b);
|
|
3094
|
+
#else
|
|
3095
|
+
return simde_mm512_mask_mov_epi64(src, k, simde_mm512_sub_epi64(a, b));
|
|
3096
|
+
#endif
|
|
3097
|
+
}
|
|
3098
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
3099
|
+
#define _mm512_mask_sub_epi64(src, k, a, b) simde_mm512_mask_sub_epi64(src, k, a, b)
|
|
3100
|
+
#endif
|
|
3101
|
+
|
|
3102
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3103
|
+
simde__m512i
|
|
3104
|
+
simde_mm512_maskz_sub_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) {
|
|
3105
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
3106
|
+
return _mm512_maskz_sub_epi64(k, a, b);
|
|
3107
|
+
#else
|
|
3108
|
+
return simde_mm512_maskz_mov_epi64(k, simde_mm512_sub_epi64(a, b));
|
|
3109
|
+
#endif
|
|
3110
|
+
}
|
|
3111
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
3112
|
+
#define _mm512_maskz_sub_epi64(k, a, b) simde_mm512_maskz_sub_epi64(k, a, b)
|
|
3113
|
+
#endif
|
|
3114
|
+
|
|
3115
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3116
|
+
simde__m512
|
|
3117
|
+
simde_mm512_sub_ps (simde__m512 a, simde__m512 b) {
|
|
3118
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
3119
|
+
return _mm512_sub_ps(a, b);
|
|
3120
|
+
#else
|
|
3121
|
+
simde__m512_private
|
|
3122
|
+
r_,
|
|
3123
|
+
a_ = simde__m512_to_private(a),
|
|
3124
|
+
b_ = simde__m512_to_private(b);
|
|
3125
|
+
|
|
3126
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
3127
|
+
r_.f32 = a_.f32 - b_.f32;
|
|
3128
|
+
#else
|
|
3129
|
+
SIMDE__VECTORIZE
|
|
3130
|
+
for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
|
|
3131
|
+
r_.m256[i] = simde_mm256_sub_ps(a_.m256[i], b_.m256[i]);
|
|
3132
|
+
}
|
|
3133
|
+
#endif
|
|
3134
|
+
|
|
3135
|
+
return simde__m512_from_private(r_);
|
|
3136
|
+
#endif
|
|
3137
|
+
}
|
|
3138
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
3139
|
+
#define _mm512_sub_ps(a, b) simde_mm512_sub_ps(a, b)
|
|
3140
|
+
#endif
|
|
3141
|
+
|
|
3142
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3143
|
+
simde__m512
|
|
3144
|
+
simde_mm512_mask_sub_ps (simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
|
|
3145
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
3146
|
+
return _mm512_mask_sub_ps(src, k, a, b);
|
|
3147
|
+
#else
|
|
3148
|
+
return simde_mm512_mask_mov_ps(src, k, simde_mm512_sub_ps(a, b));
|
|
3149
|
+
#endif
|
|
3150
|
+
}
|
|
3151
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
3152
|
+
#define _mm512_mask_sub_ps(src, k, a, b) simde_mm512_mask_sub_ps(src, k, a, b)
|
|
3153
|
+
#endif
|
|
3154
|
+
|
|
3155
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3156
|
+
simde__m512
|
|
3157
|
+
simde_mm512_maskz_sub_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
|
|
3158
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
3159
|
+
return _mm512_maskz_sub_ps(k, a, b);
|
|
3160
|
+
#else
|
|
3161
|
+
return simde_mm512_maskz_mov_ps(k, simde_mm512_sub_ps(a, b));
|
|
3162
|
+
#endif
|
|
3163
|
+
}
|
|
3164
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
3165
|
+
#define _mm512_maskz_sub_ps(k, a, b) simde_mm512_maskz_sub_ps(k, a, b)
|
|
3166
|
+
#endif
|
|
3167
|
+
|
|
3168
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3169
|
+
simde__m512d
|
|
3170
|
+
simde_mm512_sub_pd (simde__m512d a, simde__m512d b) {
|
|
3171
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
3172
|
+
return _mm512_sub_pd(a, b);
|
|
3173
|
+
#else
|
|
3174
|
+
simde__m512d_private
|
|
3175
|
+
r_,
|
|
3176
|
+
a_ = simde__m512d_to_private(a),
|
|
3177
|
+
b_ = simde__m512d_to_private(b);
|
|
3178
|
+
|
|
3179
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
3180
|
+
r_.f64 = a_.f64 - b_.f64;
|
|
3181
|
+
#else
|
|
3182
|
+
SIMDE__VECTORIZE
|
|
3183
|
+
for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
|
|
3184
|
+
r_.m256d[i] = simde_mm256_sub_pd(a_.m256d[i], b_.m256d[i]);
|
|
3185
|
+
}
|
|
3186
|
+
#endif
|
|
3187
|
+
|
|
3188
|
+
return simde__m512d_from_private(r_);
|
|
3189
|
+
#endif
|
|
3190
|
+
}
|
|
3191
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
3192
|
+
#define _mm512_sub_pd(a, b) simde_mm512_sub_pd(a, b)
|
|
3193
|
+
#endif
|
|
3194
|
+
|
|
3195
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3196
|
+
simde__m512d
|
|
3197
|
+
simde_mm512_mask_sub_pd (simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
|
|
3198
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
3199
|
+
return _mm512_mask_sub_pd(src, k, a, b);
|
|
3200
|
+
#else
|
|
3201
|
+
return simde_mm512_mask_mov_pd(src, k, simde_mm512_sub_pd(a, b));
|
|
3202
|
+
#endif
|
|
3203
|
+
}
|
|
3204
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
3205
|
+
#define _mm512_mask_sub_pd(src, k, a, b) simde_mm512_mask_sub_pd(src, k, a, b)
|
|
3206
|
+
#endif
|
|
3207
|
+
|
|
3208
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3209
|
+
simde__m512d
|
|
3210
|
+
simde_mm512_maskz_sub_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
|
|
3211
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
3212
|
+
return _mm512_maskz_sub_pd(k, a, b);
|
|
3213
|
+
#else
|
|
3214
|
+
return simde_mm512_maskz_mov_pd(k, simde_mm512_sub_pd(a, b));
|
|
3215
|
+
#endif
|
|
3216
|
+
}
|
|
3217
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
3218
|
+
#define _mm512_maskz_sub_pd(k, a, b) simde_mm512_maskz_sub_pd(k, a, b)
|
|
3219
|
+
#endif
|
|
3220
|
+
|
|
3221
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3222
|
+
simde__m512i
|
|
3223
|
+
simde_mm512_srli_epi32 (simde__m512i a, unsigned int imm8) {
|
|
3224
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
3225
|
+
return _mm512_srli_epi32(a, imm8);
|
|
3226
|
+
#else
|
|
3227
|
+
simde__m512i_private
|
|
3228
|
+
r_,
|
|
3229
|
+
a_ = simde__m512i_to_private(a);
|
|
3230
|
+
|
|
3231
|
+
#if defined(SIMDE_ARCH_X86_AVX2)
|
|
3232
|
+
r_.m256i[0] = simde_mm256_srli_epi32(a_.m256i[0], imm8);
|
|
3233
|
+
r_.m256i[1] = simde_mm256_srli_epi32(a_.m256i[1], imm8);
|
|
3234
|
+
#elif defined(SIMDE_ARCH_X86_SSE2)
|
|
3235
|
+
r_.m128i[0] = simde_mm_srli_epi32(a_.m128i[0], imm8);
|
|
3236
|
+
r_.m128i[1] = simde_mm_srli_epi32(a_.m128i[1], imm8);
|
|
3237
|
+
r_.m128i[2] = simde_mm_srli_epi32(a_.m128i[2], imm8);
|
|
3238
|
+
r_.m128i[3] = simde_mm_srli_epi32(a_.m128i[3], imm8);
|
|
3239
|
+
#else
|
|
3240
|
+
if (imm8 > 31) {
|
|
3241
|
+
simde_memset(&r_, 0, sizeof(r_));
|
|
3242
|
+
} else {
|
|
3243
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
3244
|
+
r_.u32 = a_.u32 >> imm8;
|
|
3245
|
+
#else
|
|
3246
|
+
SIMDE__VECTORIZE
|
|
3247
|
+
for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
|
|
3248
|
+
r_.u32[i] = a_.u32[i] >> imm8;
|
|
3249
|
+
}
|
|
3250
|
+
#endif
|
|
3251
|
+
}
|
|
3252
|
+
#endif
|
|
3253
|
+
|
|
3254
|
+
return simde__m512i_from_private(r_);
|
|
3255
|
+
#endif
|
|
3256
|
+
}
|
|
3257
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
3258
|
+
#define _mm512_srli_epi32(a, imm8) simde_mm512_srli_epi32(a, imm8)
|
|
3259
|
+
#endif
|
|
3260
|
+
|
|
3261
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3262
|
+
simde__m512i
|
|
3263
|
+
simde_mm512_srli_epi64 (simde__m512i a, unsigned int imm8) {
|
|
3264
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
3265
|
+
return _mm512_srli_epi64(a, imm8);
|
|
3266
|
+
#else
|
|
3267
|
+
simde__m512i_private
|
|
3268
|
+
r_,
|
|
3269
|
+
a_ = simde__m512i_to_private(a);
|
|
3270
|
+
|
|
3271
|
+
#if defined(SIMDE_ARCH_X86_AVX2)
|
|
3272
|
+
r_.m256i[0] = simde_mm256_srli_epi64(a_.m256i[0], imm8);
|
|
3273
|
+
r_.m256i[1] = simde_mm256_srli_epi64(a_.m256i[1], imm8);
|
|
3274
|
+
#elif defined(SIMDE_ARCH_X86_SSE2)
|
|
3275
|
+
r_.m128i[0] = simde_mm_srli_epi64(a_.m128i[0], imm8);
|
|
3276
|
+
r_.m128i[1] = simde_mm_srli_epi64(a_.m128i[1], imm8);
|
|
3277
|
+
r_.m128i[2] = simde_mm_srli_epi64(a_.m128i[2], imm8);
|
|
3278
|
+
r_.m128i[3] = simde_mm_srli_epi64(a_.m128i[3], imm8);
|
|
3279
|
+
#else
|
|
3280
|
+
/* The Intel Intrinsics Guide says that only the 8 LSBits of imm8 are
|
|
3281
|
+
* used. In this case we should do "imm8 &= 0xff" here. However in
|
|
3282
|
+
* practice all bits are used. */
|
|
3283
|
+
if (imm8 > 63) {
|
|
3284
|
+
simde_memset(&r_, 0, sizeof(r_));
|
|
3285
|
+
} else {
|
|
3286
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
3287
|
+
r_.u64 = a_.u64 >> imm8;
|
|
3288
|
+
#else
|
|
3289
|
+
SIMDE__VECTORIZE
|
|
3290
|
+
for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
|
|
3291
|
+
r_.u64[i] = a_.u64[i] >> imm8;
|
|
3292
|
+
}
|
|
3293
|
+
#endif
|
|
3294
|
+
}
|
|
3295
|
+
#endif
|
|
3296
|
+
|
|
3297
|
+
return simde__m512i_from_private(r_);
|
|
3298
|
+
#endif
|
|
3299
|
+
}
|
|
3300
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
3301
|
+
#define _mm512_srli_epi64(a, imm8) simde_mm512_srli_epi64(a, imm8)
|
|
3302
|
+
#endif
|
|
3303
|
+
|
|
3304
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3305
|
+
simde__mmask16
|
|
3306
|
+
simde_mm512_mask_test_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) {
|
|
3307
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
3308
|
+
return _mm512_mask_test_epi32_mask(k1, a, b);
|
|
3309
|
+
#else
|
|
3310
|
+
simde__m512i_private
|
|
3311
|
+
a_ = simde__m512i_to_private(a),
|
|
3312
|
+
b_ = simde__m512i_to_private(b);
|
|
3313
|
+
simde__mmask16 r = 0;
|
|
3314
|
+
|
|
3315
|
+
SIMDE__VECTORIZE_REDUCTION(|:r)
|
|
3316
|
+
for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
|
|
3317
|
+
r |= !!(a_.i32[i] & b_.i32[i]) << i;
|
|
3318
|
+
}
|
|
3319
|
+
|
|
3320
|
+
return r & k1;
|
|
3321
|
+
#endif
|
|
3322
|
+
}
|
|
3323
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
3324
|
+
#define _mm512_mask_test_epi32_mask(a, b) simde_mm512_mask_test_epi32_mask(a, b)
|
|
3325
|
+
#endif
|
|
3326
|
+
|
|
3327
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3328
|
+
simde__mmask8
|
|
3329
|
+
simde_mm512_mask_test_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) {
|
|
3330
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
3331
|
+
return _mm512_mask_test_epi64_mask(k1, a, b);
|
|
3332
|
+
#else
|
|
3333
|
+
simde__m512i_private
|
|
3334
|
+
a_ = simde__m512i_to_private(a),
|
|
3335
|
+
b_ = simde__m512i_to_private(b);
|
|
3336
|
+
simde__mmask8 r = 0;
|
|
3337
|
+
|
|
3338
|
+
SIMDE__VECTORIZE_REDUCTION(|:r)
|
|
3339
|
+
for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
|
|
3340
|
+
r |= !!(a_.i64[i] & b_.i64[i]) << i;
|
|
3341
|
+
}
|
|
3342
|
+
|
|
3343
|
+
return r & k1;
|
|
3344
|
+
#endif
|
|
3345
|
+
}
|
|
3346
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
3347
|
+
#define _mm512_mask_test_epi64_mask(a, b) simde_mm512_mask_test_epi64_mask(a, b)
|
|
3348
|
+
#endif
|
|
3349
|
+
|
|
3350
|
+
SIMDE__FUNCTION_ATTRIBUTES
|
|
3351
|
+
simde__m512i
|
|
3352
|
+
simde_mm512_xor_si512 (simde__m512i a, simde__m512i b) {
|
|
3353
|
+
#if defined(SIMDE_AVX512F_NATIVE)
|
|
3354
|
+
return _mm512_xor_si512(a, b);
|
|
3355
|
+
#else
|
|
3356
|
+
simde__m512i_private
|
|
3357
|
+
r_,
|
|
3358
|
+
a_ = simde__m512i_to_private(a),
|
|
3359
|
+
b_ = simde__m512i_to_private(b);
|
|
3360
|
+
|
|
3361
|
+
#if defined(SIMDE_ARCH_X86_AVX2)
|
|
3362
|
+
r_.m256i[0] = simde_mm256_xor_si256(a_.m256i[0], b_.m256i[0]);
|
|
3363
|
+
r_.m256i[1] = simde_mm256_xor_si256(a_.m256i[1], b_.m256i[1]);
|
|
3364
|
+
#elif defined(SIMDE_ARCH_X86_SSE2)
|
|
3365
|
+
r_.m128i[0] = simde_mm_xor_si128(a_.m128i[0], b_.m128i[0]);
|
|
3366
|
+
r_.m128i[1] = simde_mm_xor_si128(a_.m128i[1], b_.m128i[1]);
|
|
3367
|
+
r_.m128i[2] = simde_mm_xor_si128(a_.m128i[2], b_.m128i[2]);
|
|
3368
|
+
r_.m128i[3] = simde_mm_xor_si128(a_.m128i[3], b_.m128i[3]);
|
|
3369
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
3370
|
+
r_.i32f = a_.i32f ^ b_.i32f;
|
|
3371
|
+
#else
|
|
3372
|
+
SIMDE__VECTORIZE
|
|
3373
|
+
for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
|
|
3374
|
+
r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i];
|
|
3375
|
+
}
|
|
3376
|
+
#endif
|
|
3377
|
+
|
|
3378
|
+
return simde__m512i_from_private(r_);
|
|
3379
|
+
#endif
|
|
3380
|
+
}
|
|
3381
|
+
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
|
|
3382
|
+
#define _mm512_xor_si512(a, b) simde_mm512_xor_si512(a, b)
|
|
3383
|
+
#endif
|
|
3384
|
+
|
|
3385
|
+
SIMDE__END_DECLS
|
|
3386
|
+
|
|
3387
|
+
HEDLEY_DIAGNOSTIC_POP
|
|
3388
|
+
|
|
3389
|
+
#endif /* !defined(SIMDE__AVX512F_H) */
|