minimap2 0.2.25.1 → 0.2.25.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (100) hide show
  1. checksums.yaml +4 -4
  2. data/ext/Rakefile +2 -2
  3. data/lib/minimap2/version.rb +1 -1
  4. metadata +1 -97
  5. data/ext/minimap2/lib/simde/CONTRIBUTING.md +0 -114
  6. data/ext/minimap2/lib/simde/COPYING +0 -20
  7. data/ext/minimap2/lib/simde/README.md +0 -333
  8. data/ext/minimap2/lib/simde/amalgamate.py +0 -58
  9. data/ext/minimap2/lib/simde/meson.build +0 -33
  10. data/ext/minimap2/lib/simde/netlify.toml +0 -20
  11. data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +0 -140
  12. data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +0 -137
  13. data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +0 -142
  14. data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +0 -145
  15. data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +0 -140
  16. data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +0 -145
  17. data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +0 -140
  18. data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +0 -143
  19. data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +0 -137
  20. data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +0 -141
  21. data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +0 -147
  22. data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +0 -141
  23. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +0 -134
  24. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +0 -138
  25. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +0 -134
  26. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +0 -137
  27. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +0 -131
  28. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +0 -135
  29. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +0 -141
  30. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +0 -135
  31. data/ext/minimap2/lib/simde/simde/arm/neon.h +0 -97
  32. data/ext/minimap2/lib/simde/simde/check.h +0 -267
  33. data/ext/minimap2/lib/simde/simde/debug-trap.h +0 -83
  34. data/ext/minimap2/lib/simde/simde/hedley.h +0 -1899
  35. data/ext/minimap2/lib/simde/simde/simde-arch.h +0 -445
  36. data/ext/minimap2/lib/simde/simde/simde-common.h +0 -697
  37. data/ext/minimap2/lib/simde/simde/x86/avx.h +0 -5385
  38. data/ext/minimap2/lib/simde/simde/x86/avx2.h +0 -2402
  39. data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +0 -391
  40. data/ext/minimap2/lib/simde/simde/x86/avx512f.h +0 -3389
  41. data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +0 -112
  42. data/ext/minimap2/lib/simde/simde/x86/fma.h +0 -659
  43. data/ext/minimap2/lib/simde/simde/x86/mmx.h +0 -2210
  44. data/ext/minimap2/lib/simde/simde/x86/sse.h +0 -3696
  45. data/ext/minimap2/lib/simde/simde/x86/sse2.h +0 -5991
  46. data/ext/minimap2/lib/simde/simde/x86/sse3.h +0 -343
  47. data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +0 -1783
  48. data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +0 -105
  49. data/ext/minimap2/lib/simde/simde/x86/ssse3.h +0 -1053
  50. data/ext/minimap2/lib/simde/simde/x86/svml.h +0 -543
  51. data/ext/minimap2/lib/simde/test/CMakeLists.txt +0 -166
  52. data/ext/minimap2/lib/simde/test/arm/meson.build +0 -4
  53. data/ext/minimap2/lib/simde/test/arm/neon/meson.build +0 -23
  54. data/ext/minimap2/lib/simde/test/arm/neon/skel.c +0 -871
  55. data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +0 -134
  56. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +0 -39
  57. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +0 -10
  58. data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +0 -1260
  59. data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +0 -873
  60. data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +0 -1084
  61. data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +0 -1260
  62. data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +0 -18
  63. data/ext/minimap2/lib/simde/test/arm/test-arm.c +0 -20
  64. data/ext/minimap2/lib/simde/test/arm/test-arm.h +0 -8
  65. data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +0 -171
  66. data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +0 -68
  67. data/ext/minimap2/lib/simde/test/meson.build +0 -64
  68. data/ext/minimap2/lib/simde/test/munit/COPYING +0 -21
  69. data/ext/minimap2/lib/simde/test/munit/Makefile +0 -55
  70. data/ext/minimap2/lib/simde/test/munit/README.md +0 -54
  71. data/ext/minimap2/lib/simde/test/munit/example.c +0 -351
  72. data/ext/minimap2/lib/simde/test/munit/meson.build +0 -37
  73. data/ext/minimap2/lib/simde/test/munit/munit.c +0 -2055
  74. data/ext/minimap2/lib/simde/test/munit/munit.h +0 -535
  75. data/ext/minimap2/lib/simde/test/run-tests.c +0 -20
  76. data/ext/minimap2/lib/simde/test/run-tests.h +0 -260
  77. data/ext/minimap2/lib/simde/test/x86/avx.c +0 -13752
  78. data/ext/minimap2/lib/simde/test/x86/avx2.c +0 -9977
  79. data/ext/minimap2/lib/simde/test/x86/avx512bw.c +0 -2664
  80. data/ext/minimap2/lib/simde/test/x86/avx512f.c +0 -10416
  81. data/ext/minimap2/lib/simde/test/x86/avx512vl.c +0 -210
  82. data/ext/minimap2/lib/simde/test/x86/fma.c +0 -2557
  83. data/ext/minimap2/lib/simde/test/x86/meson.build +0 -33
  84. data/ext/minimap2/lib/simde/test/x86/mmx.c +0 -2878
  85. data/ext/minimap2/lib/simde/test/x86/skel.c +0 -2984
  86. data/ext/minimap2/lib/simde/test/x86/sse.c +0 -5121
  87. data/ext/minimap2/lib/simde/test/x86/sse2.c +0 -9860
  88. data/ext/minimap2/lib/simde/test/x86/sse3.c +0 -486
  89. data/ext/minimap2/lib/simde/test/x86/sse4.1.c +0 -3446
  90. data/ext/minimap2/lib/simde/test/x86/sse4.2.c +0 -101
  91. data/ext/minimap2/lib/simde/test/x86/ssse3.c +0 -2084
  92. data/ext/minimap2/lib/simde/test/x86/svml.c +0 -1545
  93. data/ext/minimap2/lib/simde/test/x86/test-avx.h +0 -16
  94. data/ext/minimap2/lib/simde/test/x86/test-avx512.h +0 -25
  95. data/ext/minimap2/lib/simde/test/x86/test-mmx.h +0 -13
  96. data/ext/minimap2/lib/simde/test/x86/test-sse.h +0 -13
  97. data/ext/minimap2/lib/simde/test/x86/test-sse2.h +0 -13
  98. data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +0 -196
  99. data/ext/minimap2/lib/simde/test/x86/test-x86.c +0 -48
  100. data/ext/minimap2/lib/simde/test/x86/test-x86.h +0 -8
@@ -1,3389 +0,0 @@
1
- /* Permission is hereby granted, free of charge, to any person
2
- * obtaining a copy of this software and associated documentation
3
- * files (the "Software"), to deal in the Software without
4
- * restriction, including without limitation the rights to use, copy,
5
- * modify, merge, publish, distribute, sublicense, and/or sell copies
6
- * of the Software, and to permit persons to whom the Software is
7
- * furnished to do so, subject to the following conditions:
8
- *
9
- * The above copyright notice and this permission notice shall be
10
- * included in all copies or substantial portions of the Software.
11
- *
12
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
14
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
16
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
17
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
- * SOFTWARE.
20
- *
21
- * Copyright:
22
- * 2020 Evan Nemerson <evan@nemerson.com>
23
- */
24
-
25
- #if !defined(SIMDE__AVX512F_H)
26
- # if !defined(SIMDE__AVX512F_H)
27
- # define SIMDE__AVX512F_H
28
- # endif
29
- # include "avx2.h"
30
-
31
- HEDLEY_DIAGNOSTIC_PUSH
32
- SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
33
-
34
- # if defined(SIMDE_ARCH_X86_AVX512F) && !defined(SIMDE_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
35
- # define SIMDE_AVX512F_NATIVE
36
- # elif defined(SIMDE_ARCH_ARM_NEON) && !defined(SIMDE_AVX512F_NO_NEON) && !defined(SIMDE_NO_NEON)
37
- # define SIMDE_AVX512F_NEON
38
- # elif defined(SIMDE_ARCH_POWER_ALTIVEC)
39
- # define SIMDE_AVX512F_POWER_ALTIVEC
40
- # endif
41
-
42
- /* The problem is that Microsoft doesn't support 64-byte aligned parameters, except for
43
- __m512/__m512i/__m512d. Since our private union has an __m512 member it will be 64-byte
44
- aligned even if we reduce the alignment requirements of other members.
45
-
46
- Even if we're on x86 and use the native AVX-512 types for arguments/return values, the
47
- to/from private functions will break, and I'm not willing to change their APIs to use
48
- pointers (which would also require more verbose code on the caller side) just to make
49
- MSVC happy.
50
-
51
- If you want to use AVX-512 in SIMDe, you'll need to either upgrade to MSVC 2017 or later,
52
- or upgrade to a different compiler (clang-cl, perhaps?). If you have an idea of how to
53
- fix this without requiring API changes (except transparently through macros), patches
54
- are welcome. */
55
- # if defined(HEDLEY_MSVC_VERSION) && !HEDLEY_MSVC_VERSION_CHECK(19,10,0)
56
- # if defined(SIMDE_AVX512F_NATIVE)
57
- # undef SIMDE_AVX512F_NATIVE
58
- # pragma message("Native AVX-512 support requires MSVC 2017 or later. See comment above (in code) for details.")
59
- # endif
60
- # define SIMDE_AVX512_ALIGN SIMDE_ALIGN(32)
61
- # else
62
- # define SIMDE_AVX512_ALIGN SIMDE_ALIGN(64)
63
- # endif
64
-
65
- # if defined(SIMDE_AVX512F_NATIVE)
66
- # include <immintrin.h>
67
- # endif
68
-
69
- # if defined(SIMDE_AVX512F_POWER_ALTIVEC)
70
- # include <altivec.h>
71
- # endif
72
-
73
- SIMDE__BEGIN_DECLS
74
-
75
- typedef union {
76
- #if defined(SIMDE_VECTOR_SUBSCRIPT)
77
- SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
78
- SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
79
- SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
80
- SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
81
- SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
82
- SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
83
- SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
84
- SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
85
- #if defined(SIMDE__HAVE_INT128)
86
- SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
87
- SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
88
- #endif
89
- SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
90
- SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
91
- SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
92
- SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
93
- #else
94
- SIMDE_AVX512_ALIGN int8_t i8[64];
95
- SIMDE_AVX512_ALIGN int16_t i16[32];
96
- SIMDE_AVX512_ALIGN int32_t i32[16];
97
- SIMDE_AVX512_ALIGN int64_t i64[8];
98
- SIMDE_AVX512_ALIGN uint8_t u8[64];
99
- SIMDE_AVX512_ALIGN uint16_t u16[32];
100
- SIMDE_AVX512_ALIGN uint32_t u32[16];
101
- SIMDE_AVX512_ALIGN uint64_t u64[8];
102
- SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)];
103
- SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)];
104
- #if defined(SIMDE__HAVE_INT128)
105
- SIMDE_AVX512_ALIGN simde_int128 i128[4];
106
- SIMDE_AVX512_ALIGN simde_uint128 u128[4];
107
- #endif
108
- SIMDE_AVX512_ALIGN simde_float32 f32[16];
109
- SIMDE_AVX512_ALIGN simde_float64 f64[8];
110
- #endif
111
-
112
- SIMDE_AVX512_ALIGN simde__m128_private m128_private[4];
113
- SIMDE_AVX512_ALIGN simde__m128 m128[4];
114
- SIMDE_AVX512_ALIGN simde__m256_private m256_private[2];
115
- SIMDE_AVX512_ALIGN simde__m256 m256[2];
116
-
117
- #if defined(SIMDE_AVX512F_NATIVE)
118
- SIMDE_AVX512_ALIGN __m512 n;
119
- #elif defined(SIMDE_ARCH_POWER_ALTIVEC)
120
- SIMDE_ALIGN(16) vector unsigned char altivec_u8[4];
121
- SIMDE_ALIGN(16) vector unsigned short altivec_u16[4];
122
- SIMDE_ALIGN(16) vector unsigned int altivec_u32[4];
123
- SIMDE_ALIGN(16) vector unsigned long long altivec_u64[4];
124
- SIMDE_ALIGN(16) vector signed char altivec_i8[4];
125
- SIMDE_ALIGN(16) vector signed short altivec_i16[4];
126
- SIMDE_ALIGN(16) vector signed int altivec_i32[4];
127
- SIMDE_ALIGN(16) vector signed long long altivec_i64[4];
128
- SIMDE_ALIGN(16) vector float altivec_f32[4];
129
- SIMDE_ALIGN(16) vector double altivec_f64[4];
130
- #endif
131
- } simde__m512_private;
132
-
133
- typedef union {
134
- #if defined(SIMDE_VECTOR_SUBSCRIPT)
135
- SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
136
- SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
137
- SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
138
- SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
139
- SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
140
- SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
141
- SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
142
- SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
143
- #if defined(SIMDE__HAVE_INT128)
144
- SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
145
- SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
146
- #endif
147
- SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
148
- SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
149
- SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
150
- SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
151
- #else
152
- SIMDE_AVX512_ALIGN int8_t i8[64];
153
- SIMDE_AVX512_ALIGN int16_t i16[32];
154
- SIMDE_AVX512_ALIGN int32_t i32[16];
155
- SIMDE_AVX512_ALIGN int64_t i64[8];
156
- SIMDE_AVX512_ALIGN uint8_t u8[64];
157
- SIMDE_AVX512_ALIGN uint16_t u16[32];
158
- SIMDE_AVX512_ALIGN uint32_t u32[16];
159
- SIMDE_AVX512_ALIGN uint64_t u64[8];
160
- #if defined(SIMDE__HAVE_INT128)
161
- SIMDE_AVX512_ALIGN simde_int128 i128[4];
162
- SIMDE_AVX512_ALIGN simde_uint128 u128[4];
163
- #endif
164
- SIMDE_AVX512_ALIGN simde_float32 f32[16];
165
- SIMDE_AVX512_ALIGN simde_float64 f64[8];
166
- SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)];
167
- SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)];
168
- #endif
169
-
170
- SIMDE_AVX512_ALIGN simde__m128d_private m128d_private[4];
171
- SIMDE_AVX512_ALIGN simde__m128d m128d[4];
172
- SIMDE_AVX512_ALIGN simde__m256d_private m256d_private[2];
173
- SIMDE_AVX512_ALIGN simde__m256d m256d[2];
174
-
175
- #if defined(SIMDE_AVX512F_NATIVE)
176
- SIMDE_AVX512_ALIGN __m512d n;
177
- #elif defined(SIMDE_ARCH_POWER_ALTIVEC)
178
- SIMDE_ALIGN(16) vector unsigned char altivec_u8[4];
179
- SIMDE_ALIGN(16) vector unsigned short altivec_u16[4];
180
- SIMDE_ALIGN(16) vector unsigned int altivec_u32[4];
181
- SIMDE_ALIGN(16) vector unsigned long long altivec_u64[4];
182
- SIMDE_ALIGN(16) vector signed char altivec_i8[4];
183
- SIMDE_ALIGN(16) vector signed short altivec_i16[4];
184
- SIMDE_ALIGN(16) vector signed int altivec_i32[4];
185
- SIMDE_ALIGN(16) vector signed long long altivec_i64[4];
186
- SIMDE_ALIGN(16) vector float altivec_f32[4];
187
- SIMDE_ALIGN(16) vector double altivec_f64[4];
188
- #endif
189
- } simde__m512d_private;
190
-
191
- typedef union {
192
- #if defined(SIMDE_VECTOR_SUBSCRIPT)
193
- SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
194
- SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
195
- SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
196
- SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
197
- SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
198
- SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
199
- SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
200
- SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
201
- #if defined(SIMDE__HAVE_INT128)
202
- SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
203
- SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
204
- #endif
205
- SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
206
- SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
207
- SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
208
- SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
209
- #else
210
- SIMDE_AVX512_ALIGN int8_t i8[64];
211
- SIMDE_AVX512_ALIGN int16_t i16[32];
212
- SIMDE_AVX512_ALIGN int32_t i32[16];
213
- SIMDE_AVX512_ALIGN int64_t i64[8];
214
- SIMDE_AVX512_ALIGN uint8_t u8[64];
215
- SIMDE_AVX512_ALIGN uint16_t u16[32];
216
- SIMDE_AVX512_ALIGN uint32_t u32[16];
217
- SIMDE_AVX512_ALIGN uint64_t u64[8];
218
- SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)];
219
- SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)];
220
- #if defined(SIMDE__HAVE_INT128)
221
- SIMDE_AVX512_ALIGN simde_int128 i128[4];
222
- SIMDE_AVX512_ALIGN simde_uint128 u128[4];
223
- #endif
224
- SIMDE_AVX512_ALIGN simde_float32 f32[16];
225
- SIMDE_AVX512_ALIGN simde_float64 f64[8];
226
- #endif
227
-
228
- SIMDE_AVX512_ALIGN simde__m128i_private m128i_private[4];
229
- SIMDE_AVX512_ALIGN simde__m128i m128i[4];
230
- SIMDE_AVX512_ALIGN simde__m256i_private m256i_private[2];
231
- SIMDE_AVX512_ALIGN simde__m256i m256i[2];
232
-
233
- #if defined(SIMDE_AVX512F_NATIVE)
234
- SIMDE_AVX512_ALIGN __m512i n;
235
- #elif defined(SIMDE_ARCH_POWER_ALTIVEC)
236
- SIMDE_ALIGN(16) vector unsigned char altivec_u8[4];
237
- SIMDE_ALIGN(16) vector unsigned short altivec_u16[4];
238
- SIMDE_ALIGN(16) vector unsigned int altivec_u32[4];
239
- SIMDE_ALIGN(16) vector unsigned long long altivec_u64[4];
240
- SIMDE_ALIGN(16) vector signed char altivec_i8[4];
241
- SIMDE_ALIGN(16) vector signed short altivec_i16[4];
242
- SIMDE_ALIGN(16) vector signed int altivec_i32[4];
243
- SIMDE_ALIGN(16) vector signed long long altivec_i64[4];
244
- SIMDE_ALIGN(16) vector float altivec_f32[4];
245
- SIMDE_ALIGN(16) vector double altivec_f64[4];
246
- #endif
247
- } simde__m512i_private;
248
-
249
- #if defined(SIMDE_AVX512F_NATIVE)
250
- typedef __m512 simde__m512;
251
- typedef __m512i simde__m512i;
252
- typedef __m512d simde__m512d;
253
- typedef __mmask8 simde__mmask8;
254
- typedef __mmask16 simde__mmask16;
255
- typedef __mmask32 simde__mmask32;
256
- typedef __mmask64 simde__mmask64;
257
- #else
258
- #if defined(SIMDE_VECTOR_SUBSCRIPT)
259
- typedef simde_float32 simde__m512 SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
260
- typedef int_fast32_t simde__m512i SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
261
- typedef simde_float64 simde__m512d SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
262
- #else
263
- typedef simde__m512_private simde__m512;
264
- typedef simde__m512i_private simde__m512i;
265
- typedef simde__m512d_private simde__m512d;
266
- #endif
267
-
268
- typedef uint_fast8_t simde__mmask8;
269
- typedef uint_fast16_t simde__mmask16;
270
- typedef uint_fast32_t simde__mmask32;
271
- typedef uint_fast64_t simde__mmask64;
272
- #endif
273
-
274
- #if !defined(SIMDE_AVX512F_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
275
- #define SIMDE_AVX512F_ENABLE_NATIVE_ALIASES
276
- typedef simde__m512 __m512;
277
- typedef simde__m512i __m512i;
278
- typedef simde__m512d __m512d;
279
- #endif
280
-
281
- HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512), "simde__m512 size incorrect");
282
- HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512_private), "simde__m512_private size incorrect");
283
- HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512i), "simde__m512i size incorrect");
284
- HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512i_private), "simde__m512i_private size incorrect");
285
- HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512d), "simde__m512d size incorrect");
286
- HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512d_private), "simde__m512d_private size incorrect");
287
- #if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)
288
- HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512) == 32, "simde__m512 is not 32-byte aligned");
289
- HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512_private) == 32, "simde__m512_private is not 32-byte aligned");
290
- HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512i) == 32, "simde__m512i is not 32-byte aligned");
291
- HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512i_private) == 32, "simde__m512i_private is not 32-byte aligned");
292
- HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512d) == 32, "simde__m512d is not 32-byte aligned");
293
- HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512d_private) == 32, "simde__m512d_private is not 32-byte aligned");
294
- #endif
295
-
296
- SIMDE__FUNCTION_ATTRIBUTES
297
- simde__m512
298
- simde__m512_from_private(simde__m512_private v) {
299
- simde__m512 r;
300
- simde_memcpy(&r, &v, sizeof(r));
301
- return r;
302
- }
303
-
304
- SIMDE__FUNCTION_ATTRIBUTES
305
- simde__m512_private
306
- simde__m512_to_private(simde__m512 v) {
307
- simde__m512_private r;
308
- simde_memcpy(&r, &v, sizeof(r));
309
- return r;
310
- }
311
-
312
- SIMDE__FUNCTION_ATTRIBUTES
313
- simde__m512i
314
- simde__m512i_from_private(simde__m512i_private v) {
315
- simde__m512i r;
316
- simde_memcpy(&r, &v, sizeof(r));
317
- return r;
318
- }
319
-
320
- SIMDE__FUNCTION_ATTRIBUTES
321
- simde__m512i_private
322
- simde__m512i_to_private(simde__m512i v) {
323
- simde__m512i_private r;
324
- simde_memcpy(&r, &v, sizeof(r));
325
- return r;
326
- }
327
-
328
- SIMDE__FUNCTION_ATTRIBUTES
329
- simde__m512d
330
- simde__m512d_from_private(simde__m512d_private v) {
331
- simde__m512d r;
332
- simde_memcpy(&r, &v, sizeof(r));
333
- return r;
334
- }
335
-
336
- SIMDE__FUNCTION_ATTRIBUTES
337
- simde__m512d_private
338
- simde__m512d_to_private(simde__m512d v) {
339
- simde__m512d_private r;
340
- simde_memcpy(&r, &v, sizeof(r));
341
- return r;
342
- }
343
-
344
- SIMDE__FUNCTION_ATTRIBUTES
345
- simde__mmask16
346
- simde__m512i_private_to_mmask16 (simde__m512i_private a) {
347
- #if defined(SIMDE_AVX512F_NATIVE)
348
- HEDLEY_UNREACHABLE_RETURN(0);
349
- #else
350
- simde__mmask16 r = 0;
351
-
352
- /* Note: using addition instead of a bitwise or for the reduction
353
- seems like it should improve things since hardware support for
354
- horizontal addition is better than bitwise or. However, GCC
355
- generates the same code, and clang is actually a bit slower.
356
- I suspect this can be optimized quite a bit, and this function
357
- is probably going to be pretty hot. */
358
- SIMDE__VECTORIZE_REDUCTION(|:r)
359
- for (size_t i = 0 ; i < (sizeof(a.i32) / sizeof(a.i32[0])) ; i++) {
360
- r |= !!(a.i32[i]) << i;
361
- }
362
-
363
- return r;
364
- #endif
365
- }
366
-
367
- SIMDE__FUNCTION_ATTRIBUTES
368
- simde__mmask8
369
- simde__m512i_private_to_mmask8 (simde__m512i_private a) {
370
- #if defined(SIMDE_AVX512F_NATIVE)
371
- HEDLEY_UNREACHABLE_RETURN(0);
372
- #else
373
- simde__mmask8 r = 0;
374
- SIMDE__VECTORIZE_REDUCTION(|:r)
375
- for (size_t i = 0 ; i < (sizeof(a.i64) / sizeof(a.i64[0])) ; i++) {
376
- r |= !!(a.i64[i]) << i;
377
- }
378
-
379
- return r;
380
- #endif
381
- }
382
-
383
- SIMDE__FUNCTION_ATTRIBUTES
384
- simde__m512i
385
- simde__m512i_from_mmask16 (simde__mmask16 k) {
386
- #if defined(SIMDE_AVX512F_NATIVE)
387
- /* Should never be reached. */
388
- return _mm512_mask_mov_epi32(_mm512_setzero_epi32(), k, _mm512_set1_epi32(~INT32_C(0)));
389
- #else
390
- simde__m512i_private r_;
391
-
392
- SIMDE__VECTORIZE
393
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
394
- r_.i32[i] = (k & (1 << i)) ? ~INT32_C(0) : INT32_C(0);
395
- }
396
-
397
- return simde__m512i_from_private(r_);
398
- #endif
399
- }
400
-
401
- SIMDE__FUNCTION_ATTRIBUTES
402
- simde__m512
403
- simde_mm512_castpd_ps (simde__m512d a) {
404
- #if defined(SIMDE_AVX512F_NATIVE)
405
- return _mm512_castpd_ps(a);
406
- #else
407
- simde__m512 r;
408
- memcpy(&r, &a, sizeof(r));
409
- return r;
410
- #endif
411
- }
412
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
413
- #define _mm512_castpd_ps(a) simde_mm512_castpd_ps(a)
414
- #endif
415
-
416
- SIMDE__FUNCTION_ATTRIBUTES
417
- simde__m512i
418
- simde_mm512_castpd_si512 (simde__m512d a) {
419
- #if defined(SIMDE_AVX512F_NATIVE)
420
- return _mm512_castpd_si512(a);
421
- #else
422
- simde__m512i r;
423
- memcpy(&r, &a, sizeof(r));
424
- return r;
425
- #endif
426
- }
427
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
428
- #define _mm512_castpd_si512(a) simde_mm512_castpd_si512(a)
429
- #endif
430
-
431
- SIMDE__FUNCTION_ATTRIBUTES
432
- simde__m512d
433
- simde_mm512_castps_pd (simde__m512 a) {
434
- #if defined(SIMDE_AVX512F_NATIVE)
435
- return _mm512_castps_pd(a);
436
- #else
437
- simde__m512d r;
438
- memcpy(&r, &a, sizeof(r));
439
- return r;
440
- #endif
441
- }
442
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
443
- #define _mm512_castps_pd(a) simde_mm512_castps_pd(a)
444
- #endif
445
-
446
- SIMDE__FUNCTION_ATTRIBUTES
447
- simde__m512i
448
- simde_mm512_castps_si512 (simde__m512 a) {
449
- #if defined(SIMDE_AVX512F_NATIVE)
450
- return _mm512_castps_si512(a);
451
- #else
452
- simde__m512i r;
453
- memcpy(&r, &a, sizeof(r));
454
- return r;
455
- #endif
456
- }
457
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
458
- #define _mm512_castps_si512(a) simde_mm512_castps_si512(a)
459
- #endif
460
-
461
- SIMDE__FUNCTION_ATTRIBUTES
462
- simde__m512
463
- simde_mm512_castsi512_ps (simde__m512i a) {
464
- #if defined(SIMDE_AVX512F_NATIVE)
465
- return _mm512_castsi512_ps(a);
466
- #else
467
- simde__m512 r;
468
- memcpy(&r, &a, sizeof(r));
469
- return r;
470
- #endif
471
- }
472
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
473
- #define _mm512_castsi512_ps(a) simde_mm512_castsi512_ps(a)
474
- #endif
475
-
476
- SIMDE__FUNCTION_ATTRIBUTES
477
- simde__m512d
478
- simde_mm512_castsi512_pd (simde__m512i a) {
479
- #if defined(SIMDE_AVX512F_NATIVE)
480
- return _mm512_castsi512_pd(a);
481
- #else
482
- simde__m512d r;
483
- memcpy(&r, &a, sizeof(r));
484
- return r;
485
- #endif
486
- }
487
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
488
- #define _mm512_castsi512_pd(a) simde_mm512_castsi512_pd(a)
489
- #endif
490
-
491
- SIMDE__FUNCTION_ATTRIBUTES
492
- simde__m512d
493
- simde_mm512_castpd128_pd512 (simde__m128d a) {
494
- #if defined(SIMDE_AVX512F_NATIVE)
495
- return _mm512_castpd128_pd512(a);
496
- #else
497
- simde__m512d_private r_;
498
- r_.m128d[0] = a;
499
- return simde__m512d_from_private(r_);
500
- #endif
501
- }
502
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
503
- #define _mm512_castpd128_pd512(a) simde_mm512_castpd128_pd512(a)
504
- #endif
505
-
506
- SIMDE__FUNCTION_ATTRIBUTES
507
- simde__m512d
508
- simde_mm512_castpd256_pd512 (simde__m256d a) {
509
- #if defined(SIMDE_AVX512F_NATIVE)
510
- return _mm512_castpd256_pd512(a);
511
- #else
512
- simde__m512d_private r_;
513
- r_.m256d[0] = a;
514
- return simde__m512d_from_private(r_);
515
- #endif
516
- }
517
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
518
- #define _mm512_castpd256_pd512(a) simde_mm512_castpd256_pd512(a)
519
- #endif
520
-
521
- SIMDE__FUNCTION_ATTRIBUTES
522
- simde__m128d
523
- simde_mm512_castpd512_pd128 (simde__m512d a) {
524
- #if defined(SIMDE_AVX512F_NATIVE)
525
- return _mm512_castpd512_pd128(a);
526
- #else
527
- simde__m512d_private a_ = simde__m512d_to_private(a);
528
- return a_.m128d[0];
529
- #endif
530
- }
531
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
532
- #define _mm512_castpd512_pd128(a) simde_mm512_castpd512_pd128(a)
533
- #endif
534
-
535
- SIMDE__FUNCTION_ATTRIBUTES
536
- simde__m256d
537
- simde_mm512_castpd512_pd256 (simde__m512d a) {
538
- #if defined(SIMDE_AVX512F_NATIVE)
539
- return _mm512_castpd512_pd256(a);
540
- #else
541
- simde__m512d_private a_ = simde__m512d_to_private(a);
542
- return a_.m256d[0];
543
- #endif
544
- }
545
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
546
- #define _mm512_castpd512_pd256(a) simde_mm512_castpd512_pd256(a)
547
- #endif
548
-
549
- SIMDE__FUNCTION_ATTRIBUTES
550
- simde__m512
551
- simde_mm512_castps128_ps512 (simde__m128 a) {
552
- #if defined(SIMDE_AVX512F_NATIVE)
553
- return _mm512_castps128_ps512(a);
554
- #else
555
- simde__m512_private r_;
556
- r_.m128[0] = a;
557
- return simde__m512_from_private(r_);
558
- #endif
559
- }
560
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
561
- #define _mm512_castps128_ps512(a) simde_mm512_castps128_ps512(a)
562
- #endif
563
-
564
- SIMDE__FUNCTION_ATTRIBUTES
565
- simde__m512
566
- simde_mm512_castps256_ps512 (simde__m256 a) {
567
- #if defined(SIMDE_AVX512F_NATIVE)
568
- return _mm512_castps256_ps512(a);
569
- #else
570
- simde__m512_private r_;
571
- r_.m256[0] = a;
572
- return simde__m512_from_private(r_);
573
- #endif
574
- }
575
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
576
- #define _mm512_castps256_ps512(a) simde_mm512_castps256_ps512(a)
577
- #endif
578
-
579
- SIMDE__FUNCTION_ATTRIBUTES
580
- simde__m128
581
- simde_mm512_castps512_ps128 (simde__m512 a) {
582
- #if defined(SIMDE_AVX512F_NATIVE)
583
- return _mm512_castps512_ps128(a);
584
- #else
585
- simde__m512_private a_ = simde__m512_to_private(a);
586
- return a_.m128[0];
587
- #endif
588
- }
589
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
590
- #define _mm512_castps512_ps128(a) simde_mm512_castps512_ps128(a)
591
- #endif
592
-
593
- SIMDE__FUNCTION_ATTRIBUTES
594
- simde__m256
595
- simde_mm512_castps512_ps256 (simde__m512 a) {
596
- #if defined(SIMDE_AVX512F_NATIVE)
597
- return _mm512_castps512_ps256(a);
598
- #else
599
- simde__m512_private a_ = simde__m512_to_private(a);
600
- return a_.m256[0];
601
- #endif
602
- }
603
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
604
- #define _mm512_castps512_ps256(a) simde_mm512_castps512_ps256(a)
605
- #endif
606
-
607
- SIMDE__FUNCTION_ATTRIBUTES
608
- simde__m512i
609
- simde_mm512_castsi128_si512 (simde__m128i a) {
610
- #if defined(SIMDE_AVX512F_NATIVE)
611
- return _mm512_castsi128_si512(a);
612
- #else
613
- simde__m512i_private r_;
614
- r_.m128i[0] = a;
615
- return simde__m512i_from_private(r_);
616
- #endif
617
- }
618
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
619
- #define _mm512_castsi128_si512(a) simde_mm512_castsi128_si512(a)
620
- #endif
621
-
622
- SIMDE__FUNCTION_ATTRIBUTES
623
- simde__m512i
624
- simde_mm512_castsi256_si512 (simde__m256i a) {
625
- #if defined(SIMDE_AVX512F_NATIVE)
626
- return _mm512_castsi256_si512(a);
627
- #else
628
- simde__m512i_private r_;
629
- r_.m256i[0] = a;
630
- return simde__m512i_from_private(r_);
631
- #endif
632
- }
633
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
634
- #define _mm512_castsi256_si512(a) simde_mm512_castsi256_si512(a)
635
- #endif
636
-
637
- SIMDE__FUNCTION_ATTRIBUTES
638
- simde__m128i
639
- simde_mm512_castsi512_si128 (simde__m512i a) {
640
- #if defined(SIMDE_AVX512F_NATIVE)
641
- return _mm512_castsi512_si128(a);
642
- #else
643
- simde__m512i_private a_ = simde__m512i_to_private(a);
644
- return a_.m128i[0];
645
- #endif
646
- }
647
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
648
- #define _mm512_castsi512_si128(a) simde_mm512_castsi512_si128(a)
649
- #endif
650
-
651
- SIMDE__FUNCTION_ATTRIBUTES
652
- simde__m256i
653
- simde_mm512_castsi512_si256 (simde__m512i a) {
654
- #if defined(SIMDE_AVX512F_NATIVE)
655
- return _mm512_castsi512_si256(a);
656
- #else
657
- simde__m512i_private a_ = simde__m512i_to_private(a);
658
- return a_.m256i[0];
659
- #endif
660
- }
661
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
662
- #define _mm512_castsi512_si256(a) simde_mm512_castsi512_si256(a)
663
- #endif
664
-
665
- SIMDE__FUNCTION_ATTRIBUTES
666
- simde__m512i
667
- simde_mm512_set_epi8 (int8_t e63, int8_t e62, int8_t e61, int8_t e60, int8_t e59, int8_t e58, int8_t e57, int8_t e56,
668
- int8_t e55, int8_t e54, int8_t e53, int8_t e52, int8_t e51, int8_t e50, int8_t e49, int8_t e48,
669
- int8_t e47, int8_t e46, int8_t e45, int8_t e44, int8_t e43, int8_t e42, int8_t e41, int8_t e40,
670
- int8_t e39, int8_t e38, int8_t e37, int8_t e36, int8_t e35, int8_t e34, int8_t e33, int8_t e32,
671
- int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24,
672
- int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16,
673
- int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8,
674
- int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) {
675
- simde__m512i_private r_;
676
-
677
- r_.i8[ 0] = e0;
678
- r_.i8[ 1] = e1;
679
- r_.i8[ 2] = e2;
680
- r_.i8[ 3] = e3;
681
- r_.i8[ 4] = e4;
682
- r_.i8[ 5] = e5;
683
- r_.i8[ 6] = e6;
684
- r_.i8[ 7] = e7;
685
- r_.i8[ 8] = e8;
686
- r_.i8[ 9] = e9;
687
- r_.i8[10] = e10;
688
- r_.i8[11] = e11;
689
- r_.i8[12] = e12;
690
- r_.i8[13] = e13;
691
- r_.i8[14] = e14;
692
- r_.i8[15] = e15;
693
- r_.i8[16] = e16;
694
- r_.i8[17] = e17;
695
- r_.i8[18] = e18;
696
- r_.i8[19] = e19;
697
- r_.i8[20] = e20;
698
- r_.i8[21] = e21;
699
- r_.i8[22] = e22;
700
- r_.i8[23] = e23;
701
- r_.i8[24] = e24;
702
- r_.i8[25] = e25;
703
- r_.i8[26] = e26;
704
- r_.i8[27] = e27;
705
- r_.i8[28] = e28;
706
- r_.i8[29] = e29;
707
- r_.i8[30] = e30;
708
- r_.i8[31] = e31;
709
- r_.i8[32] = e32;
710
- r_.i8[33] = e33;
711
- r_.i8[34] = e34;
712
- r_.i8[35] = e35;
713
- r_.i8[36] = e36;
714
- r_.i8[37] = e37;
715
- r_.i8[38] = e38;
716
- r_.i8[39] = e39;
717
- r_.i8[40] = e40;
718
- r_.i8[41] = e41;
719
- r_.i8[42] = e42;
720
- r_.i8[43] = e43;
721
- r_.i8[44] = e44;
722
- r_.i8[45] = e45;
723
- r_.i8[46] = e46;
724
- r_.i8[47] = e47;
725
- r_.i8[48] = e48;
726
- r_.i8[49] = e49;
727
- r_.i8[50] = e50;
728
- r_.i8[51] = e51;
729
- r_.i8[52] = e52;
730
- r_.i8[53] = e53;
731
- r_.i8[54] = e54;
732
- r_.i8[55] = e55;
733
- r_.i8[56] = e56;
734
- r_.i8[57] = e57;
735
- r_.i8[58] = e58;
736
- r_.i8[59] = e59;
737
- r_.i8[60] = e60;
738
- r_.i8[61] = e61;
739
- r_.i8[62] = e62;
740
- r_.i8[63] = e63;
741
-
742
- return simde__m512i_from_private(r_);
743
- }
744
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
745
- #define _mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
746
- #endif
747
-
748
- SIMDE__FUNCTION_ATTRIBUTES
749
- simde__m512i
750
- simde_mm512_set_epi16 (int16_t e31, int16_t e30, int16_t e29, int16_t e28, int16_t e27, int16_t e26, int16_t e25, int16_t e24,
751
- int16_t e23, int16_t e22, int16_t e21, int16_t e20, int16_t e19, int16_t e18, int16_t e17, int16_t e16,
752
- int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8,
753
- int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) {
754
- simde__m512i_private r_;
755
-
756
- r_.i16[ 0] = e0;
757
- r_.i16[ 1] = e1;
758
- r_.i16[ 2] = e2;
759
- r_.i16[ 3] = e3;
760
- r_.i16[ 4] = e4;
761
- r_.i16[ 5] = e5;
762
- r_.i16[ 6] = e6;
763
- r_.i16[ 7] = e7;
764
- r_.i16[ 8] = e8;
765
- r_.i16[ 9] = e9;
766
- r_.i16[10] = e10;
767
- r_.i16[11] = e11;
768
- r_.i16[12] = e12;
769
- r_.i16[13] = e13;
770
- r_.i16[14] = e14;
771
- r_.i16[15] = e15;
772
- r_.i16[16] = e16;
773
- r_.i16[17] = e17;
774
- r_.i16[18] = e18;
775
- r_.i16[19] = e19;
776
- r_.i16[20] = e20;
777
- r_.i16[21] = e21;
778
- r_.i16[22] = e22;
779
- r_.i16[23] = e23;
780
- r_.i16[24] = e24;
781
- r_.i16[25] = e25;
782
- r_.i16[26] = e26;
783
- r_.i16[27] = e27;
784
- r_.i16[28] = e28;
785
- r_.i16[29] = e29;
786
- r_.i16[30] = e30;
787
- r_.i16[31] = e31;
788
-
789
- return simde__m512i_from_private(r_);
790
- }
791
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
792
- #define _mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
793
- #endif
794
-
795
- SIMDE__FUNCTION_ATTRIBUTES
796
- simde__m512i
797
- simde_mm512_set_epi32 (int32_t e15, int32_t e14, int32_t e13, int32_t e12, int32_t e11, int32_t e10, int32_t e9, int32_t e8,
798
- int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) {
799
- simde__m512i_private r_;
800
-
801
- r_.i32[ 0] = e0;
802
- r_.i32[ 1] = e1;
803
- r_.i32[ 2] = e2;
804
- r_.i32[ 3] = e3;
805
- r_.i32[ 4] = e4;
806
- r_.i32[ 5] = e5;
807
- r_.i32[ 6] = e6;
808
- r_.i32[ 7] = e7;
809
- r_.i32[ 8] = e8;
810
- r_.i32[ 9] = e9;
811
- r_.i32[10] = e10;
812
- r_.i32[11] = e11;
813
- r_.i32[12] = e12;
814
- r_.i32[13] = e13;
815
- r_.i32[14] = e14;
816
- r_.i32[15] = e15;
817
-
818
- return simde__m512i_from_private(r_);
819
- }
820
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
821
- #define _mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
822
- #endif
823
-
824
- SIMDE__FUNCTION_ATTRIBUTES
825
- simde__m512i
826
- simde_mm512_set_epi64 (int64_t e7, int64_t e6, int64_t e5, int64_t e4, int64_t e3, int64_t e2, int64_t e1, int64_t e0) {
827
- simde__m512i_private r_;
828
-
829
- r_.i64[0] = e0;
830
- r_.i64[1] = e1;
831
- r_.i64[2] = e2;
832
- r_.i64[3] = e3;
833
- r_.i64[4] = e4;
834
- r_.i64[5] = e5;
835
- r_.i64[6] = e6;
836
- r_.i64[7] = e7;
837
-
838
- return simde__m512i_from_private(r_);
839
- }
840
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
841
- #define _mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
842
- #endif
843
-
844
- SIMDE__FUNCTION_ATTRIBUTES
845
- simde__m512i
846
- simde_x_mm512_set_epu8 (uint8_t e63, uint8_t e62, uint8_t e61, uint8_t e60, uint8_t e59, uint8_t e58, uint8_t e57, uint8_t e56,
847
- uint8_t e55, uint8_t e54, uint8_t e53, uint8_t e52, uint8_t e51, uint8_t e50, uint8_t e49, uint8_t e48,
848
- uint8_t e47, uint8_t e46, uint8_t e45, uint8_t e44, uint8_t e43, uint8_t e42, uint8_t e41, uint8_t e40,
849
- uint8_t e39, uint8_t e38, uint8_t e37, uint8_t e36, uint8_t e35, uint8_t e34, uint8_t e33, uint8_t e32,
850
- uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24,
851
- uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16,
852
- uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8,
853
- uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) {
854
- simde__m512i_private r_;
855
-
856
- r_.u8[ 0] = e0;
857
- r_.u8[ 1] = e1;
858
- r_.u8[ 2] = e2;
859
- r_.u8[ 3] = e3;
860
- r_.u8[ 4] = e4;
861
- r_.u8[ 5] = e5;
862
- r_.u8[ 6] = e6;
863
- r_.u8[ 7] = e7;
864
- r_.u8[ 8] = e8;
865
- r_.u8[ 9] = e9;
866
- r_.u8[10] = e10;
867
- r_.u8[11] = e11;
868
- r_.u8[12] = e12;
869
- r_.u8[13] = e13;
870
- r_.u8[14] = e14;
871
- r_.u8[15] = e15;
872
- r_.u8[16] = e16;
873
- r_.u8[17] = e17;
874
- r_.u8[18] = e18;
875
- r_.u8[19] = e19;
876
- r_.u8[20] = e20;
877
- r_.u8[21] = e21;
878
- r_.u8[22] = e22;
879
- r_.u8[23] = e23;
880
- r_.u8[24] = e24;
881
- r_.u8[25] = e25;
882
- r_.u8[26] = e26;
883
- r_.u8[27] = e27;
884
- r_.u8[28] = e28;
885
- r_.u8[29] = e29;
886
- r_.u8[30] = e30;
887
- r_.u8[31] = e31;
888
- r_.u8[32] = e32;
889
- r_.u8[33] = e33;
890
- r_.u8[34] = e34;
891
- r_.u8[35] = e35;
892
- r_.u8[36] = e36;
893
- r_.u8[37] = e37;
894
- r_.u8[38] = e38;
895
- r_.u8[39] = e39;
896
- r_.u8[40] = e40;
897
- r_.u8[41] = e41;
898
- r_.u8[42] = e42;
899
- r_.u8[43] = e43;
900
- r_.u8[44] = e44;
901
- r_.u8[45] = e45;
902
- r_.u8[46] = e46;
903
- r_.u8[47] = e47;
904
- r_.u8[48] = e48;
905
- r_.u8[49] = e49;
906
- r_.u8[50] = e50;
907
- r_.u8[51] = e51;
908
- r_.u8[52] = e52;
909
- r_.u8[53] = e53;
910
- r_.u8[54] = e54;
911
- r_.u8[55] = e55;
912
- r_.u8[56] = e56;
913
- r_.u8[57] = e57;
914
- r_.u8[58] = e58;
915
- r_.u8[59] = e59;
916
- r_.u8[60] = e60;
917
- r_.u8[61] = e61;
918
- r_.u8[62] = e62;
919
- r_.u8[63] = e63;
920
-
921
- return simde__m512i_from_private(r_);
922
- }
923
-
924
- SIMDE__FUNCTION_ATTRIBUTES
925
- simde__m512i
926
- simde_x_mm512_set_epu16 (uint16_t e31, uint16_t e30, uint16_t e29, uint16_t e28, uint16_t e27, uint16_t e26, uint16_t e25, uint16_t e24,
927
- uint16_t e23, uint16_t e22, uint16_t e21, uint16_t e20, uint16_t e19, uint16_t e18, uint16_t e17, uint16_t e16,
928
- uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8,
929
- uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) {
930
- simde__m512i_private r_;
931
-
932
- r_.u16[ 0] = e0;
933
- r_.u16[ 1] = e1;
934
- r_.u16[ 2] = e2;
935
- r_.u16[ 3] = e3;
936
- r_.u16[ 4] = e4;
937
- r_.u16[ 5] = e5;
938
- r_.u16[ 6] = e6;
939
- r_.u16[ 7] = e7;
940
- r_.u16[ 8] = e8;
941
- r_.u16[ 9] = e9;
942
- r_.u16[10] = e10;
943
- r_.u16[11] = e11;
944
- r_.u16[12] = e12;
945
- r_.u16[13] = e13;
946
- r_.u16[14] = e14;
947
- r_.u16[15] = e15;
948
- r_.u16[16] = e16;
949
- r_.u16[17] = e17;
950
- r_.u16[18] = e18;
951
- r_.u16[19] = e19;
952
- r_.u16[20] = e20;
953
- r_.u16[21] = e21;
954
- r_.u16[22] = e22;
955
- r_.u16[23] = e23;
956
- r_.u16[24] = e24;
957
- r_.u16[25] = e25;
958
- r_.u16[26] = e26;
959
- r_.u16[27] = e27;
960
- r_.u16[28] = e28;
961
- r_.u16[29] = e29;
962
- r_.u16[30] = e30;
963
- r_.u16[31] = e31;
964
-
965
- return simde__m512i_from_private(r_);
966
- }
967
-
968
- SIMDE__FUNCTION_ATTRIBUTES
969
- simde__m512i
970
- simde_x_mm512_set_epu32 (uint32_t e15, uint32_t e14, uint32_t e13, uint32_t e12, uint32_t e11, uint32_t e10, uint32_t e9, uint32_t e8,
971
- uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) {
972
- simde__m512i_private r_;
973
-
974
- r_.u32[ 0] = e0;
975
- r_.u32[ 1] = e1;
976
- r_.u32[ 2] = e2;
977
- r_.u32[ 3] = e3;
978
- r_.u32[ 4] = e4;
979
- r_.u32[ 5] = e5;
980
- r_.u32[ 6] = e6;
981
- r_.u32[ 7] = e7;
982
- r_.u32[ 8] = e8;
983
- r_.u32[ 9] = e9;
984
- r_.u32[10] = e10;
985
- r_.u32[11] = e11;
986
- r_.u32[12] = e12;
987
- r_.u32[13] = e13;
988
- r_.u32[14] = e14;
989
- r_.u32[15] = e15;
990
-
991
- return simde__m512i_from_private(r_);
992
- }
993
-
994
- SIMDE__FUNCTION_ATTRIBUTES
995
- simde__m512i
996
- simde_x_mm512_set_epu64 (uint64_t e7, uint64_t e6, uint64_t e5, uint64_t e4, uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) {
997
- simde__m512i_private r_;
998
-
999
- r_.u64[ 0] = e0;
1000
- r_.u64[ 1] = e1;
1001
- r_.u64[ 2] = e2;
1002
- r_.u64[ 3] = e3;
1003
- r_.u64[ 4] = e4;
1004
- r_.u64[ 5] = e5;
1005
- r_.u64[ 6] = e6;
1006
- r_.u64[ 7] = e7;
1007
-
1008
- return simde__m512i_from_private(r_);
1009
- }
1010
-
1011
- SIMDE__FUNCTION_ATTRIBUTES
1012
- simde__m512
1013
- simde_mm512_set_ps (simde_float32 e15, simde_float32 e14, simde_float32 e13, simde_float32 e12,
1014
- simde_float32 e11, simde_float32 e10, simde_float32 e9, simde_float32 e8,
1015
- simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4,
1016
- simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) {
1017
- simde__m512_private r_;
1018
-
1019
- r_.f32[ 0] = e0;
1020
- r_.f32[ 1] = e1;
1021
- r_.f32[ 2] = e2;
1022
- r_.f32[ 3] = e3;
1023
- r_.f32[ 4] = e4;
1024
- r_.f32[ 5] = e5;
1025
- r_.f32[ 6] = e6;
1026
- r_.f32[ 7] = e7;
1027
- r_.f32[ 8] = e8;
1028
- r_.f32[ 9] = e9;
1029
- r_.f32[10] = e10;
1030
- r_.f32[11] = e11;
1031
- r_.f32[12] = e12;
1032
- r_.f32[13] = e13;
1033
- r_.f32[14] = e14;
1034
- r_.f32[15] = e15;
1035
-
1036
- return simde__m512_from_private(r_);
1037
- }
1038
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1039
- #define _mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
1040
- #endif
1041
-
1042
- SIMDE__FUNCTION_ATTRIBUTES
1043
- simde__m512d
1044
- simde_mm512_set_pd (simde_float64 e7, simde_float64 e6, simde_float64 e5, simde_float64 e4, simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) {
1045
- simde__m512d_private r_;
1046
-
1047
- r_.f64[0] = e0;
1048
- r_.f64[1] = e1;
1049
- r_.f64[2] = e2;
1050
- r_.f64[3] = e3;
1051
- r_.f64[4] = e4;
1052
- r_.f64[5] = e5;
1053
- r_.f64[6] = e6;
1054
- r_.f64[7] = e7;
1055
-
1056
- return simde__m512d_from_private(r_);
1057
- }
1058
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1059
- #define _mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0)
1060
- #endif
1061
-
1062
- SIMDE__FUNCTION_ATTRIBUTES
1063
- simde__m512i
1064
- simde_mm512_set1_epi8 (int8_t a) {
1065
- #if defined(SIMDE_AVX512F_NATIVE)
1066
- return _mm512_set1_epi8(a);
1067
- #else
1068
- simde__m512i_private r_;
1069
-
1070
- SIMDE__VECTORIZE
1071
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
1072
- r_.i8[i] = a;
1073
- }
1074
-
1075
- return simde__m512i_from_private(r_);
1076
- #endif
1077
- }
1078
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1079
- #define _mm512_set1_epi8(a) simde_mm512_set1_epi8(a)
1080
- #endif
1081
-
1082
- SIMDE__FUNCTION_ATTRIBUTES
1083
- simde__m512i
1084
- simde_mm512_set1_epi16 (int16_t a) {
1085
- #if defined(SIMDE_AVX512F_NATIVE)
1086
- return _mm512_set1_epi16(a);
1087
- #else
1088
- simde__m512i_private r_;
1089
-
1090
- SIMDE__VECTORIZE
1091
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
1092
- r_.i16[i] = a;
1093
- }
1094
-
1095
- return simde__m512i_from_private(r_);
1096
- #endif
1097
- }
1098
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1099
- #define _mm512_set1_epi16(a) simde_mm512_set1_epi16(a)
1100
- #endif
1101
-
1102
- SIMDE__FUNCTION_ATTRIBUTES
1103
- simde__m512i
1104
- simde_mm512_set1_epi32 (int32_t a) {
1105
- #if defined(SIMDE_AVX512F_NATIVE)
1106
- return _mm512_set1_epi32(a);
1107
- #else
1108
- simde__m512i_private r_;
1109
-
1110
- SIMDE__VECTORIZE
1111
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1112
- r_.i32[i] = a;
1113
- }
1114
-
1115
- return simde__m512i_from_private(r_);
1116
- #endif
1117
- }
1118
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1119
- #define _mm512_set1_epi32(a) simde_mm512_set1_epi32(a)
1120
- #endif
1121
-
1122
- SIMDE__FUNCTION_ATTRIBUTES
1123
- simde__m512i
1124
- simde_mm512_set1_epi64 (int64_t a) {
1125
- #if defined(SIMDE_AVX512F_NATIVE)
1126
- return _mm512_set1_epi64(a);
1127
- #else
1128
- simde__m512i_private r_;
1129
-
1130
- SIMDE__VECTORIZE
1131
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
1132
- r_.i64[i] = a;
1133
- }
1134
-
1135
- return simde__m512i_from_private(r_);
1136
- #endif
1137
- }
1138
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1139
- #define _mm512_set1_epi64(a) simde_mm512_set1_epi64(a)
1140
- #endif
1141
-
1142
- SIMDE__FUNCTION_ATTRIBUTES
1143
- simde__m512i
1144
- simde_x_mm512_set1_epu8 (uint8_t a) {
1145
- simde__m512i_private r_;
1146
-
1147
- SIMDE__VECTORIZE
1148
- for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
1149
- r_.u8[i] = a;
1150
- }
1151
-
1152
- return simde__m512i_from_private(r_);
1153
- }
1154
-
1155
- SIMDE__FUNCTION_ATTRIBUTES
1156
- simde__m512i
1157
- simde_x_mm512_set1_epu16 (uint16_t a) {
1158
- simde__m512i_private r_;
1159
-
1160
- SIMDE__VECTORIZE
1161
- for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
1162
- r_.u16[i] = a;
1163
- }
1164
-
1165
- return simde__m512i_from_private(r_);
1166
- }
1167
-
1168
- SIMDE__FUNCTION_ATTRIBUTES
1169
- simde__m512i
1170
- simde_x_mm512_set1_epu32 (uint32_t a) {
1171
- simde__m512i_private r_;
1172
-
1173
- SIMDE__VECTORIZE
1174
- for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
1175
- r_.u32[i] = a;
1176
- }
1177
-
1178
- return simde__m512i_from_private(r_);
1179
- }
1180
-
1181
- SIMDE__FUNCTION_ATTRIBUTES
1182
- simde__m512i
1183
- simde_x_mm512_set1_epu64 (uint64_t a) {
1184
- simde__m512i_private r_;
1185
-
1186
- SIMDE__VECTORIZE
1187
- for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
1188
- r_.u64[i] = a;
1189
- }
1190
-
1191
- return simde__m512i_from_private(r_);
1192
- }
1193
-
1194
- SIMDE__FUNCTION_ATTRIBUTES
1195
- simde__m512
1196
- simde_mm512_set1_ps (simde_float32 a) {
1197
- #if defined(SIMDE_AVX512F_NATIVE)
1198
- return _mm512_set1_ps(a);
1199
- #else
1200
- simde__m512_private r_;
1201
-
1202
- SIMDE__VECTORIZE
1203
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1204
- r_.f32[i] = a;
1205
- }
1206
-
1207
- return simde__m512_from_private(r_);
1208
- #endif
1209
- }
1210
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1211
- #define _mm512_set1_ps(a) simde_mm512_set1_ps(a)
1212
- #endif
1213
-
1214
- SIMDE__FUNCTION_ATTRIBUTES
1215
- simde__m512d
1216
- simde_mm512_set1_pd (simde_float64 a) {
1217
- #if defined(SIMDE_AVX512F_NATIVE)
1218
- return _mm512_set1_pd(a);
1219
- #else
1220
- simde__m512d_private r_;
1221
-
1222
- SIMDE__VECTORIZE
1223
- for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1224
- r_.f64[i] = a;
1225
- }
1226
-
1227
- return simde__m512d_from_private(r_);
1228
- #endif
1229
- }
1230
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1231
- #define _mm512_set1_pd(a) simde_mm512_set1_pd(a)
1232
- #endif
1233
-
1234
- SIMDE__FUNCTION_ATTRIBUTES
1235
- simde__m512i
1236
- simde_mm512_set4_epi32 (int32_t d, int32_t c, int32_t b, int32_t a) {
1237
- simde__m512i_private r_;
1238
-
1239
- r_.i32[ 0] = a;
1240
- r_.i32[ 1] = b;
1241
- r_.i32[ 2] = c;
1242
- r_.i32[ 3] = d;
1243
- r_.i32[ 4] = a;
1244
- r_.i32[ 5] = b;
1245
- r_.i32[ 6] = c;
1246
- r_.i32[ 7] = d;
1247
- r_.i32[ 8] = a;
1248
- r_.i32[ 9] = b;
1249
- r_.i32[10] = c;
1250
- r_.i32[11] = d;
1251
- r_.i32[12] = a;
1252
- r_.i32[13] = b;
1253
- r_.i32[14] = c;
1254
- r_.i32[15] = d;
1255
-
1256
- return simde__m512i_from_private(r_);
1257
- }
1258
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1259
- #define _mm512_set4_epi32(d,c,b,a) simde_mm512_set4_epi32(d,c,b,a)
1260
- #endif
1261
-
1262
- SIMDE__FUNCTION_ATTRIBUTES
1263
- simde__m512i
1264
- simde_mm512_set4_epi64 (int64_t d, int64_t c, int64_t b, int64_t a) {
1265
- simde__m512i_private r_;
1266
-
1267
- r_.i64[0] = a;
1268
- r_.i64[1] = b;
1269
- r_.i64[2] = c;
1270
- r_.i64[3] = d;
1271
- r_.i64[4] = a;
1272
- r_.i64[5] = b;
1273
- r_.i64[6] = c;
1274
- r_.i64[7] = d;
1275
-
1276
- return simde__m512i_from_private(r_);
1277
- }
1278
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1279
- #define _mm512_set4_epi64(d,c,b,a) simde_mm512_set4_epi64(d,c,b,a)
1280
- #endif
1281
-
1282
- SIMDE__FUNCTION_ATTRIBUTES
1283
- simde__m512
1284
- simde_mm512_set4_ps (simde_float32 d, simde_float32 c, simde_float32 b, simde_float32 a) {
1285
- simde__m512_private r_;
1286
-
1287
- r_.f32[ 0] = a;
1288
- r_.f32[ 1] = b;
1289
- r_.f32[ 2] = c;
1290
- r_.f32[ 3] = d;
1291
- r_.f32[ 4] = a;
1292
- r_.f32[ 5] = b;
1293
- r_.f32[ 6] = c;
1294
- r_.f32[ 7] = d;
1295
- r_.f32[ 8] = a;
1296
- r_.f32[ 9] = b;
1297
- r_.f32[10] = c;
1298
- r_.f32[11] = d;
1299
- r_.f32[12] = a;
1300
- r_.f32[13] = b;
1301
- r_.f32[14] = c;
1302
- r_.f32[15] = d;
1303
-
1304
- return simde__m512_from_private(r_);
1305
- }
1306
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1307
- #define _mm512_set4_ps(d,c,b,a) simde_mm512_set4_ps(d,c,b,a)
1308
- #endif
1309
-
1310
- SIMDE__FUNCTION_ATTRIBUTES
1311
- simde__m512d
1312
- simde_mm512_set4_pd (simde_float64 d, simde_float64 c, simde_float64 b, simde_float64 a) {
1313
- simde__m512d_private r_;
1314
-
1315
- r_.f64[0] = a;
1316
- r_.f64[1] = b;
1317
- r_.f64[2] = c;
1318
- r_.f64[3] = d;
1319
- r_.f64[4] = a;
1320
- r_.f64[5] = b;
1321
- r_.f64[6] = c;
1322
- r_.f64[7] = d;
1323
-
1324
- return simde__m512d_from_private(r_);
1325
- }
1326
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1327
- #define _mm512_set4_pd(d,c,b,a) simde_mm512_set4_pd(d,c,b,a)
1328
- #endif
1329
-
1330
- SIMDE__FUNCTION_ATTRIBUTES
1331
- simde__m512i
1332
- simde_mm512_setr_epi32 (int32_t e15, int32_t e14, int32_t e13, int32_t e12, int32_t e11, int32_t e10, int32_t e9, int32_t e8,
1333
- int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) {
1334
- simde__m512i_private r_;
1335
-
1336
- r_.i32[ 0] = e15;
1337
- r_.i32[ 1] = e14;
1338
- r_.i32[ 2] = e13;
1339
- r_.i32[ 3] = e12;
1340
- r_.i32[ 4] = e11;
1341
- r_.i32[ 5] = e10;
1342
- r_.i32[ 6] = e9;
1343
- r_.i32[ 7] = e8;
1344
- r_.i32[ 8] = e7;
1345
- r_.i32[ 9] = e6;
1346
- r_.i32[10] = e5;
1347
- r_.i32[11] = e4;
1348
- r_.i32[12] = e3;
1349
- r_.i32[13] = e2;
1350
- r_.i32[14] = e1;
1351
- r_.i32[15] = e0;
1352
-
1353
- return simde__m512i_from_private(r_);
1354
- }
1355
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1356
- #define _mm512_setr_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
1357
- #endif
1358
-
1359
- SIMDE__FUNCTION_ATTRIBUTES
1360
- simde__m512i
1361
- simde_mm512_setr_epi64 (int64_t e7, int64_t e6, int64_t e5, int64_t e4, int64_t e3, int64_t e2, int64_t e1, int64_t e0) {
1362
- simde__m512i_private r_;
1363
-
1364
- r_.i64[0] = e7;
1365
- r_.i64[1] = e6;
1366
- r_.i64[2] = e5;
1367
- r_.i64[3] = e4;
1368
- r_.i64[4] = e3;
1369
- r_.i64[5] = e2;
1370
- r_.i64[6] = e1;
1371
- r_.i64[7] = e0;
1372
-
1373
- return simde__m512i_from_private(r_);
1374
- }
1375
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1376
- #define _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
1377
- #endif
1378
-
1379
- SIMDE__FUNCTION_ATTRIBUTES
1380
- simde__m512
1381
- simde_mm512_setr_ps (simde_float32 e15, simde_float32 e14, simde_float32 e13, simde_float32 e12,
1382
- simde_float32 e11, simde_float32 e10, simde_float32 e9, simde_float32 e8,
1383
- simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4,
1384
- simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) {
1385
- simde__m512_private r_;
1386
-
1387
- r_.f32[ 0] = e15;
1388
- r_.f32[ 1] = e14;
1389
- r_.f32[ 2] = e13;
1390
- r_.f32[ 3] = e12;
1391
- r_.f32[ 4] = e11;
1392
- r_.f32[ 5] = e10;
1393
- r_.f32[ 6] = e9;
1394
- r_.f32[ 7] = e8;
1395
- r_.f32[ 8] = e7;
1396
- r_.f32[ 9] = e6;
1397
- r_.f32[10] = e5;
1398
- r_.f32[11] = e4;
1399
- r_.f32[12] = e3;
1400
- r_.f32[13] = e2;
1401
- r_.f32[14] = e1;
1402
- r_.f32[15] = e0;
1403
-
1404
- return simde__m512_from_private(r_);
1405
- }
1406
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1407
- #define _mm512_setr_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
1408
- #endif
1409
-
1410
- SIMDE__FUNCTION_ATTRIBUTES
1411
- simde__m512d
1412
- simde_mm512_setr_pd (simde_float64 e7, simde_float64 e6, simde_float64 e5, simde_float64 e4, simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) {
1413
- simde__m512d_private r_;
1414
-
1415
- r_.f64[0] = e7;
1416
- r_.f64[1] = e6;
1417
- r_.f64[2] = e5;
1418
- r_.f64[3] = e4;
1419
- r_.f64[4] = e3;
1420
- r_.f64[5] = e2;
1421
- r_.f64[6] = e1;
1422
- r_.f64[7] = e0;
1423
-
1424
- return simde__m512d_from_private(r_);
1425
- }
1426
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1427
- #define _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
1428
- #endif
1429
-
1430
- SIMDE__FUNCTION_ATTRIBUTES
1431
- simde__m512i
1432
- simde_mm512_setr4_epi32 (int32_t d, int32_t c, int32_t b, int32_t a) {
1433
- simde__m512i_private r_;
1434
-
1435
- r_.i32[ 0] = d;
1436
- r_.i32[ 1] = c;
1437
- r_.i32[ 2] = b;
1438
- r_.i32[ 3] = a;
1439
- r_.i32[ 4] = d;
1440
- r_.i32[ 5] = c;
1441
- r_.i32[ 6] = b;
1442
- r_.i32[ 7] = a;
1443
- r_.i32[ 8] = d;
1444
- r_.i32[ 9] = c;
1445
- r_.i32[10] = b;
1446
- r_.i32[11] = a;
1447
- r_.i32[12] = d;
1448
- r_.i32[13] = c;
1449
- r_.i32[14] = b;
1450
- r_.i32[15] = a;
1451
-
1452
- return simde__m512i_from_private(r_);
1453
- }
1454
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1455
- #define _mm512_setr4_epi32(d,c,b,a) simde_mm512_setr4_epi32(d,c,b,a)
1456
- #endif
1457
-
1458
- SIMDE__FUNCTION_ATTRIBUTES
1459
- simde__m512i
1460
- simde_mm512_setr4_epi64 (int64_t d, int64_t c, int64_t b, int64_t a) {
1461
- simde__m512i_private r_;
1462
-
1463
- r_.i64[0] = d;
1464
- r_.i64[1] = c;
1465
- r_.i64[2] = b;
1466
- r_.i64[3] = a;
1467
- r_.i64[4] = d;
1468
- r_.i64[5] = c;
1469
- r_.i64[6] = b;
1470
- r_.i64[7] = a;
1471
-
1472
- return simde__m512i_from_private(r_);
1473
- }
1474
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1475
- #define _mm512_setr4_epi64(d,c,b,a) simde_mm512_setr4_epi64(d,c,b,a)
1476
- #endif
1477
-
1478
- SIMDE__FUNCTION_ATTRIBUTES
1479
- simde__m512
1480
- simde_mm512_setr4_ps (simde_float32 d, simde_float32 c, simde_float32 b, simde_float32 a) {
1481
- simde__m512_private r_;
1482
-
1483
- r_.f32[ 0] = d;
1484
- r_.f32[ 1] = c;
1485
- r_.f32[ 2] = b;
1486
- r_.f32[ 3] = a;
1487
- r_.f32[ 4] = d;
1488
- r_.f32[ 5] = c;
1489
- r_.f32[ 6] = b;
1490
- r_.f32[ 7] = a;
1491
- r_.f32[ 8] = d;
1492
- r_.f32[ 9] = c;
1493
- r_.f32[10] = b;
1494
- r_.f32[11] = a;
1495
- r_.f32[12] = d;
1496
- r_.f32[13] = c;
1497
- r_.f32[14] = b;
1498
- r_.f32[15] = a;
1499
-
1500
- return simde__m512_from_private(r_);
1501
- }
1502
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1503
- #define _mm512_setr4_ps(d,c,b,a) simde_mm512_setr4_ps(d,c,b,a)
1504
- #endif
1505
-
1506
- SIMDE__FUNCTION_ATTRIBUTES
1507
- simde__m512d
1508
- simde_mm512_setr4_pd (simde_float64 d, simde_float64 c, simde_float64 b, simde_float64 a) {
1509
- simde__m512d_private r_;
1510
-
1511
- r_.f64[0] = d;
1512
- r_.f64[1] = c;
1513
- r_.f64[2] = b;
1514
- r_.f64[3] = a;
1515
- r_.f64[4] = d;
1516
- r_.f64[5] = c;
1517
- r_.f64[6] = b;
1518
- r_.f64[7] = a;
1519
-
1520
- return simde__m512d_from_private(r_);
1521
- }
1522
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1523
- #define _mm512_setr4_pd(d,c,b,a) simde_mm512_setr4_pd(d,c,b,a)
1524
- #endif
1525
-
1526
- SIMDE__FUNCTION_ATTRIBUTES
1527
- simde__m512i
1528
- simde_mm512_setzero_si512(void) {
1529
- #if defined(SIMDE_AVX512F_NATIVE)
1530
- return _mm512_setzero_si512();
1531
- #else
1532
- simde__m512i r;
1533
- simde_memset(&r, 0, sizeof(r));
1534
- return r;
1535
- #endif
1536
- }
1537
- #define simde_mm512_setzero_epi32() simde_mm512_setzero_si512()
1538
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1539
- #define _mm512_setzero_si512() simde_mm512_setzero_si512()
1540
- #define _mm512_setzero_epi32() simde_mm512_setzero_si512()
1541
- #endif
1542
-
1543
- SIMDE__FUNCTION_ATTRIBUTES
1544
- simde__m512i
1545
- simde_mm512_setone_si512(void) {
1546
- simde__m512i_private r_;
1547
-
1548
- SIMDE__VECTORIZE
1549
- for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
1550
- r_.i32f[i] = ~((int_fast32_t) 0);
1551
- }
1552
-
1553
- return simde__m512i_from_private(r_);
1554
- }
1555
- #define simde_mm512_setone_epi32() simde_mm512_setone_si512()
1556
-
1557
- SIMDE__FUNCTION_ATTRIBUTES
1558
- simde__m512
1559
- simde_mm512_setzero_ps(void) {
1560
- #if defined(SIMDE_AVX512F_NATIVE)
1561
- return _mm512_setzero_ps();
1562
- #else
1563
- return simde_mm512_castsi512_ps(simde_mm512_setzero_si512());
1564
- #endif
1565
- }
1566
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1567
- #define _mm512_setzero_si512() simde_mm512_setzero_si512()
1568
- #endif
1569
-
1570
- SIMDE__FUNCTION_ATTRIBUTES
1571
- simde__m512
1572
- simde_mm512_setone_ps(void) {
1573
- return simde_mm512_castsi512_ps(simde_mm512_setone_si512());
1574
- }
1575
-
1576
- SIMDE__FUNCTION_ATTRIBUTES
1577
- simde__m512d
1578
- simde_mm512_setzero_pd(void) {
1579
- #if defined(SIMDE_AVX512F_NATIVE)
1580
- return _mm512_setzero_pd();
1581
- #else
1582
- return simde_mm512_castsi512_pd(simde_mm512_setzero_si512());
1583
- #endif
1584
- }
1585
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1586
- #define _mm512_setzero_si512() simde_mm512_setzero_si512()
1587
- #endif
1588
-
1589
- SIMDE__FUNCTION_ATTRIBUTES
1590
- simde__m512d
1591
- simde_mm512_setone_pd(void) {
1592
- return simde_mm512_castsi512_pd(simde_mm512_setone_si512());
1593
- }
1594
-
1595
- SIMDE__FUNCTION_ATTRIBUTES
1596
- simde__m512i
1597
- simde_mm512_mask_mov_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a) {
1598
- #if defined(SIMDE_AVX512F_NATIVE)
1599
- return _mm512_mask_mov_epi32(src, k, a);
1600
- #else
1601
- simde__m512i_private
1602
- src_ = simde__m512i_to_private(src),
1603
- a_ = simde__m512i_to_private(a),
1604
- r_;
1605
-
1606
- SIMDE__VECTORIZE
1607
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1608
- r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i];
1609
- }
1610
-
1611
- return simde__m512i_from_private(r_);
1612
- #endif
1613
- }
1614
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1615
- #define _mm512_mask_mov_epi32(src, k, a) simde_mm512_mask_mov_epi32(src, k, a)
1616
- #endif
1617
-
1618
- SIMDE__FUNCTION_ATTRIBUTES
1619
- simde__m512i
1620
- simde_mm512_mask_mov_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a) {
1621
- #if defined(SIMDE_AVX512F_NATIVE)
1622
- return _mm512_mask_mov_epi64(src, k, a);
1623
- #else
1624
- simde__m512i_private
1625
- src_ = simde__m512i_to_private(src),
1626
- a_ = simde__m512i_to_private(a),
1627
- r_;
1628
-
1629
- SIMDE__VECTORIZE
1630
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
1631
- r_.i64[i] = ((k >> i) & 1) ? a_.i64[i] : src_.i64[i];
1632
- }
1633
-
1634
- return simde__m512i_from_private(r_);
1635
- #endif
1636
- }
1637
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1638
- #define _mm512_mask_mov_epi64(src, k, a) simde_mm512_mask_mov_epi64(src, k, a)
1639
- #endif
1640
-
1641
- SIMDE__FUNCTION_ATTRIBUTES
1642
- simde__m512
1643
- simde_mm512_mask_mov_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
1644
- #if defined(SIMDE_AVX512F_NATIVE)
1645
- return _mm512_mask_mov_ps(src, k, a);
1646
- #else
1647
- simde__m512_private
1648
- src_ = simde__m512_to_private(src),
1649
- a_ = simde__m512_to_private(a),
1650
- r_;
1651
-
1652
- SIMDE__VECTORIZE
1653
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1654
- r_.f32[i] = ((k >> i) & 1) ? a_.f32[i] : src_.f32[i];
1655
- }
1656
-
1657
- return simde__m512_from_private(r_);
1658
- #endif
1659
- }
1660
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1661
- #define _mm512_mask_mov_ps(src, k, a) simde_mm512_mask_mov_ps(src, k, a)
1662
- #endif
1663
-
1664
- SIMDE__FUNCTION_ATTRIBUTES
1665
- simde__m512d
1666
- simde_mm512_mask_mov_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
1667
- #if defined(SIMDE_AVX512F_NATIVE)
1668
- return _mm512_mask_mov_pd(src, k, a);
1669
- #else
1670
- simde__m512d_private
1671
- src_ = simde__m512d_to_private(src),
1672
- a_ = simde__m512d_to_private(a),
1673
- r_;
1674
-
1675
- SIMDE__VECTORIZE
1676
- for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1677
- r_.f64[i] = ((k >> i) & 1) ? a_.f64[i] : src_.f64[i];
1678
- }
1679
-
1680
- return simde__m512d_from_private(r_);
1681
- #endif
1682
- }
1683
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1684
- #define _mm512_mask_mov_pd(src, k, a) simde_mm512_mask_mov_pd(src, k, a)
1685
- #endif
1686
-
1687
- SIMDE__FUNCTION_ATTRIBUTES
1688
- simde__m512i
1689
- simde_mm512_maskz_mov_epi32(simde__mmask16 k, simde__m512i a) {
1690
- #if defined(SIMDE_AVX512F_NATIVE)
1691
- return _mm512_maskz_mov_epi32(k, a);
1692
- #else
1693
- simde__m512i_private
1694
- a_ = simde__m512i_to_private(a),
1695
- r_;
1696
-
1697
- SIMDE__VECTORIZE
1698
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1699
- r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : INT32_C(0);
1700
- }
1701
-
1702
- return simde__m512i_from_private(r_);
1703
- #endif
1704
- }
1705
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1706
- #define _mm512_maskz_mov_epi32(k, a) simde_mm512_maskz_mov_epi32(k, a)
1707
- #endif
1708
-
1709
- SIMDE__FUNCTION_ATTRIBUTES
1710
- simde__m512i
1711
- simde_mm512_maskz_mov_epi64(simde__mmask8 k, simde__m512i a) {
1712
- #if defined(SIMDE_AVX512F_NATIVE)
1713
- return _mm512_maskz_mov_epi64(k, a);
1714
- #else
1715
- simde__m512i_private
1716
- a_ = simde__m512i_to_private(a),
1717
- r_;
1718
-
1719
- SIMDE__VECTORIZE
1720
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
1721
- r_.i64[i] = ((k >> i) & 1) ? a_.i64[i] : INT64_C(0);
1722
- }
1723
-
1724
- return simde__m512i_from_private(r_);
1725
- #endif
1726
- }
1727
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1728
- #define _mm512_maskz_mov_epi64(k, a) simde_mm512_maskz_mov_epi64(k, a)
1729
- #endif
1730
-
1731
- SIMDE__FUNCTION_ATTRIBUTES
1732
- simde__m512
1733
- simde_mm512_maskz_mov_ps(simde__mmask16 k, simde__m512 a) {
1734
- #if defined(SIMDE_AVX512F_NATIVE)
1735
- return _mm512_maskz_mov_ps(k, a);
1736
- #else
1737
- simde__m512_private
1738
- a_ = simde__m512_to_private(a),
1739
- r_;
1740
-
1741
- SIMDE__VECTORIZE
1742
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1743
- r_.f32[i] = ((k >> i) & 1) ? a_.f32[i] : SIMDE_FLOAT32_C(0.0);
1744
- }
1745
-
1746
- return simde__m512_from_private(r_);
1747
- #endif
1748
- }
1749
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1750
- #define _mm512_maskz_mov_ps(k, a) simde_mm512_maskz_mov_ps(k, a)
1751
- #endif
1752
-
1753
- SIMDE__FUNCTION_ATTRIBUTES
1754
- simde__m512d
1755
- simde_mm512_maskz_mov_pd(simde__mmask8 k, simde__m512d a) {
1756
- #if defined(SIMDE_AVX512F_NATIVE)
1757
- return _mm512_maskz_mov_pd(k, a);
1758
- #else
1759
- simde__m512d_private
1760
- a_ = simde__m512d_to_private(a),
1761
- r_;
1762
-
1763
- SIMDE__VECTORIZE
1764
- for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1765
- r_.f64[i] = ((k >> i) & 1) ? a_.f64[i] : SIMDE_FLOAT64_C(0.0);
1766
- }
1767
-
1768
- return simde__m512d_from_private(r_);
1769
- #endif
1770
- }
1771
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1772
- #define _mm512_maskz_mov_pd(k, a) simde_mm512_maskz_mov_pd(k, a)
1773
- #endif
1774
-
1775
- SIMDE__FUNCTION_ATTRIBUTES
1776
- simde__m512i
1777
- simde_mm512_abs_epi32(simde__m512i a) {
1778
- #if defined(SIMDE_AVX512F_NATIVE)
1779
- return _mm512_abs_epi32(a);
1780
- #else
1781
- simde__m512i_private
1782
- r_,
1783
- a_ = simde__m512i_to_private(a);
1784
-
1785
- SIMDE__VECTORIZE
1786
- for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
1787
- r_.i32[i] = (a_.i32[i] < INT64_C(0)) ? -a_.i32[i] : a_.i32[i];
1788
- }
1789
-
1790
- return simde__m512i_from_private(r_);
1791
- #endif
1792
- }
1793
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1794
- # define _mm512_abs_epi32(a) simde_mm512_abs_epi32(a)
1795
- #endif
1796
-
1797
- SIMDE__FUNCTION_ATTRIBUTES
1798
- simde__m512i
1799
- simde_mm512_mask_abs_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a) {
1800
- #if defined(SIMDE_AVX512F_NATIVE)
1801
- return _mm512_mask_abs_epi32(src, k, a);
1802
- #else
1803
- return simde_mm512_mask_mov_epi32(src, k, simde_mm512_abs_epi32(a));
1804
- #endif
1805
- }
1806
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1807
- #define _mm512_mask_abs_epi32(src, k, a) simde_mm512_mask_abs_epi32(src, k, a)
1808
- #endif
1809
-
1810
- SIMDE__FUNCTION_ATTRIBUTES
1811
- simde__m512i
1812
- simde_mm512_maskz_abs_epi32(simde__mmask16 k, simde__m512i a) {
1813
- #if defined(SIMDE_AVX512F_NATIVE)
1814
- return _mm512_maskz_abs_epi32(k, a);
1815
- #else
1816
- return simde_mm512_maskz_mov_epi32(k, simde_mm512_abs_epi32(a));
1817
- #endif
1818
- }
1819
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1820
- #define _mm512_maskz_abs_epi32(k, a) simde_mm512_maskz_abs_epi32(k, a)
1821
- #endif
1822
-
1823
- SIMDE__FUNCTION_ATTRIBUTES
1824
- simde__m512i
1825
- simde_mm512_abs_epi64(simde__m512i a) {
1826
- #if defined(SIMDE_AVX512F_NATIVE)
1827
- return _mm512_abs_epi64(a);
1828
- #else
1829
- simde__m512i_private
1830
- r_,
1831
- a_ = simde__m512i_to_private(a);
1832
-
1833
- SIMDE__VECTORIZE
1834
- for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
1835
- r_.i64[i] = (a_.i64[i] < INT64_C(0)) ? -a_.i64[i] : a_.i64[i];
1836
- }
1837
-
1838
- return simde__m512i_from_private(r_);
1839
- #endif
1840
- }
1841
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1842
- # define _mm512_abs_epi64(a) simde_mm512_abs_epi64(a)
1843
- #endif
1844
-
1845
- SIMDE__FUNCTION_ATTRIBUTES
1846
- simde__m512i
1847
- simde_mm512_mask_abs_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a) {
1848
- #if defined(SIMDE_AVX512F_NATIVE)
1849
- return _mm512_mask_abs_epi64(src, k, a);
1850
- #else
1851
- return simde_mm512_mask_mov_epi64(src, k, simde_mm512_abs_epi64(a));
1852
- #endif
1853
- }
1854
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1855
- #define _mm512_mask_abs_epi64(src, k, a) simde_mm512_mask_abs_epi64(src, k, a)
1856
- #endif
1857
-
1858
- SIMDE__FUNCTION_ATTRIBUTES
1859
- simde__m512i
1860
- simde_mm512_maskz_abs_epi64(simde__mmask8 k, simde__m512i a) {
1861
- #if defined(SIMDE_AVX512F_NATIVE)
1862
- return _mm512_maskz_abs_epi64(k, a);
1863
- #else
1864
- return simde_mm512_maskz_mov_epi64(k, simde_mm512_abs_epi64(a));
1865
- #endif
1866
- }
1867
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1868
- #define _mm512_maskz_abs_epi64(k, a) simde_mm512_maskz_abs_epi64(k, a)
1869
- #endif
1870
-
1871
- SIMDE__FUNCTION_ATTRIBUTES
1872
- simde__m512i
1873
- simde_mm512_add_epi32 (simde__m512i a, simde__m512i b) {
1874
- #if defined(SIMDE_AVX512F_NATIVE)
1875
- return _mm512_add_epi32(a, b);
1876
- #else
1877
- simde__m512i_private
1878
- r_,
1879
- a_ = simde__m512i_to_private(a),
1880
- b_ = simde__m512i_to_private(b);
1881
-
1882
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
1883
- r_.i32 = a_.i32 + b_.i32;
1884
- #else
1885
- SIMDE__VECTORIZE
1886
- for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
1887
- r_.m256i[i] = simde_mm256_add_epi32(a_.m256i[i], b_.m256i[i]);
1888
- }
1889
- #endif
1890
-
1891
- return simde__m512i_from_private(r_);
1892
- #endif
1893
- }
1894
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1895
- #define _mm512_add_epi32(a, b) simde_mm512_add_epi32(a, b)
1896
- #endif
1897
-
1898
- SIMDE__FUNCTION_ATTRIBUTES
1899
- simde__m512i
1900
- simde_mm512_mask_add_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
1901
- #if defined(SIMDE_AVX512F_NATIVE)
1902
- return _mm512_mask_add_epi32(src, k, a, b);
1903
- #else
1904
- return simde_mm512_mask_mov_epi32(src, k, simde_mm512_add_epi32(a, b));
1905
- #endif
1906
- }
1907
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1908
- #define _mm512_mask_add_epi32(src, k, a, b) simde_mm512_mask_add_epi32(src, k, a, b)
1909
- #endif
1910
-
1911
- SIMDE__FUNCTION_ATTRIBUTES
1912
- simde__m512i
1913
- simde_mm512_maskz_add_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) {
1914
- #if defined(SIMDE_AVX512F_NATIVE)
1915
- return _mm512_maskz_add_epi32(k, a, b);
1916
- #else
1917
- return simde_mm512_maskz_mov_epi32(k, simde_mm512_add_epi32(a, b));
1918
- #endif
1919
- }
1920
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1921
- #define _mm512_maskz_add_epi32(k, a, b) simde_mm512_maskz_add_epi32(k, a, b)
1922
- #endif
1923
-
1924
- SIMDE__FUNCTION_ATTRIBUTES
1925
- simde__m512i
1926
- simde_mm512_add_epi64 (simde__m512i a, simde__m512i b) {
1927
- #if defined(SIMDE_AVX512F_NATIVE)
1928
- return _mm512_add_epi64(a, b);
1929
- #else
1930
- simde__m512i_private
1931
- r_,
1932
- a_ = simde__m512i_to_private(a),
1933
- b_ = simde__m512i_to_private(b);
1934
-
1935
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
1936
- r_.i64 = a_.i64 + b_.i64;
1937
- #else
1938
- SIMDE__VECTORIZE
1939
- for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
1940
- r_.m256i[i] = simde_mm256_add_epi64(a_.m256i[i], b_.m256i[i]);
1941
- }
1942
- #endif
1943
-
1944
- return simde__m512i_from_private(r_);
1945
- #endif
1946
- }
1947
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1948
- #define _mm512_add_epi64(a, b) simde_mm512_add_epi64(a, b)
1949
- #endif
1950
-
1951
- SIMDE__FUNCTION_ATTRIBUTES
1952
- simde__m512i
1953
- simde_mm512_mask_add_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
1954
- #if defined(SIMDE_AVX512F_NATIVE)
1955
- return _mm512_mask_add_epi64(src, k, a, b);
1956
- #else
1957
- return simde_mm512_mask_mov_epi64(src, k, simde_mm512_add_epi64(a, b));
1958
- #endif
1959
- }
1960
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1961
- #define _mm512_mask_add_epi64(src, k, a, b) simde_mm512_mask_add_epi64(src, k, a, b)
1962
- #endif
1963
-
1964
- SIMDE__FUNCTION_ATTRIBUTES
1965
- simde__m512i
1966
- simde_mm512_maskz_add_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) {
1967
- #if defined(SIMDE_AVX512F_NATIVE)
1968
- return _mm512_maskz_add_epi64(k, a, b);
1969
- #else
1970
- return simde_mm512_maskz_mov_epi64(k, simde_mm512_add_epi64(a, b));
1971
- #endif
1972
- }
1973
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1974
- #define _mm512_maskz_add_epi64(k, a, b) simde_mm512_maskz_add_epi64(k, a, b)
1975
- #endif
1976
-
1977
-
1978
- SIMDE__FUNCTION_ATTRIBUTES
1979
- simde__m512
1980
- simde_mm512_add_ps (simde__m512 a, simde__m512 b) {
1981
- #if defined(SIMDE_AVX512F_NATIVE)
1982
- return _mm512_add_ps(a, b);
1983
- #else
1984
- simde__m512_private
1985
- r_,
1986
- a_ = simde__m512_to_private(a),
1987
- b_ = simde__m512_to_private(b);
1988
-
1989
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
1990
- r_.f32 = a_.f32 + b_.f32;
1991
- #else
1992
- SIMDE__VECTORIZE
1993
- for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
1994
- r_.m256[i] = simde_mm256_add_ps(a_.m256[i], b_.m256[i]);
1995
- }
1996
- #endif
1997
-
1998
- return simde__m512_from_private(r_);
1999
- #endif
2000
- }
2001
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2002
- #define _mm512_add_ps(a, b) simde_mm512_add_ps(a, b)
2003
- #endif
2004
-
2005
- SIMDE__FUNCTION_ATTRIBUTES
2006
- simde__m512
2007
- simde_mm512_mask_add_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
2008
- #if defined(SIMDE_AVX512F_NATIVE)
2009
- return _mm512_mask_add_ps(src, k, a, b);
2010
- #else
2011
- return simde_mm512_mask_mov_ps(src, k, simde_mm512_add_ps(a, b));
2012
- #endif
2013
- }
2014
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2015
- #define _mm512_mask_add_ps(src, k, a, b) simde_mm512_mask_add_ps(src, k, a, b)
2016
- #endif
2017
-
2018
- SIMDE__FUNCTION_ATTRIBUTES
2019
- simde__m512
2020
- simde_mm512_maskz_add_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
2021
- #if defined(SIMDE_AVX512F_NATIVE)
2022
- return _mm512_maskz_add_ps(k, a, b);
2023
- #else
2024
- return simde_mm512_maskz_mov_ps(k, simde_mm512_add_ps(a, b));
2025
- #endif
2026
- }
2027
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2028
- #define _mm512_maskz_add_ps(k, a, b) simde_mm512_maskz_add_ps(k, a, b)
2029
- #endif
2030
-
2031
-
2032
- SIMDE__FUNCTION_ATTRIBUTES
2033
- simde__m512d
2034
- simde_mm512_add_pd (simde__m512d a, simde__m512d b) {
2035
- #if defined(SIMDE_AVX512F_NATIVE)
2036
- return _mm512_add_pd(a, b);
2037
- #else
2038
- simde__m512d_private
2039
- r_,
2040
- a_ = simde__m512d_to_private(a),
2041
- b_ = simde__m512d_to_private(b);
2042
-
2043
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2044
- r_.f64 = a_.f64 + b_.f64;
2045
- #else
2046
- SIMDE__VECTORIZE
2047
- for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
2048
- r_.m256d[i] = simde_mm256_add_pd(a_.m256d[i], b_.m256d[i]);
2049
- }
2050
- #endif
2051
-
2052
- return simde__m512d_from_private(r_);
2053
- #endif
2054
- }
2055
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2056
- #define _mm512_add_pd(a, b) simde_mm512_add_pd(a, b)
2057
- #endif
2058
-
2059
- SIMDE__FUNCTION_ATTRIBUTES
2060
- simde__m512d
2061
- simde_mm512_mask_add_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
2062
- #if defined(SIMDE_AVX512F_NATIVE)
2063
- return _mm512_mask_add_pd(src, k, a, b);
2064
- #else
2065
- return simde_mm512_mask_mov_pd(src, k, simde_mm512_add_pd(a, b));
2066
- #endif
2067
- }
2068
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2069
- #define _mm512_mask_add_pd(src, k, a, b) simde_mm512_mask_add_pd(src, k, a, b)
2070
- #endif
2071
-
2072
- SIMDE__FUNCTION_ATTRIBUTES
2073
- simde__m512d
2074
- simde_mm512_maskz_add_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
2075
- #if defined(SIMDE_AVX512F_NATIVE)
2076
- return _mm512_maskz_add_pd(k, a, b);
2077
- #else
2078
- return simde_mm512_maskz_mov_pd(k, simde_mm512_add_pd(a, b));
2079
- #endif
2080
- }
2081
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2082
- #define _mm512_maskz_add_pd(k, a, b) simde_mm512_maskz_add_pd(k, a, b)
2083
- #endif
2084
-
2085
- SIMDE__FUNCTION_ATTRIBUTES
2086
- simde__m512i
2087
- simde_mm512_and_si512 (simde__m512i a, simde__m512i b) {
2088
- #if defined(SIMDE_AVX512F_NATIVE)
2089
- return _mm512_and_si512(a, b);
2090
- #else
2091
- simde__m512i_private
2092
- r_,
2093
- a_ = simde__m512i_to_private(a),
2094
- b_ = simde__m512i_to_private(b);
2095
-
2096
- #if defined(SIMDE_ARCH_X86_AVX2)
2097
- r_.m256i[0] = simde_mm256_and_si256(a_.m256i[0], b_.m256i[0]);
2098
- r_.m256i[1] = simde_mm256_and_si256(a_.m256i[1], b_.m256i[1]);
2099
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2100
- r_.i32f = a_.i32f & b_.i32f;
2101
- #else
2102
- SIMDE__VECTORIZE
2103
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
2104
- r_.i32[i] = a_.i32[i] & b_.i32[i];
2105
- }
2106
- #endif
2107
-
2108
- return simde__m512i_from_private(r_);
2109
- #endif
2110
- }
2111
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2112
- #define _mm512_and_si512(a, b) simde_mm512_and_si512(a, b)
2113
- #endif
2114
-
2115
- SIMDE__FUNCTION_ATTRIBUTES
2116
- simde__m512i
2117
- simde_mm512_andnot_si512 (simde__m512i a, simde__m512i b) {
2118
- #if defined(SIMDE_AVX512F_NATIVE)
2119
- return _mm512_andnot_si512(a, b);
2120
- #else
2121
- simde__m512i_private
2122
- r_,
2123
- a_ = simde__m512i_to_private(a),
2124
- b_ = simde__m512i_to_private(b);
2125
-
2126
- #if defined(SIMDE_ARCH_X86_AVX2)
2127
- r_.m256i[0] = simde_mm256_andnot_si256(a_.m256i[0], b_.m256i[0]);
2128
- r_.m256i[1] = simde_mm256_andnot_si256(a_.m256i[1], b_.m256i[1]);
2129
- #else
2130
- SIMDE__VECTORIZE
2131
- for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
2132
- r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i];
2133
- }
2134
- #endif
2135
-
2136
- return simde__m512i_from_private(r_);
2137
- #endif
2138
- }
2139
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2140
- #define _mm512_andnot_si512(a, b) simde_mm512_andnot_si512(a, b)
2141
- #endif
2142
-
2143
- SIMDE__FUNCTION_ATTRIBUTES
2144
- simde__m512i
2145
- simde_mm512_broadcast_i32x4 (simde__m128i a) {
2146
- #if defined(SIMDE_AVX512F_NATIVE)
2147
- return _mm512_broadcast_i32x4(a);
2148
- #else
2149
- simde__m512i_private r_;
2150
-
2151
- #if defined(SIMDE_ARCH_X86_AVX2)
2152
- r_.m256i[1] = r_.m256i[0] = simde_mm256_broadcastsi128_si256(a);
2153
- #elif defined(SIMDE_ARCH_X86_SSE2)
2154
- r_.m128i[3] = r_.m128i[2] = r_.m128i[1] = r_.m128i[0] = a;
2155
- #else
2156
- SIMDE__VECTORIZE
2157
- for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
2158
- r_.m128i[i] = a;
2159
- }
2160
- #endif
2161
-
2162
- return simde__m512i_from_private(r_);
2163
- #endif
2164
- }
2165
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2166
- #define _mm512_broadcast_i32x4(a) simde_mm512_broadcast_i32x4(a)
2167
- #endif
2168
-
2169
- SIMDE__FUNCTION_ATTRIBUTES
2170
- simde__mmask16
2171
- simde_mm512_cmpeq_epi32_mask (simde__m512i a, simde__m512i b) {
2172
- #if defined(SIMDE_AVX512F_NATIVE)
2173
- return _mm512_cmpeq_epi32_mask(a, b);
2174
- #else
2175
- simde__m512i_private
2176
- r_,
2177
- a_ = simde__m512i_to_private(a),
2178
- b_ = simde__m512i_to_private(b);
2179
-
2180
- for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
2181
- r_.m256i[i] = simde_mm256_cmpeq_epi32(a_.m256i[i], b_.m256i[i]);
2182
- }
2183
-
2184
- return simde__m512i_private_to_mmask16(r_);
2185
- #endif
2186
- }
2187
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2188
- #define _mm512_cmpeq_epi32_mask(a, b) simde_mm512_cmpeq_epi32_mask(a, b)
2189
- #endif
2190
-
2191
- SIMDE__FUNCTION_ATTRIBUTES
2192
- simde__mmask16
2193
- simde_mm512_mask_cmpeq_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) {
2194
- #if defined(SIMDE_AVX512F_NATIVE)
2195
- return _mm512_mask_cmpeq_epi32_mask(k1, a, b);
2196
- #else
2197
- return simde_mm512_cmpeq_epi32_mask(a, b) & k1;
2198
- #endif
2199
- }
2200
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2201
- #define _mm512_mask_cmpeq_epi32_mask(k1, a, b) simde_mm512_mask_cmpeq_epi32_mask(k1, a, b)
2202
- #endif
2203
-
2204
- SIMDE__FUNCTION_ATTRIBUTES
2205
- simde__mmask8
2206
- simde_mm512_cmpeq_epi64_mask (simde__m512i a, simde__m512i b) {
2207
- #if defined(SIMDE_AVX512F_NATIVE)
2208
- return _mm512_cmpeq_epi64_mask(a, b);
2209
- #else
2210
- simde__m512i_private
2211
- r_,
2212
- a_ = simde__m512i_to_private(a),
2213
- b_ = simde__m512i_to_private(b);
2214
-
2215
- for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
2216
- r_.m256i[i] = simde_mm256_cmpeq_epi64(a_.m256i[i], b_.m256i[i]);
2217
- }
2218
-
2219
- return simde__m512i_private_to_mmask8(r_);
2220
- #endif
2221
- }
2222
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2223
- #define _mm512_cmpeq_epi64_mask(a, b) simde_mm512_cmpeq_epi64_mask(a, b)
2224
- #endif
2225
-
2226
- SIMDE__FUNCTION_ATTRIBUTES
2227
- simde__mmask8
2228
- simde_mm512_mask_cmpeq_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) {
2229
- #if defined(SIMDE_AVX512F_NATIVE)
2230
- return _mm512_mask_cmpeq_epi64_mask(k1, a, b);
2231
- #else
2232
- return simde_mm512_cmpeq_epi64_mask(a, b) & k1;
2233
- #endif
2234
- }
2235
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2236
- #define _mm512_mask_cmpeq_epi64_mask(k1, a, b) simde_mm512_mask_cmpeq_epi64_mask(k1, a, b)
2237
- #endif
2238
-
2239
- SIMDE__FUNCTION_ATTRIBUTES
2240
- simde__mmask16
2241
- simde_mm512_cmpgt_epi32_mask (simde__m512i a, simde__m512i b) {
2242
- #if defined(SIMDE_AVX512F_NATIVE)
2243
- return _mm512_cmpgt_epi32_mask(a, b);
2244
- #else
2245
- simde__m512i_private
2246
- r_,
2247
- a_ = simde__m512i_to_private(a),
2248
- b_ = simde__m512i_to_private(b);
2249
-
2250
- for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
2251
- r_.m256i[i] = simde_mm256_cmpgt_epi32(a_.m256i[i], b_.m256i[i]);
2252
- }
2253
-
2254
- return simde__m512i_private_to_mmask16(r_);
2255
- #endif
2256
- }
2257
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2258
- #define _mm512_cmpgt_epi32_mask(a, b) simde_mm512_cmpgt_epi32_mask(a, b)
2259
- #endif
2260
-
2261
- SIMDE__FUNCTION_ATTRIBUTES
2262
- simde__mmask16
2263
- simde_mm512_mask_cmpgt_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) {
2264
- #if defined(SIMDE_AVX512F_NATIVE)
2265
- return _mm512_mask_cmpgt_epi32_mask(k1, a, b);
2266
- #else
2267
- return simde_mm512_cmpgt_epi32_mask(a, b) & k1;
2268
- #endif
2269
- }
2270
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2271
- #define _mm512_mask_cmpgt_epi32_mask(k1, a, b) simde_mm512_mask_cmpgt_epi32_mask(k1, a, b)
2272
- #endif
2273
-
2274
- SIMDE__FUNCTION_ATTRIBUTES
2275
- simde__mmask8
2276
- simde_mm512_cmpgt_epi64_mask (simde__m512i a, simde__m512i b) {
2277
- #if defined(SIMDE_AVX512F_NATIVE)
2278
- return _mm512_cmpgt_epi64_mask(a, b);
2279
- #else
2280
- simde__m512i_private
2281
- r_,
2282
- a_ = simde__m512i_to_private(a),
2283
- b_ = simde__m512i_to_private(b);
2284
-
2285
- for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
2286
- r_.m256i[i] = simde_mm256_cmpgt_epi64(a_.m256i[i], b_.m256i[i]);
2287
- }
2288
-
2289
- return simde__m512i_private_to_mmask8(r_);
2290
- #endif
2291
- }
2292
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2293
- #define _mm512_cmpgt_epi64_mask(a, b) simde_mm512_cmpgt_epi64_mask(a, b)
2294
- #endif
2295
-
2296
- SIMDE__FUNCTION_ATTRIBUTES
2297
- simde__mmask8
2298
- simde_mm512_mask_cmpgt_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) {
2299
- #if defined(SIMDE_AVX512F_NATIVE)
2300
- return _mm512_mask_cmpgt_epi64_mask(k1, a, b);
2301
- #else
2302
- return simde_mm512_cmpgt_epi64_mask(a, b) & k1;
2303
- #endif
2304
- }
2305
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2306
- #define _mm512_mask_cmpgt_epi64_mask(k1, a, b) simde_mm512_mask_cmpgt_epi64_mask(k1, a, b)
2307
- #endif
2308
-
2309
- SIMDE__FUNCTION_ATTRIBUTES
2310
- simde__m512i
2311
- simde_mm512_cvtepi8_epi32 (simde__m128i a) {
2312
- #if defined(SIMDE_AVX512F_NATIVE)
2313
- return _mm512_cvtepi8_epi32(a);
2314
- #else
2315
- simde__m512i_private r_;
2316
- simde__m128i_private a_ = simde__m128i_to_private(a);
2317
-
2318
- #if defined(SIMDE__CONVERT_VECTOR)
2319
- SIMDE__CONVERT_VECTOR(r_.i32, a_.i8);
2320
- #else
2321
- SIMDE__VECTORIZE
2322
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
2323
- r_.i32[i] = a_.i8[i];
2324
- }
2325
- #endif
2326
-
2327
- return simde__m512i_from_private(r_);
2328
- #endif
2329
- }
2330
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2331
- #define _mm512_cvtepi8_epi32(a) simde_mm512_cvtepi8_epi32(a)
2332
- #endif
2333
-
2334
- SIMDE__FUNCTION_ATTRIBUTES
2335
- simde__m512i
2336
- simde_mm512_cvtepi8_epi64 (simde__m128i a) {
2337
- #if defined(SIMDE_AVX512F_NATIVE)
2338
- return _mm512_cvtepi8_epi64(a);
2339
- #else
2340
- simde__m512i_private r_;
2341
- simde__m128i_private a_ = simde__m128i_to_private(a);
2342
-
2343
- #if defined(SIMDE__CONVERT_VECTOR)
2344
- SIMDE__CONVERT_VECTOR(r_.i64, a_.m64_private[0].i8);
2345
- #else
2346
- SIMDE__VECTORIZE
2347
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
2348
- r_.i64[i] = a_.i8[i];
2349
- }
2350
- #endif
2351
-
2352
- return simde__m512i_from_private(r_);
2353
- #endif
2354
- }
2355
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2356
- #define _mm512_cvtepi8_epi64(a) simde_mm512_cvtepi8_epi64(a)
2357
- #endif
2358
-
2359
- SIMDE__FUNCTION_ATTRIBUTES
2360
- simde__m128i
2361
- simde_mm512_cvtepi32_epi8 (simde__m512i a) {
2362
- #if defined(SIMDE_AVX512F_NATIVE)
2363
- return _mm512_cvtepi32_epi8(a);
2364
- #else
2365
- simde__m128i_private r_;
2366
- simde__m512i_private a_ = simde__m512i_to_private(a);
2367
-
2368
- #if defined(SIMDE__CONVERT_VECTOR)
2369
- SIMDE__CONVERT_VECTOR(r_.i8, a_.i32);
2370
- #else
2371
- SIMDE__VECTORIZE
2372
- for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
2373
- r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i32[i]);
2374
- }
2375
- #endif
2376
-
2377
- return simde__m128i_from_private(r_);
2378
- #endif
2379
- }
2380
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2381
- #define _mm512_cvtepi32_epi8(a) simde_mm512_cvtepi32_epi8(a)
2382
- #endif
2383
-
2384
- SIMDE__FUNCTION_ATTRIBUTES
2385
- simde__m256i
2386
- simde_mm512_cvtepi32_epi16 (simde__m512i a) {
2387
- #if defined(SIMDE_AVX512F_NATIVE)
2388
- return _mm512_cvtepi32_epi16(a);
2389
- #else
2390
- simde__m256i_private r_;
2391
- simde__m512i_private a_ = simde__m512i_to_private(a);
2392
-
2393
- #if defined(SIMDE__CONVERT_VECTOR)
2394
- SIMDE__CONVERT_VECTOR(r_.i16, a_.i32);
2395
- #else
2396
- SIMDE__VECTORIZE
2397
- for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
2398
- r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]);
2399
- }
2400
- #endif
2401
-
2402
- return simde__m256i_from_private(r_);
2403
- #endif
2404
- }
2405
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2406
- #define _mm512_cvtepi32_epi16(a) simde_mm512_cvtepi32_epi16(a)
2407
- #endif
2408
-
2409
- SIMDE__FUNCTION_ATTRIBUTES
2410
- simde__m128i
2411
- simde_mm512_cvtepi64_epi8 (simde__m512i a) {
2412
- #if defined(SIMDE_AVX512F_NATIVE)
2413
- return _mm512_cvtepi64_epi8(a);
2414
- #else
2415
- simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128());
2416
- simde__m512i_private a_ = simde__m512i_to_private(a);
2417
-
2418
- #if defined(SIMDE__CONVERT_VECTOR)
2419
- SIMDE__CONVERT_VECTOR(r_.m64_private[0].i8, a_.i64);
2420
- #else
2421
- SIMDE__VECTORIZE
2422
- for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
2423
- r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i64[i]);
2424
- }
2425
- #endif
2426
-
2427
- return simde__m128i_from_private(r_);
2428
- #endif
2429
- }
2430
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2431
- #define _mm512_cvtepi64_epi8(a) simde_mm512_cvtepi64_epi8(a)
2432
- #endif
2433
-
2434
- SIMDE__FUNCTION_ATTRIBUTES
2435
- simde__m128i
2436
- simde_mm512_cvtepi64_epi16 (simde__m512i a) {
2437
- #if defined(SIMDE_AVX512F_NATIVE)
2438
- return _mm512_cvtepi64_epi16(a);
2439
- #else
2440
- simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128());
2441
- simde__m512i_private a_ = simde__m512i_to_private(a);
2442
-
2443
- #if defined(SIMDE__CONVERT_VECTOR)
2444
- SIMDE__CONVERT_VECTOR(r_.i16, a_.i64);
2445
- #else
2446
- SIMDE__VECTORIZE
2447
- for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
2448
- r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i64[i]);
2449
- }
2450
- #endif
2451
-
2452
- return simde__m128i_from_private(r_);
2453
- #endif
2454
- }
2455
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2456
- #define _mm512_cvtepi64_epi16(a) simde_mm512_cvtepi64_epi16(a)
2457
- #endif
2458
-
2459
- SIMDE__FUNCTION_ATTRIBUTES
2460
- simde__m256i
2461
- simde_mm512_cvtepi64_epi32 (simde__m512i a) {
2462
- #if defined(SIMDE_AVX512F_NATIVE)
2463
- return _mm512_cvtepi64_epi32(a);
2464
- #else
2465
- simde__m256i_private r_;
2466
- simde__m512i_private a_ = simde__m512i_to_private(a);
2467
-
2468
- #if defined(SIMDE__CONVERT_VECTOR)
2469
- SIMDE__CONVERT_VECTOR(r_.i32, a_.i64);
2470
- #else
2471
- SIMDE__VECTORIZE
2472
- for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
2473
- r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i64[i]);
2474
- }
2475
- #endif
2476
-
2477
- return simde__m256i_from_private(r_);
2478
- #endif
2479
- }
2480
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2481
- #define _mm512_cvtepi64_epi32(a) simde_mm512_cvtepi64_epi32(a)
2482
- #endif
2483
-
2484
- SIMDE__FUNCTION_ATTRIBUTES
2485
- simde__m128i
2486
- simde_mm512_cvtsepi32_epi8 (simde__m512i a) {
2487
- #if defined(SIMDE_AVX512F_NATIVE)
2488
- return _mm512_cvtsepi32_epi8(a);
2489
- #else
2490
- simde__m128i_private r_;
2491
- simde__m512i_private a_ = simde__m512i_to_private(a);
2492
-
2493
- SIMDE__VECTORIZE
2494
- for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
2495
- r_.i8[i] =
2496
- (a_.i32[i] < INT8_MIN)
2497
- ? (INT8_MIN)
2498
- : ((a_.i32[i] > INT8_MAX)
2499
- ? (INT8_MAX)
2500
- : HEDLEY_STATIC_CAST(int8_t, a_.i32[i]));
2501
- }
2502
-
2503
- return simde__m128i_from_private(r_);
2504
- #endif
2505
- }
2506
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2507
- #define _mm512_cvtsepi32_epi8(a) simde_mm512_cvtsepi32_epi8(a)
2508
- #endif
2509
-
2510
- SIMDE__FUNCTION_ATTRIBUTES
2511
- simde__m256i
2512
- simde_mm512_cvtsepi32_epi16 (simde__m512i a) {
2513
- #if defined(SIMDE_AVX512F_NATIVE)
2514
- return _mm512_cvtsepi32_epi16(a);
2515
- #else
2516
- simde__m256i_private r_;
2517
- simde__m512i_private a_ = simde__m512i_to_private(a);
2518
-
2519
- SIMDE__VECTORIZE
2520
- for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
2521
- r_.i16[i] =
2522
- (a_.i32[i] < INT16_MIN)
2523
- ? (INT16_MIN)
2524
- : ((a_.i32[i] > INT16_MAX)
2525
- ? (INT16_MAX)
2526
- : HEDLEY_STATIC_CAST(int16_t, a_.i32[i]));
2527
- }
2528
-
2529
- return simde__m256i_from_private(r_);
2530
- #endif
2531
- }
2532
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2533
- #define _mm512_cvtsepi32_epi16(a) simde_mm512_cvtsepi32_epi16(a)
2534
- #endif
2535
-
2536
- SIMDE__FUNCTION_ATTRIBUTES
2537
- simde__m128i
2538
- simde_mm512_cvtsepi64_epi8 (simde__m512i a) {
2539
- #if defined(SIMDE_AVX512F_NATIVE)
2540
- return _mm512_cvtsepi64_epi8(a);
2541
- #else
2542
- simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128());
2543
- simde__m512i_private a_ = simde__m512i_to_private(a);
2544
-
2545
- SIMDE__VECTORIZE
2546
- for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
2547
- r_.i8[i] =
2548
- (a_.i64[i] < INT8_MIN)
2549
- ? (INT8_MIN)
2550
- : ((a_.i64[i] > INT8_MAX)
2551
- ? (INT8_MAX)
2552
- : HEDLEY_STATIC_CAST(int8_t, a_.i64[i]));
2553
- }
2554
-
2555
- return simde__m128i_from_private(r_);
2556
- #endif
2557
- }
2558
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2559
- #define _mm512_cvtsepi64_epi8(a) simde_mm512_cvtsepi64_epi8(a)
2560
- #endif
2561
-
2562
- SIMDE__FUNCTION_ATTRIBUTES
2563
- simde__m128i
2564
- simde_mm512_cvtsepi64_epi16 (simde__m512i a) {
2565
- #if defined(SIMDE_AVX512F_NATIVE)
2566
- return _mm512_cvtsepi64_epi16(a);
2567
- #else
2568
- simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128());
2569
- simde__m512i_private a_ = simde__m512i_to_private(a);
2570
-
2571
- SIMDE__VECTORIZE
2572
- for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
2573
- r_.i16[i] =
2574
- (a_.i64[i] < INT16_MIN)
2575
- ? (INT16_MIN)
2576
- : ((a_.i64[i] > INT16_MAX)
2577
- ? (INT16_MAX)
2578
- : HEDLEY_STATIC_CAST(int16_t, a_.i64[i]));
2579
- }
2580
-
2581
- return simde__m128i_from_private(r_);
2582
- #endif
2583
- }
2584
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2585
- #define _mm512_cvtsepi64_epi16(a) simde_mm512_cvtsepi64_epi16(a)
2586
- #endif
2587
-
2588
- SIMDE__FUNCTION_ATTRIBUTES
2589
- simde__m256i
2590
- simde_mm512_cvtsepi64_epi32 (simde__m512i a) {
2591
- #if defined(SIMDE_AVX512F_NATIVE)
2592
- return _mm512_cvtsepi64_epi32(a);
2593
- #else
2594
- simde__m256i_private r_;
2595
- simde__m512i_private a_ = simde__m512i_to_private(a);
2596
-
2597
- SIMDE__VECTORIZE
2598
- for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
2599
- r_.i32[i] =
2600
- (a_.i64[i] < INT32_MIN)
2601
- ? (INT32_MIN)
2602
- : ((a_.i64[i] > INT32_MAX)
2603
- ? (INT32_MAX)
2604
- : HEDLEY_STATIC_CAST(int32_t, a_.i64[i]));
2605
- }
2606
-
2607
- return simde__m256i_from_private(r_);
2608
- #endif
2609
- }
2610
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2611
- #define _mm512_cvtsepi64_epi32(a) simde_mm512_cvtsepi64_epi32(a)
2612
- #endif
2613
-
2614
- SIMDE__FUNCTION_ATTRIBUTES
2615
- simde__m512
2616
- simde_mm512_div_ps (simde__m512 a, simde__m512 b) {
2617
- #if defined(SIMDE_AVX512F_NATIVE)
2618
- return _mm512_div_ps(a, b);
2619
- #else
2620
- simde__m512_private
2621
- r_,
2622
- a_ = simde__m512_to_private(a),
2623
- b_ = simde__m512_to_private(b);
2624
-
2625
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2626
- r_.f32 = a_.f32 / b_.f32;
2627
- #else
2628
- SIMDE__VECTORIZE
2629
- for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
2630
- r_.m256[i] = simde_mm256_div_ps(a_.m256[i], b_.m256[i]);
2631
- }
2632
- #endif
2633
-
2634
- return simde__m512_from_private(r_);
2635
- #endif
2636
- }
2637
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2638
- #define _mm512_div_ps(a, b) simde_mm512_div_ps(a, b)
2639
- #endif
2640
-
2641
- SIMDE__FUNCTION_ATTRIBUTES
2642
- simde__m512
2643
- simde_mm512_mask_div_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
2644
- #if defined(SIMDE_AVX512F_NATIVE)
2645
- return _mm512_mask_div_ps(src, k, a, b);
2646
- #else
2647
- return simde_mm512_mask_mov_ps(src, k, simde_mm512_div_ps(a, b));
2648
- #endif
2649
- }
2650
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2651
- #define _mm512_mask_div_ps(src, k, a, b) simde_mm512_mask_div_ps(src, k, a, b)
2652
- #endif
2653
-
2654
- SIMDE__FUNCTION_ATTRIBUTES
2655
- simde__m512
2656
- simde_mm512_maskz_div_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
2657
- #if defined(SIMDE_AVX512F_NATIVE)
2658
- return _mm512_maskz_div_ps(k, a, b);
2659
- #else
2660
- return simde_mm512_maskz_mov_ps(k, simde_mm512_div_ps(a, b));
2661
- #endif
2662
- }
2663
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2664
- #define _mm512_maskz_div_ps(k, a, b) simde_mm512_maskz_div_ps(k, a, b)
2665
- #endif
2666
-
2667
- SIMDE__FUNCTION_ATTRIBUTES
2668
- simde__m512d
2669
- simde_mm512_div_pd (simde__m512d a, simde__m512d b) {
2670
- #if defined(SIMDE_AVX512F_NATIVE)
2671
- return _mm512_div_pd(a, b);
2672
- #else
2673
- simde__m512d_private
2674
- r_,
2675
- a_ = simde__m512d_to_private(a),
2676
- b_ = simde__m512d_to_private(b);
2677
-
2678
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2679
- r_.f64 = a_.f64 / b_.f64;
2680
- #else
2681
- SIMDE__VECTORIZE
2682
- for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
2683
- r_.m256d[i] = simde_mm256_div_pd(a_.m256d[i], b_.m256d[i]);
2684
- }
2685
- #endif
2686
-
2687
- return simde__m512d_from_private(r_);
2688
- #endif
2689
- }
2690
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2691
- #define _mm512_div_pd(a, b) simde_mm512_div_pd(a, b)
2692
- #endif
2693
-
2694
- SIMDE__FUNCTION_ATTRIBUTES
2695
- simde__m512d
2696
- simde_mm512_mask_div_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
2697
- #if defined(SIMDE_AVX512F_NATIVE)
2698
- return _mm512_mask_div_pd(src, k, a, b);
2699
- #else
2700
- return simde_mm512_mask_mov_pd(src, k, simde_mm512_div_pd(a, b));
2701
- #endif
2702
- }
2703
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2704
- #define _mm512_mask_div_pd(src, k, a, b) simde_mm512_mask_div_pd(src, k, a, b)
2705
- #endif
2706
-
2707
- SIMDE__FUNCTION_ATTRIBUTES
2708
- simde__m512d
2709
- simde_mm512_maskz_div_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
2710
- #if defined(SIMDE_AVX512F_NATIVE)
2711
- return _mm512_maskz_div_pd(k, a, b);
2712
- #else
2713
- return simde_mm512_maskz_mov_pd(k, simde_mm512_div_pd(a, b));
2714
- #endif
2715
- }
2716
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2717
- #define _mm512_maskz_div_pd(k, a, b) simde_mm512_maskz_div_pd(k, a, b)
2718
- #endif
2719
-
2720
- SIMDE__FUNCTION_ATTRIBUTES
2721
- simde__m512i
2722
- simde_mm512_load_si512 (simde__m512i const * mem_addr) {
2723
- simde_assert_aligned(64, mem_addr);
2724
-
2725
- #if defined(SIMDE_AVX512F_NATIVE)
2726
- return _mm512_load_si512((__m512i const*) mem_addr);
2727
- #elif defined(SIMDE_ARCH_AARCH64) && (defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(8,0,0))
2728
- simde__m512i r;
2729
- memcpy(&r, mem_addr, sizeof(r));
2730
- return r;
2731
- #else
2732
- return *mem_addr;
2733
- #endif
2734
- }
2735
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2736
- #define _mm512_load_si512(a) simde_mm512_load_si512(a)
2737
- #endif
2738
-
2739
- SIMDE__FUNCTION_ATTRIBUTES
2740
- simde__m512i
2741
- simde_mm512_loadu_si512 (simde__m512i const * mem_addr) {
2742
- #if defined(SIMDE_AVX512F_NATIVE)
2743
- return _mm512_loadu_si512((__m512i const*) mem_addr);
2744
- #else
2745
- simde__m512i r;
2746
- simde_memcpy(&r, mem_addr, sizeof(r));
2747
- return r;
2748
- #endif
2749
- }
2750
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2751
- #define _mm512_loadu_si512(a) simde_mm512_loadu_si512(a)
2752
- #endif
2753
-
2754
- SIMDE__FUNCTION_ATTRIBUTES
2755
- simde__m512
2756
- simde_mm512_mul_ps (simde__m512 a, simde__m512 b) {
2757
- #if defined(SIMDE_AVX512F_NATIVE)
2758
- return _mm512_mul_ps(a, b);
2759
- #else
2760
- simde__m512_private
2761
- r_,
2762
- a_ = simde__m512_to_private(a),
2763
- b_ = simde__m512_to_private(b);
2764
-
2765
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2766
- r_.f32 = a_.f32 * b_.f32;
2767
- #else
2768
- SIMDE__VECTORIZE
2769
- for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
2770
- r_.m256[i] = simde_mm256_mul_ps(a_.m256[i], b_.m256[i]);
2771
- }
2772
- #endif
2773
-
2774
- return simde__m512_from_private(r_);
2775
- #endif
2776
- }
2777
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2778
- #define _mm512_mul_ps(a, b) simde_mm512_mul_ps(a, b)
2779
- #endif
2780
-
2781
- SIMDE__FUNCTION_ATTRIBUTES
2782
- simde__m512
2783
- simde_mm512_mask_mul_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
2784
- #if defined(SIMDE_AVX512F_NATIVE)
2785
- return _mm512_mask_mul_ps(src, k, a, b);
2786
- #else
2787
- return simde_mm512_mask_mov_ps(src, k, simde_mm512_mul_ps(a, b));
2788
- #endif
2789
- }
2790
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2791
- #define _mm512_mask_mul_ps(src, k, a, b) simde_mm512_mask_mul_ps(src, k, a, b)
2792
- #endif
2793
-
2794
- SIMDE__FUNCTION_ATTRIBUTES
2795
- simde__m512
2796
- simde_mm512_maskz_mul_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
2797
- #if defined(SIMDE_AVX512F_NATIVE)
2798
- return _mm512_maskz_mul_ps(k, a, b);
2799
- #else
2800
- return simde_mm512_maskz_mov_ps(k, simde_mm512_mul_ps(a, b));
2801
- #endif
2802
- }
2803
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2804
- #define _mm512_maskz_mul_ps(k, a, b) simde_mm512_maskz_mul_ps(k, a, b)
2805
- #endif
2806
-
2807
- SIMDE__FUNCTION_ATTRIBUTES
2808
- simde__m512d
2809
- simde_mm512_mul_pd (simde__m512d a, simde__m512d b) {
2810
- #if defined(SIMDE_AVX512F_NATIVE)
2811
- return _mm512_mul_pd(a, b);
2812
- #else
2813
- simde__m512d_private
2814
- r_,
2815
- a_ = simde__m512d_to_private(a),
2816
- b_ = simde__m512d_to_private(b);
2817
-
2818
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2819
- r_.f64 = a_.f64 * b_.f64;
2820
- #else
2821
- SIMDE__VECTORIZE
2822
- for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
2823
- r_.m256d[i] = simde_mm256_mul_pd(a_.m256d[i], b_.m256d[i]);
2824
- }
2825
- #endif
2826
-
2827
- return simde__m512d_from_private(r_);
2828
- #endif
2829
- }
2830
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2831
- #define _mm512_mul_pd(a, b) simde_mm512_mul_pd(a, b)
2832
- #endif
2833
-
2834
- SIMDE__FUNCTION_ATTRIBUTES
2835
- simde__m512d
2836
- simde_mm512_mask_mul_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
2837
- #if defined(SIMDE_AVX512F_NATIVE)
2838
- return _mm512_mask_mul_pd(src, k, a, b);
2839
- #else
2840
- return simde_mm512_mask_mov_pd(src, k, simde_mm512_mul_pd(a, b));
2841
- #endif
2842
- }
2843
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2844
- #define _mm512_mask_mul_pd(src, k, a, b) simde_mm512_mask_mul_pd(src, k, a, b)
2845
- #endif
2846
-
2847
- SIMDE__FUNCTION_ATTRIBUTES
2848
- simde__m512d
2849
- simde_mm512_maskz_mul_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
2850
- #if defined(SIMDE_AVX512F_NATIVE)
2851
- return _mm512_maskz_mul_pd(k, a, b);
2852
- #else
2853
- return simde_mm512_maskz_mov_pd(k, simde_mm512_mul_pd(a, b));
2854
- #endif
2855
- }
2856
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2857
- #define _mm512_maskz_mul_pd(k, a, b) simde_mm512_maskz_mul_pd(k, a, b)
2858
- #endif
2859
-
2860
- SIMDE__FUNCTION_ATTRIBUTES
2861
- simde__m512i
2862
- simde_mm512_mul_epi32 (simde__m512i a, simde__m512i b) {
2863
- #if defined(SIMDE_AVX512F_NATIVE)
2864
- return _mm512_mul_epi32(a, b);
2865
- #else
2866
- simde__m512i_private
2867
- r_,
2868
- a_ = simde__m512i_to_private(a),
2869
- b_ = simde__m512i_to_private(b);
2870
-
2871
- #if defined(SIMDE__CONVERT_VECTOR) && defined(SIMDE__SHUFFLE_VECTOR)
2872
- simde__m512i_private x;
2873
- __typeof__(r_.i64) ta, tb;
2874
-
2875
- /* Get even numbered 32-bit values */
2876
- x.i32 = SIMDE__SHUFFLE_VECTOR(32, 64, a_.i32, b_.i32, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
2877
- /* Cast to 64 bits */
2878
- SIMDE__CONVERT_VECTOR(ta, x.m256i_private[0].i32);
2879
- SIMDE__CONVERT_VECTOR(tb, x.m256i_private[1].i32);
2880
- r_.i64 = ta * tb;
2881
- #else
2882
- SIMDE__VECTORIZE
2883
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
2884
- r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[i << 1]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i << 1]);
2885
- }
2886
- #endif
2887
- return simde__m512i_from_private(r_);
2888
- #endif
2889
- }
2890
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2891
- #define _mm512_mul_epi32(a, b) simde_mm512_mul_epi32(a, b)
2892
- #endif
2893
-
2894
- SIMDE__FUNCTION_ATTRIBUTES
2895
- simde__m512i
2896
- simde_mm512_mask_mul_epi32(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
2897
- #if defined(SIMDE_AVX512F_NATIVE)
2898
- return _mm512_mask_mul_epi32(src, k, a, b);
2899
- #else
2900
- return simde_mm512_mask_mov_epi64(src, k, simde_mm512_mul_epi32(a, b));
2901
- #endif
2902
- }
2903
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2904
- #define _mm512_mask_mul_epi32(src, k, a, b) simde_mm512_mask_mul_epi32(src, k, a, b)
2905
- #endif
2906
-
2907
- SIMDE__FUNCTION_ATTRIBUTES
2908
- simde__m512i
2909
- simde_mm512_maskz_mul_epi32(simde__mmask8 k, simde__m512i a, simde__m512i b) {
2910
- #if defined(SIMDE_AVX512F_NATIVE)
2911
- return _mm512_maskz_mul_epi32(k, a, b);
2912
- #else
2913
- return simde_mm512_maskz_mov_epi64(k, simde_mm512_mul_epi32(a, b));
2914
- #endif
2915
- }
2916
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2917
- #define _mm512_maskz_mul_epi32(k, a, b) simde_mm512_maskz_mul_epi32(k, a, b)
2918
- #endif
2919
-
2920
- SIMDE__FUNCTION_ATTRIBUTES
2921
- simde__m512i
2922
- simde_mm512_mul_epu32 (simde__m512i a, simde__m512i b) {
2923
- #if defined(SIMDE_AVX512F_NATIVE)
2924
- return _mm512_mul_epu32(a, b);
2925
- #else
2926
- simde__m512i_private
2927
- r_,
2928
- a_ = simde__m512i_to_private(a),
2929
- b_ = simde__m512i_to_private(b);
2930
-
2931
- #if defined(SIMDE__CONVERT_VECTOR) && defined(SIMDE__SHUFFLE_VECTOR)
2932
- simde__m512i_private x;
2933
- __typeof__(r_.u64) ta, tb;
2934
-
2935
- x.u32 = SIMDE__SHUFFLE_VECTOR(32, 64, a_.u32, b_.u32, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
2936
- SIMDE__CONVERT_VECTOR(ta, x.m256i_private[0].u32);
2937
- SIMDE__CONVERT_VECTOR(tb, x.m256i_private[1].u32);
2938
- r_.u64 = ta * tb;
2939
- #else
2940
- SIMDE__VECTORIZE
2941
- for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
2942
- r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i << 1]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i << 1]);
2943
- }
2944
- #endif
2945
-
2946
- return simde__m512i_from_private(r_);
2947
- #endif
2948
- }
2949
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2950
- #define _mm512_mul_epu32(a, b) simde_mm512_mul_epu32(a, b)
2951
- #endif
2952
-
2953
- SIMDE__FUNCTION_ATTRIBUTES
2954
- simde__m512i
2955
- simde_mm512_mask_mul_epu32(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
2956
- #if defined(SIMDE_AVX512F_NATIVE)
2957
- return _mm512_mask_mul_epu32(src, k, a, b);
2958
- #else
2959
- return simde_mm512_mask_mov_epi64(src, k, simde_mm512_mul_epu32(a, b));
2960
- #endif
2961
- }
2962
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2963
- #define _mm512_mask_mul_epu32(src, k, a, b) simde_mm512_mask_mul_epu32(src, k, a, b)
2964
- #endif
2965
-
2966
- SIMDE__FUNCTION_ATTRIBUTES
2967
- simde__m512i
2968
- simde_mm512_maskz_mul_epu32(simde__mmask8 k, simde__m512i a, simde__m512i b) {
2969
- #if defined(SIMDE_AVX512F_NATIVE)
2970
- return _mm512_maskz_mul_epu32(k, a, b);
2971
- #else
2972
- return simde_mm512_maskz_mov_epi64(k, simde_mm512_mul_epu32(a, b));
2973
- #endif
2974
- }
2975
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2976
- #define _mm512_maskz_mul_epu32(k, a, b) simde_mm512_maskz_mul_epu32(k, a, b)
2977
- #endif
2978
-
2979
- SIMDE__FUNCTION_ATTRIBUTES
2980
- simde__m512i
2981
- simde_mm512_or_si512 (simde__m512i a, simde__m512i b) {
2982
- #if defined(SIMDE_AVX512F_NATIVE)
2983
- return _mm512_or_si512(a, b);
2984
- #else
2985
- simde__m512i_private
2986
- r_,
2987
- a_ = simde__m512i_to_private(a),
2988
- b_ = simde__m512i_to_private(b);
2989
-
2990
- #if defined(SIMDE_ARCH_X86_AVX2)
2991
- r_.m256i[0] = simde_mm256_or_si256(a_.m256i[0], b_.m256i[0]);
2992
- r_.m256i[1] = simde_mm256_or_si256(a_.m256i[1], b_.m256i[1]);
2993
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2994
- r_.i32f = a_.i32f | b_.i32f;
2995
- #else
2996
- SIMDE__VECTORIZE
2997
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
2998
- r_.i32f[i] = a_.i32f[i] | b_.i32f[i];
2999
- }
3000
- #endif
3001
-
3002
- return simde__m512i_from_private(r_);
3003
- #endif
3004
- }
3005
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3006
- #define _mm512_or_si512(a, b) simde_mm512_or_si512(a, b)
3007
- #endif
3008
-
3009
- SIMDE__FUNCTION_ATTRIBUTES
3010
- simde__m512i
3011
- simde_mm512_sub_epi32 (simde__m512i a, simde__m512i b) {
3012
- #if defined(SIMDE_AVX512F_NATIVE)
3013
- return _mm512_sub_epi32(a, b);
3014
- #else
3015
- simde__m512i_private
3016
- r_,
3017
- a_ = simde__m512i_to_private(a),
3018
- b_ = simde__m512i_to_private(b);
3019
-
3020
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3021
- r_.i32 = a_.i32 - b_.i32;
3022
- #else
3023
- SIMDE__VECTORIZE
3024
- for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
3025
- r_.m256i[i] = simde_mm256_sub_epi32(a_.m256i[i], b_.m256i[i]);
3026
- }
3027
- #endif
3028
-
3029
- return simde__m512i_from_private(r_);
3030
- #endif
3031
- }
3032
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3033
- #define _mm512_sub_epi32(a, b) simde_mm512_sub_epi32(a, b)
3034
- #endif
3035
-
3036
- SIMDE__FUNCTION_ATTRIBUTES
3037
- simde__m512i
3038
- simde_mm512_mask_sub_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
3039
- #if defined(SIMDE_AVX512F_NATIVE)
3040
- return _mm512_mask_sub_epi32(src, k, a, b);
3041
- #else
3042
- return simde_mm512_mask_mov_epi32(src, k, simde_mm512_sub_epi32(a, b));
3043
- #endif
3044
- }
3045
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3046
- #define _mm512_mask_sub_epi32(src, k, a, b) simde_mm512_mask_sub_epi32(src, k, a, b)
3047
- #endif
3048
-
3049
- SIMDE__FUNCTION_ATTRIBUTES
3050
- simde__m512i
3051
- simde_mm512_maskz_sub_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) {
3052
- #if defined(SIMDE_AVX512F_NATIVE)
3053
- return _mm512_maskz_sub_epi32(k, a, b);
3054
- #else
3055
- return simde_mm512_maskz_mov_epi32(k, simde_mm512_sub_epi32(a, b));
3056
- #endif
3057
- }
3058
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3059
- #define _mm512_maskz_sub_epi32(k, a, b) simde_mm512_maskz_sub_epi32(k, a, b)
3060
- #endif
3061
-
3062
- SIMDE__FUNCTION_ATTRIBUTES
3063
- simde__m512i
3064
- simde_mm512_sub_epi64 (simde__m512i a, simde__m512i b) {
3065
- #if defined(SIMDE_AVX512F_NATIVE)
3066
- return _mm512_sub_epi64(a, b);
3067
- #else
3068
- simde__m512i_private
3069
- r_,
3070
- a_ = simde__m512i_to_private(a),
3071
- b_ = simde__m512i_to_private(b);
3072
-
3073
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3074
- r_.i64 = a_.i64 - b_.i64;
3075
- #else
3076
- SIMDE__VECTORIZE
3077
- for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
3078
- r_.m256i[i] = simde_mm256_sub_epi64(a_.m256i[i], b_.m256i[i]);
3079
- }
3080
- #endif
3081
-
3082
- return simde__m512i_from_private(r_);
3083
- #endif
3084
- }
3085
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3086
- #define _mm512_sub_epi64(a, b) simde_mm512_sub_epi64(a, b)
3087
- #endif
3088
-
3089
- SIMDE__FUNCTION_ATTRIBUTES
3090
- simde__m512i
3091
- simde_mm512_mask_sub_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
3092
- #if defined(SIMDE_AVX512F_NATIVE)
3093
- return _mm512_mask_sub_epi64(src, k, a, b);
3094
- #else
3095
- return simde_mm512_mask_mov_epi64(src, k, simde_mm512_sub_epi64(a, b));
3096
- #endif
3097
- }
3098
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3099
- #define _mm512_mask_sub_epi64(src, k, a, b) simde_mm512_mask_sub_epi64(src, k, a, b)
3100
- #endif
3101
-
3102
- SIMDE__FUNCTION_ATTRIBUTES
3103
- simde__m512i
3104
- simde_mm512_maskz_sub_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) {
3105
- #if defined(SIMDE_AVX512F_NATIVE)
3106
- return _mm512_maskz_sub_epi64(k, a, b);
3107
- #else
3108
- return simde_mm512_maskz_mov_epi64(k, simde_mm512_sub_epi64(a, b));
3109
- #endif
3110
- }
3111
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3112
- #define _mm512_maskz_sub_epi64(k, a, b) simde_mm512_maskz_sub_epi64(k, a, b)
3113
- #endif
3114
-
3115
- SIMDE__FUNCTION_ATTRIBUTES
3116
- simde__m512
3117
- simde_mm512_sub_ps (simde__m512 a, simde__m512 b) {
3118
- #if defined(SIMDE_AVX512F_NATIVE)
3119
- return _mm512_sub_ps(a, b);
3120
- #else
3121
- simde__m512_private
3122
- r_,
3123
- a_ = simde__m512_to_private(a),
3124
- b_ = simde__m512_to_private(b);
3125
-
3126
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3127
- r_.f32 = a_.f32 - b_.f32;
3128
- #else
3129
- SIMDE__VECTORIZE
3130
- for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
3131
- r_.m256[i] = simde_mm256_sub_ps(a_.m256[i], b_.m256[i]);
3132
- }
3133
- #endif
3134
-
3135
- return simde__m512_from_private(r_);
3136
- #endif
3137
- }
3138
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3139
- #define _mm512_sub_ps(a, b) simde_mm512_sub_ps(a, b)
3140
- #endif
3141
-
3142
- SIMDE__FUNCTION_ATTRIBUTES
3143
- simde__m512
3144
- simde_mm512_mask_sub_ps (simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
3145
- #if defined(SIMDE_AVX512F_NATIVE)
3146
- return _mm512_mask_sub_ps(src, k, a, b);
3147
- #else
3148
- return simde_mm512_mask_mov_ps(src, k, simde_mm512_sub_ps(a, b));
3149
- #endif
3150
- }
3151
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3152
- #define _mm512_mask_sub_ps(src, k, a, b) simde_mm512_mask_sub_ps(src, k, a, b)
3153
- #endif
3154
-
3155
- SIMDE__FUNCTION_ATTRIBUTES
3156
- simde__m512
3157
- simde_mm512_maskz_sub_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
3158
- #if defined(SIMDE_AVX512F_NATIVE)
3159
- return _mm512_maskz_sub_ps(k, a, b);
3160
- #else
3161
- return simde_mm512_maskz_mov_ps(k, simde_mm512_sub_ps(a, b));
3162
- #endif
3163
- }
3164
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3165
- #define _mm512_maskz_sub_ps(k, a, b) simde_mm512_maskz_sub_ps(k, a, b)
3166
- #endif
3167
-
3168
- SIMDE__FUNCTION_ATTRIBUTES
3169
- simde__m512d
3170
- simde_mm512_sub_pd (simde__m512d a, simde__m512d b) {
3171
- #if defined(SIMDE_AVX512F_NATIVE)
3172
- return _mm512_sub_pd(a, b);
3173
- #else
3174
- simde__m512d_private
3175
- r_,
3176
- a_ = simde__m512d_to_private(a),
3177
- b_ = simde__m512d_to_private(b);
3178
-
3179
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3180
- r_.f64 = a_.f64 - b_.f64;
3181
- #else
3182
- SIMDE__VECTORIZE
3183
- for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
3184
- r_.m256d[i] = simde_mm256_sub_pd(a_.m256d[i], b_.m256d[i]);
3185
- }
3186
- #endif
3187
-
3188
- return simde__m512d_from_private(r_);
3189
- #endif
3190
- }
3191
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3192
- #define _mm512_sub_pd(a, b) simde_mm512_sub_pd(a, b)
3193
- #endif
3194
-
3195
- SIMDE__FUNCTION_ATTRIBUTES
3196
- simde__m512d
3197
- simde_mm512_mask_sub_pd (simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
3198
- #if defined(SIMDE_AVX512F_NATIVE)
3199
- return _mm512_mask_sub_pd(src, k, a, b);
3200
- #else
3201
- return simde_mm512_mask_mov_pd(src, k, simde_mm512_sub_pd(a, b));
3202
- #endif
3203
- }
3204
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3205
- #define _mm512_mask_sub_pd(src, k, a, b) simde_mm512_mask_sub_pd(src, k, a, b)
3206
- #endif
3207
-
3208
- SIMDE__FUNCTION_ATTRIBUTES
3209
- simde__m512d
3210
- simde_mm512_maskz_sub_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
3211
- #if defined(SIMDE_AVX512F_NATIVE)
3212
- return _mm512_maskz_sub_pd(k, a, b);
3213
- #else
3214
- return simde_mm512_maskz_mov_pd(k, simde_mm512_sub_pd(a, b));
3215
- #endif
3216
- }
3217
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3218
- #define _mm512_maskz_sub_pd(k, a, b) simde_mm512_maskz_sub_pd(k, a, b)
3219
- #endif
3220
-
3221
- SIMDE__FUNCTION_ATTRIBUTES
3222
- simde__m512i
3223
- simde_mm512_srli_epi32 (simde__m512i a, unsigned int imm8) {
3224
- #if defined(SIMDE_AVX512F_NATIVE)
3225
- return _mm512_srli_epi32(a, imm8);
3226
- #else
3227
- simde__m512i_private
3228
- r_,
3229
- a_ = simde__m512i_to_private(a);
3230
-
3231
- #if defined(SIMDE_ARCH_X86_AVX2)
3232
- r_.m256i[0] = simde_mm256_srli_epi32(a_.m256i[0], imm8);
3233
- r_.m256i[1] = simde_mm256_srli_epi32(a_.m256i[1], imm8);
3234
- #elif defined(SIMDE_ARCH_X86_SSE2)
3235
- r_.m128i[0] = simde_mm_srli_epi32(a_.m128i[0], imm8);
3236
- r_.m128i[1] = simde_mm_srli_epi32(a_.m128i[1], imm8);
3237
- r_.m128i[2] = simde_mm_srli_epi32(a_.m128i[2], imm8);
3238
- r_.m128i[3] = simde_mm_srli_epi32(a_.m128i[3], imm8);
3239
- #else
3240
- if (imm8 > 31) {
3241
- simde_memset(&r_, 0, sizeof(r_));
3242
- } else {
3243
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
3244
- r_.u32 = a_.u32 >> imm8;
3245
- #else
3246
- SIMDE__VECTORIZE
3247
- for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
3248
- r_.u32[i] = a_.u32[i] >> imm8;
3249
- }
3250
- #endif
3251
- }
3252
- #endif
3253
-
3254
- return simde__m512i_from_private(r_);
3255
- #endif
3256
- }
3257
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3258
- #define _mm512_srli_epi32(a, imm8) simde_mm512_srli_epi32(a, imm8)
3259
- #endif
3260
-
3261
- SIMDE__FUNCTION_ATTRIBUTES
3262
- simde__m512i
3263
- simde_mm512_srli_epi64 (simde__m512i a, unsigned int imm8) {
3264
- #if defined(SIMDE_AVX512F_NATIVE)
3265
- return _mm512_srli_epi64(a, imm8);
3266
- #else
3267
- simde__m512i_private
3268
- r_,
3269
- a_ = simde__m512i_to_private(a);
3270
-
3271
- #if defined(SIMDE_ARCH_X86_AVX2)
3272
- r_.m256i[0] = simde_mm256_srli_epi64(a_.m256i[0], imm8);
3273
- r_.m256i[1] = simde_mm256_srli_epi64(a_.m256i[1], imm8);
3274
- #elif defined(SIMDE_ARCH_X86_SSE2)
3275
- r_.m128i[0] = simde_mm_srli_epi64(a_.m128i[0], imm8);
3276
- r_.m128i[1] = simde_mm_srli_epi64(a_.m128i[1], imm8);
3277
- r_.m128i[2] = simde_mm_srli_epi64(a_.m128i[2], imm8);
3278
- r_.m128i[3] = simde_mm_srli_epi64(a_.m128i[3], imm8);
3279
- #else
3280
- /* The Intel Intrinsics Guide says that only the 8 LSBits of imm8 are
3281
- * used. In this case we should do "imm8 &= 0xff" here. However in
3282
- * practice all bits are used. */
3283
- if (imm8 > 63) {
3284
- simde_memset(&r_, 0, sizeof(r_));
3285
- } else {
3286
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
3287
- r_.u64 = a_.u64 >> imm8;
3288
- #else
3289
- SIMDE__VECTORIZE
3290
- for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
3291
- r_.u64[i] = a_.u64[i] >> imm8;
3292
- }
3293
- #endif
3294
- }
3295
- #endif
3296
-
3297
- return simde__m512i_from_private(r_);
3298
- #endif
3299
- }
3300
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3301
- #define _mm512_srli_epi64(a, imm8) simde_mm512_srli_epi64(a, imm8)
3302
- #endif
3303
-
3304
- SIMDE__FUNCTION_ATTRIBUTES
3305
- simde__mmask16
3306
- simde_mm512_mask_test_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) {
3307
- #if defined(SIMDE_AVX512F_NATIVE)
3308
- return _mm512_mask_test_epi32_mask(k1, a, b);
3309
- #else
3310
- simde__m512i_private
3311
- a_ = simde__m512i_to_private(a),
3312
- b_ = simde__m512i_to_private(b);
3313
- simde__mmask16 r = 0;
3314
-
3315
- SIMDE__VECTORIZE_REDUCTION(|:r)
3316
- for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
3317
- r |= !!(a_.i32[i] & b_.i32[i]) << i;
3318
- }
3319
-
3320
- return r & k1;
3321
- #endif
3322
- }
3323
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3324
- #define _mm512_mask_test_epi32_mask(a, b) simde_mm512_mask_test_epi32_mask(a, b)
3325
- #endif
3326
-
3327
- SIMDE__FUNCTION_ATTRIBUTES
3328
- simde__mmask8
3329
- simde_mm512_mask_test_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) {
3330
- #if defined(SIMDE_AVX512F_NATIVE)
3331
- return _mm512_mask_test_epi64_mask(k1, a, b);
3332
- #else
3333
- simde__m512i_private
3334
- a_ = simde__m512i_to_private(a),
3335
- b_ = simde__m512i_to_private(b);
3336
- simde__mmask8 r = 0;
3337
-
3338
- SIMDE__VECTORIZE_REDUCTION(|:r)
3339
- for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
3340
- r |= !!(a_.i64[i] & b_.i64[i]) << i;
3341
- }
3342
-
3343
- return r & k1;
3344
- #endif
3345
- }
3346
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3347
- #define _mm512_mask_test_epi64_mask(a, b) simde_mm512_mask_test_epi64_mask(a, b)
3348
- #endif
3349
-
3350
- SIMDE__FUNCTION_ATTRIBUTES
3351
- simde__m512i
3352
- simde_mm512_xor_si512 (simde__m512i a, simde__m512i b) {
3353
- #if defined(SIMDE_AVX512F_NATIVE)
3354
- return _mm512_xor_si512(a, b);
3355
- #else
3356
- simde__m512i_private
3357
- r_,
3358
- a_ = simde__m512i_to_private(a),
3359
- b_ = simde__m512i_to_private(b);
3360
-
3361
- #if defined(SIMDE_ARCH_X86_AVX2)
3362
- r_.m256i[0] = simde_mm256_xor_si256(a_.m256i[0], b_.m256i[0]);
3363
- r_.m256i[1] = simde_mm256_xor_si256(a_.m256i[1], b_.m256i[1]);
3364
- #elif defined(SIMDE_ARCH_X86_SSE2)
3365
- r_.m128i[0] = simde_mm_xor_si128(a_.m128i[0], b_.m128i[0]);
3366
- r_.m128i[1] = simde_mm_xor_si128(a_.m128i[1], b_.m128i[1]);
3367
- r_.m128i[2] = simde_mm_xor_si128(a_.m128i[2], b_.m128i[2]);
3368
- r_.m128i[3] = simde_mm_xor_si128(a_.m128i[3], b_.m128i[3]);
3369
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3370
- r_.i32f = a_.i32f ^ b_.i32f;
3371
- #else
3372
- SIMDE__VECTORIZE
3373
- for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
3374
- r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i];
3375
- }
3376
- #endif
3377
-
3378
- return simde__m512i_from_private(r_);
3379
- #endif
3380
- }
3381
- #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3382
- #define _mm512_xor_si512(a, b) simde_mm512_xor_si512(a, b)
3383
- #endif
3384
-
3385
- SIMDE__END_DECLS
3386
-
3387
- HEDLEY_DIAGNOSTIC_POP
3388
-
3389
- #endif /* !defined(SIMDE__AVX512F_H) */