minimap2 0.2.25.0 → 0.2.25.1

Files changed (123)
  1. checksums.yaml +4 -4
  2. data/README.md +2 -3
  3. data/ext/minimap2/Makefile +6 -2
  4. data/ext/minimap2/NEWS.md +38 -0
  5. data/ext/minimap2/README.md +9 -3
  6. data/ext/minimap2/align.c +5 -3
  7. data/ext/minimap2/cookbook.md +2 -2
  8. data/ext/minimap2/format.c +7 -4
  9. data/ext/minimap2/kalloc.c +20 -1
  10. data/ext/minimap2/kalloc.h +13 -2
  11. data/ext/minimap2/ksw2.h +1 -0
  12. data/ext/minimap2/ksw2_extd2_sse.c +1 -1
  13. data/ext/minimap2/ksw2_exts2_sse.c +79 -40
  14. data/ext/minimap2/ksw2_extz2_sse.c +1 -1
  15. data/ext/minimap2/lchain.c +15 -16
  16. data/ext/minimap2/lib/simde/CONTRIBUTING.md +114 -0
  17. data/ext/minimap2/lib/simde/COPYING +20 -0
  18. data/ext/minimap2/lib/simde/README.md +333 -0
  19. data/ext/minimap2/lib/simde/amalgamate.py +58 -0
  20. data/ext/minimap2/lib/simde/meson.build +33 -0
  21. data/ext/minimap2/lib/simde/netlify.toml +20 -0
  22. data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +140 -0
  23. data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +137 -0
  24. data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +142 -0
  25. data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +145 -0
  26. data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +140 -0
  27. data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +145 -0
  28. data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +140 -0
  29. data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +143 -0
  30. data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +137 -0
  31. data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +141 -0
  32. data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +147 -0
  33. data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +141 -0
  34. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +134 -0
  35. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +138 -0
  36. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +134 -0
  37. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +137 -0
  38. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +131 -0
  39. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +135 -0
  40. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +141 -0
  41. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +135 -0
  42. data/ext/minimap2/lib/simde/simde/arm/neon.h +97 -0
  43. data/ext/minimap2/lib/simde/simde/check.h +267 -0
  44. data/ext/minimap2/lib/simde/simde/debug-trap.h +83 -0
  45. data/ext/minimap2/lib/simde/simde/hedley.h +1899 -0
  46. data/ext/minimap2/lib/simde/simde/simde-arch.h +445 -0
  47. data/ext/minimap2/lib/simde/simde/simde-common.h +697 -0
  48. data/ext/minimap2/lib/simde/simde/x86/avx.h +5385 -0
  49. data/ext/minimap2/lib/simde/simde/x86/avx2.h +2402 -0
  50. data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +391 -0
  51. data/ext/minimap2/lib/simde/simde/x86/avx512f.h +3389 -0
  52. data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +112 -0
  53. data/ext/minimap2/lib/simde/simde/x86/fma.h +659 -0
  54. data/ext/minimap2/lib/simde/simde/x86/mmx.h +2210 -0
  55. data/ext/minimap2/lib/simde/simde/x86/sse.h +3696 -0
  56. data/ext/minimap2/lib/simde/simde/x86/sse2.h +5991 -0
  57. data/ext/minimap2/lib/simde/simde/x86/sse3.h +343 -0
  58. data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +1783 -0
  59. data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +105 -0
  60. data/ext/minimap2/lib/simde/simde/x86/ssse3.h +1053 -0
  61. data/ext/minimap2/lib/simde/simde/x86/svml.h +543 -0
  62. data/ext/minimap2/lib/simde/test/CMakeLists.txt +166 -0
  63. data/ext/minimap2/lib/simde/test/arm/meson.build +4 -0
  64. data/ext/minimap2/lib/simde/test/arm/neon/meson.build +23 -0
  65. data/ext/minimap2/lib/simde/test/arm/neon/skel.c +871 -0
  66. data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +134 -0
  67. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +39 -0
  68. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +10 -0
  69. data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +1260 -0
  70. data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +873 -0
  71. data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +1084 -0
  72. data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +1260 -0
  73. data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +18 -0
  74. data/ext/minimap2/lib/simde/test/arm/test-arm.c +20 -0
  75. data/ext/minimap2/lib/simde/test/arm/test-arm.h +8 -0
  76. data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +171 -0
  77. data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +68 -0
  78. data/ext/minimap2/lib/simde/test/meson.build +64 -0
  79. data/ext/minimap2/lib/simde/test/munit/COPYING +21 -0
  80. data/ext/minimap2/lib/simde/test/munit/Makefile +55 -0
  81. data/ext/minimap2/lib/simde/test/munit/README.md +54 -0
  82. data/ext/minimap2/lib/simde/test/munit/example.c +351 -0
  83. data/ext/minimap2/lib/simde/test/munit/meson.build +37 -0
  84. data/ext/minimap2/lib/simde/test/munit/munit.c +2055 -0
  85. data/ext/minimap2/lib/simde/test/munit/munit.h +535 -0
  86. data/ext/minimap2/lib/simde/test/run-tests.c +20 -0
  87. data/ext/minimap2/lib/simde/test/run-tests.h +260 -0
  88. data/ext/minimap2/lib/simde/test/x86/avx.c +13752 -0
  89. data/ext/minimap2/lib/simde/test/x86/avx2.c +9977 -0
  90. data/ext/minimap2/lib/simde/test/x86/avx512bw.c +2664 -0
  91. data/ext/minimap2/lib/simde/test/x86/avx512f.c +10416 -0
  92. data/ext/minimap2/lib/simde/test/x86/avx512vl.c +210 -0
  93. data/ext/minimap2/lib/simde/test/x86/fma.c +2557 -0
  94. data/ext/minimap2/lib/simde/test/x86/meson.build +33 -0
  95. data/ext/minimap2/lib/simde/test/x86/mmx.c +2878 -0
  96. data/ext/minimap2/lib/simde/test/x86/skel.c +2984 -0
  97. data/ext/minimap2/lib/simde/test/x86/sse.c +5121 -0
  98. data/ext/minimap2/lib/simde/test/x86/sse2.c +9860 -0
  99. data/ext/minimap2/lib/simde/test/x86/sse3.c +486 -0
  100. data/ext/minimap2/lib/simde/test/x86/sse4.1.c +3446 -0
  101. data/ext/minimap2/lib/simde/test/x86/sse4.2.c +101 -0
  102. data/ext/minimap2/lib/simde/test/x86/ssse3.c +2084 -0
  103. data/ext/minimap2/lib/simde/test/x86/svml.c +1545 -0
  104. data/ext/minimap2/lib/simde/test/x86/test-avx.h +16 -0
  105. data/ext/minimap2/lib/simde/test/x86/test-avx512.h +25 -0
  106. data/ext/minimap2/lib/simde/test/x86/test-mmx.h +13 -0
  107. data/ext/minimap2/lib/simde/test/x86/test-sse.h +13 -0
  108. data/ext/minimap2/lib/simde/test/x86/test-sse2.h +13 -0
  109. data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +196 -0
  110. data/ext/minimap2/lib/simde/test/x86/test-x86.c +48 -0
  111. data/ext/minimap2/lib/simde/test/x86/test-x86.h +8 -0
  112. data/ext/minimap2/main.c +13 -6
  113. data/ext/minimap2/map.c +0 -5
  114. data/ext/minimap2/minimap.h +40 -31
  115. data/ext/minimap2/minimap2.1 +19 -5
  116. data/ext/minimap2/misc/paftools.js +545 -24
  117. data/ext/minimap2/options.c +1 -1
  118. data/ext/minimap2/pyproject.toml +2 -0
  119. data/ext/minimap2/python/mappy.pyx +3 -1
  120. data/ext/minimap2/seed.c +1 -1
  121. data/ext/minimap2/setup.py +32 -22
  122. data/lib/minimap2/version.rb +1 -1
  123. metadata +100 -3
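
Most of the new lines in this release come from the vendored SIMDe library (the new lib/simde tree above), which provides portable C implementations of x86 intrinsics under simde_-prefixed names. The hunk below adds lib/simde/simde/x86/avx512f.h, the AVX-512F layer: each _mm512_* intrinsic gets a simde_mm512_* implementation that falls back to plain loops when native AVX-512F is unavailable. As a quick illustration, here is a minimal usage sketch (hypothetical standalone program, not part of the diff; assumes the vendored headers are on the include path, e.g. -I data/ext/minimap2/lib/simde):

/* Hypothetical sketch: broadcast a constant with the portable
 * simde_mm512_set1_epi32() fallback and read one lane back out.
 * Build with e.g.: cc -I data/ext/minimap2/lib/simde demo.c */
#include <stdio.h>
#include <string.h>
#include "simde/x86/avx512f.h"

int main(void) {
  /* Works whether or not the host compiler has native AVX-512F. */
  simde__m512i v = simde_mm512_set1_epi32(42);
  int32_t lane[16];
  memcpy(lane, &v, sizeof(lane));  /* copy out; avoids alignment/aliasing concerns */
  printf("%d\n", lane[0]);         /* prints 42 */
  return 0;
}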
data/ext/minimap2/lib/simde/simde/x86/avx512f.h
@@ -0,0 +1,3389 @@
+ /* Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Copyright:
+ * 2020 Evan Nemerson <evan@nemerson.com>
+ */
+
+ #if !defined(SIMDE__AVX512F_H)
+ # if !defined(SIMDE__AVX512F_H)
+ # define SIMDE__AVX512F_H
+ # endif
+ # include "avx2.h"
+
+ HEDLEY_DIAGNOSTIC_PUSH
+ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+
+ # if defined(SIMDE_ARCH_X86_AVX512F) && !defined(SIMDE_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
+ # define SIMDE_AVX512F_NATIVE
+ # elif defined(SIMDE_ARCH_ARM_NEON) && !defined(SIMDE_AVX512F_NO_NEON) && !defined(SIMDE_NO_NEON)
+ # define SIMDE_AVX512F_NEON
+ # elif defined(SIMDE_ARCH_POWER_ALTIVEC)
+ # define SIMDE_AVX512F_POWER_ALTIVEC
+ # endif
+
+ /* The problem is that Microsoft doesn't support 64-byte aligned parameters, except for
+ __m512/__m512i/__m512d. Since our private union has an __m512 member it will be 64-byte
+ aligned even if we reduce the alignment requirements of other members.
+
+ Even if we're on x86 and use the native AVX-512 types for arguments/return values, the
+ to/from private functions will break, and I'm not willing to change their APIs to use
+ pointers (which would also require more verbose code on the caller side) just to make
+ MSVC happy.
+
+ If you want to use AVX-512 in SIMDe, you'll need to either upgrade to MSVC 2017 or later,
+ or upgrade to a different compiler (clang-cl, perhaps?). If you have an idea of how to
+ fix this without requiring API changes (except transparently through macros), patches
+ are welcome. */
+ # if defined(HEDLEY_MSVC_VERSION) && !HEDLEY_MSVC_VERSION_CHECK(19,10,0)
+ # if defined(SIMDE_AVX512F_NATIVE)
+ # undef SIMDE_AVX512F_NATIVE
+ # pragma message("Native AVX-512 support requires MSVC 2017 or later. See comment above (in code) for details.")
+ # endif
+ # define SIMDE_AVX512_ALIGN SIMDE_ALIGN(32)
+ # else
+ # define SIMDE_AVX512_ALIGN SIMDE_ALIGN(64)
+ # endif
+
+ # if defined(SIMDE_AVX512F_NATIVE)
+ # include <immintrin.h>
+ # endif
+
+ # if defined(SIMDE_AVX512F_POWER_ALTIVEC)
+ # include <altivec.h>
+ # endif
+
+ SIMDE__BEGIN_DECLS
+
+ typedef union {
+ #if defined(SIMDE_VECTOR_SUBSCRIPT)
+ SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ #if defined(SIMDE__HAVE_INT128)
+ SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ #endif
+ SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ #else
+ SIMDE_AVX512_ALIGN int8_t i8[64];
+ SIMDE_AVX512_ALIGN int16_t i16[32];
+ SIMDE_AVX512_ALIGN int32_t i32[16];
+ SIMDE_AVX512_ALIGN int64_t i64[8];
+ SIMDE_AVX512_ALIGN uint8_t u8[64];
+ SIMDE_AVX512_ALIGN uint16_t u16[32];
+ SIMDE_AVX512_ALIGN uint32_t u32[16];
+ SIMDE_AVX512_ALIGN uint64_t u64[8];
+ SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)];
+ SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)];
+ #if defined(SIMDE__HAVE_INT128)
+ SIMDE_AVX512_ALIGN simde_int128 i128[4];
+ SIMDE_AVX512_ALIGN simde_uint128 u128[4];
+ #endif
+ SIMDE_AVX512_ALIGN simde_float32 f32[16];
+ SIMDE_AVX512_ALIGN simde_float64 f64[8];
+ #endif
+
+ SIMDE_AVX512_ALIGN simde__m128_private m128_private[4];
+ SIMDE_AVX512_ALIGN simde__m128 m128[4];
+ SIMDE_AVX512_ALIGN simde__m256_private m256_private[2];
+ SIMDE_AVX512_ALIGN simde__m256 m256[2];
+
+ #if defined(SIMDE_AVX512F_NATIVE)
+ SIMDE_AVX512_ALIGN __m512 n;
+ #elif defined(SIMDE_ARCH_POWER_ALTIVEC)
+ SIMDE_ALIGN(16) vector unsigned char altivec_u8[4];
+ SIMDE_ALIGN(16) vector unsigned short altivec_u16[4];
+ SIMDE_ALIGN(16) vector unsigned int altivec_u32[4];
+ SIMDE_ALIGN(16) vector unsigned long long altivec_u64[4];
+ SIMDE_ALIGN(16) vector signed char altivec_i8[4];
+ SIMDE_ALIGN(16) vector signed short altivec_i16[4];
+ SIMDE_ALIGN(16) vector signed int altivec_i32[4];
+ SIMDE_ALIGN(16) vector signed long long altivec_i64[4];
+ SIMDE_ALIGN(16) vector float altivec_f32[4];
+ SIMDE_ALIGN(16) vector double altivec_f64[4];
+ #endif
+ } simde__m512_private;
+
+ typedef union {
+ #if defined(SIMDE_VECTOR_SUBSCRIPT)
+ SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ #if defined(SIMDE__HAVE_INT128)
+ SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ #endif
+ SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ #else
+ SIMDE_AVX512_ALIGN int8_t i8[64];
+ SIMDE_AVX512_ALIGN int16_t i16[32];
+ SIMDE_AVX512_ALIGN int32_t i32[16];
+ SIMDE_AVX512_ALIGN int64_t i64[8];
+ SIMDE_AVX512_ALIGN uint8_t u8[64];
+ SIMDE_AVX512_ALIGN uint16_t u16[32];
+ SIMDE_AVX512_ALIGN uint32_t u32[16];
+ SIMDE_AVX512_ALIGN uint64_t u64[8];
+ #if defined(SIMDE__HAVE_INT128)
+ SIMDE_AVX512_ALIGN simde_int128 i128[4];
+ SIMDE_AVX512_ALIGN simde_uint128 u128[4];
+ #endif
+ SIMDE_AVX512_ALIGN simde_float32 f32[16];
+ SIMDE_AVX512_ALIGN simde_float64 f64[8];
+ SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)];
+ SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)];
+ #endif
+
+ SIMDE_AVX512_ALIGN simde__m128d_private m128d_private[4];
+ SIMDE_AVX512_ALIGN simde__m128d m128d[4];
+ SIMDE_AVX512_ALIGN simde__m256d_private m256d_private[2];
+ SIMDE_AVX512_ALIGN simde__m256d m256d[2];
+
+ #if defined(SIMDE_AVX512F_NATIVE)
+ SIMDE_AVX512_ALIGN __m512d n;
+ #elif defined(SIMDE_ARCH_POWER_ALTIVEC)
+ SIMDE_ALIGN(16) vector unsigned char altivec_u8[4];
+ SIMDE_ALIGN(16) vector unsigned short altivec_u16[4];
+ SIMDE_ALIGN(16) vector unsigned int altivec_u32[4];
+ SIMDE_ALIGN(16) vector unsigned long long altivec_u64[4];
+ SIMDE_ALIGN(16) vector signed char altivec_i8[4];
+ SIMDE_ALIGN(16) vector signed short altivec_i16[4];
+ SIMDE_ALIGN(16) vector signed int altivec_i32[4];
+ SIMDE_ALIGN(16) vector signed long long altivec_i64[4];
+ SIMDE_ALIGN(16) vector float altivec_f32[4];
+ SIMDE_ALIGN(16) vector double altivec_f64[4];
+ #endif
+ } simde__m512d_private;
+
+ typedef union {
+ #if defined(SIMDE_VECTOR_SUBSCRIPT)
+ SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ #if defined(SIMDE__HAVE_INT128)
+ SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ #endif
+ SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ #else
+ SIMDE_AVX512_ALIGN int8_t i8[64];
+ SIMDE_AVX512_ALIGN int16_t i16[32];
+ SIMDE_AVX512_ALIGN int32_t i32[16];
+ SIMDE_AVX512_ALIGN int64_t i64[8];
+ SIMDE_AVX512_ALIGN uint8_t u8[64];
+ SIMDE_AVX512_ALIGN uint16_t u16[32];
+ SIMDE_AVX512_ALIGN uint32_t u32[16];
+ SIMDE_AVX512_ALIGN uint64_t u64[8];
+ SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)];
+ SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)];
+ #if defined(SIMDE__HAVE_INT128)
+ SIMDE_AVX512_ALIGN simde_int128 i128[4];
+ SIMDE_AVX512_ALIGN simde_uint128 u128[4];
+ #endif
+ SIMDE_AVX512_ALIGN simde_float32 f32[16];
+ SIMDE_AVX512_ALIGN simde_float64 f64[8];
+ #endif
+
+ SIMDE_AVX512_ALIGN simde__m128i_private m128i_private[4];
+ SIMDE_AVX512_ALIGN simde__m128i m128i[4];
+ SIMDE_AVX512_ALIGN simde__m256i_private m256i_private[2];
+ SIMDE_AVX512_ALIGN simde__m256i m256i[2];
+
+ #if defined(SIMDE_AVX512F_NATIVE)
+ SIMDE_AVX512_ALIGN __m512i n;
+ #elif defined(SIMDE_ARCH_POWER_ALTIVEC)
+ SIMDE_ALIGN(16) vector unsigned char altivec_u8[4];
+ SIMDE_ALIGN(16) vector unsigned short altivec_u16[4];
+ SIMDE_ALIGN(16) vector unsigned int altivec_u32[4];
+ SIMDE_ALIGN(16) vector unsigned long long altivec_u64[4];
+ SIMDE_ALIGN(16) vector signed char altivec_i8[4];
+ SIMDE_ALIGN(16) vector signed short altivec_i16[4];
+ SIMDE_ALIGN(16) vector signed int altivec_i32[4];
+ SIMDE_ALIGN(16) vector signed long long altivec_i64[4];
+ SIMDE_ALIGN(16) vector float altivec_f32[4];
+ SIMDE_ALIGN(16) vector double altivec_f64[4];
+ #endif
+ } simde__m512i_private;
+
+ #if defined(SIMDE_AVX512F_NATIVE)
+ typedef __m512 simde__m512;
+ typedef __m512i simde__m512i;
+ typedef __m512d simde__m512d;
+ typedef __mmask8 simde__mmask8;
+ typedef __mmask16 simde__mmask16;
+ typedef __mmask32 simde__mmask32;
+ typedef __mmask64 simde__mmask64;
+ #else
+ #if defined(SIMDE_VECTOR_SUBSCRIPT)
+ typedef simde_float32 simde__m512 SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ typedef int_fast32_t simde__m512i SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ typedef simde_float64 simde__m512d SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
+ #else
+ typedef simde__m512_private simde__m512;
+ typedef simde__m512i_private simde__m512i;
+ typedef simde__m512d_private simde__m512d;
+ #endif
+
+ typedef uint_fast8_t simde__mmask8;
+ typedef uint_fast16_t simde__mmask16;
+ typedef uint_fast32_t simde__mmask32;
+ typedef uint_fast64_t simde__mmask64;
+ #endif
+
+ #if !defined(SIMDE_AVX512F_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
+ #define SIMDE_AVX512F_ENABLE_NATIVE_ALIASES
+ typedef simde__m512 __m512;
+ typedef simde__m512i __m512i;
+ typedef simde__m512d __m512d;
+ #endif
+
+ HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512), "simde__m512 size incorrect");
+ HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512_private), "simde__m512_private size incorrect");
+ HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512i), "simde__m512i size incorrect");
+ HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512i_private), "simde__m512i_private size incorrect");
+ HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512d), "simde__m512d size incorrect");
+ HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512d_private), "simde__m512d_private size incorrect");
+ #if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)
+ HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512) == 32, "simde__m512 is not 32-byte aligned");
+ HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512_private) == 32, "simde__m512_private is not 32-byte aligned");
+ HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512i) == 32, "simde__m512i is not 32-byte aligned");
+ HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512i_private) == 32, "simde__m512i_private is not 32-byte aligned");
+ HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512d) == 32, "simde__m512d is not 32-byte aligned");
+ HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512d_private) == 32, "simde__m512d_private is not 32-byte aligned");
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512
+ simde__m512_from_private(simde__m512_private v) {
+ simde__m512 r;
+ simde_memcpy(&r, &v, sizeof(r));
+ return r;
+ }
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512_private
+ simde__m512_to_private(simde__m512 v) {
+ simde__m512_private r;
+ simde_memcpy(&r, &v, sizeof(r));
+ return r;
+ }
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde__m512i_from_private(simde__m512i_private v) {
+ simde__m512i r;
+ simde_memcpy(&r, &v, sizeof(r));
+ return r;
+ }
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i_private
+ simde__m512i_to_private(simde__m512i v) {
+ simde__m512i_private r;
+ simde_memcpy(&r, &v, sizeof(r));
+ return r;
+ }
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512d
+ simde__m512d_from_private(simde__m512d_private v) {
+ simde__m512d r;
+ simde_memcpy(&r, &v, sizeof(r));
+ return r;
+ }
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512d_private
+ simde__m512d_to_private(simde__m512d v) {
+ simde__m512d_private r;
+ simde_memcpy(&r, &v, sizeof(r));
+ return r;
+ }
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__mmask16
+ simde__m512i_private_to_mmask16 (simde__m512i_private a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ HEDLEY_UNREACHABLE_RETURN(0);
+ #else
+ simde__mmask16 r = 0;
+
+ /* Note: using addition instead of a bitwise or for the reduction
+ seems like it should improve things since hardware support for
+ horizontal addition is better than bitwise or. However, GCC
+ generates the same code, and clang is actually a bit slower.
+ I suspect this can be optimized quite a bit, and this function
+ is probably going to be pretty hot. */
+ SIMDE__VECTORIZE_REDUCTION(|:r)
+ for (size_t i = 0 ; i < (sizeof(a.i32) / sizeof(a.i32[0])) ; i++) {
+ r |= !!(a.i32[i]) << i;
+ }
+
+ return r;
+ #endif
+ }
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__mmask8
+ simde__m512i_private_to_mmask8 (simde__m512i_private a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ HEDLEY_UNREACHABLE_RETURN(0);
+ #else
+ simde__mmask8 r = 0;
+ SIMDE__VECTORIZE_REDUCTION(|:r)
+ for (size_t i = 0 ; i < (sizeof(a.i64) / sizeof(a.i64[0])) ; i++) {
+ r |= !!(a.i64[i]) << i;
+ }
+
+ return r;
+ #endif
+ }
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde__m512i_from_mmask16 (simde__mmask16 k) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ /* Should never be reached. */
+ return _mm512_mask_mov_epi32(_mm512_setzero_epi32(), k, _mm512_set1_epi32(~INT32_C(0)));
+ #else
+ simde__m512i_private r_;
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
+ r_.i32[i] = (k & (1 << i)) ? ~INT32_C(0) : INT32_C(0);
+ }
+
+ return simde__m512i_from_private(r_);
+ #endif
+ }
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512
+ simde_mm512_castpd_ps (simde__m512d a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_castpd_ps(a);
+ #else
+ simde__m512 r;
+ memcpy(&r, &a, sizeof(r));
+ return r;
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_castpd_ps(a) simde_mm512_castpd_ps(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_mm512_castpd_si512 (simde__m512d a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_castpd_si512(a);
+ #else
+ simde__m512i r;
+ memcpy(&r, &a, sizeof(r));
+ return r;
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_castpd_si512(a) simde_mm512_castpd_si512(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512d
+ simde_mm512_castps_pd (simde__m512 a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_castps_pd(a);
+ #else
+ simde__m512d r;
+ memcpy(&r, &a, sizeof(r));
+ return r;
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_castps_pd(a) simde_mm512_castps_pd(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_mm512_castps_si512 (simde__m512 a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_castps_si512(a);
+ #else
+ simde__m512i r;
+ memcpy(&r, &a, sizeof(r));
+ return r;
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_castps_si512(a) simde_mm512_castps_si512(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512
+ simde_mm512_castsi512_ps (simde__m512i a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_castsi512_ps(a);
+ #else
+ simde__m512 r;
+ memcpy(&r, &a, sizeof(r));
+ return r;
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_castsi512_ps(a) simde_mm512_castsi512_ps(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512d
+ simde_mm512_castsi512_pd (simde__m512i a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_castsi512_pd(a);
+ #else
+ simde__m512d r;
+ memcpy(&r, &a, sizeof(r));
+ return r;
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_castsi512_pd(a) simde_mm512_castsi512_pd(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512d
+ simde_mm512_castpd128_pd512 (simde__m128d a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_castpd128_pd512(a);
+ #else
+ simde__m512d_private r_;
+ r_.m128d[0] = a;
+ return simde__m512d_from_private(r_);
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_castpd128_pd512(a) simde_mm512_castpd128_pd512(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512d
+ simde_mm512_castpd256_pd512 (simde__m256d a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_castpd256_pd512(a);
+ #else
+ simde__m512d_private r_;
+ r_.m256d[0] = a;
+ return simde__m512d_from_private(r_);
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_castpd256_pd512(a) simde_mm512_castpd256_pd512(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m128d
+ simde_mm512_castpd512_pd128 (simde__m512d a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_castpd512_pd128(a);
+ #else
+ simde__m512d_private a_ = simde__m512d_to_private(a);
+ return a_.m128d[0];
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_castpd512_pd128(a) simde_mm512_castpd512_pd128(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m256d
+ simde_mm512_castpd512_pd256 (simde__m512d a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_castpd512_pd256(a);
+ #else
+ simde__m512d_private a_ = simde__m512d_to_private(a);
+ return a_.m256d[0];
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_castpd512_pd256(a) simde_mm512_castpd512_pd256(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512
+ simde_mm512_castps128_ps512 (simde__m128 a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_castps128_ps512(a);
+ #else
+ simde__m512_private r_;
+ r_.m128[0] = a;
+ return simde__m512_from_private(r_);
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_castps128_ps512(a) simde_mm512_castps128_ps512(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512
+ simde_mm512_castps256_ps512 (simde__m256 a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_castps256_ps512(a);
+ #else
+ simde__m512_private r_;
+ r_.m256[0] = a;
+ return simde__m512_from_private(r_);
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_castps256_ps512(a) simde_mm512_castps256_ps512(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m128
+ simde_mm512_castps512_ps128 (simde__m512 a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_castps512_ps128(a);
+ #else
+ simde__m512_private a_ = simde__m512_to_private(a);
+ return a_.m128[0];
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_castps512_ps128(a) simde_mm512_castps512_ps128(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m256
+ simde_mm512_castps512_ps256 (simde__m512 a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_castps512_ps256(a);
+ #else
+ simde__m512_private a_ = simde__m512_to_private(a);
+ return a_.m256[0];
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_castps512_ps256(a) simde_mm512_castps512_ps256(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_mm512_castsi128_si512 (simde__m128i a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_castsi128_si512(a);
+ #else
+ simde__m512i_private r_;
+ r_.m128i[0] = a;
+ return simde__m512i_from_private(r_);
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_castsi128_si512(a) simde_mm512_castsi128_si512(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_mm512_castsi256_si512 (simde__m256i a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_castsi256_si512(a);
+ #else
+ simde__m512i_private r_;
+ r_.m256i[0] = a;
+ return simde__m512i_from_private(r_);
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_castsi256_si512(a) simde_mm512_castsi256_si512(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m128i
+ simde_mm512_castsi512_si128 (simde__m512i a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_castsi512_si128(a);
+ #else
+ simde__m512i_private a_ = simde__m512i_to_private(a);
+ return a_.m128i[0];
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_castsi512_si128(a) simde_mm512_castsi512_si128(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m256i
+ simde_mm512_castsi512_si256 (simde__m512i a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_castsi512_si256(a);
+ #else
+ simde__m512i_private a_ = simde__m512i_to_private(a);
+ return a_.m256i[0];
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_castsi512_si256(a) simde_mm512_castsi512_si256(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_mm512_set_epi8 (int8_t e63, int8_t e62, int8_t e61, int8_t e60, int8_t e59, int8_t e58, int8_t e57, int8_t e56,
+ int8_t e55, int8_t e54, int8_t e53, int8_t e52, int8_t e51, int8_t e50, int8_t e49, int8_t e48,
+ int8_t e47, int8_t e46, int8_t e45, int8_t e44, int8_t e43, int8_t e42, int8_t e41, int8_t e40,
+ int8_t e39, int8_t e38, int8_t e37, int8_t e36, int8_t e35, int8_t e34, int8_t e33, int8_t e32,
+ int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24,
+ int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16,
+ int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8,
+ int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) {
+ simde__m512i_private r_;
+
+ r_.i8[ 0] = e0;
+ r_.i8[ 1] = e1;
+ r_.i8[ 2] = e2;
+ r_.i8[ 3] = e3;
+ r_.i8[ 4] = e4;
+ r_.i8[ 5] = e5;
+ r_.i8[ 6] = e6;
+ r_.i8[ 7] = e7;
+ r_.i8[ 8] = e8;
+ r_.i8[ 9] = e9;
+ r_.i8[10] = e10;
+ r_.i8[11] = e11;
+ r_.i8[12] = e12;
+ r_.i8[13] = e13;
+ r_.i8[14] = e14;
+ r_.i8[15] = e15;
+ r_.i8[16] = e16;
+ r_.i8[17] = e17;
+ r_.i8[18] = e18;
+ r_.i8[19] = e19;
+ r_.i8[20] = e20;
+ r_.i8[21] = e21;
+ r_.i8[22] = e22;
+ r_.i8[23] = e23;
+ r_.i8[24] = e24;
+ r_.i8[25] = e25;
+ r_.i8[26] = e26;
+ r_.i8[27] = e27;
+ r_.i8[28] = e28;
+ r_.i8[29] = e29;
+ r_.i8[30] = e30;
+ r_.i8[31] = e31;
+ r_.i8[32] = e32;
+ r_.i8[33] = e33;
+ r_.i8[34] = e34;
+ r_.i8[35] = e35;
+ r_.i8[36] = e36;
+ r_.i8[37] = e37;
+ r_.i8[38] = e38;
+ r_.i8[39] = e39;
+ r_.i8[40] = e40;
+ r_.i8[41] = e41;
+ r_.i8[42] = e42;
+ r_.i8[43] = e43;
+ r_.i8[44] = e44;
+ r_.i8[45] = e45;
+ r_.i8[46] = e46;
+ r_.i8[47] = e47;
+ r_.i8[48] = e48;
+ r_.i8[49] = e49;
+ r_.i8[50] = e50;
+ r_.i8[51] = e51;
+ r_.i8[52] = e52;
+ r_.i8[53] = e53;
+ r_.i8[54] = e54;
+ r_.i8[55] = e55;
+ r_.i8[56] = e56;
+ r_.i8[57] = e57;
+ r_.i8[58] = e58;
+ r_.i8[59] = e59;
+ r_.i8[60] = e60;
+ r_.i8[61] = e61;
+ r_.i8[62] = e62;
+ r_.i8[63] = e63;
+
+ return simde__m512i_from_private(r_);
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_mm512_set_epi16 (int16_t e31, int16_t e30, int16_t e29, int16_t e28, int16_t e27, int16_t e26, int16_t e25, int16_t e24,
+ int16_t e23, int16_t e22, int16_t e21, int16_t e20, int16_t e19, int16_t e18, int16_t e17, int16_t e16,
+ int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8,
+ int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) {
+ simde__m512i_private r_;
+
+ r_.i16[ 0] = e0;
+ r_.i16[ 1] = e1;
+ r_.i16[ 2] = e2;
+ r_.i16[ 3] = e3;
+ r_.i16[ 4] = e4;
+ r_.i16[ 5] = e5;
+ r_.i16[ 6] = e6;
+ r_.i16[ 7] = e7;
+ r_.i16[ 8] = e8;
+ r_.i16[ 9] = e9;
+ r_.i16[10] = e10;
+ r_.i16[11] = e11;
+ r_.i16[12] = e12;
+ r_.i16[13] = e13;
+ r_.i16[14] = e14;
+ r_.i16[15] = e15;
+ r_.i16[16] = e16;
+ r_.i16[17] = e17;
+ r_.i16[18] = e18;
+ r_.i16[19] = e19;
+ r_.i16[20] = e20;
+ r_.i16[21] = e21;
+ r_.i16[22] = e22;
+ r_.i16[23] = e23;
+ r_.i16[24] = e24;
+ r_.i16[25] = e25;
+ r_.i16[26] = e26;
+ r_.i16[27] = e27;
+ r_.i16[28] = e28;
+ r_.i16[29] = e29;
+ r_.i16[30] = e30;
+ r_.i16[31] = e31;
+
+ return simde__m512i_from_private(r_);
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_mm512_set_epi32 (int32_t e15, int32_t e14, int32_t e13, int32_t e12, int32_t e11, int32_t e10, int32_t e9, int32_t e8,
+ int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) {
+ simde__m512i_private r_;
+
+ r_.i32[ 0] = e0;
+ r_.i32[ 1] = e1;
+ r_.i32[ 2] = e2;
+ r_.i32[ 3] = e3;
+ r_.i32[ 4] = e4;
+ r_.i32[ 5] = e5;
+ r_.i32[ 6] = e6;
+ r_.i32[ 7] = e7;
+ r_.i32[ 8] = e8;
+ r_.i32[ 9] = e9;
+ r_.i32[10] = e10;
+ r_.i32[11] = e11;
+ r_.i32[12] = e12;
+ r_.i32[13] = e13;
+ r_.i32[14] = e14;
+ r_.i32[15] = e15;
+
+ return simde__m512i_from_private(r_);
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_mm512_set_epi64 (int64_t e7, int64_t e6, int64_t e5, int64_t e4, int64_t e3, int64_t e2, int64_t e1, int64_t e0) {
+ simde__m512i_private r_;
+
+ r_.i64[0] = e0;
+ r_.i64[1] = e1;
+ r_.i64[2] = e2;
+ r_.i64[3] = e3;
+ r_.i64[4] = e4;
+ r_.i64[5] = e5;
+ r_.i64[6] = e6;
+ r_.i64[7] = e7;
+
+ return simde__m512i_from_private(r_);
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_x_mm512_set_epu8 (uint8_t e63, uint8_t e62, uint8_t e61, uint8_t e60, uint8_t e59, uint8_t e58, uint8_t e57, uint8_t e56,
+ uint8_t e55, uint8_t e54, uint8_t e53, uint8_t e52, uint8_t e51, uint8_t e50, uint8_t e49, uint8_t e48,
+ uint8_t e47, uint8_t e46, uint8_t e45, uint8_t e44, uint8_t e43, uint8_t e42, uint8_t e41, uint8_t e40,
+ uint8_t e39, uint8_t e38, uint8_t e37, uint8_t e36, uint8_t e35, uint8_t e34, uint8_t e33, uint8_t e32,
+ uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24,
+ uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16,
+ uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8,
+ uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) {
+ simde__m512i_private r_;
+
+ r_.u8[ 0] = e0;
+ r_.u8[ 1] = e1;
+ r_.u8[ 2] = e2;
+ r_.u8[ 3] = e3;
+ r_.u8[ 4] = e4;
+ r_.u8[ 5] = e5;
+ r_.u8[ 6] = e6;
+ r_.u8[ 7] = e7;
+ r_.u8[ 8] = e8;
+ r_.u8[ 9] = e9;
+ r_.u8[10] = e10;
+ r_.u8[11] = e11;
+ r_.u8[12] = e12;
+ r_.u8[13] = e13;
+ r_.u8[14] = e14;
+ r_.u8[15] = e15;
+ r_.u8[16] = e16;
+ r_.u8[17] = e17;
+ r_.u8[18] = e18;
+ r_.u8[19] = e19;
+ r_.u8[20] = e20;
+ r_.u8[21] = e21;
+ r_.u8[22] = e22;
+ r_.u8[23] = e23;
+ r_.u8[24] = e24;
+ r_.u8[25] = e25;
+ r_.u8[26] = e26;
+ r_.u8[27] = e27;
+ r_.u8[28] = e28;
+ r_.u8[29] = e29;
+ r_.u8[30] = e30;
+ r_.u8[31] = e31;
+ r_.u8[32] = e32;
+ r_.u8[33] = e33;
+ r_.u8[34] = e34;
+ r_.u8[35] = e35;
+ r_.u8[36] = e36;
+ r_.u8[37] = e37;
+ r_.u8[38] = e38;
+ r_.u8[39] = e39;
+ r_.u8[40] = e40;
+ r_.u8[41] = e41;
+ r_.u8[42] = e42;
+ r_.u8[43] = e43;
+ r_.u8[44] = e44;
+ r_.u8[45] = e45;
+ r_.u8[46] = e46;
+ r_.u8[47] = e47;
+ r_.u8[48] = e48;
+ r_.u8[49] = e49;
+ r_.u8[50] = e50;
+ r_.u8[51] = e51;
+ r_.u8[52] = e52;
+ r_.u8[53] = e53;
+ r_.u8[54] = e54;
+ r_.u8[55] = e55;
+ r_.u8[56] = e56;
+ r_.u8[57] = e57;
+ r_.u8[58] = e58;
+ r_.u8[59] = e59;
+ r_.u8[60] = e60;
+ r_.u8[61] = e61;
+ r_.u8[62] = e62;
+ r_.u8[63] = e63;
+
+ return simde__m512i_from_private(r_);
+ }
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_x_mm512_set_epu16 (uint16_t e31, uint16_t e30, uint16_t e29, uint16_t e28, uint16_t e27, uint16_t e26, uint16_t e25, uint16_t e24,
+ uint16_t e23, uint16_t e22, uint16_t e21, uint16_t e20, uint16_t e19, uint16_t e18, uint16_t e17, uint16_t e16,
+ uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8,
+ uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) {
+ simde__m512i_private r_;
+
+ r_.u16[ 0] = e0;
+ r_.u16[ 1] = e1;
+ r_.u16[ 2] = e2;
+ r_.u16[ 3] = e3;
+ r_.u16[ 4] = e4;
+ r_.u16[ 5] = e5;
+ r_.u16[ 6] = e6;
+ r_.u16[ 7] = e7;
+ r_.u16[ 8] = e8;
+ r_.u16[ 9] = e9;
+ r_.u16[10] = e10;
+ r_.u16[11] = e11;
+ r_.u16[12] = e12;
+ r_.u16[13] = e13;
+ r_.u16[14] = e14;
+ r_.u16[15] = e15;
+ r_.u16[16] = e16;
+ r_.u16[17] = e17;
+ r_.u16[18] = e18;
+ r_.u16[19] = e19;
+ r_.u16[20] = e20;
+ r_.u16[21] = e21;
+ r_.u16[22] = e22;
+ r_.u16[23] = e23;
+ r_.u16[24] = e24;
+ r_.u16[25] = e25;
+ r_.u16[26] = e26;
+ r_.u16[27] = e27;
+ r_.u16[28] = e28;
+ r_.u16[29] = e29;
+ r_.u16[30] = e30;
+ r_.u16[31] = e31;
+
+ return simde__m512i_from_private(r_);
+ }
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_x_mm512_set_epu32 (uint32_t e15, uint32_t e14, uint32_t e13, uint32_t e12, uint32_t e11, uint32_t e10, uint32_t e9, uint32_t e8,
+ uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) {
+ simde__m512i_private r_;
+
+ r_.u32[ 0] = e0;
+ r_.u32[ 1] = e1;
+ r_.u32[ 2] = e2;
+ r_.u32[ 3] = e3;
+ r_.u32[ 4] = e4;
+ r_.u32[ 5] = e5;
+ r_.u32[ 6] = e6;
+ r_.u32[ 7] = e7;
+ r_.u32[ 8] = e8;
+ r_.u32[ 9] = e9;
+ r_.u32[10] = e10;
+ r_.u32[11] = e11;
+ r_.u32[12] = e12;
+ r_.u32[13] = e13;
+ r_.u32[14] = e14;
+ r_.u32[15] = e15;
+
+ return simde__m512i_from_private(r_);
+ }
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_x_mm512_set_epu64 (uint64_t e7, uint64_t e6, uint64_t e5, uint64_t e4, uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) {
+ simde__m512i_private r_;
+
+ r_.u64[ 0] = e0;
+ r_.u64[ 1] = e1;
+ r_.u64[ 2] = e2;
+ r_.u64[ 3] = e3;
+ r_.u64[ 4] = e4;
+ r_.u64[ 5] = e5;
+ r_.u64[ 6] = e6;
+ r_.u64[ 7] = e7;
+
+ return simde__m512i_from_private(r_);
+ }
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512
+ simde_mm512_set_ps (simde_float32 e15, simde_float32 e14, simde_float32 e13, simde_float32 e12,
+ simde_float32 e11, simde_float32 e10, simde_float32 e9, simde_float32 e8,
+ simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4,
+ simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) {
+ simde__m512_private r_;
+
+ r_.f32[ 0] = e0;
+ r_.f32[ 1] = e1;
+ r_.f32[ 2] = e2;
+ r_.f32[ 3] = e3;
+ r_.f32[ 4] = e4;
+ r_.f32[ 5] = e5;
+ r_.f32[ 6] = e6;
+ r_.f32[ 7] = e7;
+ r_.f32[ 8] = e8;
+ r_.f32[ 9] = e9;
+ r_.f32[10] = e10;
+ r_.f32[11] = e11;
+ r_.f32[12] = e12;
+ r_.f32[13] = e13;
+ r_.f32[14] = e14;
+ r_.f32[15] = e15;
+
+ return simde__m512_from_private(r_);
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512d
+ simde_mm512_set_pd (simde_float64 e7, simde_float64 e6, simde_float64 e5, simde_float64 e4, simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) {
+ simde__m512d_private r_;
+
+ r_.f64[0] = e0;
+ r_.f64[1] = e1;
+ r_.f64[2] = e2;
+ r_.f64[3] = e3;
+ r_.f64[4] = e4;
+ r_.f64[5] = e5;
+ r_.f64[6] = e6;
+ r_.f64[7] = e7;
+
+ return simde__m512d_from_private(r_);
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_mm512_set1_epi8 (int8_t a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_set1_epi8(a);
+ #else
+ simde__m512i_private r_;
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
+ r_.i8[i] = a;
+ }
+
+ return simde__m512i_from_private(r_);
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_set1_epi8(a) simde_mm512_set1_epi8(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_mm512_set1_epi16 (int16_t a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_set1_epi16(a);
+ #else
+ simde__m512i_private r_;
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
+ r_.i16[i] = a;
+ }
+
+ return simde__m512i_from_private(r_);
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_set1_epi16(a) simde_mm512_set1_epi16(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_mm512_set1_epi32 (int32_t a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_set1_epi32(a);
+ #else
+ simde__m512i_private r_;
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
+ r_.i32[i] = a;
+ }
+
+ return simde__m512i_from_private(r_);
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_set1_epi32(a) simde_mm512_set1_epi32(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_mm512_set1_epi64 (int64_t a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_set1_epi64(a);
+ #else
+ simde__m512i_private r_;
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
+ r_.i64[i] = a;
+ }
+
+ return simde__m512i_from_private(r_);
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_set1_epi64(a) simde_mm512_set1_epi64(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_x_mm512_set1_epu8 (uint8_t a) {
+ simde__m512i_private r_;
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
+ r_.u8[i] = a;
+ }
+
+ return simde__m512i_from_private(r_);
+ }
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_x_mm512_set1_epu16 (uint16_t a) {
+ simde__m512i_private r_;
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
+ r_.u16[i] = a;
+ }
+
+ return simde__m512i_from_private(r_);
+ }
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_x_mm512_set1_epu32 (uint32_t a) {
+ simde__m512i_private r_;
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
+ r_.u32[i] = a;
+ }
+
+ return simde__m512i_from_private(r_);
+ }
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_x_mm512_set1_epu64 (uint64_t a) {
+ simde__m512i_private r_;
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
+ r_.u64[i] = a;
+ }
+
+ return simde__m512i_from_private(r_);
+ }
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512
+ simde_mm512_set1_ps (simde_float32 a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_set1_ps(a);
+ #else
+ simde__m512_private r_;
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
+ r_.f32[i] = a;
+ }
+
+ return simde__m512_from_private(r_);
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_set1_ps(a) simde_mm512_set1_ps(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512d
+ simde_mm512_set1_pd (simde_float64 a) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_set1_pd(a);
+ #else
+ simde__m512d_private r_;
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
+ r_.f64[i] = a;
+ }
+
+ return simde__m512d_from_private(r_);
+ #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_set1_pd(a) simde_mm512_set1_pd(a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_mm512_set4_epi32 (int32_t d, int32_t c, int32_t b, int32_t a) {
+ simde__m512i_private r_;
+
+ r_.i32[ 0] = a;
+ r_.i32[ 1] = b;
+ r_.i32[ 2] = c;
+ r_.i32[ 3] = d;
+ r_.i32[ 4] = a;
+ r_.i32[ 5] = b;
+ r_.i32[ 6] = c;
+ r_.i32[ 7] = d;
+ r_.i32[ 8] = a;
+ r_.i32[ 9] = b;
+ r_.i32[10] = c;
+ r_.i32[11] = d;
+ r_.i32[12] = a;
+ r_.i32[13] = b;
+ r_.i32[14] = c;
+ r_.i32[15] = d;
+
+ return simde__m512i_from_private(r_);
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_set4_epi32(d,c,b,a) simde_mm512_set4_epi32(d,c,b,a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_mm512_set4_epi64 (int64_t d, int64_t c, int64_t b, int64_t a) {
+ simde__m512i_private r_;
+
+ r_.i64[0] = a;
+ r_.i64[1] = b;
+ r_.i64[2] = c;
+ r_.i64[3] = d;
+ r_.i64[4] = a;
+ r_.i64[5] = b;
+ r_.i64[6] = c;
+ r_.i64[7] = d;
+
+ return simde__m512i_from_private(r_);
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_set4_epi64(d,c,b,a) simde_mm512_set4_epi64(d,c,b,a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512
+ simde_mm512_set4_ps (simde_float32 d, simde_float32 c, simde_float32 b, simde_float32 a) {
+ simde__m512_private r_;
+
+ r_.f32[ 0] = a;
+ r_.f32[ 1] = b;
+ r_.f32[ 2] = c;
+ r_.f32[ 3] = d;
+ r_.f32[ 4] = a;
+ r_.f32[ 5] = b;
+ r_.f32[ 6] = c;
+ r_.f32[ 7] = d;
+ r_.f32[ 8] = a;
+ r_.f32[ 9] = b;
+ r_.f32[10] = c;
+ r_.f32[11] = d;
+ r_.f32[12] = a;
+ r_.f32[13] = b;
+ r_.f32[14] = c;
+ r_.f32[15] = d;
+
+ return simde__m512_from_private(r_);
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_set4_ps(d,c,b,a) simde_mm512_set4_ps(d,c,b,a)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512d
+ simde_mm512_set4_pd (simde_float64 d, simde_float64 c, simde_float64 b, simde_float64 a) {
+ simde__m512d_private r_;
+
+ r_.f64[0] = a;
+ r_.f64[1] = b;
+ r_.f64[2] = c;
+ r_.f64[3] = d;
+ r_.f64[4] = a;
+ r_.f64[5] = b;
+ r_.f64[6] = c;
+ r_.f64[7] = d;
+
+ return simde__m512d_from_private(r_);
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+ #define _mm512_set4_pd(d,c,b,a) simde_mm512_set4_pd(d,c,b,a)
+ #endif
+
1330
+ SIMDE__FUNCTION_ATTRIBUTES
1331
+ simde__m512i
1332
+ simde_mm512_setr_epi32 (int32_t e15, int32_t e14, int32_t e13, int32_t e12, int32_t e11, int32_t e10, int32_t e9, int32_t e8,
1333
+ int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) {
1334
+ simde__m512i_private r_;
1335
+
1336
+ r_.i32[ 0] = e15;
1337
+ r_.i32[ 1] = e14;
1338
+ r_.i32[ 2] = e13;
1339
+ r_.i32[ 3] = e12;
1340
+ r_.i32[ 4] = e11;
1341
+ r_.i32[ 5] = e10;
1342
+ r_.i32[ 6] = e9;
1343
+ r_.i32[ 7] = e8;
1344
+ r_.i32[ 8] = e7;
1345
+ r_.i32[ 9] = e6;
1346
+ r_.i32[10] = e5;
1347
+ r_.i32[11] = e4;
1348
+ r_.i32[12] = e3;
1349
+ r_.i32[13] = e2;
1350
+ r_.i32[14] = e1;
1351
+ r_.i32[15] = e0;
1352
+
1353
+ return simde__m512i_from_private(r_);
1354
+ }
1355
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1356
+ #define _mm512_setr_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
1357
+ #endif
1358
+
1359
+ SIMDE__FUNCTION_ATTRIBUTES
1360
+ simde__m512i
1361
+ simde_mm512_setr_epi64 (int64_t e7, int64_t e6, int64_t e5, int64_t e4, int64_t e3, int64_t e2, int64_t e1, int64_t e0) {
1362
+ simde__m512i_private r_;
1363
+
1364
+ r_.i64[0] = e7;
1365
+ r_.i64[1] = e6;
1366
+ r_.i64[2] = e5;
1367
+ r_.i64[3] = e4;
1368
+ r_.i64[4] = e3;
1369
+ r_.i64[5] = e2;
1370
+ r_.i64[6] = e1;
1371
+ r_.i64[7] = e0;
1372
+
1373
+ return simde__m512i_from_private(r_);
1374
+ }
1375
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1376
+ #define _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
1377
+ #endif
1378
+
1379
+ SIMDE__FUNCTION_ATTRIBUTES
1380
+ simde__m512
1381
+ simde_mm512_setr_ps (simde_float32 e15, simde_float32 e14, simde_float32 e13, simde_float32 e12,
1382
+ simde_float32 e11, simde_float32 e10, simde_float32 e9, simde_float32 e8,
1383
+ simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4,
1384
+ simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) {
1385
+ simde__m512_private r_;
1386
+
1387
+ r_.f32[ 0] = e15;
1388
+ r_.f32[ 1] = e14;
1389
+ r_.f32[ 2] = e13;
1390
+ r_.f32[ 3] = e12;
1391
+ r_.f32[ 4] = e11;
1392
+ r_.f32[ 5] = e10;
1393
+ r_.f32[ 6] = e9;
1394
+ r_.f32[ 7] = e8;
1395
+ r_.f32[ 8] = e7;
1396
+ r_.f32[ 9] = e6;
1397
+ r_.f32[10] = e5;
1398
+ r_.f32[11] = e4;
1399
+ r_.f32[12] = e3;
1400
+ r_.f32[13] = e2;
1401
+ r_.f32[14] = e1;
1402
+ r_.f32[15] = e0;
1403
+
1404
+ return simde__m512_from_private(r_);
1405
+ }
1406
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1407
+ #define _mm512_setr_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
1408
+ #endif
1409
+
1410
+ SIMDE__FUNCTION_ATTRIBUTES
1411
+ simde__m512d
1412
+ simde_mm512_setr_pd (simde_float64 e7, simde_float64 e6, simde_float64 e5, simde_float64 e4, simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) {
1413
+ simde__m512d_private r_;
1414
+
1415
+ r_.f64[0] = e7;
1416
+ r_.f64[1] = e6;
1417
+ r_.f64[2] = e5;
1418
+ r_.f64[3] = e4;
1419
+ r_.f64[4] = e3;
1420
+ r_.f64[5] = e2;
1421
+ r_.f64[6] = e1;
1422
+ r_.f64[7] = e0;
1423
+
1424
+ return simde__m512d_from_private(r_);
1425
+ }
1426
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1427
+ #define _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
1428
+ #endif
1429
+
1430
+ SIMDE__FUNCTION_ATTRIBUTES
1431
+ simde__m512i
1432
+ simde_mm512_setr4_epi32 (int32_t d, int32_t c, int32_t b, int32_t a) {
1433
+ simde__m512i_private r_;
1434
+
1435
+ r_.i32[ 0] = d;
1436
+ r_.i32[ 1] = c;
1437
+ r_.i32[ 2] = b;
1438
+ r_.i32[ 3] = a;
1439
+ r_.i32[ 4] = d;
1440
+ r_.i32[ 5] = c;
1441
+ r_.i32[ 6] = b;
1442
+ r_.i32[ 7] = a;
1443
+ r_.i32[ 8] = d;
1444
+ r_.i32[ 9] = c;
1445
+ r_.i32[10] = b;
1446
+ r_.i32[11] = a;
1447
+ r_.i32[12] = d;
1448
+ r_.i32[13] = c;
1449
+ r_.i32[14] = b;
1450
+ r_.i32[15] = a;
1451
+
1452
+ return simde__m512i_from_private(r_);
1453
+ }
1454
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1455
+ #define _mm512_setr4_epi32(d,c,b,a) simde_mm512_setr4_epi32(d,c,b,a)
1456
+ #endif
1457
+
1458
+ SIMDE__FUNCTION_ATTRIBUTES
1459
+ simde__m512i
1460
+ simde_mm512_setr4_epi64 (int64_t d, int64_t c, int64_t b, int64_t a) {
1461
+ simde__m512i_private r_;
1462
+
1463
+ r_.i64[0] = d;
1464
+ r_.i64[1] = c;
1465
+ r_.i64[2] = b;
1466
+ r_.i64[3] = a;
1467
+ r_.i64[4] = d;
1468
+ r_.i64[5] = c;
1469
+ r_.i64[6] = b;
1470
+ r_.i64[7] = a;
1471
+
1472
+ return simde__m512i_from_private(r_);
1473
+ }
1474
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1475
+ #define _mm512_setr4_epi64(d,c,b,a) simde_mm512_setr4_epi64(d,c,b,a)
1476
+ #endif
1477
+
1478
+ SIMDE__FUNCTION_ATTRIBUTES
1479
+ simde__m512
1480
+ simde_mm512_setr4_ps (simde_float32 d, simde_float32 c, simde_float32 b, simde_float32 a) {
1481
+ simde__m512_private r_;
1482
+
1483
+ r_.f32[ 0] = d;
1484
+ r_.f32[ 1] = c;
1485
+ r_.f32[ 2] = b;
1486
+ r_.f32[ 3] = a;
1487
+ r_.f32[ 4] = d;
1488
+ r_.f32[ 5] = c;
1489
+ r_.f32[ 6] = b;
1490
+ r_.f32[ 7] = a;
1491
+ r_.f32[ 8] = d;
1492
+ r_.f32[ 9] = c;
1493
+ r_.f32[10] = b;
1494
+ r_.f32[11] = a;
1495
+ r_.f32[12] = d;
1496
+ r_.f32[13] = c;
1497
+ r_.f32[14] = b;
1498
+ r_.f32[15] = a;
1499
+
1500
+ return simde__m512_from_private(r_);
1501
+ }
1502
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1503
+ #define _mm512_setr4_ps(d,c,b,a) simde_mm512_setr4_ps(d,c,b,a)
1504
+ #endif
1505
+
1506
+ SIMDE__FUNCTION_ATTRIBUTES
1507
+ simde__m512d
1508
+ simde_mm512_setr4_pd (simde_float64 d, simde_float64 c, simde_float64 b, simde_float64 a) {
1509
+ simde__m512d_private r_;
1510
+
1511
+ r_.f64[0] = d;
1512
+ r_.f64[1] = c;
1513
+ r_.f64[2] = b;
1514
+ r_.f64[3] = a;
1515
+ r_.f64[4] = d;
1516
+ r_.f64[5] = c;
1517
+ r_.f64[6] = b;
1518
+ r_.f64[7] = a;
1519
+
1520
+ return simde__m512d_from_private(r_);
1521
+ }
1522
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1523
+ #define _mm512_setr4_pd(d,c,b,a) simde_mm512_setr4_pd(d,c,b,a)
1524
+ #endif
1525
+
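The set4/setr4 helpers replicate one group of four values across the whole register: set4 fills lanes from its last argument up (in simde_mm512_set4_pd above, argument a lands in lane 0), while setr and setr4 fill from the first argument down. A minimal sketch of the relationship; the include path and reading lanes back bytewise from the union type are assumptions of this sketch, not part of the diff:

    #include <assert.h>
    #include <string.h>
    #include "simde/x86/avx512f.h"  /* assumed include path */

    int main(void) {
      /* setr: first argument lands in lane 0; setr4 repeats its four
       * arguments in the same order, so these two vectors are identical. */
      simde__m512i x = simde_mm512_setr_epi32(0, 1, 2, 3, 0, 1, 2, 3,
                                              0, 1, 2, 3, 0, 1, 2, 3);
      simde__m512i y = simde_mm512_setr4_epi32(0, 1, 2, 3);
      assert(memcmp(&x, &y, sizeof(x)) == 0);
      return 0;
    }
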
1526
+ SIMDE__FUNCTION_ATTRIBUTES
1527
+ simde__m512i
1528
+ simde_mm512_setzero_si512(void) {
1529
+ #if defined(SIMDE_AVX512F_NATIVE)
1530
+ return _mm512_setzero_si512();
1531
+ #else
1532
+ simde__m512i r;
1533
+ simde_memset(&r, 0, sizeof(r));
1534
+ return r;
1535
+ #endif
1536
+ }
1537
+ #define simde_mm512_setzero_epi32() simde_mm512_setzero_si512()
1538
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1539
+ #define _mm512_setzero_si512() simde_mm512_setzero_si512()
1540
+ #define _mm512_setzero_epi32() simde_mm512_setzero_si512()
1541
+ #endif
1542
+
1543
+ SIMDE__FUNCTION_ATTRIBUTES
1544
+ simde__m512i
1545
+ simde_mm512_setone_si512(void) {
1546
+ simde__m512i_private r_;
1547
+
1548
+ SIMDE__VECTORIZE
1549
+ for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
1550
+ r_.i32f[i] = ~((int_fast32_t) 0);
1551
+ }
1552
+
1553
+ return simde__m512i_from_private(r_);
1554
+ }
1555
+ #define simde_mm512_setone_epi32() simde_mm512_setone_si512()
1556
+
1557
+ SIMDE__FUNCTION_ATTRIBUTES
1558
+ simde__m512
1559
+ simde_mm512_setzero_ps(void) {
1560
+ #if defined(SIMDE_AVX512F_NATIVE)
1561
+ return _mm512_setzero_ps();
1562
+ #else
1563
+ return simde_mm512_castsi512_ps(simde_mm512_setzero_si512());
1564
+ #endif
1565
+ }
1566
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1567
+ #define _mm512_setzero_ps() simde_mm512_setzero_ps()
1568
+ #endif
1569
+
1570
+ SIMDE__FUNCTION_ATTRIBUTES
1571
+ simde__m512
1572
+ simde_mm512_setone_ps(void) {
1573
+ return simde_mm512_castsi512_ps(simde_mm512_setone_si512());
1574
+ }
1575
+
1576
+ SIMDE__FUNCTION_ATTRIBUTES
1577
+ simde__m512d
1578
+ simde_mm512_setzero_pd(void) {
1579
+ #if defined(SIMDE_AVX512F_NATIVE)
1580
+ return _mm512_setzero_pd();
1581
+ #else
1582
+ return simde_mm512_castsi512_pd(simde_mm512_setzero_si512());
1583
+ #endif
1584
+ }
1585
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1586
+ #define _mm512_setzero_pd() simde_mm512_setzero_pd()
1587
+ #endif
1588
+
1589
+ SIMDE__FUNCTION_ATTRIBUTES
1590
+ simde__m512d
1591
+ simde_mm512_setone_pd(void) {
1592
+ return simde_mm512_castsi512_pd(simde_mm512_setone_si512());
1593
+ }
1594
+
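The setone_ helpers have no _mm512_ counterparts to call through to, so they are always emulated: every i32f lane is set to ~0, giving an all-ones bit pattern (the ps/pd casts reuse it, yielding a NaN-patterned float register that is only useful as a mask). A quick sketch, same assumptions as the sketch after the setr4 family:

    #include <assert.h>
    #include <string.h>
    #include "simde/x86/avx512f.h"  /* assumed include path */

    int main(void) {
      simde__m512i ones = simde_mm512_setone_si512();
      unsigned char bytes[sizeof(ones)];
      memcpy(bytes, &ones, sizeof(bytes));
      for (size_t i = 0; i < sizeof(bytes); i++)
        assert(bytes[i] == 0xFF);  /* every bit set, whatever int_fast32_t's width */
      return 0;
    }
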
1595
+ SIMDE__FUNCTION_ATTRIBUTES
1596
+ simde__m512i
1597
+ simde_mm512_mask_mov_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a) {
1598
+ #if defined(SIMDE_AVX512F_NATIVE)
1599
+ return _mm512_mask_mov_epi32(src, k, a);
1600
+ #else
1601
+ simde__m512i_private
1602
+ src_ = simde__m512i_to_private(src),
1603
+ a_ = simde__m512i_to_private(a),
1604
+ r_;
1605
+
1606
+ SIMDE__VECTORIZE
1607
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1608
+ r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i];
1609
+ }
1610
+
1611
+ return simde__m512i_from_private(r_);
1612
+ #endif
1613
+ }
1614
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1615
+ #define _mm512_mask_mov_epi32(src, k, a) simde_mm512_mask_mov_epi32(src, k, a)
1616
+ #endif
1617
+
1618
+ SIMDE__FUNCTION_ATTRIBUTES
1619
+ simde__m512i
1620
+ simde_mm512_mask_mov_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a) {
1621
+ #if defined(SIMDE_AVX512F_NATIVE)
1622
+ return _mm512_mask_mov_epi64(src, k, a);
1623
+ #else
1624
+ simde__m512i_private
1625
+ src_ = simde__m512i_to_private(src),
1626
+ a_ = simde__m512i_to_private(a),
1627
+ r_;
1628
+
1629
+ SIMDE__VECTORIZE
1630
+ for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
1631
+ r_.i64[i] = ((k >> i) & 1) ? a_.i64[i] : src_.i64[i];
1632
+ }
1633
+
1634
+ return simde__m512i_from_private(r_);
1635
+ #endif
1636
+ }
1637
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1638
+ #define _mm512_mask_mov_epi64(src, k, a) simde_mm512_mask_mov_epi64(src, k, a)
1639
+ #endif
1640
+
1641
+ SIMDE__FUNCTION_ATTRIBUTES
1642
+ simde__m512
1643
+ simde_mm512_mask_mov_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
1644
+ #if defined(SIMDE_AVX512F_NATIVE)
1645
+ return _mm512_mask_mov_ps(src, k, a);
1646
+ #else
1647
+ simde__m512_private
1648
+ src_ = simde__m512_to_private(src),
1649
+ a_ = simde__m512_to_private(a),
1650
+ r_;
1651
+
1652
+ SIMDE__VECTORIZE
1653
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1654
+ r_.f32[i] = ((k >> i) & 1) ? a_.f32[i] : src_.f32[i];
1655
+ }
1656
+
1657
+ return simde__m512_from_private(r_);
1658
+ #endif
1659
+ }
1660
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1661
+ #define _mm512_mask_mov_ps(src, k, a) simde_mm512_mask_mov_ps(src, k, a)
1662
+ #endif
1663
+
1664
+ SIMDE__FUNCTION_ATTRIBUTES
1665
+ simde__m512d
1666
+ simde_mm512_mask_mov_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
1667
+ #if defined(SIMDE_AVX512F_NATIVE)
1668
+ return _mm512_mask_mov_pd(src, k, a);
1669
+ #else
1670
+ simde__m512d_private
1671
+ src_ = simde__m512d_to_private(src),
1672
+ a_ = simde__m512d_to_private(a),
1673
+ r_;
1674
+
1675
+ SIMDE__VECTORIZE
1676
+ for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1677
+ r_.f64[i] = ((k >> i) & 1) ? a_.f64[i] : src_.f64[i];
1678
+ }
1679
+
1680
+ return simde__m512d_from_private(r_);
1681
+ #endif
1682
+ }
1683
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1684
+ #define _mm512_mask_mov_pd(src, k, a) simde_mm512_mask_mov_pd(src, k, a)
1685
+ #endif
1686
+
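mask_mov is the blend primitive the rest of the masked API composes with: bit i of k selects lane i of a, and a clear bit keeps lane i of src. A minimal sketch, same assumptions as above:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>
    #include "simde/x86/avx512f.h"  /* assumed include path */

    int main(void) {
      simde__m512i src = simde_mm512_setzero_si512();
      simde__m512i a   = simde_mm512_setr4_epi32(1, 2, 3, 4);
      /* k = 0x0001: only lane 0 is taken from a, every other lane keeps src */
      simde__m512i r = simde_mm512_mask_mov_epi32(src, UINT16_C(0x0001), a);
      int32_t lanes[16];
      memcpy(lanes, &r, sizeof(lanes));
      assert(lanes[0] == 1 && lanes[1] == 0);
      return 0;
    }
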
1687
+ SIMDE__FUNCTION_ATTRIBUTES
1688
+ simde__m512i
1689
+ simde_mm512_maskz_mov_epi32(simde__mmask16 k, simde__m512i a) {
1690
+ #if defined(SIMDE_AVX512F_NATIVE)
1691
+ return _mm512_maskz_mov_epi32(k, a);
1692
+ #else
1693
+ simde__m512i_private
1694
+ a_ = simde__m512i_to_private(a),
1695
+ r_;
1696
+
1697
+ SIMDE__VECTORIZE
1698
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1699
+ r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : INT32_C(0);
1700
+ }
1701
+
1702
+ return simde__m512i_from_private(r_);
1703
+ #endif
1704
+ }
1705
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1706
+ #define _mm512_maskz_mov_epi32(k, a) simde_mm512_maskz_mov_epi32(k, a)
1707
+ #endif
1708
+
1709
+ SIMDE__FUNCTION_ATTRIBUTES
1710
+ simde__m512i
1711
+ simde_mm512_maskz_mov_epi64(simde__mmask8 k, simde__m512i a) {
1712
+ #if defined(SIMDE_AVX512F_NATIVE)
1713
+ return _mm512_maskz_mov_epi64(k, a);
1714
+ #else
1715
+ simde__m512i_private
1716
+ a_ = simde__m512i_to_private(a),
1717
+ r_;
1718
+
1719
+ SIMDE__VECTORIZE
1720
+ for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
1721
+ r_.i64[i] = ((k >> i) & 1) ? a_.i64[i] : INT64_C(0);
1722
+ }
1723
+
1724
+ return simde__m512i_from_private(r_);
1725
+ #endif
1726
+ }
1727
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1728
+ #define _mm512_maskz_mov_epi64(k, a) simde_mm512_maskz_mov_epi64(k, a)
1729
+ #endif
1730
+
1731
+ SIMDE__FUNCTION_ATTRIBUTES
1732
+ simde__m512
1733
+ simde_mm512_maskz_mov_ps(simde__mmask16 k, simde__m512 a) {
1734
+ #if defined(SIMDE_AVX512F_NATIVE)
1735
+ return _mm512_maskz_mov_ps(k, a);
1736
+ #else
1737
+ simde__m512_private
1738
+ a_ = simde__m512_to_private(a),
1739
+ r_;
1740
+
1741
+ SIMDE__VECTORIZE
1742
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1743
+ r_.f32[i] = ((k >> i) & 1) ? a_.f32[i] : SIMDE_FLOAT32_C(0.0);
1744
+ }
1745
+
1746
+ return simde__m512_from_private(r_);
1747
+ #endif
1748
+ }
1749
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1750
+ #define _mm512_maskz_mov_ps(k, a) simde_mm512_maskz_mov_ps(k, a)
1751
+ #endif
1752
+
1753
+ SIMDE__FUNCTION_ATTRIBUTES
1754
+ simde__m512d
1755
+ simde_mm512_maskz_mov_pd(simde__mmask8 k, simde__m512d a) {
1756
+ #if defined(SIMDE_AVX512F_NATIVE)
1757
+ return _mm512_maskz_mov_pd(k, a);
1758
+ #else
1759
+ simde__m512d_private
1760
+ a_ = simde__m512d_to_private(a),
1761
+ r_;
1762
+
1763
+ SIMDE__VECTORIZE
1764
+ for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1765
+ r_.f64[i] = ((k >> i) & 1) ? a_.f64[i] : SIMDE_FLOAT64_C(0.0);
1766
+ }
1767
+
1768
+ return simde__m512d_from_private(r_);
1769
+ #endif
1770
+ }
1771
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1772
+ #define _mm512_maskz_mov_pd(k, a) simde_mm512_maskz_mov_pd(k, a)
1773
+ #endif
1774
+
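The maskz_ (zero-masking) variants differ only in that deselected lanes are zeroed rather than taken from a src operand:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>
    #include "simde/x86/avx512f.h"  /* assumed include path */

    int main(void) {
      simde__m512i a = simde_mm512_setr4_epi32(1, 2, 3, 4);
      simde__m512i r = simde_mm512_maskz_mov_epi32(UINT16_C(0x0003), a);
      int32_t lanes[16];
      memcpy(lanes, &r, sizeof(lanes));
      assert(lanes[0] == 1 && lanes[1] == 2 && lanes[2] == 0);  /* lanes 2..15 zeroed */
      return 0;
    }
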
1775
+ SIMDE__FUNCTION_ATTRIBUTES
1776
+ simde__m512i
1777
+ simde_mm512_abs_epi32(simde__m512i a) {
1778
+ #if defined(SIMDE_AVX512F_NATIVE)
1779
+ return _mm512_abs_epi32(a);
1780
+ #else
1781
+ simde__m512i_private
1782
+ r_,
1783
+ a_ = simde__m512i_to_private(a);
1784
+
1785
+ SIMDE__VECTORIZE
1786
+ for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
1787
+ r_.i32[i] = (a_.i32[i] < INT32_C(0)) ? -a_.i32[i] : a_.i32[i];
1788
+ }
1789
+
1790
+ return simde__m512i_from_private(r_);
1791
+ #endif
1792
+ }
1793
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1794
+ # define _mm512_abs_epi32(a) simde_mm512_abs_epi32(a)
1795
+ #endif
1796
+
1797
+ SIMDE__FUNCTION_ATTRIBUTES
1798
+ simde__m512i
1799
+ simde_mm512_mask_abs_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a) {
1800
+ #if defined(SIMDE_AVX512F_NATIVE)
1801
+ return _mm512_mask_abs_epi32(src, k, a);
1802
+ #else
1803
+ return simde_mm512_mask_mov_epi32(src, k, simde_mm512_abs_epi32(a));
1804
+ #endif
1805
+ }
1806
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1807
+ #define _mm512_mask_abs_epi32(src, k, a) simde_mm512_mask_abs_epi32(src, k, a)
1808
+ #endif
1809
+
1810
+ SIMDE__FUNCTION_ATTRIBUTES
1811
+ simde__m512i
1812
+ simde_mm512_maskz_abs_epi32(simde__mmask16 k, simde__m512i a) {
1813
+ #if defined(SIMDE_AVX512F_NATIVE)
1814
+ return _mm512_maskz_abs_epi32(k, a);
1815
+ #else
1816
+ return simde_mm512_maskz_mov_epi32(k, simde_mm512_abs_epi32(a));
1817
+ #endif
1818
+ }
1819
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1820
+ #define _mm512_maskz_abs_epi32(k, a) simde_mm512_maskz_abs_epi32(k, a)
1821
+ #endif
1822
+
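As with the hardware instruction, the absolute value of the most negative element wraps back to itself on two's-complement targets, since its positive counterpart is not representable. A sketch, same assumptions as above:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>
    #include "simde/x86/avx512f.h"  /* assumed include path */

    int main(void) {
      simde__m512i a = simde_mm512_setr4_epi32(-5, 5, INT32_MIN, 0);
      simde__m512i r = simde_mm512_abs_epi32(a);
      int32_t lanes[16];
      memcpy(lanes, &r, sizeof(lanes));
      assert(lanes[0] == 5 && lanes[2] == INT32_MIN);  /* INT32_MIN wraps */
      return 0;
    }
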
1823
+ SIMDE__FUNCTION_ATTRIBUTES
1824
+ simde__m512i
1825
+ simde_mm512_abs_epi64(simde__m512i a) {
1826
+ #if defined(SIMDE_AVX512F_NATIVE)
1827
+ return _mm512_abs_epi64(a);
1828
+ #else
1829
+ simde__m512i_private
1830
+ r_,
1831
+ a_ = simde__m512i_to_private(a);
1832
+
1833
+ SIMDE__VECTORIZE
1834
+ for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
1835
+ r_.i64[i] = (a_.i64[i] < INT64_C(0)) ? -a_.i64[i] : a_.i64[i];
1836
+ }
1837
+
1838
+ return simde__m512i_from_private(r_);
1839
+ #endif
1840
+ }
1841
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1842
+ # define _mm512_abs_epi64(a) simde_mm512_abs_epi64(a)
1843
+ #endif
1844
+
1845
+ SIMDE__FUNCTION_ATTRIBUTES
1846
+ simde__m512i
1847
+ simde_mm512_mask_abs_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a) {
1848
+ #if defined(SIMDE_AVX512F_NATIVE)
1849
+ return _mm512_mask_abs_epi64(src, k, a);
1850
+ #else
1851
+ return simde_mm512_mask_mov_epi64(src, k, simde_mm512_abs_epi64(a));
1852
+ #endif
1853
+ }
1854
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1855
+ #define _mm512_mask_abs_epi64(src, k, a) simde_mm512_mask_abs_epi64(src, k, a)
1856
+ #endif
1857
+
1858
+ SIMDE__FUNCTION_ATTRIBUTES
1859
+ simde__m512i
1860
+ simde_mm512_maskz_abs_epi64(simde__mmask8 k, simde__m512i a) {
1861
+ #if defined(SIMDE_AVX512F_NATIVE)
1862
+ return _mm512_maskz_abs_epi64(k, a);
1863
+ #else
1864
+ return simde_mm512_maskz_mov_epi64(k, simde_mm512_abs_epi64(a));
1865
+ #endif
1866
+ }
1867
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1868
+ #define _mm512_maskz_abs_epi64(k, a) simde_mm512_maskz_abs_epi64(k, a)
1869
+ #endif
1870
+
1871
+ SIMDE__FUNCTION_ATTRIBUTES
1872
+ simde__m512i
1873
+ simde_mm512_add_epi32 (simde__m512i a, simde__m512i b) {
1874
+ #if defined(SIMDE_AVX512F_NATIVE)
1875
+ return _mm512_add_epi32(a, b);
1876
+ #else
1877
+ simde__m512i_private
1878
+ r_,
1879
+ a_ = simde__m512i_to_private(a),
1880
+ b_ = simde__m512i_to_private(b);
1881
+
1882
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
1883
+ r_.i32 = a_.i32 + b_.i32;
1884
+ #else
1885
+ SIMDE__VECTORIZE
1886
+ for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
1887
+ r_.m256i[i] = simde_mm256_add_epi32(a_.m256i[i], b_.m256i[i]);
1888
+ }
1889
+ #endif
1890
+
1891
+ return simde__m512i_from_private(r_);
1892
+ #endif
1893
+ }
1894
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1895
+ #define _mm512_add_epi32(a, b) simde_mm512_add_epi32(a, b)
1896
+ #endif
1897
+
1898
+ SIMDE__FUNCTION_ATTRIBUTES
1899
+ simde__m512i
1900
+ simde_mm512_mask_add_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
1901
+ #if defined(SIMDE_AVX512F_NATIVE)
1902
+ return _mm512_mask_add_epi32(src, k, a, b);
1903
+ #else
1904
+ return simde_mm512_mask_mov_epi32(src, k, simde_mm512_add_epi32(a, b));
1905
+ #endif
1906
+ }
1907
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1908
+ #define _mm512_mask_add_epi32(src, k, a, b) simde_mm512_mask_add_epi32(src, k, a, b)
1909
+ #endif
1910
+
1911
+ SIMDE__FUNCTION_ATTRIBUTES
1912
+ simde__m512i
1913
+ simde_mm512_maskz_add_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) {
1914
+ #if defined(SIMDE_AVX512F_NATIVE)
1915
+ return _mm512_maskz_add_epi32(k, a, b);
1916
+ #else
1917
+ return simde_mm512_maskz_mov_epi32(k, simde_mm512_add_epi32(a, b));
1918
+ #endif
1919
+ }
1920
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1921
+ #define _mm512_maskz_add_epi32(k, a, b) simde_mm512_maskz_add_epi32(k, a, b)
1922
+ #endif
1923
+
1924
+ SIMDE__FUNCTION_ATTRIBUTES
1925
+ simde__m512i
1926
+ simde_mm512_add_epi64 (simde__m512i a, simde__m512i b) {
1927
+ #if defined(SIMDE_AVX512F_NATIVE)
1928
+ return _mm512_add_epi64(a, b);
1929
+ #else
1930
+ simde__m512i_private
1931
+ r_,
1932
+ a_ = simde__m512i_to_private(a),
1933
+ b_ = simde__m512i_to_private(b);
1934
+
1935
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
1936
+ r_.i64 = a_.i64 + b_.i64;
1937
+ #else
1938
+ SIMDE__VECTORIZE
1939
+ for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
1940
+ r_.m256i[i] = simde_mm256_add_epi64(a_.m256i[i], b_.m256i[i]);
1941
+ }
1942
+ #endif
1943
+
1944
+ return simde__m512i_from_private(r_);
1945
+ #endif
1946
+ }
1947
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1948
+ #define _mm512_add_epi64(a, b) simde_mm512_add_epi64(a, b)
1949
+ #endif
1950
+
1951
+ SIMDE__FUNCTION_ATTRIBUTES
1952
+ simde__m512i
1953
+ simde_mm512_mask_add_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
1954
+ #if defined(SIMDE_AVX512F_NATIVE)
1955
+ return _mm512_mask_add_epi64(src, k, a, b);
1956
+ #else
1957
+ return simde_mm512_mask_mov_epi64(src, k, simde_mm512_add_epi64(a, b));
1958
+ #endif
1959
+ }
1960
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1961
+ #define _mm512_mask_add_epi64(src, k, a, b) simde_mm512_mask_add_epi64(src, k, a, b)
1962
+ #endif
1963
+
1964
+ SIMDE__FUNCTION_ATTRIBUTES
1965
+ simde__m512i
1966
+ simde_mm512_maskz_add_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) {
1967
+ #if defined(SIMDE_AVX512F_NATIVE)
1968
+ return _mm512_maskz_add_epi64(k, a, b);
1969
+ #else
1970
+ return simde_mm512_maskz_mov_epi64(k, simde_mm512_add_epi64(a, b));
1971
+ #endif
1972
+ }
1973
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1974
+ #define _mm512_maskz_add_epi64(k, a, b) simde_mm512_maskz_add_epi64(k, a, b)
1975
+ #endif
1976
+
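Because the masked forms are compositions of the plain operation with mask_mov/maskz_mov, conditional accumulation is a single call. A sketch that adds b only in the even lanes:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>
    #include "simde/x86/avx512f.h"  /* assumed include path */

    int main(void) {
      simde__m512i a = simde_mm512_setr4_epi32(1, 1, 1, 1);
      simde__m512i b = simde_mm512_setr4_epi32(10, 20, 30, 40);
      /* 0x5555 selects the even lanes; odd lanes pass a through unchanged */
      simde__m512i r = simde_mm512_mask_add_epi32(a, UINT16_C(0x5555), a, b);
      int32_t lanes[16];
      memcpy(lanes, &r, sizeof(lanes));
      assert(lanes[0] == 11 && lanes[1] == 1 && lanes[2] == 31);
      return 0;
    }
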
1977
+
1978
+ SIMDE__FUNCTION_ATTRIBUTES
1979
+ simde__m512
1980
+ simde_mm512_add_ps (simde__m512 a, simde__m512 b) {
1981
+ #if defined(SIMDE_AVX512F_NATIVE)
1982
+ return _mm512_add_ps(a, b);
1983
+ #else
1984
+ simde__m512_private
1985
+ r_,
1986
+ a_ = simde__m512_to_private(a),
1987
+ b_ = simde__m512_to_private(b);
1988
+
1989
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
1990
+ r_.f32 = a_.f32 + b_.f32;
1991
+ #else
1992
+ SIMDE__VECTORIZE
1993
+ for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
1994
+ r_.m256[i] = simde_mm256_add_ps(a_.m256[i], b_.m256[i]);
1995
+ }
1996
+ #endif
1997
+
1998
+ return simde__m512_from_private(r_);
1999
+ #endif
2000
+ }
2001
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2002
+ #define _mm512_add_ps(a, b) simde_mm512_add_ps(a, b)
2003
+ #endif
2004
+
2005
+ SIMDE__FUNCTION_ATTRIBUTES
2006
+ simde__m512
2007
+ simde_mm512_mask_add_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
2008
+ #if defined(SIMDE_AVX512F_NATIVE)
2009
+ return _mm512_mask_add_ps(src, k, a, b);
2010
+ #else
2011
+ return simde_mm512_mask_mov_ps(src, k, simde_mm512_add_ps(a, b));
2012
+ #endif
2013
+ }
2014
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2015
+ #define _mm512_mask_add_ps(src, k, a, b) simde_mm512_mask_add_ps(src, k, a, b)
2016
+ #endif
2017
+
2018
+ SIMDE__FUNCTION_ATTRIBUTES
2019
+ simde__m512
2020
+ simde_mm512_maskz_add_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
2021
+ #if defined(SIMDE_AVX512F_NATIVE)
2022
+ return _mm512_maskz_add_ps(k, a, b);
2023
+ #else
2024
+ return simde_mm512_maskz_mov_ps(k, simde_mm512_add_ps(a, b));
2025
+ #endif
2026
+ }
2027
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2028
+ #define _mm512_maskz_add_ps(k, a, b) simde_mm512_maskz_add_ps(k, a, b)
2029
+ #endif
2030
+
2031
+
2032
+ SIMDE__FUNCTION_ATTRIBUTES
2033
+ simde__m512d
2034
+ simde_mm512_add_pd (simde__m512d a, simde__m512d b) {
2035
+ #if defined(SIMDE_AVX512F_NATIVE)
2036
+ return _mm512_add_pd(a, b);
2037
+ #else
2038
+ simde__m512d_private
2039
+ r_,
2040
+ a_ = simde__m512d_to_private(a),
2041
+ b_ = simde__m512d_to_private(b);
2042
+
2043
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2044
+ r_.f64 = a_.f64 + b_.f64;
2045
+ #else
2046
+ SIMDE__VECTORIZE
2047
+ for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
2048
+ r_.m256d[i] = simde_mm256_add_pd(a_.m256d[i], b_.m256d[i]);
2049
+ }
2050
+ #endif
2051
+
2052
+ return simde__m512d_from_private(r_);
2053
+ #endif
2054
+ }
2055
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2056
+ #define _mm512_add_pd(a, b) simde_mm512_add_pd(a, b)
2057
+ #endif
2058
+
2059
+ SIMDE__FUNCTION_ATTRIBUTES
2060
+ simde__m512d
2061
+ simde_mm512_mask_add_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
2062
+ #if defined(SIMDE_AVX512F_NATIVE)
2063
+ return _mm512_mask_add_pd(src, k, a, b);
2064
+ #else
2065
+ return simde_mm512_mask_mov_pd(src, k, simde_mm512_add_pd(a, b));
2066
+ #endif
2067
+ }
2068
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2069
+ #define _mm512_mask_add_pd(src, k, a, b) simde_mm512_mask_add_pd(src, k, a, b)
2070
+ #endif
2071
+
2072
+ SIMDE__FUNCTION_ATTRIBUTES
2073
+ simde__m512d
2074
+ simde_mm512_maskz_add_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
2075
+ #if defined(SIMDE_AVX512F_NATIVE)
2076
+ return _mm512_maskz_add_pd(k, a, b);
2077
+ #else
2078
+ return simde_mm512_maskz_mov_pd(k, simde_mm512_add_pd(a, b));
2079
+ #endif
2080
+ }
2081
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2082
+ #define _mm512_maskz_add_pd(k, a, b) simde_mm512_maskz_add_pd(k, a, b)
2083
+ #endif
2084
+
2085
+ SIMDE__FUNCTION_ATTRIBUTES
2086
+ simde__m512i
2087
+ simde_mm512_and_si512 (simde__m512i a, simde__m512i b) {
2088
+ #if defined(SIMDE_AVX512F_NATIVE)
2089
+ return _mm512_and_si512(a, b);
2090
+ #else
2091
+ simde__m512i_private
2092
+ r_,
2093
+ a_ = simde__m512i_to_private(a),
2094
+ b_ = simde__m512i_to_private(b);
2095
+
2096
+ #if defined(SIMDE_ARCH_X86_AVX2)
2097
+ r_.m256i[0] = simde_mm256_and_si256(a_.m256i[0], b_.m256i[0]);
2098
+ r_.m256i[1] = simde_mm256_and_si256(a_.m256i[1], b_.m256i[1]);
2099
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2100
+ r_.i32f = a_.i32f & b_.i32f;
2101
+ #else
2102
+ SIMDE__VECTORIZE
2103
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
2104
+ r_.i32[i] = a_.i32[i] & b_.i32[i];
2105
+ }
2106
+ #endif
2107
+
2108
+ return simde__m512i_from_private(r_);
2109
+ #endif
2110
+ }
2111
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2112
+ #define _mm512_and_si512(a, b) simde_mm512_and_si512(a, b)
2113
+ #endif
2114
+
2115
+ SIMDE__FUNCTION_ATTRIBUTES
2116
+ simde__m512i
2117
+ simde_mm512_andnot_si512 (simde__m512i a, simde__m512i b) {
2118
+ #if defined(SIMDE_AVX512F_NATIVE)
2119
+ return _mm512_andnot_si512(a, b);
2120
+ #else
2121
+ simde__m512i_private
2122
+ r_,
2123
+ a_ = simde__m512i_to_private(a),
2124
+ b_ = simde__m512i_to_private(b);
2125
+
2126
+ #if defined(SIMDE_ARCH_X86_AVX2)
2127
+ r_.m256i[0] = simde_mm256_andnot_si256(a_.m256i[0], b_.m256i[0]);
2128
+ r_.m256i[1] = simde_mm256_andnot_si256(a_.m256i[1], b_.m256i[1]);
2129
+ #else
2130
+ SIMDE__VECTORIZE
2131
+ for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
2132
+ r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i];
2133
+ }
2134
+ #endif
2135
+
2136
+ return simde__m512i_from_private(r_);
2137
+ #endif
2138
+ }
2139
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2140
+ #define _mm512_andnot_si512(a, b) simde_mm512_andnot_si512(a, b)
2141
+ #endif
2142
+
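As in the SSE/AVX versions, andnot inverts its first operand, not its second: the result is (~a) & b. A sketch, same assumptions as above:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>
    #include "simde/x86/avx512f.h"  /* assumed include path */

    int main(void) {
      simde__m512i m = simde_mm512_setr4_epi32(0xFF, 0xFF, 0xFF, 0xFF);
      simde__m512i v = simde_mm512_setr4_epi32(0x1234, 0x00FF, -1, 0);
      simde__m512i r = simde_mm512_andnot_si512(m, v);  /* (~m) & v */
      int32_t lanes[16];
      memcpy(lanes, &r, sizeof(lanes));
      assert(lanes[0] == 0x1200 && lanes[1] == 0);
      return 0;
    }
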
2143
+ SIMDE__FUNCTION_ATTRIBUTES
2144
+ simde__m512i
2145
+ simde_mm512_broadcast_i32x4 (simde__m128i a) {
2146
+ #if defined(SIMDE_AVX512F_NATIVE)
2147
+ return _mm512_broadcast_i32x4(a);
2148
+ #else
2149
+ simde__m512i_private r_;
2150
+
2151
+ #if defined(SIMDE_ARCH_X86_AVX2)
2152
+ r_.m256i[1] = r_.m256i[0] = simde_mm256_broadcastsi128_si256(a);
2153
+ #elif defined(SIMDE_ARCH_X86_SSE2)
2154
+ r_.m128i[3] = r_.m128i[2] = r_.m128i[1] = r_.m128i[0] = a;
2155
+ #else
2156
+ SIMDE__VECTORIZE
2157
+ for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
2158
+ r_.m128i[i] = a;
2159
+ }
2160
+ #endif
2161
+
2162
+ return simde__m512i_from_private(r_);
2163
+ #endif
2164
+ }
2165
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2166
+ #define _mm512_broadcast_i32x4(a) simde_mm512_broadcast_i32x4(a)
2167
+ #endif
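broadcast_i32x4 replicates one 128-bit vector into all four 128-bit slots of the result, so it pairs naturally with SIMDe's SSE2 layer (simde_mm_setr_epi32 in this sketch comes from there; same assumptions as above):

    #include <assert.h>
    #include <string.h>
    #include "simde/x86/avx512f.h"  /* assumed include path */

    int main(void) {
      simde__m128i quad = simde_mm_setr_epi32(0, 1, 2, 3);
      simde__m512i r = simde_mm512_broadcast_i32x4(quad);
      simde__m512i expected = simde_mm512_setr4_epi32(0, 1, 2, 3);
      assert(memcmp(&r, &expected, sizeof(r)) == 0);
      return 0;
    }
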
2168
+
2169
+ SIMDE__FUNCTION_ATTRIBUTES
2170
+ simde__mmask16
2171
+ simde_mm512_cmpeq_epi32_mask (simde__m512i a, simde__m512i b) {
2172
+ #if defined(SIMDE_AVX512F_NATIVE)
2173
+ return _mm512_cmpeq_epi32_mask(a, b);
2174
+ #else
2175
+ simde__m512i_private
2176
+ r_,
2177
+ a_ = simde__m512i_to_private(a),
2178
+ b_ = simde__m512i_to_private(b);
2179
+
2180
+ for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
2181
+ r_.m256i[i] = simde_mm256_cmpeq_epi32(a_.m256i[i], b_.m256i[i]);
2182
+ }
2183
+
2184
+ return simde__m512i_private_to_mmask16(r_);
2185
+ #endif
2186
+ }
2187
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2188
+ #define _mm512_cmpeq_epi32_mask(a, b) simde_mm512_cmpeq_epi32_mask(a, b)
2189
+ #endif
2190
+
2191
+ SIMDE__FUNCTION_ATTRIBUTES
2192
+ simde__mmask16
2193
+ simde_mm512_mask_cmpeq_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) {
2194
+ #if defined(SIMDE_AVX512F_NATIVE)
2195
+ return _mm512_mask_cmpeq_epi32_mask(k1, a, b);
2196
+ #else
2197
+ return simde_mm512_cmpeq_epi32_mask(a, b) & k1;
2198
+ #endif
2199
+ }
2200
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2201
+ #define _mm512_mask_cmpeq_epi32_mask(k1, a, b) simde_mm512_mask_cmpeq_epi32_mask(k1, a, b)
2202
+ #endif
2203
+
2204
+ SIMDE__FUNCTION_ATTRIBUTES
2205
+ simde__mmask8
2206
+ simde_mm512_cmpeq_epi64_mask (simde__m512i a, simde__m512i b) {
2207
+ #if defined(SIMDE_AVX512F_NATIVE)
2208
+ return _mm512_cmpeq_epi64_mask(a, b);
2209
+ #else
2210
+ simde__m512i_private
2211
+ r_,
2212
+ a_ = simde__m512i_to_private(a),
2213
+ b_ = simde__m512i_to_private(b);
2214
+
2215
+ for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
2216
+ r_.m256i[i] = simde_mm256_cmpeq_epi64(a_.m256i[i], b_.m256i[i]);
2217
+ }
2218
+
2219
+ return simde__m512i_private_to_mmask8(r_);
2220
+ #endif
2221
+ }
2222
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2223
+ #define _mm512_cmpeq_epi64_mask(a, b) simde_mm512_cmpeq_epi64_mask(a, b)
2224
+ #endif
2225
+
2226
+ SIMDE__FUNCTION_ATTRIBUTES
2227
+ simde__mmask8
2228
+ simde_mm512_mask_cmpeq_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) {
2229
+ #if defined(SIMDE_AVX512F_NATIVE)
2230
+ return _mm512_mask_cmpeq_epi64_mask(k1, a, b);
2231
+ #else
2232
+ return simde_mm512_cmpeq_epi64_mask(a, b) & k1;
2233
+ #endif
2234
+ }
2235
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2236
+ #define _mm512_mask_cmpeq_epi64_mask(k1, a, b) simde_mm512_mask_cmpeq_epi64_mask(k1, a, b)
2237
+ #endif
2238
+
2239
+ SIMDE__FUNCTION_ATTRIBUTES
2240
+ simde__mmask16
2241
+ simde_mm512_cmpgt_epi32_mask (simde__m512i a, simde__m512i b) {
2242
+ #if defined(SIMDE_AVX512F_NATIVE)
2243
+ return _mm512_cmpgt_epi32_mask(a, b);
2244
+ #else
2245
+ simde__m512i_private
2246
+ r_,
2247
+ a_ = simde__m512i_to_private(a),
2248
+ b_ = simde__m512i_to_private(b);
2249
+
2250
+ for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
2251
+ r_.m256i[i] = simde_mm256_cmpgt_epi32(a_.m256i[i], b_.m256i[i]);
2252
+ }
2253
+
2254
+ return simde__m512i_private_to_mmask16(r_);
2255
+ #endif
2256
+ }
2257
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2258
+ #define _mm512_cmpgt_epi32_mask(a, b) simde_mm512_cmpgt_epi32_mask(a, b)
2259
+ #endif
2260
+
2261
+ SIMDE__FUNCTION_ATTRIBUTES
2262
+ simde__mmask16
2263
+ simde_mm512_mask_cmpgt_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) {
2264
+ #if defined(SIMDE_AVX512F_NATIVE)
2265
+ return _mm512_mask_cmpgt_epi32_mask(k1, a, b);
2266
+ #else
2267
+ return simde_mm512_cmpgt_epi32_mask(a, b) & k1;
2268
+ #endif
2269
+ }
2270
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2271
+ #define _mm512_mask_cmpgt_epi32_mask(k1, a, b) simde_mm512_mask_cmpgt_epi32_mask(k1, a, b)
2272
+ #endif
2273
+
2274
+ SIMDE__FUNCTION_ATTRIBUTES
2275
+ simde__mmask8
2276
+ simde_mm512_cmpgt_epi64_mask (simde__m512i a, simde__m512i b) {
2277
+ #if defined(SIMDE_AVX512F_NATIVE)
2278
+ return _mm512_cmpgt_epi64_mask(a, b);
2279
+ #else
2280
+ simde__m512i_private
2281
+ r_,
2282
+ a_ = simde__m512i_to_private(a),
2283
+ b_ = simde__m512i_to_private(b);
2284
+
2285
+ for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
2286
+ r_.m256i[i] = simde_mm256_cmpgt_epi64(a_.m256i[i], b_.m256i[i]);
2287
+ }
2288
+
2289
+ return simde__m512i_private_to_mmask8(r_);
2290
+ #endif
2291
+ }
2292
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2293
+ #define _mm512_cmpgt_epi64_mask(a, b) simde_mm512_cmpgt_epi64_mask(a, b)
2294
+ #endif
2295
+
2296
+ SIMDE__FUNCTION_ATTRIBUTES
2297
+ simde__mmask8
2298
+ simde_mm512_mask_cmpgt_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) {
2299
+ #if defined(SIMDE_AVX512F_NATIVE)
2300
+ return _mm512_mask_cmpgt_epi64_mask(k1, a, b);
2301
+ #else
2302
+ return simde_mm512_cmpgt_epi64_mask(a, b) & k1;
2303
+ #endif
2304
+ }
2305
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2306
+ #define _mm512_mask_cmpgt_epi64_mask(k1, a, b) simde_mm512_mask_cmpgt_epi64_mask(k1, a, b)
2307
+ #endif
2308
+
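Unlike the AVX2 compares, which return a vector of per-lane masks, these return a bitmask: bit i is set iff lane i compared true, and the mask_ variants simply AND the result with k1. A sketch, same assumptions as above:

    #include <assert.h>
    #include <stdint.h>
    #include "simde/x86/avx512f.h"  /* assumed include path */

    int main(void) {
      simde__m512i a = simde_mm512_setr4_epi32(1, 2, 3, 4);
      simde__m512i b = simde_mm512_setr4_epi32(1, 0, 3, 0);
      simde__mmask16 k = simde_mm512_cmpeq_epi32_mask(a, b);
      assert(k == UINT16_C(0x5555));  /* equal in lanes 0 and 2 of each group of four */
      return 0;
    }
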
2309
+ SIMDE__FUNCTION_ATTRIBUTES
2310
+ simde__m512i
2311
+ simde_mm512_cvtepi8_epi32 (simde__m128i a) {
2312
+ #if defined(SIMDE_AVX512F_NATIVE)
2313
+ return _mm512_cvtepi8_epi32(a);
2314
+ #else
2315
+ simde__m512i_private r_;
2316
+ simde__m128i_private a_ = simde__m128i_to_private(a);
2317
+
2318
+ #if defined(SIMDE__CONVERT_VECTOR)
2319
+ SIMDE__CONVERT_VECTOR(r_.i32, a_.i8);
2320
+ #else
2321
+ SIMDE__VECTORIZE
2322
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
2323
+ r_.i32[i] = a_.i8[i];
2324
+ }
2325
+ #endif
2326
+
2327
+ return simde__m512i_from_private(r_);
2328
+ #endif
2329
+ }
2330
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2331
+ #define _mm512_cvtepi8_epi32(a) simde_mm512_cvtepi8_epi32(a)
2332
+ #endif
2333
+
2334
+ SIMDE__FUNCTION_ATTRIBUTES
2335
+ simde__m512i
2336
+ simde_mm512_cvtepi8_epi64 (simde__m128i a) {
2337
+ #if defined(SIMDE_AVX512F_NATIVE)
2338
+ return _mm512_cvtepi8_epi64(a);
2339
+ #else
2340
+ simde__m512i_private r_;
2341
+ simde__m128i_private a_ = simde__m128i_to_private(a);
2342
+
2343
+ #if defined(SIMDE__CONVERT_VECTOR)
2344
+ SIMDE__CONVERT_VECTOR(r_.i64, a_.m64_private[0].i8);
2345
+ #else
2346
+ SIMDE__VECTORIZE
2347
+ for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
2348
+ r_.i64[i] = a_.i8[i];
2349
+ }
2350
+ #endif
2351
+
2352
+ return simde__m512i_from_private(r_);
2353
+ #endif
2354
+ }
2355
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2356
+ #define _mm512_cvtepi8_epi64(a) simde_mm512_cvtepi8_epi64(a)
2357
+ #endif
2358
+
2359
+ SIMDE__FUNCTION_ATTRIBUTES
2360
+ simde__m128i
2361
+ simde_mm512_cvtepi32_epi8 (simde__m512i a) {
2362
+ #if defined(SIMDE_AVX512F_NATIVE)
2363
+ return _mm512_cvtepi32_epi8(a);
2364
+ #else
2365
+ simde__m128i_private r_;
2366
+ simde__m512i_private a_ = simde__m512i_to_private(a);
2367
+
2368
+ #if defined(SIMDE__CONVERT_VECTOR)
2369
+ SIMDE__CONVERT_VECTOR(r_.i8, a_.i32);
2370
+ #else
2371
+ SIMDE__VECTORIZE
2372
+ for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
2373
+ r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i32[i]);
2374
+ }
2375
+ #endif
2376
+
2377
+ return simde__m128i_from_private(r_);
2378
+ #endif
2379
+ }
2380
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2381
+ #define _mm512_cvtepi32_epi8(a) simde_mm512_cvtepi32_epi8(a)
2382
+ #endif
2383
+
2384
+ SIMDE__FUNCTION_ATTRIBUTES
2385
+ simde__m256i
2386
+ simde_mm512_cvtepi32_epi16 (simde__m512i a) {
2387
+ #if defined(SIMDE_AVX512F_NATIVE)
2388
+ return _mm512_cvtepi32_epi16(a);
2389
+ #else
2390
+ simde__m256i_private r_;
2391
+ simde__m512i_private a_ = simde__m512i_to_private(a);
2392
+
2393
+ #if defined(SIMDE__CONVERT_VECTOR)
2394
+ SIMDE__CONVERT_VECTOR(r_.i16, a_.i32);
2395
+ #else
2396
+ SIMDE__VECTORIZE
2397
+ for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
2398
+ r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]);
2399
+ }
2400
+ #endif
2401
+
2402
+ return simde__m256i_from_private(r_);
2403
+ #endif
2404
+ }
2405
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2406
+ #define _mm512_cvtepi32_epi16(a) simde_mm512_cvtepi32_epi16(a)
2407
+ #endif
2408
+
2409
+ SIMDE__FUNCTION_ATTRIBUTES
2410
+ simde__m128i
2411
+ simde_mm512_cvtepi64_epi8 (simde__m512i a) {
2412
+ #if defined(SIMDE_AVX512F_NATIVE)
2413
+ return _mm512_cvtepi64_epi8(a);
2414
+ #else
2415
+ simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128());
2416
+ simde__m512i_private a_ = simde__m512i_to_private(a);
2417
+
2418
+ #if defined(SIMDE__CONVERT_VECTOR)
2419
+ SIMDE__CONVERT_VECTOR(r_.m64_private[0].i8, a_.i64);
2420
+ #else
2421
+ SIMDE__VECTORIZE
2422
+ for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
2423
+ r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i64[i]);
2424
+ }
2425
+ #endif
2426
+
2427
+ return simde__m128i_from_private(r_);
2428
+ #endif
2429
+ }
2430
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2431
+ #define _mm512_cvtepi64_epi8(a) simde_mm512_cvtepi64_epi8(a)
2432
+ #endif
2433
+
2434
+ SIMDE__FUNCTION_ATTRIBUTES
2435
+ simde__m128i
2436
+ simde_mm512_cvtepi64_epi16 (simde__m512i a) {
2437
+ #if defined(SIMDE_AVX512F_NATIVE)
2438
+ return _mm512_cvtepi64_epi16(a);
2439
+ #else
2440
+ simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128());
2441
+ simde__m512i_private a_ = simde__m512i_to_private(a);
2442
+
2443
+ #if defined(SIMDE__CONVERT_VECTOR)
2444
+ SIMDE__CONVERT_VECTOR(r_.i16, a_.i64);
2445
+ #else
2446
+ SIMDE__VECTORIZE
2447
+ for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
2448
+ r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i64[i]);
2449
+ }
2450
+ #endif
2451
+
2452
+ return simde__m128i_from_private(r_);
2453
+ #endif
2454
+ }
2455
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2456
+ #define _mm512_cvtepi64_epi16(a) simde_mm512_cvtepi64_epi16(a)
2457
+ #endif
2458
+
2459
+ SIMDE__FUNCTION_ATTRIBUTES
2460
+ simde__m256i
2461
+ simde_mm512_cvtepi64_epi32 (simde__m512i a) {
2462
+ #if defined(SIMDE_AVX512F_NATIVE)
2463
+ return _mm512_cvtepi64_epi32(a);
2464
+ #else
2465
+ simde__m256i_private r_;
2466
+ simde__m512i_private a_ = simde__m512i_to_private(a);
2467
+
2468
+ #if defined(SIMDE__CONVERT_VECTOR)
2469
+ SIMDE__CONVERT_VECTOR(r_.i32, a_.i64);
2470
+ #else
2471
+ SIMDE__VECTORIZE
2472
+ for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
2473
+ r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i64[i]);
2474
+ }
2475
+ #endif
2476
+
2477
+ return simde__m256i_from_private(r_);
2478
+ #endif
2479
+ }
2480
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2481
+ #define _mm512_cvtepi64_epi32(a) simde_mm512_cvtepi64_epi32(a)
2482
+ #endif
2483
+
2484
+ SIMDE__FUNCTION_ATTRIBUTES
2485
+ simde__m128i
2486
+ simde_mm512_cvtsepi32_epi8 (simde__m512i a) {
2487
+ #if defined(SIMDE_AVX512F_NATIVE)
2488
+ return _mm512_cvtsepi32_epi8(a);
2489
+ #else
2490
+ simde__m128i_private r_;
2491
+ simde__m512i_private a_ = simde__m512i_to_private(a);
2492
+
2493
+ SIMDE__VECTORIZE
2494
+ for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
2495
+ r_.i8[i] =
2496
+ (a_.i32[i] < INT8_MIN)
2497
+ ? (INT8_MIN)
2498
+ : ((a_.i32[i] > INT8_MAX)
2499
+ ? (INT8_MAX)
2500
+ : HEDLEY_STATIC_CAST(int8_t, a_.i32[i]));
2501
+ }
2502
+
2503
+ return simde__m128i_from_private(r_);
2504
+ #endif
2505
+ }
2506
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2507
+ #define _mm512_cvtsepi32_epi8(a) simde_mm512_cvtsepi32_epi8(a)
2508
+ #endif
2509
+
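The cvtepi32/cvtepi64 narrowing conversions above truncate, keeping only the low bits, while the cvtsepi32/cvtsepi64 family saturates to the narrower type's range, so 300 narrows to 44 in one and to 127 in the other. A sketch, same assumptions as above:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>
    #include "simde/x86/avx512f.h"  /* assumed include path */

    int main(void) {
      simde__m512i v = simde_mm512_setr_epi32(300, -300, 1, -1, 0, 0, 0, 0,
                                              0, 0, 0, 0, 0, 0, 0, 0);
      simde__m128i t = simde_mm512_cvtepi32_epi8(v);   /* truncates: 300 -> 44 */
      simde__m128i s = simde_mm512_cvtsepi32_epi8(v);  /* saturates: 300 -> 127 */
      int8_t tl[16], sl[16];
      memcpy(tl, &t, sizeof(tl));
      memcpy(sl, &s, sizeof(sl));
      assert(tl[0] == 44 && sl[0] == 127 && sl[1] == -128);
      return 0;
    }
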
2510
+ SIMDE__FUNCTION_ATTRIBUTES
2511
+ simde__m256i
2512
+ simde_mm512_cvtsepi32_epi16 (simde__m512i a) {
2513
+ #if defined(SIMDE_AVX512F_NATIVE)
2514
+ return _mm512_cvtsepi32_epi16(a);
2515
+ #else
2516
+ simde__m256i_private r_;
2517
+ simde__m512i_private a_ = simde__m512i_to_private(a);
2518
+
2519
+ SIMDE__VECTORIZE
2520
+ for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
2521
+ r_.i16[i] =
2522
+ (a_.i32[i] < INT16_MIN)
2523
+ ? (INT16_MIN)
2524
+ : ((a_.i32[i] > INT16_MAX)
2525
+ ? (INT16_MAX)
2526
+ : HEDLEY_STATIC_CAST(int16_t, a_.i32[i]));
2527
+ }
2528
+
2529
+ return simde__m256i_from_private(r_);
2530
+ #endif
2531
+ }
2532
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2533
+ #define _mm512_cvtsepi32_epi16(a) simde_mm512_cvtsepi32_epi16(a)
2534
+ #endif
2535
+
2536
+ SIMDE__FUNCTION_ATTRIBUTES
2537
+ simde__m128i
2538
+ simde_mm512_cvtsepi64_epi8 (simde__m512i a) {
2539
+ #if defined(SIMDE_AVX512F_NATIVE)
2540
+ return _mm512_cvtsepi64_epi8(a);
2541
+ #else
2542
+ simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128());
2543
+ simde__m512i_private a_ = simde__m512i_to_private(a);
2544
+
2545
+ SIMDE__VECTORIZE
2546
+ for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
2547
+ r_.i8[i] =
2548
+ (a_.i64[i] < INT8_MIN)
2549
+ ? (INT8_MIN)
2550
+ : ((a_.i64[i] > INT8_MAX)
2551
+ ? (INT8_MAX)
2552
+ : HEDLEY_STATIC_CAST(int8_t, a_.i64[i]));
2553
+ }
2554
+
2555
+ return simde__m128i_from_private(r_);
2556
+ #endif
2557
+ }
2558
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2559
+ #define _mm512_cvtsepi64_epi8(a) simde_mm512_cvtsepi64_epi8(a)
2560
+ #endif
2561
+
2562
+ SIMDE__FUNCTION_ATTRIBUTES
2563
+ simde__m128i
2564
+ simde_mm512_cvtsepi64_epi16 (simde__m512i a) {
2565
+ #if defined(SIMDE_AVX512F_NATIVE)
2566
+ return _mm512_cvtsepi64_epi16(a);
2567
+ #else
2568
+ simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128());
2569
+ simde__m512i_private a_ = simde__m512i_to_private(a);
2570
+
2571
+ SIMDE__VECTORIZE
2572
+ for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
2573
+ r_.i16[i] =
2574
+ (a_.i64[i] < INT16_MIN)
2575
+ ? (INT16_MIN)
2576
+ : ((a_.i64[i] > INT16_MAX)
2577
+ ? (INT16_MAX)
2578
+ : HEDLEY_STATIC_CAST(int16_t, a_.i64[i]));
2579
+ }
2580
+
2581
+ return simde__m128i_from_private(r_);
2582
+ #endif
2583
+ }
2584
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2585
+ #define _mm512_cvtsepi64_epi16(a) simde_mm512_cvtsepi64_epi16(a)
2586
+ #endif
2587
+
2588
+ SIMDE__FUNCTION_ATTRIBUTES
2589
+ simde__m256i
2590
+ simde_mm512_cvtsepi64_epi32 (simde__m512i a) {
2591
+ #if defined(SIMDE_AVX512F_NATIVE)
2592
+ return _mm512_cvtsepi64_epi32(a);
2593
+ #else
2594
+ simde__m256i_private r_;
2595
+ simde__m512i_private a_ = simde__m512i_to_private(a);
2596
+
2597
+ SIMDE__VECTORIZE
2598
+ for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
2599
+ r_.i32[i] =
2600
+ (a_.i64[i] < INT32_MIN)
2601
+ ? (INT32_MIN)
2602
+ : ((a_.i64[i] > INT32_MAX)
2603
+ ? (INT32_MAX)
2604
+ : HEDLEY_STATIC_CAST(int32_t, a_.i64[i]));
2605
+ }
2606
+
2607
+ return simde__m256i_from_private(r_);
2608
+ #endif
2609
+ }
2610
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2611
+ #define _mm512_cvtsepi64_epi32(a) simde_mm512_cvtsepi64_epi32(a)
2612
+ #endif
2613
+
2614
+ SIMDE__FUNCTION_ATTRIBUTES
2615
+ simde__m512
2616
+ simde_mm512_div_ps (simde__m512 a, simde__m512 b) {
2617
+ #if defined(SIMDE_AVX512F_NATIVE)
2618
+ return _mm512_div_ps(a, b);
2619
+ #else
2620
+ simde__m512_private
2621
+ r_,
2622
+ a_ = simde__m512_to_private(a),
2623
+ b_ = simde__m512_to_private(b);
2624
+
2625
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2626
+ r_.f32 = a_.f32 / b_.f32;
2627
+ #else
2628
+ SIMDE__VECTORIZE
2629
+ for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
2630
+ r_.m256[i] = simde_mm256_div_ps(a_.m256[i], b_.m256[i]);
2631
+ }
2632
+ #endif
2633
+
2634
+ return simde__m512_from_private(r_);
2635
+ #endif
2636
+ }
2637
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2638
+ #define _mm512_div_ps(a, b) simde_mm512_div_ps(a, b)
2639
+ #endif
2640
+
2641
+ SIMDE__FUNCTION_ATTRIBUTES
2642
+ simde__m512
2643
+ simde_mm512_mask_div_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
2644
+ #if defined(SIMDE_AVX512F_NATIVE)
2645
+ return _mm512_mask_div_ps(src, k, a, b);
2646
+ #else
2647
+ return simde_mm512_mask_mov_ps(src, k, simde_mm512_div_ps(a, b));
2648
+ #endif
2649
+ }
2650
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2651
+ #define _mm512_mask_div_ps(src, k, a, b) simde_mm512_mask_div_ps(src, k, a, b)
2652
+ #endif
2653
+
2654
+ SIMDE__FUNCTION_ATTRIBUTES
2655
+ simde__m512
2656
+ simde_mm512_maskz_div_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
2657
+ #if defined(SIMDE_AVX512F_NATIVE)
2658
+ return _mm512_maskz_div_ps(k, a, b);
2659
+ #else
2660
+ return simde_mm512_maskz_mov_ps(k, simde_mm512_div_ps(a, b));
2661
+ #endif
2662
+ }
2663
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2664
+ #define _mm512_maskz_div_ps(k, a, b) simde_mm512_maskz_div_ps(k, a, b)
2665
+ #endif
2666
+
2667
+ SIMDE__FUNCTION_ATTRIBUTES
2668
+ simde__m512d
2669
+ simde_mm512_div_pd (simde__m512d a, simde__m512d b) {
2670
+ #if defined(SIMDE_AVX512F_NATIVE)
2671
+ return _mm512_div_pd(a, b);
2672
+ #else
2673
+ simde__m512d_private
2674
+ r_,
2675
+ a_ = simde__m512d_to_private(a),
2676
+ b_ = simde__m512d_to_private(b);
2677
+
2678
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2679
+ r_.f64 = a_.f64 / b_.f64;
2680
+ #else
2681
+ SIMDE__VECTORIZE
2682
+ for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
2683
+ r_.m256d[i] = simde_mm256_div_pd(a_.m256d[i], b_.m256d[i]);
2684
+ }
2685
+ #endif
2686
+
2687
+ return simde__m512d_from_private(r_);
2688
+ #endif
2689
+ }
2690
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2691
+ #define _mm512_div_pd(a, b) simde_mm512_div_pd(a, b)
2692
+ #endif
2693
+
2694
+ SIMDE__FUNCTION_ATTRIBUTES
2695
+ simde__m512d
2696
+ simde_mm512_mask_div_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
2697
+ #if defined(SIMDE_AVX512F_NATIVE)
2698
+ return _mm512_mask_div_pd(src, k, a, b);
2699
+ #else
2700
+ return simde_mm512_mask_mov_pd(src, k, simde_mm512_div_pd(a, b));
2701
+ #endif
2702
+ }
2703
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2704
+ #define _mm512_mask_div_pd(src, k, a, b) simde_mm512_mask_div_pd(src, k, a, b)
2705
+ #endif
2706
+
2707
+ SIMDE__FUNCTION_ATTRIBUTES
2708
+ simde__m512d
2709
+ simde_mm512_maskz_div_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
2710
+ #if defined(SIMDE_AVX512F_NATIVE)
2711
+ return _mm512_maskz_div_pd(k, a, b);
2712
+ #else
2713
+ return simde_mm512_maskz_mov_pd(k, simde_mm512_div_pd(a, b));
2714
+ #endif
2715
+ }
2716
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2717
+ #define _mm512_maskz_div_pd(k, a, b) simde_mm512_maskz_div_pd(k, a, b)
2718
+ #endif
2719
+
2720
+ SIMDE__FUNCTION_ATTRIBUTES
2721
+ simde__m512i
2722
+ simde_mm512_load_si512 (simde__m512i const * mem_addr) {
2723
+ simde_assert_aligned(64, mem_addr);
2724
+
2725
+ #if defined(SIMDE_AVX512F_NATIVE)
2726
+ return _mm512_load_si512((__m512i const*) mem_addr);
2727
+ #elif defined(SIMDE_ARCH_AARCH64) && (defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(8,0,0))
2728
+ simde__m512i r;
2729
+ simde_memcpy(&r, mem_addr, sizeof(r));
2730
+ return r;
2731
+ #else
2732
+ return *mem_addr;
2733
+ #endif
2734
+ }
2735
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2736
+ #define _mm512_load_si512(a) simde_mm512_load_si512(a)
2737
+ #endif
2738
+
2739
+ SIMDE__FUNCTION_ATTRIBUTES
2740
+ simde__m512i
2741
+ simde_mm512_loadu_si512 (simde__m512i const * mem_addr) {
2742
+ #if defined(SIMDE_AVX512F_NATIVE)
2743
+ return _mm512_loadu_si512((__m512i const*) mem_addr);
2744
+ #else
2745
+ simde__m512i r;
2746
+ simde_memcpy(&r, mem_addr, sizeof(r));
2747
+ return r;
2748
+ #endif
2749
+ }
2750
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2751
+ #define _mm512_loadu_si512(a) simde_mm512_loadu_si512(a)
2752
+ #endif
2753
+
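load_si512 asserts 64-byte alignment of mem_addr, while loadu_si512 copies through simde_memcpy and accepts any address. A sketch, same assumptions as above:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>
    #include "simde/x86/avx512f.h"  /* assumed include path */

    int main(void) {
      int32_t buf[16] = {1, 2, 3};  /* not necessarily 64-byte aligned */
      simde__m512i v = simde_mm512_loadu_si512((simde__m512i const *) buf);
      int32_t lanes[16];
      memcpy(lanes, &v, sizeof(lanes));
      assert(lanes[0] == 1 && lanes[3] == 0);
      return 0;
    }
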
2754
+ SIMDE__FUNCTION_ATTRIBUTES
2755
+ simde__m512
2756
+ simde_mm512_mul_ps (simde__m512 a, simde__m512 b) {
2757
+ #if defined(SIMDE_AVX512F_NATIVE)
2758
+ return _mm512_mul_ps(a, b);
2759
+ #else
2760
+ simde__m512_private
2761
+ r_,
2762
+ a_ = simde__m512_to_private(a),
2763
+ b_ = simde__m512_to_private(b);
2764
+
2765
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2766
+ r_.f32 = a_.f32 * b_.f32;
2767
+ #else
2768
+ SIMDE__VECTORIZE
2769
+ for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
2770
+ r_.m256[i] = simde_mm256_mul_ps(a_.m256[i], b_.m256[i]);
2771
+ }
2772
+ #endif
2773
+
2774
+ return simde__m512_from_private(r_);
2775
+ #endif
2776
+ }
2777
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2778
+ #define _mm512_mul_ps(a, b) simde_mm512_mul_ps(a, b)
2779
+ #endif
2780
+
2781
+ SIMDE__FUNCTION_ATTRIBUTES
2782
+ simde__m512
2783
+ simde_mm512_mask_mul_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
2784
+ #if defined(SIMDE_AVX512F_NATIVE)
2785
+ return _mm512_mask_mul_ps(src, k, a, b);
2786
+ #else
2787
+ return simde_mm512_mask_mov_ps(src, k, simde_mm512_mul_ps(a, b));
2788
+ #endif
2789
+ }
2790
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2791
+ #define _mm512_mask_mul_ps(src, k, a, b) simde_mm512_mask_mul_ps(src, k, a, b)
2792
+ #endif
2793
+
2794
+ SIMDE__FUNCTION_ATTRIBUTES
2795
+ simde__m512
2796
+ simde_mm512_maskz_mul_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
2797
+ #if defined(SIMDE_AVX512F_NATIVE)
2798
+ return _mm512_maskz_mul_ps(k, a, b);
2799
+ #else
2800
+ return simde_mm512_maskz_mov_ps(k, simde_mm512_mul_ps(a, b));
2801
+ #endif
2802
+ }
2803
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2804
+ #define _mm512_maskz_mul_ps(k, a, b) simde_mm512_maskz_mul_ps(k, a, b)
2805
+ #endif
2806
+
2807
+ SIMDE__FUNCTION_ATTRIBUTES
2808
+ simde__m512d
2809
+ simde_mm512_mul_pd (simde__m512d a, simde__m512d b) {
2810
+ #if defined(SIMDE_AVX512F_NATIVE)
2811
+ return _mm512_mul_pd(a, b);
2812
+ #else
2813
+ simde__m512d_private
2814
+ r_,
2815
+ a_ = simde__m512d_to_private(a),
2816
+ b_ = simde__m512d_to_private(b);
2817
+
2818
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2819
+ r_.f64 = a_.f64 * b_.f64;
2820
+ #else
2821
+ SIMDE__VECTORIZE
2822
+ for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
2823
+ r_.m256d[i] = simde_mm256_mul_pd(a_.m256d[i], b_.m256d[i]);
2824
+ }
2825
+ #endif
2826
+
2827
+ return simde__m512d_from_private(r_);
2828
+ #endif
2829
+ }
2830
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2831
+ #define _mm512_mul_pd(a, b) simde_mm512_mul_pd(a, b)
2832
+ #endif
2833
+
2834
+ SIMDE__FUNCTION_ATTRIBUTES
2835
+ simde__m512d
2836
+ simde_mm512_mask_mul_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
2837
+ #if defined(SIMDE_AVX512F_NATIVE)
2838
+ return _mm512_mask_mul_pd(src, k, a, b);
2839
+ #else
2840
+ return simde_mm512_mask_mov_pd(src, k, simde_mm512_mul_pd(a, b));
2841
+ #endif
2842
+ }
2843
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2844
+ #define _mm512_mask_mul_pd(src, k, a, b) simde_mm512_mask_mul_pd(src, k, a, b)
2845
+ #endif
2846
+
2847
+ SIMDE__FUNCTION_ATTRIBUTES
2848
+ simde__m512d
2849
+ simde_mm512_maskz_mul_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
2850
+ #if defined(SIMDE_AVX512F_NATIVE)
2851
+ return _mm512_maskz_mul_pd(k, a, b);
2852
+ #else
2853
+ return simde_mm512_maskz_mov_pd(k, simde_mm512_mul_pd(a, b));
2854
+ #endif
2855
+ }
2856
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2857
+ #define _mm512_maskz_mul_pd(k, a, b) simde_mm512_maskz_mul_pd(k, a, b)
2858
+ #endif
2859
+
2860
+ SIMDE__FUNCTION_ATTRIBUTES
2861
+ simde__m512i
2862
+ simde_mm512_mul_epi32 (simde__m512i a, simde__m512i b) {
2863
+ #if defined(SIMDE_AVX512F_NATIVE)
2864
+ return _mm512_mul_epi32(a, b);
2865
+ #else
2866
+ simde__m512i_private
2867
+ r_,
2868
+ a_ = simde__m512i_to_private(a),
2869
+ b_ = simde__m512i_to_private(b);
2870
+
2871
+ #if defined(SIMDE__CONVERT_VECTOR) && defined(SIMDE__SHUFFLE_VECTOR)
2872
+ simde__m512i_private x;
2873
+ __typeof__(r_.i64) ta, tb;
2874
+
2875
+ /* Get even numbered 32-bit values */
2876
+ x.i32 = SIMDE__SHUFFLE_VECTOR(32, 64, a_.i32, b_.i32, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
2877
+ /* Cast to 64 bits */
2878
+ SIMDE__CONVERT_VECTOR(ta, x.m256i_private[0].i32);
2879
+ SIMDE__CONVERT_VECTOR(tb, x.m256i_private[1].i32);
2880
+ r_.i64 = ta * tb;
2881
+ #else
2882
+ SIMDE__VECTORIZE
2883
+ for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
2884
+ r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[i << 1]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i << 1]);
2885
+ }
2886
+ #endif
2887
+ return simde__m512i_from_private(r_);
2888
+ #endif
2889
+ }
2890
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2891
+ #define _mm512_mul_epi32(a, b) simde_mm512_mul_epi32(a, b)
2892
+ #endif
2893
+
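mul_epi32 multiplies only the even (low) 32-bit lane of each 64-bit pair and widens the product to 64 bits, so the result cannot overflow. A sketch, same assumptions as above:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>
    #include "simde/x86/avx512f.h"  /* assumed include path */

    int main(void) {
      simde__m512i a = simde_mm512_setr4_epi32(100000, 0, -100000, 0);
      simde__m512i b = simde_mm512_setr4_epi32(100000, 0,  100000, 0);
      simde__m512i p = simde_mm512_mul_epi32(a, b);
      int64_t lanes[8];
      memcpy(lanes, &p, sizeof(lanes));
      assert(lanes[0] == INT64_C(10000000000) && lanes[1] == -INT64_C(10000000000));
      return 0;
    }
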
2894
+ SIMDE__FUNCTION_ATTRIBUTES
2895
+ simde__m512i
2896
+ simde_mm512_mask_mul_epi32(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
2897
+ #if defined(SIMDE_AVX512F_NATIVE)
2898
+ return _mm512_mask_mul_epi32(src, k, a, b);
2899
+ #else
2900
+ return simde_mm512_mask_mov_epi64(src, k, simde_mm512_mul_epi32(a, b));
2901
+ #endif
2902
+ }
2903
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2904
+ #define _mm512_mask_mul_epi32(src, k, a, b) simde_mm512_mask_mul_epi32(src, k, a, b)
2905
+ #endif
2906
+
2907
+ SIMDE__FUNCTION_ATTRIBUTES
2908
+ simde__m512i
2909
+ simde_mm512_maskz_mul_epi32(simde__mmask8 k, simde__m512i a, simde__m512i b) {
2910
+ #if defined(SIMDE_AVX512F_NATIVE)
2911
+ return _mm512_maskz_mul_epi32(k, a, b);
2912
+ #else
2913
+ return simde_mm512_maskz_mov_epi64(k, simde_mm512_mul_epi32(a, b));
2914
+ #endif
2915
+ }
2916
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2917
+ #define _mm512_maskz_mul_epi32(k, a, b) simde_mm512_maskz_mul_epi32(k, a, b)
2918
+ #endif
2919
+
2920
+ SIMDE__FUNCTION_ATTRIBUTES
2921
+ simde__m512i
2922
+ simde_mm512_mul_epu32 (simde__m512i a, simde__m512i b) {
2923
+ #if defined(SIMDE_AVX512F_NATIVE)
2924
+ return _mm512_mul_epu32(a, b);
2925
+ #else
2926
+ simde__m512i_private
2927
+ r_,
2928
+ a_ = simde__m512i_to_private(a),
2929
+ b_ = simde__m512i_to_private(b);
2930
+
2931
+ #if defined(SIMDE__CONVERT_VECTOR) && defined(SIMDE__SHUFFLE_VECTOR)
2932
+ simde__m512i_private x;
2933
+ __typeof__(r_.u64) ta, tb;
2934
+
2935
+ x.u32 = SIMDE__SHUFFLE_VECTOR(32, 64, a_.u32, b_.u32, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
2936
+ SIMDE__CONVERT_VECTOR(ta, x.m256i_private[0].u32);
2937
+ SIMDE__CONVERT_VECTOR(tb, x.m256i_private[1].u32);
2938
+ r_.u64 = ta * tb;
2939
+ #else
2940
+ SIMDE__VECTORIZE
2941
+ for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
2942
+ r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i << 1]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i << 1]);
2943
+ }
2944
+ #endif
2945
+
2946
+ return simde__m512i_from_private(r_);
2947
+ #endif
2948
+ }
2949
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2950
+ #define _mm512_mul_epu32(a, b) simde_mm512_mul_epu32(a, b)
2951
+ #endif
2952
+
2953
+ SIMDE__FUNCTION_ATTRIBUTES
2954
+ simde__m512i
2955
+ simde_mm512_mask_mul_epu32(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
2956
+ #if defined(SIMDE_AVX512F_NATIVE)
2957
+ return _mm512_mask_mul_epu32(src, k, a, b);
2958
+ #else
2959
+ return simde_mm512_mask_mov_epi64(src, k, simde_mm512_mul_epu32(a, b));
2960
+ #endif
2961
+ }
2962
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2963
+ #define _mm512_mask_mul_epu32(src, k, a, b) simde_mm512_mask_mul_epu32(src, k, a, b)
2964
+ #endif
2965
+
2966
+ SIMDE__FUNCTION_ATTRIBUTES
2967
+ simde__m512i
2968
+ simde_mm512_maskz_mul_epu32(simde__mmask8 k, simde__m512i a, simde__m512i b) {
2969
+ #if defined(SIMDE_AVX512F_NATIVE)
2970
+ return _mm512_maskz_mul_epu32(k, a, b);
2971
+ #else
2972
+ return simde_mm512_maskz_mov_epi64(k, simde_mm512_mul_epu32(a, b));
2973
+ #endif
2974
+ }
2975
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2976
+ #define _mm512_maskz_mul_epu32(k, a, b) simde_mm512_maskz_mul_epu32(k, a, b)
2977
+ #endif
2978
+
2979
+ SIMDE__FUNCTION_ATTRIBUTES
2980
+ simde__m512i
2981
+ simde_mm512_or_si512 (simde__m512i a, simde__m512i b) {
2982
+ #if defined(SIMDE_AVX512F_NATIVE)
2983
+ return _mm512_or_si512(a, b);
2984
+ #else
2985
+ simde__m512i_private
2986
+ r_,
2987
+ a_ = simde__m512i_to_private(a),
2988
+ b_ = simde__m512i_to_private(b);
2989
+
2990
+ #if defined(SIMDE_ARCH_X86_AVX2)
2991
+ r_.m256i[0] = simde_mm256_or_si256(a_.m256i[0], b_.m256i[0]);
2992
+ r_.m256i[1] = simde_mm256_or_si256(a_.m256i[1], b_.m256i[1]);
2993
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2994
+ r_.i32f = a_.i32f | b_.i32f;
2995
+ #else
2996
+ SIMDE__VECTORIZE
2997
+ for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
2998
+ r_.i32f[i] = a_.i32f[i] | b_.i32f[i];
2999
+ }
3000
+ #endif
3001
+
3002
+ return simde__m512i_from_private(r_);
3003
+ #endif
3004
+ }
3005
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3006
+ #define _mm512_or_si512(a, b) simde_mm512_or_si512(a, b)
3007
+ #endif
3008
+
3009
+ SIMDE__FUNCTION_ATTRIBUTES
3010
+ simde__m512i
3011
+ simde_mm512_sub_epi32 (simde__m512i a, simde__m512i b) {
3012
+ #if defined(SIMDE_AVX512F_NATIVE)
3013
+ return _mm512_sub_epi32(a, b);
3014
+ #else
3015
+ simde__m512i_private
3016
+ r_,
3017
+ a_ = simde__m512i_to_private(a),
3018
+ b_ = simde__m512i_to_private(b);
3019
+
3020
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3021
+ r_.i32 = a_.i32 - b_.i32;
3022
+ #else
3023
+ SIMDE__VECTORIZE
3024
+ for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
3025
+ r_.m256i[i] = simde_mm256_sub_epi32(a_.m256i[i], b_.m256i[i]);
3026
+ }
3027
+ #endif
3028
+
3029
+ return simde__m512i_from_private(r_);
3030
+ #endif
3031
+ }
3032
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3033
+ #define _mm512_sub_epi32(a, b) simde_mm512_sub_epi32(a, b)
3034
+ #endif
3035
+
3036
+ SIMDE__FUNCTION_ATTRIBUTES
3037
+ simde__m512i
3038
+ simde_mm512_mask_sub_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
3039
+ #if defined(SIMDE_AVX512F_NATIVE)
3040
+ return _mm512_mask_sub_epi32(src, k, a, b);
3041
+ #else
3042
+ return simde_mm512_mask_mov_epi32(src, k, simde_mm512_sub_epi32(a, b));
3043
+ #endif
3044
+ }
3045
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3046
+ #define _mm512_mask_sub_epi32(src, k, a, b) simde_mm512_mask_sub_epi32(src, k, a, b)
3047
+ #endif
3048
+
3049
+ SIMDE__FUNCTION_ATTRIBUTES
3050
+ simde__m512i
3051
+ simde_mm512_maskz_sub_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) {
3052
+ #if defined(SIMDE_AVX512F_NATIVE)
3053
+ return _mm512_maskz_sub_epi32(k, a, b);
3054
+ #else
3055
+ return simde_mm512_maskz_mov_epi32(k, simde_mm512_sub_epi32(a, b));
3056
+ #endif
3057
+ }
3058
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3059
+ #define _mm512_maskz_sub_epi32(k, a, b) simde_mm512_maskz_sub_epi32(k, a, b)
3060
+ #endif
3061
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_mm512_sub_epi64 (simde__m512i a, simde__m512i b) {
+   #if defined(SIMDE_AVX512F_NATIVE)
+     return _mm512_sub_epi64(a, b);
+   #else
+     simde__m512i_private
+       r_,
+       a_ = simde__m512i_to_private(a),
+       b_ = simde__m512i_to_private(b);
+
+     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
+       r_.i64 = a_.i64 - b_.i64;
+     #else
+       SIMDE__VECTORIZE
+       for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
+         r_.m256i[i] = simde_mm256_sub_epi64(a_.m256i[i], b_.m256i[i]);
+       }
+     #endif
+
+     return simde__m512i_from_private(r_);
+   #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+   #define _mm512_sub_epi64(a, b) simde_mm512_sub_epi64(a, b)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_mm512_mask_sub_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
+   #if defined(SIMDE_AVX512F_NATIVE)
+     return _mm512_mask_sub_epi64(src, k, a, b);
+   #else
+     return simde_mm512_mask_mov_epi64(src, k, simde_mm512_sub_epi64(a, b));
+   #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+   #define _mm512_mask_sub_epi64(src, k, a, b) simde_mm512_mask_sub_epi64(src, k, a, b)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_mm512_maskz_sub_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) {
+   #if defined(SIMDE_AVX512F_NATIVE)
+     return _mm512_maskz_sub_epi64(k, a, b);
+   #else
+     return simde_mm512_maskz_mov_epi64(k, simde_mm512_sub_epi64(a, b));
+   #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+   #define _mm512_maskz_sub_epi64(k, a, b) simde_mm512_maskz_sub_epi64(k, a, b)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512
+ simde_mm512_sub_ps (simde__m512 a, simde__m512 b) {
+   #if defined(SIMDE_AVX512F_NATIVE)
+     return _mm512_sub_ps(a, b);
+   #else
+     simde__m512_private
+       r_,
+       a_ = simde__m512_to_private(a),
+       b_ = simde__m512_to_private(b);
+
+     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
+       r_.f32 = a_.f32 - b_.f32;
+     #else
+       SIMDE__VECTORIZE
+       for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
+         r_.m256[i] = simde_mm256_sub_ps(a_.m256[i], b_.m256[i]);
+       }
+     #endif
+
+     return simde__m512_from_private(r_);
+   #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+   #define _mm512_sub_ps(a, b) simde_mm512_sub_ps(a, b)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512
+ simde_mm512_mask_sub_ps (simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
+   #if defined(SIMDE_AVX512F_NATIVE)
+     return _mm512_mask_sub_ps(src, k, a, b);
+   #else
+     return simde_mm512_mask_mov_ps(src, k, simde_mm512_sub_ps(a, b));
+   #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+   #define _mm512_mask_sub_ps(src, k, a, b) simde_mm512_mask_sub_ps(src, k, a, b)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512
+ simde_mm512_maskz_sub_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
+   #if defined(SIMDE_AVX512F_NATIVE)
+     return _mm512_maskz_sub_ps(k, a, b);
+   #else
+     return simde_mm512_maskz_mov_ps(k, simde_mm512_sub_ps(a, b));
+   #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+   #define _mm512_maskz_sub_ps(k, a, b) simde_mm512_maskz_sub_ps(k, a, b)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512d
+ simde_mm512_sub_pd (simde__m512d a, simde__m512d b) {
+   #if defined(SIMDE_AVX512F_NATIVE)
+     return _mm512_sub_pd(a, b);
+   #else
+     simde__m512d_private
+       r_,
+       a_ = simde__m512d_to_private(a),
+       b_ = simde__m512d_to_private(b);
+
+     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
+       r_.f64 = a_.f64 - b_.f64;
+     #else
+       SIMDE__VECTORIZE
+       for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
+         r_.m256d[i] = simde_mm256_sub_pd(a_.m256d[i], b_.m256d[i]);
+       }
+     #endif
+
+     return simde__m512d_from_private(r_);
+   #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+   #define _mm512_sub_pd(a, b) simde_mm512_sub_pd(a, b)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512d
+ simde_mm512_mask_sub_pd (simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
+   #if defined(SIMDE_AVX512F_NATIVE)
+     return _mm512_mask_sub_pd(src, k, a, b);
+   #else
+     return simde_mm512_mask_mov_pd(src, k, simde_mm512_sub_pd(a, b));
+   #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+   #define _mm512_mask_sub_pd(src, k, a, b) simde_mm512_mask_sub_pd(src, k, a, b)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512d
+ simde_mm512_maskz_sub_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
+   #if defined(SIMDE_AVX512F_NATIVE)
+     return _mm512_maskz_sub_pd(k, a, b);
+   #else
+     return simde_mm512_maskz_mov_pd(k, simde_mm512_sub_pd(a, b));
+   #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+   #define _mm512_maskz_sub_pd(k, a, b) simde_mm512_maskz_sub_pd(k, a, b)
+ #endif
+
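+ /* The _ps/_pd variants follow the same pattern with IEEE-754 single and
+  * double lanes; assuming default rounding and no fast-math flags, each
+  * lane behaves exactly like a scalar float/double subtraction. */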
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_mm512_srli_epi32 (simde__m512i a, unsigned int imm8) {
+   #if defined(SIMDE_AVX512F_NATIVE)
+     return _mm512_srli_epi32(a, imm8);
+   #else
+     simde__m512i_private
+       r_,
+       a_ = simde__m512i_to_private(a);
+
+     #if defined(SIMDE_ARCH_X86_AVX2)
+       r_.m256i[0] = simde_mm256_srli_epi32(a_.m256i[0], imm8);
+       r_.m256i[1] = simde_mm256_srli_epi32(a_.m256i[1], imm8);
+     #elif defined(SIMDE_ARCH_X86_SSE2)
+       r_.m128i[0] = simde_mm_srli_epi32(a_.m128i[0], imm8);
+       r_.m128i[1] = simde_mm_srli_epi32(a_.m128i[1], imm8);
+       r_.m128i[2] = simde_mm_srli_epi32(a_.m128i[2], imm8);
+       r_.m128i[3] = simde_mm_srli_epi32(a_.m128i[3], imm8);
+     #else
+       if (imm8 > 31) {
+         simde_memset(&r_, 0, sizeof(r_));
+       } else {
+         #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
+           r_.u32 = a_.u32 >> imm8;
+         #else
+           SIMDE__VECTORIZE
+           for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
+             r_.u32[i] = a_.u32[i] >> imm8;
+           }
+         #endif
+       }
+     #endif
+
+     return simde__m512i_from_private(r_);
+   #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+   #define _mm512_srli_epi32(a, imm8) simde_mm512_srli_epi32(a, imm8)
+ #endif
+
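+ /* Unlike C's ">>", whose behaviour is undefined for shift counts >= the
+  * operand width, _mm512_srli_epi32 is specified to produce zero for any
+  * count above 31 -- hence the explicit "imm8 > 31" branch above. A quick
+  * sanity check (illustrative sketch, not part of the header):
+  *
+  *   simde__m512i x = simde_mm512_set1_epi32(-1);
+  *   simde_mm512_srli_epi32(x, 1);   // every lane == 0x7FFFFFFF
+  *   simde_mm512_srli_epi32(x, 40);  // every lane == 0
+  */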
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_mm512_srli_epi64 (simde__m512i a, unsigned int imm8) {
+   #if defined(SIMDE_AVX512F_NATIVE)
+     return _mm512_srli_epi64(a, imm8);
+   #else
+     simde__m512i_private
+       r_,
+       a_ = simde__m512i_to_private(a);
+
+     #if defined(SIMDE_ARCH_X86_AVX2)
+       r_.m256i[0] = simde_mm256_srli_epi64(a_.m256i[0], imm8);
+       r_.m256i[1] = simde_mm256_srli_epi64(a_.m256i[1], imm8);
+     #elif defined(SIMDE_ARCH_X86_SSE2)
+       r_.m128i[0] = simde_mm_srli_epi64(a_.m128i[0], imm8);
+       r_.m128i[1] = simde_mm_srli_epi64(a_.m128i[1], imm8);
+       r_.m128i[2] = simde_mm_srli_epi64(a_.m128i[2], imm8);
+       r_.m128i[3] = simde_mm_srli_epi64(a_.m128i[3], imm8);
+     #else
+       /* The Intel Intrinsics Guide says that only the 8 LSBs of imm8 are
+        * used; if that were true we would need "imm8 &= 0xff" here. In
+        * practice, however, all bits are used. */
+       if (imm8 > 63) {
+         simde_memset(&r_, 0, sizeof(r_));
+       } else {
+         #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
+           r_.u64 = a_.u64 >> imm8;
+         #else
+           SIMDE__VECTORIZE
+           for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
+             r_.u64[i] = a_.u64[i] >> imm8;
+           }
+         #endif
+       }
+     #endif
+
+     return simde__m512i_from_private(r_);
+   #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+   #define _mm512_srli_epi64(a, imm8) simde_mm512_srli_epi64(a, imm8)
+ #endif
+
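+ /* The note above matters for out-of-range counts: if only the low eight
+  * bits of imm8 were honoured, a count of 256 would act like a count of 0
+  * (a no-op), whereas hardware treats it as an over-wide shift and zeroes
+  * every lane. Comparing the full value against 63, as the fallback does,
+  * matches the observed hardware behaviour. */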
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__mmask16
+ simde_mm512_mask_test_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) {
+   #if defined(SIMDE_AVX512F_NATIVE)
+     return _mm512_mask_test_epi32_mask(k1, a, b);
+   #else
+     simde__m512i_private
+       a_ = simde__m512i_to_private(a),
+       b_ = simde__m512i_to_private(b);
+     simde__mmask16 r = 0;
+
+     SIMDE__VECTORIZE_REDUCTION(|:r)
+     for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
+       r |= !!(a_.i32[i] & b_.i32[i]) << i;
+     }
+
+     return r & k1;
+   #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+   #define _mm512_mask_test_epi32_mask(k1, a, b) simde_mm512_mask_test_epi32_mask(k1, a, b)
+ #endif
+
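+ /* Usage sketch (illustrative, not part of the header): bit i of the
+  * result is set iff lane i of (a AND b) is non-zero and bit i of k1 is
+  * set. For example, to test which of the first four lanes of v have
+  * their sign bit set, ignoring the rest:
+  *
+  *   simde__m512i sign = simde_mm512_set1_epi32(INT32_MIN);
+  *   simde__mmask16 m = simde_mm512_mask_test_epi32_mask(0x000F, v, sign);
+  */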
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__mmask8
+ simde_mm512_mask_test_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) {
+   #if defined(SIMDE_AVX512F_NATIVE)
+     return _mm512_mask_test_epi64_mask(k1, a, b);
+   #else
+     simde__m512i_private
+       a_ = simde__m512i_to_private(a),
+       b_ = simde__m512i_to_private(b);
+     simde__mmask8 r = 0;
+
+     SIMDE__VECTORIZE_REDUCTION(|:r)
+     for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
+       r |= !!(a_.i64[i] & b_.i64[i]) << i;
+     }
+
+     return r & k1;
+   #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+   #define _mm512_mask_test_epi64_mask(k1, a, b) simde_mm512_mask_test_epi64_mask(k1, a, b)
+ #endif
+
+ SIMDE__FUNCTION_ATTRIBUTES
+ simde__m512i
+ simde_mm512_xor_si512 (simde__m512i a, simde__m512i b) {
+   #if defined(SIMDE_AVX512F_NATIVE)
+     return _mm512_xor_si512(a, b);
+   #else
+     simde__m512i_private
+       r_,
+       a_ = simde__m512i_to_private(a),
+       b_ = simde__m512i_to_private(b);
+
+     #if defined(SIMDE_ARCH_X86_AVX2)
+       r_.m256i[0] = simde_mm256_xor_si256(a_.m256i[0], b_.m256i[0]);
+       r_.m256i[1] = simde_mm256_xor_si256(a_.m256i[1], b_.m256i[1]);
+     #elif defined(SIMDE_ARCH_X86_SSE2)
+       r_.m128i[0] = simde_mm_xor_si128(a_.m128i[0], b_.m128i[0]);
+       r_.m128i[1] = simde_mm_xor_si128(a_.m128i[1], b_.m128i[1]);
+       r_.m128i[2] = simde_mm_xor_si128(a_.m128i[2], b_.m128i[2]);
+       r_.m128i[3] = simde_mm_xor_si128(a_.m128i[3], b_.m128i[3]);
+     #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
+       r_.i32f = a_.i32f ^ b_.i32f;
+     #else
+       SIMDE__VECTORIZE
+       for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
+         r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i];
+       }
+     #endif
+
+     return simde__m512i_from_private(r_);
+   #endif
+ }
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+   #define _mm512_xor_si512(a, b) simde_mm512_xor_si512(a, b)
+ #endif
+
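+ /* Because xor operates on raw bits, element width is irrelevant here, and
+  * xor-ing a register with itself is a portable way to zero it
+  * (illustrative sketch, not part of the header):
+  *
+  *   simde__m512i zero = simde_mm512_xor_si512(v, v); // all bits clear
+  */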
+ SIMDE__END_DECLS
+
+ HEDLEY_DIAGNOSTIC_POP
+
+ #endif /* !defined(SIMDE__AVX512F_H) */