minimap2 0.2.25.0 → 0.2.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -3
  3. data/ext/minimap2/Makefile +6 -2
  4. data/ext/minimap2/NEWS.md +38 -0
  5. data/ext/minimap2/README.md +9 -3
  6. data/ext/minimap2/align.c +5 -3
  7. data/ext/minimap2/cookbook.md +2 -2
  8. data/ext/minimap2/format.c +7 -4
  9. data/ext/minimap2/kalloc.c +20 -1
  10. data/ext/minimap2/kalloc.h +13 -2
  11. data/ext/minimap2/ksw2.h +1 -0
  12. data/ext/minimap2/ksw2_extd2_sse.c +1 -1
  13. data/ext/minimap2/ksw2_exts2_sse.c +79 -40
  14. data/ext/minimap2/ksw2_extz2_sse.c +1 -1
  15. data/ext/minimap2/lchain.c +15 -16
  16. data/ext/minimap2/lib/simde/CONTRIBUTING.md +114 -0
  17. data/ext/minimap2/lib/simde/COPYING +20 -0
  18. data/ext/minimap2/lib/simde/README.md +333 -0
  19. data/ext/minimap2/lib/simde/amalgamate.py +58 -0
  20. data/ext/minimap2/lib/simde/meson.build +33 -0
  21. data/ext/minimap2/lib/simde/netlify.toml +20 -0
  22. data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +140 -0
  23. data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +137 -0
  24. data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +142 -0
  25. data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +145 -0
  26. data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +140 -0
  27. data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +145 -0
  28. data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +140 -0
  29. data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +143 -0
  30. data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +137 -0
  31. data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +141 -0
  32. data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +147 -0
  33. data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +141 -0
  34. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +134 -0
  35. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +138 -0
  36. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +134 -0
  37. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +137 -0
  38. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +131 -0
  39. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +135 -0
  40. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +141 -0
  41. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +135 -0
  42. data/ext/minimap2/lib/simde/simde/arm/neon.h +97 -0
  43. data/ext/minimap2/lib/simde/simde/check.h +267 -0
  44. data/ext/minimap2/lib/simde/simde/debug-trap.h +83 -0
  45. data/ext/minimap2/lib/simde/simde/hedley.h +1899 -0
  46. data/ext/minimap2/lib/simde/simde/simde-arch.h +445 -0
  47. data/ext/minimap2/lib/simde/simde/simde-common.h +697 -0
  48. data/ext/minimap2/lib/simde/simde/x86/avx.h +5385 -0
  49. data/ext/minimap2/lib/simde/simde/x86/avx2.h +2402 -0
  50. data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +391 -0
  51. data/ext/minimap2/lib/simde/simde/x86/avx512f.h +3389 -0
  52. data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +112 -0
  53. data/ext/minimap2/lib/simde/simde/x86/fma.h +659 -0
  54. data/ext/minimap2/lib/simde/simde/x86/mmx.h +2210 -0
  55. data/ext/minimap2/lib/simde/simde/x86/sse.h +3696 -0
  56. data/ext/minimap2/lib/simde/simde/x86/sse2.h +5991 -0
  57. data/ext/minimap2/lib/simde/simde/x86/sse3.h +343 -0
  58. data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +1783 -0
  59. data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +105 -0
  60. data/ext/minimap2/lib/simde/simde/x86/ssse3.h +1053 -0
  61. data/ext/minimap2/lib/simde/simde/x86/svml.h +543 -0
  62. data/ext/minimap2/lib/simde/test/CMakeLists.txt +166 -0
  63. data/ext/minimap2/lib/simde/test/arm/meson.build +4 -0
  64. data/ext/minimap2/lib/simde/test/arm/neon/meson.build +23 -0
  65. data/ext/minimap2/lib/simde/test/arm/neon/skel.c +871 -0
  66. data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +134 -0
  67. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +39 -0
  68. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +10 -0
  69. data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +1260 -0
  70. data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +873 -0
  71. data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +1084 -0
  72. data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +1260 -0
  73. data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +18 -0
  74. data/ext/minimap2/lib/simde/test/arm/test-arm.c +20 -0
  75. data/ext/minimap2/lib/simde/test/arm/test-arm.h +8 -0
  76. data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +171 -0
  77. data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +68 -0
  78. data/ext/minimap2/lib/simde/test/meson.build +64 -0
  79. data/ext/minimap2/lib/simde/test/munit/COPYING +21 -0
  80. data/ext/minimap2/lib/simde/test/munit/Makefile +55 -0
  81. data/ext/minimap2/lib/simde/test/munit/README.md +54 -0
  82. data/ext/minimap2/lib/simde/test/munit/example.c +351 -0
  83. data/ext/minimap2/lib/simde/test/munit/meson.build +37 -0
  84. data/ext/minimap2/lib/simde/test/munit/munit.c +2055 -0
  85. data/ext/minimap2/lib/simde/test/munit/munit.h +535 -0
  86. data/ext/minimap2/lib/simde/test/run-tests.c +20 -0
  87. data/ext/minimap2/lib/simde/test/run-tests.h +260 -0
  88. data/ext/minimap2/lib/simde/test/x86/avx.c +13752 -0
  89. data/ext/minimap2/lib/simde/test/x86/avx2.c +9977 -0
  90. data/ext/minimap2/lib/simde/test/x86/avx512bw.c +2664 -0
  91. data/ext/minimap2/lib/simde/test/x86/avx512f.c +10416 -0
  92. data/ext/minimap2/lib/simde/test/x86/avx512vl.c +210 -0
  93. data/ext/minimap2/lib/simde/test/x86/fma.c +2557 -0
  94. data/ext/minimap2/lib/simde/test/x86/meson.build +33 -0
  95. data/ext/minimap2/lib/simde/test/x86/mmx.c +2878 -0
  96. data/ext/minimap2/lib/simde/test/x86/skel.c +2984 -0
  97. data/ext/minimap2/lib/simde/test/x86/sse.c +5121 -0
  98. data/ext/minimap2/lib/simde/test/x86/sse2.c +9860 -0
  99. data/ext/minimap2/lib/simde/test/x86/sse3.c +486 -0
  100. data/ext/minimap2/lib/simde/test/x86/sse4.1.c +3446 -0
  101. data/ext/minimap2/lib/simde/test/x86/sse4.2.c +101 -0
  102. data/ext/minimap2/lib/simde/test/x86/ssse3.c +2084 -0
  103. data/ext/minimap2/lib/simde/test/x86/svml.c +1545 -0
  104. data/ext/minimap2/lib/simde/test/x86/test-avx.h +16 -0
  105. data/ext/minimap2/lib/simde/test/x86/test-avx512.h +25 -0
  106. data/ext/minimap2/lib/simde/test/x86/test-mmx.h +13 -0
  107. data/ext/minimap2/lib/simde/test/x86/test-sse.h +13 -0
  108. data/ext/minimap2/lib/simde/test/x86/test-sse2.h +13 -0
  109. data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +196 -0
  110. data/ext/minimap2/lib/simde/test/x86/test-x86.c +48 -0
  111. data/ext/minimap2/lib/simde/test/x86/test-x86.h +8 -0
  112. data/ext/minimap2/main.c +13 -6
  113. data/ext/minimap2/map.c +0 -5
  114. data/ext/minimap2/minimap.h +40 -31
  115. data/ext/minimap2/minimap2.1 +19 -5
  116. data/ext/minimap2/misc/paftools.js +545 -24
  117. data/ext/minimap2/options.c +1 -1
  118. data/ext/minimap2/pyproject.toml +2 -0
  119. data/ext/minimap2/python/mappy.pyx +3 -1
  120. data/ext/minimap2/seed.c +1 -1
  121. data/ext/minimap2/setup.py +32 -22
  122. data/lib/minimap2/version.rb +1 -1
  123. metadata +100 -3
@@ -0,0 +1,3389 @@
1
+ /* Permission is hereby granted, free of charge, to any person
2
+ * obtaining a copy of this software and associated documentation
3
+ * files (the "Software"), to deal in the Software without
4
+ * restriction, including without limitation the rights to use, copy,
5
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
6
+ * of the Software, and to permit persons to whom the Software is
7
+ * furnished to do so, subject to the following conditions:
8
+ *
9
+ * The above copyright notice and this permission notice shall be
10
+ * included in all copies or substantial portions of the Software.
11
+ *
12
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
14
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
16
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
17
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ * SOFTWARE.
20
+ *
21
+ * Copyright:
22
+ * 2020 Evan Nemerson <evan@nemerson.com>
23
+ */
24
+
25
+ #if !defined(SIMDE__AVX512F_H)
26
+ # if !defined(SIMDE__AVX512F_H)
27
+ # define SIMDE__AVX512F_H
28
+ # endif
29
+ # include "avx2.h"
30
+
31
+ HEDLEY_DIAGNOSTIC_PUSH
32
+ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
33
+
34
+ # if defined(SIMDE_ARCH_X86_AVX512F) && !defined(SIMDE_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
35
+ # define SIMDE_AVX512F_NATIVE
36
+ # elif defined(SIMDE_ARCH_ARM_NEON) && !defined(SIMDE_AVX512F_NO_NEON) && !defined(SIMDE_NO_NEON)
37
+ # define SIMDE_AVX512F_NEON
38
+ # elif defined(SIMDE_ARCH_POWER_ALTIVEC)
39
+ # define SIMDE_AVX512F_POWER_ALTIVEC
40
+ # endif
41
+
42
+ /* The problem is that Microsoft doesn't support 64-byte aligned parameters, except for
43
+ __m512/__m512i/__m512d. Since our private union has an __m512 member it will be 64-byte
44
+ aligned even if we reduce the alignment requirements of other members.
45
+
46
+ Even if we're on x86 and use the native AVX-512 types for arguments/return values, the
47
+ to/from private functions will break, and I'm not willing to change their APIs to use
48
+ pointers (which would also require more verbose code on the caller side) just to make
49
+ MSVC happy.
50
+
51
+ If you want to use AVX-512 in SIMDe, you'll need to either upgrade to MSVC 2017 or later,
52
+ or upgrade to a different compiler (clang-cl, perhaps?). If you have an idea of how to
53
+ fix this without requiring API changes (except transparently through macros), patches
54
+ are welcome. */
55
+ # if defined(HEDLEY_MSVC_VERSION) && !HEDLEY_MSVC_VERSION_CHECK(19,10,0)
56
+ # if defined(SIMDE_AVX512F_NATIVE)
57
+ # undef SIMDE_AVX512F_NATIVE
58
+ # pragma message("Native AVX-512 support requires MSVC 2017 or later. See comment above (in code) for details.")
59
+ # endif
60
+ # define SIMDE_AVX512_ALIGN SIMDE_ALIGN(32)
61
+ # else
62
+ # define SIMDE_AVX512_ALIGN SIMDE_ALIGN(64)
63
+ # endif
64
+
65
+ # if defined(SIMDE_AVX512F_NATIVE)
66
+ # include <immintrin.h>
67
+ # endif
68
+
69
+ # if defined(SIMDE_AVX512F_POWER_ALTIVEC)
70
+ # include <altivec.h>
71
+ # endif
72
+
73
+ SIMDE__BEGIN_DECLS
74
+
75
+ typedef union {
76
+ #if defined(SIMDE_VECTOR_SUBSCRIPT)
77
+ SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
78
+ SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
79
+ SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
80
+ SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
81
+ SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
82
+ SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
83
+ SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
84
+ SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
85
+ #if defined(SIMDE__HAVE_INT128)
86
+ SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
87
+ SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
88
+ #endif
89
+ SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
90
+ SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
91
+ SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
92
+ SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
93
+ #else
94
+ SIMDE_AVX512_ALIGN int8_t i8[64];
95
+ SIMDE_AVX512_ALIGN int16_t i16[32];
96
+ SIMDE_AVX512_ALIGN int32_t i32[16];
97
+ SIMDE_AVX512_ALIGN int64_t i64[8];
98
+ SIMDE_AVX512_ALIGN uint8_t u8[64];
99
+ SIMDE_AVX512_ALIGN uint16_t u16[32];
100
+ SIMDE_AVX512_ALIGN uint32_t u32[16];
101
+ SIMDE_AVX512_ALIGN uint64_t u64[8];
102
+ SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)];
103
+ SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)];
104
+ #if defined(SIMDE__HAVE_INT128)
105
+ SIMDE_AVX512_ALIGN simde_int128 i128[4];
106
+ SIMDE_AVX512_ALIGN simde_uint128 u128[4];
107
+ #endif
108
+ SIMDE_AVX512_ALIGN simde_float32 f32[16];
109
+ SIMDE_AVX512_ALIGN simde_float64 f64[8];
110
+ #endif
111
+
112
+ SIMDE_AVX512_ALIGN simde__m128_private m128_private[4];
113
+ SIMDE_AVX512_ALIGN simde__m128 m128[4];
114
+ SIMDE_AVX512_ALIGN simde__m256_private m256_private[2];
115
+ SIMDE_AVX512_ALIGN simde__m256 m256[2];
116
+
117
+ #if defined(SIMDE_AVX512F_NATIVE)
118
+ SIMDE_AVX512_ALIGN __m512 n;
119
+ #elif defined(SIMDE_ARCH_POWER_ALTIVEC)
120
+ SIMDE_ALIGN(16) vector unsigned char altivec_u8[4];
121
+ SIMDE_ALIGN(16) vector unsigned short altivec_u16[4];
122
+ SIMDE_ALIGN(16) vector unsigned int altivec_u32[4];
123
+ SIMDE_ALIGN(16) vector unsigned long long altivec_u64[4];
124
+ SIMDE_ALIGN(16) vector signed char altivec_i8[4];
125
+ SIMDE_ALIGN(16) vector signed short altivec_i16[4];
126
+ SIMDE_ALIGN(16) vector signed int altivec_i32[4];
127
+ SIMDE_ALIGN(16) vector signed long long altivec_i64[4];
128
+ SIMDE_ALIGN(16) vector float altivec_f32[4];
129
+ SIMDE_ALIGN(16) vector double altivec_f64[4];
130
+ #endif
131
+ } simde__m512_private;
132
+
133
+ typedef union {
134
+ #if defined(SIMDE_VECTOR_SUBSCRIPT)
135
+ SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
136
+ SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
137
+ SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
138
+ SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
139
+ SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
140
+ SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
141
+ SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
142
+ SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
143
+ #if defined(SIMDE__HAVE_INT128)
144
+ SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
145
+ SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
146
+ #endif
147
+ SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
148
+ SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
149
+ SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
150
+ SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
151
+ #else
152
+ SIMDE_AVX512_ALIGN int8_t i8[64];
153
+ SIMDE_AVX512_ALIGN int16_t i16[32];
154
+ SIMDE_AVX512_ALIGN int32_t i32[16];
155
+ SIMDE_AVX512_ALIGN int64_t i64[8];
156
+ SIMDE_AVX512_ALIGN uint8_t u8[64];
157
+ SIMDE_AVX512_ALIGN uint16_t u16[32];
158
+ SIMDE_AVX512_ALIGN uint32_t u32[16];
159
+ SIMDE_AVX512_ALIGN uint64_t u64[8];
160
+ #if defined(SIMDE__HAVE_INT128)
161
+ SIMDE_AVX512_ALIGN simde_int128 i128[4];
162
+ SIMDE_AVX512_ALIGN simde_uint128 u128[4];
163
+ #endif
164
+ SIMDE_AVX512_ALIGN simde_float32 f32[16];
165
+ SIMDE_AVX512_ALIGN simde_float64 f64[8];
166
+ SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)];
167
+ SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)];
168
+ #endif
169
+
170
+ SIMDE_AVX512_ALIGN simde__m128d_private m128d_private[4];
171
+ SIMDE_AVX512_ALIGN simde__m128d m128d[4];
172
+ SIMDE_AVX512_ALIGN simde__m256d_private m256d_private[2];
173
+ SIMDE_AVX512_ALIGN simde__m256d m256d[2];
174
+
175
+ #if defined(SIMDE_AVX512F_NATIVE)
176
+ SIMDE_AVX512_ALIGN __m512d n;
177
+ #elif defined(SIMDE_ARCH_POWER_ALTIVEC)
178
+ SIMDE_ALIGN(16) vector unsigned char altivec_u8[4];
179
+ SIMDE_ALIGN(16) vector unsigned short altivec_u16[4];
180
+ SIMDE_ALIGN(16) vector unsigned int altivec_u32[4];
181
+ SIMDE_ALIGN(16) vector unsigned long long altivec_u64[4];
182
+ SIMDE_ALIGN(16) vector signed char altivec_i8[4];
183
+ SIMDE_ALIGN(16) vector signed short altivec_i16[4];
184
+ SIMDE_ALIGN(16) vector signed int altivec_i32[4];
185
+ SIMDE_ALIGN(16) vector signed long long altivec_i64[4];
186
+ SIMDE_ALIGN(16) vector float altivec_f32[4];
187
+ SIMDE_ALIGN(16) vector double altivec_f64[4];
188
+ #endif
189
+ } simde__m512d_private;
190
+
191
+ typedef union {
192
+ #if defined(SIMDE_VECTOR_SUBSCRIPT)
193
+ SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
194
+ SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
195
+ SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
196
+ SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
197
+ SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
198
+ SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
199
+ SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
200
+ SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
201
+ #if defined(SIMDE__HAVE_INT128)
202
+ SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
203
+ SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
204
+ #endif
205
+ SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
206
+ SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
207
+ SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
208
+ SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
209
+ #else
210
+ SIMDE_AVX512_ALIGN int8_t i8[64];
211
+ SIMDE_AVX512_ALIGN int16_t i16[32];
212
+ SIMDE_AVX512_ALIGN int32_t i32[16];
213
+ SIMDE_AVX512_ALIGN int64_t i64[8];
214
+ SIMDE_AVX512_ALIGN uint8_t u8[64];
215
+ SIMDE_AVX512_ALIGN uint16_t u16[32];
216
+ SIMDE_AVX512_ALIGN uint32_t u32[16];
217
+ SIMDE_AVX512_ALIGN uint64_t u64[8];
218
+ SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)];
219
+ SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)];
220
+ #if defined(SIMDE__HAVE_INT128)
221
+ SIMDE_AVX512_ALIGN simde_int128 i128[4];
222
+ SIMDE_AVX512_ALIGN simde_uint128 u128[4];
223
+ #endif
224
+ SIMDE_AVX512_ALIGN simde_float32 f32[16];
225
+ SIMDE_AVX512_ALIGN simde_float64 f64[8];
226
+ #endif
227
+
228
+ SIMDE_AVX512_ALIGN simde__m128i_private m128i_private[4];
229
+ SIMDE_AVX512_ALIGN simde__m128i m128i[4];
230
+ SIMDE_AVX512_ALIGN simde__m256i_private m256i_private[2];
231
+ SIMDE_AVX512_ALIGN simde__m256i m256i[2];
232
+
233
+ #if defined(SIMDE_AVX512F_NATIVE)
234
+ SIMDE_AVX512_ALIGN __m512i n;
235
+ #elif defined(SIMDE_ARCH_POWER_ALTIVEC)
236
+ SIMDE_ALIGN(16) vector unsigned char altivec_u8[4];
237
+ SIMDE_ALIGN(16) vector unsigned short altivec_u16[4];
238
+ SIMDE_ALIGN(16) vector unsigned int altivec_u32[4];
239
+ SIMDE_ALIGN(16) vector unsigned long long altivec_u64[4];
240
+ SIMDE_ALIGN(16) vector signed char altivec_i8[4];
241
+ SIMDE_ALIGN(16) vector signed short altivec_i16[4];
242
+ SIMDE_ALIGN(16) vector signed int altivec_i32[4];
243
+ SIMDE_ALIGN(16) vector signed long long altivec_i64[4];
244
+ SIMDE_ALIGN(16) vector float altivec_f32[4];
245
+ SIMDE_ALIGN(16) vector double altivec_f64[4];
246
+ #endif
247
+ } simde__m512i_private;
248
+
249
+ #if defined(SIMDE_AVX512F_NATIVE)
250
+ typedef __m512 simde__m512;
251
+ typedef __m512i simde__m512i;
252
+ typedef __m512d simde__m512d;
253
+ typedef __mmask8 simde__mmask8;
254
+ typedef __mmask16 simde__mmask16;
255
+ typedef __mmask32 simde__mmask32;
256
+ typedef __mmask64 simde__mmask64;
257
+ #else
258
+ #if defined(SIMDE_VECTOR_SUBSCRIPT)
259
+ typedef simde_float32 simde__m512 SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
260
+ typedef int_fast32_t simde__m512i SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
261
+ typedef simde_float64 simde__m512d SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
262
+ #else
263
+ typedef simde__m512_private simde__m512;
264
+ typedef simde__m512i_private simde__m512i;
265
+ typedef simde__m512d_private simde__m512d;
266
+ #endif
267
+
268
+ typedef uint_fast8_t simde__mmask8;
269
+ typedef uint_fast16_t simde__mmask16;
270
+ typedef uint_fast32_t simde__mmask32;
271
+ typedef uint_fast64_t simde__mmask64;
272
+ #endif
273
+
274
+ #if !defined(SIMDE_AVX512F_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
275
+ #define SIMDE_AVX512F_ENABLE_NATIVE_ALIASES
276
+ typedef simde__m512 __m512;
277
+ typedef simde__m512i __m512i;
278
+ typedef simde__m512d __m512d;
279
+ #endif
280
+
281
+ HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512), "simde__m512 size incorrect");
282
+ HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512_private), "simde__m512_private size incorrect");
283
+ HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512i), "simde__m512i size incorrect");
284
+ HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512i_private), "simde__m512i_private size incorrect");
285
+ HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512d), "simde__m512d size incorrect");
286
+ HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512d_private), "simde__m512d_private size incorrect");
287
+ #if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)
288
+ HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512) == 32, "simde__m512 is not 32-byte aligned");
289
+ HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512_private) == 32, "simde__m512_private is not 32-byte aligned");
290
+ HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512i) == 32, "simde__m512i is not 32-byte aligned");
291
+ HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512i_private) == 32, "simde__m512i_private is not 32-byte aligned");
292
+ HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512d) == 32, "simde__m512d is not 32-byte aligned");
293
+ HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512d_private) == 32, "simde__m512d_private is not 32-byte aligned");
294
+ #endif
295
+
296
+ SIMDE__FUNCTION_ATTRIBUTES
297
+ simde__m512
298
+ simde__m512_from_private(simde__m512_private v) {
299
+ simde__m512 r;
300
+ simde_memcpy(&r, &v, sizeof(r));
301
+ return r;
302
+ }
303
+
304
+ SIMDE__FUNCTION_ATTRIBUTES
305
+ simde__m512_private
306
+ simde__m512_to_private(simde__m512 v) {
307
+ simde__m512_private r;
308
+ simde_memcpy(&r, &v, sizeof(r));
309
+ return r;
310
+ }
311
+
312
+ SIMDE__FUNCTION_ATTRIBUTES
313
+ simde__m512i
314
+ simde__m512i_from_private(simde__m512i_private v) {
315
+ simde__m512i r;
316
+ simde_memcpy(&r, &v, sizeof(r));
317
+ return r;
318
+ }
319
+
320
+ SIMDE__FUNCTION_ATTRIBUTES
321
+ simde__m512i_private
322
+ simde__m512i_to_private(simde__m512i v) {
323
+ simde__m512i_private r;
324
+ simde_memcpy(&r, &v, sizeof(r));
325
+ return r;
326
+ }
327
+
328
+ SIMDE__FUNCTION_ATTRIBUTES
329
+ simde__m512d
330
+ simde__m512d_from_private(simde__m512d_private v) {
331
+ simde__m512d r;
332
+ simde_memcpy(&r, &v, sizeof(r));
333
+ return r;
334
+ }
335
+
336
+ SIMDE__FUNCTION_ATTRIBUTES
337
+ simde__m512d_private
338
+ simde__m512d_to_private(simde__m512d v) {
339
+ simde__m512d_private r;
340
+ simde_memcpy(&r, &v, sizeof(r));
341
+ return r;
342
+ }
343
+
344
+ SIMDE__FUNCTION_ATTRIBUTES
345
+ simde__mmask16
346
+ simde__m512i_private_to_mmask16 (simde__m512i_private a) {
347
+ #if defined(SIMDE_AVX512F_NATIVE)
348
+ HEDLEY_UNREACHABLE_RETURN(0);
349
+ #else
350
+ simde__mmask16 r = 0;
351
+
352
+ /* Note: using addition instead of a bitwise or for the reduction
353
+ seems like it should improve things since hardware support for
354
+ horizontal addition is better than bitwise or. However, GCC
355
+ generates the same code, and clang is actually a bit slower.
356
+ I suspect this can be optimized quite a bit, and this function
357
+ is probably going to be pretty hot. */
358
+ SIMDE__VECTORIZE_REDUCTION(|:r)
359
+ for (size_t i = 0 ; i < (sizeof(a.i32) / sizeof(a.i32[0])) ; i++) {
360
+ r |= !!(a.i32[i]) << i;
361
+ }
362
+
363
+ return r;
364
+ #endif
365
+ }
366
+
367
+ SIMDE__FUNCTION_ATTRIBUTES
368
+ simde__mmask8
369
+ simde__m512i_private_to_mmask8 (simde__m512i_private a) {
370
+ #if defined(SIMDE_AVX512F_NATIVE)
371
+ HEDLEY_UNREACHABLE_RETURN(0);
372
+ #else
373
+ simde__mmask8 r = 0;
374
+ SIMDE__VECTORIZE_REDUCTION(|:r)
375
+ for (size_t i = 0 ; i < (sizeof(a.i64) / sizeof(a.i64[0])) ; i++) {
376
+ r |= !!(a.i64[i]) << i;
377
+ }
378
+
379
+ return r;
380
+ #endif
381
+ }
382
+
383
+ SIMDE__FUNCTION_ATTRIBUTES
384
+ simde__m512i
385
+ simde__m512i_from_mmask16 (simde__mmask16 k) {
386
+ #if defined(SIMDE_AVX512F_NATIVE)
387
+ /* Should never be reached. */
388
+ return _mm512_mask_mov_epi32(_mm512_setzero_epi32(), k, _mm512_set1_epi32(~INT32_C(0)));
389
+ #else
390
+ simde__m512i_private r_;
391
+
392
+ SIMDE__VECTORIZE
393
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
394
+ r_.i32[i] = (k & (1 << i)) ? ~INT32_C(0) : INT32_C(0);
395
+ }
396
+
397
+ return simde__m512i_from_private(r_);
398
+ #endif
399
+ }
400
+
401
+ SIMDE__FUNCTION_ATTRIBUTES
402
+ simde__m512
403
+ simde_mm512_castpd_ps (simde__m512d a) {
404
+ #if defined(SIMDE_AVX512F_NATIVE)
405
+ return _mm512_castpd_ps(a);
406
+ #else
407
+ simde__m512 r;
408
+ memcpy(&r, &a, sizeof(r));
409
+ return r;
410
+ #endif
411
+ }
412
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
413
+ #define _mm512_castpd_ps(a) simde_mm512_castpd_ps(a)
414
+ #endif
415
+
416
+ SIMDE__FUNCTION_ATTRIBUTES
417
+ simde__m512i
418
+ simde_mm512_castpd_si512 (simde__m512d a) {
419
+ #if defined(SIMDE_AVX512F_NATIVE)
420
+ return _mm512_castpd_si512(a);
421
+ #else
422
+ simde__m512i r;
423
+ memcpy(&r, &a, sizeof(r));
424
+ return r;
425
+ #endif
426
+ }
427
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
428
+ #define _mm512_castpd_si512(a) simde_mm512_castpd_si512(a)
429
+ #endif
430
+
431
+ SIMDE__FUNCTION_ATTRIBUTES
432
+ simde__m512d
433
+ simde_mm512_castps_pd (simde__m512 a) {
434
+ #if defined(SIMDE_AVX512F_NATIVE)
435
+ return _mm512_castps_pd(a);
436
+ #else
437
+ simde__m512d r;
438
+ memcpy(&r, &a, sizeof(r));
439
+ return r;
440
+ #endif
441
+ }
442
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
443
+ #define _mm512_castps_pd(a) simde_mm512_castps_pd(a)
444
+ #endif
445
+
446
+ SIMDE__FUNCTION_ATTRIBUTES
447
+ simde__m512i
448
+ simde_mm512_castps_si512 (simde__m512 a) {
449
+ #if defined(SIMDE_AVX512F_NATIVE)
450
+ return _mm512_castps_si512(a);
451
+ #else
452
+ simde__m512i r;
453
+ memcpy(&r, &a, sizeof(r));
454
+ return r;
455
+ #endif
456
+ }
457
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
458
+ #define _mm512_castps_si512(a) simde_mm512_castps_si512(a)
459
+ #endif
460
+
461
+ SIMDE__FUNCTION_ATTRIBUTES
462
+ simde__m512
463
+ simde_mm512_castsi512_ps (simde__m512i a) {
464
+ #if defined(SIMDE_AVX512F_NATIVE)
465
+ return _mm512_castsi512_ps(a);
466
+ #else
467
+ simde__m512 r;
468
+ memcpy(&r, &a, sizeof(r));
469
+ return r;
470
+ #endif
471
+ }
472
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
473
+ #define _mm512_castsi512_ps(a) simde_mm512_castsi512_ps(a)
474
+ #endif
475
+
476
+ SIMDE__FUNCTION_ATTRIBUTES
477
+ simde__m512d
478
+ simde_mm512_castsi512_pd (simde__m512i a) {
479
+ #if defined(SIMDE_AVX512F_NATIVE)
480
+ return _mm512_castsi512_pd(a);
481
+ #else
482
+ simde__m512d r;
483
+ memcpy(&r, &a, sizeof(r));
484
+ return r;
485
+ #endif
486
+ }
487
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
488
+ #define _mm512_castsi512_pd(a) simde_mm512_castsi512_pd(a)
489
+ #endif
490
+
491
+ SIMDE__FUNCTION_ATTRIBUTES
492
+ simde__m512d
493
+ simde_mm512_castpd128_pd512 (simde__m128d a) {
494
+ #if defined(SIMDE_AVX512F_NATIVE)
495
+ return _mm512_castpd128_pd512(a);
496
+ #else
497
+ simde__m512d_private r_;
498
+ r_.m128d[0] = a;
499
+ return simde__m512d_from_private(r_);
500
+ #endif
501
+ }
502
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
503
+ #define _mm512_castpd128_pd512(a) simde_mm512_castpd128_pd512(a)
504
+ #endif
505
+
506
+ SIMDE__FUNCTION_ATTRIBUTES
507
+ simde__m512d
508
+ simde_mm512_castpd256_pd512 (simde__m256d a) {
509
+ #if defined(SIMDE_AVX512F_NATIVE)
510
+ return _mm512_castpd256_pd512(a);
511
+ #else
512
+ simde__m512d_private r_;
513
+ r_.m256d[0] = a;
514
+ return simde__m512d_from_private(r_);
515
+ #endif
516
+ }
517
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
518
+ #define _mm512_castpd256_pd512(a) simde_mm512_castpd256_pd512(a)
519
+ #endif
520
+
521
+ SIMDE__FUNCTION_ATTRIBUTES
522
+ simde__m128d
523
+ simde_mm512_castpd512_pd128 (simde__m512d a) {
524
+ #if defined(SIMDE_AVX512F_NATIVE)
525
+ return _mm512_castpd512_pd128(a);
526
+ #else
527
+ simde__m512d_private a_ = simde__m512d_to_private(a);
528
+ return a_.m128d[0];
529
+ #endif
530
+ }
531
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
532
+ #define _mm512_castpd512_pd128(a) simde_mm512_castpd512_pd128(a)
533
+ #endif
534
+
535
+ SIMDE__FUNCTION_ATTRIBUTES
536
+ simde__m256d
537
+ simde_mm512_castpd512_pd256 (simde__m512d a) {
538
+ #if defined(SIMDE_AVX512F_NATIVE)
539
+ return _mm512_castpd512_pd256(a);
540
+ #else
541
+ simde__m512d_private a_ = simde__m512d_to_private(a);
542
+ return a_.m256d[0];
543
+ #endif
544
+ }
545
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
546
+ #define _mm512_castpd512_pd256(a) simde_mm512_castpd512_pd256(a)
547
+ #endif
548
+
549
+ SIMDE__FUNCTION_ATTRIBUTES
550
+ simde__m512
551
+ simde_mm512_castps128_ps512 (simde__m128 a) {
552
+ #if defined(SIMDE_AVX512F_NATIVE)
553
+ return _mm512_castps128_ps512(a);
554
+ #else
555
+ simde__m512_private r_;
556
+ r_.m128[0] = a;
557
+ return simde__m512_from_private(r_);
558
+ #endif
559
+ }
560
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
561
+ #define _mm512_castps128_ps512(a) simde_mm512_castps128_ps512(a)
562
+ #endif
563
+
564
+ SIMDE__FUNCTION_ATTRIBUTES
565
+ simde__m512
566
+ simde_mm512_castps256_ps512 (simde__m256 a) {
567
+ #if defined(SIMDE_AVX512F_NATIVE)
568
+ return _mm512_castps256_ps512(a);
569
+ #else
570
+ simde__m512_private r_;
571
+ r_.m256[0] = a;
572
+ return simde__m512_from_private(r_);
573
+ #endif
574
+ }
575
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
576
+ #define _mm512_castps256_ps512(a) simde_mm512_castps256_ps512(a)
577
+ #endif
578
+
579
+ SIMDE__FUNCTION_ATTRIBUTES
580
+ simde__m128
581
+ simde_mm512_castps512_ps128 (simde__m512 a) {
582
+ #if defined(SIMDE_AVX512F_NATIVE)
583
+ return _mm512_castps512_ps128(a);
584
+ #else
585
+ simde__m512_private a_ = simde__m512_to_private(a);
586
+ return a_.m128[0];
587
+ #endif
588
+ }
589
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
590
+ #define _mm512_castps512_ps128(a) simde_mm512_castps512_ps128(a)
591
+ #endif
592
+
593
+ SIMDE__FUNCTION_ATTRIBUTES
594
+ simde__m256
595
+ simde_mm512_castps512_ps256 (simde__m512 a) {
596
+ #if defined(SIMDE_AVX512F_NATIVE)
597
+ return _mm512_castps512_ps256(a);
598
+ #else
599
+ simde__m512_private a_ = simde__m512_to_private(a);
600
+ return a_.m256[0];
601
+ #endif
602
+ }
603
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
604
+ #define _mm512_castps512_ps256(a) simde_mm512_castps512_ps256(a)
605
+ #endif
606
+
607
+ SIMDE__FUNCTION_ATTRIBUTES
608
+ simde__m512i
609
+ simde_mm512_castsi128_si512 (simde__m128i a) {
610
+ #if defined(SIMDE_AVX512F_NATIVE)
611
+ return _mm512_castsi128_si512(a);
612
+ #else
613
+ simde__m512i_private r_;
614
+ r_.m128i[0] = a;
615
+ return simde__m512i_from_private(r_);
616
+ #endif
617
+ }
618
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
619
+ #define _mm512_castsi128_si512(a) simde_mm512_castsi128_si512(a)
620
+ #endif
621
+
622
+ SIMDE__FUNCTION_ATTRIBUTES
623
+ simde__m512i
624
+ simde_mm512_castsi256_si512 (simde__m256i a) {
625
+ #if defined(SIMDE_AVX512F_NATIVE)
626
+ return _mm512_castsi256_si512(a);
627
+ #else
628
+ simde__m512i_private r_;
629
+ r_.m256i[0] = a;
630
+ return simde__m512i_from_private(r_);
631
+ #endif
632
+ }
633
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
634
+ #define _mm512_castsi256_si512(a) simde_mm512_castsi256_si512(a)
635
+ #endif
636
+
637
+ SIMDE__FUNCTION_ATTRIBUTES
638
+ simde__m128i
639
+ simde_mm512_castsi512_si128 (simde__m512i a) {
640
+ #if defined(SIMDE_AVX512F_NATIVE)
641
+ return _mm512_castsi512_si128(a);
642
+ #else
643
+ simde__m512i_private a_ = simde__m512i_to_private(a);
644
+ return a_.m128i[0];
645
+ #endif
646
+ }
647
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
648
+ #define _mm512_castsi512_si128(a) simde_mm512_castsi512_si128(a)
649
+ #endif
650
+
651
+ SIMDE__FUNCTION_ATTRIBUTES
652
+ simde__m256i
653
+ simde_mm512_castsi512_si256 (simde__m512i a) {
654
+ #if defined(SIMDE_AVX512F_NATIVE)
655
+ return _mm512_castsi512_si256(a);
656
+ #else
657
+ simde__m512i_private a_ = simde__m512i_to_private(a);
658
+ return a_.m256i[0];
659
+ #endif
660
+ }
661
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
662
+ #define _mm512_castsi512_si256(a) simde_mm512_castsi512_si256(a)
663
+ #endif
664
+
665
+ SIMDE__FUNCTION_ATTRIBUTES
666
+ simde__m512i
667
+ simde_mm512_set_epi8 (int8_t e63, int8_t e62, int8_t e61, int8_t e60, int8_t e59, int8_t e58, int8_t e57, int8_t e56,
668
+ int8_t e55, int8_t e54, int8_t e53, int8_t e52, int8_t e51, int8_t e50, int8_t e49, int8_t e48,
669
+ int8_t e47, int8_t e46, int8_t e45, int8_t e44, int8_t e43, int8_t e42, int8_t e41, int8_t e40,
670
+ int8_t e39, int8_t e38, int8_t e37, int8_t e36, int8_t e35, int8_t e34, int8_t e33, int8_t e32,
671
+ int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24,
672
+ int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16,
673
+ int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8,
674
+ int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) {
675
+ simde__m512i_private r_;
676
+
677
+ r_.i8[ 0] = e0;
678
+ r_.i8[ 1] = e1;
679
+ r_.i8[ 2] = e2;
680
+ r_.i8[ 3] = e3;
681
+ r_.i8[ 4] = e4;
682
+ r_.i8[ 5] = e5;
683
+ r_.i8[ 6] = e6;
684
+ r_.i8[ 7] = e7;
685
+ r_.i8[ 8] = e8;
686
+ r_.i8[ 9] = e9;
687
+ r_.i8[10] = e10;
688
+ r_.i8[11] = e11;
689
+ r_.i8[12] = e12;
690
+ r_.i8[13] = e13;
691
+ r_.i8[14] = e14;
692
+ r_.i8[15] = e15;
693
+ r_.i8[16] = e16;
694
+ r_.i8[17] = e17;
695
+ r_.i8[18] = e18;
696
+ r_.i8[19] = e19;
697
+ r_.i8[20] = e20;
698
+ r_.i8[21] = e21;
699
+ r_.i8[22] = e22;
700
+ r_.i8[23] = e23;
701
+ r_.i8[24] = e24;
702
+ r_.i8[25] = e25;
703
+ r_.i8[26] = e26;
704
+ r_.i8[27] = e27;
705
+ r_.i8[28] = e28;
706
+ r_.i8[29] = e29;
707
+ r_.i8[30] = e30;
708
+ r_.i8[31] = e31;
709
+ r_.i8[32] = e32;
710
+ r_.i8[33] = e33;
711
+ r_.i8[34] = e34;
712
+ r_.i8[35] = e35;
713
+ r_.i8[36] = e36;
714
+ r_.i8[37] = e37;
715
+ r_.i8[38] = e38;
716
+ r_.i8[39] = e39;
717
+ r_.i8[40] = e40;
718
+ r_.i8[41] = e41;
719
+ r_.i8[42] = e42;
720
+ r_.i8[43] = e43;
721
+ r_.i8[44] = e44;
722
+ r_.i8[45] = e45;
723
+ r_.i8[46] = e46;
724
+ r_.i8[47] = e47;
725
+ r_.i8[48] = e48;
726
+ r_.i8[49] = e49;
727
+ r_.i8[50] = e50;
728
+ r_.i8[51] = e51;
729
+ r_.i8[52] = e52;
730
+ r_.i8[53] = e53;
731
+ r_.i8[54] = e54;
732
+ r_.i8[55] = e55;
733
+ r_.i8[56] = e56;
734
+ r_.i8[57] = e57;
735
+ r_.i8[58] = e58;
736
+ r_.i8[59] = e59;
737
+ r_.i8[60] = e60;
738
+ r_.i8[61] = e61;
739
+ r_.i8[62] = e62;
740
+ r_.i8[63] = e63;
741
+
742
+ return simde__m512i_from_private(r_);
743
+ }
744
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
745
+ #define _mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
746
+ #endif
747
+
748
+ SIMDE__FUNCTION_ATTRIBUTES
749
+ simde__m512i
750
+ simde_mm512_set_epi16 (int16_t e31, int16_t e30, int16_t e29, int16_t e28, int16_t e27, int16_t e26, int16_t e25, int16_t e24,
751
+ int16_t e23, int16_t e22, int16_t e21, int16_t e20, int16_t e19, int16_t e18, int16_t e17, int16_t e16,
752
+ int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8,
753
+ int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) {
754
+ simde__m512i_private r_;
755
+
756
+ r_.i16[ 0] = e0;
757
+ r_.i16[ 1] = e1;
758
+ r_.i16[ 2] = e2;
759
+ r_.i16[ 3] = e3;
760
+ r_.i16[ 4] = e4;
761
+ r_.i16[ 5] = e5;
762
+ r_.i16[ 6] = e6;
763
+ r_.i16[ 7] = e7;
764
+ r_.i16[ 8] = e8;
765
+ r_.i16[ 9] = e9;
766
+ r_.i16[10] = e10;
767
+ r_.i16[11] = e11;
768
+ r_.i16[12] = e12;
769
+ r_.i16[13] = e13;
770
+ r_.i16[14] = e14;
771
+ r_.i16[15] = e15;
772
+ r_.i16[16] = e16;
773
+ r_.i16[17] = e17;
774
+ r_.i16[18] = e18;
775
+ r_.i16[19] = e19;
776
+ r_.i16[20] = e20;
777
+ r_.i16[21] = e21;
778
+ r_.i16[22] = e22;
779
+ r_.i16[23] = e23;
780
+ r_.i16[24] = e24;
781
+ r_.i16[25] = e25;
782
+ r_.i16[26] = e26;
783
+ r_.i16[27] = e27;
784
+ r_.i16[28] = e28;
785
+ r_.i16[29] = e29;
786
+ r_.i16[30] = e30;
787
+ r_.i16[31] = e31;
788
+
789
+ return simde__m512i_from_private(r_);
790
+ }
791
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
792
+ #define _mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
793
+ #endif
794
+
795
+ SIMDE__FUNCTION_ATTRIBUTES
796
+ simde__m512i
797
+ simde_mm512_set_epi32 (int32_t e15, int32_t e14, int32_t e13, int32_t e12, int32_t e11, int32_t e10, int32_t e9, int32_t e8,
798
+ int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) {
799
+ simde__m512i_private r_;
800
+
801
+ r_.i32[ 0] = e0;
802
+ r_.i32[ 1] = e1;
803
+ r_.i32[ 2] = e2;
804
+ r_.i32[ 3] = e3;
805
+ r_.i32[ 4] = e4;
806
+ r_.i32[ 5] = e5;
807
+ r_.i32[ 6] = e6;
808
+ r_.i32[ 7] = e7;
809
+ r_.i32[ 8] = e8;
810
+ r_.i32[ 9] = e9;
811
+ r_.i32[10] = e10;
812
+ r_.i32[11] = e11;
813
+ r_.i32[12] = e12;
814
+ r_.i32[13] = e13;
815
+ r_.i32[14] = e14;
816
+ r_.i32[15] = e15;
817
+
818
+ return simde__m512i_from_private(r_);
819
+ }
820
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
821
+ #define _mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
822
+ #endif
823
+
824
+ SIMDE__FUNCTION_ATTRIBUTES
825
+ simde__m512i
826
+ simde_mm512_set_epi64 (int64_t e7, int64_t e6, int64_t e5, int64_t e4, int64_t e3, int64_t e2, int64_t e1, int64_t e0) {
827
+ simde__m512i_private r_;
828
+
829
+ r_.i64[0] = e0;
830
+ r_.i64[1] = e1;
831
+ r_.i64[2] = e2;
832
+ r_.i64[3] = e3;
833
+ r_.i64[4] = e4;
834
+ r_.i64[5] = e5;
835
+ r_.i64[6] = e6;
836
+ r_.i64[7] = e7;
837
+
838
+ return simde__m512i_from_private(r_);
839
+ }
840
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
841
+ #define _mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
842
+ #endif
843
+
844
+ SIMDE__FUNCTION_ATTRIBUTES
845
+ simde__m512i
846
+ simde_x_mm512_set_epu8 (uint8_t e63, uint8_t e62, uint8_t e61, uint8_t e60, uint8_t e59, uint8_t e58, uint8_t e57, uint8_t e56,
847
+ uint8_t e55, uint8_t e54, uint8_t e53, uint8_t e52, uint8_t e51, uint8_t e50, uint8_t e49, uint8_t e48,
848
+ uint8_t e47, uint8_t e46, uint8_t e45, uint8_t e44, uint8_t e43, uint8_t e42, uint8_t e41, uint8_t e40,
849
+ uint8_t e39, uint8_t e38, uint8_t e37, uint8_t e36, uint8_t e35, uint8_t e34, uint8_t e33, uint8_t e32,
850
+ uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24,
851
+ uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16,
852
+ uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8,
853
+ uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) {
854
+ simde__m512i_private r_;
855
+
856
+ r_.u8[ 0] = e0;
857
+ r_.u8[ 1] = e1;
858
+ r_.u8[ 2] = e2;
859
+ r_.u8[ 3] = e3;
860
+ r_.u8[ 4] = e4;
861
+ r_.u8[ 5] = e5;
862
+ r_.u8[ 6] = e6;
863
+ r_.u8[ 7] = e7;
864
+ r_.u8[ 8] = e8;
865
+ r_.u8[ 9] = e9;
866
+ r_.u8[10] = e10;
867
+ r_.u8[11] = e11;
868
+ r_.u8[12] = e12;
869
+ r_.u8[13] = e13;
870
+ r_.u8[14] = e14;
871
+ r_.u8[15] = e15;
872
+ r_.u8[16] = e16;
873
+ r_.u8[17] = e17;
874
+ r_.u8[18] = e18;
875
+ r_.u8[19] = e19;
876
+ r_.u8[20] = e20;
877
+ r_.u8[21] = e21;
878
+ r_.u8[22] = e22;
879
+ r_.u8[23] = e23;
880
+ r_.u8[24] = e24;
881
+ r_.u8[25] = e25;
882
+ r_.u8[26] = e26;
883
+ r_.u8[27] = e27;
884
+ r_.u8[28] = e28;
885
+ r_.u8[29] = e29;
886
+ r_.u8[30] = e30;
887
+ r_.u8[31] = e31;
888
+ r_.u8[32] = e32;
889
+ r_.u8[33] = e33;
890
+ r_.u8[34] = e34;
891
+ r_.u8[35] = e35;
892
+ r_.u8[36] = e36;
893
+ r_.u8[37] = e37;
894
+ r_.u8[38] = e38;
895
+ r_.u8[39] = e39;
896
+ r_.u8[40] = e40;
897
+ r_.u8[41] = e41;
898
+ r_.u8[42] = e42;
899
+ r_.u8[43] = e43;
900
+ r_.u8[44] = e44;
901
+ r_.u8[45] = e45;
902
+ r_.u8[46] = e46;
903
+ r_.u8[47] = e47;
904
+ r_.u8[48] = e48;
905
+ r_.u8[49] = e49;
906
+ r_.u8[50] = e50;
907
+ r_.u8[51] = e51;
908
+ r_.u8[52] = e52;
909
+ r_.u8[53] = e53;
910
+ r_.u8[54] = e54;
911
+ r_.u8[55] = e55;
912
+ r_.u8[56] = e56;
913
+ r_.u8[57] = e57;
914
+ r_.u8[58] = e58;
915
+ r_.u8[59] = e59;
916
+ r_.u8[60] = e60;
917
+ r_.u8[61] = e61;
918
+ r_.u8[62] = e62;
919
+ r_.u8[63] = e63;
920
+
921
+ return simde__m512i_from_private(r_);
922
+ }
923
+
924
+ SIMDE__FUNCTION_ATTRIBUTES
925
+ simde__m512i
926
+ simde_x_mm512_set_epu16 (uint16_t e31, uint16_t e30, uint16_t e29, uint16_t e28, uint16_t e27, uint16_t e26, uint16_t e25, uint16_t e24,
927
+ uint16_t e23, uint16_t e22, uint16_t e21, uint16_t e20, uint16_t e19, uint16_t e18, uint16_t e17, uint16_t e16,
928
+ uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8,
929
+ uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) {
930
+ simde__m512i_private r_;
931
+
932
+ r_.u16[ 0] = e0;
933
+ r_.u16[ 1] = e1;
934
+ r_.u16[ 2] = e2;
935
+ r_.u16[ 3] = e3;
936
+ r_.u16[ 4] = e4;
937
+ r_.u16[ 5] = e5;
938
+ r_.u16[ 6] = e6;
939
+ r_.u16[ 7] = e7;
940
+ r_.u16[ 8] = e8;
941
+ r_.u16[ 9] = e9;
942
+ r_.u16[10] = e10;
943
+ r_.u16[11] = e11;
944
+ r_.u16[12] = e12;
945
+ r_.u16[13] = e13;
946
+ r_.u16[14] = e14;
947
+ r_.u16[15] = e15;
948
+ r_.u16[16] = e16;
949
+ r_.u16[17] = e17;
950
+ r_.u16[18] = e18;
951
+ r_.u16[19] = e19;
952
+ r_.u16[20] = e20;
953
+ r_.u16[21] = e21;
954
+ r_.u16[22] = e22;
955
+ r_.u16[23] = e23;
956
+ r_.u16[24] = e24;
957
+ r_.u16[25] = e25;
958
+ r_.u16[26] = e26;
959
+ r_.u16[27] = e27;
960
+ r_.u16[28] = e28;
961
+ r_.u16[29] = e29;
962
+ r_.u16[30] = e30;
963
+ r_.u16[31] = e31;
964
+
965
+ return simde__m512i_from_private(r_);
966
+ }
967
+
968
+ SIMDE__FUNCTION_ATTRIBUTES
969
+ simde__m512i
970
+ simde_x_mm512_set_epu32 (uint32_t e15, uint32_t e14, uint32_t e13, uint32_t e12, uint32_t e11, uint32_t e10, uint32_t e9, uint32_t e8,
971
+ uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) {
972
+ simde__m512i_private r_;
973
+
974
+ r_.u32[ 0] = e0;
975
+ r_.u32[ 1] = e1;
976
+ r_.u32[ 2] = e2;
977
+ r_.u32[ 3] = e3;
978
+ r_.u32[ 4] = e4;
979
+ r_.u32[ 5] = e5;
980
+ r_.u32[ 6] = e6;
981
+ r_.u32[ 7] = e7;
982
+ r_.u32[ 8] = e8;
983
+ r_.u32[ 9] = e9;
984
+ r_.u32[10] = e10;
985
+ r_.u32[11] = e11;
986
+ r_.u32[12] = e12;
987
+ r_.u32[13] = e13;
988
+ r_.u32[14] = e14;
989
+ r_.u32[15] = e15;
990
+
991
+ return simde__m512i_from_private(r_);
992
+ }
993
+
994
+ SIMDE__FUNCTION_ATTRIBUTES
995
+ simde__m512i
996
+ simde_x_mm512_set_epu64 (uint64_t e7, uint64_t e6, uint64_t e5, uint64_t e4, uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) {
997
+ simde__m512i_private r_;
998
+
999
+ r_.u64[ 0] = e0;
1000
+ r_.u64[ 1] = e1;
1001
+ r_.u64[ 2] = e2;
1002
+ r_.u64[ 3] = e3;
1003
+ r_.u64[ 4] = e4;
1004
+ r_.u64[ 5] = e5;
1005
+ r_.u64[ 6] = e6;
1006
+ r_.u64[ 7] = e7;
1007
+
1008
+ return simde__m512i_from_private(r_);
1009
+ }
1010
+
1011
+ SIMDE__FUNCTION_ATTRIBUTES
1012
+ simde__m512
1013
+ simde_mm512_set_ps (simde_float32 e15, simde_float32 e14, simde_float32 e13, simde_float32 e12,
1014
+ simde_float32 e11, simde_float32 e10, simde_float32 e9, simde_float32 e8,
1015
+ simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4,
1016
+ simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) {
1017
+ simde__m512_private r_;
1018
+
1019
+ r_.f32[ 0] = e0;
1020
+ r_.f32[ 1] = e1;
1021
+ r_.f32[ 2] = e2;
1022
+ r_.f32[ 3] = e3;
1023
+ r_.f32[ 4] = e4;
1024
+ r_.f32[ 5] = e5;
1025
+ r_.f32[ 6] = e6;
1026
+ r_.f32[ 7] = e7;
1027
+ r_.f32[ 8] = e8;
1028
+ r_.f32[ 9] = e9;
1029
+ r_.f32[10] = e10;
1030
+ r_.f32[11] = e11;
1031
+ r_.f32[12] = e12;
1032
+ r_.f32[13] = e13;
1033
+ r_.f32[14] = e14;
1034
+ r_.f32[15] = e15;
1035
+
1036
+ return simde__m512_from_private(r_);
1037
+ }
1038
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1039
+ #define _mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
1040
+ #endif
1041
+
1042
+ SIMDE__FUNCTION_ATTRIBUTES
1043
+ simde__m512d
1044
+ simde_mm512_set_pd (simde_float64 e7, simde_float64 e6, simde_float64 e5, simde_float64 e4, simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) {
1045
+ simde__m512d_private r_;
1046
+
1047
+ r_.f64[0] = e0;
1048
+ r_.f64[1] = e1;
1049
+ r_.f64[2] = e2;
1050
+ r_.f64[3] = e3;
1051
+ r_.f64[4] = e4;
1052
+ r_.f64[5] = e5;
1053
+ r_.f64[6] = e6;
1054
+ r_.f64[7] = e7;
1055
+
1056
+ return simde__m512d_from_private(r_);
1057
+ }
1058
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1059
+ #define _mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0)
1060
+ #endif
1061
+
1062
+ SIMDE__FUNCTION_ATTRIBUTES
1063
+ simde__m512i
1064
+ simde_mm512_set1_epi8 (int8_t a) {
1065
+ #if defined(SIMDE_AVX512F_NATIVE)
1066
+ return _mm512_set1_epi8(a);
1067
+ #else
1068
+ simde__m512i_private r_;
1069
+
1070
+ SIMDE__VECTORIZE
1071
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
1072
+ r_.i8[i] = a;
1073
+ }
1074
+
1075
+ return simde__m512i_from_private(r_);
1076
+ #endif
1077
+ }
1078
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1079
+ #define _mm512_set1_epi8(a) simde_mm512_set1_epi8(a)
1080
+ #endif
1081
+
1082
+ SIMDE__FUNCTION_ATTRIBUTES
1083
+ simde__m512i
1084
+ simde_mm512_set1_epi16 (int16_t a) {
1085
+ #if defined(SIMDE_AVX512F_NATIVE)
1086
+ return _mm512_set1_epi16(a);
1087
+ #else
1088
+ simde__m512i_private r_;
1089
+
1090
+ SIMDE__VECTORIZE
1091
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
1092
+ r_.i16[i] = a;
1093
+ }
1094
+
1095
+ return simde__m512i_from_private(r_);
1096
+ #endif
1097
+ }
1098
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1099
+ #define _mm512_set1_epi16(a) simde_mm512_set1_epi16(a)
1100
+ #endif
1101
+
1102
+ SIMDE__FUNCTION_ATTRIBUTES
1103
+ simde__m512i
1104
+ simde_mm512_set1_epi32 (int32_t a) {
1105
+ #if defined(SIMDE_AVX512F_NATIVE)
1106
+ return _mm512_set1_epi32(a);
1107
+ #else
1108
+ simde__m512i_private r_;
1109
+
1110
+ SIMDE__VECTORIZE
1111
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1112
+ r_.i32[i] = a;
1113
+ }
1114
+
1115
+ return simde__m512i_from_private(r_);
1116
+ #endif
1117
+ }
1118
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1119
+ #define _mm512_set1_epi32(a) simde_mm512_set1_epi32(a)
1120
+ #endif
1121
+
1122
+ SIMDE__FUNCTION_ATTRIBUTES
1123
+ simde__m512i
1124
+ simde_mm512_set1_epi64 (int64_t a) {
1125
+ #if defined(SIMDE_AVX512F_NATIVE)
1126
+ return _mm512_set1_epi64(a);
1127
+ #else
1128
+ simde__m512i_private r_;
1129
+
1130
+ SIMDE__VECTORIZE
1131
+ for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
1132
+ r_.i64[i] = a;
1133
+ }
1134
+
1135
+ return simde__m512i_from_private(r_);
1136
+ #endif
1137
+ }
1138
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1139
+ #define _mm512_set1_epi64(a) simde_mm512_set1_epi64(a)
1140
+ #endif
1141
+
1142
+ SIMDE__FUNCTION_ATTRIBUTES
1143
+ simde__m512i
1144
+ simde_x_mm512_set1_epu8 (uint8_t a) {
1145
+ simde__m512i_private r_;
1146
+
1147
+ SIMDE__VECTORIZE
1148
+ for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
1149
+ r_.u8[i] = a;
1150
+ }
1151
+
1152
+ return simde__m512i_from_private(r_);
1153
+ }
1154
+
1155
+ SIMDE__FUNCTION_ATTRIBUTES
1156
+ simde__m512i
1157
+ simde_x_mm512_set1_epu16 (uint16_t a) {
1158
+ simde__m512i_private r_;
1159
+
1160
+ SIMDE__VECTORIZE
1161
+ for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
1162
+ r_.u16[i] = a;
1163
+ }
1164
+
1165
+ return simde__m512i_from_private(r_);
1166
+ }
1167
+
1168
+ SIMDE__FUNCTION_ATTRIBUTES
1169
+ simde__m512i
1170
+ simde_x_mm512_set1_epu32 (uint32_t a) {
1171
+ simde__m512i_private r_;
1172
+
1173
+ SIMDE__VECTORIZE
1174
+ for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
1175
+ r_.u32[i] = a;
1176
+ }
1177
+
1178
+ return simde__m512i_from_private(r_);
1179
+ }
1180
+
1181
+ SIMDE__FUNCTION_ATTRIBUTES
1182
+ simde__m512i
1183
+ simde_x_mm512_set1_epu64 (uint64_t a) {
1184
+ simde__m512i_private r_;
1185
+
1186
+ SIMDE__VECTORIZE
1187
+ for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
1188
+ r_.u64[i] = a;
1189
+ }
1190
+
1191
+ return simde__m512i_from_private(r_);
1192
+ }
1193
+
1194
+ SIMDE__FUNCTION_ATTRIBUTES
1195
+ simde__m512
1196
+ simde_mm512_set1_ps (simde_float32 a) {
1197
+ #if defined(SIMDE_AVX512F_NATIVE)
1198
+ return _mm512_set1_ps(a);
1199
+ #else
1200
+ simde__m512_private r_;
1201
+
1202
+ SIMDE__VECTORIZE
1203
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1204
+ r_.f32[i] = a;
1205
+ }
1206
+
1207
+ return simde__m512_from_private(r_);
1208
+ #endif
1209
+ }
1210
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1211
+ #define _mm512_set1_ps(a) simde_mm512_set1_ps(a)
1212
+ #endif
1213
+
1214
+ SIMDE__FUNCTION_ATTRIBUTES
1215
+ simde__m512d
1216
+ simde_mm512_set1_pd (simde_float64 a) {
1217
+ #if defined(SIMDE_AVX512F_NATIVE)
1218
+ return _mm512_set1_pd(a);
1219
+ #else
1220
+ simde__m512d_private r_;
1221
+
1222
+ SIMDE__VECTORIZE
1223
+ for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1224
+ r_.f64[i] = a;
1225
+ }
1226
+
1227
+ return simde__m512d_from_private(r_);
1228
+ #endif
1229
+ }
1230
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1231
+ #define _mm512_set1_pd(a) simde_mm512_set1_pd(a)
1232
+ #endif
1233
+
1234
+ SIMDE__FUNCTION_ATTRIBUTES
1235
+ simde__m512i
1236
+ simde_mm512_set4_epi32 (int32_t d, int32_t c, int32_t b, int32_t a) {
1237
+ simde__m512i_private r_;
1238
+
1239
+ r_.i32[ 0] = a;
1240
+ r_.i32[ 1] = b;
1241
+ r_.i32[ 2] = c;
1242
+ r_.i32[ 3] = d;
1243
+ r_.i32[ 4] = a;
1244
+ r_.i32[ 5] = b;
1245
+ r_.i32[ 6] = c;
1246
+ r_.i32[ 7] = d;
1247
+ r_.i32[ 8] = a;
1248
+ r_.i32[ 9] = b;
1249
+ r_.i32[10] = c;
1250
+ r_.i32[11] = d;
1251
+ r_.i32[12] = a;
1252
+ r_.i32[13] = b;
1253
+ r_.i32[14] = c;
1254
+ r_.i32[15] = d;
1255
+
1256
+ return simde__m512i_from_private(r_);
1257
+ }
1258
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1259
+ #define _mm512_set4_epi32(d,c,b,a) simde_mm512_set4_epi32(d,c,b,a)
1260
+ #endif
1261
+
1262
+ SIMDE__FUNCTION_ATTRIBUTES
1263
+ simde__m512i
1264
+ simde_mm512_set4_epi64 (int64_t d, int64_t c, int64_t b, int64_t a) {
1265
+ simde__m512i_private r_;
1266
+
1267
+ r_.i64[0] = a;
1268
+ r_.i64[1] = b;
1269
+ r_.i64[2] = c;
1270
+ r_.i64[3] = d;
1271
+ r_.i64[4] = a;
1272
+ r_.i64[5] = b;
1273
+ r_.i64[6] = c;
1274
+ r_.i64[7] = d;
1275
+
1276
+ return simde__m512i_from_private(r_);
1277
+ }
1278
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1279
+ #define _mm512_set4_epi64(d,c,b,a) simde_mm512_set4_epi64(d,c,b,a)
1280
+ #endif
1281
+
1282
+ SIMDE__FUNCTION_ATTRIBUTES
1283
+ simde__m512
1284
+ simde_mm512_set4_ps (simde_float32 d, simde_float32 c, simde_float32 b, simde_float32 a) {
1285
+ simde__m512_private r_;
1286
+
1287
+ r_.f32[ 0] = a;
1288
+ r_.f32[ 1] = b;
1289
+ r_.f32[ 2] = c;
1290
+ r_.f32[ 3] = d;
1291
+ r_.f32[ 4] = a;
1292
+ r_.f32[ 5] = b;
1293
+ r_.f32[ 6] = c;
1294
+ r_.f32[ 7] = d;
1295
+ r_.f32[ 8] = a;
1296
+ r_.f32[ 9] = b;
1297
+ r_.f32[10] = c;
1298
+ r_.f32[11] = d;
1299
+ r_.f32[12] = a;
1300
+ r_.f32[13] = b;
1301
+ r_.f32[14] = c;
1302
+ r_.f32[15] = d;
1303
+
1304
+ return simde__m512_from_private(r_);
1305
+ }
1306
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1307
+ #define _mm512_set4_ps(d,c,b,a) simde_mm512_set4_ps(d,c,b,a)
1308
+ #endif
1309
+
1310
+ SIMDE__FUNCTION_ATTRIBUTES
1311
+ simde__m512d
1312
+ simde_mm512_set4_pd (simde_float64 d, simde_float64 c, simde_float64 b, simde_float64 a) {
1313
+ simde__m512d_private r_;
1314
+
1315
+ r_.f64[0] = a;
1316
+ r_.f64[1] = b;
1317
+ r_.f64[2] = c;
1318
+ r_.f64[3] = d;
1319
+ r_.f64[4] = a;
1320
+ r_.f64[5] = b;
1321
+ r_.f64[6] = c;
1322
+ r_.f64[7] = d;
1323
+
1324
+ return simde__m512d_from_private(r_);
1325
+ }
1326
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1327
+ #define _mm512_set4_pd(d,c,b,a) simde_mm512_set4_pd(d,c,b,a)
1328
+ #endif
1329
+
1330
+ SIMDE__FUNCTION_ATTRIBUTES
1331
+ simde__m512i
1332
+ simde_mm512_setr_epi32 (int32_t e15, int32_t e14, int32_t e13, int32_t e12, int32_t e11, int32_t e10, int32_t e9, int32_t e8,
1333
+ int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) {
1334
+ simde__m512i_private r_;
1335
+
1336
+ r_.i32[ 0] = e15;
1337
+ r_.i32[ 1] = e14;
1338
+ r_.i32[ 2] = e13;
1339
+ r_.i32[ 3] = e12;
1340
+ r_.i32[ 4] = e11;
1341
+ r_.i32[ 5] = e10;
1342
+ r_.i32[ 6] = e9;
1343
+ r_.i32[ 7] = e8;
1344
+ r_.i32[ 8] = e7;
1345
+ r_.i32[ 9] = e6;
1346
+ r_.i32[10] = e5;
1347
+ r_.i32[11] = e4;
1348
+ r_.i32[12] = e3;
1349
+ r_.i32[13] = e2;
1350
+ r_.i32[14] = e1;
1351
+ r_.i32[15] = e0;
1352
+
1353
+ return simde__m512i_from_private(r_);
1354
+ }
1355
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1356
+ #define _mm512_setr_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
1357
+ #endif
1358
+
1359
+ SIMDE__FUNCTION_ATTRIBUTES
1360
+ simde__m512i
1361
+ simde_mm512_setr_epi64 (int64_t e7, int64_t e6, int64_t e5, int64_t e4, int64_t e3, int64_t e2, int64_t e1, int64_t e0) {
1362
+ simde__m512i_private r_;
1363
+
1364
+ r_.i64[0] = e7;
1365
+ r_.i64[1] = e6;
1366
+ r_.i64[2] = e5;
1367
+ r_.i64[3] = e4;
1368
+ r_.i64[4] = e3;
1369
+ r_.i64[5] = e2;
1370
+ r_.i64[6] = e1;
1371
+ r_.i64[7] = e0;
1372
+
1373
+ return simde__m512i_from_private(r_);
1374
+ }
1375
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1376
+ #define _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
1377
+ #endif
1378
+
1379
+ SIMDE__FUNCTION_ATTRIBUTES
1380
+ simde__m512
1381
+ simde_mm512_setr_ps (simde_float32 e15, simde_float32 e14, simde_float32 e13, simde_float32 e12,
1382
+ simde_float32 e11, simde_float32 e10, simde_float32 e9, simde_float32 e8,
1383
+ simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4,
1384
+ simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) {
1385
+ simde__m512_private r_;
1386
+
1387
+ r_.f32[ 0] = e15;
1388
+ r_.f32[ 1] = e14;
1389
+ r_.f32[ 2] = e13;
1390
+ r_.f32[ 3] = e12;
1391
+ r_.f32[ 4] = e11;
1392
+ r_.f32[ 5] = e10;
1393
+ r_.f32[ 6] = e9;
1394
+ r_.f32[ 7] = e8;
1395
+ r_.f32[ 8] = e7;
1396
+ r_.f32[ 9] = e6;
1397
+ r_.f32[10] = e5;
1398
+ r_.f32[11] = e4;
1399
+ r_.f32[12] = e3;
1400
+ r_.f32[13] = e2;
1401
+ r_.f32[14] = e1;
1402
+ r_.f32[15] = e0;
1403
+
1404
+ return simde__m512_from_private(r_);
1405
+ }
1406
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1407
+ #define _mm512_setr_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
1408
+ #endif
1409
+
1410
+ SIMDE__FUNCTION_ATTRIBUTES
1411
+ simde__m512d
1412
+ simde_mm512_setr_pd (simde_float64 e7, simde_float64 e6, simde_float64 e5, simde_float64 e4, simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) {
1413
+ simde__m512d_private r_;
1414
+
1415
+ r_.f64[0] = e7;
1416
+ r_.f64[1] = e6;
1417
+ r_.f64[2] = e5;
1418
+ r_.f64[3] = e4;
1419
+ r_.f64[4] = e3;
1420
+ r_.f64[5] = e2;
1421
+ r_.f64[6] = e1;
1422
+ r_.f64[7] = e0;
1423
+
1424
+ return simde__m512d_from_private(r_);
1425
+ }
1426
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1427
+ #define _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
1428
+ #endif
1429
+
1430
+ SIMDE__FUNCTION_ATTRIBUTES
1431
+ simde__m512i
1432
+ simde_mm512_setr4_epi32 (int32_t d, int32_t c, int32_t b, int32_t a) {
1433
+ simde__m512i_private r_;
1434
+
1435
+ r_.i32[ 0] = d;
1436
+ r_.i32[ 1] = c;
1437
+ r_.i32[ 2] = b;
1438
+ r_.i32[ 3] = a;
1439
+ r_.i32[ 4] = d;
1440
+ r_.i32[ 5] = c;
1441
+ r_.i32[ 6] = b;
1442
+ r_.i32[ 7] = a;
1443
+ r_.i32[ 8] = d;
1444
+ r_.i32[ 9] = c;
1445
+ r_.i32[10] = b;
1446
+ r_.i32[11] = a;
1447
+ r_.i32[12] = d;
1448
+ r_.i32[13] = c;
1449
+ r_.i32[14] = b;
1450
+ r_.i32[15] = a;
1451
+
1452
+ return simde__m512i_from_private(r_);
1453
+ }
1454
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1455
+ #define _mm512_setr4_epi32(d,c,b,a) simde_mm512_setr4_epi32(d,c,b,a)
1456
+ #endif
1457
+
1458
+ SIMDE__FUNCTION_ATTRIBUTES
1459
+ simde__m512i
1460
+ simde_mm512_setr4_epi64 (int64_t d, int64_t c, int64_t b, int64_t a) {
1461
+ simde__m512i_private r_;
1462
+
1463
+ r_.i64[0] = d;
1464
+ r_.i64[1] = c;
1465
+ r_.i64[2] = b;
1466
+ r_.i64[3] = a;
1467
+ r_.i64[4] = d;
1468
+ r_.i64[5] = c;
1469
+ r_.i64[6] = b;
1470
+ r_.i64[7] = a;
1471
+
1472
+ return simde__m512i_from_private(r_);
1473
+ }
1474
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1475
+ #define _mm512_setr4_epi64(d,c,b,a) simde_mm512_setr4_epi64(d,c,b,a)
1476
+ #endif
1477
+
1478
+ SIMDE__FUNCTION_ATTRIBUTES
1479
+ simde__m512
1480
+ simde_mm512_setr4_ps (simde_float32 d, simde_float32 c, simde_float32 b, simde_float32 a) {
1481
+ simde__m512_private r_;
1482
+
1483
+ r_.f32[ 0] = d;
1484
+ r_.f32[ 1] = c;
1485
+ r_.f32[ 2] = b;
1486
+ r_.f32[ 3] = a;
1487
+ r_.f32[ 4] = d;
1488
+ r_.f32[ 5] = c;
1489
+ r_.f32[ 6] = b;
1490
+ r_.f32[ 7] = a;
1491
+ r_.f32[ 8] = d;
1492
+ r_.f32[ 9] = c;
1493
+ r_.f32[10] = b;
1494
+ r_.f32[11] = a;
1495
+ r_.f32[12] = d;
1496
+ r_.f32[13] = c;
1497
+ r_.f32[14] = b;
1498
+ r_.f32[15] = a;
1499
+
1500
+ return simde__m512_from_private(r_);
1501
+ }
1502
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1503
+ #define _mm512_setr4_ps(d,c,b,a) simde_mm512_setr4_ps(d,c,b,a)
1504
+ #endif
1505
+
1506
+ SIMDE__FUNCTION_ATTRIBUTES
1507
+ simde__m512d
1508
+ simde_mm512_setr4_pd (simde_float64 d, simde_float64 c, simde_float64 b, simde_float64 a) {
1509
+ simde__m512d_private r_;
1510
+
1511
+ r_.f64[0] = d;
1512
+ r_.f64[1] = c;
1513
+ r_.f64[2] = b;
1514
+ r_.f64[3] = a;
1515
+ r_.f64[4] = d;
1516
+ r_.f64[5] = c;
1517
+ r_.f64[6] = b;
1518
+ r_.f64[7] = a;
1519
+
1520
+ return simde__m512d_from_private(r_);
1521
+ }
1522
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1523
+ #define _mm512_setr4_pd(d,c,b,a) simde_mm512_setr4_pd(d,c,b,a)
1524
+ #endif
1525
+
1526
+ SIMDE__FUNCTION_ATTRIBUTES
1527
+ simde__m512i
1528
+ simde_mm512_setzero_si512(void) {
1529
+ #if defined(SIMDE_AVX512F_NATIVE)
1530
+ return _mm512_setzero_si512();
1531
+ #else
1532
+ simde__m512i r;
1533
+ simde_memset(&r, 0, sizeof(r));
1534
+ return r;
1535
+ #endif
1536
+ }
1537
+ #define simde_mm512_setzero_epi32() simde_mm512_setzero_si512()
1538
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1539
+ #define _mm512_setzero_si512() simde_mm512_setzero_si512()
1540
+ #define _mm512_setzero_epi32() simde_mm512_setzero_si512()
1541
+ #endif
1542
+
1543
+ SIMDE__FUNCTION_ATTRIBUTES
1544
+ simde__m512i
1545
+ simde_mm512_setone_si512(void) {
1546
+ simde__m512i_private r_;
1547
+
1548
+ SIMDE__VECTORIZE
1549
+ for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
1550
+ r_.i32f[i] = ~((int_fast32_t) 0);
1551
+ }
1552
+
1553
+ return simde__m512i_from_private(r_);
1554
+ }
1555
+ #define simde_mm512_setone_epi32() simde_mm512_setone_si512()
1556
+
1557
+ SIMDE__FUNCTION_ATTRIBUTES
1558
+ simde__m512
1559
+ simde_mm512_setzero_ps(void) {
1560
+ #if defined(SIMDE_AVX512F_NATIVE)
1561
+ return _mm512_setzero_ps();
1562
+ #else
1563
+ return simde_mm512_castsi512_ps(simde_mm512_setzero_si512());
1564
+ #endif
1565
+ }
1566
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1567
+ #define _mm512_setzero_si512() simde_mm512_setzero_si512()
1568
+ #endif
1569
+
1570
+ SIMDE__FUNCTION_ATTRIBUTES
1571
+ simde__m512
1572
+ simde_mm512_setone_ps(void) {
1573
+ return simde_mm512_castsi512_ps(simde_mm512_setone_si512());
1574
+ }
1575
+
1576
+ SIMDE__FUNCTION_ATTRIBUTES
1577
+ simde__m512d
1578
+ simde_mm512_setzero_pd(void) {
1579
+ #if defined(SIMDE_AVX512F_NATIVE)
1580
+ return _mm512_setzero_pd();
1581
+ #else
1582
+ return simde_mm512_castsi512_pd(simde_mm512_setzero_si512());
1583
+ #endif
1584
+ }
1585
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1586
+ #define _mm512_setzero_si512() simde_mm512_setzero_si512()
1587
+ #endif
1588
+
1589
+ SIMDE__FUNCTION_ATTRIBUTES
1590
+ simde__m512d
1591
+ simde_mm512_setone_pd(void) {
1592
+ return simde_mm512_castsi512_pd(simde_mm512_setone_si512());
1593
+ }
1594
+
1595
+ SIMDE__FUNCTION_ATTRIBUTES
1596
+ simde__m512i
1597
+ simde_mm512_mask_mov_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a) {
1598
+ #if defined(SIMDE_AVX512F_NATIVE)
1599
+ return _mm512_mask_mov_epi32(src, k, a);
1600
+ #else
1601
+ simde__m512i_private
1602
+ src_ = simde__m512i_to_private(src),
1603
+ a_ = simde__m512i_to_private(a),
1604
+ r_;
1605
+
1606
+ SIMDE__VECTORIZE
1607
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1608
+ r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i];
1609
+ }
1610
+
1611
+ return simde__m512i_from_private(r_);
1612
+ #endif
1613
+ }
1614
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1615
+ #define _mm512_mask_mov_epi32(src, k, a) simde_mm512_mask_mov_epi32(src, k, a)
1616
+ #endif
1617
+
1618
+ SIMDE__FUNCTION_ATTRIBUTES
1619
+ simde__m512i
1620
+ simde_mm512_mask_mov_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a) {
1621
+ #if defined(SIMDE_AVX512F_NATIVE)
1622
+ return _mm512_mask_mov_epi64(src, k, a);
1623
+ #else
1624
+ simde__m512i_private
1625
+ src_ = simde__m512i_to_private(src),
1626
+ a_ = simde__m512i_to_private(a),
1627
+ r_;
1628
+
1629
+ SIMDE__VECTORIZE
1630
+ for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
1631
+ r_.i64[i] = ((k >> i) & 1) ? a_.i64[i] : src_.i64[i];
1632
+ }
1633
+
1634
+ return simde__m512i_from_private(r_);
1635
+ #endif
1636
+ }
1637
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1638
+ #define _mm512_mask_mov_epi64(src, k, a) simde_mm512_mask_mov_epi64(src, k, a)
1639
+ #endif
1640
+
1641
+ SIMDE__FUNCTION_ATTRIBUTES
1642
+ simde__m512
1643
+ simde_mm512_mask_mov_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
1644
+ #if defined(SIMDE_AVX512F_NATIVE)
1645
+ return _mm512_mask_mov_ps(src, k, a);
1646
+ #else
1647
+ simde__m512_private
1648
+ src_ = simde__m512_to_private(src),
1649
+ a_ = simde__m512_to_private(a),
1650
+ r_;
1651
+
1652
+ SIMDE__VECTORIZE
1653
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1654
+ r_.f32[i] = ((k >> i) & 1) ? a_.f32[i] : src_.f32[i];
1655
+ }
1656
+
1657
+ return simde__m512_from_private(r_);
1658
+ #endif
1659
+ }
1660
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1661
+ #define _mm512_mask_mov_ps(src, k, a) simde_mm512_mask_mov_ps(src, k, a)
1662
+ #endif
1663
+
1664
+ SIMDE__FUNCTION_ATTRIBUTES
1665
+ simde__m512d
1666
+ simde_mm512_mask_mov_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
1667
+ #if defined(SIMDE_AVX512F_NATIVE)
1668
+ return _mm512_mask_mov_pd(src, k, a);
1669
+ #else
1670
+ simde__m512d_private
1671
+ src_ = simde__m512d_to_private(src),
1672
+ a_ = simde__m512d_to_private(a),
1673
+ r_;
1674
+
1675
+ SIMDE__VECTORIZE
1676
+ for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1677
+ r_.f64[i] = ((k >> i) & 1) ? a_.f64[i] : src_.f64[i];
1678
+ }
1679
+
1680
+ return simde__m512d_from_private(r_);
1681
+ #endif
1682
+ }
1683
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1684
+ #define _mm512_mask_mov_pd(src, k, a) simde_mm512_mask_mov_pd(src, k, a)
1685
+ #endif
1686
+
1687
+ SIMDE__FUNCTION_ATTRIBUTES
1688
+ simde__m512i
1689
+ simde_mm512_maskz_mov_epi32(simde__mmask16 k, simde__m512i a) {
1690
+ #if defined(SIMDE_AVX512F_NATIVE)
1691
+ return _mm512_maskz_mov_epi32(k, a);
1692
+ #else
1693
+ simde__m512i_private
1694
+ a_ = simde__m512i_to_private(a),
1695
+ r_;
1696
+
1697
+ SIMDE__VECTORIZE
1698
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1699
+ r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : INT32_C(0);
1700
+ }
1701
+
1702
+ return simde__m512i_from_private(r_);
1703
+ #endif
1704
+ }
1705
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1706
+ #define _mm512_maskz_mov_epi32(k, a) simde_mm512_maskz_mov_epi32(k, a)
1707
+ #endif
1708
+
1709
+ SIMDE__FUNCTION_ATTRIBUTES
1710
+ simde__m512i
1711
+ simde_mm512_maskz_mov_epi64(simde__mmask8 k, simde__m512i a) {
1712
+ #if defined(SIMDE_AVX512F_NATIVE)
1713
+ return _mm512_maskz_mov_epi64(k, a);
1714
+ #else
1715
+ simde__m512i_private
1716
+ a_ = simde__m512i_to_private(a),
1717
+ r_;
1718
+
1719
+ SIMDE__VECTORIZE
1720
+ for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
1721
+ r_.i64[i] = ((k >> i) & 1) ? a_.i64[i] : INT64_C(0);
1722
+ }
1723
+
1724
+ return simde__m512i_from_private(r_);
1725
+ #endif
1726
+ }
1727
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1728
+ #define _mm512_maskz_mov_epi64(k, a) simde_mm512_maskz_mov_epi64(k, a)
1729
+ #endif
1730
+
1731
+ SIMDE__FUNCTION_ATTRIBUTES
1732
+ simde__m512
1733
+ simde_mm512_maskz_mov_ps(simde__mmask16 k, simde__m512 a) {
1734
+ #if defined(SIMDE_AVX512F_NATIVE)
1735
+ return _mm512_maskz_mov_ps(k, a);
1736
+ #else
1737
+ simde__m512_private
1738
+ a_ = simde__m512_to_private(a),
1739
+ r_;
1740
+
1741
+ SIMDE__VECTORIZE
1742
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1743
+ r_.f32[i] = ((k >> i) & 1) ? a_.f32[i] : SIMDE_FLOAT32_C(0.0);
1744
+ }
1745
+
1746
+ return simde__m512_from_private(r_);
1747
+ #endif
1748
+ }
1749
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1750
+ #define _mm512_maskz_mov_ps(k, a) simde_mm512_maskz_mov_ps(k, a)
1751
+ #endif
1752
+
1753
+ SIMDE__FUNCTION_ATTRIBUTES
1754
+ simde__m512d
1755
+ simde_mm512_maskz_mov_pd(simde__mmask8 k, simde__m512d a) {
1756
+ #if defined(SIMDE_AVX512F_NATIVE)
1757
+ return _mm512_maskz_mov_pd(k, a);
1758
+ #else
1759
+ simde__m512d_private
1760
+ a_ = simde__m512d_to_private(a),
1761
+ r_;
1762
+
1763
+ SIMDE__VECTORIZE
1764
+ for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1765
+ r_.f64[i] = ((k >> i) & 1) ? a_.f64[i] : SIMDE_FLOAT64_C(0.0);
1766
+ }
1767
+
1768
+ return simde__m512d_from_private(r_);
1769
+ #endif
1770
+ }
1771
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1772
+ #define _mm512_maskz_mov_pd(k, a) simde_mm512_maskz_mov_pd(k, a)
1773
+ #endif
1774
+
1775
+ SIMDE__FUNCTION_ATTRIBUTES
1776
+ simde__m512i
1777
+ simde_mm512_abs_epi32(simde__m512i a) {
1778
+ #if defined(SIMDE_AVX512F_NATIVE)
1779
+ return _mm512_abs_epi32(a);
1780
+ #else
1781
+ simde__m512i_private
1782
+ r_,
1783
+ a_ = simde__m512i_to_private(a);
1784
+
1785
+ SIMDE__VECTORIZE
1786
+ for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
1787
+ r_.i32[i] = (a_.i32[i] < INT64_C(0)) ? -a_.i32[i] : a_.i32[i];
1788
+ }
1789
+
1790
+ return simde__m512i_from_private(r_);
1791
+ #endif
1792
+ }
1793
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1794
+ # define _mm512_abs_epi32(a) simde_mm512_abs_epi32(a)
1795
+ #endif
1796
+
1797
+ SIMDE__FUNCTION_ATTRIBUTES
1798
+ simde__m512i
1799
+ simde_mm512_mask_abs_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a) {
1800
+ #if defined(SIMDE_AVX512F_NATIVE)
1801
+ return _mm512_mask_abs_epi32(src, k, a);
1802
+ #else
1803
+ return simde_mm512_mask_mov_epi32(src, k, simde_mm512_abs_epi32(a));
1804
+ #endif
1805
+ }
1806
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1807
+ #define _mm512_mask_abs_epi32(src, k, a) simde_mm512_mask_abs_epi32(src, k, a)
1808
+ #endif
1809
+
1810
+ SIMDE__FUNCTION_ATTRIBUTES
1811
+ simde__m512i
1812
+ simde_mm512_maskz_abs_epi32(simde__mmask16 k, simde__m512i a) {
1813
+ #if defined(SIMDE_AVX512F_NATIVE)
1814
+ return _mm512_maskz_abs_epi32(k, a);
1815
+ #else
1816
+ return simde_mm512_maskz_mov_epi32(k, simde_mm512_abs_epi32(a));
1817
+ #endif
1818
+ }
1819
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1820
+ #define _mm512_maskz_abs_epi32(k, a) simde_mm512_maskz_abs_epi32(k, a)
1821
+ #endif
1822
+
1823
+ SIMDE__FUNCTION_ATTRIBUTES
1824
+ simde__m512i
1825
+ simde_mm512_abs_epi64(simde__m512i a) {
1826
+ #if defined(SIMDE_AVX512F_NATIVE)
1827
+ return _mm512_abs_epi64(a);
1828
+ #else
1829
+ simde__m512i_private
1830
+ r_,
1831
+ a_ = simde__m512i_to_private(a);
1832
+
1833
+ SIMDE__VECTORIZE
1834
+ for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
1835
+ r_.i64[i] = (a_.i64[i] < INT64_C(0)) ? -a_.i64[i] : a_.i64[i];
1836
+ }
1837
+
1838
+ return simde__m512i_from_private(r_);
1839
+ #endif
1840
+ }
1841
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1842
+ # define _mm512_abs_epi64(a) simde_mm512_abs_epi64(a)
1843
+ #endif
1844
+
1845
+ SIMDE__FUNCTION_ATTRIBUTES
1846
+ simde__m512i
1847
+ simde_mm512_mask_abs_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a) {
1848
+ #if defined(SIMDE_AVX512F_NATIVE)
1849
+ return _mm512_mask_abs_epi64(src, k, a);
1850
+ #else
1851
+ return simde_mm512_mask_mov_epi64(src, k, simde_mm512_abs_epi64(a));
1852
+ #endif
1853
+ }
1854
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1855
+ #define _mm512_mask_abs_epi64(src, k, a) simde_mm512_mask_abs_epi64(src, k, a)
1856
+ #endif
1857
+
1858
+ SIMDE__FUNCTION_ATTRIBUTES
1859
+ simde__m512i
1860
+ simde_mm512_maskz_abs_epi64(simde__mmask8 k, simde__m512i a) {
1861
+ #if defined(SIMDE_AVX512F_NATIVE)
1862
+ return _mm512_maskz_abs_epi64(k, a);
1863
+ #else
1864
+ return simde_mm512_maskz_mov_epi64(k, simde_mm512_abs_epi64(a));
1865
+ #endif
1866
+ }
1867
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1868
+ #define _mm512_maskz_abs_epi64(k, a) simde_mm512_maskz_abs_epi64(k, a)
1869
+ #endif
1870
+
1871
+ SIMDE__FUNCTION_ATTRIBUTES
1872
+ simde__m512i
1873
+ simde_mm512_add_epi32 (simde__m512i a, simde__m512i b) {
1874
+ #if defined(SIMDE_AVX512F_NATIVE)
1875
+ return _mm512_add_epi32(a, b);
1876
+ #else
1877
+ simde__m512i_private
1878
+ r_,
1879
+ a_ = simde__m512i_to_private(a),
1880
+ b_ = simde__m512i_to_private(b);
1881
+
1882
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
1883
+ r_.i32 = a_.i32 + b_.i32;
1884
+ #else
1885
+ SIMDE__VECTORIZE
1886
+ for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
1887
+ r_.m256i[i] = simde_mm256_add_epi32(a_.m256i[i], b_.m256i[i]);
1888
+ }
1889
+ #endif
1890
+
1891
+ return simde__m512i_from_private(r_);
1892
+ #endif
1893
+ }
1894
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1895
+ #define _mm512_add_epi32(a, b) simde_mm512_add_epi32(a, b)
1896
+ #endif
1897
+
1898
+ SIMDE__FUNCTION_ATTRIBUTES
1899
+ simde__m512i
1900
+ simde_mm512_mask_add_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
1901
+ #if defined(SIMDE_AVX512F_NATIVE)
1902
+ return _mm512_mask_add_epi32(src, k, a, b);
1903
+ #else
1904
+ return simde_mm512_mask_mov_epi32(src, k, simde_mm512_add_epi32(a, b));
1905
+ #endif
1906
+ }
1907
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1908
+ #define _mm512_mask_add_epi32(src, k, a, b) simde_mm512_mask_add_epi32(src, k, a, b)
1909
+ #endif
1910
+
1911
+ SIMDE__FUNCTION_ATTRIBUTES
1912
+ simde__m512i
1913
+ simde_mm512_maskz_add_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) {
1914
+ #if defined(SIMDE_AVX512F_NATIVE)
1915
+ return _mm512_maskz_add_epi32(k, a, b);
1916
+ #else
1917
+ return simde_mm512_maskz_mov_epi32(k, simde_mm512_add_epi32(a, b));
1918
+ #endif
1919
+ }
1920
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1921
+ #define _mm512_maskz_add_epi32(k, a, b) simde_mm512_maskz_add_epi32(k, a, b)
1922
+ #endif
1923
+
1924
+ SIMDE__FUNCTION_ATTRIBUTES
1925
+ simde__m512i
1926
+ simde_mm512_add_epi64 (simde__m512i a, simde__m512i b) {
1927
+ #if defined(SIMDE_AVX512F_NATIVE)
1928
+ return _mm512_add_epi64(a, b);
1929
+ #else
1930
+ simde__m512i_private
1931
+ r_,
1932
+ a_ = simde__m512i_to_private(a),
1933
+ b_ = simde__m512i_to_private(b);
1934
+
1935
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
1936
+ r_.i64 = a_.i64 + b_.i64;
1937
+ #else
1938
+ SIMDE__VECTORIZE
1939
+ for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
1940
+ r_.m256i[i] = simde_mm256_add_epi64(a_.m256i[i], b_.m256i[i]);
1941
+ }
1942
+ #endif
1943
+
1944
+ return simde__m512i_from_private(r_);
1945
+ #endif
1946
+ }
1947
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1948
+ #define _mm512_add_epi64(a, b) simde_mm512_add_epi64(a, b)
1949
+ #endif
1950
+
1951
+ SIMDE__FUNCTION_ATTRIBUTES
1952
+ simde__m512i
1953
+ simde_mm512_mask_add_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
1954
+ #if defined(SIMDE_AVX512F_NATIVE)
1955
+ return _mm512_mask_add_epi64(src, k, a, b);
1956
+ #else
1957
+ return simde_mm512_mask_mov_epi64(src, k, simde_mm512_add_epi64(a, b));
1958
+ #endif
1959
+ }
1960
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1961
+ #define _mm512_mask_add_epi64(src, k, a, b) simde_mm512_mask_add_epi64(src, k, a, b)
1962
+ #endif
1963
+
1964
+ SIMDE__FUNCTION_ATTRIBUTES
1965
+ simde__m512i
1966
+ simde_mm512_maskz_add_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) {
1967
+ #if defined(SIMDE_AVX512F_NATIVE)
1968
+ return _mm512_maskz_add_epi64(k, a, b);
1969
+ #else
1970
+ return simde_mm512_maskz_mov_epi64(k, simde_mm512_add_epi64(a, b));
1971
+ #endif
1972
+ }
1973
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
1974
+ #define _mm512_maskz_add_epi64(k, a, b) simde_mm512_maskz_add_epi64(k, a, b)
1975
+ #endif
1976
+
1977
+
1978
+ SIMDE__FUNCTION_ATTRIBUTES
1979
+ simde__m512
1980
+ simde_mm512_add_ps (simde__m512 a, simde__m512 b) {
1981
+ #if defined(SIMDE_AVX512F_NATIVE)
1982
+ return _mm512_add_ps(a, b);
1983
+ #else
1984
+ simde__m512_private
1985
+ r_,
1986
+ a_ = simde__m512_to_private(a),
1987
+ b_ = simde__m512_to_private(b);
1988
+
1989
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
1990
+ r_.f32 = a_.f32 + b_.f32;
1991
+ #else
1992
+ SIMDE__VECTORIZE
1993
+ for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
1994
+ r_.m256[i] = simde_mm256_add_ps(a_.m256[i], b_.m256[i]);
1995
+ }
1996
+ #endif
1997
+
1998
+ return simde__m512_from_private(r_);
1999
+ #endif
2000
+ }
2001
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2002
+ #define _mm512_add_ps(a, b) simde_mm512_add_ps(a, b)
2003
+ #endif
2004
+
2005
+ SIMDE__FUNCTION_ATTRIBUTES
2006
+ simde__m512
2007
+ simde_mm512_mask_add_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
2008
+ #if defined(SIMDE_AVX512F_NATIVE)
2009
+ return _mm512_mask_add_ps(src, k, a, b);
2010
+ #else
2011
+ return simde_mm512_mask_mov_ps(src, k, simde_mm512_add_ps(a, b));
2012
+ #endif
2013
+ }
2014
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2015
+ #define _mm512_mask_add_ps(src, k, a, b) simde_mm512_mask_add_ps(src, k, a, b)
2016
+ #endif
2017
+
2018
+ SIMDE__FUNCTION_ATTRIBUTES
2019
+ simde__m512
2020
+ simde_mm512_maskz_add_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
2021
+ #if defined(SIMDE_AVX512F_NATIVE)
2022
+ return _mm512_maskz_add_ps(k, a, b);
2023
+ #else
2024
+ return simde_mm512_maskz_mov_ps(k, simde_mm512_add_ps(a, b));
2025
+ #endif
2026
+ }
2027
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2028
+ #define _mm512_maskz_add_ps(k, a, b) simde_mm512_maskz_add_ps(k, a, b)
2029
+ #endif
2030
+
2031
+
2032
+ SIMDE__FUNCTION_ATTRIBUTES
2033
+ simde__m512d
2034
+ simde_mm512_add_pd (simde__m512d a, simde__m512d b) {
2035
+ #if defined(SIMDE_AVX512F_NATIVE)
2036
+ return _mm512_add_pd(a, b);
2037
+ #else
2038
+ simde__m512d_private
2039
+ r_,
2040
+ a_ = simde__m512d_to_private(a),
2041
+ b_ = simde__m512d_to_private(b);
2042
+
2043
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2044
+ r_.f64 = a_.f64 + b_.f64;
2045
+ #else
2046
+ SIMDE__VECTORIZE
2047
+ for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
2048
+ r_.m256d[i] = simde_mm256_add_pd(a_.m256d[i], b_.m256d[i]);
2049
+ }
2050
+ #endif
2051
+
2052
+ return simde__m512d_from_private(r_);
2053
+ #endif
2054
+ }
2055
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2056
+ #define _mm512_add_pd(a, b) simde_mm512_add_pd(a, b)
2057
+ #endif
2058
+
2059
+ SIMDE__FUNCTION_ATTRIBUTES
2060
+ simde__m512d
2061
+ simde_mm512_mask_add_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
2062
+ #if defined(SIMDE_AVX512F_NATIVE)
2063
+ return _mm512_mask_add_pd(src, k, a, b);
2064
+ #else
2065
+ return simde_mm512_mask_mov_pd(src, k, simde_mm512_add_pd(a, b));
2066
+ #endif
2067
+ }
2068
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2069
+ #define _mm512_mask_add_pd(src, k, a, b) simde_mm512_mask_add_pd(src, k, a, b)
2070
+ #endif
2071
+
2072
+ SIMDE__FUNCTION_ATTRIBUTES
2073
+ simde__m512d
2074
+ simde_mm512_maskz_add_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
2075
+ #if defined(SIMDE_AVX512F_NATIVE)
2076
+ return _mm512_maskz_add_pd(k, a, b);
2077
+ #else
2078
+ return simde_mm512_maskz_mov_pd(k, simde_mm512_add_pd(a, b));
2079
+ #endif
2080
+ }
2081
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2082
+ #define _mm512_maskz_add_pd(k, a, b) simde_mm512_maskz_add_pd(k, a, b)
2083
+ #endif
2084
+
2085
+ SIMDE__FUNCTION_ATTRIBUTES
2086
+ simde__m512i
2087
+ simde_mm512_and_si512 (simde__m512i a, simde__m512i b) {
2088
+ #if defined(SIMDE_AVX512F_NATIVE)
2089
+ return _mm512_and_si512(a, b);
2090
+ #else
2091
+ simde__m512i_private
2092
+ r_,
2093
+ a_ = simde__m512i_to_private(a),
2094
+ b_ = simde__m512i_to_private(b);
2095
+
2096
+ #if defined(SIMDE_ARCH_X86_AVX2)
2097
+ r_.m256i[0] = simde_mm256_and_si256(a_.m256i[0], b_.m256i[0]);
2098
+ r_.m256i[1] = simde_mm256_and_si256(a_.m256i[1], b_.m256i[1]);
2099
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2100
+ r_.i32f = a_.i32f & b_.i32f;
2101
+ #else
2102
+ SIMDE__VECTORIZE
2103
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
2104
+ r_.i32[i] = a_.i32[i] & b_.i32[i];
2105
+ }
2106
+ #endif
2107
+
2108
+ return simde__m512i_from_private(r_);
2109
+ #endif
2110
+ }
2111
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2112
+ #define _mm512_and_si512(a, b) simde_mm512_and_si512(a, b)
2113
+ #endif
2114
+
2115
+ SIMDE__FUNCTION_ATTRIBUTES
2116
+ simde__m512i
2117
+ simde_mm512_andnot_si512 (simde__m512i a, simde__m512i b) {
2118
+ #if defined(SIMDE_AVX512F_NATIVE)
2119
+ return _mm512_andnot_si512(a, b);
2120
+ #else
2121
+ simde__m512i_private
2122
+ r_,
2123
+ a_ = simde__m512i_to_private(a),
2124
+ b_ = simde__m512i_to_private(b);
2125
+
2126
+ #if defined(SIMDE_ARCH_X86_AVX2)
2127
+ r_.m256i[0] = simde_mm256_andnot_si256(a_.m256i[0], b_.m256i[0]);
2128
+ r_.m256i[1] = simde_mm256_andnot_si256(a_.m256i[1], b_.m256i[1]);
2129
+ #else
2130
+ SIMDE__VECTORIZE
2131
+ for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
2132
+ r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i];
2133
+ }
2134
+ #endif
2135
+
2136
+ return simde__m512i_from_private(r_);
2137
+ #endif
2138
+ }
2139
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2140
+ #define _mm512_andnot_si512(a, b) simde_mm512_andnot_si512(a, b)
2141
+ #endif
2142
+
2143
+ SIMDE__FUNCTION_ATTRIBUTES
2144
+ simde__m512i
2145
+ simde_mm512_broadcast_i32x4 (simde__m128i a) {
2146
+ #if defined(SIMDE_AVX512F_NATIVE)
2147
+ return _mm512_broadcast_i32x4(a);
2148
+ #else
2149
+ simde__m512i_private r_;
2150
+
2151
+ #if defined(SIMDE_ARCH_X86_AVX2)
2152
+ r_.m256i[1] = r_.m256i[0] = simde_mm256_broadcastsi128_si256(a);
2153
+ #elif defined(SIMDE_ARCH_X86_SSE2)
2154
+ r_.m128i[3] = r_.m128i[2] = r_.m128i[1] = r_.m128i[0] = a;
2155
+ #else
2156
+ SIMDE__VECTORIZE
2157
+ for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
2158
+ r_.m128i[i] = a;
2159
+ }
2160
+ #endif
2161
+
2162
+ return simde__m512i_from_private(r_);
2163
+ #endif
2164
+ }
2165
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2166
+ #define _mm512_broadcast_i32x4(a) simde_mm512_broadcast_i32x4(a)
2167
+ #endif
2168
+
2169
+ SIMDE__FUNCTION_ATTRIBUTES
2170
+ simde__mmask16
2171
+ simde_mm512_cmpeq_epi32_mask (simde__m512i a, simde__m512i b) {
2172
+ #if defined(SIMDE_AVX512F_NATIVE)
2173
+ return _mm512_cmpeq_epi32_mask(a, b);
2174
+ #else
2175
+ simde__m512i_private
2176
+ r_,
2177
+ a_ = simde__m512i_to_private(a),
2178
+ b_ = simde__m512i_to_private(b);
2179
+
2180
+ for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
2181
+ r_.m256i[i] = simde_mm256_cmpeq_epi32(a_.m256i[i], b_.m256i[i]);
2182
+ }
2183
+
2184
+ return simde__m512i_private_to_mmask16(r_);
2185
+ #endif
2186
+ }
2187
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2188
+ #define _mm512_cmpeq_epi32_mask(a, b) simde_mm512_cmpeq_epi32_mask(a, b)
2189
+ #endif
2190
+
2191
+ SIMDE__FUNCTION_ATTRIBUTES
2192
+ simde__mmask16
2193
+ simde_mm512_mask_cmpeq_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) {
2194
+ #if defined(SIMDE_AVX512F_NATIVE)
2195
+ return _mm512_mask_cmpeq_epi32_mask(k1, a, b);
2196
+ #else
2197
+ return simde_mm512_cmpeq_epi32_mask(a, b) & k1;
2198
+ #endif
2199
+ }
2200
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2201
+ #define _mm512_mask_cmpeq_epi32_mask(k1, a, b) simde_mm512_mask_cmpeq_epi32_mask(k1, a, b)
2202
+ #endif
2203
+
2204
+ SIMDE__FUNCTION_ATTRIBUTES
2205
+ simde__mmask8
2206
+ simde_mm512_cmpeq_epi64_mask (simde__m512i a, simde__m512i b) {
2207
+ #if defined(SIMDE_AVX512F_NATIVE)
2208
+ return _mm512_cmpeq_epi64_mask(a, b);
2209
+ #else
2210
+ simde__m512i_private
2211
+ r_,
2212
+ a_ = simde__m512i_to_private(a),
2213
+ b_ = simde__m512i_to_private(b);
2214
+
2215
+ for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
2216
+ r_.m256i[i] = simde_mm256_cmpeq_epi64(a_.m256i[i], b_.m256i[i]);
2217
+ }
2218
+
2219
+ return simde__m512i_private_to_mmask8(r_);
2220
+ #endif
2221
+ }
2222
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2223
+ #define _mm512_cmpeq_epi64_mask(a, b) simde_mm512_cmpeq_epi64_mask(a, b)
2224
+ #endif
2225
+
2226
+ SIMDE__FUNCTION_ATTRIBUTES
2227
+ simde__mmask8
2228
+ simde_mm512_mask_cmpeq_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) {
2229
+ #if defined(SIMDE_AVX512F_NATIVE)
2230
+ return _mm512_mask_cmpeq_epi64_mask(k1, a, b);
2231
+ #else
2232
+ return simde_mm512_cmpeq_epi64_mask(a, b) & k1;
2233
+ #endif
2234
+ }
2235
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2236
+ #define _mm512_mask_cmpeq_epi64_mask(k1, a, b) simde_mm512_mask_cmpeq_epi64_mask(k1, a, b)
2237
+ #endif
2238
+
2239
+ SIMDE__FUNCTION_ATTRIBUTES
2240
+ simde__mmask16
2241
+ simde_mm512_cmpgt_epi32_mask (simde__m512i a, simde__m512i b) {
2242
+ #if defined(SIMDE_AVX512F_NATIVE)
2243
+ return _mm512_cmpgt_epi32_mask(a, b);
2244
+ #else
2245
+ simde__m512i_private
2246
+ r_,
2247
+ a_ = simde__m512i_to_private(a),
2248
+ b_ = simde__m512i_to_private(b);
2249
+
2250
+ for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
2251
+ r_.m256i[i] = simde_mm256_cmpgt_epi32(a_.m256i[i], b_.m256i[i]);
2252
+ }
2253
+
2254
+ return simde__m512i_private_to_mmask16(r_);
2255
+ #endif
2256
+ }
2257
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2258
+ #define _mm512_cmpgt_epi32_mask(a, b) simde_mm512_cmpgt_epi32_mask(a, b)
2259
+ #endif
2260
+
2261
+ SIMDE__FUNCTION_ATTRIBUTES
2262
+ simde__mmask16
2263
+ simde_mm512_mask_cmpgt_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) {
2264
+ #if defined(SIMDE_AVX512F_NATIVE)
2265
+ return _mm512_mask_cmpgt_epi32_mask(k1, a, b);
2266
+ #else
2267
+ return simde_mm512_cmpgt_epi32_mask(a, b) & k1;
2268
+ #endif
2269
+ }
2270
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2271
+ #define _mm512_mask_cmpgt_epi32_mask(k1, a, b) simde_mm512_mask_cmpgt_epi32_mask(k1, a, b)
2272
+ #endif
2273
+
2274
+ SIMDE__FUNCTION_ATTRIBUTES
2275
+ simde__mmask8
2276
+ simde_mm512_cmpgt_epi64_mask (simde__m512i a, simde__m512i b) {
2277
+ #if defined(SIMDE_AVX512F_NATIVE)
2278
+ return _mm512_cmpgt_epi64_mask(a, b);
2279
+ #else
2280
+ simde__m512i_private
2281
+ r_,
2282
+ a_ = simde__m512i_to_private(a),
2283
+ b_ = simde__m512i_to_private(b);
2284
+
2285
+ for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
2286
+ r_.m256i[i] = simde_mm256_cmpgt_epi64(a_.m256i[i], b_.m256i[i]);
2287
+ }
2288
+
2289
+ return simde__m512i_private_to_mmask8(r_);
2290
+ #endif
2291
+ }
2292
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2293
+ #define _mm512_cmpgt_epi64_mask(a, b) simde_mm512_cmpgt_epi64_mask(a, b)
2294
+ #endif
2295
+
2296
+ SIMDE__FUNCTION_ATTRIBUTES
2297
+ simde__mmask8
2298
+ simde_mm512_mask_cmpgt_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) {
2299
+ #if defined(SIMDE_AVX512F_NATIVE)
2300
+ return _mm512_mask_cmpgt_epi64_mask(k1, a, b);
2301
+ #else
2302
+ return simde_mm512_cmpgt_epi64_mask(a, b) & k1;
2303
+ #endif
2304
+ }
2305
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2306
+ #define _mm512_mask_cmpgt_epi64_mask(k1, a, b) simde_mm512_mask_cmpgt_epi64_mask(k1, a, b)
2307
+ #endif
2308
+
2309
+ SIMDE__FUNCTION_ATTRIBUTES
2310
+ simde__m512i
2311
+ simde_mm512_cvtepi8_epi32 (simde__m128i a) {
2312
+ #if defined(SIMDE_AVX512F_NATIVE)
2313
+ return _mm512_cvtepi8_epi32(a);
2314
+ #else
2315
+ simde__m512i_private r_;
2316
+ simde__m128i_private a_ = simde__m128i_to_private(a);
2317
+
2318
+ #if defined(SIMDE__CONVERT_VECTOR)
2319
+ SIMDE__CONVERT_VECTOR(r_.i32, a_.i8);
2320
+ #else
2321
+ SIMDE__VECTORIZE
2322
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
2323
+ r_.i32[i] = a_.i8[i];
2324
+ }
2325
+ #endif
2326
+
2327
+ return simde__m512i_from_private(r_);
2328
+ #endif
2329
+ }
2330
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2331
+ #define _mm512_cvtepi8_epi32(a) simde_mm512_cvtepi8_epi32(a)
2332
+ #endif
2333
+
2334
+ SIMDE__FUNCTION_ATTRIBUTES
2335
+ simde__m512i
2336
+ simde_mm512_cvtepi8_epi64 (simde__m128i a) {
2337
+ #if defined(SIMDE_AVX512F_NATIVE)
2338
+ return _mm512_cvtepi8_epi64(a);
2339
+ #else
2340
+ simde__m512i_private r_;
2341
+ simde__m128i_private a_ = simde__m128i_to_private(a);
2342
+
2343
+ #if defined(SIMDE__CONVERT_VECTOR)
2344
+ SIMDE__CONVERT_VECTOR(r_.i64, a_.m64_private[0].i8);
2345
+ #else
2346
+ SIMDE__VECTORIZE
2347
+ for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
2348
+ r_.i64[i] = a_.i8[i];
2349
+ }
2350
+ #endif
2351
+
2352
+ return simde__m512i_from_private(r_);
2353
+ #endif
2354
+ }
2355
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2356
+ #define _mm512_cvtepi8_epi64(a) simde_mm512_cvtepi8_epi64(a)
2357
+ #endif
2358
+
2359
+ SIMDE__FUNCTION_ATTRIBUTES
2360
+ simde__m128i
2361
+ simde_mm512_cvtepi32_epi8 (simde__m512i a) {
2362
+ #if defined(SIMDE_AVX512F_NATIVE)
2363
+ return _mm512_cvtepi32_epi8(a);
2364
+ #else
2365
+ simde__m128i_private r_;
2366
+ simde__m512i_private a_ = simde__m512i_to_private(a);
2367
+
2368
+ #if defined(SIMDE__CONVERT_VECTOR)
2369
+ SIMDE__CONVERT_VECTOR(r_.i8, a_.i32);
2370
+ #else
2371
+ SIMDE__VECTORIZE
2372
+ for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
2373
+ r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i32[i]);
2374
+ }
2375
+ #endif
2376
+
2377
+ return simde__m128i_from_private(r_);
2378
+ #endif
2379
+ }
2380
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2381
+ #define _mm512_cvtepi32_epi8(a) simde_mm512_cvtepi32_epi8(a)
2382
+ #endif
2383
+
2384
+ SIMDE__FUNCTION_ATTRIBUTES
2385
+ simde__m256i
2386
+ simde_mm512_cvtepi32_epi16 (simde__m512i a) {
2387
+ #if defined(SIMDE_AVX512F_NATIVE)
2388
+ return _mm512_cvtepi32_epi16(a);
2389
+ #else
2390
+ simde__m256i_private r_;
2391
+ simde__m512i_private a_ = simde__m512i_to_private(a);
2392
+
2393
+ #if defined(SIMDE__CONVERT_VECTOR)
2394
+ SIMDE__CONVERT_VECTOR(r_.i16, a_.i32);
2395
+ #else
2396
+ SIMDE__VECTORIZE
2397
+ for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
2398
+ r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]);
2399
+ }
2400
+ #endif
2401
+
2402
+ return simde__m256i_from_private(r_);
2403
+ #endif
2404
+ }
2405
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2406
+ #define _mm512_cvtepi32_epi16(a) simde_mm512_cvtepi32_epi16(a)
2407
+ #endif
2408
+
2409
+ SIMDE__FUNCTION_ATTRIBUTES
2410
+ simde__m128i
2411
+ simde_mm512_cvtepi64_epi8 (simde__m512i a) {
2412
+ #if defined(SIMDE_AVX512F_NATIVE)
2413
+ return _mm512_cvtepi64_epi8(a);
2414
+ #else
2415
+ simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128());
2416
+ simde__m512i_private a_ = simde__m512i_to_private(a);
2417
+
2418
+ #if defined(SIMDE__CONVERT_VECTOR)
2419
+ SIMDE__CONVERT_VECTOR(r_.m64_private[0].i8, a_.i64);
2420
+ #else
2421
+ SIMDE__VECTORIZE
2422
+ for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
2423
+ r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i64[i]);
2424
+ }
2425
+ #endif
2426
+
2427
+ return simde__m128i_from_private(r_);
2428
+ #endif
2429
+ }
2430
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2431
+ #define _mm512_cvtepi64_epi8(a) simde_mm512_cvtepi64_epi8(a)
2432
+ #endif
2433
+
2434
+ SIMDE__FUNCTION_ATTRIBUTES
2435
+ simde__m128i
2436
+ simde_mm512_cvtepi64_epi16 (simde__m512i a) {
2437
+ #if defined(SIMDE_AVX512F_NATIVE)
2438
+ return _mm512_cvtepi64_epi16(a);
2439
+ #else
2440
+ simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128());
2441
+ simde__m512i_private a_ = simde__m512i_to_private(a);
2442
+
2443
+ #if defined(SIMDE__CONVERT_VECTOR)
2444
+ SIMDE__CONVERT_VECTOR(r_.i16, a_.i64);
2445
+ #else
2446
+ SIMDE__VECTORIZE
2447
+ for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
2448
+ r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i64[i]);
2449
+ }
2450
+ #endif
2451
+
2452
+ return simde__m128i_from_private(r_);
2453
+ #endif
2454
+ }
2455
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2456
+ #define _mm512_cvtepi64_epi16(a) simde_mm512_cvtepi64_epi16(a)
2457
+ #endif
2458
+
2459
+ SIMDE__FUNCTION_ATTRIBUTES
2460
+ simde__m256i
2461
+ simde_mm512_cvtepi64_epi32 (simde__m512i a) {
2462
+ #if defined(SIMDE_AVX512F_NATIVE)
2463
+ return _mm512_cvtepi64_epi32(a);
2464
+ #else
2465
+ simde__m256i_private r_;
2466
+ simde__m512i_private a_ = simde__m512i_to_private(a);
2467
+
2468
+ #if defined(SIMDE__CONVERT_VECTOR)
2469
+ SIMDE__CONVERT_VECTOR(r_.i32, a_.i64);
2470
+ #else
2471
+ SIMDE__VECTORIZE
2472
+ for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
2473
+ r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i64[i]);
2474
+ }
2475
+ #endif
2476
+
2477
+ return simde__m256i_from_private(r_);
2478
+ #endif
2479
+ }
2480
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2481
+ #define _mm512_cvtepi64_epi32(a) simde_mm512_cvtepi64_epi32(a)
2482
+ #endif
2483
+
2484
+ SIMDE__FUNCTION_ATTRIBUTES
2485
+ simde__m128i
2486
+ simde_mm512_cvtsepi32_epi8 (simde__m512i a) {
2487
+ #if defined(SIMDE_AVX512F_NATIVE)
2488
+ return _mm512_cvtsepi32_epi8(a);
2489
+ #else
2490
+ simde__m128i_private r_;
2491
+ simde__m512i_private a_ = simde__m512i_to_private(a);
2492
+
2493
+ SIMDE__VECTORIZE
2494
+ for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
2495
+ r_.i8[i] =
2496
+ (a_.i32[i] < INT8_MIN)
2497
+ ? (INT8_MIN)
2498
+ : ((a_.i32[i] > INT8_MAX)
2499
+ ? (INT8_MAX)
2500
+ : HEDLEY_STATIC_CAST(int8_t, a_.i32[i]));
2501
+ }
2502
+
2503
+ return simde__m128i_from_private(r_);
2504
+ #endif
2505
+ }
2506
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2507
+ #define _mm512_cvtsepi32_epi8(a) simde_mm512_cvtsepi32_epi8(a)
2508
+ #endif
2509
+
2510
+ SIMDE__FUNCTION_ATTRIBUTES
2511
+ simde__m256i
2512
+ simde_mm512_cvtsepi32_epi16 (simde__m512i a) {
2513
+ #if defined(SIMDE_AVX512F_NATIVE)
2514
+ return _mm512_cvtsepi32_epi16(a);
2515
+ #else
2516
+ simde__m256i_private r_;
2517
+ simde__m512i_private a_ = simde__m512i_to_private(a);
2518
+
2519
+ SIMDE__VECTORIZE
2520
+ for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
2521
+ r_.i16[i] =
2522
+ (a_.i32[i] < INT16_MIN)
2523
+ ? (INT16_MIN)
2524
+ : ((a_.i32[i] > INT16_MAX)
2525
+ ? (INT16_MAX)
2526
+ : HEDLEY_STATIC_CAST(int16_t, a_.i32[i]));
2527
+ }
2528
+
2529
+ return simde__m256i_from_private(r_);
2530
+ #endif
2531
+ }
2532
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2533
+ #define _mm512_cvtsepi32_epi16(a) simde_mm512_cvtsepi32_epi16(a)
2534
+ #endif
2535
+
2536
+ SIMDE__FUNCTION_ATTRIBUTES
2537
+ simde__m128i
2538
+ simde_mm512_cvtsepi64_epi8 (simde__m512i a) {
2539
+ #if defined(SIMDE_AVX512F_NATIVE)
2540
+ return _mm512_cvtsepi64_epi8(a);
2541
+ #else
2542
+ simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128());
2543
+ simde__m512i_private a_ = simde__m512i_to_private(a);
2544
+
2545
+ SIMDE__VECTORIZE
2546
+ for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
2547
+ r_.i8[i] =
2548
+ (a_.i64[i] < INT8_MIN)
2549
+ ? (INT8_MIN)
2550
+ : ((a_.i64[i] > INT8_MAX)
2551
+ ? (INT8_MAX)
2552
+ : HEDLEY_STATIC_CAST(int8_t, a_.i64[i]));
2553
+ }
2554
+
2555
+ return simde__m128i_from_private(r_);
2556
+ #endif
2557
+ }
2558
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2559
+ #define _mm512_cvtsepi64_epi8(a) simde_mm512_cvtsepi64_epi8(a)
2560
+ #endif
2561
+
2562
+ SIMDE__FUNCTION_ATTRIBUTES
2563
+ simde__m128i
2564
+ simde_mm512_cvtsepi64_epi16 (simde__m512i a) {
2565
+ #if defined(SIMDE_AVX512F_NATIVE)
2566
+ return _mm512_cvtsepi64_epi16(a);
2567
+ #else
2568
+ simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128());
2569
+ simde__m512i_private a_ = simde__m512i_to_private(a);
2570
+
2571
+ SIMDE__VECTORIZE
2572
+ for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
2573
+ r_.i16[i] =
2574
+ (a_.i64[i] < INT16_MIN)
2575
+ ? (INT16_MIN)
2576
+ : ((a_.i64[i] > INT16_MAX)
2577
+ ? (INT16_MAX)
2578
+ : HEDLEY_STATIC_CAST(int16_t, a_.i64[i]));
2579
+ }
2580
+
2581
+ return simde__m128i_from_private(r_);
2582
+ #endif
2583
+ }
2584
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2585
+ #define _mm512_cvtsepi64_epi16(a) simde_mm512_cvtsepi64_epi16(a)
2586
+ #endif
2587
+
2588
+ SIMDE__FUNCTION_ATTRIBUTES
2589
+ simde__m256i
2590
+ simde_mm512_cvtsepi64_epi32 (simde__m512i a) {
2591
+ #if defined(SIMDE_AVX512F_NATIVE)
2592
+ return _mm512_cvtsepi64_epi32(a);
2593
+ #else
2594
+ simde__m256i_private r_;
2595
+ simde__m512i_private a_ = simde__m512i_to_private(a);
2596
+
2597
+ SIMDE__VECTORIZE
2598
+ for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
2599
+ r_.i32[i] =
2600
+ (a_.i64[i] < INT32_MIN)
2601
+ ? (INT32_MIN)
2602
+ : ((a_.i64[i] > INT32_MAX)
2603
+ ? (INT32_MAX)
2604
+ : HEDLEY_STATIC_CAST(int32_t, a_.i64[i]));
2605
+ }
2606
+
2607
+ return simde__m256i_from_private(r_);
2608
+ #endif
2609
+ }
2610
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2611
+ #define _mm512_cvtsepi64_epi32(a) simde_mm512_cvtsepi64_epi32(a)
2612
+ #endif
2613
+
2614
+ SIMDE__FUNCTION_ATTRIBUTES
2615
+ simde__m512
2616
+ simde_mm512_div_ps (simde__m512 a, simde__m512 b) {
2617
+ #if defined(SIMDE_AVX512F_NATIVE)
2618
+ return _mm512_div_ps(a, b);
2619
+ #else
2620
+ simde__m512_private
2621
+ r_,
2622
+ a_ = simde__m512_to_private(a),
2623
+ b_ = simde__m512_to_private(b);
2624
+
2625
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2626
+ r_.f32 = a_.f32 / b_.f32;
2627
+ #else
2628
+ SIMDE__VECTORIZE
2629
+ for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
2630
+ r_.m256[i] = simde_mm256_div_ps(a_.m256[i], b_.m256[i]);
2631
+ }
2632
+ #endif
2633
+
2634
+ return simde__m512_from_private(r_);
2635
+ #endif
2636
+ }
2637
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2638
+ #define _mm512_div_ps(a, b) simde_mm512_div_ps(a, b)
2639
+ #endif
2640
+
2641
+ SIMDE__FUNCTION_ATTRIBUTES
2642
+ simde__m512
2643
+ simde_mm512_mask_div_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
2644
+ #if defined(SIMDE_AVX512F_NATIVE)
2645
+ return _mm512_mask_div_ps(src, k, a, b);
2646
+ #else
2647
+ return simde_mm512_mask_mov_ps(src, k, simde_mm512_div_ps(a, b));
2648
+ #endif
2649
+ }
2650
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2651
+ #define _mm512_mask_div_ps(src, k, a, b) simde_mm512_mask_div_ps(src, k, a, b)
2652
+ #endif
2653
+
2654
+ SIMDE__FUNCTION_ATTRIBUTES
2655
+ simde__m512
2656
+ simde_mm512_maskz_div_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
2657
+ #if defined(SIMDE_AVX512F_NATIVE)
2658
+ return _mm512_maskz_div_ps(k, a, b);
2659
+ #else
2660
+ return simde_mm512_maskz_mov_ps(k, simde_mm512_div_ps(a, b));
2661
+ #endif
2662
+ }
2663
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2664
+ #define _mm512_maskz_div_ps(k, a, b) simde_mm512_maskz_div_ps(k, a, b)
2665
+ #endif
2666
+
2667
+ SIMDE__FUNCTION_ATTRIBUTES
2668
+ simde__m512d
2669
+ simde_mm512_div_pd (simde__m512d a, simde__m512d b) {
2670
+ #if defined(SIMDE_AVX512F_NATIVE)
2671
+ return _mm512_div_pd(a, b);
2672
+ #else
2673
+ simde__m512d_private
2674
+ r_,
2675
+ a_ = simde__m512d_to_private(a),
2676
+ b_ = simde__m512d_to_private(b);
2677
+
2678
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2679
+ r_.f64 = a_.f64 / b_.f64;
2680
+ #else
2681
+ SIMDE__VECTORIZE
2682
+ for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
2683
+ r_.m256d[i] = simde_mm256_div_pd(a_.m256d[i], b_.m256d[i]);
2684
+ }
2685
+ #endif
2686
+
2687
+ return simde__m512d_from_private(r_);
2688
+ #endif
2689
+ }
2690
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2691
+ #define _mm512_div_pd(a, b) simde_mm512_div_pd(a, b)
2692
+ #endif
2693
+
2694
+ SIMDE__FUNCTION_ATTRIBUTES
2695
+ simde__m512d
2696
+ simde_mm512_mask_div_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
2697
+ #if defined(SIMDE_AVX512F_NATIVE)
2698
+ return _mm512_mask_div_pd(src, k, a, b);
2699
+ #else
2700
+ return simde_mm512_mask_mov_pd(src, k, simde_mm512_div_pd(a, b));
2701
+ #endif
2702
+ }
2703
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2704
+ #define _mm512_mask_div_pd(src, k, a, b) simde_mm512_mask_div_pd(src, k, a, b)
2705
+ #endif
2706
+
2707
+ SIMDE__FUNCTION_ATTRIBUTES
2708
+ simde__m512d
2709
+ simde_mm512_maskz_div_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
2710
+ #if defined(SIMDE_AVX512F_NATIVE)
2711
+ return _mm512_maskz_div_pd(k, a, b);
2712
+ #else
2713
+ return simde_mm512_maskz_mov_pd(k, simde_mm512_div_pd(a, b));
2714
+ #endif
2715
+ }
2716
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2717
+ #define _mm512_maskz_div_pd(k, a, b) simde_mm512_maskz_div_pd(k, a, b)
2718
+ #endif
2719
+
2720
+ SIMDE__FUNCTION_ATTRIBUTES
2721
+ simde__m512i
2722
+ simde_mm512_load_si512 (simde__m512i const * mem_addr) {
2723
+ simde_assert_aligned(64, mem_addr);
2724
+
2725
+ #if defined(SIMDE_AVX512F_NATIVE)
2726
+ return _mm512_load_si512((__m512i const*) mem_addr);
2727
+ #elif defined(SIMDE_ARCH_AARCH64) && (defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(8,0,0))
2728
+ simde__m512i r;
2729
+ memcpy(&r, mem_addr, sizeof(r));
2730
+ return r;
2731
+ #else
2732
+ return *mem_addr;
2733
+ #endif
2734
+ }
2735
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2736
+ #define _mm512_load_si512(a) simde_mm512_load_si512(a)
2737
+ #endif
2738
+
2739
+ SIMDE__FUNCTION_ATTRIBUTES
2740
+ simde__m512i
2741
+ simde_mm512_loadu_si512 (simde__m512i const * mem_addr) {
2742
+ #if defined(SIMDE_AVX512F_NATIVE)
2743
+ return _mm512_loadu_si512((__m512i const*) mem_addr);
2744
+ #else
2745
+ simde__m512i r;
2746
+ simde_memcpy(&r, mem_addr, sizeof(r));
2747
+ return r;
2748
+ #endif
2749
+ }
2750
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2751
+ #define _mm512_loadu_si512(a) simde_mm512_loadu_si512(a)
2752
+ #endif
2753
+
2754
+ SIMDE__FUNCTION_ATTRIBUTES
2755
+ simde__m512
2756
+ simde_mm512_mul_ps (simde__m512 a, simde__m512 b) {
2757
+ #if defined(SIMDE_AVX512F_NATIVE)
2758
+ return _mm512_mul_ps(a, b);
2759
+ #else
2760
+ simde__m512_private
2761
+ r_,
2762
+ a_ = simde__m512_to_private(a),
2763
+ b_ = simde__m512_to_private(b);
2764
+
2765
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2766
+ r_.f32 = a_.f32 * b_.f32;
2767
+ #else
2768
+ SIMDE__VECTORIZE
2769
+ for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
2770
+ r_.m256[i] = simde_mm256_mul_ps(a_.m256[i], b_.m256[i]);
2771
+ }
2772
+ #endif
2773
+
2774
+ return simde__m512_from_private(r_);
2775
+ #endif
2776
+ }
2777
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2778
+ #define _mm512_mul_ps(a, b) simde_mm512_mul_ps(a, b)
2779
+ #endif
2780
+
2781
+ SIMDE__FUNCTION_ATTRIBUTES
2782
+ simde__m512
2783
+ simde_mm512_mask_mul_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
2784
+ #if defined(SIMDE_AVX512F_NATIVE)
2785
+ return _mm512_mask_mul_ps(src, k, a, b);
2786
+ #else
2787
+ return simde_mm512_mask_mov_ps(src, k, simde_mm512_mul_ps(a, b));
2788
+ #endif
2789
+ }
2790
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2791
+ #define _mm512_mask_mul_ps(src, k, a, b) simde_mm512_mask_mul_ps(src, k, a, b)
2792
+ #endif
2793
+
2794
+ SIMDE__FUNCTION_ATTRIBUTES
2795
+ simde__m512
2796
+ simde_mm512_maskz_mul_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
2797
+ #if defined(SIMDE_AVX512F_NATIVE)
2798
+ return _mm512_maskz_mul_ps(k, a, b);
2799
+ #else
2800
+ return simde_mm512_maskz_mov_ps(k, simde_mm512_mul_ps(a, b));
2801
+ #endif
2802
+ }
2803
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2804
+ #define _mm512_maskz_mul_ps(k, a, b) simde_mm512_maskz_mul_ps(k, a, b)
2805
+ #endif
2806
+
2807
+ SIMDE__FUNCTION_ATTRIBUTES
2808
+ simde__m512d
2809
+ simde_mm512_mul_pd (simde__m512d a, simde__m512d b) {
2810
+ #if defined(SIMDE_AVX512F_NATIVE)
2811
+ return _mm512_mul_pd(a, b);
2812
+ #else
2813
+ simde__m512d_private
2814
+ r_,
2815
+ a_ = simde__m512d_to_private(a),
2816
+ b_ = simde__m512d_to_private(b);
2817
+
2818
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2819
+ r_.f64 = a_.f64 * b_.f64;
2820
+ #else
2821
+ SIMDE__VECTORIZE
2822
+ for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
2823
+ r_.m256d[i] = simde_mm256_mul_pd(a_.m256d[i], b_.m256d[i]);
2824
+ }
2825
+ #endif
2826
+
2827
+ return simde__m512d_from_private(r_);
2828
+ #endif
2829
+ }
2830
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2831
+ #define _mm512_mul_pd(a, b) simde_mm512_mul_pd(a, b)
2832
+ #endif
2833
+
2834
+ SIMDE__FUNCTION_ATTRIBUTES
2835
+ simde__m512d
2836
+ simde_mm512_mask_mul_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
2837
+ #if defined(SIMDE_AVX512F_NATIVE)
2838
+ return _mm512_mask_mul_pd(src, k, a, b);
2839
+ #else
2840
+ return simde_mm512_mask_mov_pd(src, k, simde_mm512_mul_pd(a, b));
2841
+ #endif
2842
+ }
2843
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2844
+ #define _mm512_mask_mul_pd(src, k, a, b) simde_mm512_mask_mul_pd(src, k, a, b)
2845
+ #endif
2846
+
2847
+ SIMDE__FUNCTION_ATTRIBUTES
2848
+ simde__m512d
2849
+ simde_mm512_maskz_mul_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
2850
+ #if defined(SIMDE_AVX512F_NATIVE)
2851
+ return _mm512_maskz_mul_pd(k, a, b);
2852
+ #else
2853
+ return simde_mm512_maskz_mov_pd(k, simde_mm512_mul_pd(a, b));
2854
+ #endif
2855
+ }
2856
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2857
+ #define _mm512_maskz_mul_pd(k, a, b) simde_mm512_maskz_mul_pd(k, a, b)
2858
+ #endif
2859
+
2860
+ SIMDE__FUNCTION_ATTRIBUTES
2861
+ simde__m512i
2862
+ simde_mm512_mul_epi32 (simde__m512i a, simde__m512i b) {
2863
+ #if defined(SIMDE_AVX512F_NATIVE)
2864
+ return _mm512_mul_epi32(a, b);
2865
+ #else
2866
+ simde__m512i_private
2867
+ r_,
2868
+ a_ = simde__m512i_to_private(a),
2869
+ b_ = simde__m512i_to_private(b);
2870
+
2871
+ #if defined(SIMDE__CONVERT_VECTOR) && defined(SIMDE__SHUFFLE_VECTOR)
2872
+ simde__m512i_private x;
2873
+ __typeof__(r_.i64) ta, tb;
2874
+
2875
+ /* Get even numbered 32-bit values */
2876
+ x.i32 = SIMDE__SHUFFLE_VECTOR(32, 64, a_.i32, b_.i32, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
2877
+ /* Cast to 64 bits */
2878
+ SIMDE__CONVERT_VECTOR(ta, x.m256i_private[0].i32);
2879
+ SIMDE__CONVERT_VECTOR(tb, x.m256i_private[1].i32);
2880
+ r_.i64 = ta * tb;
2881
+ #else
2882
+ SIMDE__VECTORIZE
2883
+ for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
2884
+ r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[i << 1]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i << 1]);
2885
+ }
2886
+ #endif
2887
+ return simde__m512i_from_private(r_);
2888
+ #endif
2889
+ }
2890
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2891
+ #define _mm512_mul_epi32(a, b) simde_mm512_mul_epi32(a, b)
2892
+ #endif
2893
+
2894
+ SIMDE__FUNCTION_ATTRIBUTES
2895
+ simde__m512i
2896
+ simde_mm512_mask_mul_epi32(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
2897
+ #if defined(SIMDE_AVX512F_NATIVE)
2898
+ return _mm512_mask_mul_epi32(src, k, a, b);
2899
+ #else
2900
+ return simde_mm512_mask_mov_epi64(src, k, simde_mm512_mul_epi32(a, b));
2901
+ #endif
2902
+ }
2903
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2904
+ #define _mm512_mask_mul_epi32(src, k, a, b) simde_mm512_mask_mul_epi32(src, k, a, b)
2905
+ #endif
2906
+
2907
+ SIMDE__FUNCTION_ATTRIBUTES
2908
+ simde__m512i
2909
+ simde_mm512_maskz_mul_epi32(simde__mmask8 k, simde__m512i a, simde__m512i b) {
2910
+ #if defined(SIMDE_AVX512F_NATIVE)
2911
+ return _mm512_maskz_mul_epi32(k, a, b);
2912
+ #else
2913
+ return simde_mm512_maskz_mov_epi64(k, simde_mm512_mul_epi32(a, b));
2914
+ #endif
2915
+ }
2916
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2917
+ #define _mm512_maskz_mul_epi32(k, a, b) simde_mm512_maskz_mul_epi32(k, a, b)
2918
+ #endif
2919
+
2920
+ SIMDE__FUNCTION_ATTRIBUTES
2921
+ simde__m512i
2922
+ simde_mm512_mul_epu32 (simde__m512i a, simde__m512i b) {
2923
+ #if defined(SIMDE_AVX512F_NATIVE)
2924
+ return _mm512_mul_epu32(a, b);
2925
+ #else
2926
+ simde__m512i_private
2927
+ r_,
2928
+ a_ = simde__m512i_to_private(a),
2929
+ b_ = simde__m512i_to_private(b);
2930
+
2931
+ #if defined(SIMDE__CONVERT_VECTOR) && defined(SIMDE__SHUFFLE_VECTOR)
2932
+ simde__m512i_private x;
2933
+ __typeof__(r_.u64) ta, tb;
2934
+
2935
+ x.u32 = SIMDE__SHUFFLE_VECTOR(32, 64, a_.u32, b_.u32, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
2936
+ SIMDE__CONVERT_VECTOR(ta, x.m256i_private[0].u32);
2937
+ SIMDE__CONVERT_VECTOR(tb, x.m256i_private[1].u32);
2938
+ r_.u64 = ta * tb;
2939
+ #else
2940
+ SIMDE__VECTORIZE
2941
+ for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
2942
+ r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i << 1]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i << 1]);
2943
+ }
2944
+ #endif
2945
+
2946
+ return simde__m512i_from_private(r_);
2947
+ #endif
2948
+ }
2949
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2950
+ #define _mm512_mul_epu32(a, b) simde_mm512_mul_epu32(a, b)
2951
+ #endif
2952
+
2953
+ SIMDE__FUNCTION_ATTRIBUTES
2954
+ simde__m512i
2955
+ simde_mm512_mask_mul_epu32(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
2956
+ #if defined(SIMDE_AVX512F_NATIVE)
2957
+ return _mm512_mask_mul_epu32(src, k, a, b);
2958
+ #else
2959
+ return simde_mm512_mask_mov_epi64(src, k, simde_mm512_mul_epu32(a, b));
2960
+ #endif
2961
+ }
2962
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2963
+ #define _mm512_mask_mul_epu32(src, k, a, b) simde_mm512_mask_mul_epu32(src, k, a, b)
2964
+ #endif
2965
+
2966
+ SIMDE__FUNCTION_ATTRIBUTES
2967
+ simde__m512i
2968
+ simde_mm512_maskz_mul_epu32(simde__mmask8 k, simde__m512i a, simde__m512i b) {
2969
+ #if defined(SIMDE_AVX512F_NATIVE)
2970
+ return _mm512_maskz_mul_epu32(k, a, b);
2971
+ #else
2972
+ return simde_mm512_maskz_mov_epi64(k, simde_mm512_mul_epu32(a, b));
2973
+ #endif
2974
+ }
2975
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
2976
+ #define _mm512_maskz_mul_epu32(k, a, b) simde_mm512_maskz_mul_epu32(k, a, b)
2977
+ #endif
2978
+
2979
+ SIMDE__FUNCTION_ATTRIBUTES
2980
+ simde__m512i
2981
+ simde_mm512_or_si512 (simde__m512i a, simde__m512i b) {
2982
+ #if defined(SIMDE_AVX512F_NATIVE)
2983
+ return _mm512_or_si512(a, b);
2984
+ #else
2985
+ simde__m512i_private
2986
+ r_,
2987
+ a_ = simde__m512i_to_private(a),
2988
+ b_ = simde__m512i_to_private(b);
2989
+
2990
+ #if defined(SIMDE_ARCH_X86_AVX2)
2991
+ r_.m256i[0] = simde_mm256_or_si256(a_.m256i[0], b_.m256i[0]);
2992
+ r_.m256i[1] = simde_mm256_or_si256(a_.m256i[1], b_.m256i[1]);
2993
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2994
+ r_.i32f = a_.i32f | b_.i32f;
2995
+ #else
2996
+ SIMDE__VECTORIZE
2997
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
2998
+ r_.i32f[i] = a_.i32f[i] | b_.i32f[i];
2999
+ }
3000
+ #endif
3001
+
3002
+ return simde__m512i_from_private(r_);
3003
+ #endif
3004
+ }
3005
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3006
+ #define _mm512_or_si512(a, b) simde_mm512_or_si512(a, b)
3007
+ #endif
3008
+
3009
+ SIMDE__FUNCTION_ATTRIBUTES
3010
+ simde__m512i
3011
+ simde_mm512_sub_epi32 (simde__m512i a, simde__m512i b) {
3012
+ #if defined(SIMDE_AVX512F_NATIVE)
3013
+ return _mm512_sub_epi32(a, b);
3014
+ #else
3015
+ simde__m512i_private
3016
+ r_,
3017
+ a_ = simde__m512i_to_private(a),
3018
+ b_ = simde__m512i_to_private(b);
3019
+
3020
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3021
+ r_.i32 = a_.i32 - b_.i32;
3022
+ #else
3023
+ SIMDE__VECTORIZE
3024
+ for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
3025
+ r_.m256i[i] = simde_mm256_sub_epi32(a_.m256i[i], b_.m256i[i]);
3026
+ }
3027
+ #endif
3028
+
3029
+ return simde__m512i_from_private(r_);
3030
+ #endif
3031
+ }
3032
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3033
+ #define _mm512_sub_epi32(a, b) simde_mm512_sub_epi32(a, b)
3034
+ #endif
3035
+
3036
+ SIMDE__FUNCTION_ATTRIBUTES
3037
+ simde__m512i
3038
+ simde_mm512_mask_sub_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
3039
+ #if defined(SIMDE_AVX512F_NATIVE)
3040
+ return _mm512_mask_sub_epi32(src, k, a, b);
3041
+ #else
3042
+ return simde_mm512_mask_mov_epi32(src, k, simde_mm512_sub_epi32(a, b));
3043
+ #endif
3044
+ }
3045
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3046
+ #define _mm512_mask_sub_epi32(src, k, a, b) simde_mm512_mask_sub_epi32(src, k, a, b)
3047
+ #endif
3048
+
3049
+ SIMDE__FUNCTION_ATTRIBUTES
3050
+ simde__m512i
3051
+ simde_mm512_maskz_sub_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) {
3052
+ #if defined(SIMDE_AVX512F_NATIVE)
3053
+ return _mm512_maskz_sub_epi32(k, a, b);
3054
+ #else
3055
+ return simde_mm512_maskz_mov_epi32(k, simde_mm512_sub_epi32(a, b));
3056
+ #endif
3057
+ }
3058
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3059
+ #define _mm512_maskz_sub_epi32(k, a, b) simde_mm512_maskz_sub_epi32(k, a, b)
3060
+ #endif
3061
+
3062
+ SIMDE__FUNCTION_ATTRIBUTES
3063
+ simde__m512i
3064
+ simde_mm512_sub_epi64 (simde__m512i a, simde__m512i b) {
3065
+ #if defined(SIMDE_AVX512F_NATIVE)
3066
+ return _mm512_sub_epi64(a, b);
3067
+ #else
3068
+ simde__m512i_private
3069
+ r_,
3070
+ a_ = simde__m512i_to_private(a),
3071
+ b_ = simde__m512i_to_private(b);
3072
+
3073
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3074
+ r_.i64 = a_.i64 - b_.i64;
3075
+ #else
3076
+ SIMDE__VECTORIZE
3077
+ for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
3078
+ r_.m256i[i] = simde_mm256_sub_epi64(a_.m256i[i], b_.m256i[i]);
3079
+ }
3080
+ #endif
3081
+
3082
+ return simde__m512i_from_private(r_);
3083
+ #endif
3084
+ }
3085
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3086
+ #define _mm512_sub_epi64(a, b) simde_mm512_sub_epi64(a, b)
3087
+ #endif
3088
+
3089
+ SIMDE__FUNCTION_ATTRIBUTES
3090
+ simde__m512i
3091
+ simde_mm512_mask_sub_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
3092
+ #if defined(SIMDE_AVX512F_NATIVE)
3093
+ return _mm512_mask_sub_epi64(src, k, a, b);
3094
+ #else
3095
+ return simde_mm512_mask_mov_epi64(src, k, simde_mm512_sub_epi64(a, b));
3096
+ #endif
3097
+ }
3098
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3099
+ #define _mm512_mask_sub_epi64(src, k, a, b) simde_mm512_mask_sub_epi64(src, k, a, b)
3100
+ #endif
3101
+
3102
+ SIMDE__FUNCTION_ATTRIBUTES
3103
+ simde__m512i
3104
+ simde_mm512_maskz_sub_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) {
3105
+ #if defined(SIMDE_AVX512F_NATIVE)
3106
+ return _mm512_maskz_sub_epi64(k, a, b);
3107
+ #else
3108
+ return simde_mm512_maskz_mov_epi64(k, simde_mm512_sub_epi64(a, b));
3109
+ #endif
3110
+ }
3111
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3112
+ #define _mm512_maskz_sub_epi64(k, a, b) simde_mm512_maskz_sub_epi64(k, a, b)
3113
+ #endif
3114
+
3115
+ SIMDE__FUNCTION_ATTRIBUTES
3116
+ simde__m512
3117
+ simde_mm512_sub_ps (simde__m512 a, simde__m512 b) {
3118
+ #if defined(SIMDE_AVX512F_NATIVE)
3119
+ return _mm512_sub_ps(a, b);
3120
+ #else
3121
+ simde__m512_private
3122
+ r_,
3123
+ a_ = simde__m512_to_private(a),
3124
+ b_ = simde__m512_to_private(b);
3125
+
3126
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3127
+ r_.f32 = a_.f32 - b_.f32;
3128
+ #else
3129
+ SIMDE__VECTORIZE
3130
+ for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
3131
+ r_.m256[i] = simde_mm256_sub_ps(a_.m256[i], b_.m256[i]);
3132
+ }
3133
+ #endif
3134
+
3135
+ return simde__m512_from_private(r_);
3136
+ #endif
3137
+ }
3138
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3139
+ #define _mm512_sub_ps(a, b) simde_mm512_sub_ps(a, b)
3140
+ #endif
3141
+
3142
+ SIMDE__FUNCTION_ATTRIBUTES
3143
+ simde__m512
3144
+ simde_mm512_mask_sub_ps (simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
3145
+ #if defined(SIMDE_AVX512F_NATIVE)
3146
+ return _mm512_mask_sub_ps(src, k, a, b);
3147
+ #else
3148
+ return simde_mm512_mask_mov_ps(src, k, simde_mm512_sub_ps(a, b));
3149
+ #endif
3150
+ }
3151
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3152
+ #define _mm512_mask_sub_ps(src, k, a, b) simde_mm512_mask_sub_ps(src, k, a, b)
3153
+ #endif
3154
+
3155
+ SIMDE__FUNCTION_ATTRIBUTES
3156
+ simde__m512
3157
+ simde_mm512_maskz_sub_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
3158
+ #if defined(SIMDE_AVX512F_NATIVE)
3159
+ return _mm512_maskz_sub_ps(k, a, b);
3160
+ #else
3161
+ return simde_mm512_maskz_mov_ps(k, simde_mm512_sub_ps(a, b));
3162
+ #endif
3163
+ }
3164
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3165
+ #define _mm512_maskz_sub_ps(k, a, b) simde_mm512_maskz_sub_ps(k, a, b)
3166
+ #endif
3167
+
3168
+ SIMDE__FUNCTION_ATTRIBUTES
3169
+ simde__m512d
3170
+ simde_mm512_sub_pd (simde__m512d a, simde__m512d b) {
3171
+ #if defined(SIMDE_AVX512F_NATIVE)
3172
+ return _mm512_sub_pd(a, b);
3173
+ #else
3174
+ simde__m512d_private
3175
+ r_,
3176
+ a_ = simde__m512d_to_private(a),
3177
+ b_ = simde__m512d_to_private(b);
3178
+
3179
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3180
+ r_.f64 = a_.f64 - b_.f64;
3181
+ #else
3182
+ SIMDE__VECTORIZE
3183
+ for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
3184
+ r_.m256d[i] = simde_mm256_sub_pd(a_.m256d[i], b_.m256d[i]);
3185
+ }
3186
+ #endif
3187
+
3188
+ return simde__m512d_from_private(r_);
3189
+ #endif
3190
+ }
3191
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3192
+ #define _mm512_sub_pd(a, b) simde_mm512_sub_pd(a, b)
3193
+ #endif
3194
+
3195
+ SIMDE__FUNCTION_ATTRIBUTES
3196
+ simde__m512d
3197
+ simde_mm512_mask_sub_pd (simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
3198
+ #if defined(SIMDE_AVX512F_NATIVE)
3199
+ return _mm512_mask_sub_pd(src, k, a, b);
3200
+ #else
3201
+ return simde_mm512_mask_mov_pd(src, k, simde_mm512_sub_pd(a, b));
3202
+ #endif
3203
+ }
3204
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3205
+ #define _mm512_mask_sub_pd(src, k, a, b) simde_mm512_mask_sub_pd(src, k, a, b)
3206
+ #endif
3207
+
3208
+ SIMDE__FUNCTION_ATTRIBUTES
3209
+ simde__m512d
3210
+ simde_mm512_maskz_sub_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
3211
+ #if defined(SIMDE_AVX512F_NATIVE)
3212
+ return _mm512_maskz_sub_pd(k, a, b);
3213
+ #else
3214
+ return simde_mm512_maskz_mov_pd(k, simde_mm512_sub_pd(a, b));
3215
+ #endif
3216
+ }
3217
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3218
+ #define _mm512_maskz_sub_pd(k, a, b) simde_mm512_maskz_sub_pd(k, a, b)
3219
+ #endif
3220
+
3221
+ SIMDE__FUNCTION_ATTRIBUTES
3222
+ simde__m512i
3223
+ simde_mm512_srli_epi32 (simde__m512i a, unsigned int imm8) {
3224
+ #if defined(SIMDE_AVX512F_NATIVE)
3225
+ return _mm512_srli_epi32(a, imm8);
3226
+ #else
3227
+ simde__m512i_private
3228
+ r_,
3229
+ a_ = simde__m512i_to_private(a);
3230
+
3231
+ #if defined(SIMDE_ARCH_X86_AVX2)
3232
+ r_.m256i[0] = simde_mm256_srli_epi32(a_.m256i[0], imm8);
3233
+ r_.m256i[1] = simde_mm256_srli_epi32(a_.m256i[1], imm8);
3234
+ #elif defined(SIMDE_ARCH_X86_SSE2)
3235
+ r_.m128i[0] = simde_mm_srli_epi32(a_.m128i[0], imm8);
3236
+ r_.m128i[1] = simde_mm_srli_epi32(a_.m128i[1], imm8);
3237
+ r_.m128i[2] = simde_mm_srli_epi32(a_.m128i[2], imm8);
3238
+ r_.m128i[3] = simde_mm_srli_epi32(a_.m128i[3], imm8);
3239
+ #else
3240
+ if (imm8 > 31) {
3241
+ simde_memset(&r_, 0, sizeof(r_));
3242
+ } else {
3243
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
3244
+ r_.u32 = a_.u32 >> imm8;
3245
+ #else
3246
+ SIMDE__VECTORIZE
3247
+ for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
3248
+ r_.u32[i] = a_.u32[i] >> imm8;
3249
+ }
3250
+ #endif
3251
+ }
3252
+ #endif
3253
+
3254
+ return simde__m512i_from_private(r_);
3255
+ #endif
3256
+ }
3257
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3258
+ #define _mm512_srli_epi32(a, imm8) simde_mm512_srli_epi32(a, imm8)
3259
+ #endif
3260
+
3261
+ SIMDE__FUNCTION_ATTRIBUTES
3262
+ simde__m512i
3263
+ simde_mm512_srli_epi64 (simde__m512i a, unsigned int imm8) {
3264
+ #if defined(SIMDE_AVX512F_NATIVE)
3265
+ return _mm512_srli_epi64(a, imm8);
3266
+ #else
3267
+ simde__m512i_private
3268
+ r_,
3269
+ a_ = simde__m512i_to_private(a);
3270
+
3271
+ #if defined(SIMDE_ARCH_X86_AVX2)
3272
+ r_.m256i[0] = simde_mm256_srli_epi64(a_.m256i[0], imm8);
3273
+ r_.m256i[1] = simde_mm256_srli_epi64(a_.m256i[1], imm8);
3274
+ #elif defined(SIMDE_ARCH_X86_SSE2)
3275
+ r_.m128i[0] = simde_mm_srli_epi64(a_.m128i[0], imm8);
3276
+ r_.m128i[1] = simde_mm_srli_epi64(a_.m128i[1], imm8);
3277
+ r_.m128i[2] = simde_mm_srli_epi64(a_.m128i[2], imm8);
3278
+ r_.m128i[3] = simde_mm_srli_epi64(a_.m128i[3], imm8);
3279
+ #else
3280
+ /* The Intel Intrinsics Guide says that only the 8 LSBits of imm8 are
3281
+ * used. In this case we should do "imm8 &= 0xff" here. However in
3282
+ * practice all bits are used. */
3283
+ if (imm8 > 63) {
3284
+ simde_memset(&r_, 0, sizeof(r_));
3285
+ } else {
3286
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
3287
+ r_.u64 = a_.u64 >> imm8;
3288
+ #else
3289
+ SIMDE__VECTORIZE
3290
+ for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
3291
+ r_.u64[i] = a_.u64[i] >> imm8;
3292
+ }
3293
+ #endif
3294
+ }
3295
+ #endif
3296
+
3297
+ return simde__m512i_from_private(r_);
3298
+ #endif
3299
+ }
3300
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3301
+ #define _mm512_srli_epi64(a, imm8) simde_mm512_srli_epi64(a, imm8)
3302
+ #endif
3303
+
3304
+ SIMDE__FUNCTION_ATTRIBUTES
3305
+ simde__mmask16
3306
+ simde_mm512_mask_test_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) {
3307
+ #if defined(SIMDE_AVX512F_NATIVE)
3308
+ return _mm512_mask_test_epi32_mask(k1, a, b);
3309
+ #else
3310
+ simde__m512i_private
3311
+ a_ = simde__m512i_to_private(a),
3312
+ b_ = simde__m512i_to_private(b);
3313
+ simde__mmask16 r = 0;
3314
+
3315
+ SIMDE__VECTORIZE_REDUCTION(|:r)
3316
+ for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
3317
+ r |= !!(a_.i32[i] & b_.i32[i]) << i;
3318
+ }
3319
+
3320
+ return r & k1;
3321
+ #endif
3322
+ }
3323
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3324
+ #define _mm512_mask_test_epi32_mask(a, b) simde_mm512_mask_test_epi32_mask(a, b)
3325
+ #endif
3326
+
3327
+ SIMDE__FUNCTION_ATTRIBUTES
3328
+ simde__mmask8
3329
+ simde_mm512_mask_test_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) {
3330
+ #if defined(SIMDE_AVX512F_NATIVE)
3331
+ return _mm512_mask_test_epi64_mask(k1, a, b);
3332
+ #else
3333
+ simde__m512i_private
3334
+ a_ = simde__m512i_to_private(a),
3335
+ b_ = simde__m512i_to_private(b);
3336
+ simde__mmask8 r = 0;
3337
+
3338
+ SIMDE__VECTORIZE_REDUCTION(|:r)
3339
+ for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
3340
+ r |= !!(a_.i64[i] & b_.i64[i]) << i;
3341
+ }
3342
+
3343
+ return r & k1;
3344
+ #endif
3345
+ }
3346
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3347
+ #define _mm512_mask_test_epi64_mask(a, b) simde_mm512_mask_test_epi64_mask(a, b)
3348
+ #endif
3349
+
3350
+ SIMDE__FUNCTION_ATTRIBUTES
3351
+ simde__m512i
3352
+ simde_mm512_xor_si512 (simde__m512i a, simde__m512i b) {
3353
+ #if defined(SIMDE_AVX512F_NATIVE)
3354
+ return _mm512_xor_si512(a, b);
3355
+ #else
3356
+ simde__m512i_private
3357
+ r_,
3358
+ a_ = simde__m512i_to_private(a),
3359
+ b_ = simde__m512i_to_private(b);
3360
+
3361
+ #if defined(SIMDE_ARCH_X86_AVX2)
3362
+ r_.m256i[0] = simde_mm256_xor_si256(a_.m256i[0], b_.m256i[0]);
3363
+ r_.m256i[1] = simde_mm256_xor_si256(a_.m256i[1], b_.m256i[1]);
3364
+ #elif defined(SIMDE_ARCH_X86_SSE2)
3365
+ r_.m128i[0] = simde_mm_xor_si128(a_.m128i[0], b_.m128i[0]);
3366
+ r_.m128i[1] = simde_mm_xor_si128(a_.m128i[1], b_.m128i[1]);
3367
+ r_.m128i[2] = simde_mm_xor_si128(a_.m128i[2], b_.m128i[2]);
3368
+ r_.m128i[3] = simde_mm_xor_si128(a_.m128i[3], b_.m128i[3]);
3369
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3370
+ r_.i32f = a_.i32f ^ b_.i32f;
3371
+ #else
3372
+ SIMDE__VECTORIZE
3373
+ for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
3374
+ r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i];
3375
+ }
3376
+ #endif
3377
+
3378
+ return simde__m512i_from_private(r_);
3379
+ #endif
3380
+ }
3381
+ #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
3382
+ #define _mm512_xor_si512(a, b) simde_mm512_xor_si512(a, b)
3383
+ #endif
3384
+
3385
+ SIMDE__END_DECLS
3386
+
3387
+ HEDLEY_DIAGNOSTIC_POP
3388
+
3389
+ #endif /* !defined(SIMDE__AVX512F_H) */