minimap2 0.2.25.0 → 0.2.25.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (123) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -3
  3. data/ext/minimap2/Makefile +6 -2
  4. data/ext/minimap2/NEWS.md +38 -0
  5. data/ext/minimap2/README.md +9 -3
  6. data/ext/minimap2/align.c +5 -3
  7. data/ext/minimap2/cookbook.md +2 -2
  8. data/ext/minimap2/format.c +7 -4
  9. data/ext/minimap2/kalloc.c +20 -1
  10. data/ext/minimap2/kalloc.h +13 -2
  11. data/ext/minimap2/ksw2.h +1 -0
  12. data/ext/minimap2/ksw2_extd2_sse.c +1 -1
  13. data/ext/minimap2/ksw2_exts2_sse.c +79 -40
  14. data/ext/minimap2/ksw2_extz2_sse.c +1 -1
  15. data/ext/minimap2/lchain.c +15 -16
  16. data/ext/minimap2/lib/simde/CONTRIBUTING.md +114 -0
  17. data/ext/minimap2/lib/simde/COPYING +20 -0
  18. data/ext/minimap2/lib/simde/README.md +333 -0
  19. data/ext/minimap2/lib/simde/amalgamate.py +58 -0
  20. data/ext/minimap2/lib/simde/meson.build +33 -0
  21. data/ext/minimap2/lib/simde/netlify.toml +20 -0
  22. data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +140 -0
  23. data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +137 -0
  24. data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +142 -0
  25. data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +145 -0
  26. data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +140 -0
  27. data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +145 -0
  28. data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +140 -0
  29. data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +143 -0
  30. data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +137 -0
  31. data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +141 -0
  32. data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +147 -0
  33. data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +141 -0
  34. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +134 -0
  35. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +138 -0
  36. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +134 -0
  37. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +137 -0
  38. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +131 -0
  39. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +135 -0
  40. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +141 -0
  41. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +135 -0
  42. data/ext/minimap2/lib/simde/simde/arm/neon.h +97 -0
  43. data/ext/minimap2/lib/simde/simde/check.h +267 -0
  44. data/ext/minimap2/lib/simde/simde/debug-trap.h +83 -0
  45. data/ext/minimap2/lib/simde/simde/hedley.h +1899 -0
  46. data/ext/minimap2/lib/simde/simde/simde-arch.h +445 -0
  47. data/ext/minimap2/lib/simde/simde/simde-common.h +697 -0
  48. data/ext/minimap2/lib/simde/simde/x86/avx.h +5385 -0
  49. data/ext/minimap2/lib/simde/simde/x86/avx2.h +2402 -0
  50. data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +391 -0
  51. data/ext/minimap2/lib/simde/simde/x86/avx512f.h +3389 -0
  52. data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +112 -0
  53. data/ext/minimap2/lib/simde/simde/x86/fma.h +659 -0
  54. data/ext/minimap2/lib/simde/simde/x86/mmx.h +2210 -0
  55. data/ext/minimap2/lib/simde/simde/x86/sse.h +3696 -0
  56. data/ext/minimap2/lib/simde/simde/x86/sse2.h +5991 -0
  57. data/ext/minimap2/lib/simde/simde/x86/sse3.h +343 -0
  58. data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +1783 -0
  59. data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +105 -0
  60. data/ext/minimap2/lib/simde/simde/x86/ssse3.h +1053 -0
  61. data/ext/minimap2/lib/simde/simde/x86/svml.h +543 -0
  62. data/ext/minimap2/lib/simde/test/CMakeLists.txt +166 -0
  63. data/ext/minimap2/lib/simde/test/arm/meson.build +4 -0
  64. data/ext/minimap2/lib/simde/test/arm/neon/meson.build +23 -0
  65. data/ext/minimap2/lib/simde/test/arm/neon/skel.c +871 -0
  66. data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +134 -0
  67. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +39 -0
  68. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +10 -0
  69. data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +1260 -0
  70. data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +873 -0
  71. data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +1084 -0
  72. data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +1260 -0
  73. data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +18 -0
  74. data/ext/minimap2/lib/simde/test/arm/test-arm.c +20 -0
  75. data/ext/minimap2/lib/simde/test/arm/test-arm.h +8 -0
  76. data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +171 -0
  77. data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +68 -0
  78. data/ext/minimap2/lib/simde/test/meson.build +64 -0
  79. data/ext/minimap2/lib/simde/test/munit/COPYING +21 -0
  80. data/ext/minimap2/lib/simde/test/munit/Makefile +55 -0
  81. data/ext/minimap2/lib/simde/test/munit/README.md +54 -0
  82. data/ext/minimap2/lib/simde/test/munit/example.c +351 -0
  83. data/ext/minimap2/lib/simde/test/munit/meson.build +37 -0
  84. data/ext/minimap2/lib/simde/test/munit/munit.c +2055 -0
  85. data/ext/minimap2/lib/simde/test/munit/munit.h +535 -0
  86. data/ext/minimap2/lib/simde/test/run-tests.c +20 -0
  87. data/ext/minimap2/lib/simde/test/run-tests.h +260 -0
  88. data/ext/minimap2/lib/simde/test/x86/avx.c +13752 -0
  89. data/ext/minimap2/lib/simde/test/x86/avx2.c +9977 -0
  90. data/ext/minimap2/lib/simde/test/x86/avx512bw.c +2664 -0
  91. data/ext/minimap2/lib/simde/test/x86/avx512f.c +10416 -0
  92. data/ext/minimap2/lib/simde/test/x86/avx512vl.c +210 -0
  93. data/ext/minimap2/lib/simde/test/x86/fma.c +2557 -0
  94. data/ext/minimap2/lib/simde/test/x86/meson.build +33 -0
  95. data/ext/minimap2/lib/simde/test/x86/mmx.c +2878 -0
  96. data/ext/minimap2/lib/simde/test/x86/skel.c +2984 -0
  97. data/ext/minimap2/lib/simde/test/x86/sse.c +5121 -0
  98. data/ext/minimap2/lib/simde/test/x86/sse2.c +9860 -0
  99. data/ext/minimap2/lib/simde/test/x86/sse3.c +486 -0
  100. data/ext/minimap2/lib/simde/test/x86/sse4.1.c +3446 -0
  101. data/ext/minimap2/lib/simde/test/x86/sse4.2.c +101 -0
  102. data/ext/minimap2/lib/simde/test/x86/ssse3.c +2084 -0
  103. data/ext/minimap2/lib/simde/test/x86/svml.c +1545 -0
  104. data/ext/minimap2/lib/simde/test/x86/test-avx.h +16 -0
  105. data/ext/minimap2/lib/simde/test/x86/test-avx512.h +25 -0
  106. data/ext/minimap2/lib/simde/test/x86/test-mmx.h +13 -0
  107. data/ext/minimap2/lib/simde/test/x86/test-sse.h +13 -0
  108. data/ext/minimap2/lib/simde/test/x86/test-sse2.h +13 -0
  109. data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +196 -0
  110. data/ext/minimap2/lib/simde/test/x86/test-x86.c +48 -0
  111. data/ext/minimap2/lib/simde/test/x86/test-x86.h +8 -0
  112. data/ext/minimap2/main.c +13 -6
  113. data/ext/minimap2/map.c +0 -5
  114. data/ext/minimap2/minimap.h +40 -31
  115. data/ext/minimap2/minimap2.1 +19 -5
  116. data/ext/minimap2/misc/paftools.js +545 -24
  117. data/ext/minimap2/options.c +1 -1
  118. data/ext/minimap2/pyproject.toml +2 -0
  119. data/ext/minimap2/python/mappy.pyx +3 -1
  120. data/ext/minimap2/seed.c +1 -1
  121. data/ext/minimap2/setup.py +32 -22
  122. data/lib/minimap2/version.rb +1 -1
  123. metadata +100 -3
@@ -0,0 +1,2984 @@
1
+ /* These are just some skeletons I've been using to speed up the
2
+ process of creating new tests for SSE functions. */
3
+
4
+ static MunitResult
5
+ test_simde_mm_xxx_epi8(const MunitParameter params[], void* data) {
6
+ (void) params;
7
+ (void) data;
8
+
9
+ const struct {
10
+ simde__m128i a;
11
+ simde__m128i b;
12
+ simde__m128i r;
13
+ } test_vec[8] = {
14
+
15
+ };
16
+
17
+ printf("\n");
18
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
19
+ simde__m128i_private a, b, r;
20
+
21
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
22
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
23
+
24
+ r = simde__m128i_to_private(simde_mm_xxx_epi8(simde__m128i_from_private(a), simde__m128i_from_private(b)));
25
+
26
+ printf(" { simde_mm_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
27
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
28
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
29
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")),\n",
30
+ a.i8[15], a.i8[14], a.i8[13], a.i8[12], a.i8[11], a.i8[10], a.i8[ 9], a.i8[ 8],
31
+ a.i8[ 7], a.i8[ 6], a.i8[ 5], a.i8[ 4], a.i8[ 3], a.i8[ 2], a.i8[ 1], a.i8[ 0]);
32
+ printf(" simde_mm_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
33
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
34
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
35
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")),\n",
36
+ b.i8[15], b.i8[14], b.i8[13], b.i8[12], b.i8[11], b.i8[10], b.i8[ 9], b.i8[ 8],
37
+ b.i8[ 7], b.i8[ 6], b.i8[ 5], b.i8[ 4], b.i8[ 3], b.i8[ 2], b.i8[ 1], b.i8[ 0]);
38
+ printf(" simde_mm_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
39
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
40
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
41
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")) },\n",
42
+ r.i8[15], r.i8[14], r.i8[13], r.i8[12], r.i8[11], r.i8[10], r.i8[ 9], r.i8[ 8],
43
+ r.i8[ 7], r.i8[ 6], r.i8[ 5], r.i8[ 4], r.i8[ 3], r.i8[ 2], r.i8[ 1], r.i8[ 0]);
44
+ }
45
+ return MUNIT_FAIL;
46
+
47
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
48
+ simde__m128i r = simde_mm_xxx_epi8(test_vec[i].a, test_vec[i].b);
49
+ simde_assert_m128i_i8(r, ==, test_vec[i].r);
50
+ }
51
+
52
+ return MUNIT_OK;
53
+ }
54
+
55
+ static MunitResult
56
+ test_simde_mm_xxx_epi16(const MunitParameter params[], void* data) {
57
+ (void) params;
58
+ (void) data;
59
+
60
+ const struct {
61
+ simde__m128i a;
62
+ simde__m128i b;
63
+ simde__m128i r;
64
+ } test_vec[8] = {
65
+
66
+ };
67
+
68
+ printf("\n");
69
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
70
+ simde__m128i_private a, b, r;
71
+
72
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
73
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
74
+
75
+ r = simde__m128i_to_private(simde_mm_xxx_epi16(simde__m128i_from_private(a), simde__m128i_from_private(b)));
76
+
77
+ printf(" { simde_mm_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
78
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")),\n",
79
+ a.i16[7], a.i16[6], a.i16[5], a.i16[4], a.i16[3], a.i16[2], a.i16[1], a.i16[0]);
80
+ printf(" simde_mm_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
81
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")),\n",
82
+ b.i16[7], b.i16[6], b.i16[5], b.i16[4], b.i16[3], b.i16[2], b.i16[1], b.i16[0]);
83
+ printf(" simde_mm_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
84
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")) },\n",
85
+ r.i16[7], r.i16[6], r.i16[5], r.i16[4], r.i16[3], r.i16[2], r.i16[1], r.i16[0]);
86
+ }
87
+ return MUNIT_FAIL;
88
+
89
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
90
+ simde__m128i r = simde_mm_xxx_epi16(test_vec[i].a, test_vec[i].b);
91
+ simde_assert_m128i_i16(r, ==, test_vec[i].r);
92
+ }
93
+
94
+ return MUNIT_OK;
95
+ }
96
+
97
+ static MunitResult
98
+ test_simde_mm_xxx_epi32(const MunitParameter params[], void* data) {
99
+ (void) params;
100
+ (void) data;
101
+
102
+ const struct {
103
+ simde__m128i a;
104
+ simde__m128i b;
105
+ simde__m128i r;
106
+ } test_vec[8] = {
107
+
108
+ };
109
+
110
+ printf("\n");
111
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
112
+ simde__m128i_private a, b, r;
113
+
114
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
115
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
116
+
117
+ r = simde__m128i_to_private(simde_mm_xxx_epi32(simde__m128i_from_private(a), simde__m128i_from_private(b)));
118
+
119
+ printf(" { simde_mm_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")),\n",
120
+ a.i32[3], a.i32[2], a.i32[1], a.i32[0]);
121
+ printf(" simde_mm_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")),\n",
122
+ b.i32[3], b.i32[2], b.i32[1], b.i32[0]);
123
+ printf(" simde_mm_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")) },\n",
124
+ r.i32[3], r.i32[2], r.i32[1], r.i32[0]);
125
+ }
126
+ return MUNIT_FAIL;
127
+
128
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
129
+ simde__m128i r = simde_mm_xxx_epi32(test_vec[i].a, test_vec[i].b);
130
+ simde_assert_m128i_i32(r, ==, test_vec[i].r);
131
+ }
132
+
133
+ return MUNIT_OK;
134
+ }
135
+
136
+ static MunitResult
137
+ test_simde_mm_xxx_epi64(const MunitParameter params[], void* data) {
138
+ (void) params;
139
+ (void) data;
140
+
141
+ const struct {
142
+ simde__m128i a;
143
+ simde__m128i b;
144
+ simde__m128i r;
145
+ } test_vec[8] = {
146
+
147
+ };
148
+
149
+ printf("\n");
150
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
151
+ simde__m128i_private a, b, r;
152
+
153
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
154
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
155
+
156
+ r = simde__m128i_to_private(simde_mm_xxx_epi64(simde__m128i_from_private(a), simde__m128i_from_private(b)));
157
+
158
+ printf(" { simde_mm_set_epi64x(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n", a.i64[1], a.i64[0]);
159
+ printf(" simde_mm_set_epi64x(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n", b.i64[1], b.i64[0]);
160
+ printf(" simde_mm_set_epi64x(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")) },\n", r.i64[1], r.i64[0]);
161
+ }
162
+ return MUNIT_FAIL;
163
+
164
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
165
+ simde__m128i r = simde_mm_xxx_epi64(test_vec[i].a, test_vec[i].b);
166
+ simde_assert_m128i_i64(r, ==, test_vec[i].r);
167
+ }
168
+
169
+ return MUNIT_OK;
170
+ }
171
+
172
+ static MunitResult
173
+ test_simde_mm_xxx_ps(const MunitParameter params[], void* data) {
174
+ (void) params;
175
+ (void) data;
176
+
177
+ const struct {
178
+ simde__m128 a;
179
+ simde__m128 b;
180
+ simde__m128 r;
181
+ } test_vec[8] = {
182
+
183
+ };
184
+
185
+ printf("\n");
186
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
187
+ simde__m128_private a, b, r;
188
+
189
+ for (size_t j = 0 ; j < sizeof(simde__m128) / sizeof(simde_float32) ; j++) {
190
+ a.f32[j] = (simde_float32) (round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0);
191
+ b.f32[j] = (simde_float32) (round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0);
192
+ }
193
+
194
+ r = simde__m128_to_private(simde_mm_xxx_ps(simde__m128_from_private(a), simde__m128_from_private(b)));
195
+
196
+ printf(" { simde_mm_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)),\n",
197
+ 9, a.f32[3], 9, a.f32[2], 9, a.f32[1], 9, a.f32[0]);
198
+ printf(" simde_mm_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)),\n",
199
+ 9, b.f32[3], 9, b.f32[2], 9, b.f32[1], 9, b.f32[0]);
200
+ printf(" simde_mm_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)) },\n",
201
+ 9, r.f32[3], 9, r.f32[2], 9, r.f32[1], 9, r.f32[0]);
202
+ }
203
+ return MUNIT_FAIL;
204
+
205
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
206
+ simde__m128 r = simde_mm_xxx_ps(test_vec[i].a, test_vec[i].b);
207
+ simde_assert_m128_close(r, test_vec[i].r, 1);
208
+ }
209
+
210
+ return MUNIT_OK;
211
+ }
212
+
213
+ static MunitResult
214
+ test_simde_mm_xxx_pd(const MunitParameter params[], void* data) {
215
+ (void) params;
216
+ (void) data;
217
+
218
+ const struct {
219
+ simde__m128d a;
220
+ simde__m128d b;
221
+ simde__m128d r;
222
+ } test_vec[8] = {
223
+
224
+ };
225
+
226
+ printf("\n");
227
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
228
+ simde__m128d_private a, b, r;
229
+
230
+ for (size_t j = 0 ; j < sizeof(simde__m128) / sizeof(simde_float64) ; j++) {
231
+ a.f64[j] = round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0;
232
+ b.f64[j] = round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0;
233
+ }
234
+
235
+ r = simde__m128d_to_private(simde_mm_xxx_pd(simde__m128d_from_private(a), simde__m128d_from_private(b)));
236
+
237
+ printf(" { simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n", 8, a.f64[1], 8, a.f64[0]);
238
+ printf(" simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n", 8, b.f64[1], 8, b.f64[0]);
239
+ printf(" simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)) },\n", 8, r.f64[1], 8, r.f64[0]);
240
+ }
241
+ return MUNIT_FAIL;
242
+
243
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
244
+ simde__m128d r = simde_mm_xxx_pd(test_vec[i].a, test_vec[i].b);
245
+ simde_assert_m128d_close(r, test_vec[i].r, 1);
246
+ }
247
+
248
+ return MUNIT_OK;
249
+ }
250
+
251
+ static MunitResult
252
+ test_simde_mm_cmpxxx_sd(const MunitParameter params[], void* data) {
253
+ (void) params;
254
+ (void) data;
255
+
256
+ const struct {
257
+ simde__m128d a;
258
+ simde__m128d b;
259
+ simde__m128d r;
260
+ } test_vec[8] = {
261
+
262
+ };
263
+
264
+ printf("\n");
265
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
266
+ simde__m128d_private a, b, r;
267
+
268
+ for (size_t j = 0 ; j < sizeof(simde__m128) / sizeof(simde_float64) ; j++) {
269
+ a.f64[j] = round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0;
270
+ b.f64[j] = round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0;
271
+ }
272
+
273
+ r = simde__m128d_to_private(simde_mm_cmpxxx_sd(simde__m128d_from_private(a), simde__m128d_from_private(b)));
274
+
275
+ printf(" { simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n", 8, a.f64[1], 8, a.f64[0]);
276
+ printf(" simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n", 8, b.f64[1], 8, b.f64[0]);
277
+ printf(" simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_F64_ALL_%s) },\n", 8, r.f64[1], r.f64[0] == 0.0 ? "UNSET" : "SET");
278
+ }
279
+ return MUNIT_FAIL;
280
+
281
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
282
+ simde__m128d r = simde_mm_xxx_pd(test_vec[i].a, test_vec[i].b);
283
+ simde_assert_m128d_close(r, test_vec[i].r, 1);
284
+ }
285
+
286
+ return MUNIT_OK;
287
+ }
288
+
289
+ static MunitResult
290
+ test_simde_mm_xxx_sd(const MunitParameter params[], void* data) {
291
+ (void) params;
292
+ (void) data;
293
+
294
+ const struct {
295
+ simde__m128d a;
296
+ simde__m128d b;
297
+ simde__m128d r;
298
+ } test_vec[8] = {
299
+
300
+ };
301
+
302
+ printf("\n");
303
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
304
+ simde__m128d_private a, b, r;
305
+
306
+ for (size_t j = 0 ; j < sizeof(simde__m128) / sizeof(simde_float64) ; j++) {
307
+ a.f64[j] = round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0;
308
+ b.f64[j] = round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0;
309
+ }
310
+
311
+ r = simde_mm_xxx_sd(a, b);
312
+
313
+ printf(" { simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n", 8, a.f64[1], 8, a.f64[0]);
314
+ printf(" simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n", 8, b.f64[1], 8, b.f64[0]);
315
+ printf(" simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)) },\n", 8, r.f64[1], 8, r.f64[0]);
316
+ }
317
+ return MUNIT_FAIL;
318
+
319
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
320
+ simde__m128d r = simde_mm_xxx_sd(test_vec[i].a, test_vec[i].b);
321
+ simde_assert_m128d_close(r, test_vec[i].r, 1);
322
+ }
323
+
324
+ return MUNIT_OK;
325
+ }
326
+
327
+ static MunitResult
328
+ test_simde_mm_xxx_si64(const MunitParameter params[], void* data) {
329
+ (void) params;
330
+ (void) data;
331
+
332
+ const struct {
333
+ simde__m64 a;
334
+ simde__m64 b;
335
+ simde__m64 r;
336
+ } test_vec[8] = {
337
+
338
+ };
339
+
340
+ printf("\n");
341
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
342
+ simde__m64_private a, b, r;
343
+
344
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
345
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
346
+
347
+ r = simde_mm_xxx_si64(a, b);
348
+
349
+ printf(" { simde_mm_cvtsi64_m64(INT64_C(%20" PRId64 ")),\n", a.i64[0]);
350
+ printf(" simde_mm_cvtsi64_m64(INT64_C(%20" PRId64 ")),\n", b.i64[0]);
351
+ printf(" simde_mm_cvtsi64_m64(INT64_C(%20" PRId64 ")), },\n", r.i64[0]);
352
+ }
353
+ return MUNIT_FAIL;
354
+
355
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
356
+ simde__m64 r = simde_mm_xxx_si64(test_vec[i].a, test_vec[i].b);
357
+ simde_assert_m64_i64(r, ==, test_vec[i].r);
358
+ }
359
+
360
+ return MUNIT_OK;
361
+ }
362
+
363
+ static MunitResult
364
+ test_simde_mm_xxx_epi8(const MunitParameter params[], void* data) {
365
+ (void) params;
366
+ (void) data;
367
+
368
+ const struct {
369
+ simde__m128i a;
370
+ simde__m128i b;
371
+ simde__m128i r;
372
+ } test_vec[8] = {
373
+
374
+ };
375
+
376
+ printf("\n");
377
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
378
+ simde__m128i_private a, b, r;
379
+
380
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
381
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
382
+
383
+ r = simde__m128i_to_private(simde_mm_xxx_epi8(simde__m128i_from_private(a), simde__m128i_from_private(b)));
384
+
385
+ printf(" { simde_mm_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
386
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
387
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
388
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")),\n",
389
+ a.i8[15], a.i8[14], a.i8[13], a.i8[12], a.i8[11], a.i8[10], a.i8[ 9], a.i8[ 8],
390
+ a.i8[ 7], a.i8[ 6], a.i8[ 5], a.i8[ 4], a.i8[ 3], a.i8[ 2], a.i8[ 1], a.i8[ 0]);
391
+ printf(" simde_mm_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
392
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
393
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
394
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")),\n",
395
+ b.i8[15], b.i8[14], b.i8[13], b.i8[12], b.i8[11], b.i8[10], b.i8[ 9], b.i8[ 8],
396
+ b.i8[ 7], b.i8[ 6], b.i8[ 5], b.i8[ 4], b.i8[ 3], b.i8[ 2], b.i8[ 1], b.i8[ 0]);
397
+ printf(" simde_mm_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
398
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
399
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
400
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")) },\n",
401
+ r.i8[15], r.i8[14], r.i8[13], r.i8[12], r.i8[11], r.i8[10], r.i8[ 9], r.i8[ 8],
402
+ r.i8[ 7], r.i8[ 6], r.i8[ 5], r.i8[ 4], r.i8[ 3], r.i8[ 2], r.i8[ 1], r.i8[ 0]);
403
+ }
404
+ return MUNIT_FAIL;
405
+
406
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
407
+ simde__m128i r = simde_mm_xxx_epi8(test_vec[i].a, test_vec[i].b);
408
+ simde_assert_m128i_i8(r, ==, test_vec[i].r);
409
+ }
410
+
411
+ return MUNIT_OK;
412
+ }
413
+
414
+ static MunitResult
415
+ test_simde_mm_xxx_epi16(const MunitParameter params[], void* data) {
416
+ (void) params;
417
+ (void) data;
418
+
419
+ const struct {
420
+ simde__m128i a;
421
+ simde__m128i b;
422
+ simde__m128i r;
423
+ } test_vec[8] = {
424
+
425
+ };
426
+
427
+ printf("\n");
428
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
429
+ simde__m128i_private a, b, r;
430
+
431
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
432
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
433
+
434
+ r = simde__m128i_to_private(simde_mm_xxx_epi16(simde__m128i_from_private(a), simde__m128i_from_private(b)));
435
+
436
+ printf(" { simde_mm_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
437
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")),\n",
438
+ a.i16[7], a.i16[6], a.i16[5], a.i16[4], a.i16[3], a.i16[2], a.i16[1], a.i16[0]);
439
+ printf(" simde_mm_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
440
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")),\n",
441
+ b.i16[7], b.i16[6], b.i16[5], b.i16[4], b.i16[3], b.i16[2], b.i16[1], b.i16[0]);
442
+ printf(" simde_mm_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
443
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")) },\n",
444
+ r.i16[7], r.i16[6], r.i16[5], r.i16[4], r.i16[3], r.i16[2], r.i16[1], r.i16[0]);
445
+ }
446
+ return MUNIT_FAIL;
447
+
448
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
449
+ simde__m128i r = simde_mm_xxx_epi16(test_vec[i].a, test_vec[i].b);
450
+ simde_assert_m128i_i16(r, ==, test_vec[i].r);
451
+ }
452
+
453
+ return MUNIT_OK;
454
+ }
455
+
456
+ static MunitResult
457
+ test_simde_mm_xxx_epu8(const MunitParameter params[], void* data) {
458
+ (void) params;
459
+ (void) data;
460
+
461
+ const struct {
462
+ simde__m128i a;
463
+ simde__m128i b;
464
+ simde__m128i r;
465
+ } test_vec[8] = {
466
+
467
+ };
468
+
469
+ printf("\n");
470
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
471
+ simde__m128i_private a, b, r;
472
+
473
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
474
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
475
+
476
+ r = simde__m128i_to_private(simde_mm_xxx_epu8(simde__m128i_from_private(a), simde__m128i_from_private(b)));
477
+
478
+ printf(" { simde_x_mm_set_epu8(UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
479
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
480
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
481
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 ")),\n",
482
+ a.u8[15], a.u8[14], a.u8[13], a.u8[12], a.u8[11], a.u8[10], a.u8[ 9], a.u8[ 8],
483
+ a.u8[ 7], a.u8[ 6], a.u8[ 5], a.u8[ 4], a.u8[ 3], a.u8[ 2], a.u8[ 1], a.u8[ 0]);
484
+ printf(" simde_x_mm_set_epu8(UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
485
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
486
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
487
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 ")),\n",
488
+ b.u8[15], b.u8[14], b.u8[13], b.u8[12], b.u8[11], b.u8[10], b.u8[ 9], b.u8[ 8],
489
+ b.u8[ 7], b.u8[ 6], b.u8[ 5], b.u8[ 4], b.u8[ 3], b.u8[ 2], b.u8[ 1], b.u8[ 0]);
490
+ printf(" simde_x_mm_set_epu8(UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
491
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
492
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
493
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 ")) },\n",
494
+ r.u8[15], r.u8[14], r.u8[13], r.u8[12], r.u8[11], r.u8[10], r.u8[ 9], r.u8[ 8],
495
+ r.u8[ 7], r.u8[ 6], r.u8[ 5], r.u8[ 4], r.u8[ 3], r.u8[ 2], r.u8[ 1], r.u8[ 0]);
496
+ }
497
+ return MUNIT_FAIL;
498
+
499
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
500
+ simde__m128i r = simde_mm_xxx_epu8(test_vec[i].a, test_vec[i].b);
501
+ simde_assert_m128i_u8(r, ==, test_vec[i].r);
502
+ }
503
+
504
+ return MUNIT_OK;
505
+ }
506
+
507
+ static MunitResult
508
+ test_simde_mm_xxx_epu16(const MunitParameter params[], void* data) {
509
+ (void) params;
510
+ (void) data;
511
+
512
+ const struct {
513
+ simde__m128i a;
514
+ simde__m128i b;
515
+ simde__m128i r;
516
+ } test_vec[8] = {
517
+
518
+ };
519
+
520
+ printf("\n");
521
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
522
+ simde__m128i_private a, b, r;
523
+
524
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
525
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
526
+
527
+ r = simde__m128i_to_private(simde_mm_xxx_epu16(simde__m128i_from_private(a), simde__m128i_from_private(b)));
528
+
529
+ printf(" { simde_x_mm_set_epu16(UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 "),\n"
530
+ " UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 ")),\n",
531
+ a.u16[7], a.u16[6], a.u16[5], a.u16[4], a.u16[3], a.u16[2], a.u16[1], a.u16[0]);
532
+ printf(" simde_x_mm_set_epu16(UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 "),\n"
533
+ " UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 ")),\n",
534
+ b.u16[7], b.u16[6], b.u16[5], b.u16[4], b.u16[3], b.u16[2], b.u16[1], b.u16[0]);
535
+ printf(" simde_x_mm_set_epu16(UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 "),\n"
536
+ " UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 ")) },\n",
537
+ r.u16[7], r.u16[6], r.u16[5], r.u16[4], r.u16[3], r.u16[2], r.u16[1], r.u16[0]);
538
+ }
539
+ return MUNIT_FAIL;
540
+
541
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
542
+ simde__m128i r = simde_mm_xxx_epu16(test_vec[i].a, test_vec[i].b);
543
+ simde_assert_m128i_u16(r, ==, test_vec[i].r);
544
+ }
545
+
546
+ return MUNIT_OK;
547
+ }
548
+
549
+ static MunitResult
550
+ test_simde_mm_xxx_epu32(const MunitParameter params[], void* data) {
551
+ (void) params;
552
+ (void) data;
553
+
554
+ const struct {
555
+ simde__m128i a;
556
+ simde__m128i b;
557
+ simde__m128i r;
558
+ } test_vec[8] = {
559
+
560
+ };
561
+
562
+ printf("\n");
563
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
564
+ simde__m128i_private a, b, r;
565
+
566
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
567
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
568
+
569
+ r = simde__m128i_to_private(simde_mm_xxx_epu32(simde__m128i_from_private(a), simde__m128i_from_private(b)));
570
+
571
+ printf(" { simde_x_mm_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
572
+ a.u32[3], a.u32[2], a.u32[1], a.u32[0]);
573
+ printf(" simde_x_mm_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
574
+ b.u32[3], b.u32[2], b.u32[1], b.u32[0]);
575
+ printf(" simde_x_mm_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")) },\n",
576
+ r.u32[3], r.u32[2], r.u32[1], r.u32[0]);
577
+ }
578
+ return MUNIT_FAIL;
579
+
580
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
581
+ simde__m128i r = simde_mm_xxx_epu32(test_vec[i].a, test_vec[i].b);
582
+ simde_assert_m128i_u32(r, ==, test_vec[i].r);
583
+ }
584
+
585
+ return MUNIT_OK;
586
+ }
587
+
588
+ static MunitResult
589
+ test_simde_mm_xxx_epu64(const MunitParameter params[], void* data) {
590
+ (void) params;
591
+ (void) data;
592
+
593
+ const struct {
594
+ simde__m128i a;
595
+ simde__m128i b;
596
+ simde__m128i r;
597
+ } test_vec[8] = {
598
+
599
+ };
600
+
601
+ printf("\n");
602
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
603
+ simde__m128i_private a, b, r;
604
+
605
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
606
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
607
+
608
+ r = simde__m128i_to_private(simde_mm_xxx_epu64(simde__m128i_from_private(a), simde__m128i_from_private(b)));
609
+
610
+ printf(" { simde_x_mm_set_epu64x(UINT64_C(%20" PRIu64 "), UINT64_C(%20" PRIu64 ")),\n", a.u64[1], a.u64[0]);
611
+ printf(" simde_x_mm_set_epu64x(UINT64_C(%20" PRIu64 "), UINT64_C(%20" PRIu64 ")),\n", b.u64[1], b.u64[0]);
612
+ printf(" simde_x_mm_set_epu64x(UINT64_C(%20" PRIu64 "), UINT64_C(%20" PRIu64 ")) },\n", r.u64[1], r.u64[0]);
613
+ }
614
+ return MUNIT_FAIL;
615
+
616
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
617
+ simde__m128i r = simde_mm_xxx_epu64(test_vec[i].a, test_vec[i].b);
618
+ simde_assert_m128i_u64(r, ==, test_vec[i].r);
619
+ }
620
+
621
+ return MUNIT_OK;
622
+ }
623
+
624
+ static MunitResult
625
+ test_simde_mm_xxx_epi64(const MunitParameter params[], void* data) {
626
+ (void) params;
627
+ (void) data;
628
+
629
+ const struct {
630
+ simde__m128i a;
631
+ simde__m128i b;
632
+ simde__m128i r;
633
+ } test_vec[8] = {
634
+
635
+ };
636
+
637
+ printf("\n");
638
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
639
+ simde__m128i_private a, b, r;
640
+
641
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
642
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
643
+
644
+ r = simde__m128i_to_private(simde_mm_xxx_epi64(simde__m128i_from_private(a), simde__m128i_from_private(b)));
645
+
646
+ printf(" { simde_mm_set_epi64x(INT64_C(%19" PRId64 "), INT64_C(%19" PRId64 ")),\n", a.i64[1], a.i64[0]);
647
+ printf(" simde_mm_set_epi64x(INT64_C(%19" PRId64 "), INT64_C(%19" PRId64 ")),\n", b.i64[1], b.i64[0]);
648
+ printf(" simde_mm_set_epi64x(INT64_C(%19" PRId64 "), INT64_C(%19" PRId64 ")) },\n", r.i64[1], r.i64[0]);
649
+ }
650
+ return MUNIT_FAIL;
651
+
652
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
653
+ simde__m128i r = simde_mm_xxx_epi64(test_vec[i].a, test_vec[i].b);
654
+ simde_assert_m128i_i64(r, ==, test_vec[i].r);
655
+ }
656
+
657
+ return MUNIT_OK;
658
+ }
659
+
660
+ static MunitResult
661
+ test_simde_mm_xxx_pi8(const MunitParameter params[], void* data) {
662
+ (void) params;
663
+ (void) data;
664
+
665
+ const struct {
666
+ simde__m64 a;
667
+ simde__m64 b;
668
+ simde__m64 r;
669
+ } test_vec[8] = {
670
+
671
+ };
672
+
673
+ printf("\n");
674
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
675
+ simde__m64_private a, b, r;
676
+
677
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
678
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
679
+
680
+ r = simde__m64_to_private(simde_mm_xxx_pi8(simde__m64_from_private(a), simde__m64_from_private(b)));
681
+
682
+ printf(" { simde_mm_set_pi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
683
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")),\n",
684
+ a.i8[ 7], a.i8[ 6], a.i8[ 5], a.i8[ 4], a.i8[ 3], a.i8[ 2], a.i8[ 1], a.i8[ 0]);
685
+ printf(" simde_mm_set_pi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
686
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")),\n",
687
+ b.i8[ 7], b.i8[ 6], b.i8[ 5], b.i8[ 4], b.i8[ 3], b.i8[ 2], b.i8[ 1], b.i8[ 0]);
688
+ printf(" simde_mm_set_pi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
689
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")) },\n",
690
+ r.i8[ 7], r.i8[ 6], r.i8[ 5], r.i8[ 4], r.i8[ 3], r.i8[ 2], r.i8[ 1], r.i8[ 0]);
691
+ }
692
+ return MUNIT_FAIL;
693
+
694
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
695
+ simde__m64 r = simde_mm_xxx_pi8(test_vec[i].a, test_vec[i].b);
696
+ simde_assert_m64_i8(r, ==, test_vec[i].r);
697
+ }
698
+
699
+ return MUNIT_OK;
700
+ }
701
+
702
+ static MunitResult
703
+ test_simde_mm_xxx_pu8(const MunitParameter params[], void* data) {
704
+ (void) params;
705
+ (void) data;
706
+
707
+ const struct {
708
+ simde__m64 a;
709
+ simde__m64 b;
710
+ simde__m64 r;
711
+ } test_vec[8] = {
712
+
713
+ };
714
+
715
+ printf("\n");
716
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
717
+ simde__m64_private a, b, r;
718
+
719
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
720
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
721
+
722
+ r = simde__m64_to_private(simde_mm_xxx_pu8(simde__m64_from_private(a), simde__m64_from_private(b)));
723
+
724
+ printf(" { simde_x_mm_set_pu8(UINT8_C(%4" PRIu8 "), UINT8_C(%4" PRIu8 "), UINT8_C(%4" PRIu8 "), UINT8_C(%4" PRIu8 "),\n"
725
+ " UINT8_C(%4" PRIu8 "), UINT8_C(%4" PRIu8 "), UINT8_C(%4" PRIu8 "), UINT8_C(%4" PRIu8 ")),\n",
726
+ a.u8[ 7], a.u8[ 6], a.u8[ 5], a.u8[ 4], a.u8[ 3], a.u8[ 2], a.u8[ 1], a.u8[ 0]);
727
+ printf(" simde_x_mm_set_pu8(UINT8_C(%4" PRIu8 "), UINT8_C(%4" PRIu8 "), UINT8_C(%4" PRIu8 "), UINT8_C(%4" PRIu8 "),\n"
728
+ " UINT8_C(%4" PRIu8 "), UINT8_C(%4" PRIu8 "), UINT8_C(%4" PRIu8 "), UINT8_C(%4" PRIu8 ")),\n",
729
+ b.u8[ 7], b.u8[ 6], b.u8[ 5], b.u8[ 4], b.u8[ 3], b.u8[ 2], b.u8[ 1], b.u8[ 0]);
730
+ printf(" simde_x_mm_set_pu8(UINT8_C(%4" PRIu8 "), UINT8_C(%4" PRIu8 "), UINT8_C(%4" PRIu8 "), UINT8_C(%4" PRIu8 "),\n"
731
+ " UINT8_C(%4" PRIu8 "), UINT8_C(%4" PRIu8 "), UINT8_C(%4" PRIu8 "), UINT8_C(%4" PRIu8 ")) },\n",
732
+ r.u8[ 7], r.u8[ 6], r.u8[ 5], r.u8[ 4], r.u8[ 3], r.u8[ 2], r.u8[ 1], r.u8[ 0]);
733
+ }
734
+ return MUNIT_FAIL;
735
+
736
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
737
+ simde__m64 r = simde_mm_xxx_pu8(test_vec[i].a, test_vec[i].b);
738
+ simde_assert_m64_u8(r, ==, test_vec[i].r);
739
+ }
740
+
741
+ return MUNIT_OK;
742
+ }
743
+
744
+ static MunitResult
745
+ test_simde_mm_xxx_pi16(const MunitParameter params[], void* data) {
746
+ (void) params;
747
+ (void) data;
748
+
749
+ const struct {
750
+ simde__m64 a;
751
+ simde__m64 b;
752
+ simde__m64 r;
753
+ } test_vec[8] = {
754
+
755
+ };
756
+
757
+ printf("\n");
758
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
759
+ simde__m64_private a, b, r;
760
+
761
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
762
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
763
+
764
+ r = simde__m64_to_private(simde_mm_xxx_pi16(simde__m64_from_private(a), simde__m64_from_private(b)));
765
+
766
+ printf(" { simde_mm_set_pi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")),\n",
767
+ a.i16[3], a.i16[2], a.i16[1], a.i16[0]);
768
+ printf(" simde_mm_set_pi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")),\n",
769
+ b.i16[3], b.i16[2], b.i16[1], b.i16[0]);
770
+ printf(" simde_mm_set_pi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")) },\n",
771
+ r.i16[3], r.i16[2], r.i16[1], r.i16[0]);
772
+ }
773
+ return MUNIT_FAIL;
774
+
775
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
776
+ simde__m64 r = simde_mm_xxx_pi16(test_vec[i].a, test_vec[i].b);
777
+ simde_assert_m64_i16(r, ==, test_vec[i].r);
778
+ }
779
+
780
+ return MUNIT_OK;
781
+ }
782
+
783
+ static MunitResult
784
+ test_simde_mm_xxx_pu16(const MunitParameter params[], void* data) {
785
+ (void) params;
786
+ (void) data;
787
+
788
+ const struct {
789
+ simde__m64 a;
790
+ simde__m64 b;
791
+ simde__m64 r;
792
+ } test_vec[8] = {
793
+
794
+ };
795
+
796
+ printf("\n");
797
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
798
+ simde__m64_private a, b, r;
799
+
800
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
801
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
802
+
803
+ r = simde__m64_to_private(simde_mm_xxx_pu16(simde__m64_from_private(a), simde__m64_from_private(b)));
804
+
805
+ printf(" { simde_x_mm_set_pu16(UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 ")),\n",
806
+ a.u16[3], a.u16[2], a.u16[1], a.u16[0]);
807
+ printf(" simde_x_mm_set_pu16(UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 ")),\n",
808
+ b.u16[3], b.u16[2], b.u16[1], b.u16[0]);
809
+ printf(" simde_x_mm_set_pu16(UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 "), UINT16_C(%5" PRIu16 ")) },\n",
810
+ r.u16[3], r.u16[2], r.u16[1], r.u16[0]);
811
+ }
812
+ return MUNIT_FAIL;
813
+
814
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
815
+ simde__m64 r = simde_mm_xxx_pu16(test_vec[i].a, test_vec[i].b);
816
+ simde_assert_m64_u16(r, ==, test_vec[i].r);
817
+ }
818
+
819
+ return MUNIT_OK;
820
+ }
821
+
822
+ static MunitResult
823
+ test_simde_mm_xxx_pi32(const MunitParameter params[], void* data) {
824
+ (void) params;
825
+ (void) data;
826
+
827
+ const struct {
828
+ simde__m64 a;
829
+ simde__m64 b;
830
+ simde__m64 r;
831
+ } test_vec[8] = {
832
+
833
+ };
834
+
835
+ printf("\n");
836
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
837
+ simde__m64_private a, b, r;
838
+
839
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
840
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
841
+
842
+ r = simde__m64_to_private(simde_mm_xxx_pi32(simde__m64_from_private(a), simde__m64_from_private(b)));
843
+
844
+ printf(" { simde_mm_set_pi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")),\n",
845
+ a.i32[1], a.i32[0]);
846
+ printf(" simde_mm_set_pi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")),\n",
847
+ b.i32[1], b.i32[0]);
848
+ printf(" simde_mm_set_pi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")) },\n",
849
+ r.i32[1], r.i32[0]);
850
+ }
851
+ return MUNIT_FAIL;
852
+
853
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
854
+ simde__m64 r = simde_mm_xxx_pi32(test_vec[i].a, test_vec[i].b);
855
+ simde_assert_m64_i32(r, ==, test_vec[i].r);
856
+ }
857
+
858
+ return MUNIT_OK;
859
+ }
860
+
861
+ static MunitResult
862
+ test_simde_mm256_xxx_epi8(const MunitParameter params[], void* data) {
863
+ (void) params;
864
+ (void) data;
865
+
866
+ const struct {
867
+ simde__m256i a;
868
+ simde__m256i b;
869
+ simde__m256i r;
870
+ } test_vec[8] = {
871
+
872
+ };
873
+
874
+ printf("\n");
875
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
876
+ simde__m256i_private a, b, r;
877
+
878
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
879
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
880
+
881
+ r = simde__m256i_to_private(simde_mm256_xxx_epi8(simde__m256i_from_private(a), simde__m256i_from_private(b)));
882
+
883
+ printf(" { simde_mm256_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
884
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
885
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
886
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
887
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
888
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
889
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
890
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")),\n",
891
+ a.i8[31], a.i8[30], a.i8[29], a.i8[28], a.i8[27], a.i8[26], a.i8[25], a.i8[24],
892
+ a.i8[23], a.i8[22], a.i8[21], a.i8[20], a.i8[19], a.i8[18], a.i8[17], a.i8[16],
893
+ a.i8[15], a.i8[14], a.i8[13], a.i8[12], a.i8[11], a.i8[10], a.i8[ 9], a.i8[ 8],
894
+ a.i8[ 7], a.i8[ 6], a.i8[ 5], a.i8[ 4], a.i8[ 3], a.i8[ 2], a.i8[ 1], a.i8[ 0]);
895
+ printf(" simde_mm256_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
896
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
897
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
898
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
899
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
900
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
901
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
902
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")),\n",
903
+ b.i8[31], b.i8[30], b.i8[29], b.i8[28], b.i8[27], b.i8[26], b.i8[25], b.i8[24],
904
+ b.i8[23], b.i8[22], b.i8[21], b.i8[20], b.i8[19], b.i8[18], b.i8[17], b.i8[16],
905
+ b.i8[15], b.i8[14], b.i8[13], b.i8[12], b.i8[11], b.i8[10], b.i8[ 9], b.i8[ 8],
906
+ b.i8[ 7], b.i8[ 6], b.i8[ 5], b.i8[ 4], b.i8[ 3], b.i8[ 2], b.i8[ 1], b.i8[ 0]);
907
+ printf(" simde_mm256_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
908
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
909
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
910
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
911
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
912
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
913
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
914
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")) },\n",
915
+ r.i8[31], r.i8[30], r.i8[29], r.i8[28], r.i8[27], r.i8[26], r.i8[25], r.i8[24],
916
+ r.i8[23], r.i8[22], r.i8[21], r.i8[20], r.i8[19], r.i8[18], r.i8[17], r.i8[16],
917
+ r.i8[15], r.i8[14], r.i8[13], r.i8[12], r.i8[11], r.i8[10], r.i8[ 9], r.i8[ 8],
918
+ r.i8[ 7], r.i8[ 6], r.i8[ 5], r.i8[ 4], r.i8[ 3], r.i8[ 2], r.i8[ 1], r.i8[ 0]);
919
+ }
920
+ return MUNIT_FAIL;
921
+
922
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
923
+ simde__m256i r = simde_mm256_xxx_epi8(test_vec[i].a, test_vec[i].b);
924
+ simde_assert_m256i_i8(r, ==, test_vec[i].r);
925
+ }
926
+
927
+ return MUNIT_OK;
928
+ }
929
+
930
+ static MunitResult
931
+ test_simde_mm256_xxx_epi16(const MunitParameter params[], void* data) {
932
+ (void) params;
933
+ (void) data;
934
+
935
+ const struct {
936
+ simde__m256i a;
937
+ simde__m256i b;
938
+ simde__m256i r;
939
+ } test_vec[8] = {
940
+
941
+ };
942
+
943
+ printf("\n");
944
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
945
+ simde__m256i_private a, b, r;
946
+
947
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
948
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
949
+
950
+ r = simde__m256i_to_private(simde_mm256_xxx_epi16(simde__m256i_from_private(a), simde__m256i_from_private(b)));
951
+
952
+ printf(" { simde_mm256_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
953
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
954
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
955
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")),\n",
956
+ a.i16[15], a.i16[14], a.i16[13], a.i16[12], a.i16[11], a.i16[10], a.i16[ 9], a.i16[ 8],
957
+ a.i16[ 7], a.i16[ 6], a.i16[ 5], a.i16[ 4], a.i16[ 3], a.i16[ 2], a.i16[ 1], a.i16[ 0]);
958
+ printf(" simde_mm256_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
959
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
960
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
961
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")),\n",
962
+ b.i16[15], b.i16[14], b.i16[13], b.i16[12], b.i16[11], b.i16[10], b.i16[ 9], b.i16[ 8],
963
+ b.i16[ 7], b.i16[ 6], b.i16[ 5], b.i16[ 4], b.i16[ 3], b.i16[ 2], b.i16[ 1], b.i16[ 0]);
964
+ printf(" simde_mm256_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
965
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
966
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
967
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")) },\n",
968
+ r.i16[15], r.i16[14], r.i16[13], r.i16[12], r.i16[11], r.i16[10], r.i16[ 9], r.i16[ 8],
969
+ r.i16[ 7], r.i16[ 6], r.i16[ 5], r.i16[ 4], r.i16[ 3], r.i16[ 2], r.i16[ 1], r.i16[ 0]);
970
+ }
971
+ return MUNIT_FAIL;
972
+
973
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
974
+ simde__m256i r = simde_mm256_xxx_epi16(test_vec[i].a, test_vec[i].b);
975
+ simde_assert_m256i_i16(r, ==, test_vec[i].r);
976
+ }
977
+
978
+ return MUNIT_OK;
979
+ }
980
+
981
+ static MunitResult
982
+ test_simde_mm256_xxx_epi32(const MunitParameter params[], void* data) {
983
+ (void) params;
984
+ (void) data;
985
+
986
+ const struct {
987
+ simde__m256i a;
988
+ simde__m256i b;
989
+ simde__m256i r;
990
+ } test_vec[8] = {
991
+
992
+ };
993
+
994
+ printf("\n");
995
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
996
+ simde__m256i_private a, b, r;
997
+
998
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
999
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
1000
+
1001
+ r = simde__m256i_to_private(simde_mm256_xxx_epi32(simde__m256i_from_private(a), simde__m256i_from_private(b)));
1002
+
1003
+ printf(" { simde_mm256_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1004
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")),\n",
1005
+ a.i32[7], a.i32[6], a.i32[5], a.i32[4], a.i32[3], a.i32[2], a.i32[1], a.i32[0]);
1006
+ printf(" simde_mm256_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1007
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")),\n",
1008
+ b.i32[7], b.i32[6], b.i32[5], b.i32[4], b.i32[3], b.i32[2], b.i32[1], b.i32[0]);
1009
+ printf(" simde_mm256_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1010
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")) },\n",
1011
+ r.i32[7], r.i32[6], r.i32[5], r.i32[4], r.i32[3], r.i32[2], r.i32[1], r.i32[0]);
1012
+ }
1013
+ return MUNIT_FAIL;
1014
+
1015
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1016
+ simde__m256i r = simde_mm256_xxx_epi32(test_vec[i].a, test_vec[i].b);
1017
+ simde_assert_m256i_i32(r, ==, test_vec[i].r);
1018
+ }
1019
+
1020
+ return MUNIT_OK;
1021
+ }
1022
+
1023
+ static MunitResult
1024
+ test_simde_mm256_xxx_epi64(const MunitParameter params[], void* data) {
1025
+ (void) params;
1026
+ (void) data;
1027
+
1028
+ const struct {
1029
+ simde__m256i a;
1030
+ simde__m256i b;
1031
+ simde__m256i r;
1032
+ } test_vec[8] = {
1033
+
1034
+ };
1035
+
1036
+ printf("\n");
1037
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1038
+ simde__m256i_private a, b, r;
1039
+
1040
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
1041
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
1042
+
1043
+ r = simde__m256i_to_private(simde_mm256_xxx_epi64(simde__m256i_from_private(a), simde__m256i_from_private(b)));
1044
+
1045
+ printf(" { simde_mm256_set_epi64x(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
1046
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n",
1047
+ a.i64[3], a.i64[2], a.i64[1], a.i64[0]);
1048
+ printf(" simde_mm256_set_epi64x(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
1049
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n",
1050
+ b.i64[3], b.i64[2], b.i64[1], b.i64[0]);
1051
+ printf(" simde_mm256_set_epi64x(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
1052
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")) },\n",
1053
+ r.i64[3], r.i64[2], r.i64[1], r.i64[0]);
1054
+ }
1055
+ return MUNIT_FAIL;
1056
+
1057
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1058
+ simde__m256i r = simde_mm256_xxx_epi64(test_vec[i].a, test_vec[i].b);
1059
+ simde_assert_m256i_i64(r, ==, test_vec[i].r);
1060
+ }
1061
+
1062
+ return MUNIT_OK;
1063
+ }
1064
+
1065
+ static MunitResult
1066
+ test_simde_mm256_xxx_epu8(const MunitParameter params[], void* data) {
1067
+ (void) params;
1068
+ (void) data;
1069
+
1070
+ const struct {
1071
+ simde__m256i a;
1072
+ simde__m256i b;
1073
+ simde__m256i r;
1074
+ } test_vec[8] = {
1075
+
1076
+ };
1077
+
1078
+ printf("\n");
1079
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
1080
+ simde__m256i_private a, b, r;
1081
+
1082
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
1083
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
1084
+
1085
+ r = simde__m256i_to_private(simde_mm256_xxx_epu8(simde__m256i_from_private(a), simde__m256i_from_private(b)));
1086
+
1087
+ printf(" { simde_x_mm256_set_epu8(UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
1088
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
1089
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
1090
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
1091
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
1092
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
1093
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
1094
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 ")),\n",
1095
+ a.u8[31], a.u8[30], a.u8[29], a.u8[28], a.u8[27], a.u8[26], a.u8[25], a.u8[24],
1096
+ a.u8[23], a.u8[22], a.u8[21], a.u8[20], a.u8[19], a.u8[18], a.u8[17], a.u8[16],
1097
+ a.u8[15], a.u8[14], a.u8[13], a.u8[12], a.u8[11], a.u8[10], a.u8[ 9], a.u8[ 8],
1098
+ a.u8[ 7], a.u8[ 6], a.u8[ 5], a.u8[ 4], a.u8[ 3], a.u8[ 2], a.u8[ 1], a.u8[ 0]);
1099
+ printf(" simde_x_mm256_set_epu8(UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
1100
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
1101
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
1102
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
1103
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
1104
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
1105
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
1106
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 ")),\n",
1107
+ b.u8[31], b.u8[30], b.u8[29], b.u8[28], b.u8[27], b.u8[26], b.u8[25], b.u8[24],
1108
+ b.u8[23], b.u8[22], b.u8[21], b.u8[20], b.u8[19], b.u8[18], b.u8[17], b.u8[16],
1109
+ b.u8[15], b.u8[14], b.u8[13], b.u8[12], b.u8[11], b.u8[10], b.u8[ 9], b.u8[ 8],
1110
+ b.u8[ 7], b.u8[ 6], b.u8[ 5], b.u8[ 4], b.u8[ 3], b.u8[ 2], b.u8[ 1], b.u8[ 0]);
1111
+ printf(" simde_x_mm256_set_epu8(UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
1112
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
1113
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
1114
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
1115
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
1116
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
1117
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
1118
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 ")) },\n",
1119
+ r.u8[31], r.u8[30], r.u8[29], r.u8[28], r.u8[27], r.u8[26], r.u8[25], r.u8[24],
1120
+ r.u8[23], r.u8[22], r.u8[21], r.u8[20], r.u8[19], r.u8[18], r.u8[17], r.u8[16],
1121
+ r.u8[15], r.u8[14], r.u8[13], r.u8[12], r.u8[11], r.u8[10], r.u8[ 9], r.u8[ 8],
1122
+ r.u8[ 7], r.u8[ 6], r.u8[ 5], r.u8[ 4], r.u8[ 3], r.u8[ 2], r.u8[ 1], r.u8[ 0]);
1123
+ }
1124
+ return MUNIT_FAIL;
1125
+
1126
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1127
+ simde__m256i r = simde_mm256_xxx_epu8(test_vec[i].a, test_vec[i].b);
1128
+ simde_assert_m256i_u8(r, ==, test_vec[i].r);
1129
+ }
1130
+
1131
+ return MUNIT_OK;
1132
+ }
1133
+
1134
+ static MunitResult
1135
+ test_simde_mm256_xxx_epu16(const MunitParameter params[], void* data) {
1136
+ (void) params;
1137
+ (void) data;
1138
+
1139
+ const struct {
1140
+ simde__m256i a;
1141
+ simde__m256i b;
1142
+ simde__m256i r;
1143
+ } test_vec[8] = {
1144
+
1145
+ };
1146
+
1147
+ printf("\n");
1148
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
1149
+ simde__m256i_private a, b, r;
1150
+
1151
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
1152
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
1153
+
1154
+ r = simde__m256i_to_private(simde_mm256_xxx_epu16(simde__m256i_from_private(a), simde__m256i_from_private(b)));
1155
+
1156
+ printf(" { simde_x_mm256_set_epu16(UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
1157
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
1158
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
1159
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 ")),\n",
1160
+ a.u16[15], a.u16[14], a.u16[13], a.u16[12], a.u16[11], a.u16[10], a.u16[ 9], a.u16[ 8],
1161
+ a.u16[ 7], a.u16[ 6], a.u16[ 5], a.u16[ 4], a.u16[ 3], a.u16[ 2], a.u16[ 1], a.u16[ 0]);
1162
+ printf(" simde_x_mm256_set_epu16(UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
1163
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
1164
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
1165
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 ")),\n",
1166
+ b.u16[15], b.u16[14], b.u16[13], b.u16[12], b.u16[11], b.u16[10], b.u16[ 9], b.u16[ 8],
1167
+ b.u16[ 7], b.u16[ 6], b.u16[ 5], b.u16[ 4], b.u16[ 3], b.u16[ 2], b.u16[ 1], b.u16[ 0]);
1168
+ printf(" simde_x_mm256_set_epu16(UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
1169
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
1170
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
1171
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 ")) },\n",
1172
+ r.u16[15], r.u16[14], r.u16[13], r.u16[12], r.u16[11], r.u16[10], r.u16[ 9], r.u16[ 8],
1173
+ r.u16[ 7], r.u16[ 6], r.u16[ 5], r.u16[ 4], r.u16[ 3], r.u16[ 2], r.u16[ 1], r.u16[ 0]);
1174
+ }
1175
+ return MUNIT_FAIL;
1176
+
1177
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1178
+ simde__m256i r = simde_mm256_xxx_epu16(test_vec[i].a, test_vec[i].b);
1179
+ simde_assert_m256i_u16(r, ==, test_vec[i].r);
1180
+ }
1181
+
1182
+ return MUNIT_OK;
1183
+ }
1184
+
1185
+ static MunitResult
1186
+ test_simde_mm256_xxx_epu32(const MunitParameter params[], void* data) {
1187
+ (void) params;
1188
+ (void) data;
1189
+
1190
+ const struct {
1191
+ simde__m256i a;
1192
+ simde__m256i b;
1193
+ simde__m256i r;
1194
+ } test_vec[8] = {
1195
+
1196
+ };
1197
+
1198
+ printf("\n");
1199
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
1200
+ simde__m256i_private a, b, r;
1201
+
1202
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
1203
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
1204
+
1205
+ r = simde__m256i_to_private(simde_mm256_xxx_epu32(simde__m256i_from_private(a), simde__m256i_from_private(b)));
1206
+
1207
+ printf(" { simde_x_mm256_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
1208
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
1209
+ a.u32[7], a.u32[6], a.u32[5], a.u32[4], a.u32[3], a.u32[2], a.u32[1], a.u32[0]);
1210
+ printf(" simde_x_mm256_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
1211
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
1212
+ b.u32[7], b.u32[6], b.u32[5], b.u32[4], b.u32[3], b.u32[2], b.u32[1], b.u32[0]);
1213
+ printf(" simde_x_mm256_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
1214
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")) },\n",
1215
+ r.u32[7], r.u32[6], r.u32[5], r.u32[4], r.u32[3], r.u32[2], r.u32[1], r.u32[0]);
1216
+ }
1217
+ return MUNIT_FAIL;
1218
+
1219
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1220
+ simde__m256i r = simde_mm256_xxx_epu32(test_vec[i].a, test_vec[i].b);
1221
+ simde_assert_m256i_u32(r, ==, test_vec[i].r);
1222
+ }
1223
+
1224
+ return MUNIT_OK;
1225
+ }
1226
+
1227
+ static MunitResult
1228
+ test_simde_mm256_xxx_epu64(const MunitParameter params[], void* data) {
1229
+ (void) params;
1230
+ (void) data;
1231
+
1232
+ const struct {
1233
+ simde__m256i a;
1234
+ simde__m256i b;
1235
+ simde__m256i r;
1236
+ } test_vec[8] = {
1237
+
1238
+ };
1239
+
1240
+ printf("\n");
1241
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1242
+ simde__m256i_private a, b, r;
1243
+
1244
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
1245
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
1246
+
1247
+ r = simde__m256i_to_private(simde_mm256_xxx_epi64(simde__m256i_from_private(a), simde__m256i_from_private(b)));
1248
+
1249
+ printf(" { simde_x_mm256_set_epu64x(UINT64_C(%20" PRIu64 "), UINT64_C(%20" PRIu64 "),\n"
1250
+ " UINT64_C(%20" PRIu64 "), UINT64_C(%20" PRIu64 ")) },\n",
1251
+ a.u64[3], a.u64[2], a.u64[1], a.u64[0]);
1252
+ printf(" simde_x_mm256_set_epu64x(UINT64_C(%20" PRIu64 "), UINT64_C(%20" PRIu64 "),\n"
1253
+ " UINT64_C(%20" PRIu64 "), UINT64_C(%20" PRIu64 ")),\n",
1254
+ b.u64[3], b.u64[2], b.u64[1], b.u64[0]);
1255
+ printf(" simde_x_mm256_set_epu64x(UINT64_C(%20" PRIu64 "), UINT64_C(%20" PRIu64 "),\n"
1256
+ " UINT64_C(%20" PRIu64 "), UINT64_C(%20" PRIu64 ")) },\n",
1257
+ r.u64[3], r.u64[2], r.u64[1], r.u64[0]);
1258
+ }
1259
+ return MUNIT_FAIL;
1260
+
1261
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1262
+ simde__m256i r = simde_mm256_xxx_epi64(test_vec[i].a, test_vec[i].b);
1263
+ simde_assert_m256i_i64(r, ==, test_vec[i].r);
1264
+ }
1265
+
1266
+ return MUNIT_OK;
1267
+ }
1268
+
1269
+ static MunitResult
1270
+ test_simde_mm256_xxx_ps(const MunitParameter params[], void* data) {
1271
+ (void) params;
1272
+ (void) data;
1273
+
1274
+ const struct {
1275
+ simde__m256 a;
1276
+ simde__m256 b;
1277
+ simde__m256 r;
1278
+ } test_vec[8] = {
1279
+
1280
+ };
1281
+
1282
+ printf("\n");
1283
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
1284
+ simde__m256_private a, b, r;
1285
+
1286
+ for (size_t j = 0 ; j < sizeof(simde__m256) / sizeof(simde_float32) ; j++) {
1287
+ a.f32[j] = (simde_float32) (round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0);
1288
+ b.f32[j] = (simde_float32) (round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0);
1289
+ }
1290
+
1291
+ r = simde__m256_to_private(simde_mm256_xxx_ps(simde__m256_from_private(a), simde__m256_from_private(b)));
1292
+
1293
+ printf(" { simde_mm256_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
1294
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
1295
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
1296
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)),\n",
1297
+ 9, a.f32[7], 9, a.f32[6], 9, a.f32[5], 9, a.f32[4],
1298
+ 9, a.f32[3], 9, a.f32[2], 9, a.f32[1], 9, a.f32[0]);
1299
+ printf(" simde_mm256_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
1300
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
1301
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
1302
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)),\n",
1303
+ 9, b.f32[7], 9, b.f32[6], 9, b.f32[5], 9, b.f32[4],
1304
+ 9, b.f32[3], 9, b.f32[2], 9, b.f32[1], 9, b.f32[0]);
1305
+ printf(" simde_mm256_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
1306
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
1307
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
1308
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)) },\n",
1309
+ 9, r.f32[7], 9, r.f32[6], 9, r.f32[5], 9, r.f32[4],
1310
+ 9, r.f32[3], 9, r.f32[2], 9, r.f32[1], 9, r.f32[0]);
1311
+ }
1312
+ return MUNIT_FAIL;
1313
+
1314
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1315
+ simde__m256 r = simde_mm256_xxx_ps(test_vec[i].a, test_vec[i].b);
1316
+ simde_assert_m256_close(r, test_vec[i].r, 1);
1317
+ }
1318
+
1319
+ return MUNIT_OK;
1320
+ }
1321
+
1322
+ static MunitResult
1323
+ test_simde_mm256_xxx_pd(const MunitParameter params[], void* data) {
1324
+ (void) params;
1325
+ (void) data;
1326
+
1327
+ const struct {
1328
+ simde__m256d a;
1329
+ simde__m256d b;
1330
+ simde__m256d r;
1331
+ } test_vec[8] = {
1332
+
1333
+ };
1334
+
1335
+ printf("\n");
1336
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
1337
+ simde__m256d_private a, b, r;
1338
+
1339
+ for (size_t j = 0 ; j < sizeof(simde__m256d) / sizeof(simde_float64) ; j++) {
1340
+ a.f64[j] = round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0;
1341
+ b.f64[j] = round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0;
1342
+ }
1343
+
1344
+ r = simde__m256d_to_private(simde_mm256_xxx_pd(simde__m256d_from_private(a), simde__m256d_from_private(b)));
1345
+
1346
+ printf(" { simde_mm256_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
1347
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n",
1348
+ 8, a.f64[3], 8, a.f64[2], 8, a.f64[1], 8, a.f64[0]);
1349
+ printf(" simde_mm256_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
1350
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n",
1351
+ 8, b.f64[3], 8, b.f64[2], 8, b.f64[1], 8, b.f64[0]);
1352
+ printf(" simde_mm256_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
1353
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)) },\n",
1354
+ 8, r.f64[3], 8, r.f64[2], 8, r.f64[1], 8, r.f64[0]);
1355
+ }
1356
+ return MUNIT_FAIL;
1357
+
1358
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1359
+ simde__m256d r = simde_mm256_xxx_pd(test_vec[i].a, test_vec[i].b);
1360
+ simde_assert_m256d_close(r, test_vec[i].r, 1);
1361
+ }
1362
+
1363
+ return MUNIT_OK;
1364
+ }
1365
+
1366
+ static MunitResult
1367
+ test_simde_mm256_xxx_sd(const MunitParameter params[], void* data) {
1368
+ (void) params;
1369
+ (void) data;
1370
+
1371
+ const struct {
1372
+ simde__m256d a;
1373
+ simde__m256d b;
1374
+ simde__m256d r;
1375
+ } test_vec[8] = {
1376
+
1377
+ };
1378
+
1379
+ printf("\n");
1380
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
1381
+ simde__m256d a, b, r;
1382
+
1383
+ for (size_t j = 0 ; j < sizeof(simde__m256) / sizeof(simde_float64) ; j++) {
1384
+ a.f64[j] = round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0;
1385
+ b.f64[j] = round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0;
1386
+ }
1387
+
1388
+ r = simde_mm_xxx_sd(a, b);
1389
+
1390
+ printf(" { simde_mm256_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
1391
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n",
1392
+ 8, a.f64[3], 8, a.f64[2], 8, a.f64[1], 8, a.f64[0]);
1393
+ printf(" simde_mm256_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
1394
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n",
1395
+ 8, b.f64[3], 8, b.f64[2], 8, b.f64[1], 8, b.f64[0]);
1396
+ printf(" simde_mm256_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
1397
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)) },\n",
1398
+ 8, r.f64[3], 8, r.f64[2], 8, r.f64[1], 8, r.f64[0]);
1399
+ }
1400
+ return MUNIT_FAIL;
1401
+
1402
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1403
+ simde__m256d r = simde_mm_xxx_sd(test_vec[i].a, test_vec[i].b);
1404
+ simde_assert_m256d_close(r, test_vec[i].r, 1);
1405
+ }
1406
+
1407
+ return MUNIT_OK;
1408
+ }
1409
+
1410
+ static MunitResult
1411
+ test_simde_mm_x3x_pd(const MunitParameter params[], void* data) {
1412
+ (void) params;
1413
+ (void) data;
1414
+
1415
+ const struct {
1416
+ simde__m128d a;
1417
+ simde__m128d b;
1418
+ simde__m128d c;
1419
+ simde__m128d r;
1420
+ } test_vec[8] = {
1421
+
1422
+ };
1423
+
1424
+ printf("\n");
1425
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
1426
+ simde__m128d a, b, c, r;
1427
+
1428
+ for (size_t j = 0 ; j < sizeof(simde__m128) / sizeof(simde_float64) ; j++) {
1429
+ a.f64[j] = round(random_f64_range(-100.0, 100.0) * 10.0) / 10.0;
1430
+ b.f64[j] = round(random_f64_range(-100.0, 100.0) * 10.0) / 10.0;
1431
+ c.f64[j] = round(random_f64_range(-100.0, 100.0) * 10.0) / 10.0;
1432
+ }
1433
+
1434
+ r = simde_mm_x3x_pd(a, b, c);
1435
+
1436
+ printf(" { simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n", 8, a.f64[1], 8, a.f64[0]);
1437
+ printf(" simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n", 8, b.f64[1], 8, b.f64[0]);
1438
+ printf(" simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n", 8, c.f64[1], 8, c.f64[0]);
1439
+ printf(" simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)) },\n", 8, r.f64[1], 8, r.f64[0]);
1440
+ }
1441
+ return MUNIT_FAIL;
1442
+
1443
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1444
+ simde__m128d r = simde_mm_x3x_pd(test_vec[i].a, test_vec[i].b, test_vec[i].c);
1445
+ simde_assert_m128d_close(r, test_vec[i].r, 1);
1446
+ }
1447
+
1448
+ return MUNIT_OK;
1449
+ }
1450
+
1451
+ static MunitResult
1452
+ test_simde_mm256_x3x_pd(const MunitParameter params[], void* data) {
1453
+ (void) params;
1454
+ (void) data;
1455
+
1456
+ const struct {
1457
+ simde__m256d a;
1458
+ simde__m256d b;
1459
+ simde__m256d c;
1460
+ simde__m256d r;
1461
+ } test_vec[8] = {
1462
+
1463
+ };
1464
+
1465
+ printf("\n");
1466
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
1467
+ simde__m256d a, b, c, r;
1468
+
1469
+ for (size_t j = 0 ; j < sizeof(simde__m256d) / sizeof(simde_float64) ; j++) {
1470
+ a.f64[j] = round(random_f64_range(-100.0, 100.0) * 10.0) / 10.0;
1471
+ b.f64[j] = round(random_f64_range(-100.0, 100.0) * 10.0) / 10.0;
1472
+ c.f64[j] = round(random_f64_range(-100.0, 100.0) * 10.0) / 10.0;
1473
+ }
1474
+
1475
+ r = simde_mm256_x3x_pd(a, b, c);
1476
+
1477
+ printf(" { simde_mm256_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
1478
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n",
1479
+ 8, a.f64[3], 8, a.f64[2], 8, a.f64[1], 8, a.f64[0]);
1480
+ printf(" simde_mm256_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
1481
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n",
1482
+ 8, b.f64[3], 8, b.f64[2], 8, b.f64[1], 8, b.f64[0]);
1483
+ printf(" simde_mm256_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
1484
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n",
1485
+ 8, c.f64[3], 8, c.f64[2], 8, c.f64[1], 8, c.f64[0]);
1486
+ printf(" simde_mm256_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
1487
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)) },\n",
1488
+ 8, r.f64[3], 8, r.f64[2], 8, r.f64[1], 8, r.f64[0]);
1489
+ }
1490
+ return MUNIT_FAIL;
1491
+
1492
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1493
+ simde__m256d r = simde_mm256_x3x_pd(test_vec[i].a, test_vec[i].b, test_vec[i].c);
1494
+ simde_assert_m256d_close(r, test_vec[i].r, 1);
1495
+ }
1496
+
1497
+ return MUNIT_OK;
1498
+ }
1499
+
1500
+ static MunitResult
1501
+ test_simde_mm_x3x_ps(const MunitParameter params[], void* data) {
1502
+ (void) params;
1503
+ (void) data;
1504
+
1505
+ const struct {
1506
+ simde__m128 a;
1507
+ simde__m128 b;
1508
+ simde__m128 c;
1509
+ simde__m128 r;
1510
+ } test_vec[8] = {
1511
+
1512
+ };
1513
+
1514
+ printf("\n");
1515
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
1516
+ simde__m128 a, b, c, r;
1517
+
1518
+ for (size_t j = 0 ; j < sizeof(simde__m128) / sizeof(simde_float32) ; j++) {
1519
+ a.f32[j] = (simde_float32) (round(random_f64_range(-100.0, 100.0) * 10.0) / 10.0);
1520
+ b.f32[j] = (simde_float32) (round(random_f64_range(-100.0, 100.0) * 10.0) / 10.0);
1521
+ c.f32[j] = (simde_float32) (round(random_f64_range(-100.0, 100.0) * 10.0) / 10.0);
1522
+ }
1523
+
1524
+ r = simde_mm_x3x_ps(a, b, c);
1525
+
1526
+ printf(" { simde_mm_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)),\n",
1527
+ 9, a.f32[3], 9, a.f32[2], 9, a.f32[1], 9, a.f32[0]);
1528
+ printf(" simde_mm_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)),\n",
1529
+ 9, b.f32[3], 9, b.f32[2], 9, b.f32[1], 9, b.f32[0]);
1530
+ printf(" simde_mm_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)),\n",
1531
+ 9, c.f32[3], 9, c.f32[2], 9, c.f32[1], 9, c.f32[0]);
1532
+ printf(" simde_mm_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)) },\n",
1533
+ 9, r.f32[3], 9, r.f32[2], 9, r.f32[1], 9, r.f32[0]);
1534
+ }
1535
+ return MUNIT_FAIL;
1536
+
1537
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1538
+ simde__m128 r = simde_mm_x3x_ps(test_vec[i].a, test_vec[i].b, test_vec[i].c);
1539
+ simde_assert_m128_close(r, test_vec[i].r, 1);
1540
+ }
1541
+
1542
+ return MUNIT_OK;
1543
+ }
1544
+
1545
+ static MunitResult
1546
+ test_simde_mm256_x3x_ps(const MunitParameter params[], void* data) {
1547
+ (void) params;
1548
+ (void) data;
1549
+
1550
+ const struct {
1551
+ simde__m256 a;
1552
+ simde__m256 b;
1553
+ simde__m256 c;
1554
+ simde__m256 r;
1555
+ } test_vec[8] = {
1556
+
1557
+ };
1558
+
1559
+ printf("\n");
1560
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
1561
+ simde__m256 a, b, c, r;
1562
+
1563
+ for (size_t j = 0 ; j < sizeof(simde__m256) / sizeof(simde_float32) ; j++) {
1564
+ a.f32[j] = (simde_float32) (round(random_f64_range(-100.0, 100.0) * 10.0) / 10.0);
1565
+ b.f32[j] = (simde_float32) (round(random_f64_range(-100.0, 100.0) * 10.0) / 10.0);
1566
+ c.f32[j] = (simde_float32) (round(random_f64_range(-100.0, 100.0) * 10.0) / 10.0);
1567
+ }
1568
+
1569
+ r = simde_mm256_x3x_ps(a, b, c);
1570
+
1571
+ printf(" { simde_mm256_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
1572
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
1573
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
1574
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)),\n",
1575
+ 9, a.f32[7], 9, a.f32[6], 9, a.f32[5], 9, a.f32[4],
1576
+ 9, a.f32[3], 9, a.f32[2], 9, a.f32[1], 9, a.f32[0]);
1577
+ printf(" simde_mm256_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
1578
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
1579
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
1580
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)),\n",
1581
+ 9, b.f32[7], 9, b.f32[6], 9, b.f32[5], 9, b.f32[4],
1582
+ 9, b.f32[3], 9, b.f32[2], 9, b.f32[1], 9, b.f32[0]);
1583
+ printf(" simde_mm256_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
1584
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
1585
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
1586
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)),\n",
1587
+ 9, c.f32[7], 9, c.f32[6], 9, c.f32[5], 9, c.f32[4],
1588
+ 9, c.f32[3], 9, c.f32[2], 9, c.f32[1], 9, c.f32[0]);
1589
+ printf(" simde_mm256_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
1590
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
1591
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
1592
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)) },\n",
1593
+ 9, r.f32[7], 9, r.f32[6], 9, r.f32[5], 9, r.f32[4],
1594
+ 9, r.f32[3], 9, r.f32[2], 9, r.f32[1], 9, r.f32[0]);
1595
+ }
1596
+ return MUNIT_FAIL;
1597
+
1598
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1599
+ simde__m256 r = simde_mm256_x3x_ps(test_vec[i].a, test_vec[i].b, test_vec[i].c);
1600
+ simde_assert_m256_close(r, test_vec[i].r, 1);
1601
+ }
1602
+
1603
+ return MUNIT_OK;
1604
+ }
1605
+
1606
+ static MunitResult
1607
+ test_simde_mm_x3x_sd(const MunitParameter params[], void* data) {
1608
+ (void) params;
1609
+ (void) data;
1610
+
1611
+ const struct {
1612
+ simde__m128d a;
1613
+ simde__m128d b;
1614
+ simde__m128d c;
1615
+ simde__m128d r;
1616
+ } test_vec[8] = {
1617
+
1618
+ };
1619
+
1620
+ printf("\n");
1621
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
1622
+ simde__m128d a, b, c, r;
1623
+
1624
+ for (size_t j = 0 ; j < sizeof(simde__m128) / sizeof(simde_float64) ; j++) {
1625
+ a.f64[j] = round(random_f64_range(-100.0, 100.0) * 10.0) / 10.0;
1626
+ b.f64[j] = round(random_f64_range(-100.0, 100.0) * 10.0) / 10.0;
1627
+ c.f64[j] = round(random_f64_range(-100.0, 100.0) * 10.0) / 10.0;
1628
+ }
1629
+
1630
+ r = simde_mm_x3x_sd(a, b, c);
1631
+
1632
+ printf(" { simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n", 8, a.f64[1], 8, a.f64[0]);
1633
+ printf(" simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n", 8, b.f64[1], 8, b.f64[0]);
1634
+ printf(" simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n", 8, c.f64[1], 8, c.f64[0]);
1635
+ printf(" simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)) },\n", 8, r.f64[1], 8, r.f64[0]);
1636
+ }
1637
+ return MUNIT_FAIL;
1638
+
1639
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1640
+ simde__m128d r = simde_mm_x3x_sd(test_vec[i].a, test_vec[i].b, test_vec[i].c);
1641
+ simde_assert_m128d_close(r, test_vec[i].r, 1);
1642
+ }
1643
+
1644
+ return MUNIT_OK;
1645
+ }
1646
+
1647
+ static MunitResult
1648
+ test_simde_mm_x3x_ss(const MunitParameter params[], void* data) {
1649
+ (void) params;
1650
+ (void) data;
1651
+
1652
+ const struct {
1653
+ simde__m128 a;
1654
+ simde__m128 b;
1655
+ simde__m128 c;
1656
+ simde__m128 r;
1657
+ } test_vec[8] = {
1658
+
1659
+ };
1660
+
1661
+ printf("\n");
1662
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
1663
+ simde__m128 a, b, c, r;
1664
+
1665
+ for (size_t j = 0 ; j < sizeof(simde__m128) / sizeof(simde_float32) ; j++) {
1666
+ a.f32[j] = (simde_float32) (round(random_f64_range(-100.0, 100.0) * 10.0) / 10.0);
1667
+ b.f32[j] = (simde_float32) (round(random_f64_range(-100.0, 100.0) * 10.0) / 10.0);
1668
+ c.f32[j] = (simde_float32) (round(random_f64_range(-100.0, 100.0) * 10.0) / 10.0);
1669
+ }
1670
+
1671
+ r = simde_mm_x3x_ss(a, b, c);
1672
+
1673
+ printf(" { simde_mm_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)),\n",
1674
+ 9, a.f32[3], 9, a.f32[2], 9, a.f32[1], 9, a.f32[0]);
1675
+ printf(" simde_mm_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)),\n",
1676
+ 9, b.f32[3], 9, b.f32[2], 9, b.f32[1], 9, b.f32[0]);
1677
+ printf(" simde_mm_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)),\n",
1678
+ 9, c.f32[3], 9, c.f32[2], 9, c.f32[1], 9, c.f32[0]);
1679
+ printf(" simde_mm_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)) },\n",
1680
+ 9, r.f32[3], 9, r.f32[2], 9, r.f32[1], 9, r.f32[0]);
1681
+ }
1682
+ return MUNIT_FAIL;
1683
+
1684
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1685
+ simde__m128 r = simde_mm_x3x_ss(test_vec[i].a, test_vec[i].b, test_vec[i].c);
1686
+ simde_assert_m128_close(r, test_vec[i].r, 1);
1687
+ }
1688
+
1689
+ return MUNIT_OK;
1690
+ }
1691
+
1692
+ static MunitResult
1693
+ test_simde_mm512_xxx_epi8(const MunitParameter params[], void* data) {
1694
+ (void) params;
1695
+ (void) data;
1696
+
1697
+ const struct {
1698
+ simde__m512i a;
1699
+ simde__m512i b;
1700
+ simde__m512i r;
1701
+ } test_vec[8] = {
1702
+
1703
+ };
1704
+
1705
+ printf("\n");
1706
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
1707
+ simde__m512i_private a, b, r;
1708
+
1709
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
1710
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
1711
+
1712
+ r = simde__m512i_to_private(simde_mm512_xxx_epi8(simde__m512i_from_private(a), simde__m512i_from_private(b)));
1713
+
1714
+ printf(" { simde_mm512_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1715
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1716
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1717
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1718
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1719
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1720
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1721
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1722
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1723
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1724
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1725
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1726
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1727
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1728
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1729
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")),\n",
1730
+ a.i8[63], a.i8[62], a.i8[61], a.i8[60], a.i8[59], a.i8[58], a.i8[57], a.i8[56],
1731
+ a.i8[55], a.i8[54], a.i8[53], a.i8[52], a.i8[51], a.i8[50], a.i8[49], a.i8[48],
1732
+ a.i8[47], a.i8[46], a.i8[45], a.i8[44], a.i8[43], a.i8[42], a.i8[41], a.i8[40],
1733
+ a.i8[39], a.i8[38], a.i8[37], a.i8[36], a.i8[35], a.i8[34], a.i8[33], a.i8[32],
1734
+ a.i8[31], a.i8[30], a.i8[29], a.i8[28], a.i8[27], a.i8[26], a.i8[25], a.i8[24],
1735
+ a.i8[23], a.i8[22], a.i8[21], a.i8[20], a.i8[19], a.i8[18], a.i8[17], a.i8[16],
1736
+ a.i8[15], a.i8[14], a.i8[13], a.i8[12], a.i8[11], a.i8[10], a.i8[ 9], a.i8[ 8],
1737
+ a.i8[ 7], a.i8[ 6], a.i8[ 5], a.i8[ 4], a.i8[ 3], a.i8[ 2], a.i8[ 1], a.i8[ 0]);
1738
+ printf(" simde_mm512_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1739
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1740
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1741
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1742
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1743
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1744
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1745
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1746
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1747
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1748
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1749
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1750
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1751
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1752
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1753
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")),\n",
1754
+ b.i8[63], b.i8[62], b.i8[61], b.i8[60], b.i8[59], b.i8[58], b.i8[57], b.i8[56],
1755
+ b.i8[55], b.i8[54], b.i8[53], b.i8[52], b.i8[51], b.i8[50], b.i8[49], b.i8[48],
1756
+ b.i8[47], b.i8[46], b.i8[45], b.i8[44], b.i8[43], b.i8[42], b.i8[41], b.i8[40],
1757
+ b.i8[39], b.i8[38], b.i8[37], b.i8[36], b.i8[35], b.i8[34], b.i8[33], b.i8[32],
1758
+ b.i8[31], b.i8[30], b.i8[29], b.i8[28], b.i8[27], b.i8[26], b.i8[25], b.i8[24],
1759
+ b.i8[23], b.i8[22], b.i8[21], b.i8[20], b.i8[19], b.i8[18], b.i8[17], b.i8[16],
1760
+ b.i8[15], b.i8[14], b.i8[13], b.i8[12], b.i8[11], b.i8[10], b.i8[ 9], b.i8[ 8],
1761
+ b.i8[ 7], b.i8[ 6], b.i8[ 5], b.i8[ 4], b.i8[ 3], b.i8[ 2], b.i8[ 1], b.i8[ 0]);
1762
+ printf(" simde_mm512_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1763
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1764
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1765
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1766
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1767
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1768
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1769
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1770
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1771
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1772
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1773
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1774
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1775
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1776
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
1777
+ " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")) },\n",
1778
+ r.i8[63], r.i8[62], r.i8[61], r.i8[60], r.i8[59], r.i8[58], r.i8[57], r.i8[56],
1779
+ r.i8[55], r.i8[54], r.i8[53], r.i8[52], r.i8[51], r.i8[50], r.i8[49], r.i8[48],
1780
+ r.i8[47], r.i8[46], r.i8[45], r.i8[44], r.i8[43], r.i8[42], r.i8[41], r.i8[40],
1781
+ r.i8[39], r.i8[38], r.i8[37], r.i8[36], r.i8[35], r.i8[34], r.i8[33], r.i8[32],
1782
+ r.i8[31], r.i8[30], r.i8[29], r.i8[28], r.i8[27], r.i8[26], r.i8[25], r.i8[24],
1783
+ r.i8[23], r.i8[22], r.i8[21], r.i8[20], r.i8[19], r.i8[18], r.i8[17], r.i8[16],
1784
+ r.i8[15], r.i8[14], r.i8[13], r.i8[12], r.i8[11], r.i8[10], r.i8[ 9], r.i8[ 8],
1785
+ r.i8[ 7], r.i8[ 6], r.i8[ 5], r.i8[ 4], r.i8[ 3], r.i8[ 2], r.i8[ 1], r.i8[ 0]);
1786
+ }
1787
+ return MUNIT_FAIL;
1788
+
1789
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1790
+ simde__m512i r = simde_mm512_xxx_epi8(test_vec[i].a, test_vec[i].b);
1791
+ simde_assert_m512i_i8(r, ==, test_vec[i].r);
1792
+ }
1793
+
1794
+ return MUNIT_OK;
1795
+ }
1796
+
1797
+ static MunitResult
1798
+ test_simde_mm512_xxx_epi16(const MunitParameter params[], void* data) {
1799
+ (void) params;
1800
+ (void) data;
1801
+
1802
+ const struct {
1803
+ simde__m512i a;
1804
+ simde__m512i b;
1805
+ simde__m512i r;
1806
+ } test_vec[8] = {
1807
+
1808
+ };
1809
+
1810
+ printf("\n");
1811
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
1812
+ simde__m512i_private a, b, r;
1813
+
1814
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
1815
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
1816
+
1817
+ r = simde__m512i_to_private(simde_mm512_xxx_epi16(simde__m512i_from_private(a), simde__m512i_from_private(b)));
1818
+
1819
+ printf(" { simde_mm512_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
1820
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
1821
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
1822
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
1823
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
1824
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
1825
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
1826
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")),\n",
1827
+ a.i16[31], a.i16[30], a.i16[29], a.i16[28], a.i16[27], a.i16[26], a.i16[25], a.i16[24],
1828
+ a.i16[23], a.i16[22], a.i16[21], a.i16[20], a.i16[19], a.i16[18], a.i16[17], a.i16[16],
1829
+ a.i16[15], a.i16[14], a.i16[13], a.i16[12], a.i16[11], a.i16[10], a.i16[ 9], a.i16[ 8],
1830
+ a.i16[ 7], a.i16[ 6], a.i16[ 5], a.i16[ 4], a.i16[ 3], a.i16[ 2], a.i16[ 1], a.i16[ 0]);
1831
+ printf(" simde_mm512_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
1832
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
1833
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
1834
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
1835
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
1836
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
1837
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
1838
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")),\n",
1839
+ b.i16[31], b.i16[30], b.i16[29], b.i16[28], b.i16[27], b.i16[26], b.i16[25], b.i16[24],
1840
+ b.i16[23], b.i16[22], b.i16[21], b.i16[20], b.i16[19], b.i16[18], b.i16[17], b.i16[16],
1841
+ b.i16[15], b.i16[14], b.i16[13], b.i16[12], b.i16[11], b.i16[10], b.i16[ 9], b.i16[ 8],
1842
+ b.i16[ 7], b.i16[ 6], b.i16[ 5], b.i16[ 4], b.i16[ 3], b.i16[ 2], b.i16[ 1], b.i16[ 0]);
1843
+ printf(" simde_mm512_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
1844
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
1845
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
1846
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
1847
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
1848
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
1849
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
1850
+ " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")) },\n",
1851
+ r.i16[31], r.i16[30], r.i16[29], r.i16[28], r.i16[27], r.i16[26], r.i16[25], r.i16[24],
1852
+ r.i16[23], r.i16[22], r.i16[21], r.i16[20], r.i16[19], r.i16[18], r.i16[17], r.i16[16],
1853
+ r.i16[15], r.i16[14], r.i16[13], r.i16[12], r.i16[11], r.i16[10], r.i16[ 9], r.i16[ 8],
1854
+ r.i16[ 7], r.i16[ 6], r.i16[ 5], r.i16[ 4], r.i16[ 3], r.i16[ 2], r.i16[ 1], r.i16[ 0]);
1855
+ }
1856
+ return MUNIT_FAIL;
1857
+
1858
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1859
+ simde__m512i r = simde_mm512_xxx_epi16(test_vec[i].a, test_vec[i].b);
1860
+ simde_assert_m512i_i16(r, ==, test_vec[i].r);
1861
+ }
1862
+
1863
+ return MUNIT_OK;
1864
+ }
1865
+
1866
+ static MunitResult
1867
+ test_simde_mm512_xxx_epi32(const MunitParameter params[], void* data) {
1868
+ (void) params;
1869
+ (void) data;
1870
+
1871
+ const struct {
1872
+ simde__m512i a;
1873
+ simde__m512i b;
1874
+ simde__m512i r;
1875
+ } test_vec[8] = {
1876
+
1877
+ };
1878
+
1879
+ printf("\n");
1880
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
1881
+ simde__m512i_private a, b, r;
1882
+
1883
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
1884
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
1885
+
1886
+ r = simde__m512i_to_private(simde_mm512_xxx_epi32(simde__m512i_from_private(a), simde__m512i_from_private(b)));
1887
+
1888
+ printf(" { simde_mm512_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1889
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1890
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1891
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")),\n",
1892
+ a.i32[15], a.i32[14], a.i32[13], a.i32[12], a.i32[11], a.i32[10], a.i32[ 9], a.i32[ 8],
1893
+ a.i32[ 7], a.i32[ 6], a.i32[ 5], a.i32[ 4], a.i32[ 3], a.i32[ 2], a.i32[ 1], a.i32[ 0]);
1894
+ printf(" simde_mm512_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1895
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1896
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1897
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")),\n",
1898
+ b.i32[15], b.i32[14], b.i32[13], b.i32[12], b.i32[11], b.i32[10], b.i32[ 9], b.i32[ 8],
1899
+ b.i32[ 7], b.i32[ 6], b.i32[ 5], b.i32[ 4], b.i32[ 3], b.i32[ 2], b.i32[ 1], b.i32[ 0]);
1900
+ printf(" simde_mm512_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1901
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1902
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1903
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")) },\n",
1904
+ r.i32[15], r.i32[14], r.i32[13], r.i32[12], r.i32[11], r.i32[10], r.i32[ 9], r.i32[ 8],
1905
+ r.i32[ 7], r.i32[ 6], r.i32[ 5], r.i32[ 4], r.i32[ 3], r.i32[ 2], r.i32[ 1], r.i32[ 0]);
1906
+ }
1907
+ return MUNIT_FAIL;
1908
+
1909
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1910
+ simde__m512i r = simde_mm512_xxx_epi32(test_vec[i].a, test_vec[i].b);
1911
+ simde_assert_m512i_i32(r, ==, test_vec[i].r);
1912
+ }
1913
+
1914
+ return MUNIT_OK;
1915
+ }
1916
+
1917
+ static MunitResult
1918
+ test_simde_mm512_mask_xxx_epi32(const MunitParameter params[], void* data) {
1919
+ (void) params;
1920
+ (void) data;
1921
+
1922
+ const struct {
1923
+ simde__m512i src;
1924
+ simde__mmask16 k;
1925
+ simde__m512i a;
1926
+ simde__m512i b;
1927
+ simde__m512i r;
1928
+ } test_vec[8] = {
1929
+
1930
+ };
1931
+
1932
+ printf("\n");
1933
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
1934
+ simde__m512i_private src, a, b, r;
1935
+ simde__mmask16 k;
1936
+
1937
+ munit_rand_memory(sizeof(src), (uint8_t*) &src);
1938
+ munit_rand_memory(sizeof(k), (uint8_t*) &k);
1939
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
1940
+ munit_rand_memory(sizeof(a), (uint8_t*) &b);
1941
+ k &= UINT16_C(0xffff);
1942
+
1943
+ r = simde__m512i_to_private(simde_mm512_mask_xxx_epi32(simde__m512i_from_private(src), k, simde__m512i_from_private(a), simde__m512i_from_private(b)));
1944
+
1945
+ printf(" { simde_mm512_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1946
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1947
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1948
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")),\n",
1949
+ src.i32[15], src.i32[14], src.i32[13], src.i32[12], src.i32[11], src.i32[10], src.i32[ 9], src.i32[ 8],
1950
+ src.i32[ 7], src.i32[ 6], src.i32[ 5], src.i32[ 4], src.i32[ 3], src.i32[ 2], src.i32[ 1], src.i32[ 0]);
1951
+ printf(" UINT16_C(%5" PRIu16 "),\n", HEDLEY_STATIC_CAST(uint16_t, k));
1952
+ printf(" simde_mm512_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1953
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1954
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1955
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")),\n",
1956
+ a.i32[15], a.i32[14], a.i32[13], a.i32[12], a.i32[11], a.i32[10], a.i32[ 9], a.i32[ 8],
1957
+ a.i32[ 7], a.i32[ 6], a.i32[ 5], a.i32[ 4], a.i32[ 3], a.i32[ 2], a.i32[ 1], a.i32[ 0]);
1958
+ printf(" simde_mm512_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1959
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1960
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1961
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")),\n",
1962
+ b.i32[15], b.i32[14], b.i32[13], b.i32[12], b.i32[11], b.i32[10], b.i32[ 9], b.i32[ 8],
1963
+ b.i32[ 7], b.i32[ 6], b.i32[ 5], b.i32[ 4], b.i32[ 3], b.i32[ 2], b.i32[ 1], b.i32[ 0]);
1964
+ printf(" simde_mm512_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1965
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1966
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
1967
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")) },\n",
1968
+ r.i32[15], r.i32[14], r.i32[13], r.i32[12], r.i32[11], r.i32[10], r.i32[ 9], r.i32[ 8],
1969
+ r.i32[ 7], r.i32[ 6], r.i32[ 5], r.i32[ 4], r.i32[ 3], r.i32[ 2], r.i32[ 1], r.i32[ 0]);
1970
+ }
1971
+ return MUNIT_FAIL;
1972
+
1973
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1974
+ simde__m512i r = simde_mm512_mask_xxx_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b);
1975
+ simde_assert_m512i_i32(r, ==, test_vec[i].r);
1976
+ }
1977
+
1978
+ return MUNIT_OK;
1979
+ }
1980
+
1981
+ static MunitResult
1982
+ test_simde_mm512_mask_xxx_epi32_mask(const MunitParameter params[], void* data) {
1983
+ (void) params;
1984
+ (void) data;
1985
+
1986
+ const struct {
1987
+ simde__mmask16 k;
1988
+ simde__m512i a;
1989
+ simde__m512i b;
1990
+ simde__mmask16 r;
1991
+ } test_vec[8] = {
1992
+
1993
+ };
1994
+
1995
+ printf("\n");
1996
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
1997
+ simde__m512i_private a, b;
1998
+ simde__mmask16 k, r;
1999
+
2000
+ k = (simde__mmask16) munit_rand_int_range(0, UINT16_MAX);
2001
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
2002
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
2003
+
2004
+ r = simde_mm512_mask_xxx_epi32_mask(k, simde__m512i_from_private(a), simde__m512i_from_private(b));
2005
+
2006
+ printf(" { UINT16_C(%5" PRIu16 "),\n", k);
2007
+ printf(" simde_mm512_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
2008
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
2009
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
2010
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")),\n",
2011
+ a.i32[15], a.i32[14], a.i32[13], a.i32[12], a.i32[11], a.i32[10], a.i32[ 9], a.i32[ 8],
2012
+ a.i32[ 7], a.i32[ 6], a.i32[ 5], a.i32[ 4], a.i32[ 3], a.i32[ 2], a.i32[ 1], a.i32[ 0]);
2013
+ printf(" simde_mm512_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
2014
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
2015
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
2016
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")),\n",
2017
+ b.i32[15], b.i32[14], b.i32[13], b.i32[12], b.i32[11], b.i32[10], b.i32[ 9], b.i32[ 8],
2018
+ b.i32[ 7], b.i32[ 6], b.i32[ 5], b.i32[ 4], b.i32[ 3], b.i32[ 2], b.i32[ 1], b.i32[ 0]);
2019
+ printf(" UINT16_C(%5" PRIu16 ") },\n", r);
2020
+ }
2021
+ return MUNIT_FAIL;
2022
+
2023
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2024
+ simde__mmask16 r = simde_mm512_mask_xxx_epi32_mask(test_vec[i].k, test_vec[i].a, test_vec[i].b);
2025
+ simde_assert_mmask16(r, ==, test_vec[i].r);
2026
+ }
2027
+
2028
+ return MUNIT_OK;
2029
+ }
2030
+
2031
+ static MunitResult
2032
+ test_simde_mm512_xxx_epi64(const MunitParameter params[], void* data) {
2033
+ (void) params;
2034
+ (void) data;
2035
+
2036
+ const struct {
2037
+ simde__m512i a;
2038
+ simde__m512i b;
2039
+ simde__m512i r;
2040
+ } test_vec[8] = {
2041
+
2042
+ };
2043
+
2044
+ printf("\n");
2045
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2046
+ simde__m512i_private a, b, r;
2047
+
2048
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
2049
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
2050
+
2051
+ r = simde__m512i_to_private(simde_mm512_xxx_epi64(simde__m512i_from_private(a), simde__m512i_from_private(b)));
2052
+
2053
+ printf(" { simde_mm512_set_epi64(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2054
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2055
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2056
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n",
2057
+ a.i64[7], a.i64[6], a.i64[5], a.i64[4],
2058
+ a.i64[3], a.i64[2], a.i64[1], a.i64[0]);
2059
+ printf(" simde_mm512_set_epi64(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2060
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2061
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2062
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n",
2063
+ b.i64[7], b.i64[6], b.i64[5], b.i64[4],
2064
+ b.i64[3], b.i64[2], b.i64[1], b.i64[0]);
2065
+ printf(" simde_mm512_set_epi64(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2066
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2067
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2068
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")) },\n",
2069
+ r.i64[7], r.i64[6], r.i64[5], r.i64[4],
2070
+ r.i64[3], r.i64[2], r.i64[1], r.i64[0]);
2071
+ }
2072
+ return MUNIT_FAIL;
2073
+
2074
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2075
+ simde__m512i r = simde_mm512_xxx_epi64(test_vec[i].a, test_vec[i].b);
2076
+ simde_assert_m512i_i64(r, ==, test_vec[i].r);
2077
+ }
2078
+
2079
+ return MUNIT_OK;
2080
+ }
2081
+
2082
+ static MunitResult
2083
+ test_simde_mm512_mask_xxx_epi64(const MunitParameter params[], void* data) {
2084
+ (void) params;
2085
+ (void) data;
2086
+
2087
+ const struct {
2088
+ simde__m512i src;
2089
+ simde__mmask8 k;
2090
+ simde__m512i a;
2091
+ simde__m512i b;
2092
+ simde__m512i r;
2093
+ } test_vec[8] = {
2094
+
2095
+ };
2096
+
2097
+ printf("\n");
2098
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
2099
+ simde__m512i_private src, a, b, r;
2100
+ simde__mmask8 k;
2101
+
2102
+ munit_rand_memory(sizeof(src), (uint8_t*) &src);
2103
+ munit_rand_memory(sizeof(k), (uint8_t*) &k);
2104
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
2105
+ munit_rand_memory(sizeof(a), (uint8_t*) &b);
2106
+ k &= UINT8_C(0xff);
2107
+
2108
+ r = simde__m512i_to_private(simde_mm512_mask_xxx_epi64(simde__m512i_from_private(src), k, simde__m512i_from_private(a), simde__m512i_from_private(b)));
2109
+
2110
+ printf(" { simde_mm512_set_epi64(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2111
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2112
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2113
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n",
2114
+ src.i64[7], src.i64[6], src.i64[5], src.i64[4],
2115
+ src.i64[3], src.i64[2], src.i64[1], src.i64[0]);
2116
+ printf(" UINT8_C(%3" PRIu8 "),\n", HEDLEY_STATIC_CAST(uint8_t, k));
2117
+ printf(" simde_mm512_set_epi64(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2118
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2119
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2120
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n",
2121
+ a.i64[7], a.i64[6], a.i64[5], a.i64[4],
2122
+ a.i64[3], a.i64[2], a.i64[1], a.i64[0]);
2123
+ printf(" simde_mm512_set_epi64(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2124
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2125
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2126
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n",
2127
+ b.i64[7], b.i64[6], b.i64[5], b.i64[4],
2128
+ b.i64[3], b.i64[2], b.i64[1], b.i64[0]);
2129
+ printf(" simde_mm512_set_epi64(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2130
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2131
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2132
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")) },\n",
2133
+ r.i64[7], r.i64[6], r.i64[5], r.i64[4],
2134
+ r.i64[3], r.i64[2], r.i64[1], r.i64[0]);
2135
+ }
2136
+ return MUNIT_FAIL;
2137
+
2138
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2139
+ simde__m512i r = simde_mm512_mask_xxx_epi64(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b);
2140
+ simde_assert_m512i_i64(r, ==, test_vec[i].r);
2141
+ }
2142
+
2143
+ return MUNIT_OK;
2144
+ }
2145
+
2146
+ static MunitResult
2147
+ test_simde_mm512_mask_xxx_epi64_mask(const MunitParameter params[], void* data) {
2148
+ (void) params;
2149
+ (void) data;
2150
+
2151
+ const struct {
2152
+ simde__mmask8 k;
2153
+ simde__m512i a;
2154
+ simde__m512i b;
2155
+ simde__mmask8 r;
2156
+ } test_vec[8] = {
2157
+
2158
+ };
2159
+
2160
+ printf("\n");
2161
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
2162
+ simde__m512i_private a, b;
2163
+ simde__mmask8 k, r;
2164
+
2165
+ k = (simde__mmask8) munit_rand_int_range(0, UINT8_MAX);
2166
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
2167
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
2168
+
2169
+ r = simde_mm512_mask_xxx_epi64_mask(k, simde__m512i_from_private(a), simde__m512i_from_private(b));
2170
+
2171
+ printf(" { UINT8_C(%3" PRIu8 "),\n", k);
2172
+ printf(" simde_mm512_set_epi64(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2173
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2174
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2175
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n",
2176
+ a.i64[7], a.i64[6], a.i64[5], a.i64[4],
2177
+ a.i64[3], a.i64[2], a.i64[1], a.i64[0]);
2178
+ printf(" simde_mm512_set_epi64(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2179
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2180
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2181
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n",
2182
+ b.i64[7], b.i64[6], b.i64[5], b.i64[4],
2183
+ b.i64[3], b.i64[2], b.i64[1], b.i64[0]);
2184
+ printf(" UINT8_C(%3" PRIu8 ") },\n", r);
2185
+ }
2186
+ return MUNIT_FAIL;
2187
+
2188
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2189
+ simde__mmask8 r = simde_mm512_mask_xxx_epi64_mask(test_vec[i].k, test_vec[i].a, test_vec[i].b);
2190
+ simde_assert_mmask8(r, ==, test_vec[i].r);
2191
+ }
2192
+
2193
+ return MUNIT_OK;
2194
+ }
2195
+
2196
+ static MunitResult
2197
+ test_simde_mm512_xxx_epu8(const MunitParameter params[], void* data) {
2198
+ (void) params;
2199
+ (void) data;
2200
+
2201
+ const struct {
2202
+ simde__m512i a;
2203
+ simde__m512i b;
2204
+ simde__m512i r;
2205
+ } test_vec[8] = {
2206
+
2207
+ };
2208
+
2209
+ printf("\n");
2210
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
2211
+ simde__m512i_private a, b, r;
2212
+
2213
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
2214
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
2215
+
2216
+ r = simde__m512i_to_private(simde_mm512_xxx_epu8(simde__m512i_from_private(a), simde__m512i_from_private(b)));
2217
+
2218
+ printf(" { simde_x_mm512_set_epu8(UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2219
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2220
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2221
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2222
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2223
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2224
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2225
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2226
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2227
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2228
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2229
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2230
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2231
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2232
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2233
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 ")),\n",
2234
+ a.u8[63], a.u8[62], a.u8[61], a.u8[60], a.u8[59], a.u8[58], a.u8[57], a.u8[56],
2235
+ a.u8[55], a.u8[54], a.u8[53], a.u8[52], a.u8[51], a.u8[50], a.u8[49], a.u8[48],
2236
+ a.u8[47], a.u8[46], a.u8[45], a.u8[44], a.u8[43], a.u8[42], a.u8[41], a.u8[40],
2237
+ a.u8[39], a.u8[38], a.u8[37], a.u8[36], a.u8[35], a.u8[34], a.u8[33], a.u8[32],
2238
+ a.u8[31], a.u8[30], a.u8[29], a.u8[28], a.u8[27], a.u8[26], a.u8[25], a.u8[24],
2239
+ a.u8[23], a.u8[22], a.u8[21], a.u8[20], a.u8[19], a.u8[18], a.u8[17], a.u8[16],
2240
+ a.u8[15], a.u8[14], a.u8[13], a.u8[12], a.u8[11], a.u8[10], a.u8[ 9], a.u8[ 8],
2241
+ a.u8[ 7], a.u8[ 6], a.u8[ 5], a.u8[ 4], a.u8[ 3], a.u8[ 2], a.u8[ 1], a.u8[ 0]);
2242
+ printf(" simde_x_mm512_set_epu8(UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2243
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2244
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2245
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2246
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2247
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2248
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2249
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2250
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2251
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2252
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2253
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2254
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2255
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2256
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2257
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 ")),\n",
2258
+ b.u8[63], b.u8[62], b.u8[61], b.u8[60], b.u8[59], b.u8[58], b.u8[57], b.u8[56],
2259
+ b.u8[55], b.u8[54], b.u8[53], b.u8[52], b.u8[51], b.u8[50], b.u8[49], b.u8[48],
2260
+ b.u8[47], b.u8[46], b.u8[45], b.u8[44], b.u8[43], b.u8[42], b.u8[41], b.u8[40],
2261
+ b.u8[39], b.u8[38], b.u8[37], b.u8[36], b.u8[35], b.u8[34], b.u8[33], b.u8[32],
2262
+ b.u8[31], b.u8[30], b.u8[29], b.u8[28], b.u8[27], b.u8[26], b.u8[25], b.u8[24],
2263
+ b.u8[23], b.u8[22], b.u8[21], b.u8[20], b.u8[19], b.u8[18], b.u8[17], b.u8[16],
2264
+ b.u8[15], b.u8[14], b.u8[13], b.u8[12], b.u8[11], b.u8[10], b.u8[ 9], b.u8[ 8],
2265
+ b.u8[ 7], b.u8[ 6], b.u8[ 5], b.u8[ 4], b.u8[ 3], b.u8[ 2], b.u8[ 1], b.u8[ 0]);
2266
+ printf(" simde_x_mm512_set_epu8(UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2267
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2268
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2269
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2270
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2271
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2272
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2273
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2274
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2275
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2276
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2277
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2278
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2279
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2280
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "),\n"
2281
+ " UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 "), UINT8_C(%3" PRIu8 ")) },\n",
2282
+ r.u8[63], r.u8[62], r.u8[61], r.u8[60], r.u8[59], r.u8[58], r.u8[57], r.u8[56],
2283
+ r.u8[55], r.u8[54], r.u8[53], r.u8[52], r.u8[51], r.u8[50], r.u8[49], r.u8[48],
2284
+ r.u8[47], r.u8[46], r.u8[45], r.u8[44], r.u8[43], r.u8[42], r.u8[41], r.u8[40],
2285
+ r.u8[39], r.u8[38], r.u8[37], r.u8[36], r.u8[35], r.u8[34], r.u8[33], r.u8[32],
2286
+ r.u8[31], r.u8[30], r.u8[29], r.u8[28], r.u8[27], r.u8[26], r.u8[25], r.u8[24],
2287
+ r.u8[23], r.u8[22], r.u8[21], r.u8[20], r.u8[19], r.u8[18], r.u8[17], r.u8[16],
2288
+ r.u8[15], r.u8[14], r.u8[13], r.u8[12], r.u8[11], r.u8[10], r.u8[ 9], r.u8[ 8],
2289
+ r.u8[ 7], r.u8[ 6], r.u8[ 5], r.u8[ 4], r.u8[ 3], r.u8[ 2], r.u8[ 1], r.u8[ 0]);
2290
+ }
2291
+ return MUNIT_FAIL;
2292
+
2293
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2294
+ simde__m512i r = simde_mm512_xxx_epu8(test_vec[i].a, test_vec[i].b);
2295
+ simde_assert_m512i_u8(r, ==, test_vec[i].r);
2296
+ }
2297
+
2298
+ return MUNIT_OK;
2299
+ }
2300
+
2301
+ static MunitResult
2302
+ test_simde_mm512_xxx_epu16(const MunitParameter params[], void* data) {
2303
+ (void) params;
2304
+ (void) data;
2305
+
2306
+ const struct {
2307
+ simde__m512i a;
2308
+ simde__m512i b;
2309
+ simde__m512i r;
2310
+ } test_vec[8] = {
2311
+
2312
+ };
2313
+
2314
+ printf("\n");
2315
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
2316
+ simde__m512i_private a, b, r;
2317
+
2318
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
2319
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
2320
+
2321
+ r = simde__m512i_to_private(simde_mm512_xxx_epu16(simde__m512i_from_private(a), simde__m512i_from_private(b)));
2322
+
2323
+ printf(" { simde_x_mm512_set_epu16(UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
2324
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
2325
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
2326
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
2327
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
2328
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
2329
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
2330
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 ")),\n",
2331
+ a.u16[31], a.u16[30], a.u16[29], a.u16[28], a.u16[27], a.u16[26], a.u16[25], a.u16[24],
2332
+ a.u16[23], a.u16[22], a.u16[21], a.u16[20], a.u16[19], a.u16[18], a.u16[17], a.u16[16],
2333
+ a.u16[15], a.u16[14], a.u16[13], a.u16[12], a.u16[11], a.u16[10], a.u16[ 9], a.u16[ 8],
2334
+ a.u16[ 7], a.u16[ 6], a.u16[ 5], a.u16[ 4], a.u16[ 3], a.u16[ 2], a.u16[ 1], a.u16[ 0]);
2335
+ printf(" simde_x_mm512_set_epu16(UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
2336
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
2337
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
2338
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
2339
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
2340
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
2341
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
2342
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 ")),\n",
2343
+ b.u16[31], b.u16[30], b.u16[29], b.u16[28], b.u16[27], b.u16[26], b.u16[25], b.u16[24],
2344
+ b.u16[23], b.u16[22], b.u16[21], b.u16[20], b.u16[19], b.u16[18], b.u16[17], b.u16[16],
2345
+ b.u16[15], b.u16[14], b.u16[13], b.u16[12], b.u16[11], b.u16[10], b.u16[ 9], b.u16[ 8],
2346
+ b.u16[ 7], b.u16[ 6], b.u16[ 5], b.u16[ 4], b.u16[ 3], b.u16[ 2], b.u16[ 1], b.u16[ 0]);
2347
+ printf(" simde_x_mm512_set_epu16(UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
2348
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
2349
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
2350
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
2351
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
2352
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
2353
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "),\n"
2354
+ " UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 "), UINT16_C(%6" PRIu16 ")) },\n",
2355
+ r.u16[31], r.u16[30], r.u16[29], r.u16[28], r.u16[27], r.u16[26], r.u16[25], r.u16[24],
2356
+ r.u16[23], r.u16[22], r.u16[21], r.u16[20], r.u16[19], r.u16[18], r.u16[17], r.u16[16],
2357
+ r.u16[15], r.u16[14], r.u16[13], r.u16[12], r.u16[11], r.u16[10], r.u16[ 9], r.u16[ 8],
2358
+ r.u16[ 7], r.u16[ 6], r.u16[ 5], r.u16[ 4], r.u16[ 3], r.u16[ 2], r.u16[ 1], r.u16[ 0]);
2359
+ }
2360
+ return MUNIT_FAIL;
2361
+
2362
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2363
+ simde__m512i r = simde_mm512_xxx_epu16(test_vec[i].a, test_vec[i].b);
2364
+ simde_assert_m512i_u16(r, ==, test_vec[i].r);
2365
+ }
2366
+
2367
+ return MUNIT_OK;
2368
+ }
2369
+
2370
+ static MunitResult
2371
+ test_simde_mm512_xxx_epu32(const MunitParameter params[], void* data) {
2372
+ (void) params;
2373
+ (void) data;
2374
+
2375
+ const struct {
2376
+ simde__m512i a;
2377
+ simde__m512i b;
2378
+ simde__m512i r;
2379
+ } test_vec[8] = {
2380
+
2381
+ };
2382
+
2383
+ printf("\n");
2384
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
2385
+ simde__m512i_private a, b, r;
2386
+
2387
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
2388
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
2389
+
2390
+ r = simde__m512i_to_private(simde_mm512_xxx_epu32(simde__m512i_from_private(a), simde__m512i_from_private(b)));
2391
+
2392
+ printf(" { simde_x_mm512_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
2393
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
2394
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
2395
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
2396
+ a.u32[15], a.u32[14], a.u32[13], a.u32[12], a.u32[11], a.u32[10], a.u32[ 9], a.u32[ 8],
2397
+ a.u32[ 7], a.u32[ 6], a.u32[ 5], a.u32[ 4], a.u32[ 3], a.u32[ 2], a.u32[ 1], a.u32[ 0]);
2398
+ printf(" simde_x_mm512_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
2399
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
2400
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
2401
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
2402
+ b.u32[15], b.u32[14], b.u32[13], b.u32[12], b.u32[11], b.u32[10], b.u32[ 9], b.u32[ 8],
2403
+ b.u32[ 7], b.u32[ 6], b.u32[ 5], b.u32[ 4], b.u32[ 3], b.u32[ 2], b.u32[ 1], b.u32[ 0]);
2404
+ printf(" simde_x_mm512_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
2405
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
2406
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
2407
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")) },\n",
2408
+ r.u32[15], r.u32[14], r.u32[13], r.u32[12], r.u32[11], r.u32[10], r.u32[ 9], r.u32[ 8],
2409
+ r.u32[ 7], r.u32[ 6], r.u32[ 5], r.u32[ 4], r.u32[ 3], r.u32[ 2], r.u32[ 1], r.u32[ 0]);
2410
+ }
2411
+ return MUNIT_FAIL;
2412
+
2413
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2414
+ simde__m512i r = simde_mm512_xxx_epu32(test_vec[i].a, test_vec[i].b);
2415
+ simde_assert_m512i_u32(r, ==, test_vec[i].r);
2416
+ }
2417
+
2418
+ return MUNIT_OK;
2419
+ }
2420
+
2421
+ static MunitResult
2422
+ test_simde_mm512_mask_xxx_epu32(const MunitParameter params[], void* data) {
2423
+ (void) params;
2424
+ (void) data;
2425
+
2426
+ const struct {
2427
+ simde__m512i src;
2428
+ simde__mmask16 k;
2429
+ simde__m512i a;
2430
+ simde__m512i b;
2431
+ simde__m512i r;
2432
+ } test_vec[8] = {
2433
+
2434
+ };
2435
+
2436
+ printf("\n");
2437
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
2438
+ simde__m512i_private src, a, b, r;
2439
+ simde__mmask16 k;
2440
+
2441
+ munit_rand_memory(sizeof(src), (uint8_t*) &src);
2442
+ munit_rand_memory(sizeof(k), (uint8_t*) &k);
2443
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
2444
+ munit_rand_memory(sizeof(a), (uint8_t*) &b);
2445
+ k &= UINT16_C(0xffff);
2446
+
2447
+ r = simde__m512i_to_private(simde_mm512_mask_xxx_epu32(simde__m512i_from_private(src), k, simde__m512i_from_private(a), simde__m512i_from_private(b)));
2448
+
2449
+ printf(" { simde_x_mm512_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
2450
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
2451
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
2452
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
2453
+ src.u32[15], src.u32[14], src.u32[13], src.u32[12], src.u32[11], src.u32[10], src.u32[ 9], src.u32[ 8],
2454
+ src.u32[ 7], src.u32[ 6], src.u32[ 5], src.u32[ 4], src.u32[ 3], src.u32[ 2], src.u32[ 1], src.u32[ 0]);
2455
+ printf(" UINT16_C(%5" PRIu16 "),\n", HEDLEY_STATIC_CAST(uint16_t, k));
2456
+ printf(" simde_x_mm512_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
2457
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
2458
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
2459
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
2460
+ a.u32[15], a.u32[14], a.u32[13], a.u32[12], a.u32[11], a.u32[10], a.u32[ 9], a.u32[ 8],
2461
+ a.u32[ 7], a.u32[ 6], a.u32[ 5], a.u32[ 4], a.u32[ 3], a.u32[ 2], a.u32[ 1], a.u32[ 0]);
2462
+ printf(" simde_x_mm512_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
2463
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
2464
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
2465
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
2466
+ b.u32[15], b.u32[14], b.u32[13], b.u32[12], b.u32[11], b.u32[10], b.u32[ 9], b.u32[ 8],
2467
+ b.u32[ 7], b.u32[ 6], b.u32[ 5], b.u32[ 4], b.u32[ 3], b.u32[ 2], b.u32[ 1], b.u32[ 0]);
2468
+ printf(" simde_x_mm512_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
2469
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
2470
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
2471
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")) },\n",
2472
+ r.u32[15], r.u32[14], r.u32[13], r.u32[12], r.u32[11], r.u32[10], r.u32[ 9], r.u32[ 8],
2473
+ r.u32[ 7], r.u32[ 6], r.u32[ 5], r.u32[ 4], r.u32[ 3], r.u32[ 2], r.u32[ 1], r.u32[ 0]);
2474
+ }
2475
+ return MUNIT_FAIL;
2476
+
2477
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2478
+ simde__m512i r = simde_mm512_mask_xxx_epu32(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b);
2479
+ simde_assert_m512i_u32(r, ==, test_vec[i].r);
2480
+ }
2481
+
2482
+ return MUNIT_OK;
2483
+ }
2484
+
2485
+ static MunitResult
2486
+ test_simde_mm512_xxx_epu64(const MunitParameter params[], void* data) {
2487
+ (void) params;
2488
+ (void) data;
2489
+
2490
+ const struct {
2491
+ simde__m512i a;
2492
+ simde__m512i b;
2493
+ simde__m512i r;
2494
+ } test_vec[8] = {
2495
+
2496
+ };
2497
+
2498
+ printf("\n");
2499
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2500
+ simde__m512i_private a, b, r;
2501
+
2502
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
2503
+ munit_rand_memory(sizeof(b), (uint8_t*) &b);
2504
+
2505
+ r = simde__m512i_to_private(simde_mm512_xxx_epu64(simde__m512i_from_private(a), simde__m512i_from_private(b)));
2506
+
2507
+ printf(" { simde_x_mm512_set_epu64(UINT64_C(%20" PRIu64 "), UINT64_C(%20" PRIu64 "),\n"
2508
+ " UINT64_C(%20" PRIu64 "), UINT64_C(%20" PRIu64 "),\n",
2509
+ " UINT64_C(%20" PRIu64 "), UINT64_C(%20" PRIu64 "),\n",
2510
+ " UINT64_C(%20" PRIu64 "), UINT64_C(%20" PRIu64 ")) },\n",
2511
+ a.i64[7], a.i64[6], a.i64[5], a.i64[4],
2512
+ a.i64[3], a.i64[2], a.i64[1], a.i64[0]);
2513
+ printf(" simde_x_mm512_set_epu64(UINT64_C(%20" PRIu64 "), UINT64_C(%20" PRIu64 "),\n"
2514
+ " UINT64_C(%20" PRIu64 "), UINT64_C(%20" PRIu64 "),\n"
2515
+ " UINT64_C(%20" PRIu64 "), UINT64_C(%20" PRIu64 "),\n"
2516
+ " UINT64_C(%20" PRIu64 "), UINT64_C(%20" PRIu64 ")),\n",
2517
+ b.u64[7], b.u64[6], b.u64[5], b.u64[4],
2518
+ b.u64[3], b.u64[2], b.u64[1], b.u64[0]);
2519
+ printf(" simde_x_mm512_set_epu64(UINT64_C(%20" PRIu64 "), UINT64_C(%20" PRIu64 "),\n"
2520
+ " UINT64_C(%20" PRIu64 "), UINT64_C(%20" PRIu64 "),\n"
2521
+ " UINT64_C(%20" PRIu64 "), UINT64_C(%20" PRIu64 "),\n"
2522
+ " UINT64_C(%20" PRIu64 "), UINT64_C(%20" PRIu64 ")) },\n",
2523
+ r.u64[7], r.u64[6], r.u64[5], r.u64[4],
2524
+ r.u64[3], r.u64[2], r.u64[1], r.u64[0]);
2525
+ }
2526
+ return MUNIT_FAIL;
2527
+
2528
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2529
+ simde__m512i r = simde_mm512_xxx_epu64(test_vec[i].a, test_vec[i].b);
2530
+ simde_assert_m512i_i64(r, ==, test_vec[i].r);
2531
+ }
2532
+
2533
+ return MUNIT_OK;
2534
+ }
2535
+
2536
+ static MunitResult
2537
+ test_simde_mm512_xxx_ps(const MunitParameter params[], void* data) {
2538
+ (void) params;
2539
+ (void) data;
2540
+
2541
+ const struct {
2542
+ simde__m512 a;
2543
+ simde__m512 b;
2544
+ simde__m512 r;
2545
+ } test_vec[8] = {
2546
+
2547
+ };
2548
+
2549
+ printf("\n");
2550
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
2551
+ simde__m512_private a, b, r;
2552
+
2553
+ for (size_t j = 0 ; j < sizeof(simde__m512) / sizeof(simde_float32) ; j++) {
2554
+ a.f32[j] = (simde_float32) (round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0);
2555
+ b.f32[j] = (simde_float32) (round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0);
2556
+ }
2557
+
2558
+ r = simde__m512_to_private(simde_mm512_xxx_ps(simde__m512_from_private(a), simde__m512_from_private(b)));
2559
+
2560
+ printf(" { simde_mm512_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2561
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2562
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2563
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)),\n",
2564
+ 9, a.f32[15], 9, a.f32[14], 9, a.f32[13], 9, a.f32[12],
2565
+ 9, a.f32[11], 9, a.f32[10], 9, a.f32[ 9], 9, a.f32[ 8],
2566
+ 9, a.f32[ 7], 9, a.f32[ 6], 9, a.f32[ 5], 9, a.f32[ 4],
2567
+ 9, a.f32[ 3], 9, a.f32[ 2], 9, a.f32[ 1], 9, a.f32[ 0]);
2568
+ printf(" simde_mm512_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2569
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2570
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2571
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)),\n",
2572
+ 9, b.f32[15], 9, b.f32[14], 9, b.f32[13], 9, b.f32[12],
2573
+ 9, b.f32[11], 9, b.f32[10], 9, b.f32[ 9], 9, b.f32[ 8],
2574
+ 9, b.f32[ 7], 9, b.f32[ 6], 9, b.f32[ 5], 9, b.f32[ 4],
2575
+ 9, b.f32[ 3], 9, b.f32[ 2], 9, b.f32[ 1], 9, b.f32[ 0]);
2576
+ printf(" simde_mm512_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2577
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2578
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2579
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)) },\n",
2580
+ 9, r.f32[15], 9, r.f32[14], 9, r.f32[13], 9, r.f32[12],
2581
+ 9, r.f32[11], 9, r.f32[10], 9, r.f32[ 9], 9, r.f32[ 8],
2582
+ 9, r.f32[ 7], 9, r.f32[ 6], 9, r.f32[ 5], 9, r.f32[ 4],
2583
+ 9, r.f32[ 3], 9, r.f32[ 2], 9, r.f32[ 1], 9, r.f32[ 0]);
2584
+ }
2585
+ return MUNIT_FAIL;
2586
+
2587
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2588
+ simde__m512 r = simde_mm512_xxx_ps(test_vec[i].a, test_vec[i].b);
2589
+ simde_assert_m512_close(r, test_vec[i].r, 1);
2590
+ }
2591
+
2592
+ return MUNIT_OK;
2593
+ }
2594
+
2595
+ static MunitResult
2596
+ test_simde_mm512_mask_xxx_ps(const MunitParameter params[], void* data) {
2597
+ (void) params;
2598
+ (void) data;
2599
+
2600
+ const struct {
2601
+ simde__m512 src;
2602
+ simde__mmask16 k;
2603
+ simde__m512 a;
2604
+ simde__m512 b;
2605
+ simde__m512 r;
2606
+ } test_vec[8] = {
2607
+
2608
+ };
2609
+
2610
+ printf("\n");
2611
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
2612
+ simde__m512_private src, a, b, r;
2613
+ simde__mmask16 k;
2614
+
2615
+ for (size_t j = 0 ; j < sizeof(simde__m512) / sizeof(simde_float32) ; j++) {
2616
+ src.f32[j] = (simde_float32) (round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0);
2617
+ a.f32[j] = (simde_float32) (round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0);
2618
+ b.f32[j] = (simde_float32) (round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0);
2619
+ }
2620
+ munit_rand_memory(sizeof(k), (uint8_t*) &k);
2621
+ k &= UINT16_C(0xffff);
2622
+
2623
+ r = simde__m512_to_private(simde_mm512_mask_xxx_ps(simde__m512_from_private(src), k, simde__m512_from_private(a), simde__m512_from_private(b)));
2624
+
2625
+ printf(" { simde_mm512_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2626
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2627
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2628
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)),\n",
2629
+ 9, src.f32[15], 9, src.f32[14], 9, src.f32[13], 9, src.f32[12],
2630
+ 9, src.f32[11], 9, src.f32[10], 9, src.f32[ 9], 9, src.f32[ 8],
2631
+ 9, src.f32[ 7], 9, src.f32[ 6], 9, src.f32[ 5], 9, src.f32[ 4],
2632
+ 9, src.f32[ 3], 9, src.f32[ 2], 9, src.f32[ 1], 9, src.f32[ 0]);
2633
+ printf(" UINT16_C(%5" PRIu16 "),\n", HEDLEY_STATIC_CAST(uint16_t, k));
2634
+ printf(" simde_mm512_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2635
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2636
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2637
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)),\n",
2638
+ 9, a.f32[15], 9, a.f32[14], 9, a.f32[13], 9, a.f32[12],
2639
+ 9, a.f32[11], 9, a.f32[10], 9, a.f32[ 9], 9, a.f32[ 8],
2640
+ 9, a.f32[ 7], 9, a.f32[ 6], 9, a.f32[ 5], 9, a.f32[ 4],
2641
+ 9, a.f32[ 3], 9, a.f32[ 2], 9, a.f32[ 1], 9, a.f32[ 0]);
2642
+ printf(" simde_mm512_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2643
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2644
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2645
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)),\n",
2646
+ 9, b.f32[15], 9, b.f32[14], 9, b.f32[13], 9, b.f32[12],
2647
+ 9, b.f32[11], 9, b.f32[10], 9, b.f32[ 9], 9, b.f32[ 8],
2648
+ 9, b.f32[ 7], 9, b.f32[ 6], 9, b.f32[ 5], 9, b.f32[ 4],
2649
+ 9, b.f32[ 3], 9, b.f32[ 2], 9, b.f32[ 1], 9, b.f32[ 0]);
2650
+ printf(" simde_mm512_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2651
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2652
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2653
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)) },\n",
2654
+ 9, r.f32[15], 9, r.f32[14], 9, r.f32[13], 9, r.f32[12],
2655
+ 9, r.f32[11], 9, r.f32[10], 9, r.f32[ 9], 9, r.f32[ 8],
2656
+ 9, r.f32[ 7], 9, r.f32[ 6], 9, r.f32[ 5], 9, r.f32[ 4],
2657
+ 9, r.f32[ 3], 9, r.f32[ 2], 9, r.f32[ 1], 9, r.f32[ 0]);
2658
+ }
2659
+ return MUNIT_FAIL;
2660
+
2661
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2662
+ simde__m512 r = simde_mm512_mask_xxx_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b);
2663
+ simde_assert_m512_close(r, test_vec[i].r, 1);
2664
+ }
2665
+
2666
+ return MUNIT_OK;
2667
+ }
2668
+
2669
+ static MunitResult
2670
+ test_simde_mm512_xxx_pd(const MunitParameter params[], void* data) {
2671
+ (void) params;
2672
+ (void) data;
2673
+
2674
+ const struct {
2675
+ simde__m512d a;
2676
+ simde__m512d b;
2677
+ simde__m512d r;
2678
+ } test_vec[8] = {
2679
+
2680
+ };
2681
+
2682
+ printf("\n");
2683
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
2684
+ simde__m512d_private a, b, r;
2685
+
2686
+ for (size_t j = 0 ; j < sizeof(simde__m512d) / sizeof(simde_float64) ; j++) {
2687
+ a.f64[j] = round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0;
2688
+ b.f64[j] = round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0;
2689
+ }
2690
+
2691
+ r = simde__m512d_to_private(simde_mm512_xxx_pd(simde__m512d_from_private(a), simde__m512d_from_private(b)));
2692
+
2693
+ printf(" { simde_mm512_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2694
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2695
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2696
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n",
2697
+ 8, a.f64[7], 8, a.f64[6], 8, a.f64[5], 8, a.f64[4],
2698
+ 8, a.f64[3], 8, a.f64[2], 8, a.f64[1], 8, a.f64[0]);
2699
+ printf(" simde_mm512_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2700
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2701
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2702
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n",
2703
+ 8, b.f64[7], 8, b.f64[6], 8, b.f64[5], 8, b.f64[4],
2704
+ 8, b.f64[3], 8, b.f64[2], 8, b.f64[1], 8, b.f64[0]);
2705
+ printf(" simde_mm512_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2706
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2707
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2708
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)) },\n",
2709
+ 8, r.f64[7], 8, r.f64[6], 8, r.f64[5], 8, r.f64[4],
2710
+ 8, r.f64[3], 8, r.f64[2], 8, r.f64[1], 8, r.f64[0]);
2711
+ }
2712
+ return MUNIT_FAIL;
2713
+
2714
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2715
+ simde__m512d r = simde_mm512_xxx_pd(test_vec[i].a, test_vec[i].b);
2716
+ simde_assert_m512d_close(r, test_vec[i].r, 1);
2717
+ }
2718
+
2719
+ return MUNIT_OK;
2720
+ }
2721
+
2722
+ static MunitResult
2723
+ test_simde_mm512_mask_xxx_pd(const MunitParameter params[], void* data) {
2724
+ (void) params;
2725
+ (void) data;
2726
+
2727
+ const struct {
2728
+ simde__m512d src;
2729
+ simde__mmask8 k;
2730
+ simde__m512d a;
2731
+ simde__m512d b;
2732
+ simde__m512d r;
2733
+ } test_vec[8] = {
2734
+
2735
+ };
2736
+
2737
+ printf("\n");
2738
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
2739
+ simde__m512d_private src, a, b, r;
2740
+ simde__mmask8 k;
2741
+
2742
+ for (size_t j = 0 ; j < sizeof(simde__m512d) / sizeof(simde_float64) ; j++) {
2743
+ src.f64[j] = round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0;
2744
+ a.f64[j] = round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0;
2745
+ b.f64[j] = round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0;
2746
+ }
2747
+ munit_rand_memory(sizeof(k), (uint8_t*) &k);
2748
+ k &= UINT8_C(0xff);
2749
+
2750
+ r = simde__m512d_to_private(simde_mm512_mask_xxx_pd(simde__m512d_from_private(src), k, simde__m512d_from_private(a), simde__m512d_from_private(b)));
2751
+
2752
+ printf(" { simde_mm512_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2753
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2754
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2755
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n",
2756
+ 8, src.f64[7], 8, src.f64[6], 8, src.f64[5], 8, src.f64[4],
2757
+ 8, src.f64[3], 8, src.f64[2], 8, src.f64[1], 8, src.f64[0]);
2758
+ printf(" UINT8_C(%3" PRIu8 "),\n", HEDLEY_STATIC_CAST(uint8_t, k));
2759
+ printf(" simde_mm512_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2760
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2761
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2762
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n",
2763
+ 8, a.f64[7], 8, a.f64[6], 8, a.f64[5], 8, a.f64[4],
2764
+ 8, a.f64[3], 8, a.f64[2], 8, a.f64[1], 8, a.f64[0]);
2765
+ printf(" simde_mm512_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2766
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2767
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2768
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n",
2769
+ 8, b.f64[7], 8, b.f64[6], 8, b.f64[5], 8, b.f64[4],
2770
+ 8, b.f64[3], 8, b.f64[2], 8, b.f64[1], 8, b.f64[0]);
2771
+ printf(" simde_mm512_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2772
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2773
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2774
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)) },\n",
2775
+ 8, r.f64[7], 8, r.f64[6], 8, r.f64[5], 8, r.f64[4],
2776
+ 8, r.f64[3], 8, r.f64[2], 8, r.f64[1], 8, r.f64[0]);
2777
+ }
2778
+ return MUNIT_FAIL;
2779
+
2780
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2781
+ simde__m512d r = simde_mm512_mask_xxx_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b);
2782
+ simde_assert_m512d_close(r, test_vec[i].r, 1);
2783
+ }
2784
+
2785
+ return MUNIT_OK;
2786
+ }
2787
+
2788
+ /* Not sure what the use case for these is. */
2789
+
2790
+ static MunitResult
2791
+ test_simde_mm512_xxx_mov_epi32(const MunitParameter params[], void* data) {
2792
+ (void) params;
2793
+ (void) data;
2794
+
2795
+ const struct {
2796
+ simde__mmask16 k;
2797
+ simde__m512i a;
2798
+ simde__m512i r;
2799
+ } test_vec[8] = {
2800
+
2801
+ };
2802
+
2803
+ printf("\n");
2804
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
2805
+ simde__m512i_private a, r;
2806
+ simde__mmask16 k;
2807
+
2808
+ munit_rand_memory(sizeof(k), (uint8_t*) &k);
2809
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
2810
+
2811
+ r = simde__m512i_to_private(simde_mm512_xxx_mov_epi32(k, simde__m512i_from_private(a)));
2812
+
2813
+ printf(" { UINT16_C(%5" PRIu16 "),\n", k);
2814
+ printf(" simde_mm512_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
2815
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
2816
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
2817
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")),\n",
2818
+ a.i32[15], a.i32[14], a.i32[13], a.i32[12], a.i32[11], a.i32[10], a.i32[ 9], a.i32[ 8],
2819
+ a.i32[ 7], a.i32[ 6], a.i32[ 5], a.i32[ 4], a.i32[ 3], a.i32[ 2], a.i32[ 1], a.i32[ 0]);
2820
+ printf(" simde_mm512_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
2821
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
2822
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
2823
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")) },\n",
2824
+ r.i32[15], r.i32[14], r.i32[13], r.i32[12], r.i32[11], r.i32[10], r.i32[ 9], r.i32[ 8],
2825
+ r.i32[ 7], r.i32[ 6], r.i32[ 5], r.i32[ 4], r.i32[ 3], r.i32[ 2], r.i32[ 1], r.i32[ 0]);
2826
+ }
2827
+ return MUNIT_FAIL;
2828
+
2829
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2830
+ simde__m512i r = simde_mm512_xxx_mov_epi32(test_vec[i].k, test_vec[i].a);
2831
+ simde_assert_m512i_i32(r, ==, test_vec[i].r);
2832
+ }
2833
+
2834
+ return MUNIT_OK;
2835
+ }
2836
+
2837
+ static MunitResult
2838
+ test_simde_mm512_xxx_mov_epi64(const MunitParameter params[], void* data) {
2839
+ (void) params;
2840
+ (void) data;
2841
+
2842
+ const struct {
2843
+ simde__mmask8 k;
2844
+ simde__m512i a;
2845
+ simde__m512i r;
2846
+ } test_vec[8] = {
2847
+
2848
+ };
2849
+
2850
+ printf("\n");
2851
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
2852
+ simde__m512i_private a, r;
2853
+ simde__mmask8 k;
2854
+
2855
+ munit_rand_memory(sizeof(k), (uint8_t*) &k);
2856
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
2857
+
2858
+ r = simde__m512i_to_private(simde_mm512_xxx_mov_epi64(k, simde__m512i_from_private(a)));
2859
+
2860
+ printf(" { UINT8_C(%3" PRIu8 "),\n", k);
2861
+ printf(" simde_mm512_set_epi64(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2862
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2863
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2864
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n",
2865
+ a.i64[7], a.i64[6], a.i64[5], a.i64[4],
2866
+ a.i64[3], a.i64[2], a.i64[1], a.i64[0]);
2867
+ printf(" simde_mm512_set_epi64(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2868
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2869
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
2870
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")) },\n",
2871
+ r.i64[7], r.i64[6], r.i64[5], r.i64[4],
2872
+ r.i64[3], r.i64[2], r.i64[1], r.i64[0]);
2873
+ }
2874
+ return MUNIT_FAIL;
2875
+
2876
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2877
+ simde__m512i r = simde_mm512_xxx_mov_epi64(test_vec[i].k, test_vec[i].a);
2878
+ simde_assert_m512i_i64(r, ==, test_vec[i].r);
2879
+ }
2880
+
2881
+ return MUNIT_OK;
2882
+ }
2883
+
2884
+ static MunitResult
2885
+ test_simde_mm512_xxx_mov_ps(const MunitParameter params[], void* data) {
2886
+ (void) params;
2887
+ (void) data;
2888
+
2889
+ const struct {
2890
+ simde__mmask16 k;
2891
+ simde__m512 a;
2892
+ simde__m512 r;
2893
+ } test_vec[8] = {
2894
+
2895
+ };
2896
+
2897
+ printf("\n");
2898
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
2899
+ simde__m512_private a, r;
2900
+ simde__mmask16 k;
2901
+
2902
+ for (size_t j = 0 ; j < sizeof(simde__m512) / sizeof(simde_float32) ; j++) {
2903
+ a.f32[j] = (simde_float32) (round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0);
2904
+ }
2905
+ munit_rand_memory(sizeof(k), (uint8_t*) &k);
2906
+
2907
+ r = simde__m512_to_private(simde_mm512_xxx_mov_ps(k, simde__m512_from_private(a)));
2908
+
2909
+ printf(" { UINT16_C(%5" PRIu16 "),\n", k);
2910
+ printf(" simde_mm512_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2911
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2912
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2913
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)),\n",
2914
+ 9, a.f32[15], 9, a.f32[14], 9, a.f32[13], 9, a.f32[12],
2915
+ 9, a.f32[11], 9, a.f32[10], 9, a.f32[ 9], 9, a.f32[ 8],
2916
+ 9, a.f32[ 7], 9, a.f32[ 6], 9, a.f32[ 5], 9, a.f32[ 4],
2917
+ 9, a.f32[ 3], 9, a.f32[ 2], 9, a.f32[ 1], 9, a.f32[ 0]);
2918
+ printf(" simde_mm512_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2919
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2920
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
2921
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)) },\n",
2922
+ 9, r.f32[15], 9, r.f32[14], 9, r.f32[13], 9, r.f32[12],
2923
+ 9, r.f32[11], 9, r.f32[10], 9, r.f32[ 9], 9, r.f32[ 8],
2924
+ 9, r.f32[ 7], 9, r.f32[ 6], 9, r.f32[ 5], 9, r.f32[ 4],
2925
+ 9, r.f32[ 3], 9, r.f32[ 2], 9, r.f32[ 1], 9, r.f32[ 0]);
2926
+ }
2927
+ return MUNIT_FAIL;
2928
+
2929
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2930
+ simde__m512 r = simde_mm512_xxx_mov_ps(test_vec[i].k, test_vec[i].a);
2931
+ simde_assert_m512_close(r, test_vec[i].r, 1);
2932
+ }
2933
+
2934
+ return MUNIT_OK;
2935
+ }
2936
+
2937
+ static MunitResult
2938
+ test_simde_mm512_xxx_mov_pd(const MunitParameter params[], void* data) {
2939
+ (void) params;
2940
+ (void) data;
2941
+
2942
+ const struct {
2943
+ simde__mmask8 k;
2944
+ simde__m512d a;
2945
+ simde__m512d r;
2946
+ } test_vec[8] = {
2947
+
2948
+ };
2949
+
2950
+ printf("\n");
2951
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
2952
+ simde__m512d_private a, r;
2953
+ simde__mmask8 k;
2954
+
2955
+ for (size_t j = 0 ; j < sizeof(simde__m512d) / sizeof(simde_float64) ; j++) {
2956
+ a.f64[j] = round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0;
2957
+ }
2958
+ munit_rand_memory(sizeof(k), (uint8_t*) &k);
2959
+
2960
+ r = simde__m512d_to_private(simde_mm512_xxx_mov_pd(k, simde__m512d_from_private(a)));
2961
+
2962
+ printf(" { UINT8_C(%3" PRIu8 "),\n", k);
2963
+ printf(" simde_mm512_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2964
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2965
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2966
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n",
2967
+ 8, a.f64[7], 8, a.f64[6], 8, a.f64[5], 8, a.f64[4],
2968
+ 8, a.f64[3], 8, a.f64[2], 8, a.f64[1], 8, a.f64[0]);
2969
+ printf(" simde_mm512_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2970
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2971
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
2972
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)) },\n",
2973
+ 8, r.f64[7], 8, r.f64[6], 8, r.f64[5], 8, r.f64[4],
2974
+ 8, r.f64[3], 8, r.f64[2], 8, r.f64[1], 8, r.f64[0]);
2975
+ }
2976
+ return MUNIT_FAIL;
2977
+
2978
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2979
+ simde__m512d r = simde_mm512_xxx_mov_pd(test_vec[i].k, test_vec[i].a);
2980
+ simde_assert_m512d_close(r, test_vec[i].r, 1);
2981
+ }
2982
+
2983
+ return MUNIT_OK;
2984
+ }