minimap2 0.2.25.0 → 0.2.25.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (123) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -3
  3. data/ext/minimap2/Makefile +6 -2
  4. data/ext/minimap2/NEWS.md +38 -0
  5. data/ext/minimap2/README.md +9 -3
  6. data/ext/minimap2/align.c +5 -3
  7. data/ext/minimap2/cookbook.md +2 -2
  8. data/ext/minimap2/format.c +7 -4
  9. data/ext/minimap2/kalloc.c +20 -1
  10. data/ext/minimap2/kalloc.h +13 -2
  11. data/ext/minimap2/ksw2.h +1 -0
  12. data/ext/minimap2/ksw2_extd2_sse.c +1 -1
  13. data/ext/minimap2/ksw2_exts2_sse.c +79 -40
  14. data/ext/minimap2/ksw2_extz2_sse.c +1 -1
  15. data/ext/minimap2/lchain.c +15 -16
  16. data/ext/minimap2/lib/simde/CONTRIBUTING.md +114 -0
  17. data/ext/minimap2/lib/simde/COPYING +20 -0
  18. data/ext/minimap2/lib/simde/README.md +333 -0
  19. data/ext/minimap2/lib/simde/amalgamate.py +58 -0
  20. data/ext/minimap2/lib/simde/meson.build +33 -0
  21. data/ext/minimap2/lib/simde/netlify.toml +20 -0
  22. data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +140 -0
  23. data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +137 -0
  24. data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +142 -0
  25. data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +145 -0
  26. data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +140 -0
  27. data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +145 -0
  28. data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +140 -0
  29. data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +143 -0
  30. data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +137 -0
  31. data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +141 -0
  32. data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +147 -0
  33. data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +141 -0
  34. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +134 -0
  35. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +138 -0
  36. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +134 -0
  37. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +137 -0
  38. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +131 -0
  39. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +135 -0
  40. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +141 -0
  41. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +135 -0
  42. data/ext/minimap2/lib/simde/simde/arm/neon.h +97 -0
  43. data/ext/minimap2/lib/simde/simde/check.h +267 -0
  44. data/ext/minimap2/lib/simde/simde/debug-trap.h +83 -0
  45. data/ext/minimap2/lib/simde/simde/hedley.h +1899 -0
  46. data/ext/minimap2/lib/simde/simde/simde-arch.h +445 -0
  47. data/ext/minimap2/lib/simde/simde/simde-common.h +697 -0
  48. data/ext/minimap2/lib/simde/simde/x86/avx.h +5385 -0
  49. data/ext/minimap2/lib/simde/simde/x86/avx2.h +2402 -0
  50. data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +391 -0
  51. data/ext/minimap2/lib/simde/simde/x86/avx512f.h +3389 -0
  52. data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +112 -0
  53. data/ext/minimap2/lib/simde/simde/x86/fma.h +659 -0
  54. data/ext/minimap2/lib/simde/simde/x86/mmx.h +2210 -0
  55. data/ext/minimap2/lib/simde/simde/x86/sse.h +3696 -0
  56. data/ext/minimap2/lib/simde/simde/x86/sse2.h +5991 -0
  57. data/ext/minimap2/lib/simde/simde/x86/sse3.h +343 -0
  58. data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +1783 -0
  59. data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +105 -0
  60. data/ext/minimap2/lib/simde/simde/x86/ssse3.h +1053 -0
  61. data/ext/minimap2/lib/simde/simde/x86/svml.h +543 -0
  62. data/ext/minimap2/lib/simde/test/CMakeLists.txt +166 -0
  63. data/ext/minimap2/lib/simde/test/arm/meson.build +4 -0
  64. data/ext/minimap2/lib/simde/test/arm/neon/meson.build +23 -0
  65. data/ext/minimap2/lib/simde/test/arm/neon/skel.c +871 -0
  66. data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +134 -0
  67. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +39 -0
  68. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +10 -0
  69. data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +1260 -0
  70. data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +873 -0
  71. data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +1084 -0
  72. data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +1260 -0
  73. data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +18 -0
  74. data/ext/minimap2/lib/simde/test/arm/test-arm.c +20 -0
  75. data/ext/minimap2/lib/simde/test/arm/test-arm.h +8 -0
  76. data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +171 -0
  77. data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +68 -0
  78. data/ext/minimap2/lib/simde/test/meson.build +64 -0
  79. data/ext/minimap2/lib/simde/test/munit/COPYING +21 -0
  80. data/ext/minimap2/lib/simde/test/munit/Makefile +55 -0
  81. data/ext/minimap2/lib/simde/test/munit/README.md +54 -0
  82. data/ext/minimap2/lib/simde/test/munit/example.c +351 -0
  83. data/ext/minimap2/lib/simde/test/munit/meson.build +37 -0
  84. data/ext/minimap2/lib/simde/test/munit/munit.c +2055 -0
  85. data/ext/minimap2/lib/simde/test/munit/munit.h +535 -0
  86. data/ext/minimap2/lib/simde/test/run-tests.c +20 -0
  87. data/ext/minimap2/lib/simde/test/run-tests.h +260 -0
  88. data/ext/minimap2/lib/simde/test/x86/avx.c +13752 -0
  89. data/ext/minimap2/lib/simde/test/x86/avx2.c +9977 -0
  90. data/ext/minimap2/lib/simde/test/x86/avx512bw.c +2664 -0
  91. data/ext/minimap2/lib/simde/test/x86/avx512f.c +10416 -0
  92. data/ext/minimap2/lib/simde/test/x86/avx512vl.c +210 -0
  93. data/ext/minimap2/lib/simde/test/x86/fma.c +2557 -0
  94. data/ext/minimap2/lib/simde/test/x86/meson.build +33 -0
  95. data/ext/minimap2/lib/simde/test/x86/mmx.c +2878 -0
  96. data/ext/minimap2/lib/simde/test/x86/skel.c +2984 -0
  97. data/ext/minimap2/lib/simde/test/x86/sse.c +5121 -0
  98. data/ext/minimap2/lib/simde/test/x86/sse2.c +9860 -0
  99. data/ext/minimap2/lib/simde/test/x86/sse3.c +486 -0
  100. data/ext/minimap2/lib/simde/test/x86/sse4.1.c +3446 -0
  101. data/ext/minimap2/lib/simde/test/x86/sse4.2.c +101 -0
  102. data/ext/minimap2/lib/simde/test/x86/ssse3.c +2084 -0
  103. data/ext/minimap2/lib/simde/test/x86/svml.c +1545 -0
  104. data/ext/minimap2/lib/simde/test/x86/test-avx.h +16 -0
  105. data/ext/minimap2/lib/simde/test/x86/test-avx512.h +25 -0
  106. data/ext/minimap2/lib/simde/test/x86/test-mmx.h +13 -0
  107. data/ext/minimap2/lib/simde/test/x86/test-sse.h +13 -0
  108. data/ext/minimap2/lib/simde/test/x86/test-sse2.h +13 -0
  109. data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +196 -0
  110. data/ext/minimap2/lib/simde/test/x86/test-x86.c +48 -0
  111. data/ext/minimap2/lib/simde/test/x86/test-x86.h +8 -0
  112. data/ext/minimap2/main.c +13 -6
  113. data/ext/minimap2/map.c +0 -5
  114. data/ext/minimap2/minimap.h +40 -31
  115. data/ext/minimap2/minimap2.1 +19 -5
  116. data/ext/minimap2/misc/paftools.js +545 -24
  117. data/ext/minimap2/options.c +1 -1
  118. data/ext/minimap2/pyproject.toml +2 -0
  119. data/ext/minimap2/python/mappy.pyx +3 -1
  120. data/ext/minimap2/seed.c +1 -1
  121. data/ext/minimap2/setup.py +32 -22
  122. data/lib/minimap2/version.rb +1 -1
  123. metadata +100 -3
@@ -0,0 +1,2878 @@
1
+ /* Copyright (c) 2017 Evan Nemerson <evan@nemerson.com>
2
+ *
3
+ * Permission is hereby granted, free of charge, to any person
4
+ * obtaining a copy of this software and associated documentation
5
+ * files (the "Software"), to deal in the Software without
6
+ * restriction, including without limitation the rights to use, copy,
7
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
8
+ * of the Software, and to permit persons to whom the Software is
9
+ * furnished to do so, subject to the following conditions:
10
+ *
11
+ * The above copyright notice and this permission notice shall be
12
+ * included in all copies or substantial portions of the Software.
13
+ *
14
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ * SOFTWARE.
22
+ */
23
+
24
+ #define SIMDE_TESTS_CURRENT_ISAX mmx
25
+ #include <test/x86/test-mmx.h>
26
+
27
+ #if defined(SIMDE_MMX_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)
28
+
29
+ static MunitResult
30
+ test_simde_mm_set1_pi8(const MunitParameter params[], void* data) {
31
+ (void) params;
32
+ (void) data;
33
+
34
+ const struct {
35
+ int8_t a;
36
+ simde__m64 r;
37
+ } test_vec[8] = {
38
+ { INT8_C( -16),
39
+ simde_mm_set_pi8(INT8_C( -16), INT8_C( -16), INT8_C( -16), INT8_C( -16),
40
+ INT8_C( -16), INT8_C( -16), INT8_C( -16), INT8_C( -16)) },
41
+ { INT8_C(-120),
42
+ simde_mm_set_pi8(INT8_C(-120), INT8_C(-120), INT8_C(-120), INT8_C(-120),
43
+ INT8_C(-120), INT8_C(-120), INT8_C(-120), INT8_C(-120)) },
44
+ { INT8_C( 86),
45
+ simde_mm_set_pi8(INT8_C( 86), INT8_C( 86), INT8_C( 86), INT8_C( 86),
46
+ INT8_C( 86), INT8_C( 86), INT8_C( 86), INT8_C( 86)) },
47
+ { INT8_C( -12),
48
+ simde_mm_set_pi8(INT8_C( -12), INT8_C( -12), INT8_C( -12), INT8_C( -12),
49
+ INT8_C( -12), INT8_C( -12), INT8_C( -12), INT8_C( -12)) },
50
+ { INT8_C( 3),
51
+ simde_mm_set_pi8(INT8_C( 3), INT8_C( 3), INT8_C( 3), INT8_C( 3),
52
+ INT8_C( 3), INT8_C( 3), INT8_C( 3), INT8_C( 3)) },
53
+ { INT8_C( 25),
54
+ simde_mm_set_pi8(INT8_C( 25), INT8_C( 25), INT8_C( 25), INT8_C( 25),
55
+ INT8_C( 25), INT8_C( 25), INT8_C( 25), INT8_C( 25)) },
56
+ { INT8_C( 40),
57
+ simde_mm_set_pi8(INT8_C( 40), INT8_C( 40), INT8_C( 40), INT8_C( 40),
58
+ INT8_C( 40), INT8_C( 40), INT8_C( 40), INT8_C( 40)) },
59
+ { INT8_C( -12),
60
+ simde_mm_set_pi8(INT8_C( -12), INT8_C( -12), INT8_C( -12), INT8_C( -12),
61
+ INT8_C( -12), INT8_C( -12), INT8_C( -12), INT8_C( -12)) }
62
+ };
63
+
64
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
65
+ simde__m64 r = simde_mm_set1_pi8(test_vec[i].a);
66
+ simde_assert_m64_i8(r, ==, test_vec[i].r);
67
+ }
68
+
69
+ return MUNIT_OK;
70
+ }
71
+
72
+ static MunitResult
73
+ test_simde_mm_set1_pi16(const MunitParameter params[], void* data) {
74
+ (void) params;
75
+ (void) data;
76
+
77
+ int16_t v = HEDLEY_STATIC_CAST(int16_t, munit_rand_int_range(SHRT_MIN, SHRT_MAX));
78
+
79
+ simde__m64 x = simde_mm_set1_pi16(v);
80
+ int16_t* r = HEDLEY_REINTERPRET_CAST(int16_t*, &x);
81
+
82
+ simde_mm_empty();
83
+
84
+ munit_assert_int16(r[0], ==, v);
85
+ munit_assert_int16(r[1], ==, v);
86
+ munit_assert_int16(r[2], ==, v);
87
+ munit_assert_int16(r[3], ==, v);
88
+
89
+ return MUNIT_OK;
90
+ }
91
+
92
+ static MunitResult
93
+ test_simde_mm_set1_pi32(const MunitParameter params[], void* data) {
94
+ (void) params;
95
+ (void) data;
96
+
97
+ int32_t v = HEDLEY_STATIC_CAST(int32_t, munit_rand_int_range(INT32_MIN, INT32_MAX));
98
+
99
+ simde__m64 x = simde_mm_set1_pi32(v);
100
+ int32_t* r = HEDLEY_REINTERPRET_CAST(int32_t*, &x);
101
+
102
+ simde_mm_empty();
103
+
104
+ munit_assert_int32(r[0], ==, v);
105
+ munit_assert_int32(r[1], ==, v);
106
+
107
+ return MUNIT_OK;
108
+ }
109
+
110
+ static MunitResult
111
+ test_simde_mm_setr_pi8(const MunitParameter params[], void* data) {
112
+ (void) params;
113
+ (void) data;
114
+
115
+ int8_t d[8 / sizeof(int8_t)];
116
+ munit_rand_memory(sizeof(d), HEDLEY_REINTERPRET_CAST(uint8_t*, d));
117
+
118
+ simde__m64 x = simde_mm_setr_pi8(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7]);
119
+ int8_t* c = HEDLEY_REINTERPRET_CAST(int8_t*, &x);
120
+
121
+ simde_mm_empty();
122
+
123
+ munit_assert_int8(c[0], ==, d[0]);
124
+ munit_assert_int8(c[1], ==, d[1]);
125
+ munit_assert_int8(c[2], ==, d[2]);
126
+ munit_assert_int8(c[3], ==, d[3]);
127
+ munit_assert_int8(c[4], ==, d[4]);
128
+ munit_assert_int8(c[5], ==, d[5]);
129
+ munit_assert_int8(c[6], ==, d[6]);
130
+ munit_assert_int8(c[7], ==, d[7]);
131
+
132
+ return MUNIT_OK;
133
+ }
134
+
135
+ static MunitResult
136
+ test_simde_mm_setr_pi16(const MunitParameter params[], void* data) {
137
+ (void) params;
138
+ (void) data;
139
+
140
+ int16_t d[8 / sizeof(int16_t)];
141
+ munit_rand_memory(sizeof(d), HEDLEY_REINTERPRET_CAST(uint8_t*, d));
142
+
143
+ simde__m64 x = simde_mm_setr_pi16(d[0], d[1], d[2], d[3]);
144
+ int16_t* s = HEDLEY_REINTERPRET_CAST(int16_t*, &x);
145
+
146
+ simde_mm_empty();
147
+
148
+ munit_assert_int16(s[0], ==, d[0]);
149
+ munit_assert_int16(s[1], ==, d[1]);
150
+ munit_assert_int16(s[2], ==, d[2]);
151
+ munit_assert_int16(s[3], ==, d[3]);
152
+
153
+ return MUNIT_OK;
154
+ }
155
+
156
+ static MunitResult
157
+ test_simde_mm_setr_pi32(const MunitParameter params[], void* data) {
158
+ (void) params;
159
+ (void) data;
160
+
161
+ int32_t d[8 / sizeof(int32_t)];
162
+ munit_rand_memory(sizeof(d), HEDLEY_REINTERPRET_CAST(uint8_t*, d));
163
+
164
+ simde__m64 x = simde_mm_setr_pi32(d[0], d[1]);
165
+ int32_t* i = HEDLEY_REINTERPRET_CAST(int32_t*, &x);
166
+
167
+ simde_mm_empty();
168
+
169
+ munit_assert_int32(i[0], ==, d[0]);
170
+ munit_assert_int32(i[1], ==, d[1]);
171
+
172
+ return MUNIT_OK;
173
+ }
174
+
175
+ static MunitResult
176
+ test_simde_mm_add_pi8(const MunitParameter params[], void* data) {
177
+ (void) params;
178
+ (void) data;
179
+
180
+ const struct {
181
+ simde__m64 a;
182
+ simde__m64 b;
183
+ simde__m64 r;
184
+ } test_vec[8] = {
185
+ { simde_mm_set_pi8(INT8_C( 25), INT8_C(-106), INT8_C( 93), INT8_C( 86), INT8_C( -56), INT8_C( 101), INT8_C( 79), INT8_C( 83)),
186
+ simde_mm_set_pi8(INT8_C( -38), INT8_C( -6), INT8_C( 47), INT8_C( 59), INT8_C( -67), INT8_C( -36), INT8_C( 127), INT8_C( 104)),
187
+ simde_mm_set_pi8(INT8_C( -13), INT8_C(-112), INT8_C(-116), INT8_C(-111), INT8_C(-123), INT8_C( 65), INT8_C( -50), INT8_C( -69)) },
188
+ { simde_mm_set_pi8(INT8_C(-105), INT8_C( 113), INT8_C( 22), INT8_C( -91), INT8_C( 59), INT8_C( -4), INT8_C( 67), INT8_C( 43)),
189
+ simde_mm_set_pi8(INT8_C( -13), INT8_C( 93), INT8_C( 81), INT8_C( 108), INT8_C(-104), INT8_C( 123), INT8_C( 105), INT8_C( 119)),
190
+ simde_mm_set_pi8(INT8_C(-118), INT8_C( -50), INT8_C( 103), INT8_C( 17), INT8_C( -45), INT8_C( 119), INT8_C( -84), INT8_C( -94)) },
191
+ { simde_mm_set_pi8(INT8_C( -8), INT8_C( 52), INT8_C( 92), INT8_C( 121), INT8_C( 58), INT8_C(-104), INT8_C( 27), INT8_C( -80)),
192
+ simde_mm_set_pi8(INT8_C( 62), INT8_C(-100), INT8_C( 5), INT8_C( -95), INT8_C( -16), INT8_C( 109), INT8_C( 127), INT8_C( 62)),
193
+ simde_mm_set_pi8(INT8_C( 54), INT8_C( -48), INT8_C( 97), INT8_C( 26), INT8_C( 42), INT8_C( 5), INT8_C(-102), INT8_C( -18)) },
194
+ { simde_mm_set_pi8(INT8_C( 32), INT8_C( 124), INT8_C( 115), INT8_C( 3), INT8_C( 104), INT8_C( 27), INT8_C( 43), INT8_C( -11)),
195
+ simde_mm_set_pi8(INT8_C( -22), INT8_C( 27), INT8_C( -47), INT8_C( 45), INT8_C( -96), INT8_C( -49), INT8_C( -74), INT8_C( -34)),
196
+ simde_mm_set_pi8(INT8_C( 10), INT8_C(-105), INT8_C( 68), INT8_C( 48), INT8_C( 8), INT8_C( -22), INT8_C( -31), INT8_C( -45)) },
197
+ { simde_mm_set_pi8(INT8_C( -14), INT8_C( -79), INT8_C( -38), INT8_C( -93), INT8_C( -55), INT8_C( 83), INT8_C( 78), INT8_C( -90)),
198
+ simde_mm_set_pi8(INT8_C( 91), INT8_C( -61), INT8_C(-124), INT8_C( -64), INT8_C( 76), INT8_C( -15), INT8_C(-117), INT8_C( 11)),
199
+ simde_mm_set_pi8(INT8_C( 77), INT8_C( 116), INT8_C( 94), INT8_C( 99), INT8_C( 21), INT8_C( 68), INT8_C( -39), INT8_C( -79)) },
200
+ { simde_mm_set_pi8(INT8_C(-119), INT8_C( 33), INT8_C( -57), INT8_C( 54), INT8_C( -18), INT8_C( 79), INT8_C( 86), INT8_C( -25)),
201
+ simde_mm_set_pi8(INT8_C(-115), INT8_C(-114), INT8_C( 72), INT8_C(-126), INT8_C( -80), INT8_C( 114), INT8_C(-126), INT8_C( 42)),
202
+ simde_mm_set_pi8(INT8_C( 22), INT8_C( -81), INT8_C( 15), INT8_C( -72), INT8_C( -98), INT8_C( -63), INT8_C( -40), INT8_C( 17)) },
203
+ { simde_mm_set_pi8(INT8_C( 60), INT8_C( 109), INT8_C( 93), INT8_C( -45), INT8_C( -62), INT8_C(-104), INT8_C( -41), INT8_C( 72)),
204
+ simde_mm_set_pi8(INT8_C( 72), INT8_C( -86), INT8_C( 21), INT8_C( 79), INT8_C( 43), INT8_C( 23), INT8_C( -74), INT8_C( -62)),
205
+ simde_mm_set_pi8(INT8_C(-124), INT8_C( 23), INT8_C( 114), INT8_C( 34), INT8_C( -19), INT8_C( -81), INT8_C(-115), INT8_C( 10)) },
206
+ { simde_mm_set_pi8(INT8_C( 110), INT8_C( 106), INT8_C( -94), INT8_C( 102), INT8_C( -82), INT8_C( 108), INT8_C( -12), INT8_C( -48)),
207
+ simde_mm_set_pi8(INT8_C( 108), INT8_C( 3), INT8_C( -91), INT8_C( 65), INT8_C( 30), INT8_C( 106), INT8_C( -1), INT8_C( 100)),
208
+ simde_mm_set_pi8(INT8_C( -38), INT8_C( 109), INT8_C( 71), INT8_C( -89), INT8_C( -52), INT8_C( -42), INT8_C( -13), INT8_C( 52)) }
209
+ };
210
+
211
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
212
+ simde__m64 r = simde_mm_add_pi8(test_vec[i].a, test_vec[i].b);
213
+ simde_mm_empty();
214
+ simde_assert_m64_i8(r, ==, test_vec[i].r);
215
+ }
216
+
217
+ simde_mm_empty();
218
+ return MUNIT_OK;
219
+ }
220
+
221
+ static MunitResult
222
+ test_simde_mm_add_pi16(const MunitParameter params[], void* data) {
223
+ (void) params;
224
+ (void) data;
225
+
226
+ const struct {
227
+ simde__m64 a;
228
+ simde__m64 b;
229
+ simde__m64 r;
230
+ } test_vec[8] = {
231
+ { simde_mm_set_pi16(INT16_C( -13258), INT16_C( -8776), INT16_C( 32365), INT16_C( -3887)),
232
+ simde_mm_set_pi16(INT16_C( 20018), INT16_C( 23417), INT16_C( -774), INT16_C( 5810)),
233
+ simde_mm_set_pi16(INT16_C( 6760), INT16_C( 14641), INT16_C( 31591), INT16_C( 1923)) },
234
+ { simde_mm_set_pi16(INT16_C( 11335), INT16_C( 29732), INT16_C( 26059), INT16_C( -15004)),
235
+ simde_mm_set_pi16(INT16_C( -13772), INT16_C( -20922), INT16_C( 1993), INT16_C( -30395)),
236
+ simde_mm_set_pi16(INT16_C( -2437), INT16_C( 8810), INT16_C( 28052), INT16_C( 20137)) },
237
+ { simde_mm_set_pi16(INT16_C( 159), INT16_C( 23628), INT16_C( -17224), INT16_C( -23288)),
238
+ simde_mm_set_pi16(INT16_C( -18303), INT16_C( 7699), INT16_C( 22351), INT16_C( -16238)),
239
+ simde_mm_set_pi16(INT16_C( -18144), INT16_C( 31327), INT16_C( 5127), INT16_C( 26010)) },
240
+ { simde_mm_set_pi16(INT16_C( 9097), INT16_C( -5982), INT16_C( 28191), INT16_C( -32707)),
241
+ simde_mm_set_pi16(INT16_C( -16920), INT16_C( -18039), INT16_C( -32259), INT16_C( 10405)),
242
+ simde_mm_set_pi16(INT16_C( -7823), INT16_C( -24021), INT16_C( -4068), INT16_C( -22302)) },
243
+ { simde_mm_set_pi16(INT16_C( 2097), INT16_C( 24451), INT16_C( 25533), INT16_C( -14205)),
244
+ simde_mm_set_pi16(INT16_C( -28269), INT16_C( 4484), INT16_C( -22223), INT16_C( 17945)),
245
+ simde_mm_set_pi16(INT16_C( -26172), INT16_C( 28935), INT16_C( 3310), INT16_C( 3740)) },
246
+ { simde_mm_set_pi16(INT16_C( -17654), INT16_C( 12451), INT16_C( 12325), INT16_C( 5198)),
247
+ simde_mm_set_pi16(INT16_C( -26590), INT16_C( 31889), INT16_C( -14656), INT16_C( 6378)),
248
+ simde_mm_set_pi16(INT16_C( 21292), INT16_C( -21196), INT16_C( -2331), INT16_C( 11576)) },
249
+ { simde_mm_set_pi16(INT16_C( 31498), INT16_C( -18726), INT16_C( -9720), INT16_C( -17042)),
250
+ simde_mm_set_pi16(INT16_C( 17025), INT16_C( 13186), INT16_C( -25923), INT16_C( 15017)),
251
+ simde_mm_set_pi16(INT16_C( -17013), INT16_C( -5540), INT16_C( 29893), INT16_C( -2025)) },
252
+ { simde_mm_set_pi16(INT16_C( 9904), INT16_C( -28061), INT16_C( -32123), INT16_C( -1285)),
253
+ simde_mm_set_pi16(INT16_C( -7190), INT16_C( -1918), INT16_C( 26654), INT16_C( -31449)),
254
+ simde_mm_set_pi16(INT16_C( 2714), INT16_C( -29979), INT16_C( -5469), INT16_C( -32734)) }
255
+ };
256
+
257
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
258
+ simde__m64 r = simde_mm_add_pi16(test_vec[i].a, test_vec[i].b);
259
+ simde_mm_empty();
260
+ simde_assert_m64_i16(r, ==, test_vec[i].r);
261
+ }
262
+
263
+ simde_mm_empty();
264
+ return MUNIT_OK;
265
+ }
266
+
267
+ static MunitResult
268
+ test_simde_mm_add_pi32(const MunitParameter params[], void* data) {
269
+ (void) params;
270
+ (void) data;
271
+
272
+ const struct {
273
+ simde__m64 a;
274
+ simde__m64 b;
275
+ simde__m64 r;
276
+ } test_vec[8] = {
277
+ { simde_mm_set_pi32(INT32_C( -1528799955), INT32_C( -1825996932)),
278
+ simde_mm_set_pi32(INT32_C( -1229665745), INT32_C( 989894561)),
279
+ simde_mm_set_pi32(INT32_C( 1536501596), INT32_C( -836102371)) },
280
+ { simde_mm_set_pi32(INT32_C( 1936809596), INT32_C( 1331021923)),
281
+ simde_mm_set_pi32(INT32_C( -505769092), INT32_C( 1471336810)),
282
+ simde_mm_set_pi32(INT32_C( 1431040504), INT32_C( -1492608563)) },
283
+ { simde_mm_set_pi32(INT32_C( 783830780), INT32_C( 1923113282)),
284
+ simde_mm_set_pi32(INT32_C( 1700161106), INT32_C( -175473923)),
285
+ simde_mm_set_pi32(INT32_C( -1810975410), INT32_C( 1747639359)) },
286
+ { simde_mm_set_pi32(INT32_C( 1195975755), INT32_C( 1329173130)),
287
+ simde_mm_set_pi32(INT32_C( -611537759), INT32_C( 787308680)),
288
+ simde_mm_set_pi32(INT32_C( 584437996), INT32_C( 2116481810)) },
289
+ { simde_mm_set_pi32(INT32_C( 950103059), INT32_C( 570905377)),
290
+ simde_mm_set_pi32(INT32_C( 1696944201), INT32_C( -1762697792)),
291
+ simde_mm_set_pi32(INT32_C( -1647920036), INT32_C( -1191792415)) },
292
+ { simde_mm_set_pi32(INT32_C( 40870864), INT32_C( 149169565)),
293
+ simde_mm_set_pi32(INT32_C( 1633277631), INT32_C( -224026523)),
294
+ simde_mm_set_pi32(INT32_C( 1674148495), INT32_C( -74856958)) },
295
+ { simde_mm_set_pi32(INT32_C( -718937511), INT32_C( 1453252371)),
296
+ simde_mm_set_pi32(INT32_C( 56683182), INT32_C( -594741944)),
297
+ simde_mm_set_pi32(INT32_C( -662254329), INT32_C( 858510427)) },
298
+ { simde_mm_set_pi32(INT32_C( -950411567), INT32_C( -1493828)),
299
+ simde_mm_set_pi32(INT32_C( -1680249611), INT32_C( 321011369)),
300
+ simde_mm_set_pi32(INT32_C( 1664306118), INT32_C( 319517541)) }
301
+ };
302
+
303
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
304
+ simde__m64 r = simde_mm_add_pi32(test_vec[i].a, test_vec[i].b);
305
+ simde_mm_empty();
306
+ simde_assert_m64_i32(r, ==, test_vec[i].r);
307
+ }
308
+
309
+ simde_mm_empty();
310
+ return MUNIT_OK;
311
+ }
312
+
313
+ static MunitResult
314
+ test_simde_mm_adds_pi8(const MunitParameter params[], void* data) {
315
+ (void) params;
316
+ (void) data;
317
+
318
+ const struct {
319
+ simde__m64 a;
320
+ simde__m64 b;
321
+ simde__m64 r;
322
+ } test_vec[8] = {
323
+ { simde_mm_set_pi8( 99, 16, -73, -73, 34, 32, 87, 42),
324
+ simde_mm_set_pi8( -29, -82, -26, -38, 66, -51, 82, 53),
325
+ simde_mm_set_pi8( 70, -66, -99, -111, 100, -19, 127, 95) },
326
+ { simde_mm_set_pi8( -63, -116, -41, -11, -99, -60, -36, -15),
327
+ simde_mm_set_pi8( 84, -113, 107, 81, -28, -25, -90, -115),
328
+ simde_mm_set_pi8( 21, -128, 66, 70, -127, -85, -126, -128) },
329
+ { simde_mm_set_pi8( -79, -104, -10, -65, 84, -40, -102, 75),
330
+ simde_mm_set_pi8( 30, 54, 127, 16, -7, -31, -83, -89),
331
+ simde_mm_set_pi8( -49, -50, 117, -49, 77, -71, -128, -14) },
332
+ { simde_mm_set_pi8(-115, -50, 111, 104, -19, -48, 122, 59),
333
+ simde_mm_set_pi8( -74, -15, 43, 9, 94, -81, -68, 15),
334
+ simde_mm_set_pi8(-128, -65, 127, 113, 75, -128, 54, 74) },
335
+ { simde_mm_set_pi8( 18, -79, 5, 80, 99, 108, 39, -27),
336
+ simde_mm_set_pi8( 127, 44, 22, -80, -86, -11, 108, -95),
337
+ simde_mm_set_pi8( 127, -35, 27, 0, 13, 97, 127, -122) },
338
+ { simde_mm_set_pi8( -35, 62, 102, -79, 117, 108, 56, -21),
339
+ simde_mm_set_pi8( 68, 119, -10, 17, 40, -124, -75, -39),
340
+ simde_mm_set_pi8( 33, 127, 92, -62, 127, -16, -19, -60) },
341
+ { simde_mm_set_pi8( 45, -5, -10, -4, -23, -76, -111, -38),
342
+ simde_mm_set_pi8( 24, -15, -2, 75, 11, -108, -5, 124),
343
+ simde_mm_set_pi8( 69, -20, -12, 71, -12, -128, -116, 86) },
344
+ { simde_mm_set_pi8( 116, 38, 87, 5, -25, -119, 117, -12),
345
+ simde_mm_set_pi8( -51, 25, -122, 40, -111, -50, -55, -109),
346
+ simde_mm_set_pi8( 65, 63, -35, 45, -128, -128, 62, -121) }
347
+ };
348
+
349
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
350
+ simde__m64 r = simde_mm_adds_pi8(test_vec[i].a, test_vec[i].b);
351
+ simde_mm_empty();
352
+ simde_assert_m64_i8(r, ==, test_vec[i].r);
353
+ }
354
+
355
+ simde_mm_empty();
356
+ return MUNIT_OK;
357
+ }
358
+
359
+ static MunitResult
360
+ test_simde_mm_adds_pu8(const MunitParameter params[], void* data) {
361
+ (void) params;
362
+ (void) data;
363
+
364
+ const struct {
365
+ simde__m64 a;
366
+ simde__m64 b;
367
+ simde__m64 r;
368
+ } test_vec[8] = {
369
+ { simde_x_mm_set_pu8(UINT8_C( 81), UINT8_C( 21), UINT8_C( 204), UINT8_C( 252),
370
+ UINT8_C( 129), UINT8_C( 215), UINT8_C( 184), UINT8_C( 80)),
371
+ simde_x_mm_set_pu8(UINT8_C( 23), UINT8_C( 216), UINT8_C( 110), UINT8_C( 125),
372
+ UINT8_C( 171), UINT8_C( 145), UINT8_C( 61), UINT8_C( 141)),
373
+ simde_x_mm_set_pu8(UINT8_C( 104), UINT8_C( 237), UINT8_C( 255), UINT8_C( 255),
374
+ UINT8_C( 255), UINT8_C( 255), UINT8_C( 245), UINT8_C( 221)) },
375
+ { simde_x_mm_set_pu8(UINT8_C( 239), UINT8_C( 124), UINT8_C( 164), UINT8_C( 178),
376
+ UINT8_C( 97), UINT8_C( 133), UINT8_C( 53), UINT8_C( 7)),
377
+ simde_x_mm_set_pu8(UINT8_C( 55), UINT8_C( 60), UINT8_C( 93), UINT8_C( 144),
378
+ UINT8_C( 87), UINT8_C( 38), UINT8_C( 29), UINT8_C( 227)),
379
+ simde_x_mm_set_pu8(UINT8_C( 255), UINT8_C( 184), UINT8_C( 255), UINT8_C( 255),
380
+ UINT8_C( 184), UINT8_C( 171), UINT8_C( 82), UINT8_C( 234)) },
381
+ { simde_x_mm_set_pu8(UINT8_C( 2), UINT8_C( 239), UINT8_C( 120), UINT8_C( 239),
382
+ UINT8_C( 57), UINT8_C( 159), UINT8_C( 235), UINT8_C( 22)),
383
+ simde_x_mm_set_pu8(UINT8_C( 220), UINT8_C( 9), UINT8_C( 135), UINT8_C( 55),
384
+ UINT8_C( 21), UINT8_C( 1), UINT8_C( 123), UINT8_C( 167)),
385
+ simde_x_mm_set_pu8(UINT8_C( 222), UINT8_C( 248), UINT8_C( 255), UINT8_C( 255),
386
+ UINT8_C( 78), UINT8_C( 160), UINT8_C( 255), UINT8_C( 189)) },
387
+ { simde_x_mm_set_pu8(UINT8_C( 169), UINT8_C( 122), UINT8_C( 209), UINT8_C( 107),
388
+ UINT8_C( 53), UINT8_C( 194), UINT8_C( 157), UINT8_C( 250)),
389
+ simde_x_mm_set_pu8(UINT8_C( 190), UINT8_C( 161), UINT8_C( 50), UINT8_C( 2),
390
+ UINT8_C( 227), UINT8_C( 196), UINT8_C( 34), UINT8_C( 128)),
391
+ simde_x_mm_set_pu8(UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 109),
392
+ UINT8_C( 255), UINT8_C( 255), UINT8_C( 191), UINT8_C( 255)) },
393
+ { simde_x_mm_set_pu8(UINT8_C( 127), UINT8_C( 206), UINT8_C( 75), UINT8_C( 228),
394
+ UINT8_C( 24), UINT8_C( 253), UINT8_C( 247), UINT8_C( 227)),
395
+ simde_x_mm_set_pu8(UINT8_C( 199), UINT8_C( 181), UINT8_C( 197), UINT8_C( 15),
396
+ UINT8_C( 201), UINT8_C( 118), UINT8_C( 220), UINT8_C( 22)),
397
+ simde_x_mm_set_pu8(UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 243),
398
+ UINT8_C( 225), UINT8_C( 255), UINT8_C( 255), UINT8_C( 249)) },
399
+ { simde_x_mm_set_pu8(UINT8_C( 160), UINT8_C( 45), UINT8_C( 121), UINT8_C( 199),
400
+ UINT8_C( 155), UINT8_C( 201), UINT8_C( 54), UINT8_C( 92)),
401
+ simde_x_mm_set_pu8(UINT8_C( 29), UINT8_C( 158), UINT8_C( 69), UINT8_C( 12),
402
+ UINT8_C( 220), UINT8_C( 133), UINT8_C( 37), UINT8_C( 27)),
403
+ simde_x_mm_set_pu8(UINT8_C( 189), UINT8_C( 203), UINT8_C( 190), UINT8_C( 211),
404
+ UINT8_C( 255), UINT8_C( 255), UINT8_C( 91), UINT8_C( 119)) },
405
+ { simde_x_mm_set_pu8(UINT8_C( 173), UINT8_C( 130), UINT8_C( 79), UINT8_C( 240),
406
+ UINT8_C( 183), UINT8_C( 112), UINT8_C( 65), UINT8_C( 13)),
407
+ simde_x_mm_set_pu8(UINT8_C( 24), UINT8_C( 152), UINT8_C( 239), UINT8_C( 128),
408
+ UINT8_C( 83), UINT8_C( 69), UINT8_C( 122), UINT8_C( 121)),
409
+ simde_x_mm_set_pu8(UINT8_C( 197), UINT8_C( 255), UINT8_C( 255), UINT8_C( 255),
410
+ UINT8_C( 255), UINT8_C( 181), UINT8_C( 187), UINT8_C( 134)) },
411
+ { simde_x_mm_set_pu8(UINT8_C( 242), UINT8_C( 255), UINT8_C( 149), UINT8_C( 159),
412
+ UINT8_C( 60), UINT8_C( 134), UINT8_C( 24), UINT8_C( 232)),
413
+ simde_x_mm_set_pu8(UINT8_C( 209), UINT8_C( 150), UINT8_C( 4), UINT8_C( 97),
414
+ UINT8_C( 136), UINT8_C( 88), UINT8_C( 70), UINT8_C( 193)),
415
+ simde_x_mm_set_pu8(UINT8_C( 255), UINT8_C( 255), UINT8_C( 153), UINT8_C( 255),
416
+ UINT8_C( 196), UINT8_C( 222), UINT8_C( 94), UINT8_C( 255)) }
417
+ };
418
+
419
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
420
+ simde__m64 r = simde_mm_adds_pu8(test_vec[i].a, test_vec[i].b);
421
+ simde_assert_m64_u8(r, ==, test_vec[i].r);
422
+ }
423
+
424
+ return MUNIT_OK;
425
+ }
426
+
427
+ static MunitResult
428
+ test_simde_mm_adds_pi16(const MunitParameter params[], void* data) {
429
+ (void) params;
430
+ (void) data;
431
+
432
+ const struct {
433
+ simde__m64 a;
434
+ simde__m64 b;
435
+ simde__m64 r;
436
+ } test_vec[8] = {
437
+ { simde_mm_set_pi16(INT16_C( -31309), INT16_C( -5581), INT16_C( -13514), INT16_C( -24682)),
438
+ simde_mm_set_pi16(INT16_C( 19892), INT16_C( -12160), INT16_C( 3266), INT16_C( 9002)),
439
+ simde_mm_set_pi16(INT16_C( -11417), INT16_C( -17741), INT16_C( -10248), INT16_C( -15680)) },
440
+ { simde_mm_set_pi16(INT16_C( 20564), INT16_C( -25554), INT16_C( 18522), INT16_C( -107)),
441
+ simde_mm_set_pi16(INT16_C( 12328), INT16_C( 12883), INT16_C( 2251), INT16_C( -19119)),
442
+ simde_mm_set_pi16(INT16_C( 32767), INT16_C( -12671), INT16_C( 20773), INT16_C( -19226)) },
443
+ { simde_mm_set_pi16(INT16_C( 20106), INT16_C( -15513), INT16_C( -25552), INT16_C( -23751)),
444
+ simde_mm_set_pi16(INT16_C( 11380), INT16_C( 4698), INT16_C( 16886), INT16_C( 11304)),
445
+ simde_mm_set_pi16(INT16_C( 31486), INT16_C( -10815), INT16_C( -8666), INT16_C( -12447)) },
446
+ { simde_mm_set_pi16(INT16_C( -30807), INT16_C( -12488), INT16_C( 12150), INT16_C( 344)),
447
+ simde_mm_set_pi16(INT16_C( -21735), INT16_C( 11424), INT16_C( 19342), INT16_C( -22640)),
448
+ simde_mm_set_pi16(INT16_C( -32768), INT16_C( -1064), INT16_C( 31492), INT16_C( -22296)) },
449
+ { simde_mm_set_pi16(INT16_C( 23188), INT16_C( -20941), INT16_C( 26991), INT16_C( -11383)),
450
+ simde_mm_set_pi16(INT16_C( 20582), INT16_C( 6628), INT16_C( 32097), INT16_C( 23397)),
451
+ simde_mm_set_pi16(INT16_C( 32767), INT16_C( -14313), INT16_C( 32767), INT16_C( 12014)) },
452
+ { simde_mm_set_pi16(INT16_C( 1789), INT16_C( 28566), INT16_C( 18995), INT16_C( -32500)),
453
+ simde_mm_set_pi16(INT16_C( -32609), INT16_C( -30393), INT16_C( 1798), INT16_C( 28485)),
454
+ simde_mm_set_pi16(INT16_C( -30820), INT16_C( -1827), INT16_C( 20793), INT16_C( -4015)) },
455
+ { simde_mm_set_pi16(INT16_C( 18491), INT16_C( -11781), INT16_C( -27491), INT16_C( 337)),
456
+ simde_mm_set_pi16(INT16_C( 420), INT16_C( 28774), INT16_C( -31111), INT16_C( 15256)),
457
+ simde_mm_set_pi16(INT16_C( 18911), INT16_C( 16993), INT16_C( -32768), INT16_C( 15593)) },
458
+ { simde_mm_set_pi16(INT16_C( -15687), INT16_C( 25487), INT16_C( 23048), INT16_C( -8478)),
459
+ simde_mm_set_pi16(INT16_C( 9271), INT16_C( -4756), INT16_C( -12087), INT16_C( -15383)),
460
+ simde_mm_set_pi16(INT16_C( -6416), INT16_C( 20731), INT16_C( 10961), INT16_C( -23861)) }
461
+ };
462
+
463
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
464
+ simde__m64 r = simde_mm_adds_pi16(test_vec[i].a, test_vec[i].b);
465
+ simde_mm_empty();
466
+ simde_assert_m64_i16(r, ==, test_vec[i].r);
467
+ }
468
+
469
+ simde_mm_empty();
470
+ return MUNIT_OK;
471
+ }
472
+
473
+ static MunitResult
474
+ test_simde_mm_adds_pu16(const MunitParameter params[], void* data) {
475
+ (void) params;
476
+ (void) data;
477
+
478
+ const struct {
479
+ simde__m64 a;
480
+ simde__m64 b;
481
+ simde__m64 r;
482
+ } test_vec[8] = {
483
+ { simde_x_mm_set_pu16(UINT16_C(43150), UINT16_C( 5470), UINT16_C(60072), UINT16_C(50068)),
484
+ simde_x_mm_set_pu16(UINT16_C( 7332), UINT16_C( 4270), UINT16_C(46463), UINT16_C( 9473)),
485
+ simde_x_mm_set_pu16(UINT16_C(50482), UINT16_C( 9740), UINT16_C(65535), UINT16_C(59541)) },
486
+ { simde_x_mm_set_pu16(UINT16_C( 2434), UINT16_C(31906), UINT16_C( 3723), UINT16_C(47234)),
487
+ simde_x_mm_set_pu16(UINT16_C(58902), UINT16_C(62845), UINT16_C(51771), UINT16_C(64034)),
488
+ simde_x_mm_set_pu16(UINT16_C(61336), UINT16_C(65535), UINT16_C(55494), UINT16_C(65535)) },
489
+ { simde_x_mm_set_pu16(UINT16_C( 129), UINT16_C(16274), UINT16_C( 9343), UINT16_C(27425)),
490
+ simde_x_mm_set_pu16(UINT16_C(21184), UINT16_C(38810), UINT16_C(32910), UINT16_C(34144)),
491
+ simde_x_mm_set_pu16(UINT16_C(21313), UINT16_C(55084), UINT16_C(42253), UINT16_C(61569)) },
492
+ { simde_x_mm_set_pu16(UINT16_C(64726), UINT16_C(55325), UINT16_C( 5040), UINT16_C(34690)),
493
+ simde_x_mm_set_pu16(UINT16_C(18928), UINT16_C(15762), UINT16_C(23760), UINT16_C(30303)),
494
+ simde_x_mm_set_pu16(UINT16_C(65535), UINT16_C(65535), UINT16_C(28800), UINT16_C(64993)) },
495
+ { simde_x_mm_set_pu16(UINT16_C(12447), UINT16_C(56063), UINT16_C(19893), UINT16_C(38115)),
496
+ simde_x_mm_set_pu16(UINT16_C(53854), UINT16_C( 9599), UINT16_C(53148), UINT16_C(47295)),
497
+ simde_x_mm_set_pu16(UINT16_C(65535), UINT16_C(65535), UINT16_C(65535), UINT16_C(65535)) },
498
+ { simde_x_mm_set_pu16(UINT16_C(30591), UINT16_C(42550), UINT16_C(36715), UINT16_C(13411)),
499
+ simde_x_mm_set_pu16(UINT16_C(46515), UINT16_C(57187), UINT16_C(46870), UINT16_C(44207)),
500
+ simde_x_mm_set_pu16(UINT16_C(65535), UINT16_C(65535), UINT16_C(65535), UINT16_C(57618)) },
501
+ { simde_x_mm_set_pu16(UINT16_C(12664), UINT16_C(64378), UINT16_C(29354), UINT16_C(42615)),
502
+ simde_x_mm_set_pu16(UINT16_C(62249), UINT16_C(64644), UINT16_C(45128), UINT16_C(47328)),
503
+ simde_x_mm_set_pu16(UINT16_C(65535), UINT16_C(65535), UINT16_C(65535), UINT16_C(65535)) },
504
+ { simde_x_mm_set_pu16(UINT16_C(65124), UINT16_C( 3867), UINT16_C(20702), UINT16_C(63422)),
505
+ simde_x_mm_set_pu16(UINT16_C(51381), UINT16_C(37432), UINT16_C(48951), UINT16_C(45184)),
506
+ simde_x_mm_set_pu16(UINT16_C(65535), UINT16_C(41299), UINT16_C(65535), UINT16_C(65535)) }
507
+ };
508
+
509
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
510
+ simde__m64 r = simde_mm_adds_pu16(test_vec[i].a, test_vec[i].b);
511
+ simde_assert_m64_i16(r, ==, test_vec[i].r);
512
+ }
513
+
514
+ return MUNIT_OK;
515
+ }
516
+
517
+ static MunitResult
518
+ test_simde_mm_and_si64(const MunitParameter params[], void* data) {
519
+ (void) params;
520
+ (void) data;
521
+
522
+ const struct {
523
+ simde__m64 a;
524
+ simde__m64 b;
525
+ simde__m64 r;
526
+ } test_vec[8] = {
527
+ { simde_mm_set_pi32(INT32_C( 340534654), INT32_C( 867835838)),
528
+ simde_mm_set_pi32(INT32_C( -1715051141), INT32_C( 327376215)),
529
+ simde_mm_set_pi32(INT32_C( 272901498), INT32_C( 327294230)) },
530
+ { simde_mm_set_pi32(INT32_C( 364465166), INT32_C( -1853449223)),
531
+ simde_mm_set_pi32(INT32_C( 425932704), INT32_C( -538031667)),
532
+ simde_mm_set_pi32(INT32_C( 287376384), INT32_C( -1853486647)) },
533
+ { simde_mm_set_pi32(INT32_C( 1222276268), INT32_C( -1950390417)),
534
+ simde_mm_set_pi32(INT32_C( 104967923), INT32_C( 339992254)),
535
+ simde_mm_set_pi32(INT32_C( 4203680), INT32_C( 214574)) },
536
+ { simde_mm_set_pi32(INT32_C( 678635361), INT32_C( 1353498548)),
537
+ simde_mm_set_pi32(INT32_C( 1051418126), INT32_C( -1022663537)),
538
+ simde_mm_set_pi32(INT32_C( 673383936), INT32_C( 1074275460)) },
539
+ { simde_mm_set_pi32(INT32_C( 1823492970), INT32_C( -1726291925)),
540
+ simde_mm_set_pi32(INT32_C( 1139854805), INT32_C( 874111018)),
541
+ simde_mm_set_pi32(INT32_C( 1085294912), INT32_C( 270065706)) },
542
+ { simde_mm_set_pi32(INT32_C( 188716107), INT32_C( 919243794)),
543
+ simde_mm_set_pi32(INT32_C( -505381577), INT32_C( -1684778331)),
544
+ simde_mm_set_pi32(INT32_C( 18879491), INT32_C( 310378496)) },
545
+ { simde_mm_set_pi32(INT32_C( -1486610662), INT32_C( 307692640)),
546
+ simde_mm_set_pi32(INT32_C( -1793851837), INT32_C( 1963802755)),
547
+ simde_mm_set_pi32(INT32_C( -2063589886), INT32_C( 268763136)) },
548
+ { simde_mm_set_pi32(INT32_C( -630259527), INT32_C( -82339396)),
549
+ simde_mm_set_pi32(INT32_C( 1607040389), INT32_C( 867785548)),
550
+ simde_mm_set_pi32(INT32_C( 1514733697), INT32_C( 856758540)) }
551
+ };
552
+
553
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
554
+ simde__m64 r = simde_mm_and_si64(test_vec[i].a, test_vec[i].b);
555
+ simde_mm_empty();
556
+ simde_assert_m64_i32(r, ==, test_vec[i].r);
557
+ }
558
+
559
+ simde_mm_empty();
560
+ return MUNIT_OK;
561
+ }
562
+
563
+ static MunitResult
564
+ test_simde_mm_andnot_si64(const MunitParameter params[], void* data) {
565
+ (void) params;
566
+ (void) data;
567
+
568
+ const struct {
569
+ simde__m64 a;
570
+ simde__m64 b;
571
+ simde__m64 r;
572
+ } test_vec[8] = {
573
+ { simde_mm_set_pi32(INT32_C( 874898289), INT32_C( -802292997)),
574
+ simde_mm_set_pi32(INT32_C( 329777422), INT32_C( 479831177)),
575
+ simde_mm_set_pi32(INT32_C( 59244558), INT32_C( 210764800)) },
576
+ { simde_mm_set_pi32(INT32_C( -944824913), INT32_C( 1953730462)),
577
+ simde_mm_set_pi32(INT32_C( -914930437), INT32_C( -556614726)),
578
+ simde_mm_set_pi32(INT32_C( 139477072), INT32_C( -1971310560)) },
579
+ { simde_mm_set_pi32(INT32_C( -253535493), INT32_C( 1477705121)),
580
+ simde_mm_set_pi32(INT32_C( -1581892884), INT32_C( -1606801005)),
581
+ simde_mm_set_pi32(INT32_C( 18096132), INT32_C( -1607991278)) },
582
+ { simde_mm_set_pi32(INT32_C( -585861604), INT32_C( 825554783)),
583
+ simde_mm_set_pi32(INT32_C( -1758500210), INT32_C( -643533489)),
584
+ simde_mm_set_pi32(INT32_C( 36374658), INT32_C( -931135488)) },
585
+ { simde_mm_set_pi32(INT32_C( -5443449), INT32_C( 694842285)),
586
+ simde_mm_set_pi32(INT32_C( -1613805192), INT32_C( 215848721)),
587
+ simde_mm_set_pi32(INT32_C( 4393336), INT32_C( 76907536)) },
588
+ { simde_mm_set_pi32(INT32_C( 1431251288), INT32_C( 1009645294)),
589
+ simde_mm_set_pi32(INT32_C( -1668167014), INT32_C( -733286899)),
590
+ simde_mm_set_pi32(INT32_C( -2003778942), INT32_C( -1069414399)) },
591
+ { simde_mm_set_pi32(INT32_C( 1707128575), INT32_C( -1462185330)),
592
+ simde_mm_set_pi32(INT32_C( -1016415616), INT32_C( -1881637541)),
593
+ simde_mm_set_pi32(INT32_C( -2111174656), INT32_C( 117452113)) },
594
+ { simde_mm_set_pi32(INT32_C( 336066190), INT32_C( -2007360384)),
595
+ simde_mm_set_pi32(INT32_C( -1959332116), INT32_C( -820920813)),
596
+ simde_mm_set_pi32(INT32_C( -1959788448), INT32_C( 1191289363)) }
597
+ };
598
+
599
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
600
+ simde__m64 r = simde_mm_andnot_si64(test_vec[i].a, test_vec[i].b);
601
+ simde_assert_m64_i32(r, ==, test_vec[i].r);
602
+ }
603
+
604
+ simde_mm_empty();
605
+ return MUNIT_OK;
606
+ }
607
+
608
+ static MunitResult
609
+ test_simde_mm_cmpeq_pi8(const MunitParameter params[], void* data) {
610
+ (void) params;
611
+ (void) data;
612
+
613
+ const struct {
614
+ simde__m64 a;
615
+ simde__m64 b;
616
+ simde__m64 r;
617
+ } test_vec[8] = {
618
+ { simde_mm_set_pi8(INT8_C( 61), INT8_C(-117), INT8_C(-117), INT8_C( -23), INT8_C( -19), INT8_C( 6), INT8_C( -24), INT8_C( 89)),
619
+ simde_mm_set_pi8(INT8_C( 47), INT8_C( 71), INT8_C(-105), INT8_C( 13), INT8_C( -26), INT8_C( 93), INT8_C( 118), INT8_C( -58)),
620
+ simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
621
+ { simde_mm_set_pi8(INT8_C( 78), INT8_C( 11), INT8_C( -2), INT8_C( 86), INT8_C( -50), INT8_C( -49), INT8_C( -1), INT8_C( 92)),
622
+ simde_mm_set_pi8(INT8_C( -85), INT8_C( -99), INT8_C( -41), INT8_C( 116), INT8_C( 74), INT8_C( 114), INT8_C( -3), INT8_C( -98)),
623
+ simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
624
+ { simde_mm_set_pi8(INT8_C( 60), INT8_C( 10), INT8_C( -34), INT8_C( 30), INT8_C( 48), INT8_C( -13), INT8_C(-106), INT8_C( 105)),
625
+ simde_mm_set_pi8(INT8_C( 81), INT8_C( 108), INT8_C( -65), INT8_C( -58), INT8_C( -30), INT8_C( -90), INT8_C( 42), INT8_C( 0)),
626
+ simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
627
+ { simde_mm_set_pi8(INT8_C(-113), INT8_C( -67), INT8_C( -55), INT8_C( 84), INT8_C( -92), INT8_C( -66), INT8_C( 7), INT8_C( 21)),
628
+ simde_mm_set_pi8(INT8_C(-113), INT8_C( -67), INT8_C( -55), INT8_C( 84), INT8_C( -92), INT8_C( -66), INT8_C( 7), INT8_C( 21)),
629
+ simde_mm_set_pi8(INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1)) },
630
+ { simde_mm_set_pi8(INT8_C( -72), INT8_C( -56), INT8_C(-104), INT8_C( -6), INT8_C( 37), INT8_C(-114), INT8_C( 84), INT8_C( 21)),
631
+ simde_mm_set_pi8(INT8_C( 77), INT8_C( -25), INT8_C(-104), INT8_C( 0), INT8_C( -39), INT8_C( 38), INT8_C( -54), INT8_C( -90)),
632
+ simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
633
+ { simde_mm_set_pi8(INT8_C( 46), INT8_C( 120), INT8_C( -13), INT8_C(-125), INT8_C( 50), INT8_C( 10), INT8_C( 120), INT8_C( -10)),
634
+ simde_mm_set_pi8(INT8_C( 85), INT8_C( 89), INT8_C( 9), INT8_C( 65), INT8_C( -82), INT8_C( -80), INT8_C( 65), INT8_C( -65)),
635
+ simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
636
+ { simde_mm_set_pi8(INT8_C( -12), INT8_C( -41), INT8_C( -54), INT8_C( 92), INT8_C( -87), INT8_C( -82), INT8_C(-120), INT8_C( 37)),
637
+ simde_mm_set_pi8(INT8_C( 94), INT8_C( -21), INT8_C( 36), INT8_C(-121), INT8_C( -62), INT8_C( -4), INT8_C( 42), INT8_C(-119)),
638
+ simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
639
+ { simde_mm_set_pi8(INT8_C( -8), INT8_C( -60), INT8_C( 35), INT8_C( -31), INT8_C(-103), INT8_C( -7), INT8_C( -39), INT8_C( 47)),
640
+ simde_mm_set_pi8(INT8_C( 13), INT8_C( -84), INT8_C(-126), INT8_C(-127), INT8_C( -82), INT8_C( 37), INT8_C( 60), INT8_C( 30)),
641
+ simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }
642
+ };
643
+
644
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
645
+ simde__m64 r = simde_mm_cmpeq_pi8(test_vec[i].a, test_vec[i].b);
646
+ simde_mm_empty();
647
+ simde_assert_m64_i8(r, ==, test_vec[i].r);
648
+ }
649
+
650
+ simde_mm_empty();
651
+ return MUNIT_OK;
652
+ }
653
+
654
+ static MunitResult
655
+ test_simde_mm_cmpeq_pi16(const MunitParameter params[], void* data) {
656
+ (void) params;
657
+ (void) data;
658
+
659
+ const struct {
660
+ simde__m64 a;
661
+ simde__m64 b;
662
+ simde__m64 r;
663
+ } test_vec[8] = {
664
+ { simde_mm_set_pi16(INT16_C( -13903), INT16_C( -28259), INT16_C( 10786), INT16_C( 24518)),
665
+ simde_mm_set_pi16(INT16_C( 5267), INT16_C( 1924), INT16_C( 13281), INT16_C( -25055)),
666
+ simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
667
+ { simde_mm_set_pi16(INT16_C( -21949), INT16_C( -13483), INT16_C( -390), INT16_C( 6377)),
668
+ simde_mm_set_pi16(INT16_C( -9583), INT16_C( 6876), INT16_C( 23768), INT16_C( 6209)),
669
+ simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
670
+ { simde_mm_set_pi16(INT16_C( 11364), INT16_C( 28383), INT16_C( 13353), INT16_C( 14261)),
671
+ simde_mm_set_pi16(INT16_C( 13422), INT16_C( 32033), INT16_C( 4055), INT16_C( 5623)),
672
+ simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
673
+ { simde_mm_set_pi16(INT16_C( 206), INT16_C( -1567), INT16_C( -17153), INT16_C( 18166)),
674
+ simde_mm_set_pi16(INT16_C( 30519), INT16_C( 30643), INT16_C( 32735), INT16_C( -4195)),
675
+ simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
676
+ { simde_mm_set_pi16(INT16_C( 25406), INT16_C( -18343), INT16_C( -15870), INT16_C( -15505)),
677
+ simde_mm_set_pi16(INT16_C( 25406), INT16_C( -18343), INT16_C( -15870), INT16_C( -15505)),
678
+ simde_mm_set_pi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1)) },
679
+ { simde_mm_set_pi16(INT16_C( 21393), INT16_C( 22815), INT16_C( 322), INT16_C( 9608)),
680
+ simde_mm_set_pi16(INT16_C( 23953), INT16_C( -31672), INT16_C( -7546), INT16_C( 31996)),
681
+ simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
682
+ { simde_mm_set_pi16(INT16_C( -16506), INT16_C( -921), INT16_C( -32189), INT16_C( 18444)),
683
+ simde_mm_set_pi16(INT16_C( -10340), INT16_C( -28110), INT16_C( 24057), INT16_C( -7047)),
684
+ simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
685
+ { simde_mm_set_pi16(INT16_C( -1173), INT16_C( -25844), INT16_C( -10729), INT16_C( 22121)),
686
+ simde_mm_set_pi16(INT16_C( 25970), INT16_C( 12718), INT16_C( 25424), INT16_C( 11867)),
687
+ simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }
688
+ };
689
+
690
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
691
+ simde__m64 r = simde_mm_cmpeq_pi16(test_vec[i].a, test_vec[i].b);
692
+ simde_mm_empty();
693
+ simde_assert_m64_u16(r, ==, test_vec[i].r);
694
+ }
695
+
696
+ simde_mm_empty();
697
+ return MUNIT_OK;
698
+ }
699
+
700
+ static MunitResult
701
+ test_simde_mm_cmpeq_pi32(const MunitParameter params[], void* data) {
702
+ (void) params;
703
+ (void) data;
704
+
705
+ const struct {
706
+ simde__m64 a;
707
+ simde__m64 b;
708
+ simde__m64 r;
709
+ } test_vec[8] = {
710
+ { simde_mm_set_pi32(INT32_C( -883578301), INT32_C( 417988218)),
711
+ simde_mm_set_pi32(INT32_C( 450681489), INT32_C( 406936792)),
712
+ simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) },
713
+ { simde_mm_set_pi32(INT32_C( 1860119652), INT32_C( 934622249)),
714
+ simde_mm_set_pi32(INT32_C( 2099328110), INT32_C( 368512983)),
715
+ simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) },
716
+ { simde_mm_set_pi32(INT32_C( -102694706), INT32_C( 1190575359)),
717
+ simde_mm_set_pi32(INT32_C( 2008250167), INT32_C( -274890785)),
718
+ simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) },
719
+ { simde_mm_set_pi32(INT32_C( 126096531), INT32_C( -1641991199)),
720
+ simde_mm_set_pi32(INT32_C( 126096531), INT32_C( -1641991199)),
721
+ simde_mm_set_pi32(INT32_C( -1), INT32_C( -1)) },
722
+ { simde_mm_set_pi32(INT32_C( -1202101442), INT32_C( -1016086014)),
723
+ simde_mm_set_pi32(INT32_C( -1034786090), INT32_C( -993100857)),
724
+ simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) },
725
+ { simde_mm_set_pi32(INT32_C( 1495225233), INT32_C( 629670210)),
726
+ simde_mm_set_pi32(INT32_C( -2075632239), INT32_C( 2096947846)),
727
+ simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) },
728
+ { simde_mm_set_pi32(INT32_C( -60309626), INT32_C( 1208779331)),
729
+ simde_mm_set_pi32(INT32_C( -1842161764), INT32_C( -461808135)),
730
+ simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) },
731
+ { simde_mm_set_pi32(INT32_C( -1693648021), INT32_C( 1449776663)),
732
+ simde_mm_set_pi32(INT32_C( 833512818), INT32_C( 777741136)),
733
+ simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) }
734
+ };
735
+
736
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
737
+ simde__m64 r = simde_mm_cmpeq_pi32(test_vec[i].a, test_vec[i].b);
738
+ simde_mm_empty();
739
+ simde_assert_m64_i32(r, ==, test_vec[i].r);
740
+ }
741
+
742
+ simde_mm_empty();
743
+ return MUNIT_OK;
744
+ }
745
+
746
+ static MunitResult
747
+ test_simde_mm_cmpgt_pi8(const MunitParameter params[], void* data) {
748
+ (void) params;
749
+ (void) data;
750
+
751
+ const struct {
752
+ simde__m64 a;
753
+ simde__m64 b;
754
+ simde__m64 r;
755
+ } test_vec[8] = {
756
+ { simde_mm_set_pi8(INT8_C( -77), INT8_C( 29), INT8_C( -34), INT8_C(-110), INT8_C( -78), INT8_C( -8), INT8_C( 92), INT8_C( 44)),
757
+ simde_mm_set_pi8(INT8_C( -57), INT8_C( 99), INT8_C( -10), INT8_C( 28), INT8_C( 46), INT8_C( 79), INT8_C( -76), INT8_C( 59)),
758
+ simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0)) },
759
+ { simde_mm_set_pi8(INT8_C( 96), INT8_C( -9), INT8_C( -61), INT8_C( 46), INT8_C( 104), INT8_C(-105), INT8_C( 89), INT8_C( 48)),
760
+ simde_mm_set_pi8(INT8_C( 109), INT8_C( 70), INT8_C( 13), INT8_C( 90), INT8_C(-116), INT8_C( -23), INT8_C( 10), INT8_C( -96)),
761
+ simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1)) },
762
+ { simde_mm_set_pi8(INT8_C( -24), INT8_C( -2), INT8_C( 73), INT8_C( 36), INT8_C( -29), INT8_C( -70), INT8_C( 73), INT8_C(-121)),
763
+ simde_mm_set_pi8(INT8_C( 17), INT8_C( -17), INT8_C( 77), INT8_C( -2), INT8_C( 111), INT8_C(-111), INT8_C( -66), INT8_C( -30)),
764
+ simde_mm_set_pi8(INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0)) },
765
+ { simde_mm_set_pi8(INT8_C( 72), INT8_C(-102), INT8_C(-121), INT8_C( 41), INT8_C( -29), INT8_C(-100), INT8_C( -70), INT8_C( 82)),
766
+ simde_mm_set_pi8(INT8_C( 101), INT8_C( 118), INT8_C(-110), INT8_C( -74), INT8_C( -57), INT8_C( -2), INT8_C( 89), INT8_C( -16)),
767
+ simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1)) },
768
+ { simde_mm_set_pi8(INT8_C( 64), INT8_C( 2), INT8_C(-118), INT8_C( 23), INT8_C( -88), INT8_C(-120), INT8_C( 61), INT8_C( 114)),
769
+ simde_mm_set_pi8(INT8_C( 60), INT8_C( 91), INT8_C( 96), INT8_C( -22), INT8_C( 38), INT8_C( 49), INT8_C( 80), INT8_C( -29)),
770
+ simde_mm_set_pi8(INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1)) },
771
+ { simde_mm_set_pi8(INT8_C( 31), INT8_C( -32), INT8_C(-121), INT8_C( 9), INT8_C( 80), INT8_C( 108), INT8_C( 29), INT8_C( 2)),
772
+ simde_mm_set_pi8(INT8_C(-119), INT8_C( 33), INT8_C( 9), INT8_C( 101), INT8_C( 101), INT8_C( 79), INT8_C( 41), INT8_C( 87)),
773
+ simde_mm_set_pi8(INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0)) },
774
+ { simde_mm_set_pi8(INT8_C( 96), INT8_C( -75), INT8_C(-121), INT8_C(-101), INT8_C( 10), INT8_C(-126), INT8_C( 58), INT8_C( 60)),
775
+ simde_mm_set_pi8(INT8_C( 101), INT8_C( -73), INT8_C( 126), INT8_C( 105), INT8_C( -48), INT8_C(-119), INT8_C( -97), INT8_C( -90)),
776
+ simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1)) },
777
+ { simde_mm_set_pi8(INT8_C( 118), INT8_C( 118), INT8_C( -21), INT8_C( -49), INT8_C( 85), INT8_C( 69), INT8_C( 84), INT8_C( 111)),
778
+ simde_mm_set_pi8(INT8_C( -96), INT8_C( 121), INT8_C(-110), INT8_C( -87), INT8_C( -73), INT8_C( 37), INT8_C( 45), INT8_C(-120)),
779
+ simde_mm_set_pi8(INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1)) }
780
+ };
781
+
782
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
783
+ simde__m64 r = simde_mm_cmpgt_pi8(test_vec[i].a, test_vec[i].b);
784
+ simde_mm_empty();
785
+ simde_assert_m64_i8(r, ==, test_vec[i].r);
786
+ }
787
+
788
+ simde_mm_empty();
789
+ return MUNIT_OK;
790
+ }
791
+
792
+ static MunitResult
793
+ test_simde_mm_cmpgt_pi16(const MunitParameter params[], void* data) {
794
+ (void) params;
795
+ (void) data;
796
+
797
+ const struct {
798
+ simde__m64 a;
799
+ simde__m64 b;
800
+ simde__m64 r;
801
+ } test_vec[8] = {
802
+ { simde_mm_set_pi16(INT16_C( 27287), INT16_C( -17445), INT16_C( 7868), INT16_C( 17731)),
803
+ simde_mm_set_pi16(INT16_C( -32130), INT16_C( -12389), INT16_C( -15721), INT16_C( -10529)),
804
+ simde_mm_set_pi16(INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1)) },
805
+ { simde_mm_set_pi16(INT16_C( -23331), INT16_C( 19282), INT16_C( 27710), INT16_C( 4608)),
806
+ simde_mm_set_pi16(INT16_C( -32646), INT16_C( -2319), INT16_C( 19710), INT16_C( 25425)),
807
+ simde_mm_set_pi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0)) },
808
+ { simde_mm_set_pi16(INT16_C( 29350), INT16_C( -12356), INT16_C( -18117), INT16_C( -29182)),
809
+ simde_mm_set_pi16(INT16_C( 10015), INT16_C( -4879), INT16_C( 30741), INT16_C( -4144)),
810
+ simde_mm_set_pi16(INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
811
+ { simde_mm_set_pi16(INT16_C( 30697), INT16_C( -4215), INT16_C( 31556), INT16_C( 11913)),
812
+ simde_mm_set_pi16(INT16_C( -27176), INT16_C( 17667), INT16_C( -30447), INT16_C( -2179)),
813
+ simde_mm_set_pi16(INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1)) },
814
+ { simde_mm_set_pi16(INT16_C( 9207), INT16_C( 4793), INT16_C( -24596), INT16_C( 10085)),
815
+ simde_mm_set_pi16(INT16_C( -18727), INT16_C( -929), INT16_C( 7051), INT16_C( 8853)),
816
+ simde_mm_set_pi16(INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1)) },
817
+ { simde_mm_set_pi16(INT16_C( 22734), INT16_C( 5890), INT16_C( -3490), INT16_C( -24930)),
818
+ simde_mm_set_pi16(INT16_C( 23656), INT16_C( 14548), INT16_C( 31806), INT16_C( -18379)),
819
+ simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
820
+ { simde_mm_set_pi16(INT16_C( -28756), INT16_C( 2211), INT16_C( -15605), INT16_C( -32010)),
821
+ simde_mm_set_pi16(INT16_C( -12192), INT16_C( -10879), INT16_C( 28731), INT16_C( 7911)),
822
+ simde_mm_set_pi16(INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0)) },
823
+ { simde_mm_set_pi16(INT16_C( -9646), INT16_C( -8544), INT16_C( -843), INT16_C( 12140)),
824
+ simde_mm_set_pi16(INT16_C( 4324), INT16_C( 29706), INT16_C( 13667), INT16_C( -9123)),
825
+ simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1)) }
826
+ };
827
+
828
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
829
+ simde__m64 r = simde_mm_cmpgt_pi16(test_vec[i].a, test_vec[i].b);
830
+ simde_mm_empty();
831
+ simde_assert_m64_i16(r, ==, test_vec[i].r);
832
+ }
833
+
834
+ simde_mm_empty();
835
+ return MUNIT_OK;
836
+ }
837
+
838
+ static MunitResult
839
+ test_simde_mm_cmpgt_pi32(const MunitParameter params[], void* data) {
840
+ (void) params;
841
+ (void) data;
842
+
843
+ const struct {
844
+ simde__m64 a;
845
+ simde__m64 b;
846
+ simde__m64 r;
847
+ } test_vec[8] = {
848
+ { simde_mm_set_pi32(INT32_C( -1143248233), INT32_C( 1162026684)),
849
+ simde_mm_set_pi32(INT32_C( -811892098), INT32_C( -689978729)),
850
+ simde_mm_set_pi32(INT32_C( 0), INT32_C( -1)) },
851
+ { simde_mm_set_pi32(INT32_C( 1263707357), INT32_C( 302017598)),
852
+ simde_mm_set_pi32(INT32_C( -151945094), INT32_C( 1666272510)),
853
+ simde_mm_set_pi32(INT32_C( -1), INT32_C( 0)) },
854
+ { simde_mm_set_pi32(INT32_C( -809733466), INT32_C( -1912424133)),
855
+ simde_mm_set_pi32(INT32_C( -319740129), INT32_C( -271550443)),
856
+ simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) },
857
+ { simde_mm_set_pi32(INT32_C( -276203543), INT32_C( 780761924)),
858
+ simde_mm_set_pi32(INT32_C( 1157862872), INT32_C( -142767855)),
859
+ simde_mm_set_pi32(INT32_C( 0), INT32_C( -1)) },
860
+ { simde_mm_set_pi32(INT32_C( 314123255), INT32_C( 660971500)),
861
+ simde_mm_set_pi32(INT32_C( -60836135), INT32_C( 580197259)),
862
+ simde_mm_set_pi32(INT32_C( -1), INT32_C( -1)) },
863
+ { simde_mm_set_pi32(INT32_C( 386029774), INT32_C( -1633750434)),
864
+ simde_mm_set_pi32(INT32_C( 953441384), INT32_C( -1204454338)),
865
+ simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) },
866
+ { simde_mm_set_pi32(INT32_C( 144936876), INT32_C( -2097757429)),
867
+ simde_mm_set_pi32(INT32_C( -712912800), INT32_C( 518484027)),
868
+ simde_mm_set_pi32(INT32_C( -1), INT32_C( 0)) },
869
+ { simde_mm_set_pi32(INT32_C( -559883694), INT32_C( 795671733)),
870
+ simde_mm_set_pi32(INT32_C( 1946816740), INT32_C( -597871261)),
871
+ simde_mm_set_pi32(INT32_C( 0), INT32_C( -1)) }
872
+ };
873
+
874
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
875
+ simde__m64 r = simde_mm_cmpgt_pi32(test_vec[i].a, test_vec[i].b);
876
+ simde_mm_empty();
877
+ simde_assert_m64_i32(r, ==, test_vec[i].r);
878
+ }
879
+
880
+ simde_mm_empty();
881
+ return MUNIT_OK;
882
+ }
883
+
884
+ static MunitResult
885
+ test_simde_mm_cvtm64_si64(const MunitParameter params[], void* data) {
886
+ (void) params;
887
+ (void) data;
888
+
889
+ const struct {
890
+ simde__m64 a;
891
+ int64_t r;
892
+ } test_vec[8] = {
893
+ { simde_x_mm_set_pi64(INT64_C( 2133233461862191637)),
894
+ INT64_C( 2133233461862191637) },
895
+ { simde_x_mm_set_pi64(INT64_C(-1973285463394951226)),
896
+ INT64_C(-1973285463394951226) },
897
+ { simde_x_mm_set_pi64(INT64_C(-5080660655112358315)),
898
+ INT64_C(-5080660655112358315) },
899
+ { simde_x_mm_set_pi64(INT64_C(-2729804181976621239)),
900
+ INT64_C(-2729804181976621239) },
901
+ { simde_x_mm_set_pi64(INT64_C( 2995193706671491592)),
902
+ INT64_C( 2995193706671491592) },
903
+ { simde_x_mm_set_pi64(INT64_C( 5468114770221852232)),
904
+ INT64_C( 5468114770221852232) },
905
+ { simde_x_mm_set_pi64(INT64_C( 8741870191125799000)),
906
+ INT64_C( 8741870191125799000) },
907
+ { simde_x_mm_set_pi64(INT64_C(-2719280269483103979)),
908
+ INT64_C(-2719280269483103979) }
909
+ };
910
+
911
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
912
+ int64_t r = simde_mm_cvtm64_si64(test_vec[i].a);
913
+ munit_assert_int64(r, ==, test_vec[i].r);
914
+ }
915
+
916
+ return MUNIT_OK;
917
+ }
918
+
919
+ static MunitResult
920
+ test_simde_mm_cvtsi32_si64(const MunitParameter params[], void* data) {
921
+ (void) params;
922
+ (void) data;
923
+
924
+ const struct {
925
+ int32_t a;
926
+ simde__m64 r;
927
+ } test_vec[8] = {
928
+ { INT32_C( -1348583717), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1348583717)) },
929
+ { INT32_C( -756715702), simde_mm_set_pi32(INT32_C( 0), INT32_C( -756715702)) },
930
+ { INT32_C( -1433924355), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1433924355)) },
931
+ { INT32_C( -1317069830), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1317069830)) },
932
+ { INT32_C( 1132090539), simde_mm_set_pi32(INT32_C( 0), INT32_C( 1132090539)) },
933
+ { INT32_C( -1685122075), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1685122075)) },
934
+ { INT32_C( -782778794), simde_mm_set_pi32(INT32_C( 0), INT32_C( -782778794)) },
935
+ { INT32_C( -1603608856), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1603608856)) }
936
+ };
937
+
938
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
939
+ simde__m64 r = simde_mm_cvtsi32_si64(test_vec[i].a);
940
+ simde_mm_empty();
941
+ simde_assert_m64_i32(r, ==, test_vec[i].r);
942
+ }
943
+
944
+ simde_mm_empty();
945
+ return MUNIT_OK;
946
+ }
947
+
948
+ static MunitResult
949
+ test_simde_mm_cvtsi64_m64(const MunitParameter params[], void* data) {
950
+ (void) params;
951
+ (void) data;
952
+
953
+ const struct {
954
+ int64_t a;
955
+ simde__m64 r;
956
+ } test_vec[8] = {
957
+ { INT64_C( 2448316468135826021),
958
+ simde_x_mm_set_pi64(INT64_C( 2448316468135826021)) },
959
+ { INT64_C(-5945835882033612295),
960
+ simde_x_mm_set_pi64(INT64_C(-5945835882033612295)) },
961
+ { INT64_C( 5992090895212857513),
962
+ simde_x_mm_set_pi64(INT64_C( 5992090895212857513)) },
963
+ { INT64_C(-6796228402041923924),
964
+ simde_x_mm_set_pi64(INT64_C(-6796228402041923924)) },
965
+ { INT64_C(-8511645703056027592),
966
+ simde_x_mm_set_pi64(INT64_C(-8511645703056027592)) },
967
+ { INT64_C(-8723546203794185453),
968
+ simde_x_mm_set_pi64(INT64_C(-8723546203794185453)) },
969
+ { INT64_C( 4345402151036158873),
970
+ simde_x_mm_set_pi64(INT64_C( 4345402151036158873)) },
971
+ { INT64_C(-6661466122659936384),
972
+ simde_x_mm_set_pi64(INT64_C(-6661466122659936384)) }
973
+ };
974
+
975
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
976
+ simde__m64 r = simde_mm_cvtsi64_m64(test_vec[i].a);
977
+ simde_mm_empty();
978
+ simde_assert_m64_i32(r, ==, test_vec[i].r);
979
+ }
980
+
981
+ simde_mm_empty();
982
+ return MUNIT_OK;
983
+ }
984
+
985
+ static MunitResult
986
+ test_simde_mm_cvtsi64_si32(const MunitParameter params[], void* data) {
987
+ (void) params;
988
+ (void) data;
989
+
990
+ const struct {
991
+ simde__m64 a;
992
+ int32_t r;
993
+ } test_vec[8] = {
994
+ { simde_mm_set_pi32(INT32_C( 1382271190), INT32_C( -17653840)), INT32_C( -17653840), },
995
+ { simde_mm_set_pi32(INT32_C( 2132466748), INT32_C( -1483731059)), INT32_C( -1483731059), },
996
+ { simde_mm_set_pi32(INT32_C( -822228698), INT32_C( 1004225555)), INT32_C( 1004225555), },
997
+ { simde_mm_set_pi32(INT32_C( 558984757), INT32_C( -1886991323)), INT32_C( -1886991323), },
998
+ { simde_mm_set_pi32(INT32_C( 927499451), INT32_C( 1754078566)), INT32_C( 1754078566), },
999
+ { simde_mm_set_pi32(INT32_C( -1298862100), INT32_C( -1081030334)), INT32_C( -1081030334), },
1000
+ { simde_mm_set_pi32(INT32_C( -2034437538), INT32_C( 1272751087)), INT32_C( 1272751087), },
1001
+ { simde_mm_set_pi32(INT32_C( -1114400737), INT32_C( 1318901980)), INT32_C( 1318901980), }
1002
+ };
1003
+
1004
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1005
+ int32_t r = simde_mm_cvtsi64_si32(test_vec[i].a);
1006
+ simde_mm_empty();
1007
+ munit_assert_int32(r, ==, test_vec[i].r);
1008
+ }
1009
+
1010
+ simde_mm_empty();
1011
+ return MUNIT_OK;
1012
+ }
1013
+
1014
+ static MunitResult
1015
+ test_simde_mm_madd_pi16(const MunitParameter params[], void* data) {
1016
+ (void) params;
1017
+ (void) data;
1018
+
1019
+ const struct {
1020
+ simde__m64 a;
1021
+ simde__m64 b;
1022
+ simde__m64 r;
1023
+ } test_vec[8] = {
1024
+ { simde_mm_set_pi16(INT16_C( -30343), INT16_C( -26392), INT16_C( 12299), INT16_C( 4601)),
1025
+ simde_mm_set_pi16(INT16_C( 1486), INT16_C( 26809), INT16_C( 7836), INT16_C( -25805)),
1026
+ simde_mm_set_pi32(INT32_C( -752632826), INT32_C( -22353841)) },
1027
+ { simde_mm_set_pi16(INT16_C( 1890), INT16_C( 31305), INT16_C( -30077), INT16_C( 2552)),
1028
+ simde_mm_set_pi16(INT16_C( -26920), INT16_C( -29540), INT16_C( 15300), INT16_C( 26578)),
1029
+ simde_mm_set_pi32(INT32_C( -975628500), INT32_C( -392351044)) },
1030
+ { simde_mm_set_pi16(INT16_C( 22384), INT16_C( 696), INT16_C( 25907), INT16_C( -24876)),
1031
+ simde_mm_set_pi16(INT16_C( -11857), INT16_C( 27254), INT16_C( -31966), INT16_C( 7796)),
1032
+ simde_mm_set_pi32(INT32_C( -246438304), INT32_C( -1022076458)) },
1033
+ { simde_mm_set_pi16(INT16_C( 29956), INT16_C( -2269), INT16_C( 6641), INT16_C( -23007)),
1034
+ simde_mm_set_pi16(INT16_C( 8143), INT16_C( 30485), INT16_C( 15411), INT16_C( -14515)),
1035
+ simde_mm_set_pi32(INT32_C( 174761243), INT32_C( 436291056)) },
1036
+ { simde_mm_set_pi16(INT16_C( 7615), INT16_C( 20384), INT16_C( 5326), INT16_C( -12172)),
1037
+ simde_mm_set_pi16(INT16_C( 26893), INT16_C( 19452), INT16_C( 1570), INT16_C( -21018)),
1038
+ simde_mm_set_pi32(INT32_C( 601299763), INT32_C( 264192916)) },
1039
+ { simde_mm_set_pi16(INT16_C( 21548), INT16_C( 8299), INT16_C( -27943), INT16_C( -19629)),
1040
+ simde_mm_set_pi16(INT16_C( -7799), INT16_C( -19736), INT16_C( -28205), INT16_C( 18816)),
1041
+ simde_mm_set_pi32(INT32_C( -331841916), INT32_C( 418793051)) },
1042
+ { simde_mm_set_pi16(INT16_C( -14814), INT16_C( -21565), INT16_C( 4061), INT16_C( 32148)),
1043
+ simde_mm_set_pi16(INT16_C( 26150), INT16_C( 16339), INT16_C( -29106), INT16_C( 3765)),
1044
+ simde_mm_set_pi32(INT32_C( -739736635), INT32_C( 2837754)) },
1045
+ { simde_mm_set_pi16(INT16_C( -14349), INT16_C( 29040), INT16_C( 10943), INT16_C( -14909)),
1046
+ simde_mm_set_pi16(INT16_C( 4672), INT16_C( 28858), INT16_C( 1393), INT16_C( 4521)),
1047
+ simde_mm_set_pi32(INT32_C( 770997792), INT32_C( -52159990)) }
1048
+ };
1049
+
1050
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1051
+ simde__m64 r = simde_mm_madd_pi16(test_vec[i].a, test_vec[i].b);
1052
+ simde_mm_empty();
1053
+ simde_assert_m64_i32(r, ==, test_vec[i].r);
1054
+ }
1055
+
1056
+ simde_mm_empty();
1057
+ return MUNIT_OK;
1058
+ }
1059
+
1060
+ static MunitResult
1061
+ test_simde_mm_mulhi_pi16(const MunitParameter params[], void* data) {
1062
+ (void) params;
1063
+ (void) data;
1064
+
1065
+ const struct {
1066
+ simde__m64 a;
1067
+ simde__m64 b;
1068
+ simde__m64 r;
1069
+ } test_vec[8] = {
1070
+ { simde_mm_set_pi16(INT16_C( 8979), INT16_C( 5154), INT16_C( -16978), INT16_C( 30928)),
1071
+ simde_mm_set_pi16(INT16_C( 131), INT16_C( -26526), INT16_C( -28508), INT16_C( 3156)),
1072
+ simde_mm_set_pi16(INT16_C( 17), INT16_C( -2087), INT16_C( 7385), INT16_C( 1489)) },
1073
+ { simde_mm_set_pi16(INT16_C( -20724), INT16_C( -32562), INT16_C( -4287), INT16_C( -11994)),
1074
+ simde_mm_set_pi16(INT16_C( -1407), INT16_C( -20477), INT16_C( 2350), INT16_C( -5112)),
1075
+ simde_mm_set_pi16(INT16_C( 444), INT16_C( 10174), INT16_C( -154), INT16_C( 935)) },
1076
+ { simde_mm_set_pi16(INT16_C( -19242), INT16_C( -20442), INT16_C( -24803), INT16_C( 26694)),
1077
+ simde_mm_set_pi16(INT16_C( 13233), INT16_C( -6736), INT16_C( 457), INT16_C( 16731)),
1078
+ simde_mm_set_pi16(INT16_C( -3886), INT16_C( 2101), INT16_C( -173), INT16_C( 6814)) },
1079
+ { simde_mm_set_pi16(INT16_C( -7830), INT16_C( 18993), INT16_C( 2047), INT16_C( 32735)),
1080
+ simde_mm_set_pi16(INT16_C( 17045), INT16_C( -23188), INT16_C( -16247), INT16_C( -6369)),
1081
+ simde_mm_set_pi16(INT16_C( -2037), INT16_C( -6721), INT16_C( -508), INT16_C( -3182)) },
1082
+ { simde_mm_set_pi16(INT16_C( -20331), INT16_C( -1771), INT16_C( 7319), INT16_C( -2172)),
1083
+ simde_mm_set_pi16(INT16_C( 27473), INT16_C( 3736), INT16_C( 26635), INT16_C( -24632)),
1084
+ simde_mm_set_pi16(INT16_C( -8523), INT16_C( -101), INT16_C( 2974), INT16_C( 816)) },
1085
+ { simde_mm_set_pi16(INT16_C( 18863), INT16_C( 29355), INT16_C( 22063), INT16_C( 24992)),
1086
+ simde_mm_set_pi16(INT16_C( 31646), INT16_C( 10850), INT16_C( -1174), INT16_C( 6386)),
1087
+ simde_mm_set_pi16(INT16_C( 9108), INT16_C( 4859), INT16_C( -396), INT16_C( 2435)) },
1088
+ { simde_mm_set_pi16(INT16_C( 12919), INT16_C( 27836), INT16_C( -15473), INT16_C( 31227)),
1089
+ simde_mm_set_pi16(INT16_C( -2051), INT16_C( 6265), INT16_C( -13839), INT16_C( 14795)),
1090
+ simde_mm_set_pi16(INT16_C( -405), INT16_C( 2661), INT16_C( 3267), INT16_C( 7049)) },
1091
+ { simde_mm_set_pi16(INT16_C( -20265), INT16_C( -2387), INT16_C( 1893), INT16_C( 16606)),
1092
+ simde_mm_set_pi16(INT16_C( 31589), INT16_C( -8123), INT16_C( 26642), INT16_C( 6982)),
1093
+ simde_mm_set_pi16(INT16_C( -9768), INT16_C( 295), INT16_C( 769), INT16_C( 1769)) }
1094
+ };
1095
+
1096
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1097
+ simde__m64 r = simde_mm_mulhi_pi16(test_vec[i].a, test_vec[i].b);
1098
+ simde_mm_empty();
1099
+ simde_assert_m64_i16(r, ==, test_vec[i].r);
1100
+ }
1101
+
1102
+ simde_mm_empty();
1103
+ return MUNIT_OK;
1104
+ }
1105
+
1106
+ static MunitResult
1107
+ test_simde_mm_mullo_pi16(const MunitParameter params[], void* data) {
1108
+ (void) params;
1109
+ (void) data;
1110
+
1111
+ const struct {
1112
+ simde__m64 a;
1113
+ simde__m64 b;
1114
+ simde__m64 r;
1115
+ } test_vec[8] = {
1116
+ { simde_mm_set_pi16(INT16_C( 1243), INT16_C( 20416), INT16_C( 15667), INT16_C( 4430)),
1117
+ simde_mm_set_pi16(INT16_C( -5775), INT16_C( 26694), INT16_C( 17028), INT16_C( 23537)),
1118
+ simde_mm_set_pi16(INT16_C( 30635), INT16_C( -12672), INT16_C( -19380), INT16_C( 1134)) },
1119
+ { simde_mm_set_pi16(INT16_C( -5230), INT16_C( -20726), INT16_C( -32301), INT16_C( 4324)),
1120
+ simde_mm_set_pi16(INT16_C( 31416), INT16_C( -24870), INT16_C( 28490), INT16_C( -28474)),
1121
+ simde_mm_set_pi16(INT16_C( -6928), INT16_C( 14980), INT16_C( 1022), INT16_C( 20568)) },
1122
+ { simde_mm_set_pi16(INT16_C( 359), INT16_C( 28315), INT16_C( 30109), INT16_C( 30370)),
1123
+ simde_mm_set_pi16(INT16_C( 11362), INT16_C( -24534), INT16_C( -7779), INT16_C( -31174)),
1124
+ simde_mm_set_pi16(INT16_C( 15726), INT16_C( 1390), INT16_C( 7753), INT16_C( -21324)) },
1125
+ { simde_mm_set_pi16(INT16_C( -7682), INT16_C( -17472), INT16_C( 1125), INT16_C( -30733)),
1126
+ simde_mm_set_pi16(INT16_C( 27323), INT16_C( 21286), INT16_C( 28332), INT16_C( -26848)),
1127
+ simde_mm_set_pi16(INT16_C( 16522), INT16_C( 7808), INT16_C( 23004), INT16_C( 21344)) },
1128
+ { simde_mm_set_pi16(INT16_C( 28468), INT16_C( -4021), INT16_C( 23325), INT16_C( -24525)),
1129
+ simde_mm_set_pi16(INT16_C( 29242), INT16_C( -5135), INT16_C( 12241), INT16_C( -5671)),
1130
+ simde_mm_set_pi16(INT16_C( 22984), INT16_C( 3995), INT16_C( -19027), INT16_C( 13883)) },
1131
+ { simde_mm_set_pi16(INT16_C( -11233), INT16_C( -9235), INT16_C( -23340), INT16_C( -55)),
1132
+ simde_mm_set_pi16(INT16_C( -21567), INT16_C( -13689), INT16_C( 21540), INT16_C( 32686)),
1133
+ simde_mm_set_pi16(INT16_C( -24481), INT16_C( -1029), INT16_C( -16944), INT16_C( -28258)) },
1134
+ { simde_mm_set_pi16(INT16_C( 24703), INT16_C( -27133), INT16_C( 13289), INT16_C( 20833)),
1135
+ simde_mm_set_pi16(INT16_C( -32748), INT16_C( 15704), INT16_C( 10635), INT16_C( -13911)),
1136
+ simde_mm_set_pi16(INT16_C( 2540), INT16_C( 18440), INT16_C( -32637), INT16_C( -7671)) },
1137
+ { simde_mm_set_pi16(INT16_C( -20397), INT16_C( -17293), INT16_C( -2038), INT16_C( -24305)),
1138
+ simde_mm_set_pi16(INT16_C( -25280), INT16_C( 2678), INT16_C( -17798), INT16_C( 10227)),
1139
+ simde_mm_set_pi16(INT16_C( -1088), INT16_C( 23298), INT16_C( 30916), INT16_C( 10813)), }
1140
+ };
1141
+
1142
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1143
+ simde__m64 r = simde_mm_mullo_pi16(test_vec[i].a, test_vec[i].b);
1144
+ simde_mm_empty();
1145
+ simde_assert_m64_i16(r, ==, test_vec[i].r);
1146
+ }
1147
+
1148
+ simde_mm_empty();
1149
+ return MUNIT_OK;
1150
+ }
1151
+
1152
+ static MunitResult
1153
+ test_simde_mm_or_si64(const MunitParameter params[], void* data) {
1154
+ (void) params;
1155
+ (void) data;
1156
+
1157
+ const struct {
1158
+ simde__m64 a;
1159
+ simde__m64 b;
1160
+ simde__m64 r;
1161
+ } test_vec[8] = {
1162
+ { simde_mm_set_pi32(INT32_C( -891509218), INT32_C(-1564843089)),
1163
+ simde_mm_set_pi32(INT32_C( -653544563), INT32_C(-1696113634)),
1164
+ simde_mm_set_pi32(INT32_C( 332862867), INT32_C( 945635249)) },
1165
+ { simde_mm_set_pi32(INT32_C( 534518332), INT32_C( 469703625)),
1166
+ simde_mm_set_pi32(INT32_C( 1926733937), INT32_C(-1778281838)),
1167
+ simde_mm_set_pi32(INT32_C( 1829485133), INT32_C(-1912698533)) },
1168
+ { simde_mm_set_pi32(INT32_C( 1838379192), INT32_C(-1012991609)),
1169
+ simde_mm_set_pi32(INT32_C( 513007439), INT32_C( -965007092)),
1170
+ simde_mm_set_pi32(INT32_C( 1929424887), INT32_C( 98947211)) },
1171
+ { simde_mm_set_pi32(INT32_C( -467260595), INT32_C( 1936608780)),
1172
+ simde_mm_set_pi32(INT32_C(-1713951633), INT32_C(-1838143667)),
1173
+ simde_mm_set_pi32(INT32_C( 2112948002), INT32_C( -518100671)) },
1174
+ { simde_mm_set_pi32(INT32_C( -1074911), INT32_C( 1257024473)),
1175
+ simde_mm_set_pi32(INT32_C( 690851199), INT32_C( -673662530)),
1176
+ simde_mm_set_pi32(INT32_C( -691923874), INT32_C(-1657531801)) },
1177
+ { simde_mm_set_pi32(INT32_C( -124691463), INT32_C( -802403954)),
1178
+ simde_mm_set_pi32(INT32_C(-1515822997), INT32_C(-1488861756)),
1179
+ simde_mm_set_pi32(INT32_C( 1563900818), INT32_C( 2003668042)) },
1180
+ { simde_mm_set_pi32(INT32_C( -30677319), INT32_C( -381566895)),
1181
+ simde_mm_set_pi32(INT32_C( 1588726708), INT32_C( 607880991)),
1182
+ simde_mm_set_pi32(INT32_C(-1600525043), INT32_C( -847624370)) },
1183
+ { simde_mm_set_pi32(INT32_C( 289587202), INT32_C(-1908682429)),
1184
+ simde_mm_set_pi32(INT32_C( 291676112), INT32_C(-1617544418)),
1185
+ simde_mm_set_pi32(INT32_C( 2121682), INT32_C( 296610397)) }
1186
+ };
1187
+
1188
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1189
+ simde__m64 r = simde_mm_xor_si64(test_vec[i].a, test_vec[i].b);
1190
+ simde_assert_m64_i32(r, ==, test_vec[i].r);
1191
+ }
1192
+
1193
+ return MUNIT_OK;
1194
+ }
1195
+
1196
+ static MunitResult
1197
+ test_simde_mm_packs_pi16(const MunitParameter params[], void* data) {
1198
+ (void) params;
1199
+ (void) data;
1200
+
1201
+ const struct {
1202
+ simde__m64 a;
1203
+ simde__m64 b;
1204
+ simde__m64 r;
1205
+ } test_vec[8] = {
1206
+ { simde_mm_set_pi16(INT16_C( -17383), INT16_C( -12181), INT16_C( -2968), INT16_C( 26626)),
1207
+ simde_mm_set_pi16(INT16_C( -10040), INT16_C( 13688), INT16_C( -30953), INT16_C( -4037)),
1208
+ simde_mm_set_pi8 (INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128),
1209
+ INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127)) },
1210
+ { simde_mm_set_pi16(INT16_C( -20194), INT16_C( 12331), INT16_C( -23109), INT16_C( 25162)),
1211
+ simde_mm_set_pi16(INT16_C( -1071), INT16_C( 20521), INT16_C( 860), INT16_C( 5875)),
1212
+ simde_mm_set_pi8 (INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127),
1213
+ INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127)) },
1214
+ { simde_mm_set_pi16(INT16_C( -12255), INT16_C( 13277), INT16_C( -28950), INT16_C( 5253)),
1215
+ simde_mm_set_pi16(INT16_C( 25343), INT16_C( -1252), INT16_C( 3561), INT16_C( 7538)),
1216
+ simde_mm_set_pi8 (INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127),
1217
+ INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127)) },
1218
+ { simde_mm_set_pi16(INT16_C( -11251), INT16_C( -21118), INT16_C( -2077), INT16_C( -20336)),
1219
+ simde_mm_set_pi16(INT16_C( 23412), INT16_C( 7898), INT16_C( -3571), INT16_C( 9242)),
1220
+ simde_mm_set_pi8 (INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127),
1221
+ INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128)) },
1222
+ { simde_mm_set_pi16(INT16_C( 28180), INT16_C( 25339), INT16_C( 20328), INT16_C( 3051)),
1223
+ simde_mm_set_pi16(INT16_C( 31135), INT16_C( 3581), INT16_C( 11552), INT16_C( 25034)),
1224
+ simde_mm_set_pi8 (INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127),
1225
+ INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127)) },
1226
+ { simde_mm_set_pi16(INT16_C( 14129), INT16_C( -2982), INT16_C( -13260), INT16_C( -12225)),
1227
+ simde_mm_set_pi16(INT16_C( -557), INT16_C( -14564), INT16_C( -28065), INT16_C( 25636)),
1228
+ simde_mm_set_pi8 (INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127),
1229
+ INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128)) },
1230
+ { simde_mm_set_pi16(INT16_C( 31333), INT16_C( 20796), INT16_C( 16795), INT16_C( -5127)),
1231
+ simde_mm_set_pi16(INT16_C( 22060), INT16_C( 10681), INT16_C( 28763), INT16_C( 2847)),
1232
+ simde_mm_set_pi8 (INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127),
1233
+ INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C(-128)) },
1234
+ { simde_mm_set_pi16(INT16_C( 167), INT16_C( 233), INT16_C( 115), INT16_C( 126)),
1235
+ simde_mm_set_pi16(INT16_C( 10), INT16_C( 94), INT16_C( 181), INT16_C( 233)),
1236
+ simde_mm_set_pi8 (INT8_C( 10), INT8_C( 94), INT8_C( 127), INT8_C( 127),
1237
+ INT8_C( 127), INT8_C( 127), INT8_C( 115), INT8_C( 126)) }
1238
+ };
1239
+
1240
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1241
+ simde__m64 r = simde_mm_packs_pi16(test_vec[i].a, test_vec[i].b);
1242
+ simde_mm_empty();
1243
+ simde_assert_m64_i8(r, ==, test_vec[i].r);
1244
+ }
1245
+
1246
+ simde_mm_empty();
1247
+ return MUNIT_OK;
1248
+ }
1249
+
1250
+ static MunitResult
1251
+ test_simde_mm_packs_pi32(const MunitParameter params[], void* data) {
1252
+ (void) params;
1253
+ (void) data;
1254
+
1255
+ const struct {
1256
+ simde__m64 a;
1257
+ simde__m64 b;
1258
+ simde__m64 r;
1259
+ } test_vec[8] = {
1260
+ { simde_mm_set_pi32(INT32_C( -2875748), INT32_C( -4)),
1261
+ simde_mm_set_pi32(INT32_C( -53), INT32_C( 934884)),
1262
+ simde_mm_set_pi16(INT16_C( -53), INT16_C( 32767), INT16_C( -32768), INT16_C( -4)) },
1263
+ { simde_mm_set_pi32(INT32_C( 1), INT32_C( -216790321)),
1264
+ simde_mm_set_pi32(INT32_C( 120), INT32_C( -379925)),
1265
+ simde_mm_set_pi16(INT16_C( 120), INT16_C( -32768), INT16_C( 1), INT16_C( -32768)) },
1266
+ { simde_mm_set_pi32(INT32_C( -18), INT32_C( 281)),
1267
+ simde_mm_set_pi32(INT32_C( -33064), INT32_C( 130)),
1268
+ simde_mm_set_pi16(INT16_C( -32768), INT16_C( 130), INT16_C( -18), INT16_C( 281)) },
1269
+ { simde_mm_set_pi32(INT32_C( -51729), INT32_C( 14)),
1270
+ simde_mm_set_pi32(INT32_C( 6852), INT32_C( -36)),
1271
+ simde_mm_set_pi16(INT16_C( 6852), INT16_C( -36), INT16_C( -32768), INT16_C( 14)) },
1272
+ { simde_mm_set_pi32(INT32_C( -1), INT32_C( -210)),
1273
+ simde_mm_set_pi32(INT32_C( 3024991), INT32_C( 30957735)),
1274
+ simde_mm_set_pi16(INT16_C( 32767), INT16_C( 32767), INT16_C( -1), INT16_C( -210)) },
1275
+ { simde_mm_set_pi32(INT32_C( 28), INT32_C( 890)),
1276
+ simde_mm_set_pi32(INT32_C( -2031601), INT32_C( -5309)),
1277
+ simde_mm_set_pi16(INT16_C( -32768), INT16_C( -5309), INT16_C( 28), INT16_C( 890)) },
1278
+ { simde_mm_set_pi32(INT32_C( -80), INT32_C( 4267394)),
1279
+ simde_mm_set_pi32(INT32_C( 34757305), INT32_C( 127105)),
1280
+ simde_mm_set_pi16(INT16_C( 32767), INT16_C( 32767), INT16_C( -80), INT16_C( 32767)) },
1281
+ { simde_mm_set_pi32(INT32_C( -2773123), INT32_C( -42)),
1282
+ simde_mm_set_pi32(INT32_C( 33), INT32_C( 3534549)),
1283
+ simde_mm_set_pi16(INT16_C( 33), INT16_C( 32767), INT16_C( -32768), INT16_C( -42)) }
1284
+ };
1285
+
1286
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1287
+ simde__m64 r = simde_mm_packs_pi32(test_vec[i].a, test_vec[i].b);
1288
+ simde_mm_empty();
1289
+ simde_assert_m64_i16(r, ==, test_vec[i].r);
1290
+ }
1291
+
1292
+ simde_mm_empty();
1293
+ return MUNIT_OK;
1294
+ }
1295
+
1296
+ static MunitResult
1297
+ test_simde_mm_packs_pu16(const MunitParameter params[], void* data) {
1298
+ (void) params;
1299
+ (void) data;
1300
+
1301
+ const struct {
1302
+ simde__m64 a;
1303
+ simde__m64 b;
1304
+ simde__m64 r;
1305
+ } test_vec[8] = {
1306
+ { simde_mm_set_pi16(INT16_C( -2), INT16_C( 113), INT16_C( 49), INT16_C( -647)),
1307
+ simde_mm_set_pi16(INT16_C( 56), INT16_C( 5), INT16_C( 1), INT16_C( -54)),
1308
+ simde_mm_set_pi8 (INT8_C( 56), INT8_C( 5), INT8_C( 1), INT8_C( 0),
1309
+ INT8_C( 0), INT8_C( 113), INT8_C( 49), INT8_C( 0)) },
1310
+ { simde_mm_set_pi16(INT16_C( -1), INT16_C( -206), INT16_C( -1650), INT16_C( -109)),
1311
+ simde_mm_set_pi16(INT16_C( -3828), INT16_C( 2), INT16_C( 471), INT16_C( 2)),
1312
+ simde_mm_set_pi8 (INT8_C( 0), INT8_C( 2), INT8_C( -1), INT8_C( 2),
1313
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
1314
+ { simde_mm_set_pi16(INT16_C( 3), INT16_C( -2), INT16_C( 500), INT16_C( -100)),
1315
+ simde_mm_set_pi16(INT16_C( -1574), INT16_C( -1), INT16_C( -1), INT16_C( 2)),
1316
+ simde_mm_set_pi8 (INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 2),
1317
+ INT8_C( 3), INT8_C( 0), INT8_C( -1), INT8_C( 0)) },
1318
+ { simde_mm_set_pi16(INT16_C( -13), INT16_C( -217), INT16_C( 3305), INT16_C( -10)),
1319
+ simde_mm_set_pi16(INT16_C( -370), INT16_C( 181), INT16_C( 1), INT16_C( -1434)),
1320
+ simde_mm_set_pi8 (INT8_C( 0), INT8_C( -75), INT8_C( 1), INT8_C( 0),
1321
+ INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0)) },
1322
+ { simde_mm_set_pi16(INT16_C( 867), INT16_C( -63), INT16_C( -1003), INT16_C( 13)),
1323
+ simde_mm_set_pi16(INT16_C( -29854), INT16_C( -6), INT16_C( 33), INT16_C( 5)),
1324
+ simde_mm_set_pi8 (INT8_C( 0), INT8_C( 0), INT8_C( 33), INT8_C( 5),
1325
+ INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 13)) },
1326
+ { simde_mm_set_pi16(INT16_C( 771), INT16_C( -1), INT16_C( -13), INT16_C( -2)),
1327
+ simde_mm_set_pi16(INT16_C( -65), INT16_C( 55), INT16_C( 295), INT16_C( 17510)),
1328
+ simde_mm_set_pi8 (INT8_C( 0), INT8_C( 55), INT8_C( -1), INT8_C( -1),
1329
+ INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
1330
+ { simde_mm_set_pi16(INT16_C( 50), INT16_C( 32337), INT16_C( 13), INT16_C( 20449)),
1331
+ simde_mm_set_pi16(INT16_C( -897), INT16_C( -113), INT16_C( -3866), INT16_C( -15759)),
1332
+ simde_mm_set_pi8 (INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1333
+ INT8_C( 50), INT8_C( -1), INT8_C( 13), INT8_C( -1)) },
1334
+ { simde_mm_set_pi16(INT16_C( 0), INT16_C( 4501), INT16_C( 202), INT16_C( 9748)),
1335
+ simde_mm_set_pi16(INT16_C( -2), INT16_C( -1), INT16_C( -16348), INT16_C( -6302)),
1336
+ simde_mm_set_pi8 (INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1337
+ INT8_C( 0), INT8_C( -1), INT8_C( -54), INT8_C( -1)) }
1338
+ };
1339
+
1340
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1341
+ simde__m64 r = simde_mm_packs_pu16(test_vec[i].a, test_vec[i].b);
1342
+ simde_mm_empty();
1343
+ simde_assert_m64_u8(r, ==, test_vec[i].r);
1344
+ }
1345
+
1346
+ simde_mm_empty();
1347
+ return MUNIT_OK;
1348
+ }
1349
+
1350
+ static MunitResult
1351
+ test_simde_mm_sll_pi16(const MunitParameter params[], void* data) {
1352
+ (void) params;
1353
+ (void) data;
1354
+
1355
+ const struct {
1356
+ simde__m64 a;
1357
+ simde__m64 count;
1358
+ simde__m64 r;
1359
+ } test_vec[8] = {
1360
+ { simde_mm_set_pi16(INT16_C( -2612), INT16_C( -7275), INT16_C( 24980), INT16_C( 12744)),
1361
+ simde_mm_cvtsi64_m64(15),
1362
+ simde_mm_set_pi16(INT16_C( 0), INT16_C( -32768), INT16_C( 0), INT16_C( 0)) },
1363
+ { simde_mm_set_pi16(INT16_C( 17143), INT16_C( -12000), INT16_C( 32255), INT16_C( 5448)),
1364
+ simde_mm_cvtsi64_m64(10),
1365
+ simde_mm_set_pi16(INT16_C( -9216), INT16_C( -32768), INT16_C( -1024), INT16_C( 8192)) },
1366
+ { simde_mm_set_pi16(INT16_C( 1219), INT16_C( -18409), INT16_C( 24763), INT16_C( 13023)),
1367
+ simde_mm_cvtsi64_m64(3),
1368
+ simde_mm_set_pi16(INT16_C( 9752), INT16_C( -16200), INT16_C( 1496), INT16_C( -26888)) },
1369
+ { simde_mm_set_pi16(INT16_C( -30853), INT16_C( -438), INT16_C( -13150), INT16_C( -2468)),
1370
+ simde_mm_cvtsi64_m64(10),
1371
+ simde_mm_set_pi16(INT16_C( -5120), INT16_C( 10240), INT16_C( -30720), INT16_C( 28672)) },
1372
+ { simde_mm_set_pi16(INT16_C( -20343), INT16_C( 30713), INT16_C( 26566), INT16_C( 9213)),
1373
+ simde_mm_cvtsi64_m64(7),
1374
+ simde_mm_set_pi16(INT16_C( 17536), INT16_C( -896), INT16_C( -7424), INT16_C( -384)) },
1375
+ { simde_mm_set_pi16(INT16_C( -14337), INT16_C( -4898), INT16_C( 32658), INT16_C( -4944)),
1376
+ simde_mm_cvtsi64_m64(5),
1377
+ simde_mm_set_pi16(INT16_C( -32), INT16_C( -25664), INT16_C( -3520), INT16_C( -27136)) },
1378
+ { simde_mm_set_pi16(INT16_C( 21648), INT16_C( 25416), INT16_C( 19921), INT16_C( -16738)),
1379
+ simde_mm_cvtsi64_m64(0),
1380
+ simde_mm_set_pi16(INT16_C( 21648), INT16_C( 25416), INT16_C( 19921), INT16_C( -16738)) },
1381
+ { simde_mm_set_pi16(INT16_C( -10368), INT16_C( -19483), INT16_C( -15412), INT16_C( -29979)),
1382
+ simde_mm_cvtsi64_m64(9),
1383
+ simde_mm_set_pi16(INT16_C( 0), INT16_C( -13824), INT16_C( -26624), INT16_C( -13824)) }
1384
+ };
1385
+
1386
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1387
+ simde__m64 r = simde_mm_sll_pi16(test_vec[i].a, test_vec[i].count);
1388
+ simde_mm_empty();
1389
+ simde_assert_m64_i8(r, ==, test_vec[i].r);
1390
+ }
1391
+
1392
+ simde_mm_empty();
1393
+ return MUNIT_OK;
1394
+ }
1395
+
1396
+ static MunitResult
1397
+ test_simde_mm_sll_pi32(const MunitParameter params[], void* data) {
1398
+ (void) params;
1399
+ (void) data;
1400
+
1401
+ const struct {
1402
+ simde__m64 a;
1403
+ simde__m64 count;
1404
+ simde__m64 r;
1405
+ } test_vec[8] = {
1406
+ { simde_mm_set_pi32(INT32_C( 2135609954), INT32_C( -1662756041)),
1407
+ simde_mm_cvtsi64_m64(10),
1408
+ simde_mm_set_pi32(INT32_C( 726239232), INT32_C( -1855136768)) },
1409
+ { simde_mm_set_pi32(INT32_C( 1984991847), INT32_C( -75949890)),
1410
+ simde_mm_cvtsi64_m64(14),
1411
+ simde_mm_set_pi32(INT32_C( 614055936), INT32_C( 1177518080)) },
1412
+ { simde_mm_set_pi32(INT32_C( -1315562518), INT32_C( -1717142831)),
1413
+ simde_mm_cvtsi64_m64(22),
1414
+ simde_mm_set_pi32(INT32_C( -92274688), INT32_C( -1270874112)) },
1415
+ { simde_mm_set_pi32(INT32_C( -814215595), INT32_C( 805054469)),
1416
+ simde_mm_cvtsi64_m64(9),
1417
+ simde_mm_set_pi32(INT32_C( -266556928), INT32_C( -128972288)) },
1418
+ { simde_mm_set_pi32(INT32_C( -1588862908), INT32_C( 2132697891)),
1419
+ simde_mm_cvtsi64_m64(1),
1420
+ simde_mm_set_pi32(INT32_C( 1117241480), INT32_C( -29571514)) },
1421
+ { simde_mm_set_pi32(INT32_C( 782274620), INT32_C( -2120419106)),
1422
+ simde_mm_cvtsi64_m64(16),
1423
+ simde_mm_set_pi32(INT32_C( -1875116032), INT32_C( -119668736)) },
1424
+ { simde_mm_set_pi32(INT32_C( -1687581332), INT32_C( -1263634481)),
1425
+ simde_mm_cvtsi64_m64(16),
1426
+ simde_mm_set_pi32(INT32_C( -1922301952), INT32_C( 2010054656)) },
1427
+ { simde_mm_set_pi32(INT32_C( -1258319564), INT32_C( 975343739)),
1428
+ simde_mm_cvtsi64_m64(5),
1429
+ simde_mm_set_pi32(INT32_C( -1611520384), INT32_C( 1146228576)) }
1430
+ };
1431
+
1432
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1433
+ simde__m64 r = simde_mm_sll_pi32(test_vec[i].a, test_vec[i].count);
1434
+ simde_mm_empty();
1435
+ simde_assert_m64_i8(r, ==, test_vec[i].r);
1436
+ }
1437
+
1438
+ simde_mm_empty();
1439
+ return MUNIT_OK;
1440
+ }
1441
+
1442
+ static MunitResult
1443
+ test_simde_mm_sll_si64(const MunitParameter params[], void* data) {
1444
+ (void) params;
1445
+ (void) data;
1446
+
1447
+ const struct {
1448
+ simde__m64 a;
1449
+ simde__m64 count;
1450
+ simde__m64 r;
1451
+ } test_vec[8] = {
1452
+ { simde_mm_cvtsi64_m64(INT64_C( 3171924675130206313)),
1453
+ simde_mm_cvtsi64_m64(26),
1454
+ simde_mm_cvtsi64_m64(INT64_C( -4688886433618853888)) },
1455
+ { simde_mm_cvtsi64_m64(INT64_C( 8810857393431583130)),
1456
+ simde_mm_cvtsi64_m64(35),
1457
+ simde_mm_cvtsi64_m64(INT64_C( 8135977920570064896)) },
1458
+ { simde_mm_cvtsi64_m64(INT64_C( 8253138385445189600)),
1459
+ simde_mm_cvtsi64_m64(60),
1460
+ simde_mm_cvtsi64_m64(INT64_C( 0)) },
1461
+ { simde_mm_cvtsi64_m64(INT64_C( -109691783123384247)),
1462
+ simde_mm_cvtsi64_m64(35),
1463
+ simde_mm_cvtsi64_m64(INT64_C( 4699016138212769792)) },
1464
+ { simde_mm_cvtsi64_m64(INT64_C( 797909880260215132)),
1465
+ simde_mm_cvtsi64_m64(19),
1466
+ simde_mm_cvtsi64_m64(INT64_C( -686801717540421632)) },
1467
+ { simde_mm_cvtsi64_m64(INT64_C( -2366434973696685665)),
1468
+ simde_mm_cvtsi64_m64(28),
1469
+ simde_mm_cvtsi64_m64(INT64_C( -2410559835486552064)) },
1470
+ { simde_mm_cvtsi64_m64(INT64_C( 3032641446696114060)),
1471
+ simde_mm_cvtsi64_m64(28),
1472
+ simde_mm_cvtsi64_m64(INT64_C( 4618209939532283904)) },
1473
+ { simde_mm_cvtsi64_m64(INT64_C( 5741540145978860560)),
1474
+ simde_mm_cvtsi64_m64(44),
1475
+ simde_mm_cvtsi64_m64(INT64_C( 7944631217658265600)) }
1476
+ };
1477
+
1478
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1479
+ simde__m64 r = simde_mm_sll_si64(test_vec[i].a, test_vec[i].count);
1480
+ simde_mm_empty();
1481
+ simde_assert_m64_i8(r, ==, test_vec[i].r);
1482
+ }
1483
+
1484
+ simde_mm_empty();
1485
+ return MUNIT_OK;
1486
+ }
1487
+
1488
+ static MunitResult
1489
+ test_simde_mm_slli_pi16(const MunitParameter params[], void* data) {
1490
+ (void) params;
1491
+ (void) data;
1492
+
1493
+ const struct {
1494
+ simde__m64 a;
1495
+ int count;
1496
+ simde__m64 r;
1497
+ } test_vec[8] = {
1498
+ { simde_mm_set_pi16(INT16_C( -13543), INT16_C( 6360), INT16_C( -1306), INT16_C( -5948)),
1499
+ 6,
1500
+ simde_mm_set_pi16(INT16_C( -14784), INT16_C( 13824), INT16_C( -18048), INT16_C( 12544)) },
1501
+ { simde_mm_set_pi16(INT16_C( 6506), INT16_C( -28533), INT16_C( 3988), INT16_C( -31210)),
1502
+ 10,
1503
+ simde_mm_set_pi16(INT16_C( -22528), INT16_C( 11264), INT16_C( 20480), INT16_C( 22528)) },
1504
+ { simde_mm_set_pi16(INT16_C( 19388), INT16_C( -4520), INT16_C( 9582), INT16_C( 11067)),
1505
+ 9,
1506
+ simde_mm_set_pi16(INT16_C( 30720), INT16_C( -20480), INT16_C( -9216), INT16_C( 30208)) },
1507
+ { simde_mm_set_pi16(INT16_C( 12000), INT16_C( 28876), INT16_C( 29834), INT16_C( -13742)),
1508
+ 13,
1509
+ simde_mm_set_pi16(INT16_C( 0), INT16_C( -32768), INT16_C( 16384), INT16_C( 16384)) },
1510
+ { simde_mm_set_pi16(INT16_C( 4648), INT16_C( -2151), INT16_C( -26641), INT16_C( -27659)),
1511
+ 13,
1512
+ simde_mm_set_pi16(INT16_C( 0), INT16_C( 8192), INT16_C( -8192), INT16_C( -24576)) },
1513
+ { simde_mm_set_pi16(INT16_C( -2353), INT16_C( 20317), INT16_C( 7426), INT16_C( 24788)),
1514
+ 8,
1515
+ simde_mm_set_pi16(INT16_C( -12544), INT16_C( 23808), INT16_C( 512), INT16_C( -11264)) },
1516
+ { simde_mm_set_pi16(INT16_C( -6174), INT16_C( 31492), INT16_C( 28575), INT16_C( -20383)),
1517
+ 1,
1518
+ simde_mm_set_pi16(INT16_C( -12348), INT16_C( -2552), INT16_C( -8386), INT16_C( 24770)) },
1519
+ { simde_mm_set_pi16(INT16_C( -30371), INT16_C( 17334), INT16_C( 2428), INT16_C( -4558)),
1520
+ 5,
1521
+ simde_mm_set_pi16(INT16_C( 11168), INT16_C( 30400), INT16_C( 12160), INT16_C( -14784)) }
1522
+ };
1523
+
1524
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1525
+ simde__m64 r = simde_mm_slli_pi16(test_vec[i].a, test_vec[i].count);
1526
+ simde_mm_empty();
1527
+ simde_assert_m64_i16(r, ==, test_vec[i].r);
1528
+ }
1529
+
1530
+ simde_mm_empty();
1531
+ return MUNIT_OK;
1532
+ }
1533
+
1534
+ static MunitResult
1535
+ test_simde_mm_slli_pi32(const MunitParameter params[], void* data) {
1536
+ (void) params;
1537
+ (void) data;
1538
+
1539
+ const struct {
1540
+ simde__m64 a;
1541
+ int count;
1542
+ simde__m64 r;
1543
+ } test_vec[8] = {
1544
+ { simde_mm_set_pi32(INT32_C( 83881529), INT32_C( 1357951601)),
1545
+ 27,
1546
+ simde_mm_set_pi32(INT32_C( -939524096), INT32_C( -2013265920)) },
1547
+ { simde_mm_set_pi32(INT32_C( -2138298674), INT32_C( -2019079679)),
1548
+ 21,
1549
+ simde_mm_set_pi32(INT32_C( -641728512), INT32_C( 1075838976)) },
1550
+ { simde_mm_set_pi32(INT32_C( -281448798), INT32_C( -1557273316)),
1551
+ 27,
1552
+ simde_mm_set_pi32(INT32_C( 268435456), INT32_C( -536870912)) },
1553
+ { simde_mm_set_pi32(INT32_C( -1207542290), INT32_C( -694741539)),
1554
+ 7,
1555
+ simde_mm_set_pi32(INT32_C( 53409536), INT32_C( 1267396224)) },
1556
+ { simde_mm_set_pi32(INT32_C( 902716495), INT32_C( 943182057)),
1557
+ 20,
1558
+ simde_mm_set_pi32(INT32_C( -990904320), INT32_C( 244318208)) },
1559
+ { simde_mm_set_pi32(INT32_C( 7423865), INT32_C( -1974692036)),
1560
+ 5,
1561
+ simde_mm_set_pi32(INT32_C( 237563680), INT32_C( 1234364288)) },
1562
+ { simde_mm_set_pi32(INT32_C( 174727032), INT32_C( -891064659)),
1563
+ 23,
1564
+ simde_mm_set_pi32(INT32_C( -1140850688), INT32_C( 1451229184)) },
1565
+ { simde_mm_set_pi32(INT32_C( 134754342), INT32_C( -1894000042)),
1566
+ 27,
1567
+ simde_mm_set_pi32(INT32_C( 805306368), INT32_C( -1342177280)) }
1568
+ };
1569
+
1570
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1571
+ simde__m64 r = simde_mm_slli_pi32(test_vec[i].a, test_vec[i].count);
1572
+ simde_mm_empty();
1573
+ simde_assert_m64_i32(r, ==, test_vec[i].r);
1574
+ }
1575
+
1576
+ simde_mm_empty();
1577
+ return MUNIT_OK;
1578
+ }
1579
+
1580
+ static MunitResult
1581
+ test_simde_mm_slli_si64(const MunitParameter params[], void* data) {
1582
+ (void) params;
1583
+ (void) data;
1584
+
1585
+ const struct {
1586
+ simde__m64 a;
1587
+ int count;
1588
+ simde__m64 r;
1589
+ } test_vec[8] = {
1590
+ { simde_mm_cvtsi64_m64(INT64_C( -3655983719573882447)),
1591
+ 37,
1592
+ simde_mm_cvtsi64_m64(INT64_C( 5043809618745098240)) },
1593
+ { simde_mm_cvtsi64_m64(INT64_C( 5373634195600553823)),
1594
+ 49,
1595
+ simde_mm_cvtsi64_m64(INT64_C( 7979815589747097600)) },
1596
+ { simde_mm_cvtsi64_m64(INT64_C( 955832682335824267)),
1597
+ 11,
1598
+ simde_mm_cvtsi64_m64(INT64_C( 2190461610555627520)) },
1599
+ { simde_mm_cvtsi64_m64(INT64_C( 4435237962953354472)),
1600
+ 32,
1601
+ simde_mm_cvtsi64_m64(INT64_C( -6041177681452597248)) },
1602
+ { simde_mm_cvtsi64_m64(INT64_C( 509713568463920999)),
1603
+ 0,
1604
+ simde_mm_cvtsi64_m64(INT64_C( 509713568463920999)) },
1605
+ { simde_mm_cvtsi64_m64(INT64_C( 3092984209993521199)),
1606
+ 24,
1607
+ simde_mm_cvtsi64_m64(INT64_C( -4581130211545841664)) },
1608
+ { simde_mm_cvtsi64_m64(INT64_C( -9034725437056781767)),
1609
+ 38,
1610
+ simde_mm_cvtsi64_m64(INT64_C( 4817882106908639232)) },
1611
+ { simde_mm_cvtsi64_m64(INT64_C( 8352260709189542260)),
1612
+ 34,
1613
+ simde_mm_cvtsi64_m64(INT64_C( -8446635447710384128)) }
1614
+ };
1615
+
1616
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1617
+ simde__m64 r = simde_mm_slli_si64(test_vec[i].a, test_vec[i].count);
1618
+ simde_mm_empty();
1619
+ simde_assert_m64_i64(r, ==, test_vec[i].r);
1620
+ }
1621
+
1622
+ simde_mm_empty();
1623
+ return MUNIT_OK;
1624
+ }
1625
+
1626
+ static MunitResult
1627
+ test_simde_mm_srl_pi16(const MunitParameter params[], void* data) {
1628
+ (void) params;
1629
+ (void) data;
1630
+
1631
+ const struct {
1632
+ simde__m64 a;
1633
+ simde__m64 count;
1634
+ simde__m64 r;
1635
+ } test_vec[8] = {
1636
+ { simde_mm_set_pi16(INT16_C( -3979), INT16_C( -30013), INT16_C( 22836), INT16_C( 7438)),
1637
+ simde_mm_cvtsi64_m64(5),
1638
+ simde_mm_set_pi16(INT16_C( 1923), INT16_C( 1110), INT16_C( 713), INT16_C( 232)) },
1639
+ { simde_mm_set_pi16(INT16_C( -17889), INT16_C( -31199), INT16_C( 2233), INT16_C( 29176)),
1640
+ simde_mm_cvtsi64_m64(9),
1641
+ simde_mm_set_pi16(INT16_C( 93), INT16_C( 67), INT16_C( 4), INT16_C( 56)) },
1642
+ { simde_mm_set_pi16(INT16_C( -14320), INT16_C( -29349), INT16_C( -4712), INT16_C( 3031)),
1643
+ simde_mm_cvtsi64_m64(6),
1644
+ simde_mm_set_pi16(INT16_C( 800), INT16_C( 565), INT16_C( 950), INT16_C( 47)) },
1645
+ { simde_mm_set_pi16(INT16_C( 28706), INT16_C( -15113), INT16_C( -3287), INT16_C( -13609)),
1646
+ simde_mm_cvtsi64_m64(13),
1647
+ simde_mm_set_pi16(INT16_C( 3), INT16_C( 6), INT16_C( 7), INT16_C( 6)) },
1648
+ { simde_mm_set_pi16(INT16_C( -4348), INT16_C( 14324), INT16_C( 12491), INT16_C( -32763)),
1649
+ simde_mm_cvtsi64_m64(2),
1650
+ simde_mm_set_pi16(INT16_C( 15297), INT16_C( 3581), INT16_C( 3122), INT16_C( 8193)) },
1651
+ { simde_mm_set_pi16(INT16_C( -1454), INT16_C( -3136), INT16_C( 16900), INT16_C( -26266)),
1652
+ simde_mm_cvtsi64_m64(11),
1653
+ simde_mm_set_pi16(INT16_C( 31), INT16_C( 30), INT16_C( 8), INT16_C( 19)) },
1654
+ { simde_mm_set_pi16(INT16_C( 23032), INT16_C( 21033), INT16_C( 2074), INT16_C( -30320)),
1655
+ simde_mm_cvtsi64_m64(9),
1656
+ simde_mm_set_pi16(INT16_C( 44), INT16_C( 41), INT16_C( 4), INT16_C( 68)) },
1657
+ { simde_mm_set_pi16(INT16_C( 2403), INT16_C( 6070), INT16_C( -16381), INT16_C( 15198)),
1658
+ simde_mm_cvtsi64_m64(10),
1659
+ simde_mm_set_pi16(INT16_C( 2), INT16_C( 5), INT16_C( 48), INT16_C( 14)) }
1660
+ };
1661
+
1662
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1663
+ simde__m64 r = simde_mm_srl_pi16(test_vec[i].a, test_vec[i].count);
1664
+ simde_mm_empty();
1665
+ simde_assert_m64_i16(r, ==, test_vec[i].r);
1666
+ }
1667
+
1668
+ simde_mm_empty();
1669
+ return MUNIT_OK;
1670
+ }
1671
+
1672
+ static MunitResult
1673
+ test_simde_mm_srl_pi32(const MunitParameter params[], void* data) {
1674
+ (void) params;
1675
+ (void) data;
1676
+
1677
+ const struct {
1678
+ simde__m64 a;
1679
+ simde__m64 count;
1680
+ simde__m64 r;
1681
+ } test_vec[8] = {
1682
+ { simde_mm_set_pi32(INT32_C( 1162874425), INT32_C( 701403552)),
1683
+ simde_mm_cvtsi64_m64(12),
1684
+ simde_mm_set_pi32(INT32_C( 283904), INT32_C( 171241)) },
1685
+ { simde_mm_set_pi32(INT32_C( -1730008971), INT32_C( 1480718473)),
1686
+ simde_mm_cvtsi64_m64(7),
1687
+ simde_mm_set_pi32(INT32_C( 20038736), INT32_C( 11568113)) },
1688
+ { simde_mm_set_pi32(INT32_C( -2020652937), INT32_C( -14094139)),
1689
+ simde_mm_cvtsi64_m64(9),
1690
+ simde_mm_set_pi32(INT32_C( 4442020), INT32_C( 8361080)) },
1691
+ { simde_mm_set_pi32(INT32_C( 1211264864), INT32_C( -549692031)),
1692
+ simde_mm_cvtsi64_m64(13),
1693
+ simde_mm_set_pi32(INT32_C( 147859), INT32_C( 457186)) },
1694
+ { simde_mm_set_pi32(INT32_C( 526771625), INT32_C( -1372326605)),
1695
+ simde_mm_cvtsi64_m64(6),
1696
+ simde_mm_set_pi32(INT32_C( 8230806), INT32_C( 45666260)) },
1697
+ { simde_mm_set_pi32(INT32_C( 257774375), INT32_C( 1425803958)),
1698
+ simde_mm_cvtsi64_m64(0),
1699
+ simde_mm_set_pi32(INT32_C( 257774375), INT32_C( 1425803958)) },
1700
+ { simde_mm_set_pi32(INT32_C( 751075720), INT32_C( -1937798467)),
1701
+ simde_mm_cvtsi64_m64(6),
1702
+ simde_mm_set_pi32(INT32_C( 11735558), INT32_C( 36830762)) },
1703
+ { simde_mm_set_pi32(INT32_C( -703624712), INT32_C( 1484883517)),
1704
+ simde_mm_cvtsi64_m64(14),
1705
+ simde_mm_set_pi32(INT32_C( 219198), INT32_C( 90630)) }
1706
+ };
1707
+
1708
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1709
+ simde__m64 r = simde_mm_srl_pi32(test_vec[i].a, test_vec[i].count);
1710
+ simde_mm_empty();
1711
+ simde_assert_m64_i32(r, ==, test_vec[i].r);
1712
+ }
1713
+
1714
+ simde_mm_empty();
1715
+ return MUNIT_OK;
1716
+ }
1717
+
1718
+ static MunitResult
1719
+ test_simde_mm_srl_si64(const MunitParameter params[], void* data) {
1720
+ (void) params;
1721
+ (void) data;
1722
+
1723
+ const struct {
1724
+ simde__m64 a;
1725
+ simde__m64 count;
1726
+ simde__m64 r;
1727
+ } test_vec[8] = {
1728
+ { simde_mm_cvtsi64_m64(INT64_C( -1550745422537000797)),
1729
+ simde_mm_cvtsi64_m64(27),
1730
+ simde_mm_cvtsi64_m64(INT64_C( 125884999716)) },
1731
+ { simde_mm_cvtsi64_m64(INT64_C( -4905487896917789484)),
1732
+ simde_mm_cvtsi64_m64(51),
1733
+ simde_mm_cvtsi64_m64(INT64_C( 6013)) },
1734
+ { simde_mm_cvtsi64_m64(INT64_C( 784798283774789910)),
1735
+ simde_mm_cvtsi64_m64(61),
1736
+ simde_mm_cvtsi64_m64(INT64_C( 0)) },
1737
+ { simde_mm_cvtsi64_m64(INT64_C( -7160969444731528566)),
1738
+ simde_mm_cvtsi64_m64(36),
1739
+ simde_mm_cvtsi64_m64(INT64_C( 164229635)) },
1740
+ { simde_mm_cvtsi64_m64(INT64_C( -123534753035910002)),
1741
+ simde_mm_cvtsi64_m64(20),
1742
+ simde_mm_cvtsi64_m64(INT64_C( 17474374123262)) },
1743
+ { simde_mm_cvtsi64_m64(INT64_C( 5720385725637272506)),
1744
+ simde_mm_cvtsi64_m64(33),
1745
+ simde_mm_cvtsi64_m64(INT64_C( 665940545)) },
1746
+ { simde_mm_cvtsi64_m64(INT64_C( -3398235017645277558)),
1747
+ simde_mm_cvtsi64_m64(63),
1748
+ simde_mm_cvtsi64_m64(INT64_C( 1)) },
1749
+ { simde_mm_cvtsi64_m64(INT64_C( -5355948413550293775)),
1750
+ simde_mm_cvtsi64_m64(7),
1751
+ simde_mm_cvtsi64_m64(INT64_C( 102271841094994201)) }
1752
+ };
1753
+
1754
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1755
+ simde__m64 r = simde_mm_srl_si64(test_vec[i].a, test_vec[i].count);
1756
+ simde_mm_empty();
1757
+ simde_assert_m64_i64(r, ==, test_vec[i].r);
1758
+ }
1759
+
1760
+ simde_mm_empty();
1761
+ return MUNIT_OK;
1762
+ }
1763
+
1764
+ static MunitResult
1765
+ test_simde_mm_srli_pi16(const MunitParameter params[], void* data) {
1766
+ (void) params;
1767
+ (void) data;
1768
+
1769
+ const struct {
1770
+ simde__m64 a;
1771
+ int count;
1772
+ simde__m64 r;
1773
+ } test_vec[8] = {
1774
+ { simde_mm_set_pi16(INT16_C( -15698), INT16_C( -32310), INT16_C( 339), INT16_C( 3496)),
1775
+ 10,
1776
+ simde_mm_set_pi16(INT16_C( 48), INT16_C( 32), INT16_C( 0), INT16_C( 3)) },
1777
+ { simde_mm_set_pi16(INT16_C( -27263), INT16_C( -18160), INT16_C( -20487), INT16_C( -21173)),
1778
+ 6,
1779
+ simde_mm_set_pi16(INT16_C( 598), INT16_C( 740), INT16_C( 703), INT16_C( 693)) },
1780
+ { simde_mm_set_pi16(INT16_C( 23805), INT16_C( -14941), INT16_C( 6558), INT16_C( -23896)),
1781
+ 6,
1782
+ simde_mm_set_pi16(INT16_C( 371), INT16_C( 790), INT16_C( 102), INT16_C( 650)) },
1783
+ { simde_mm_set_pi16(INT16_C( 22534), INT16_C( -27358), INT16_C( -9489), INT16_C( -15972)),
1784
+ 7,
1785
+ simde_mm_set_pi16(INT16_C( 176), INT16_C( 298), INT16_C( 437), INT16_C( 387)) },
1786
+ { simde_mm_set_pi16(INT16_C( 2212), INT16_C( -29223), INT16_C( -19783), INT16_C( -4105)),
1787
+ 0,
1788
+ simde_mm_set_pi16(INT16_C( 2212), INT16_C( -29223), INT16_C( -19783), INT16_C( -4105)) },
1789
+ { simde_mm_set_pi16(INT16_C( 24559), INT16_C( -21850), INT16_C( -30646), INT16_C( 21423)),
1790
+ 14,
1791
+ simde_mm_set_pi16(INT16_C( 1), INT16_C( 2), INT16_C( 2), INT16_C( 1)) },
1792
+ { simde_mm_set_pi16(INT16_C( -3241), INT16_C( -31506), INT16_C( 3662), INT16_C( 16805)),
1793
+ 5,
1794
+ simde_mm_set_pi16(INT16_C( 1946), INT16_C( 1063), INT16_C( 114), INT16_C( 525)) },
1795
+ { simde_mm_set_pi16(INT16_C( -13677), INT16_C( 7117), INT16_C( -15559), INT16_C( -8368)),
1796
+ 14,
1797
+ simde_mm_set_pi16(INT16_C( 3), INT16_C( 0), INT16_C( 3), INT16_C( 3)) }
1798
+ };
1799
+
1800
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1801
+ simde__m64 r = simde_mm_srli_pi16(test_vec[i].a, test_vec[i].count);
1802
+ simde_mm_empty();
1803
+ simde_assert_m64_i16(r, ==, test_vec[i].r);
1804
+ }
1805
+
1806
+ simde_mm_empty();
1807
+ return MUNIT_OK;
1808
+ }
1809
+
1810
+ static MunitResult
1811
+ test_simde_mm_srli_pi32(const MunitParameter params[], void* data) {
1812
+ (void) params;
1813
+ (void) data;
1814
+
1815
+ const struct {
1816
+ simde__m64 a;
1817
+ int count;
1818
+ simde__m64 r;
1819
+ } test_vec[8] = {
1820
+ { simde_mm_set_pi32(INT32_C( 116105102), INT32_C( -612588364)),
1821
+ 14,
1822
+ simde_mm_set_pi32(INT32_C( 7086), INT32_C( 224754)) },
1823
+ { simde_mm_set_pi32(INT32_C( -569249998), INT32_C( 1055993616)),
1824
+ 8,
1825
+ simde_mm_set_pi32(INT32_C( 14553583), INT32_C( 4124975)) },
1826
+ { simde_mm_set_pi32(INT32_C( 851549428), INT32_C( -1334511981)),
1827
+ 15,
1828
+ simde_mm_set_pi32(INT32_C( 25987), INT32_C( 90345)) },
1829
+ { simde_mm_set_pi32(INT32_C( -1526427094), INT32_C( 130645372)),
1830
+ 14,
1831
+ simde_mm_set_pi32(INT32_C( 168978), INT32_C( 7973)) },
1832
+ { simde_mm_set_pi32(INT32_C( -1832776933), INT32_C( -28796512)),
1833
+ 0,
1834
+ simde_mm_set_pi32(INT32_C( -1832776933), INT32_C( -28796512)) },
1835
+ { simde_mm_set_pi32(INT32_C( -1521422315), INT32_C( 230241179)),
1836
+ 4,
1837
+ simde_mm_set_pi32(INT32_C( 173346561), INT32_C( 14390073)) },
1838
+ { simde_mm_set_pi32(INT32_C( 981909051), INT32_C( -764766890)),
1839
+ 15,
1840
+ simde_mm_set_pi32(INT32_C( 29965), INT32_C( 107733)) },
1841
+ { simde_mm_set_pi32(INT32_C( -1889202569), INT32_C( 1472716773)),
1842
+ 10,
1843
+ simde_mm_set_pi32(INT32_C( 2349379), INT32_C( 1438199)) }
1844
+ };
1845
+
1846
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1847
+ simde__m64 r = simde_mm_srli_pi32(test_vec[i].a, test_vec[i].count);
1848
+ simde_mm_empty();
1849
+ simde_assert_m64_i32(r, ==, test_vec[i].r);
1850
+ }
1851
+
1852
+ simde_mm_empty();
1853
+ return MUNIT_OK;
1854
+ }
1855
+
1856
+ static MunitResult
1857
+ test_simde_mm_srli_si64(const MunitParameter params[], void* data) {
1858
+ (void) params;
1859
+ (void) data;
1860
+
1861
+ const struct {
1862
+ simde__m64 a;
1863
+ int count;
1864
+ simde__m64 r;
1865
+ } test_vec[8] = {
1866
+ { simde_mm_cvtsi64_m64(INT64_C( -8294501885901195762)),
1867
+ 62,
1868
+ simde_mm_cvtsi64_m64(INT64_C( 2)) },
1869
+ { simde_mm_cvtsi64_m64(INT64_C( 7027314223871146181)),
1870
+ 7,
1871
+ simde_mm_cvtsi64_m64(INT64_C( 54900892373993329)) },
1872
+ { simde_mm_cvtsi64_m64(INT64_C( 2649805052949317833)),
1873
+ 19,
1874
+ simde_mm_cvtsi64_m64(INT64_C( 5054102044962)) },
1875
+ { simde_mm_cvtsi64_m64(INT64_C( 778555941675423413)),
1876
+ 12,
1877
+ simde_mm_cvtsi64_m64(INT64_C( 190077134198101)) },
1878
+ { simde_mm_cvtsi64_m64(INT64_C( 1453695186595163432)),
1879
+ 17,
1880
+ simde_mm_cvtsi64_m64(INT64_C( 11090814106713)) },
1881
+ { simde_mm_cvtsi64_m64(INT64_C( 834539484136231083)),
1882
+ 22,
1883
+ simde_mm_cvtsi64_m64(INT64_C( 198969718011)) },
1884
+ { simde_mm_cvtsi64_m64(INT64_C( 1883775849744838333)),
1885
+ 12,
1886
+ simde_mm_cvtsi64_m64(INT64_C( 459906213316610)) },
1887
+ { simde_mm_cvtsi64_m64(INT64_C( 7946503469684399228)),
1888
+ 61,
1889
+ simde_mm_cvtsi64_m64(INT64_C( 3)) }
1890
+ };
1891
+
1892
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1893
+ simde__m64 r = simde_mm_srli_si64(test_vec[i].a, test_vec[i].count);
1894
+ simde_mm_empty();
1895
+ simde_assert_m64_i64(r, ==, test_vec[i].r);
1896
+ }
1897
+
1898
+ simde_mm_empty();
1899
+ return MUNIT_OK;
1900
+ }
1901
+
1902
+ static MunitResult
1903
+ test_simde_mm_srai_pi16(const MunitParameter params[], void* data) {
1904
+ (void) params;
1905
+ (void) data;
1906
+
1907
+ const struct {
1908
+ simde__m64 a;
1909
+ int count;
1910
+ simde__m64 r;
1911
+ } test_vec[8] = {
1912
+ { simde_mm_set_pi16(INT16_C( -32259), INT16_C( -10390), INT16_C( 28627), INT16_C( 18747)),
1913
+ 6,
1914
+ simde_mm_set_pi16(INT16_C( -505), INT16_C( -163), INT16_C( 447), INT16_C( 292)) },
1915
+ { simde_mm_set_pi16(INT16_C( -300), INT16_C( -3262), INT16_C( -2861), INT16_C( -11389)),
1916
+ 15,
1917
+ simde_mm_set_pi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1)) },
1918
+ { simde_mm_set_pi16(INT16_C( 6480), INT16_C( -15684), INT16_C( 3587), INT16_C( 7844)),
1919
+ 2,
1920
+ simde_mm_set_pi16(INT16_C( 1620), INT16_C( -3921), INT16_C( 896), INT16_C( 1961)) },
1921
+ { simde_mm_set_pi16(INT16_C( -6044), INT16_C( -15946), INT16_C( 1721), INT16_C( -30273)),
1922
+ 5,
1923
+ simde_mm_set_pi16(INT16_C( -189), INT16_C( -499), INT16_C( 53), INT16_C( -947)) },
1924
+ { simde_mm_set_pi16(INT16_C( 24609), INT16_C( 14431), INT16_C( 1917), INT16_C( -13176)),
1925
+ 13,
1926
+ simde_mm_set_pi16(INT16_C( 3), INT16_C( 1), INT16_C( 0), INT16_C( -2)) },
1927
+ { simde_mm_set_pi16(INT16_C( 13575), INT16_C( 32610), INT16_C( -4763), INT16_C( 10748)),
1928
+ 12,
1929
+ simde_mm_set_pi16(INT16_C( 3), INT16_C( 7), INT16_C( -2), INT16_C( 2)) },
1930
+ { simde_mm_set_pi16(INT16_C( -2824), INT16_C( 28483), INT16_C( -23495), INT16_C( -17241)),
1931
+ 11,
1932
+ simde_mm_set_pi16(INT16_C( -2), INT16_C( 13), INT16_C( -12), INT16_C( -9)) },
1933
+ { simde_mm_set_pi16(INT16_C( -5294), INT16_C( 29284), INT16_C( -3542), INT16_C( 21806)),
1934
+ 10,
1935
+ simde_mm_set_pi16(INT16_C( -6), INT16_C( 28), INT16_C( -4), INT16_C( 21)) }
1936
+ };
1937
+
1938
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1939
+ simde__m64 r = simde_mm_srai_pi16(test_vec[i].a, test_vec[i].count);
1940
+ simde_mm_empty();
1941
+ simde_assert_m64_i16(r, ==, test_vec[i].r);
1942
+ }
1943
+
1944
+ simde_mm_empty();
1945
+ return MUNIT_OK;
1946
+ }
1947
+
1948
+ static MunitResult
1949
+ test_simde_mm_srai_pi32(const MunitParameter params[], void* data) {
1950
+ (void) params;
1951
+ (void) data;
1952
+
1953
+ const struct {
1954
+ simde__m64 a;
1955
+ int count;
1956
+ simde__m64 r;
1957
+ } test_vec[8] = {
1958
+ { simde_mm_set_pi32(INT32_C( -2114070678), INT32_C( 1876117819)),
1959
+ 6,
1960
+ simde_mm_set_pi32(INT32_C( -33032355), INT32_C( 29314340)) },
1961
+ { simde_mm_set_pi32(INT32_C( -19598526), INT32_C( -187444349)),
1962
+ 15,
1963
+ simde_mm_set_pi32(INT32_C( -599), INT32_C( -5721)) },
1964
+ { simde_mm_set_pi32(INT32_C( 424723132), INT32_C( 235085476)),
1965
+ 2,
1966
+ simde_mm_set_pi32(INT32_C( 106180783), INT32_C( 58771369)) },
1967
+ { simde_mm_set_pi32(INT32_C( -396049994), INT32_C( 112822719)),
1968
+ 5,
1969
+ simde_mm_set_pi32(INT32_C( -12376563), INT32_C( 3525709)) },
1970
+ { simde_mm_set_pi32(INT32_C( 1612789855), INT32_C( 125684872)),
1971
+ 13,
1972
+ simde_mm_set_pi32(INT32_C( 196873), INT32_C( 15342)) },
1973
+ { simde_mm_set_pi32(INT32_C( 889683810), INT32_C( -312137220)),
1974
+ 12,
1975
+ simde_mm_set_pi32(INT32_C( 217207), INT32_C( -76206)) },
1976
+ { simde_mm_set_pi32(INT32_C( -185045181), INT32_C( -1539720025)),
1977
+ 11,
1978
+ simde_mm_set_pi32(INT32_C( -90355), INT32_C( -751817)) },
1979
+ { simde_mm_set_pi32(INT32_C( -346918300), INT32_C( -232106706)),
1980
+ 10,
1981
+ simde_mm_set_pi32(INT32_C( -338788), INT32_C( -226667)) }
1982
+ };
1983
+
1984
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1985
+ simde__m64 r = simde_mm_srai_pi32(test_vec[i].a, test_vec[i].count);
1986
+ simde_mm_empty();
1987
+ simde_assert_m64_i32(r, ==, test_vec[i].r);
1988
+ }
1989
+
1990
+ simde_mm_empty();
1991
+ return MUNIT_OK;
1992
+ }
1993
+
1994
+ static MunitResult
1995
+ test_simde_mm_sra_pi16(const MunitParameter params[], void* data) {
1996
+ (void) params;
1997
+ (void) data;
1998
+
1999
+ const struct {
2000
+ simde__m64 a;
2001
+ simde__m64 count;
2002
+ simde__m64 r;
2003
+ } test_vec[8] = {
2004
+ { simde_mm_set_pi16(INT16_C( 17561), INT16_C( 10489), INT16_C( -28823), INT16_C( -32541)),
2005
+ simde_mm_cvtsi64_m64(11),
2006
+ simde_mm_set_pi16(INT16_C( 8), INT16_C( 5), INT16_C( -15), INT16_C( -16)) },
2007
+ { simde_mm_set_pi16(INT16_C( -23916), INT16_C( 22319), INT16_C( -24731), INT16_C( -24948)),
2008
+ simde_mm_cvtsi64_m64(6),
2009
+ simde_mm_set_pi16(INT16_C( -374), INT16_C( 348), INT16_C( -387), INT16_C( -390)) },
2010
+ { simde_mm_set_pi16(INT16_C( 10305), INT16_C( -29863), INT16_C( -25929), INT16_C( 26582)),
2011
+ simde_mm_cvtsi64_m64(4),
2012
+ simde_mm_set_pi16(INT16_C( 644), INT16_C( -1867), INT16_C( -1621), INT16_C( 1661)) },
2013
+ { simde_mm_set_pi16(INT16_C( -11917), INT16_C( 7165), INT16_C( 860), INT16_C( -7108)),
2014
+ simde_mm_cvtsi64_m64(3),
2015
+ simde_mm_set_pi16(INT16_C( -1490), INT16_C( 895), INT16_C( 107), INT16_C( -889)) },
2016
+ { simde_mm_set_pi16(INT16_C( 30600), INT16_C( 3146), INT16_C( -22841), INT16_C( -27601)),
2017
+ simde_mm_cvtsi64_m64(0),
2018
+ simde_mm_set_pi16(INT16_C( 30600), INT16_C( 3146), INT16_C( -22841), INT16_C( -27601)) },
2019
+ { simde_mm_set_pi16(INT16_C( 7952), INT16_C( 8542), INT16_C( -27736), INT16_C( 20289)),
2020
+ simde_mm_cvtsi64_m64(1),
2021
+ simde_mm_set_pi16(INT16_C( 3976), INT16_C( 4271), INT16_C( -13868), INT16_C( 10144)) },
2022
+ { simde_mm_set_pi16(INT16_C( -24594), INT16_C( -8796), INT16_C( -25195), INT16_C( 300)),
2023
+ simde_mm_cvtsi64_m64(10),
2024
+ simde_mm_set_pi16(INT16_C( -25), INT16_C( -9), INT16_C( -25), INT16_C( 0)) },
2025
+ { simde_mm_set_pi16(INT16_C( 9552), INT16_C( 20569), INT16_C( 1838), INT16_C( 26385)),
2026
+ simde_mm_cvtsi64_m64(9),
2027
+ simde_mm_set_pi16(INT16_C( 18), INT16_C( 40), INT16_C( 3), INT16_C( 51)) }
2028
+ };
2029
+
2030
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2031
+ simde__m64 r = simde_mm_sra_pi16(test_vec[i].a, test_vec[i].count);
2032
+ simde_mm_empty();
2033
+ simde_assert_m64_i16(r, ==, test_vec[i].r);
2034
+ }
2035
+
2036
+ simde_mm_empty();
2037
+ return MUNIT_OK;
2038
+ }
2039
+
2040
+ static MunitResult
2041
+ test_simde_mm_sra_pi32(const MunitParameter params[], void* data) {
2042
+ (void) params;
2043
+ (void) data;
2044
+
2045
+ const struct {
2046
+ simde__m64 a;
2047
+ simde__m64 count;
2048
+ simde__m64 r;
2049
+ } test_vec[8] = {
2050
+ { simde_mm_set_pi32(INT32_C( -1917317013), INT32_C( -1383526879)),
2051
+ simde_mm_cvtsi64_m64(15),
2052
+ simde_mm_set_pi32(INT32_C( -58512), INT32_C( -42222)) },
2053
+ { simde_mm_set_pi32(INT32_C( -736945287), INT32_C( 858975517)),
2054
+ simde_mm_cvtsi64_m64(18),
2055
+ simde_mm_set_pi32(INT32_C( -2812), INT32_C( 3276)) },
2056
+ { simde_mm_set_pi32(INT32_C( 1016725733), INT32_C( -1716419270)),
2057
+ simde_mm_cvtsi64_m64(20),
2058
+ simde_mm_set_pi32(INT32_C( 969), INT32_C( -1637)) },
2059
+ { simde_mm_set_pi32(INT32_C( 884929023), INT32_C( -2109726169)),
2060
+ simde_mm_cvtsi64_m64(17),
2061
+ simde_mm_set_pi32(INT32_C( 6751), INT32_C( -16096)) },
2062
+ { simde_mm_set_pi32(INT32_C( 1766981669), INT32_C( 1505895116)),
2063
+ simde_mm_cvtsi64_m64(8),
2064
+ simde_mm_set_pi32(INT32_C( 6902272), INT32_C( 5882402)) },
2065
+ { simde_mm_set_pi32(INT32_C( 1732469741), INT32_C( -2109399559)),
2066
+ simde_mm_cvtsi64_m64(22),
2067
+ simde_mm_set_pi32(INT32_C( 413), INT32_C( -503)) },
2068
+ { simde_mm_set_pi32(INT32_C( -1207208411), INT32_C( 962459192)),
2069
+ simde_mm_cvtsi64_m64(24),
2070
+ simde_mm_set_pi32(INT32_C( -72), INT32_C( 57)) },
2071
+ { simde_mm_set_pi32(INT32_C( 519578965), INT32_C( 1181576220)),
2072
+ simde_mm_cvtsi64_m64(2),
2073
+ simde_mm_set_pi32(INT32_C( 129894741), INT32_C( 295394055)) }
2074
+ };
2075
+
2076
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2077
+ simde__m64 r = simde_mm_sra_pi32(test_vec[i].a, test_vec[i].count);
2078
+ simde_mm_empty();
2079
+ simde_assert_m64_i32(r, ==, test_vec[i].r);
2080
+ }
2081
+
2082
+ simde_mm_empty();
2083
+ return MUNIT_OK;
2084
+ }
2085
+
2086
+ static MunitResult
2087
+ test_simde_mm_sub_pi8(const MunitParameter params[], void* data) {
2088
+ (void) params;
2089
+ (void) data;
2090
+
2091
+ const struct {
2092
+ simde__m64 a;
2093
+ simde__m64 b;
2094
+ simde__m64 r;
2095
+ } test_vec[8] = {
2096
+ { simde_mm_set_pi8(INT8_C( -68), INT8_C(-128), INT8_C(-110), INT8_C( -46), INT8_C( 64), INT8_C( -87), INT8_C( 123), INT8_C( 83)),
2097
+ simde_mm_set_pi8(INT8_C( -44), INT8_C( -27), INT8_C(-126), INT8_C( 47), INT8_C( -5), INT8_C( 124), INT8_C(-111), INT8_C( 88)),
2098
+ simde_mm_set_pi8(INT8_C( -24), INT8_C(-101), INT8_C( 16), INT8_C( -93), INT8_C( 69), INT8_C( 45), INT8_C( -22), INT8_C( -5)) },
2099
+ { simde_mm_set_pi8(INT8_C( 7), INT8_C( 68), INT8_C( -53), INT8_C( -82), INT8_C( 50), INT8_C(-107), INT8_C( 109), INT8_C( 33)),
2100
+ simde_mm_set_pi8(INT8_C( 121), INT8_C( 50), INT8_C( 31), INT8_C( 80), INT8_C( 118), INT8_C( 8), INT8_C( -73), INT8_C( 38)),
2101
+ simde_mm_set_pi8(INT8_C(-114), INT8_C( 18), INT8_C( -84), INT8_C( 94), INT8_C( -68), INT8_C(-115), INT8_C( -74), INT8_C( -5)) },
2102
+ { simde_mm_set_pi8(INT8_C( 60), INT8_C( 5), INT8_C( 4), INT8_C( -85), INT8_C( -61), INT8_C( 71), INT8_C( -19), INT8_C( -92)),
2103
+ simde_mm_set_pi8(INT8_C( -51), INT8_C( 118), INT8_C( 99), INT8_C( 14), INT8_C( 124), INT8_C(-115), INT8_C( 49), INT8_C( 19)),
2104
+ simde_mm_set_pi8(INT8_C( 111), INT8_C(-113), INT8_C( -95), INT8_C( -99), INT8_C( 71), INT8_C( -70), INT8_C( -68), INT8_C(-111)) },
2105
+ { simde_mm_set_pi8(INT8_C( 80), INT8_C( 47), INT8_C( 46), INT8_C( -13), INT8_C( 94), INT8_C( -69), INT8_C( -72), INT8_C( -28)),
2106
+ simde_mm_set_pi8(INT8_C( 45), INT8_C( 99), INT8_C( 14), INT8_C( 4), INT8_C( 89), INT8_C( -77), INT8_C( -4), INT8_C( 109)),
2107
+ simde_mm_set_pi8(INT8_C( 35), INT8_C( -52), INT8_C( 32), INT8_C( -17), INT8_C( 5), INT8_C( 8), INT8_C( -68), INT8_C( 119)) },
2108
+ { simde_mm_set_pi8(INT8_C( 117), INT8_C(-101), INT8_C( -54), INT8_C( -50), INT8_C( 55), INT8_C( -97), INT8_C( -74), INT8_C( 79)),
2109
+ simde_mm_set_pi8(INT8_C( 116), INT8_C( 19), INT8_C( 84), INT8_C( 90), INT8_C( -15), INT8_C( -49), INT8_C( 34), INT8_C(-124)),
2110
+ simde_mm_set_pi8(INT8_C( 1), INT8_C(-120), INT8_C( 118), INT8_C( 116), INT8_C( 70), INT8_C( -48), INT8_C(-108), INT8_C( -53)) },
2111
+ { simde_mm_set_pi8(INT8_C( 43), INT8_C( -88), INT8_C( 7), INT8_C( -31), INT8_C( -45), INT8_C( -6), INT8_C( -61), INT8_C( -47)),
2112
+ simde_mm_set_pi8(INT8_C(-110), INT8_C( 87), INT8_C(-102), INT8_C( -63), INT8_C( -35), INT8_C( 78), INT8_C( 96), INT8_C( 51)),
2113
+ simde_mm_set_pi8(INT8_C(-103), INT8_C( 81), INT8_C( 109), INT8_C( 32), INT8_C( -10), INT8_C( -84), INT8_C( 99), INT8_C( -98)) },
2114
+ { simde_mm_set_pi8(INT8_C(-113), INT8_C( -62), INT8_C(-117), INT8_C( 34), INT8_C( -40), INT8_C( 24), INT8_C( -20), INT8_C( 52)),
2115
+ simde_mm_set_pi8(INT8_C( 53), INT8_C( -16), INT8_C( 75), INT8_C( 38), INT8_C( 2), INT8_C( -75), INT8_C( -51), INT8_C( 92)),
2116
+ simde_mm_set_pi8(INT8_C( 90), INT8_C( -46), INT8_C( 64), INT8_C( -4), INT8_C( -42), INT8_C( 99), INT8_C( 31), INT8_C( -40)) },
2117
+ { simde_mm_set_pi8(INT8_C( -94), INT8_C( -1), INT8_C( -70), INT8_C( 90), INT8_C(-105), INT8_C( -20), INT8_C( -71), INT8_C( -95)),
2118
+ simde_mm_set_pi8(INT8_C( -97), INT8_C( 49), INT8_C( 71), INT8_C( 69), INT8_C( -48), INT8_C( 31), INT8_C( -19), INT8_C( 28)),
2119
+ simde_mm_set_pi8(INT8_C( 3), INT8_C( -50), INT8_C( 115), INT8_C( 21), INT8_C( -57), INT8_C( -51), INT8_C( -52), INT8_C(-123)) }
2120
+ };
2121
+
2122
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2123
+ simde__m64 r = simde_mm_sub_pi8(test_vec[i].a, test_vec[i].b);
2124
+ simde_mm_empty();
2125
+ simde_assert_m64_i8(r, ==, test_vec[i].r);
2126
+ }
2127
+
2128
+ simde_mm_empty();
2129
+ return MUNIT_OK;
2130
+ }
2131
+
2132
+ static MunitResult
2133
+ test_simde_mm_sub_pi16(const MunitParameter params[], void* data) {
2134
+ (void) params;
2135
+ (void) data;
2136
+
2137
+ const struct {
2138
+ simde__m64 a;
2139
+ simde__m64 b;
2140
+ simde__m64 r;
2141
+ } test_vec[8] = {
2142
+ { simde_mm_set_pi16(INT16_C( -19579), INT16_C( 12561), INT16_C( 18345), INT16_C( 16319)),
2143
+ simde_mm_set_pi16(INT16_C( -28282), INT16_C( 12811), INT16_C( -17042), INT16_C( 32694)),
2144
+ simde_mm_set_pi16(INT16_C( 8703), INT16_C( -250), INT16_C( -30149), INT16_C( -16375)) },
2145
+ { simde_mm_set_pi16(INT16_C( 26389), INT16_C( -16440), INT16_C( 31193), INT16_C( 17420)),
2146
+ simde_mm_set_pi16(INT16_C( -16772), INT16_C( -30407), INT16_C( 13204), INT16_C( -3950)),
2147
+ simde_mm_set_pi16(INT16_C( -22375), INT16_C( 13967), INT16_C( 17989), INT16_C( 21370)) },
2148
+ { simde_mm_set_pi16(INT16_C( 27021), INT16_C( -21341), INT16_C( -29765), INT16_C( -27825)),
2149
+ simde_mm_set_pi16(INT16_C( 32255), INT16_C( -11881), INT16_C( -17239), INT16_C( 17727)),
2150
+ simde_mm_set_pi16(INT16_C( -5234), INT16_C( -9460), INT16_C( -12526), INT16_C( 19984)) },
2151
+ { simde_mm_set_pi16(INT16_C( -1061), INT16_C( 10691), INT16_C( 5402), INT16_C( -29779)),
2152
+ simde_mm_set_pi16(INT16_C( -3105), INT16_C( 17443), INT16_C( 29683), INT16_C( -4669)),
2153
+ simde_mm_set_pi16(INT16_C( 2044), INT16_C( -6752), INT16_C( -24281), INT16_C( -25110)) },
2154
+ { simde_mm_set_pi16(INT16_C( -27429), INT16_C( -24038), INT16_C( -27170), INT16_C( 23974)),
2155
+ simde_mm_set_pi16(INT16_C( 26527), INT16_C( -23757), INT16_C( 12822), INT16_C( 25106)),
2156
+ simde_mm_set_pi16(INT16_C( 11580), INT16_C( -281), INT16_C( 25544), INT16_C( -1132)) },
2157
+ { simde_mm_set_pi16(INT16_C( -22000), INT16_C( 31301), INT16_C( 3019), INT16_C( 5319)),
2158
+ simde_mm_set_pi16(INT16_C( 17233), INT16_C( -4995), INT16_C( -32364), INT16_C( 13233)),
2159
+ simde_mm_set_pi16(INT16_C( 26303), INT16_C( -29240), INT16_C( -30153), INT16_C( -7914)) },
2160
+ { simde_mm_set_pi16(INT16_C( -3486), INT16_C( -1801), INT16_C( 6573), INT16_C( -2443)),
2161
+ simde_mm_set_pi16(INT16_C( 12310), INT16_C( 34), INT16_C( -20082), INT16_C( -25128)),
2162
+ simde_mm_set_pi16(INT16_C( -15796), INT16_C( -1835), INT16_C( 26655), INT16_C( 22685)) },
2163
+ { simde_mm_set_pi16(INT16_C( -4220), INT16_C( 17506), INT16_C( 6973), INT16_C( -8771)),
2164
+ simde_mm_set_pi16(INT16_C( -28953), INT16_C( 20334), INT16_C( 30681), INT16_C( -3329)),
2165
+ simde_mm_set_pi16(INT16_C( 24733), INT16_C( -2828), INT16_C( -23708), INT16_C( -5442)) }
2166
+ };
2167
+
2168
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2169
+ simde__m64 r = simde_mm_sub_pi16(test_vec[i].a, test_vec[i].b);
2170
+ simde_mm_empty();
2171
+ simde_assert_m64_i16(r, ==, test_vec[i].r);
2172
+ }
2173
+
2174
+ simde_mm_empty();
2175
+ return MUNIT_OK;
2176
+ }
2177
+
2178
+ static MunitResult
2179
+ test_simde_mm_sub_pi32(const MunitParameter params[], void* data) {
2180
+ (void) params;
2181
+ (void) data;
2182
+
2183
+ const struct {
2184
+ simde__m64 a;
2185
+ simde__m64 b;
2186
+ simde__m64 r;
2187
+ } test_vec[8] = {
2188
+ { simde_mm_set_pi32(INT32_C( 1529386227), INT32_C( -668097316)),
2189
+ simde_mm_set_pi32(INT32_C( 473233841), INT32_C( -90529672)),
2190
+ simde_mm_set_pi32(INT32_C( 1056152386), INT32_C( -577567644)) },
2191
+ { simde_mm_set_pi32(INT32_C( -1931729107), INT32_C( -722204778)),
2192
+ simde_mm_set_pi32(INT32_C( -1308867233), INT32_C( -379543807)),
2193
+ simde_mm_set_pi32(INT32_C( -622861874), INT32_C( -342660971)) },
2194
+ { simde_mm_set_pi32(INT32_C( -291860960), INT32_C( -190367090)),
2195
+ simde_mm_set_pi32(INT32_C( 135041259), INT32_C( 1788100299)),
2196
+ simde_mm_set_pi32(INT32_C( -426902219), INT32_C( -1978467389)) },
2197
+ { simde_mm_set_pi32(INT32_C( 1883589163), INT32_C( 323765200)),
2198
+ simde_mm_set_pi32(INT32_C( 645555820), INT32_C( 651498122)),
2199
+ simde_mm_set_pi32(INT32_C( 1238033343), INT32_C( -327732922)) },
2200
+ { simde_mm_set_pi32(INT32_C( 1636190981), INT32_C( -1768384078)),
2201
+ simde_mm_set_pi32(INT32_C( 292739084), INT32_C( -81452554)),
2202
+ simde_mm_set_pi32(INT32_C( 1343451897), INT32_C( -1686931524)) },
2203
+ { simde_mm_set_pi32(INT32_C( -1203362066), INT32_C( 1430164168)),
2204
+ simde_mm_set_pi32(INT32_C( 1181972217), INT32_C( -1859714213)),
2205
+ simde_mm_set_pi32(INT32_C( 1909633013), INT32_C( -1005088915)) },
2206
+ { simde_mm_set_pi32(INT32_C( -81132926), INT32_C( 156813953)),
2207
+ simde_mm_set_pi32(INT32_C( 1408689560), INT32_C( -1315494890)),
2208
+ simde_mm_set_pi32(INT32_C( -1489822486), INT32_C( 1472308843)) },
2209
+ { simde_mm_set_pi32(INT32_C( -99259746), INT32_C( -1543487401)),
2210
+ simde_mm_set_pi32(INT32_C( 1211860803), INT32_C( 322815885)),
2211
+ simde_mm_set_pi32(INT32_C( -1311120549), INT32_C( -1866303286)) }
2212
+ };
2213
+
2214
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2215
+ simde__m64 r = simde_mm_sub_pi32(test_vec[i].a, test_vec[i].b);
2216
+ simde_mm_empty();
2217
+ simde_assert_m64_i32(r, ==, test_vec[i].r);
2218
+ }
2219
+
2220
+ simde_mm_empty();
2221
+ return MUNIT_OK;
2222
+ }
2223
+
2224
+ static MunitResult
2225
+ test_simde_mm_subs_pi8(const MunitParameter params[], void* data) {
2226
+ (void) params;
2227
+ (void) data;
2228
+
2229
+ const struct {
2230
+ simde__m64 a;
2231
+ simde__m64 b;
2232
+ simde__m64 r;
2233
+ } test_vec[8] = {
2234
+ { simde_mm_set_pi8(INT8_C( 47), INT8_C( -51), INT8_C( -9), INT8_C( 46), INT8_C( 37), INT8_C( 125), INT8_C(-121), INT8_C( 88)),
2235
+ simde_mm_set_pi8(INT8_C( -37), INT8_C( -59), INT8_C( -18), INT8_C( -39), INT8_C( -68), INT8_C( 127), INT8_C( -66), INT8_C( -14)),
2236
+ simde_mm_set_pi8(INT8_C( 84), INT8_C( 8), INT8_C( 9), INT8_C( 85), INT8_C( 105), INT8_C( -2), INT8_C( -55), INT8_C( 102)) },
2237
+ { simde_mm_set_pi8(INT8_C( -68), INT8_C( 111), INT8_C( 54), INT8_C( -2), INT8_C( -96), INT8_C( -30), INT8_C( 7), INT8_C( -1)),
2238
+ simde_mm_set_pi8(INT8_C( 71), INT8_C( 109), INT8_C( 43), INT8_C( -28), INT8_C(-128), INT8_C( -98), INT8_C( 65), INT8_C( -86)),
2239
+ simde_mm_set_pi8(INT8_C(-128), INT8_C( 2), INT8_C( 11), INT8_C( 26), INT8_C( 32), INT8_C( 68), INT8_C( -58), INT8_C( 85)) },
2240
+ { simde_mm_set_pi8(INT8_C(-124), INT8_C(-105), INT8_C( 39), INT8_C( 68), INT8_C( -44), INT8_C( -60), INT8_C( -44), INT8_C( -99)),
2241
+ simde_mm_set_pi8(INT8_C( -9), INT8_C(-127), INT8_C( 77), INT8_C( -14), INT8_C( -70), INT8_C( -39), INT8_C( -18), INT8_C( -40)),
2242
+ simde_mm_set_pi8(INT8_C(-115), INT8_C( 22), INT8_C( -38), INT8_C( 82), INT8_C( 26), INT8_C( -21), INT8_C( -26), INT8_C( -59)) },
2243
+ { simde_mm_set_pi8(INT8_C( 52), INT8_C( 33), INT8_C( 97), INT8_C( 39), INT8_C(-126), INT8_C( -11), INT8_C( 17), INT8_C( 108)),
2244
+ simde_mm_set_pi8(INT8_C( 65), INT8_C( 112), INT8_C( 108), INT8_C( 33), INT8_C( 68), INT8_C(-103), INT8_C( -45), INT8_C( 7)),
2245
+ simde_mm_set_pi8(INT8_C( -13), INT8_C( -79), INT8_C( -11), INT8_C( 6), INT8_C(-128), INT8_C( 92), INT8_C( 62), INT8_C( 101)) },
2246
+ { simde_mm_set_pi8(INT8_C(-105), INT8_C( 75), INT8_C( 127), INT8_C( -57), INT8_C( 88), INT8_C( -25), INT8_C( -75), INT8_C( -74)),
2247
+ simde_mm_set_pi8(INT8_C(-125), INT8_C( -81), INT8_C( 60), INT8_C(-108), INT8_C( 78), INT8_C( -60), INT8_C( 88), INT8_C( 30)),
2248
+ simde_mm_set_pi8(INT8_C( 20), INT8_C( 127), INT8_C( 67), INT8_C( 51), INT8_C( 10), INT8_C( 35), INT8_C(-128), INT8_C(-104)) },
2249
+ { simde_mm_set_pi8(INT8_C( -28), INT8_C( -97), INT8_C( 80), INT8_C( -43), INT8_C( -70), INT8_C( 45), INT8_C( 10), INT8_C( -67)),
2250
+ simde_mm_set_pi8(INT8_C(-109), INT8_C( 97), INT8_C( 25), INT8_C( 63), INT8_C( -65), INT8_C( -95), INT8_C(-111), INT8_C( -39)),
2251
+ simde_mm_set_pi8(INT8_C( 81), INT8_C(-128), INT8_C( 55), INT8_C(-106), INT8_C( -5), INT8_C( 127), INT8_C( 121), INT8_C( -28)) },
2252
+ { simde_mm_set_pi8(INT8_C( 52), INT8_C( -18), INT8_C( -86), INT8_C( -29), INT8_C( 69), INT8_C( 92), INT8_C( 89), INT8_C( -66)),
2253
+ simde_mm_set_pi8(INT8_C( 16), INT8_C( 0), INT8_C( 95), INT8_C( 95), INT8_C( 115), INT8_C( -53), INT8_C( 55), INT8_C( 75)),
2254
+ simde_mm_set_pi8(INT8_C( 36), INT8_C( -18), INT8_C(-128), INT8_C(-124), INT8_C( -46), INT8_C( 127), INT8_C( 34), INT8_C(-128)) },
2255
+ { simde_mm_set_pi8(INT8_C( 99), INT8_C( -48), INT8_C( 16), INT8_C( 126), INT8_C(-110), INT8_C(-111), INT8_C( -66), INT8_C( 83)),
2256
+ simde_mm_set_pi8(INT8_C(-118), INT8_C( 118), INT8_C( 100), INT8_C(-121), INT8_C( -17), INT8_C( 74), INT8_C( -47), INT8_C( -77)),
2257
+ simde_mm_set_pi8(INT8_C( 127), INT8_C(-128), INT8_C( -84), INT8_C( 127), INT8_C( -93), INT8_C(-128), INT8_C( -19), INT8_C( 127)) }
2258
+ };
2259
+
2260
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2261
+ simde__m64 r = simde_mm_subs_pi8(test_vec[i].a, test_vec[i].b);
2262
+ simde_mm_empty();
2263
+ simde_assert_m64_i8(r, ==, test_vec[i].r);
2264
+ }
2265
+
2266
+ simde_mm_empty();
2267
+ return MUNIT_OK;
2268
+ }
2269
+
2270
+ static MunitResult
2271
+ test_simde_mm_subs_pu8(const MunitParameter params[], void* data) {
2272
+ (void) params;
2273
+ (void) data;
2274
+
2275
+ const struct {
2276
+ simde__m64 a;
2277
+ simde__m64 b;
2278
+ simde__m64 r;
2279
+ } test_vec[8] = {
2280
+ { simde_mm_set_pi8(INT8_C( 108), INT8_C(-104), INT8_C( 106), INT8_C( 91), INT8_C( 54), INT8_C( 95), INT8_C( -86), INT8_C( -68)),
2281
+ simde_mm_set_pi8(INT8_C( -73), INT8_C( -12), INT8_C( 13), INT8_C( -7), INT8_C(-102), INT8_C( -27), INT8_C( -93), INT8_C( -1)),
2282
+ simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 93), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 7), INT8_C( 0)) },
2283
+ { simde_mm_set_pi8(INT8_C( -3), INT8_C(-122), INT8_C(-110), INT8_C( -87), INT8_C( -28), INT8_C( -38), INT8_C( 30), INT8_C( -22)),
2284
+ simde_mm_set_pi8(INT8_C( -24), INT8_C( 92), INT8_C( -83), INT8_C( -90), INT8_C(-108), INT8_C(-117), INT8_C( 101), INT8_C( -58)),
2285
+ simde_mm_set_pi8(INT8_C( 21), INT8_C( 42), INT8_C( 0), INT8_C( 3), INT8_C( 80), INT8_C( 79), INT8_C( 0), INT8_C( 36)) },
2286
+ { simde_mm_set_pi8(INT8_C( -50), INT8_C( 51), INT8_C( -7), INT8_C( -68), INT8_C( -7), INT8_C( 11), INT8_C( 15), INT8_C( 2)),
2287
+ simde_mm_set_pi8(INT8_C( -4), INT8_C(-101), INT8_C(-106), INT8_C( -43), INT8_C(-124), INT8_C( 1), INT8_C( -19), INT8_C( 18)),
2288
+ simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 99), INT8_C( 0), INT8_C( 117), INT8_C( 10), INT8_C( 0), INT8_C( 0)) },
2289
+ { simde_mm_set_pi8(INT8_C( 20), INT8_C( -33), INT8_C( -99), INT8_C( -4), INT8_C(-119), INT8_C( 72), INT8_C( 104), INT8_C( -43)),
2290
+ simde_mm_set_pi8(INT8_C( -27), INT8_C( 116), INT8_C( 127), INT8_C( 71), INT8_C( 110), INT8_C( 47), INT8_C( 56), INT8_C( -18)),
2291
+ simde_mm_set_pi8(INT8_C( 0), INT8_C( 107), INT8_C( 30), INT8_C( -75), INT8_C( 27), INT8_C( 25), INT8_C( 48), INT8_C( 0)) },
2292
+ { simde_mm_set_pi8(INT8_C( 24), INT8_C( 44), INT8_C( 126), INT8_C( -16), INT8_C( 48), INT8_C( 119), INT8_C( 122), INT8_C( 92)),
2293
+ simde_mm_set_pi8(INT8_C( -53), INT8_C( 93), INT8_C( 123), INT8_C( 43), INT8_C( -1), INT8_C( -86), INT8_C( 12), INT8_C( -40)),
2294
+ simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 3), INT8_C( -59), INT8_C( 0), INT8_C( 0), INT8_C( 110), INT8_C( 0)) },
2295
+ { simde_mm_set_pi8(INT8_C( 43), INT8_C( -29), INT8_C( 72), INT8_C( -16), INT8_C( 73), INT8_C( 36), INT8_C( 38), INT8_C(-122)),
2296
+ simde_mm_set_pi8(INT8_C( 68), INT8_C( 17), INT8_C(-105), INT8_C( 112), INT8_C( 123), INT8_C(-118), INT8_C( 37), INT8_C( 35)),
2297
+ simde_mm_set_pi8(INT8_C( 0), INT8_C( -46), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( 99)) },
2298
+ { simde_mm_set_pi8(INT8_C( 78), INT8_C( 25), INT8_C(-123), INT8_C(-114), INT8_C( 56), INT8_C( 33), INT8_C( -54), INT8_C( 46)),
2299
+ simde_mm_set_pi8(INT8_C( -71), INT8_C( 113), INT8_C( -52), INT8_C( -21), INT8_C(-112), INT8_C( -45), INT8_C( 117), INT8_C( -91)),
2300
+ simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 85), INT8_C( 0)) },
2301
+ { simde_mm_set_pi8(INT8_C( 35), INT8_C( 56), INT8_C( 106), INT8_C( 118), INT8_C( -12), INT8_C( -92), INT8_C( -24), INT8_C( 93)),
2302
+ simde_mm_set_pi8(INT8_C(-118), INT8_C( -26), INT8_C( -47), INT8_C( 86), INT8_C( -69), INT8_C( 43), INT8_C( 117), INT8_C( 101)),
2303
+ simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 32), INT8_C( 57), INT8_C( 121), INT8_C( 115), INT8_C( 0)) }
2304
+ };
2305
+
2306
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2307
+ simde__m64 r = simde_mm_subs_pu8(test_vec[i].a, test_vec[i].b);
2308
+ simde_mm_empty();
2309
+ simde_assert_m64_i8(r, ==, test_vec[i].r);
2310
+ }
2311
+
2312
+ simde_mm_empty();
2313
+ return MUNIT_OK;
2314
+ }
2315
+
2316
+ static MunitResult
2317
+ test_simde_mm_subs_pi16(const MunitParameter params[], void* data) {
2318
+ (void) params;
2319
+ (void) data;
2320
+
2321
+ const struct {
2322
+ simde__m64 a;
2323
+ simde__m64 b;
2324
+ simde__m64 r;
2325
+ } test_vec[8] = {
2326
+ { simde_mm_set_pi16(INT16_C( 997), INT16_C( -2676), INT16_C( -29256), INT16_C( -21534)),
2327
+ simde_mm_set_pi16(INT16_C( 25057), INT16_C( -8634), INT16_C( 14564), INT16_C( 23460)),
2328
+ simde_mm_set_pi16(INT16_C( -24060), INT16_C( 5958), INT16_C( -32768), INT16_C( -32768)) },
2329
+ { simde_mm_set_pi16(INT16_C( 17773), INT16_C( -21379), INT16_C( -10016), INT16_C( -25057)),
2330
+ simde_mm_set_pi16(INT16_C( -17494), INT16_C( -5727), INT16_C( -23865), INT16_C( -12297)),
2331
+ simde_mm_set_pi16(INT16_C( 32767), INT16_C( -15652), INT16_C( 13849), INT16_C( -12760)) },
2332
+ { simde_mm_set_pi16(INT16_C( -18595), INT16_C( -25519), INT16_C( 25647), INT16_C( 18081)),
2333
+ simde_mm_set_pi16(INT16_C( -16730), INT16_C( 8578), INT16_C( -24195), INT16_C( -23138)),
2334
+ simde_mm_set_pi16(INT16_C( -1865), INT16_C( -32768), INT16_C( 32767), INT16_C( 32767)) },
2335
+ { simde_mm_set_pi16(INT16_C( 30835), INT16_C( -1900), INT16_C( -12465), INT16_C( -32273)),
2336
+ simde_mm_set_pi16(INT16_C( 22212), INT16_C( 29314), INT16_C( 30369), INT16_C( -7474)),
2337
+ simde_mm_set_pi16(INT16_C( 8623), INT16_C( -31214), INT16_C( -32768), INT16_C( -24799)) },
2338
+ { simde_mm_set_pi16(INT16_C( -4511), INT16_C( -11707), INT16_C( -456), INT16_C( 4939)),
2339
+ simde_mm_set_pi16(INT16_C( 9564), INT16_C( -6551), INT16_C( 15884), INT16_C( 25916)),
2340
+ simde_mm_set_pi16(INT16_C( -14075), INT16_C( -5156), INT16_C( -16340), INT16_C( -20977)) },
2341
+ { simde_mm_set_pi16(INT16_C( 16747), INT16_C( 26115), INT16_C( 28725), INT16_C( -9489)),
2342
+ simde_mm_set_pi16(INT16_C( 18589), INT16_C( 10790), INT16_C( 16046), INT16_C( 7670)),
2343
+ simde_mm_set_pi16(INT16_C( -1842), INT16_C( 15325), INT16_C( 12679), INT16_C( -17159)) },
2344
+ { simde_mm_set_pi16(INT16_C( 12230), INT16_C( 31818), INT16_C( -20400), INT16_C( 29194)),
2345
+ simde_mm_set_pi16(INT16_C( 13624), INT16_C( -27762), INT16_C( -3717), INT16_C( 9357)),
2346
+ simde_mm_set_pi16(INT16_C( -1394), INT16_C( 32767), INT16_C( -16683), INT16_C( 19837)) },
2347
+ { simde_mm_set_pi16(INT16_C( 4223), INT16_C( 22129), INT16_C( 27682), INT16_C( 6112)),
2348
+ simde_mm_set_pi16(INT16_C( 25462), INT16_C( 1497), INT16_C( -20195), INT16_C( -31363)),
2349
+ simde_mm_set_pi16(INT16_C( -21239), INT16_C( 20632), INT16_C( 32767), INT16_C( 32767)) }
2350
+ };
2351
+
2352
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2353
+ simde__m64 r = simde_mm_subs_pi16(test_vec[i].a, test_vec[i].b);
2354
+ simde_mm_empty();
2355
+ simde_assert_m64_i16(r, ==, test_vec[i].r);
2356
+ }
2357
+
2358
+ simde_mm_empty();
2359
+ return MUNIT_OK;
2360
+ }
2361
+
2362
+ static MunitResult
2363
+ test_simde_mm_subs_pu16(const MunitParameter params[], void* data) {
2364
+ (void) params;
2365
+ (void) data;
2366
+
2367
+ const struct {
2368
+ simde__m64 a;
2369
+ simde__m64 b;
2370
+ simde__m64 r;
2371
+ } test_vec[8] = {
2372
+ { simde_mm_set_pi16(INT16_C( -14933), INT16_C( 874), INT16_C( -12812), INT16_C( -23674)),
2373
+ simde_mm_set_pi16(INT16_C( 10199), INT16_C( -21634), INT16_C( -16349), INT16_C( -2233)),
2374
+ simde_mm_set_pi16(INT16_C( -25132), INT16_C( 0), INT16_C( 3537), INT16_C( 0)) },
2375
+ { simde_mm_set_pi16(INT16_C( -30411), INT16_C( 14403), INT16_C( 16019), INT16_C( -7235)),
2376
+ simde_mm_set_pi16(INT16_C( 20809), INT16_C( 30553), INT16_C( -13348), INT16_C( -9019)),
2377
+ simde_mm_set_pi16(INT16_C( 14316), INT16_C( 0), INT16_C( 0), INT16_C( 1784)) },
2378
+ { simde_mm_set_pi16(INT16_C( -3263), INT16_C( 17129), INT16_C( 7120), INT16_C( 17541)),
2379
+ simde_mm_set_pi16(INT16_C( 17758), INT16_C( -24273), INT16_C( -16817), INT16_C( -26381)),
2380
+ simde_mm_set_pi16(INT16_C( -21021), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
2381
+ { simde_mm_set_pi16(INT16_C( 28253), INT16_C( -27429), INT16_C( -2971), INT16_C( -25455)),
2382
+ simde_mm_set_pi16(INT16_C( -28858), INT16_C( 23971), INT16_C( 30194), INT16_C( 29959)),
2383
+ simde_mm_set_pi16(INT16_C( 0), INT16_C( 14136), INT16_C( 32371), INT16_C( 10122)) },
2384
+ { simde_mm_set_pi16(INT16_C( -5264), INT16_C( -5469), INT16_C( 2876), INT16_C( 12913)),
2385
+ simde_mm_set_pi16(INT16_C( -25438), INT16_C( -13476), INT16_C( -20493), INT16_C( 9684)),
2386
+ simde_mm_set_pi16(INT16_C( 20174), INT16_C( 8007), INT16_C( 0), INT16_C( 3229)) },
2387
+ { simde_mm_set_pi16(INT16_C( -6406), INT16_C( 29502), INT16_C( -32502), INT16_C( 29440)),
2388
+ simde_mm_set_pi16(INT16_C( 24669), INT16_C( 29936), INT16_C( -12635), INT16_C( -28492)),
2389
+ simde_mm_set_pi16(INT16_C( -31075), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
2390
+ { simde_mm_set_pi16(INT16_C( 1295), INT16_C( 17975), INT16_C( -25873), INT16_C( -8332)),
2391
+ simde_mm_set_pi16(INT16_C( -30157), INT16_C( 122), INT16_C( -20762), INT16_C( 12983)),
2392
+ simde_mm_set_pi16(INT16_C( 0), INT16_C( 17853), INT16_C( 0), INT16_C( -21315)) },
2393
+ { simde_mm_set_pi16(INT16_C( -17654), INT16_C( -28720), INT16_C( -25036), INT16_C( -2408)),
2394
+ simde_mm_set_pi16(INT16_C( 32575), INT16_C( 13887), INT16_C( 23741), INT16_C( -32273)),
2395
+ simde_mm_set_pi16(INT16_C( 15307), INT16_C( 22929), INT16_C( 16759), INT16_C( 29865)) }
2396
+ };
2397
+
2398
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2399
+ simde__m64 r = simde_mm_subs_pu16(test_vec[i].a, test_vec[i].b);
2400
+ simde_mm_empty();
2401
+ simde_assert_m64_i16(r, ==, test_vec[i].r);
2402
+ }
2403
+
2404
+ simde_mm_empty();
2405
+ return MUNIT_OK;
2406
+ }
2407
+
2408
+ static MunitResult
2409
+ test_simde_mm_unpackhi_pi8(const MunitParameter params[], void* data) {
2410
+ (void) params;
2411
+ (void) data;
2412
+
2413
+ const struct {
2414
+ simde__m64 a;
2415
+ simde__m64 b;
2416
+ simde__m64 r;
2417
+ } test_vec[8] = {
2418
+ { simde_mm_set_pi8(INT8_C(-127), INT8_C( 48), INT8_C( 42), INT8_C( 115), INT8_C( -77), INT8_C( 4), INT8_C( 25), INT8_C( -42)),
2419
+ simde_mm_set_pi8(INT8_C( 57), INT8_C( 92), INT8_C( -39), INT8_C( -42), INT8_C( 73), INT8_C( 4), INT8_C( 41), INT8_C( 118)),
2420
+ simde_mm_set_pi8(INT8_C( 57), INT8_C(-127), INT8_C( 92), INT8_C( 48), INT8_C( -39), INT8_C( 42), INT8_C( -42), INT8_C( 115)) },
2421
+ { simde_mm_set_pi8(INT8_C( -16), INT8_C( 120), INT8_C( 16), INT8_C( 116), INT8_C( -35), INT8_C(-100), INT8_C( 0), INT8_C( -39)),
2422
+ simde_mm_set_pi8(INT8_C( 63), INT8_C( -73), INT8_C( 48), INT8_C( -66), INT8_C( -33), INT8_C(-102), INT8_C( -62), INT8_C( 118)),
2423
+ simde_mm_set_pi8(INT8_C( 63), INT8_C( -16), INT8_C( -73), INT8_C( 120), INT8_C( 48), INT8_C( 16), INT8_C( -66), INT8_C( 116)) },
2424
+ { simde_mm_set_pi8(INT8_C( -24), INT8_C( -49), INT8_C( 20), INT8_C( 34), INT8_C( -1), INT8_C( 63), INT8_C( 11), INT8_C( -36)),
2425
+ simde_mm_set_pi8(INT8_C( -97), INT8_C( 52), INT8_C( 62), INT8_C( -48), INT8_C( -15), INT8_C( 24), INT8_C( 18), INT8_C( -28)),
2426
+ simde_mm_set_pi8(INT8_C( -97), INT8_C( -24), INT8_C( 52), INT8_C( -49), INT8_C( 62), INT8_C( 20), INT8_C( -48), INT8_C( 34)) },
2427
+ { simde_mm_set_pi8(INT8_C( 34), INT8_C( -74), INT8_C( -88), INT8_C( -68), INT8_C( 80), INT8_C( 80), INT8_C( -27), INT8_C(-109)),
2428
+ simde_mm_set_pi8(INT8_C( -14), INT8_C( 17), INT8_C( -50), INT8_C( 50), INT8_C( -72), INT8_C(-111), INT8_C( -32), INT8_C(-114)),
2429
+ simde_mm_set_pi8(INT8_C( -14), INT8_C( 34), INT8_C( 17), INT8_C( -74), INT8_C( -50), INT8_C( -88), INT8_C( 50), INT8_C( -68)) },
2430
+ { simde_mm_set_pi8(INT8_C( -82), INT8_C( 34), INT8_C( 79), INT8_C( 75), INT8_C( -45), INT8_C( 43), INT8_C( -97), INT8_C( 55)),
2431
+ simde_mm_set_pi8(INT8_C( 126), INT8_C( 126), INT8_C( 113), INT8_C( 122), INT8_C( 7), INT8_C( 69), INT8_C( 31), INT8_C( 83)),
2432
+ simde_mm_set_pi8(INT8_C( 126), INT8_C( -82), INT8_C( 126), INT8_C( 34), INT8_C( 113), INT8_C( 79), INT8_C( 122), INT8_C( 75)) },
2433
+ { simde_mm_set_pi8(INT8_C( -4), INT8_C( -98), INT8_C( 7), INT8_C( 88), INT8_C( -93), INT8_C( 56), INT8_C( -38), INT8_C( -15)),
2434
+ simde_mm_set_pi8(INT8_C( 75), INT8_C( 97), INT8_C( 76), INT8_C( 26), INT8_C(-119), INT8_C( -96), INT8_C( -74), INT8_C( -24)),
2435
+ simde_mm_set_pi8(INT8_C( 75), INT8_C( -4), INT8_C( 97), INT8_C( -98), INT8_C( 76), INT8_C( 7), INT8_C( 26), INT8_C( 88)) },
2436
+ { simde_mm_set_pi8(INT8_C( 124), INT8_C( 71), INT8_C( -14), INT8_C( 19), INT8_C( -69), INT8_C( -31), INT8_C( 35), INT8_C( -82)),
2437
+ simde_mm_set_pi8(INT8_C( -31), INT8_C( 125), INT8_C( 35), INT8_C( 84), INT8_C( 105), INT8_C(-115), INT8_C( 11), INT8_C( -12)),
2438
+ simde_mm_set_pi8(INT8_C( -31), INT8_C( 124), INT8_C( 125), INT8_C( 71), INT8_C( 35), INT8_C( -14), INT8_C( 84), INT8_C( 19)) },
2439
+ { simde_mm_set_pi8(INT8_C( 45), INT8_C( -51), INT8_C( -71), INT8_C( -47), INT8_C( -27), INT8_C( 20), INT8_C(-117), INT8_C( -5)),
2440
+ simde_mm_set_pi8(INT8_C( -92), INT8_C( -74), INT8_C( 58), INT8_C( 117), INT8_C( -53), INT8_C( 43), INT8_C( 66), INT8_C( -55)),
2441
+ simde_mm_set_pi8(INT8_C( -92), INT8_C( 45), INT8_C( -74), INT8_C( -51), INT8_C( 58), INT8_C( -71), INT8_C( 117), INT8_C( -47)) }
2442
+ };
2443
+
2444
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2445
+ simde__m64 r = simde_mm_unpackhi_pi8(test_vec[i].a, test_vec[i].b);
2446
+ simde_mm_empty();
2447
+ simde_assert_m64_i8(r, ==, test_vec[i].r);
2448
+ }
2449
+
2450
+ simde_mm_empty();
2451
+ return MUNIT_OK;
2452
+ }
2453
+
2454
+ static MunitResult
2455
+ test_simde_mm_unpackhi_pi16(const MunitParameter params[], void* data) {
2456
+ (void) params;
2457
+ (void) data;
2458
+
2459
+ const struct {
2460
+ simde__m64 a;
2461
+ simde__m64 b;
2462
+ simde__m64 r;
2463
+ } test_vec[8] = {
2464
+ { simde_mm_set_pi16(INT16_C( -14965), INT16_C( 28080), INT16_C( -15604), INT16_C( -10099)),
2465
+ simde_mm_set_pi16(INT16_C( 16538), INT16_C( -18813), INT16_C( -254), INT16_C( -23207)),
2466
+ simde_mm_set_pi16(INT16_C( 16538), INT16_C( -14965), INT16_C( -18813), INT16_C( 28080)) },
2467
+ { simde_mm_set_pi16(INT16_C( -4346), INT16_C( -29603), INT16_C( 1361), INT16_C( 16092)),
2468
+ simde_mm_set_pi16(INT16_C( -4428), INT16_C( -25960), INT16_C( 7111), INT16_C( 29823)),
2469
+ simde_mm_set_pi16(INT16_C( -4428), INT16_C( -4346), INT16_C( -25960), INT16_C( -29603)) },
2470
+ { simde_mm_set_pi16(INT16_C( -22197), INT16_C( -13478), INT16_C( 29243), INT16_C( -7146)),
2471
+ simde_mm_set_pi16(INT16_C( -6022), INT16_C( -10408), INT16_C( -5121), INT16_C( -15640)),
2472
+ simde_mm_set_pi16(INT16_C( -6022), INT16_C( -22197), INT16_C( -10408), INT16_C( -13478)) },
2473
+ { simde_mm_set_pi16(INT16_C( -21336), INT16_C( 14878), INT16_C( 14164), INT16_C( 2727)),
2474
+ simde_mm_set_pi16(INT16_C( 12579), INT16_C( -20797), INT16_C( 18011), INT16_C( 5438)),
2475
+ simde_mm_set_pi16(INT16_C( 12579), INT16_C( -21336), INT16_C( -20797), INT16_C( 14878)) },
2476
+ { simde_mm_set_pi16(INT16_C( -20790), INT16_C( -21719), INT16_C( -12256), INT16_C( -17410)),
2477
+ simde_mm_set_pi16(INT16_C( 4576), INT16_C( 6842), INT16_C( -12668), INT16_C( -11854)),
2478
+ simde_mm_set_pi16(INT16_C( 4576), INT16_C( -20790), INT16_C( 6842), INT16_C( -21719)) },
2479
+ { simde_mm_set_pi16(INT16_C( -12751), INT16_C( 22951), INT16_C( -11466), INT16_C( -26387)),
2480
+ simde_mm_set_pi16(INT16_C( -27771), INT16_C( -31462), INT16_C( 14453), INT16_C( -2204)),
2481
+ simde_mm_set_pi16(INT16_C( -27771), INT16_C( -12751), INT16_C( -31462), INT16_C( 22951)) },
2482
+ { simde_mm_set_pi16(INT16_C( -15685), INT16_C( 13196), INT16_C( 17198), INT16_C( 29713)),
2483
+ simde_mm_set_pi16(INT16_C( 29600), INT16_C( -21832), INT16_C( -7500), INT16_C( 31712)),
2484
+ simde_mm_set_pi16(INT16_C( 29600), INT16_C( -15685), INT16_C( -21832), INT16_C( 13196)) },
2485
+ { simde_mm_set_pi16(INT16_C( -16681), INT16_C( -16529), INT16_C( 32728), INT16_C( 31459)),
2486
+ simde_mm_set_pi16(INT16_C( 20407), INT16_C( -12854), INT16_C( 18433), INT16_C( 3119)),
2487
+ simde_mm_set_pi16(INT16_C( 20407), INT16_C( -16681), INT16_C( -12854), INT16_C( -16529)) }
2488
+ };
2489
+
2490
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2491
+ simde__m64 r = simde_mm_unpackhi_pi16(test_vec[i].a, test_vec[i].b);
2492
+ simde_mm_empty();
2493
+ simde_assert_m64_i16(r, ==, test_vec[i].r);
2494
+ }
2495
+
2496
+ simde_mm_empty();
2497
+ return MUNIT_OK;
2498
+ }
2499
+
2500
+ static MunitResult
2501
+ test_simde_mm_unpackhi_pi32(const MunitParameter params[], void* data) {
2502
+ (void) params;
2503
+ (void) data;
2504
+
2505
+ const struct {
2506
+ simde__m64 a;
2507
+ simde__m64 b;
2508
+ simde__m64 r;
2509
+ } test_vec[8] = {
2510
+ { simde_mm_set_pi32(INT32_C( -1658263771), INT32_C( -1249023590)),
2511
+ simde_mm_set_pi32(INT32_C( -1692091894), INT32_C( 429039047)),
2512
+ simde_mm_set_pi32(INT32_C( -1692091894), INT32_C( -1658263771)) },
2513
+ { simde_mm_set_pi32(INT32_C( 900819254), INT32_C( -1069899126)),
2514
+ simde_mm_set_pi32(INT32_C( -400543833), INT32_C( -2013963668)),
2515
+ simde_mm_set_pi32(INT32_C( -400543833), INT32_C( 900819254)) },
2516
+ { simde_mm_set_pi32(INT32_C( -1005749657), INT32_C( -188276900)),
2517
+ simde_mm_set_pi32(INT32_C( 810155385), INT32_C( -436942778)),
2518
+ simde_mm_set_pi32(INT32_C( 810155385), INT32_C( -1005749657)) },
2519
+ { simde_mm_set_pi32(INT32_C( 43596265), INT32_C( -1556778284)),
2520
+ simde_mm_set_pi32(INT32_C( -1634766739), INT32_C( -297104207)),
2521
+ simde_mm_set_pi32(INT32_C( -1634766739), INT32_C( 43596265)) },
2522
+ { simde_mm_set_pi32(INT32_C( 820557065), INT32_C( 2171)),
2523
+ simde_mm_set_pi32(INT32_C( 1748389432), INT32_C( 1779087168)),
2524
+ simde_mm_set_pi32(INT32_C( 1748389432), INT32_C( 820557065)) },
2525
+ { simde_mm_set_pi32(INT32_C( -106826552), INT32_C( -791842435)),
2526
+ simde_mm_set_pi32(INT32_C( 2006847448), INT32_C( 484681450)),
2527
+ simde_mm_set_pi32(INT32_C( 2006847448), INT32_C( -106826552)) },
2528
+ { simde_mm_set_pi32(INT32_C( 1892029634), INT32_C( -899748289)),
2529
+ simde_mm_set_pi32(INT32_C( 1496471605), INT32_C( 840905121)),
2530
+ simde_mm_set_pi32(INT32_C( 1496471605), INT32_C( 1892029634)) },
2531
+ { simde_mm_set_pi32(INT32_C( 1293223526), INT32_C( -574905244)),
2532
+ simde_mm_set_pi32(INT32_C( 57909389), INT32_C( -70830945)),
2533
+ simde_mm_set_pi32(INT32_C( 57909389), INT32_C( 1293223526)) }
2534
+ };
2535
+
2536
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2537
+ simde__m64 r = simde_mm_unpackhi_pi32(test_vec[i].a, test_vec[i].b);
2538
+ simde_mm_empty();
2539
+ simde_assert_m64_i32(r, ==, test_vec[i].r);
2540
+ }
2541
+
2542
+ simde_mm_empty();
2543
+ return MUNIT_OK;
2544
+ }
2545
+
2546
+ static MunitResult
2547
+ test_simde_mm_unpacklo_pi8(const MunitParameter params[], void* data) {
2548
+ (void) params;
2549
+ (void) data;
2550
+
2551
+ const struct {
2552
+ simde__m64 a;
2553
+ simde__m64 b;
2554
+ simde__m64 r;
2555
+ } test_vec[8] = {
2556
+ { simde_mm_set_pi8(INT8_C( -15), INT8_C( -27), INT8_C( -29), INT8_C( 2), INT8_C( 11), INT8_C( 105), INT8_C( -49), INT8_C( 15)),
2557
+ simde_mm_set_pi8(INT8_C( -90), INT8_C( 43), INT8_C( 55), INT8_C( 50), INT8_C(-102), INT8_C( 25), INT8_C( -40), INT8_C( 47)),
2558
+ simde_mm_set_pi8(INT8_C(-102), INT8_C( 11), INT8_C( 25), INT8_C( 105), INT8_C( -40), INT8_C( -49), INT8_C( 47), INT8_C( 15)) },
2559
+ { simde_mm_set_pi8(INT8_C( 1), INT8_C( 83), INT8_C(-101), INT8_C( 117), INT8_C( -52), INT8_C( -74), INT8_C( -59), INT8_C( 121)),
2560
+ simde_mm_set_pi8(INT8_C(-102), INT8_C( 12), INT8_C( -28), INT8_C( 82), INT8_C(-122), INT8_C( 94), INT8_C( 127), INT8_C( -48)),
2561
+ simde_mm_set_pi8(INT8_C(-122), INT8_C( -52), INT8_C( 94), INT8_C( -74), INT8_C( 127), INT8_C( -59), INT8_C( -48), INT8_C( 121)) },
2562
+ { simde_mm_set_pi8(INT8_C( 13), INT8_C( 67), INT8_C( -73), INT8_C( -36), INT8_C( -93), INT8_C( 101), INT8_C(-107), INT8_C( 118)),
2563
+ simde_mm_set_pi8(INT8_C( 46), INT8_C( -72), INT8_C( -50), INT8_C( 34), INT8_C(-111), INT8_C( -17), INT8_C(-128), INT8_C(-126)),
2564
+ simde_mm_set_pi8(INT8_C(-111), INT8_C( -93), INT8_C( -17), INT8_C( 101), INT8_C(-128), INT8_C(-107), INT8_C(-126), INT8_C( 118)) },
2565
+ { simde_mm_set_pi8(INT8_C( 4), INT8_C( -40), INT8_C( -73), INT8_C( 122), INT8_C( 85), INT8_C( 7), INT8_C( -54), INT8_C(-119)),
2566
+ simde_mm_set_pi8(INT8_C( -37), INT8_C( -80), INT8_C(-128), INT8_C( 69), INT8_C( 112), INT8_C( 50), INT8_C( 44), INT8_C( -11)),
2567
+ simde_mm_set_pi8(INT8_C( 112), INT8_C( 85), INT8_C( 50), INT8_C( 7), INT8_C( 44), INT8_C( -54), INT8_C( -11), INT8_C(-119)) },
2568
+ { simde_mm_set_pi8(INT8_C(-113), INT8_C( 30), INT8_C( 68), INT8_C( 96), INT8_C( -94), INT8_C( -13), INT8_C( -38), INT8_C( -63)),
2569
+ simde_mm_set_pi8(INT8_C( -9), INT8_C( 29), INT8_C( 5), INT8_C( -22), INT8_C( 66), INT8_C( 94), INT8_C( -79), INT8_C( -1)),
2570
+ simde_mm_set_pi8(INT8_C( 66), INT8_C( -94), INT8_C( 94), INT8_C( -13), INT8_C( -79), INT8_C( -38), INT8_C( -1), INT8_C( -63)) },
2571
+ { simde_mm_set_pi8(INT8_C( -42), INT8_C( -42), INT8_C( 41), INT8_C( -13), INT8_C( -41), INT8_C( -33), INT8_C( -24), INT8_C( -5)),
2572
+ simde_mm_set_pi8(INT8_C( 85), INT8_C( 79), INT8_C( 19), INT8_C( -95), INT8_C( 42), INT8_C(-124), INT8_C( -96), INT8_C(-122)),
2573
+ simde_mm_set_pi8(INT8_C( 42), INT8_C( -41), INT8_C(-124), INT8_C( -33), INT8_C( -96), INT8_C( -24), INT8_C(-122), INT8_C( -5)) },
2574
+ { simde_mm_set_pi8(INT8_C( 28), INT8_C( 99), INT8_C( -57), INT8_C( 79), INT8_C( 40), INT8_C( -97), INT8_C( -80), INT8_C( 16)),
2575
+ simde_mm_set_pi8(INT8_C( 60), INT8_C( 0), INT8_C( -13), INT8_C( -90), INT8_C( 17), INT8_C( 14), INT8_C(-115), INT8_C( 116)),
2576
+ simde_mm_set_pi8(INT8_C( 17), INT8_C( 40), INT8_C( 14), INT8_C( -97), INT8_C(-115), INT8_C( -80), INT8_C( 116), INT8_C( 16)) },
2577
+ { simde_mm_set_pi8(INT8_C( -26), INT8_C( -43), INT8_C( -21), INT8_C( 73), INT8_C( 83), INT8_C( 33), INT8_C( 105), INT8_C( 57)),
2578
+ simde_mm_set_pi8(INT8_C( -29), INT8_C( 84), INT8_C( 15), INT8_C( -83), INT8_C( -51), INT8_C( 60), INT8_C( -18), INT8_C( 19)),
2579
+ simde_mm_set_pi8(INT8_C( -51), INT8_C( 83), INT8_C( 60), INT8_C( 33), INT8_C( -18), INT8_C( 105), INT8_C( 19), INT8_C( 57)) }
2580
+ };
2581
+
2582
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2583
+ simde__m64 r = simde_mm_unpacklo_pi8(test_vec[i].a, test_vec[i].b);
2584
+ simde_mm_empty();
2585
+ simde_assert_m64_i8(r, ==, test_vec[i].r);
2586
+ }
2587
+
2588
+ simde_mm_empty();
2589
+ return MUNIT_OK;
2590
+ }
2591
+
2592
+ static MunitResult
2593
+ test_simde_mm_unpacklo_pi16(const MunitParameter params[], void* data) {
2594
+ (void) params;
2595
+ (void) data;
2596
+
2597
+ const struct {
2598
+ simde__m64 a;
2599
+ simde__m64 b;
2600
+ simde__m64 r;
2601
+ } test_vec[8] = {
2602
+ { simde_mm_set_pi16(INT16_C( 14920), INT16_C( -14108), INT16_C( -18371), INT16_C( 4165)),
2603
+ simde_mm_set_pi16(INT16_C( 24125), INT16_C( 29535), INT16_C( 14450), INT16_C( 764)),
2604
+ simde_mm_set_pi16(INT16_C( 14450), INT16_C( -18371), INT16_C( 764), INT16_C( 4165)) },
2605
+ { simde_mm_set_pi16(INT16_C( -29305), INT16_C( -20968), INT16_C( -31863), INT16_C( 1945)),
2606
+ simde_mm_set_pi16(INT16_C( 22380), INT16_C( -9274), INT16_C( -14525), INT16_C( 28073)),
2607
+ simde_mm_set_pi16(INT16_C( -14525), INT16_C( -31863), INT16_C( 28073), INT16_C( 1945)) },
2608
+ { simde_mm_set_pi16(INT16_C( 29396), INT16_C( -4481), INT16_C( 16009), INT16_C( -7692)),
2609
+ simde_mm_set_pi16(INT16_C( 19262), INT16_C( -10592), INT16_C( 1200), INT16_C( -22541)),
2610
+ simde_mm_set_pi16(INT16_C( 1200), INT16_C( 16009), INT16_C( -22541), INT16_C( -7692)) },
2611
+ { simde_mm_set_pi16(INT16_C( 10778), INT16_C( -30276), INT16_C( 31580), INT16_C( 4144)),
2612
+ simde_mm_set_pi16(INT16_C( -15899), INT16_C( 20583), INT16_C( -12863), INT16_C( 13808)),
2613
+ simde_mm_set_pi16(INT16_C( -12863), INT16_C( 31580), INT16_C( 13808), INT16_C( 4144)) },
2614
+ { simde_mm_set_pi16(INT16_C( -30267), INT16_C( -14054), INT16_C( 22036), INT16_C( -6987)),
2615
+ simde_mm_set_pi16(INT16_C( -22296), INT16_C( 22035), INT16_C( -11029), INT16_C( 3882)),
2616
+ simde_mm_set_pi16(INT16_C( -11029), INT16_C( 22036), INT16_C( 3882), INT16_C( -6987)) },
2617
+ { simde_mm_set_pi16(INT16_C( 1373), INT16_C( 25788), INT16_C( -14639), INT16_C( 18996)),
2618
+ simde_mm_set_pi16(INT16_C( 6580), INT16_C( 13730), INT16_C( -12979), INT16_C( -26646)),
2619
+ simde_mm_set_pi16(INT16_C( -12979), INT16_C( -14639), INT16_C( -26646), INT16_C( 18996)) },
2620
+ { simde_mm_set_pi16(INT16_C( 27110), INT16_C( 18497), INT16_C( -15879), INT16_C( -18233)),
2621
+ simde_mm_set_pi16(INT16_C( -26068), INT16_C( -29214), INT16_C( 32362), INT16_C( -26103)),
2622
+ simde_mm_set_pi16(INT16_C( 32362), INT16_C( -15879), INT16_C( -26103), INT16_C( -18233)) },
2623
+ { simde_mm_set_pi16(INT16_C( -3448), INT16_C( 28151), INT16_C( 21394), INT16_C( 2546)),
2624
+ simde_mm_set_pi16(INT16_C( 30183), INT16_C( -1624), INT16_C( 11589), INT16_C( 23080)),
2625
+ simde_mm_set_pi16(INT16_C( 11589), INT16_C( 21394), INT16_C( 23080), INT16_C( 2546)) }
2626
+ };
2627
+
2628
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2629
+ simde__m64 r = simde_mm_unpacklo_pi16(test_vec[i].a, test_vec[i].b);
2630
+ simde_mm_empty();
2631
+ simde_assert_m64_i16(r, ==, test_vec[i].r);
2632
+ }
2633
+
2634
+ simde_mm_empty();
2635
+ return MUNIT_OK;
2636
+ }
2637
+
2638
+ static MunitResult
2639
+ test_simde_mm_unpacklo_pi32(const MunitParameter params[], void* data) {
2640
+ (void) params;
2641
+ (void) data;
2642
+
2643
+ const struct {
2644
+ simde__m64 a;
2645
+ simde__m64 b;
2646
+ simde__m64 r;
2647
+ } test_vec[8] = {
2648
+ { simde_mm_set_pi32(INT32_C( -996466818), INT32_C( 42237187)),
2649
+ simde_mm_set_pi32(INT32_C( -37002499), INT32_C( -1170856260)),
2650
+ simde_mm_set_pi32(INT32_C( -1170856260), INT32_C( 42237187)) },
2651
+ { simde_mm_set_pi32(INT32_C( 2063937130), INT32_C( 491318053)),
2652
+ simde_mm_set_pi32(INT32_C( -1702472225), INT32_C( 404431239)),
2653
+ simde_mm_set_pi32(INT32_C( 404431239), INT32_C( 491318053)) },
2654
+ { simde_mm_set_pi32(INT32_C( 482157619), INT32_C( 2096228641)),
2655
+ simde_mm_set_pi32(INT32_C( 1577000773), INT32_C( -1308575062)),
2656
+ simde_mm_set_pi32(INT32_C( -1308575062), INT32_C( 2096228641)) },
2657
+ { simde_mm_set_pi32(INT32_C( -296283078), INT32_C( -1136099560)),
2658
+ simde_mm_set_pi32(INT32_C( 813050106), INT32_C( 140703223)),
2659
+ simde_mm_set_pi32(INT32_C( 140703223), INT32_C( -1136099560)) },
2660
+ { simde_mm_set_pi32(INT32_C( -1874282519), INT32_C( 1046328641)),
2661
+ simde_mm_set_pi32(INT32_C( 1711474246), INT32_C( 663714514)),
2662
+ simde_mm_set_pi32(INT32_C( 663714514), INT32_C( 1046328641)) },
2663
+ { simde_mm_set_pi32(INT32_C( 414254548), INT32_C( -1137400610)),
2664
+ simde_mm_set_pi32(INT32_C( 1336205549), INT32_C( -1985285725)),
2665
+ simde_mm_set_pi32(INT32_C( -1985285725), INT32_C( -1137400610)) },
2666
+ { simde_mm_set_pi32(INT32_C( -1928184284), INT32_C( 711404402)),
2667
+ simde_mm_set_pi32(INT32_C( 894723783), INT32_C( -331643442)),
2668
+ simde_mm_set_pi32(INT32_C( -331643442), INT32_C( 711404402)) },
2669
+ { simde_mm_set_pi32(INT32_C( -1171624194), INT32_C( -943645737)),
2670
+ simde_mm_set_pi32(INT32_C( -1212436628), INT32_C( -1787000320)),
2671
+ simde_mm_set_pi32(INT32_C( -1787000320), INT32_C( -943645737)) }
2672
+ };
2673
+
2674
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2675
+ simde__m64 r = simde_mm_unpacklo_pi32(test_vec[i].a, test_vec[i].b);
2676
+ simde_mm_empty();
2677
+ simde_assert_m64_i32(r, ==, test_vec[i].r);
2678
+ }
2679
+
2680
+ simde_mm_empty();
2681
+ return MUNIT_OK;
2682
+ }
2683
+
2684
+ static MunitResult
2685
+ test_simde_mm_xor_si64(const MunitParameter params[], void* data) {
2686
+ (void) params;
2687
+ (void) data;
2688
+
2689
+ const struct {
2690
+ simde__m64 a;
2691
+ simde__m64 b;
2692
+ simde__m64 r;
2693
+ } test_vec[8] = {
2694
+ { simde_mm_cvtsi64_m64(INT64_C( 3540462192578516470)),
2695
+ simde_mm_cvtsi64_m64(INT64_C( 7953957601195225655)),
2696
+ simde_mm_cvtsi64_m64(INT64_C( 6863518614534072257)) },
2697
+ { simde_mm_cvtsi64_m64(INT64_C( 3280097856998777041)),
2698
+ simde_mm_cvtsi64_m64(INT64_C( 7227524436289590224)),
2699
+ simde_mm_cvtsi64_m64(INT64_C( 5316618871007982337)) },
2700
+ { simde_mm_cvtsi64_m64(INT64_C( -73768962290391525)),
2701
+ simde_mm_cvtsi64_m64(INT64_C( -8786938381172726443)),
2702
+ simde_mm_cvtsi64_m64(INT64_C( 8716556128933069646)) },
2703
+ { simde_mm_cvtsi64_m64(INT64_C( -3834999859910724293)),
2704
+ simde_mm_cvtsi64_m64(INT64_C( 1473106142712794056)),
2705
+ simde_mm_cvtsi64_m64(INT64_C( -2398499088890937613)) },
2706
+ { simde_mm_cvtsi64_m64(INT64_C( -2129742113263669437)),
2707
+ simde_mm_cvtsi64_m64(INT64_C( 8747348426473787001)),
2708
+ simde_mm_cvtsi64_m64(INT64_C( -7271780848289947334)) },
2709
+ { simde_mm_cvtsi64_m64(INT64_C( 3415454954475332549)),
2710
+ simde_mm_cvtsi64_m64(INT64_C( -4751919769270097997)),
2711
+ simde_mm_cvtsi64_m64(INT64_C( -7968019982084324234)) },
2712
+ { simde_mm_cvtsi64_m64(INT64_C( 2939655727369393330)),
2713
+ simde_mm_cvtsi64_m64(INT64_C( -201574666518844870)),
2714
+ simde_mm_cvtsi64_m64(INT64_C( -3028638143195201912)) },
2715
+ { simde_mm_cvtsi64_m64(INT64_C( 2745915445215058834)),
2716
+ simde_mm_cvtsi64_m64(INT64_C( 3063327936426889284)),
2717
+ simde_mm_cvtsi64_m64(INT64_C( 907566634544925654)) }
2718
+ };
2719
+
2720
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2721
+ simde__m64 r = simde_mm_xor_si64(test_vec[i].a, test_vec[i].b);
2722
+ simde_mm_empty();
2723
+ simde_assert_m64_i64(r, ==, test_vec[i].r);
2724
+ }
2725
+
2726
+ simde_mm_empty();
2727
+ return MUNIT_OK;
2728
+ }
2729
+
2730
+ static MunitResult
2731
+ test_simde_m_to_int(const MunitParameter params[], void* data) {
2732
+ (void) params;
2733
+ (void) data;
2734
+
2735
+ const struct {
2736
+ simde__m64 a;
2737
+ int32_t r;
2738
+ } test_vec[8] = {
2739
+ { simde_mm_set_pi32(INT32_C( 187717888), INT32_C( 752961943)), INT32_C( 752961943) },
2740
+ { simde_mm_set_pi32(INT32_C( 1573710578), INT32_C( 101880394)), INT32_C( 101880394) },
2741
+ { simde_mm_set_pi32(INT32_C( 1011596849), INT32_C( 885891666)), INT32_C( 885891666) },
2742
+ { simde_mm_set_pi32(INT32_C( -1107434699), INT32_C( -838173825)), INT32_C( -838173825) },
2743
+ { simde_mm_set_pi32(INT32_C( 1945069486), INT32_C( 466583902)), INT32_C( 466583902) },
2744
+ { simde_mm_set_pi32(INT32_C( 458761181), INT32_C( 257379889)), INT32_C( 257379889) },
2745
+ { simde_mm_set_pi32(INT32_C( 848486959), INT32_C( -1415343346)), INT32_C( -1415343346) },
2746
+ { simde_mm_set_pi32(INT32_C( -1452285617), INT32_C( -1697816479)), INT32_C( -1697816479) }
2747
+ };
2748
+
2749
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2750
+ int32_t r = simde_m_to_int(test_vec[i].a);
2751
+ simde_mm_empty();
2752
+ munit_assert_int32(r, ==, test_vec[i].r);
2753
+ }
2754
+
2755
+ simde_mm_empty();
2756
+ return MUNIT_OK;
2757
+ }
2758
+
2759
+ static MunitResult
2760
+ test_simde_m_to_int64(const MunitParameter params[], void* data) {
2761
+ (void) params;
2762
+ (void) data;
2763
+
2764
+ const struct {
2765
+ simde__m64 a;
2766
+ int64_t r;
2767
+ } test_vec[8] = {
2768
+ { simde_mm_cvtsi64_m64(INT64_C( -2003895301208818234)), INT64_C( -2003895301208818234) },
2769
+ { simde_mm_cvtsi64_m64(INT64_C( -372926738147273591)), INT64_C( -372926738147273591) },
2770
+ { simde_mm_cvtsi64_m64(INT64_C( -3656592147926155100)), INT64_C( -3656592147926155100) },
2771
+ { simde_mm_cvtsi64_m64(INT64_C( 5100863564862776395)), INT64_C( 5100863564862776395) },
2772
+ { simde_mm_cvtsi64_m64(INT64_C( -214027610699488575)), INT64_C( -214027610699488575) },
2773
+ { simde_mm_cvtsi64_m64(INT64_C( -7630939822071486777)), INT64_C( -7630939822071486777) },
2774
+ { simde_mm_cvtsi64_m64(INT64_C( 9123236376678660233)), INT64_C( 9123236376678660233) },
2775
+ { simde_mm_cvtsi64_m64(INT64_C( 3260252501062812952)), INT64_C( 3260252501062812952) }
2776
+ };
2777
+
2778
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2779
+ int64_t r = simde_m_to_int64(test_vec[i].a);
2780
+ simde_mm_empty();
2781
+ munit_assert_int64(r, ==, test_vec[i].r);
2782
+ }
2783
+
2784
+ simde_mm_empty();
2785
+ return MUNIT_OK;
2786
+ }
2787
+
2788
+ #endif /* defined(SIMDE_MMX_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
2789
+
2790
+ HEDLEY_DIAGNOSTIC_PUSH
2791
+ HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL
2792
+
2793
+ static MunitTest test_suite_tests[] = {
2794
+ #if defined(SIMDE_MMX_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)
2795
+ SIMDE_TESTS_DEFINE_TEST(mm_set1_pi8),
2796
+ SIMDE_TESTS_DEFINE_TEST(mm_set1_pi16),
2797
+ SIMDE_TESTS_DEFINE_TEST(mm_set1_pi32),
2798
+ SIMDE_TESTS_DEFINE_TEST(mm_setr_pi8),
2799
+ SIMDE_TESTS_DEFINE_TEST(mm_setr_pi16),
2800
+ SIMDE_TESTS_DEFINE_TEST(mm_setr_pi32),
2801
+ SIMDE_TESTS_DEFINE_TEST(mm_add_pi8),
2802
+ SIMDE_TESTS_DEFINE_TEST(mm_add_pi16),
2803
+ SIMDE_TESTS_DEFINE_TEST(mm_add_pi32),
2804
+ SIMDE_TESTS_DEFINE_TEST(mm_adds_pi8),
2805
+ SIMDE_TESTS_DEFINE_TEST(mm_adds_pi16),
2806
+ SIMDE_TESTS_DEFINE_TEST(mm_adds_pu8),
2807
+ SIMDE_TESTS_DEFINE_TEST(mm_adds_pu16),
2808
+ SIMDE_TESTS_DEFINE_TEST(mm_and_si64),
2809
+ SIMDE_TESTS_DEFINE_TEST(mm_andnot_si64),
2810
+ SIMDE_TESTS_DEFINE_TEST(mm_cmpeq_pi8),
2811
+ SIMDE_TESTS_DEFINE_TEST(mm_cmpeq_pi16),
2812
+ SIMDE_TESTS_DEFINE_TEST(mm_cmpeq_pi32),
2813
+ SIMDE_TESTS_DEFINE_TEST(mm_cmpgt_pi8),
2814
+ SIMDE_TESTS_DEFINE_TEST(mm_cmpgt_pi16),
2815
+ SIMDE_TESTS_DEFINE_TEST(mm_cmpgt_pi32),
2816
+ SIMDE_TESTS_DEFINE_TEST(mm_cvtm64_si64),
2817
+ SIMDE_TESTS_DEFINE_TEST(mm_cvtsi32_si64),
2818
+ SIMDE_TESTS_DEFINE_TEST(mm_cvtsi64_m64),
2819
+ SIMDE_TESTS_DEFINE_TEST(mm_cvtsi64_si32),
2820
+ SIMDE_TESTS_DEFINE_TEST(mm_madd_pi16),
2821
+ SIMDE_TESTS_DEFINE_TEST(mm_mulhi_pi16),
2822
+ SIMDE_TESTS_DEFINE_TEST(mm_mullo_pi16),
2823
+ SIMDE_TESTS_DEFINE_TEST(mm_or_si64),
2824
+ SIMDE_TESTS_DEFINE_TEST(mm_packs_pi16),
2825
+ SIMDE_TESTS_DEFINE_TEST(mm_packs_pi32),
2826
+ SIMDE_TESTS_DEFINE_TEST(mm_packs_pu16),
2827
+ SIMDE_TESTS_DEFINE_TEST(mm_sll_pi16),
2828
+ SIMDE_TESTS_DEFINE_TEST(mm_sll_pi32),
2829
+ SIMDE_TESTS_DEFINE_TEST(mm_sll_si64),
2830
+ SIMDE_TESTS_DEFINE_TEST(mm_slli_pi16),
2831
+ SIMDE_TESTS_DEFINE_TEST(mm_slli_pi32),
2832
+ SIMDE_TESTS_DEFINE_TEST(mm_slli_si64),
2833
+ SIMDE_TESTS_DEFINE_TEST(mm_srl_pi16),
2834
+ SIMDE_TESTS_DEFINE_TEST(mm_srl_pi32),
2835
+ SIMDE_TESTS_DEFINE_TEST(mm_srl_si64),
2836
+ SIMDE_TESTS_DEFINE_TEST(mm_srli_pi16),
2837
+ SIMDE_TESTS_DEFINE_TEST(mm_srli_pi32),
2838
+ SIMDE_TESTS_DEFINE_TEST(mm_srli_si64),
2839
+ SIMDE_TESTS_DEFINE_TEST(mm_srai_pi16),
2840
+ SIMDE_TESTS_DEFINE_TEST(mm_srai_pi32),
2841
+ SIMDE_TESTS_DEFINE_TEST(mm_sra_pi16),
2842
+ SIMDE_TESTS_DEFINE_TEST(mm_sra_pi32),
2843
+ SIMDE_TESTS_DEFINE_TEST(mm_sub_pi8),
2844
+ SIMDE_TESTS_DEFINE_TEST(mm_sub_pi16),
2845
+ SIMDE_TESTS_DEFINE_TEST(mm_sub_pi32),
2846
+ SIMDE_TESTS_DEFINE_TEST(mm_subs_pi8),
2847
+ SIMDE_TESTS_DEFINE_TEST(mm_subs_pi16),
2848
+ SIMDE_TESTS_DEFINE_TEST(mm_subs_pu8),
2849
+ SIMDE_TESTS_DEFINE_TEST(mm_subs_pu16),
2850
+ SIMDE_TESTS_DEFINE_TEST(mm_unpackhi_pi8),
2851
+ SIMDE_TESTS_DEFINE_TEST(mm_unpackhi_pi16),
2852
+ SIMDE_TESTS_DEFINE_TEST(mm_unpackhi_pi32),
2853
+ SIMDE_TESTS_DEFINE_TEST(mm_unpacklo_pi8),
2854
+ SIMDE_TESTS_DEFINE_TEST(mm_unpacklo_pi16),
2855
+ SIMDE_TESTS_DEFINE_TEST(mm_unpacklo_pi32),
2856
+ SIMDE_TESTS_DEFINE_TEST(mm_xor_si64),
2857
+ SIMDE_TESTS_DEFINE_TEST(m_to_int),
2858
+ SIMDE_TESTS_DEFINE_TEST(m_to_int64),
2859
+ #endif /* defined(SIMDE_MMX_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
2860
+
2861
+ { NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }
2862
+ };
2863
+
2864
+ HEDLEY_C_DECL MunitSuite* SIMDE_TESTS_GENERATE_SYMBOL(suite)(void) {
2865
+ static MunitSuite suite = { (char*) "/" HEDLEY_STRINGIFY(SIMDE_TESTS_CURRENT_ISAX), test_suite_tests, NULL, 1, MUNIT_SUITE_OPTION_NONE };
2866
+
2867
+ return &suite;
2868
+ }
2869
+
2870
#if defined(SIMDE_TESTS_SINGLE_ISAX)
/* Entry point compiled only when SIMDE_TESTS_SINGLE_ISAX is defined: wraps
 * test_suite_tests in a suite with an empty name and passes it, together with
 * the command-line arguments, to munit_suite_main, whose return value becomes
 * the process exit status. */
int main(int argc, char* argv[HEDLEY_ARRAY_PARAM(argc + 1)]) {
  static MunitSuite standalone_suite = {
    "",
    test_suite_tests,
    NULL,
    1,
    MUNIT_SUITE_OPTION_NONE
  };
  const int status = munit_suite_main(&standalone_suite, NULL, argc, argv);

  return status;
}
#endif /* defined(SIMDE_TESTS_SINGLE_ISAX) */
2877
+
2878
+ HEDLEY_DIAGNOSTIC_POP