minimap2 0.2.25.1 → 0.2.26.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (109) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +9 -0
  3. data/ext/Rakefile +2 -2
  4. data/ext/minimap2/NEWS.md +9 -0
  5. data/ext/minimap2/README.md +2 -2
  6. data/ext/minimap2/cookbook.md +2 -2
  7. data/ext/minimap2/minimap.h +1 -1
  8. data/ext/minimap2/minimap2.1 +1 -1
  9. data/ext/minimap2/misc/paftools.js +1 -1
  10. data/ext/minimap2/python/mappy.pyx +1 -1
  11. data/ext/minimap2/setup.py +22 -32
  12. data/lib/minimap2/version.rb +1 -1
  13. metadata +1 -97
  14. data/ext/minimap2/lib/simde/CONTRIBUTING.md +0 -114
  15. data/ext/minimap2/lib/simde/COPYING +0 -20
  16. data/ext/minimap2/lib/simde/README.md +0 -333
  17. data/ext/minimap2/lib/simde/amalgamate.py +0 -58
  18. data/ext/minimap2/lib/simde/meson.build +0 -33
  19. data/ext/minimap2/lib/simde/netlify.toml +0 -20
  20. data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +0 -140
  21. data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +0 -137
  22. data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +0 -142
  23. data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +0 -145
  24. data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +0 -140
  25. data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +0 -145
  26. data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +0 -140
  27. data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +0 -143
  28. data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +0 -137
  29. data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +0 -141
  30. data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +0 -147
  31. data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +0 -141
  32. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +0 -134
  33. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +0 -138
  34. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +0 -134
  35. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +0 -137
  36. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +0 -131
  37. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +0 -135
  38. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +0 -141
  39. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +0 -135
  40. data/ext/minimap2/lib/simde/simde/arm/neon.h +0 -97
  41. data/ext/minimap2/lib/simde/simde/check.h +0 -267
  42. data/ext/minimap2/lib/simde/simde/debug-trap.h +0 -83
  43. data/ext/minimap2/lib/simde/simde/hedley.h +0 -1899
  44. data/ext/minimap2/lib/simde/simde/simde-arch.h +0 -445
  45. data/ext/minimap2/lib/simde/simde/simde-common.h +0 -697
  46. data/ext/minimap2/lib/simde/simde/x86/avx.h +0 -5385
  47. data/ext/minimap2/lib/simde/simde/x86/avx2.h +0 -2402
  48. data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +0 -391
  49. data/ext/minimap2/lib/simde/simde/x86/avx512f.h +0 -3389
  50. data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +0 -112
  51. data/ext/minimap2/lib/simde/simde/x86/fma.h +0 -659
  52. data/ext/minimap2/lib/simde/simde/x86/mmx.h +0 -2210
  53. data/ext/minimap2/lib/simde/simde/x86/sse.h +0 -3696
  54. data/ext/minimap2/lib/simde/simde/x86/sse2.h +0 -5991
  55. data/ext/minimap2/lib/simde/simde/x86/sse3.h +0 -343
  56. data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +0 -1783
  57. data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +0 -105
  58. data/ext/minimap2/lib/simde/simde/x86/ssse3.h +0 -1053
  59. data/ext/minimap2/lib/simde/simde/x86/svml.h +0 -543
  60. data/ext/minimap2/lib/simde/test/CMakeLists.txt +0 -166
  61. data/ext/minimap2/lib/simde/test/arm/meson.build +0 -4
  62. data/ext/minimap2/lib/simde/test/arm/neon/meson.build +0 -23
  63. data/ext/minimap2/lib/simde/test/arm/neon/skel.c +0 -871
  64. data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +0 -134
  65. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +0 -39
  66. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +0 -10
  67. data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +0 -1260
  68. data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +0 -873
  69. data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +0 -1084
  70. data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +0 -1260
  71. data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +0 -18
  72. data/ext/minimap2/lib/simde/test/arm/test-arm.c +0 -20
  73. data/ext/minimap2/lib/simde/test/arm/test-arm.h +0 -8
  74. data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +0 -171
  75. data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +0 -68
  76. data/ext/minimap2/lib/simde/test/meson.build +0 -64
  77. data/ext/minimap2/lib/simde/test/munit/COPYING +0 -21
  78. data/ext/minimap2/lib/simde/test/munit/Makefile +0 -55
  79. data/ext/minimap2/lib/simde/test/munit/README.md +0 -54
  80. data/ext/minimap2/lib/simde/test/munit/example.c +0 -351
  81. data/ext/minimap2/lib/simde/test/munit/meson.build +0 -37
  82. data/ext/minimap2/lib/simde/test/munit/munit.c +0 -2055
  83. data/ext/minimap2/lib/simde/test/munit/munit.h +0 -535
  84. data/ext/minimap2/lib/simde/test/run-tests.c +0 -20
  85. data/ext/minimap2/lib/simde/test/run-tests.h +0 -260
  86. data/ext/minimap2/lib/simde/test/x86/avx.c +0 -13752
  87. data/ext/minimap2/lib/simde/test/x86/avx2.c +0 -9977
  88. data/ext/minimap2/lib/simde/test/x86/avx512bw.c +0 -2664
  89. data/ext/minimap2/lib/simde/test/x86/avx512f.c +0 -10416
  90. data/ext/minimap2/lib/simde/test/x86/avx512vl.c +0 -210
  91. data/ext/minimap2/lib/simde/test/x86/fma.c +0 -2557
  92. data/ext/minimap2/lib/simde/test/x86/meson.build +0 -33
  93. data/ext/minimap2/lib/simde/test/x86/mmx.c +0 -2878
  94. data/ext/minimap2/lib/simde/test/x86/skel.c +0 -2984
  95. data/ext/minimap2/lib/simde/test/x86/sse.c +0 -5121
  96. data/ext/minimap2/lib/simde/test/x86/sse2.c +0 -9860
  97. data/ext/minimap2/lib/simde/test/x86/sse3.c +0 -486
  98. data/ext/minimap2/lib/simde/test/x86/sse4.1.c +0 -3446
  99. data/ext/minimap2/lib/simde/test/x86/sse4.2.c +0 -101
  100. data/ext/minimap2/lib/simde/test/x86/ssse3.c +0 -2084
  101. data/ext/minimap2/lib/simde/test/x86/svml.c +0 -1545
  102. data/ext/minimap2/lib/simde/test/x86/test-avx.h +0 -16
  103. data/ext/minimap2/lib/simde/test/x86/test-avx512.h +0 -25
  104. data/ext/minimap2/lib/simde/test/x86/test-mmx.h +0 -13
  105. data/ext/minimap2/lib/simde/test/x86/test-sse.h +0 -13
  106. data/ext/minimap2/lib/simde/test/x86/test-sse2.h +0 -13
  107. data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +0 -196
  108. data/ext/minimap2/lib/simde/test/x86/test-x86.c +0 -48
  109. data/ext/minimap2/lib/simde/test/x86/test-x86.h +0 -8
@@ -1,2878 +0,0 @@
1
- /* Copyright (c) 2017 Evan Nemerson <evan@nemerson.com>
2
- *
3
- * Permission is hereby granted, free of charge, to any person
4
- * obtaining a copy of this software and associated documentation
5
- * files (the "Software"), to deal in the Software without
6
- * restriction, including without limitation the rights to use, copy,
7
- * modify, merge, publish, distribute, sublicense, and/or sell copies
8
- * of the Software, and to permit persons to whom the Software is
9
- * furnished to do so, subject to the following conditions:
10
- *
11
- * The above copyright notice and this permission notice shall be
12
- * included in all copies or substantial portions of the Software.
13
- *
14
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- * SOFTWARE.
22
- */
23
-
24
- #define SIMDE_TESTS_CURRENT_ISAX mmx
25
- #include <test/x86/test-mmx.h>
26
-
27
- #if defined(SIMDE_MMX_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)
28
-
29
- static MunitResult
30
- test_simde_mm_set1_pi8(const MunitParameter params[], void* data) {
31
- (void) params;
32
- (void) data;
33
-
34
- const struct {
35
- int8_t a;
36
- simde__m64 r;
37
- } test_vec[8] = {
38
- { INT8_C( -16),
39
- simde_mm_set_pi8(INT8_C( -16), INT8_C( -16), INT8_C( -16), INT8_C( -16),
40
- INT8_C( -16), INT8_C( -16), INT8_C( -16), INT8_C( -16)) },
41
- { INT8_C(-120),
42
- simde_mm_set_pi8(INT8_C(-120), INT8_C(-120), INT8_C(-120), INT8_C(-120),
43
- INT8_C(-120), INT8_C(-120), INT8_C(-120), INT8_C(-120)) },
44
- { INT8_C( 86),
45
- simde_mm_set_pi8(INT8_C( 86), INT8_C( 86), INT8_C( 86), INT8_C( 86),
46
- INT8_C( 86), INT8_C( 86), INT8_C( 86), INT8_C( 86)) },
47
- { INT8_C( -12),
48
- simde_mm_set_pi8(INT8_C( -12), INT8_C( -12), INT8_C( -12), INT8_C( -12),
49
- INT8_C( -12), INT8_C( -12), INT8_C( -12), INT8_C( -12)) },
50
- { INT8_C( 3),
51
- simde_mm_set_pi8(INT8_C( 3), INT8_C( 3), INT8_C( 3), INT8_C( 3),
52
- INT8_C( 3), INT8_C( 3), INT8_C( 3), INT8_C( 3)) },
53
- { INT8_C( 25),
54
- simde_mm_set_pi8(INT8_C( 25), INT8_C( 25), INT8_C( 25), INT8_C( 25),
55
- INT8_C( 25), INT8_C( 25), INT8_C( 25), INT8_C( 25)) },
56
- { INT8_C( 40),
57
- simde_mm_set_pi8(INT8_C( 40), INT8_C( 40), INT8_C( 40), INT8_C( 40),
58
- INT8_C( 40), INT8_C( 40), INT8_C( 40), INT8_C( 40)) },
59
- { INT8_C( -12),
60
- simde_mm_set_pi8(INT8_C( -12), INT8_C( -12), INT8_C( -12), INT8_C( -12),
61
- INT8_C( -12), INT8_C( -12), INT8_C( -12), INT8_C( -12)) }
62
- };
63
-
64
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
65
- simde__m64 r = simde_mm_set1_pi8(test_vec[i].a);
66
- simde_assert_m64_i8(r, ==, test_vec[i].r);
67
- }
68
-
69
- return MUNIT_OK;
70
- }
71
-
72
- static MunitResult
73
- test_simde_mm_set1_pi16(const MunitParameter params[], void* data) {
74
- (void) params;
75
- (void) data;
76
-
77
- int16_t v = HEDLEY_STATIC_CAST(int16_t, munit_rand_int_range(SHRT_MIN, SHRT_MAX));
78
-
79
- simde__m64 x = simde_mm_set1_pi16(v);
80
- int16_t* r = HEDLEY_REINTERPRET_CAST(int16_t*, &x);
81
-
82
- simde_mm_empty();
83
-
84
- munit_assert_int16(r[0], ==, v);
85
- munit_assert_int16(r[1], ==, v);
86
- munit_assert_int16(r[2], ==, v);
87
- munit_assert_int16(r[3], ==, v);
88
-
89
- return MUNIT_OK;
90
- }
91
-
92
- static MunitResult
93
- test_simde_mm_set1_pi32(const MunitParameter params[], void* data) {
94
- (void) params;
95
- (void) data;
96
-
97
- int32_t v = HEDLEY_STATIC_CAST(int32_t, munit_rand_int_range(INT32_MIN, INT32_MAX));
98
-
99
- simde__m64 x = simde_mm_set1_pi32(v);
100
- int32_t* r = HEDLEY_REINTERPRET_CAST(int32_t*, &x);
101
-
102
- simde_mm_empty();
103
-
104
- munit_assert_int32(r[0], ==, v);
105
- munit_assert_int32(r[1], ==, v);
106
-
107
- return MUNIT_OK;
108
- }
109
-
110
- static MunitResult
111
- test_simde_mm_setr_pi8(const MunitParameter params[], void* data) {
112
- (void) params;
113
- (void) data;
114
-
115
- int8_t d[8 / sizeof(int8_t)];
116
- munit_rand_memory(sizeof(d), HEDLEY_REINTERPRET_CAST(uint8_t*, d));
117
-
118
- simde__m64 x = simde_mm_setr_pi8(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7]);
119
- int8_t* c = HEDLEY_REINTERPRET_CAST(int8_t*, &x);
120
-
121
- simde_mm_empty();
122
-
123
- munit_assert_int8(c[0], ==, d[0]);
124
- munit_assert_int8(c[1], ==, d[1]);
125
- munit_assert_int8(c[2], ==, d[2]);
126
- munit_assert_int8(c[3], ==, d[3]);
127
- munit_assert_int8(c[4], ==, d[4]);
128
- munit_assert_int8(c[5], ==, d[5]);
129
- munit_assert_int8(c[6], ==, d[6]);
130
- munit_assert_int8(c[7], ==, d[7]);
131
-
132
- return MUNIT_OK;
133
- }
134
-
135
- static MunitResult
136
- test_simde_mm_setr_pi16(const MunitParameter params[], void* data) {
137
- (void) params;
138
- (void) data;
139
-
140
- int16_t d[8 / sizeof(int16_t)];
141
- munit_rand_memory(sizeof(d), HEDLEY_REINTERPRET_CAST(uint8_t*, d));
142
-
143
- simde__m64 x = simde_mm_setr_pi16(d[0], d[1], d[2], d[3]);
144
- int16_t* s = HEDLEY_REINTERPRET_CAST(int16_t*, &x);
145
-
146
- simde_mm_empty();
147
-
148
- munit_assert_int16(s[0], ==, d[0]);
149
- munit_assert_int16(s[1], ==, d[1]);
150
- munit_assert_int16(s[2], ==, d[2]);
151
- munit_assert_int16(s[3], ==, d[3]);
152
-
153
- return MUNIT_OK;
154
- }
155
-
156
- static MunitResult
157
- test_simde_mm_setr_pi32(const MunitParameter params[], void* data) {
158
- (void) params;
159
- (void) data;
160
-
161
- int32_t d[8 / sizeof(int32_t)];
162
- munit_rand_memory(sizeof(d), HEDLEY_REINTERPRET_CAST(uint8_t*, d));
163
-
164
- simde__m64 x = simde_mm_setr_pi32(d[0], d[1]);
165
- int32_t* i = HEDLEY_REINTERPRET_CAST(int32_t*, &x);
166
-
167
- simde_mm_empty();
168
-
169
- munit_assert_int32(i[0], ==, d[0]);
170
- munit_assert_int32(i[1], ==, d[1]);
171
-
172
- return MUNIT_OK;
173
- }
174
-
175
- static MunitResult
176
- test_simde_mm_add_pi8(const MunitParameter params[], void* data) {
177
- (void) params;
178
- (void) data;
179
-
180
- const struct {
181
- simde__m64 a;
182
- simde__m64 b;
183
- simde__m64 r;
184
- } test_vec[8] = {
185
- { simde_mm_set_pi8(INT8_C( 25), INT8_C(-106), INT8_C( 93), INT8_C( 86), INT8_C( -56), INT8_C( 101), INT8_C( 79), INT8_C( 83)),
186
- simde_mm_set_pi8(INT8_C( -38), INT8_C( -6), INT8_C( 47), INT8_C( 59), INT8_C( -67), INT8_C( -36), INT8_C( 127), INT8_C( 104)),
187
- simde_mm_set_pi8(INT8_C( -13), INT8_C(-112), INT8_C(-116), INT8_C(-111), INT8_C(-123), INT8_C( 65), INT8_C( -50), INT8_C( -69)) },
188
- { simde_mm_set_pi8(INT8_C(-105), INT8_C( 113), INT8_C( 22), INT8_C( -91), INT8_C( 59), INT8_C( -4), INT8_C( 67), INT8_C( 43)),
189
- simde_mm_set_pi8(INT8_C( -13), INT8_C( 93), INT8_C( 81), INT8_C( 108), INT8_C(-104), INT8_C( 123), INT8_C( 105), INT8_C( 119)),
190
- simde_mm_set_pi8(INT8_C(-118), INT8_C( -50), INT8_C( 103), INT8_C( 17), INT8_C( -45), INT8_C( 119), INT8_C( -84), INT8_C( -94)) },
191
- { simde_mm_set_pi8(INT8_C( -8), INT8_C( 52), INT8_C( 92), INT8_C( 121), INT8_C( 58), INT8_C(-104), INT8_C( 27), INT8_C( -80)),
192
- simde_mm_set_pi8(INT8_C( 62), INT8_C(-100), INT8_C( 5), INT8_C( -95), INT8_C( -16), INT8_C( 109), INT8_C( 127), INT8_C( 62)),
193
- simde_mm_set_pi8(INT8_C( 54), INT8_C( -48), INT8_C( 97), INT8_C( 26), INT8_C( 42), INT8_C( 5), INT8_C(-102), INT8_C( -18)) },
194
- { simde_mm_set_pi8(INT8_C( 32), INT8_C( 124), INT8_C( 115), INT8_C( 3), INT8_C( 104), INT8_C( 27), INT8_C( 43), INT8_C( -11)),
195
- simde_mm_set_pi8(INT8_C( -22), INT8_C( 27), INT8_C( -47), INT8_C( 45), INT8_C( -96), INT8_C( -49), INT8_C( -74), INT8_C( -34)),
196
- simde_mm_set_pi8(INT8_C( 10), INT8_C(-105), INT8_C( 68), INT8_C( 48), INT8_C( 8), INT8_C( -22), INT8_C( -31), INT8_C( -45)) },
197
- { simde_mm_set_pi8(INT8_C( -14), INT8_C( -79), INT8_C( -38), INT8_C( -93), INT8_C( -55), INT8_C( 83), INT8_C( 78), INT8_C( -90)),
198
- simde_mm_set_pi8(INT8_C( 91), INT8_C( -61), INT8_C(-124), INT8_C( -64), INT8_C( 76), INT8_C( -15), INT8_C(-117), INT8_C( 11)),
199
- simde_mm_set_pi8(INT8_C( 77), INT8_C( 116), INT8_C( 94), INT8_C( 99), INT8_C( 21), INT8_C( 68), INT8_C( -39), INT8_C( -79)) },
200
- { simde_mm_set_pi8(INT8_C(-119), INT8_C( 33), INT8_C( -57), INT8_C( 54), INT8_C( -18), INT8_C( 79), INT8_C( 86), INT8_C( -25)),
201
- simde_mm_set_pi8(INT8_C(-115), INT8_C(-114), INT8_C( 72), INT8_C(-126), INT8_C( -80), INT8_C( 114), INT8_C(-126), INT8_C( 42)),
202
- simde_mm_set_pi8(INT8_C( 22), INT8_C( -81), INT8_C( 15), INT8_C( -72), INT8_C( -98), INT8_C( -63), INT8_C( -40), INT8_C( 17)) },
203
- { simde_mm_set_pi8(INT8_C( 60), INT8_C( 109), INT8_C( 93), INT8_C( -45), INT8_C( -62), INT8_C(-104), INT8_C( -41), INT8_C( 72)),
204
- simde_mm_set_pi8(INT8_C( 72), INT8_C( -86), INT8_C( 21), INT8_C( 79), INT8_C( 43), INT8_C( 23), INT8_C( -74), INT8_C( -62)),
205
- simde_mm_set_pi8(INT8_C(-124), INT8_C( 23), INT8_C( 114), INT8_C( 34), INT8_C( -19), INT8_C( -81), INT8_C(-115), INT8_C( 10)) },
206
- { simde_mm_set_pi8(INT8_C( 110), INT8_C( 106), INT8_C( -94), INT8_C( 102), INT8_C( -82), INT8_C( 108), INT8_C( -12), INT8_C( -48)),
207
- simde_mm_set_pi8(INT8_C( 108), INT8_C( 3), INT8_C( -91), INT8_C( 65), INT8_C( 30), INT8_C( 106), INT8_C( -1), INT8_C( 100)),
208
- simde_mm_set_pi8(INT8_C( -38), INT8_C( 109), INT8_C( 71), INT8_C( -89), INT8_C( -52), INT8_C( -42), INT8_C( -13), INT8_C( 52)) }
209
- };
210
-
211
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
212
- simde__m64 r = simde_mm_add_pi8(test_vec[i].a, test_vec[i].b);
213
- simde_mm_empty();
214
- simde_assert_m64_i8(r, ==, test_vec[i].r);
215
- }
216
-
217
- simde_mm_empty();
218
- return MUNIT_OK;
219
- }
220
-
221
- static MunitResult
222
- test_simde_mm_add_pi16(const MunitParameter params[], void* data) {
223
- (void) params;
224
- (void) data;
225
-
226
- const struct {
227
- simde__m64 a;
228
- simde__m64 b;
229
- simde__m64 r;
230
- } test_vec[8] = {
231
- { simde_mm_set_pi16(INT16_C( -13258), INT16_C( -8776), INT16_C( 32365), INT16_C( -3887)),
232
- simde_mm_set_pi16(INT16_C( 20018), INT16_C( 23417), INT16_C( -774), INT16_C( 5810)),
233
- simde_mm_set_pi16(INT16_C( 6760), INT16_C( 14641), INT16_C( 31591), INT16_C( 1923)) },
234
- { simde_mm_set_pi16(INT16_C( 11335), INT16_C( 29732), INT16_C( 26059), INT16_C( -15004)),
235
- simde_mm_set_pi16(INT16_C( -13772), INT16_C( -20922), INT16_C( 1993), INT16_C( -30395)),
236
- simde_mm_set_pi16(INT16_C( -2437), INT16_C( 8810), INT16_C( 28052), INT16_C( 20137)) },
237
- { simde_mm_set_pi16(INT16_C( 159), INT16_C( 23628), INT16_C( -17224), INT16_C( -23288)),
238
- simde_mm_set_pi16(INT16_C( -18303), INT16_C( 7699), INT16_C( 22351), INT16_C( -16238)),
239
- simde_mm_set_pi16(INT16_C( -18144), INT16_C( 31327), INT16_C( 5127), INT16_C( 26010)) },
240
- { simde_mm_set_pi16(INT16_C( 9097), INT16_C( -5982), INT16_C( 28191), INT16_C( -32707)),
241
- simde_mm_set_pi16(INT16_C( -16920), INT16_C( -18039), INT16_C( -32259), INT16_C( 10405)),
242
- simde_mm_set_pi16(INT16_C( -7823), INT16_C( -24021), INT16_C( -4068), INT16_C( -22302)) },
243
- { simde_mm_set_pi16(INT16_C( 2097), INT16_C( 24451), INT16_C( 25533), INT16_C( -14205)),
244
- simde_mm_set_pi16(INT16_C( -28269), INT16_C( 4484), INT16_C( -22223), INT16_C( 17945)),
245
- simde_mm_set_pi16(INT16_C( -26172), INT16_C( 28935), INT16_C( 3310), INT16_C( 3740)) },
246
- { simde_mm_set_pi16(INT16_C( -17654), INT16_C( 12451), INT16_C( 12325), INT16_C( 5198)),
247
- simde_mm_set_pi16(INT16_C( -26590), INT16_C( 31889), INT16_C( -14656), INT16_C( 6378)),
248
- simde_mm_set_pi16(INT16_C( 21292), INT16_C( -21196), INT16_C( -2331), INT16_C( 11576)) },
249
- { simde_mm_set_pi16(INT16_C( 31498), INT16_C( -18726), INT16_C( -9720), INT16_C( -17042)),
250
- simde_mm_set_pi16(INT16_C( 17025), INT16_C( 13186), INT16_C( -25923), INT16_C( 15017)),
251
- simde_mm_set_pi16(INT16_C( -17013), INT16_C( -5540), INT16_C( 29893), INT16_C( -2025)) },
252
- { simde_mm_set_pi16(INT16_C( 9904), INT16_C( -28061), INT16_C( -32123), INT16_C( -1285)),
253
- simde_mm_set_pi16(INT16_C( -7190), INT16_C( -1918), INT16_C( 26654), INT16_C( -31449)),
254
- simde_mm_set_pi16(INT16_C( 2714), INT16_C( -29979), INT16_C( -5469), INT16_C( -32734)) }
255
- };
256
-
257
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
258
- simde__m64 r = simde_mm_add_pi16(test_vec[i].a, test_vec[i].b);
259
- simde_mm_empty();
260
- simde_assert_m64_i16(r, ==, test_vec[i].r);
261
- }
262
-
263
- simde_mm_empty();
264
- return MUNIT_OK;
265
- }
266
-
267
- static MunitResult
268
- test_simde_mm_add_pi32(const MunitParameter params[], void* data) {
269
- (void) params;
270
- (void) data;
271
-
272
- const struct {
273
- simde__m64 a;
274
- simde__m64 b;
275
- simde__m64 r;
276
- } test_vec[8] = {
277
- { simde_mm_set_pi32(INT32_C( -1528799955), INT32_C( -1825996932)),
278
- simde_mm_set_pi32(INT32_C( -1229665745), INT32_C( 989894561)),
279
- simde_mm_set_pi32(INT32_C( 1536501596), INT32_C( -836102371)) },
280
- { simde_mm_set_pi32(INT32_C( 1936809596), INT32_C( 1331021923)),
281
- simde_mm_set_pi32(INT32_C( -505769092), INT32_C( 1471336810)),
282
- simde_mm_set_pi32(INT32_C( 1431040504), INT32_C( -1492608563)) },
283
- { simde_mm_set_pi32(INT32_C( 783830780), INT32_C( 1923113282)),
284
- simde_mm_set_pi32(INT32_C( 1700161106), INT32_C( -175473923)),
285
- simde_mm_set_pi32(INT32_C( -1810975410), INT32_C( 1747639359)) },
286
- { simde_mm_set_pi32(INT32_C( 1195975755), INT32_C( 1329173130)),
287
- simde_mm_set_pi32(INT32_C( -611537759), INT32_C( 787308680)),
288
- simde_mm_set_pi32(INT32_C( 584437996), INT32_C( 2116481810)) },
289
- { simde_mm_set_pi32(INT32_C( 950103059), INT32_C( 570905377)),
290
- simde_mm_set_pi32(INT32_C( 1696944201), INT32_C( -1762697792)),
291
- simde_mm_set_pi32(INT32_C( -1647920036), INT32_C( -1191792415)) },
292
- { simde_mm_set_pi32(INT32_C( 40870864), INT32_C( 149169565)),
293
- simde_mm_set_pi32(INT32_C( 1633277631), INT32_C( -224026523)),
294
- simde_mm_set_pi32(INT32_C( 1674148495), INT32_C( -74856958)) },
295
- { simde_mm_set_pi32(INT32_C( -718937511), INT32_C( 1453252371)),
296
- simde_mm_set_pi32(INT32_C( 56683182), INT32_C( -594741944)),
297
- simde_mm_set_pi32(INT32_C( -662254329), INT32_C( 858510427)) },
298
- { simde_mm_set_pi32(INT32_C( -950411567), INT32_C( -1493828)),
299
- simde_mm_set_pi32(INT32_C( -1680249611), INT32_C( 321011369)),
300
- simde_mm_set_pi32(INT32_C( 1664306118), INT32_C( 319517541)) }
301
- };
302
-
303
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
304
- simde__m64 r = simde_mm_add_pi32(test_vec[i].a, test_vec[i].b);
305
- simde_mm_empty();
306
- simde_assert_m64_i32(r, ==, test_vec[i].r);
307
- }
308
-
309
- simde_mm_empty();
310
- return MUNIT_OK;
311
- }
312
-
313
- static MunitResult
314
- test_simde_mm_adds_pi8(const MunitParameter params[], void* data) {
315
- (void) params;
316
- (void) data;
317
-
318
- const struct {
319
- simde__m64 a;
320
- simde__m64 b;
321
- simde__m64 r;
322
- } test_vec[8] = {
323
- { simde_mm_set_pi8( 99, 16, -73, -73, 34, 32, 87, 42),
324
- simde_mm_set_pi8( -29, -82, -26, -38, 66, -51, 82, 53),
325
- simde_mm_set_pi8( 70, -66, -99, -111, 100, -19, 127, 95) },
326
- { simde_mm_set_pi8( -63, -116, -41, -11, -99, -60, -36, -15),
327
- simde_mm_set_pi8( 84, -113, 107, 81, -28, -25, -90, -115),
328
- simde_mm_set_pi8( 21, -128, 66, 70, -127, -85, -126, -128) },
329
- { simde_mm_set_pi8( -79, -104, -10, -65, 84, -40, -102, 75),
330
- simde_mm_set_pi8( 30, 54, 127, 16, -7, -31, -83, -89),
331
- simde_mm_set_pi8( -49, -50, 117, -49, 77, -71, -128, -14) },
332
- { simde_mm_set_pi8(-115, -50, 111, 104, -19, -48, 122, 59),
333
- simde_mm_set_pi8( -74, -15, 43, 9, 94, -81, -68, 15),
334
- simde_mm_set_pi8(-128, -65, 127, 113, 75, -128, 54, 74) },
335
- { simde_mm_set_pi8( 18, -79, 5, 80, 99, 108, 39, -27),
336
- simde_mm_set_pi8( 127, 44, 22, -80, -86, -11, 108, -95),
337
- simde_mm_set_pi8( 127, -35, 27, 0, 13, 97, 127, -122) },
338
- { simde_mm_set_pi8( -35, 62, 102, -79, 117, 108, 56, -21),
339
- simde_mm_set_pi8( 68, 119, -10, 17, 40, -124, -75, -39),
340
- simde_mm_set_pi8( 33, 127, 92, -62, 127, -16, -19, -60) },
341
- { simde_mm_set_pi8( 45, -5, -10, -4, -23, -76, -111, -38),
342
- simde_mm_set_pi8( 24, -15, -2, 75, 11, -108, -5, 124),
343
- simde_mm_set_pi8( 69, -20, -12, 71, -12, -128, -116, 86) },
344
- { simde_mm_set_pi8( 116, 38, 87, 5, -25, -119, 117, -12),
345
- simde_mm_set_pi8( -51, 25, -122, 40, -111, -50, -55, -109),
346
- simde_mm_set_pi8( 65, 63, -35, 45, -128, -128, 62, -121) }
347
- };
348
-
349
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
350
- simde__m64 r = simde_mm_adds_pi8(test_vec[i].a, test_vec[i].b);
351
- simde_mm_empty();
352
- simde_assert_m64_i8(r, ==, test_vec[i].r);
353
- }
354
-
355
- simde_mm_empty();
356
- return MUNIT_OK;
357
- }
358
-
359
- static MunitResult
360
- test_simde_mm_adds_pu8(const MunitParameter params[], void* data) {
361
- (void) params;
362
- (void) data;
363
-
364
- const struct {
365
- simde__m64 a;
366
- simde__m64 b;
367
- simde__m64 r;
368
- } test_vec[8] = {
369
- { simde_x_mm_set_pu8(UINT8_C( 81), UINT8_C( 21), UINT8_C( 204), UINT8_C( 252),
370
- UINT8_C( 129), UINT8_C( 215), UINT8_C( 184), UINT8_C( 80)),
371
- simde_x_mm_set_pu8(UINT8_C( 23), UINT8_C( 216), UINT8_C( 110), UINT8_C( 125),
372
- UINT8_C( 171), UINT8_C( 145), UINT8_C( 61), UINT8_C( 141)),
373
- simde_x_mm_set_pu8(UINT8_C( 104), UINT8_C( 237), UINT8_C( 255), UINT8_C( 255),
374
- UINT8_C( 255), UINT8_C( 255), UINT8_C( 245), UINT8_C( 221)) },
375
- { simde_x_mm_set_pu8(UINT8_C( 239), UINT8_C( 124), UINT8_C( 164), UINT8_C( 178),
376
- UINT8_C( 97), UINT8_C( 133), UINT8_C( 53), UINT8_C( 7)),
377
- simde_x_mm_set_pu8(UINT8_C( 55), UINT8_C( 60), UINT8_C( 93), UINT8_C( 144),
378
- UINT8_C( 87), UINT8_C( 38), UINT8_C( 29), UINT8_C( 227)),
379
- simde_x_mm_set_pu8(UINT8_C( 255), UINT8_C( 184), UINT8_C( 255), UINT8_C( 255),
380
- UINT8_C( 184), UINT8_C( 171), UINT8_C( 82), UINT8_C( 234)) },
381
- { simde_x_mm_set_pu8(UINT8_C( 2), UINT8_C( 239), UINT8_C( 120), UINT8_C( 239),
382
- UINT8_C( 57), UINT8_C( 159), UINT8_C( 235), UINT8_C( 22)),
383
- simde_x_mm_set_pu8(UINT8_C( 220), UINT8_C( 9), UINT8_C( 135), UINT8_C( 55),
384
- UINT8_C( 21), UINT8_C( 1), UINT8_C( 123), UINT8_C( 167)),
385
- simde_x_mm_set_pu8(UINT8_C( 222), UINT8_C( 248), UINT8_C( 255), UINT8_C( 255),
386
- UINT8_C( 78), UINT8_C( 160), UINT8_C( 255), UINT8_C( 189)) },
387
- { simde_x_mm_set_pu8(UINT8_C( 169), UINT8_C( 122), UINT8_C( 209), UINT8_C( 107),
388
- UINT8_C( 53), UINT8_C( 194), UINT8_C( 157), UINT8_C( 250)),
389
- simde_x_mm_set_pu8(UINT8_C( 190), UINT8_C( 161), UINT8_C( 50), UINT8_C( 2),
390
- UINT8_C( 227), UINT8_C( 196), UINT8_C( 34), UINT8_C( 128)),
391
- simde_x_mm_set_pu8(UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 109),
392
- UINT8_C( 255), UINT8_C( 255), UINT8_C( 191), UINT8_C( 255)) },
393
- { simde_x_mm_set_pu8(UINT8_C( 127), UINT8_C( 206), UINT8_C( 75), UINT8_C( 228),
394
- UINT8_C( 24), UINT8_C( 253), UINT8_C( 247), UINT8_C( 227)),
395
- simde_x_mm_set_pu8(UINT8_C( 199), UINT8_C( 181), UINT8_C( 197), UINT8_C( 15),
396
- UINT8_C( 201), UINT8_C( 118), UINT8_C( 220), UINT8_C( 22)),
397
- simde_x_mm_set_pu8(UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 243),
398
- UINT8_C( 225), UINT8_C( 255), UINT8_C( 255), UINT8_C( 249)) },
399
- { simde_x_mm_set_pu8(UINT8_C( 160), UINT8_C( 45), UINT8_C( 121), UINT8_C( 199),
400
- UINT8_C( 155), UINT8_C( 201), UINT8_C( 54), UINT8_C( 92)),
401
- simde_x_mm_set_pu8(UINT8_C( 29), UINT8_C( 158), UINT8_C( 69), UINT8_C( 12),
402
- UINT8_C( 220), UINT8_C( 133), UINT8_C( 37), UINT8_C( 27)),
403
- simde_x_mm_set_pu8(UINT8_C( 189), UINT8_C( 203), UINT8_C( 190), UINT8_C( 211),
404
- UINT8_C( 255), UINT8_C( 255), UINT8_C( 91), UINT8_C( 119)) },
405
- { simde_x_mm_set_pu8(UINT8_C( 173), UINT8_C( 130), UINT8_C( 79), UINT8_C( 240),
406
- UINT8_C( 183), UINT8_C( 112), UINT8_C( 65), UINT8_C( 13)),
407
- simde_x_mm_set_pu8(UINT8_C( 24), UINT8_C( 152), UINT8_C( 239), UINT8_C( 128),
408
- UINT8_C( 83), UINT8_C( 69), UINT8_C( 122), UINT8_C( 121)),
409
- simde_x_mm_set_pu8(UINT8_C( 197), UINT8_C( 255), UINT8_C( 255), UINT8_C( 255),
410
- UINT8_C( 255), UINT8_C( 181), UINT8_C( 187), UINT8_C( 134)) },
411
- { simde_x_mm_set_pu8(UINT8_C( 242), UINT8_C( 255), UINT8_C( 149), UINT8_C( 159),
412
- UINT8_C( 60), UINT8_C( 134), UINT8_C( 24), UINT8_C( 232)),
413
- simde_x_mm_set_pu8(UINT8_C( 209), UINT8_C( 150), UINT8_C( 4), UINT8_C( 97),
414
- UINT8_C( 136), UINT8_C( 88), UINT8_C( 70), UINT8_C( 193)),
415
- simde_x_mm_set_pu8(UINT8_C( 255), UINT8_C( 255), UINT8_C( 153), UINT8_C( 255),
416
- UINT8_C( 196), UINT8_C( 222), UINT8_C( 94), UINT8_C( 255)) }
417
- };
418
-
419
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
420
- simde__m64 r = simde_mm_adds_pu8(test_vec[i].a, test_vec[i].b);
421
- simde_assert_m64_u8(r, ==, test_vec[i].r);
422
- }
423
-
424
- return MUNIT_OK;
425
- }
426
-
427
- static MunitResult
428
- test_simde_mm_adds_pi16(const MunitParameter params[], void* data) {
429
- (void) params;
430
- (void) data;
431
-
432
- const struct {
433
- simde__m64 a;
434
- simde__m64 b;
435
- simde__m64 r;
436
- } test_vec[8] = {
437
- { simde_mm_set_pi16(INT16_C( -31309), INT16_C( -5581), INT16_C( -13514), INT16_C( -24682)),
438
- simde_mm_set_pi16(INT16_C( 19892), INT16_C( -12160), INT16_C( 3266), INT16_C( 9002)),
439
- simde_mm_set_pi16(INT16_C( -11417), INT16_C( -17741), INT16_C( -10248), INT16_C( -15680)) },
440
- { simde_mm_set_pi16(INT16_C( 20564), INT16_C( -25554), INT16_C( 18522), INT16_C( -107)),
441
- simde_mm_set_pi16(INT16_C( 12328), INT16_C( 12883), INT16_C( 2251), INT16_C( -19119)),
442
- simde_mm_set_pi16(INT16_C( 32767), INT16_C( -12671), INT16_C( 20773), INT16_C( -19226)) },
443
- { simde_mm_set_pi16(INT16_C( 20106), INT16_C( -15513), INT16_C( -25552), INT16_C( -23751)),
444
- simde_mm_set_pi16(INT16_C( 11380), INT16_C( 4698), INT16_C( 16886), INT16_C( 11304)),
445
- simde_mm_set_pi16(INT16_C( 31486), INT16_C( -10815), INT16_C( -8666), INT16_C( -12447)) },
446
- { simde_mm_set_pi16(INT16_C( -30807), INT16_C( -12488), INT16_C( 12150), INT16_C( 344)),
447
- simde_mm_set_pi16(INT16_C( -21735), INT16_C( 11424), INT16_C( 19342), INT16_C( -22640)),
448
- simde_mm_set_pi16(INT16_C( -32768), INT16_C( -1064), INT16_C( 31492), INT16_C( -22296)) },
449
- { simde_mm_set_pi16(INT16_C( 23188), INT16_C( -20941), INT16_C( 26991), INT16_C( -11383)),
450
- simde_mm_set_pi16(INT16_C( 20582), INT16_C( 6628), INT16_C( 32097), INT16_C( 23397)),
451
- simde_mm_set_pi16(INT16_C( 32767), INT16_C( -14313), INT16_C( 32767), INT16_C( 12014)) },
452
- { simde_mm_set_pi16(INT16_C( 1789), INT16_C( 28566), INT16_C( 18995), INT16_C( -32500)),
453
- simde_mm_set_pi16(INT16_C( -32609), INT16_C( -30393), INT16_C( 1798), INT16_C( 28485)),
454
- simde_mm_set_pi16(INT16_C( -30820), INT16_C( -1827), INT16_C( 20793), INT16_C( -4015)) },
455
- { simde_mm_set_pi16(INT16_C( 18491), INT16_C( -11781), INT16_C( -27491), INT16_C( 337)),
456
- simde_mm_set_pi16(INT16_C( 420), INT16_C( 28774), INT16_C( -31111), INT16_C( 15256)),
457
- simde_mm_set_pi16(INT16_C( 18911), INT16_C( 16993), INT16_C( -32768), INT16_C( 15593)) },
458
- { simde_mm_set_pi16(INT16_C( -15687), INT16_C( 25487), INT16_C( 23048), INT16_C( -8478)),
459
- simde_mm_set_pi16(INT16_C( 9271), INT16_C( -4756), INT16_C( -12087), INT16_C( -15383)),
460
- simde_mm_set_pi16(INT16_C( -6416), INT16_C( 20731), INT16_C( 10961), INT16_C( -23861)) }
461
- };
462
-
463
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
464
- simde__m64 r = simde_mm_adds_pi16(test_vec[i].a, test_vec[i].b);
465
- simde_mm_empty();
466
- simde_assert_m64_i16(r, ==, test_vec[i].r);
467
- }
468
-
469
- simde_mm_empty();
470
- return MUNIT_OK;
471
- }
472
-
473
- static MunitResult
474
- test_simde_mm_adds_pu16(const MunitParameter params[], void* data) {
475
- (void) params;
476
- (void) data;
477
-
478
- const struct {
479
- simde__m64 a;
480
- simde__m64 b;
481
- simde__m64 r;
482
- } test_vec[8] = {
483
- { simde_x_mm_set_pu16(UINT16_C(43150), UINT16_C( 5470), UINT16_C(60072), UINT16_C(50068)),
484
- simde_x_mm_set_pu16(UINT16_C( 7332), UINT16_C( 4270), UINT16_C(46463), UINT16_C( 9473)),
485
- simde_x_mm_set_pu16(UINT16_C(50482), UINT16_C( 9740), UINT16_C(65535), UINT16_C(59541)) },
486
- { simde_x_mm_set_pu16(UINT16_C( 2434), UINT16_C(31906), UINT16_C( 3723), UINT16_C(47234)),
487
- simde_x_mm_set_pu16(UINT16_C(58902), UINT16_C(62845), UINT16_C(51771), UINT16_C(64034)),
488
- simde_x_mm_set_pu16(UINT16_C(61336), UINT16_C(65535), UINT16_C(55494), UINT16_C(65535)) },
489
- { simde_x_mm_set_pu16(UINT16_C( 129), UINT16_C(16274), UINT16_C( 9343), UINT16_C(27425)),
490
- simde_x_mm_set_pu16(UINT16_C(21184), UINT16_C(38810), UINT16_C(32910), UINT16_C(34144)),
491
- simde_x_mm_set_pu16(UINT16_C(21313), UINT16_C(55084), UINT16_C(42253), UINT16_C(61569)) },
492
- { simde_x_mm_set_pu16(UINT16_C(64726), UINT16_C(55325), UINT16_C( 5040), UINT16_C(34690)),
493
- simde_x_mm_set_pu16(UINT16_C(18928), UINT16_C(15762), UINT16_C(23760), UINT16_C(30303)),
494
- simde_x_mm_set_pu16(UINT16_C(65535), UINT16_C(65535), UINT16_C(28800), UINT16_C(64993)) },
495
- { simde_x_mm_set_pu16(UINT16_C(12447), UINT16_C(56063), UINT16_C(19893), UINT16_C(38115)),
496
- simde_x_mm_set_pu16(UINT16_C(53854), UINT16_C( 9599), UINT16_C(53148), UINT16_C(47295)),
497
- simde_x_mm_set_pu16(UINT16_C(65535), UINT16_C(65535), UINT16_C(65535), UINT16_C(65535)) },
498
- { simde_x_mm_set_pu16(UINT16_C(30591), UINT16_C(42550), UINT16_C(36715), UINT16_C(13411)),
499
- simde_x_mm_set_pu16(UINT16_C(46515), UINT16_C(57187), UINT16_C(46870), UINT16_C(44207)),
500
- simde_x_mm_set_pu16(UINT16_C(65535), UINT16_C(65535), UINT16_C(65535), UINT16_C(57618)) },
501
- { simde_x_mm_set_pu16(UINT16_C(12664), UINT16_C(64378), UINT16_C(29354), UINT16_C(42615)),
502
- simde_x_mm_set_pu16(UINT16_C(62249), UINT16_C(64644), UINT16_C(45128), UINT16_C(47328)),
503
- simde_x_mm_set_pu16(UINT16_C(65535), UINT16_C(65535), UINT16_C(65535), UINT16_C(65535)) },
504
- { simde_x_mm_set_pu16(UINT16_C(65124), UINT16_C( 3867), UINT16_C(20702), UINT16_C(63422)),
505
- simde_x_mm_set_pu16(UINT16_C(51381), UINT16_C(37432), UINT16_C(48951), UINT16_C(45184)),
506
- simde_x_mm_set_pu16(UINT16_C(65535), UINT16_C(41299), UINT16_C(65535), UINT16_C(65535)) }
507
- };
508
-
509
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
510
- simde__m64 r = simde_mm_adds_pu16(test_vec[i].a, test_vec[i].b);
511
- simde_assert_m64_i16(r, ==, test_vec[i].r);
512
- }
513
-
514
- return MUNIT_OK;
515
- }
516
-
517
- static MunitResult
518
- test_simde_mm_and_si64(const MunitParameter params[], void* data) {
519
- (void) params;
520
- (void) data;
521
-
522
- const struct {
523
- simde__m64 a;
524
- simde__m64 b;
525
- simde__m64 r;
526
- } test_vec[8] = {
527
- { simde_mm_set_pi32(INT32_C( 340534654), INT32_C( 867835838)),
528
- simde_mm_set_pi32(INT32_C( -1715051141), INT32_C( 327376215)),
529
- simde_mm_set_pi32(INT32_C( 272901498), INT32_C( 327294230)) },
530
- { simde_mm_set_pi32(INT32_C( 364465166), INT32_C( -1853449223)),
531
- simde_mm_set_pi32(INT32_C( 425932704), INT32_C( -538031667)),
532
- simde_mm_set_pi32(INT32_C( 287376384), INT32_C( -1853486647)) },
533
- { simde_mm_set_pi32(INT32_C( 1222276268), INT32_C( -1950390417)),
534
- simde_mm_set_pi32(INT32_C( 104967923), INT32_C( 339992254)),
535
- simde_mm_set_pi32(INT32_C( 4203680), INT32_C( 214574)) },
536
- { simde_mm_set_pi32(INT32_C( 678635361), INT32_C( 1353498548)),
537
- simde_mm_set_pi32(INT32_C( 1051418126), INT32_C( -1022663537)),
538
- simde_mm_set_pi32(INT32_C( 673383936), INT32_C( 1074275460)) },
539
- { simde_mm_set_pi32(INT32_C( 1823492970), INT32_C( -1726291925)),
540
- simde_mm_set_pi32(INT32_C( 1139854805), INT32_C( 874111018)),
541
- simde_mm_set_pi32(INT32_C( 1085294912), INT32_C( 270065706)) },
542
- { simde_mm_set_pi32(INT32_C( 188716107), INT32_C( 919243794)),
543
- simde_mm_set_pi32(INT32_C( -505381577), INT32_C( -1684778331)),
544
- simde_mm_set_pi32(INT32_C( 18879491), INT32_C( 310378496)) },
545
- { simde_mm_set_pi32(INT32_C( -1486610662), INT32_C( 307692640)),
546
- simde_mm_set_pi32(INT32_C( -1793851837), INT32_C( 1963802755)),
547
- simde_mm_set_pi32(INT32_C( -2063589886), INT32_C( 268763136)) },
548
- { simde_mm_set_pi32(INT32_C( -630259527), INT32_C( -82339396)),
549
- simde_mm_set_pi32(INT32_C( 1607040389), INT32_C( 867785548)),
550
- simde_mm_set_pi32(INT32_C( 1514733697), INT32_C( 856758540)) }
551
- };
552
-
553
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
554
- simde__m64 r = simde_mm_and_si64(test_vec[i].a, test_vec[i].b);
555
- simde_mm_empty();
556
- simde_assert_m64_i32(r, ==, test_vec[i].r);
557
- }
558
-
559
- simde_mm_empty();
560
- return MUNIT_OK;
561
- }
562
-
563
- static MunitResult
564
- test_simde_mm_andnot_si64(const MunitParameter params[], void* data) {
565
- (void) params;
566
- (void) data;
567
-
568
- const struct {
569
- simde__m64 a;
570
- simde__m64 b;
571
- simde__m64 r;
572
- } test_vec[8] = {
573
- { simde_mm_set_pi32(INT32_C( 874898289), INT32_C( -802292997)),
574
- simde_mm_set_pi32(INT32_C( 329777422), INT32_C( 479831177)),
575
- simde_mm_set_pi32(INT32_C( 59244558), INT32_C( 210764800)) },
576
- { simde_mm_set_pi32(INT32_C( -944824913), INT32_C( 1953730462)),
577
- simde_mm_set_pi32(INT32_C( -914930437), INT32_C( -556614726)),
578
- simde_mm_set_pi32(INT32_C( 139477072), INT32_C( -1971310560)) },
579
- { simde_mm_set_pi32(INT32_C( -253535493), INT32_C( 1477705121)),
580
- simde_mm_set_pi32(INT32_C( -1581892884), INT32_C( -1606801005)),
581
- simde_mm_set_pi32(INT32_C( 18096132), INT32_C( -1607991278)) },
582
- { simde_mm_set_pi32(INT32_C( -585861604), INT32_C( 825554783)),
583
- simde_mm_set_pi32(INT32_C( -1758500210), INT32_C( -643533489)),
584
- simde_mm_set_pi32(INT32_C( 36374658), INT32_C( -931135488)) },
585
- { simde_mm_set_pi32(INT32_C( -5443449), INT32_C( 694842285)),
586
- simde_mm_set_pi32(INT32_C( -1613805192), INT32_C( 215848721)),
587
- simde_mm_set_pi32(INT32_C( 4393336), INT32_C( 76907536)) },
588
- { simde_mm_set_pi32(INT32_C( 1431251288), INT32_C( 1009645294)),
589
- simde_mm_set_pi32(INT32_C( -1668167014), INT32_C( -733286899)),
590
- simde_mm_set_pi32(INT32_C( -2003778942), INT32_C( -1069414399)) },
591
- { simde_mm_set_pi32(INT32_C( 1707128575), INT32_C( -1462185330)),
592
- simde_mm_set_pi32(INT32_C( -1016415616), INT32_C( -1881637541)),
593
- simde_mm_set_pi32(INT32_C( -2111174656), INT32_C( 117452113)) },
594
- { simde_mm_set_pi32(INT32_C( 336066190), INT32_C( -2007360384)),
595
- simde_mm_set_pi32(INT32_C( -1959332116), INT32_C( -820920813)),
596
- simde_mm_set_pi32(INT32_C( -1959788448), INT32_C( 1191289363)) }
597
- };
598
-
599
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
600
- simde__m64 r = simde_mm_andnot_si64(test_vec[i].a, test_vec[i].b);
601
- simde_assert_m64_i32(r, ==, test_vec[i].r);
602
- }
603
-
604
- simde_mm_empty();
605
- return MUNIT_OK;
606
- }
607
-
608
- static MunitResult
609
- test_simde_mm_cmpeq_pi8(const MunitParameter params[], void* data) {
610
- (void) params;
611
- (void) data;
612
-
613
- const struct {
614
- simde__m64 a;
615
- simde__m64 b;
616
- simde__m64 r;
617
- } test_vec[8] = {
618
- { simde_mm_set_pi8(INT8_C( 61), INT8_C(-117), INT8_C(-117), INT8_C( -23), INT8_C( -19), INT8_C( 6), INT8_C( -24), INT8_C( 89)),
619
- simde_mm_set_pi8(INT8_C( 47), INT8_C( 71), INT8_C(-105), INT8_C( 13), INT8_C( -26), INT8_C( 93), INT8_C( 118), INT8_C( -58)),
620
- simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
621
- { simde_mm_set_pi8(INT8_C( 78), INT8_C( 11), INT8_C( -2), INT8_C( 86), INT8_C( -50), INT8_C( -49), INT8_C( -1), INT8_C( 92)),
622
- simde_mm_set_pi8(INT8_C( -85), INT8_C( -99), INT8_C( -41), INT8_C( 116), INT8_C( 74), INT8_C( 114), INT8_C( -3), INT8_C( -98)),
623
- simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
624
- { simde_mm_set_pi8(INT8_C( 60), INT8_C( 10), INT8_C( -34), INT8_C( 30), INT8_C( 48), INT8_C( -13), INT8_C(-106), INT8_C( 105)),
625
- simde_mm_set_pi8(INT8_C( 81), INT8_C( 108), INT8_C( -65), INT8_C( -58), INT8_C( -30), INT8_C( -90), INT8_C( 42), INT8_C( 0)),
626
- simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
627
- { simde_mm_set_pi8(INT8_C(-113), INT8_C( -67), INT8_C( -55), INT8_C( 84), INT8_C( -92), INT8_C( -66), INT8_C( 7), INT8_C( 21)),
628
- simde_mm_set_pi8(INT8_C(-113), INT8_C( -67), INT8_C( -55), INT8_C( 84), INT8_C( -92), INT8_C( -66), INT8_C( 7), INT8_C( 21)),
629
- simde_mm_set_pi8(INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1)) },
630
- { simde_mm_set_pi8(INT8_C( -72), INT8_C( -56), INT8_C(-104), INT8_C( -6), INT8_C( 37), INT8_C(-114), INT8_C( 84), INT8_C( 21)),
631
- simde_mm_set_pi8(INT8_C( 77), INT8_C( -25), INT8_C(-104), INT8_C( 0), INT8_C( -39), INT8_C( 38), INT8_C( -54), INT8_C( -90)),
632
- simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
633
- { simde_mm_set_pi8(INT8_C( 46), INT8_C( 120), INT8_C( -13), INT8_C(-125), INT8_C( 50), INT8_C( 10), INT8_C( 120), INT8_C( -10)),
634
- simde_mm_set_pi8(INT8_C( 85), INT8_C( 89), INT8_C( 9), INT8_C( 65), INT8_C( -82), INT8_C( -80), INT8_C( 65), INT8_C( -65)),
635
- simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
636
- { simde_mm_set_pi8(INT8_C( -12), INT8_C( -41), INT8_C( -54), INT8_C( 92), INT8_C( -87), INT8_C( -82), INT8_C(-120), INT8_C( 37)),
637
- simde_mm_set_pi8(INT8_C( 94), INT8_C( -21), INT8_C( 36), INT8_C(-121), INT8_C( -62), INT8_C( -4), INT8_C( 42), INT8_C(-119)),
638
- simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
639
- { simde_mm_set_pi8(INT8_C( -8), INT8_C( -60), INT8_C( 35), INT8_C( -31), INT8_C(-103), INT8_C( -7), INT8_C( -39), INT8_C( 47)),
640
- simde_mm_set_pi8(INT8_C( 13), INT8_C( -84), INT8_C(-126), INT8_C(-127), INT8_C( -82), INT8_C( 37), INT8_C( 60), INT8_C( 30)),
641
- simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }
642
- };
643
-
644
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
645
- simde__m64 r = simde_mm_cmpeq_pi8(test_vec[i].a, test_vec[i].b);
646
- simde_mm_empty();
647
- simde_assert_m64_i8(r, ==, test_vec[i].r);
648
- }
649
-
650
- simde_mm_empty();
651
- return MUNIT_OK;
652
- }
653
-
654
- static MunitResult
655
- test_simde_mm_cmpeq_pi16(const MunitParameter params[], void* data) {
656
- (void) params;
657
- (void) data;
658
-
659
- const struct {
660
- simde__m64 a;
661
- simde__m64 b;
662
- simde__m64 r;
663
- } test_vec[8] = {
664
- { simde_mm_set_pi16(INT16_C( -13903), INT16_C( -28259), INT16_C( 10786), INT16_C( 24518)),
665
- simde_mm_set_pi16(INT16_C( 5267), INT16_C( 1924), INT16_C( 13281), INT16_C( -25055)),
666
- simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
667
- { simde_mm_set_pi16(INT16_C( -21949), INT16_C( -13483), INT16_C( -390), INT16_C( 6377)),
668
- simde_mm_set_pi16(INT16_C( -9583), INT16_C( 6876), INT16_C( 23768), INT16_C( 6209)),
669
- simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
670
- { simde_mm_set_pi16(INT16_C( 11364), INT16_C( 28383), INT16_C( 13353), INT16_C( 14261)),
671
- simde_mm_set_pi16(INT16_C( 13422), INT16_C( 32033), INT16_C( 4055), INT16_C( 5623)),
672
- simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
673
- { simde_mm_set_pi16(INT16_C( 206), INT16_C( -1567), INT16_C( -17153), INT16_C( 18166)),
674
- simde_mm_set_pi16(INT16_C( 30519), INT16_C( 30643), INT16_C( 32735), INT16_C( -4195)),
675
- simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
676
- { simde_mm_set_pi16(INT16_C( 25406), INT16_C( -18343), INT16_C( -15870), INT16_C( -15505)),
677
- simde_mm_set_pi16(INT16_C( 25406), INT16_C( -18343), INT16_C( -15870), INT16_C( -15505)),
678
- simde_mm_set_pi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1)) },
679
- { simde_mm_set_pi16(INT16_C( 21393), INT16_C( 22815), INT16_C( 322), INT16_C( 9608)),
680
- simde_mm_set_pi16(INT16_C( 23953), INT16_C( -31672), INT16_C( -7546), INT16_C( 31996)),
681
- simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
682
- { simde_mm_set_pi16(INT16_C( -16506), INT16_C( -921), INT16_C( -32189), INT16_C( 18444)),
683
- simde_mm_set_pi16(INT16_C( -10340), INT16_C( -28110), INT16_C( 24057), INT16_C( -7047)),
684
- simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
685
- { simde_mm_set_pi16(INT16_C( -1173), INT16_C( -25844), INT16_C( -10729), INT16_C( 22121)),
686
- simde_mm_set_pi16(INT16_C( 25970), INT16_C( 12718), INT16_C( 25424), INT16_C( 11867)),
687
- simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }
688
- };
689
-
690
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
691
- simde__m64 r = simde_mm_cmpeq_pi16(test_vec[i].a, test_vec[i].b);
692
- simde_mm_empty();
693
- simde_assert_m64_u16(r, ==, test_vec[i].r);
694
- }
695
-
696
- simde_mm_empty();
697
- return MUNIT_OK;
698
- }
699
-
700
- static MunitResult
701
- test_simde_mm_cmpeq_pi32(const MunitParameter params[], void* data) {
702
- (void) params;
703
- (void) data;
704
-
705
- const struct {
706
- simde__m64 a;
707
- simde__m64 b;
708
- simde__m64 r;
709
- } test_vec[8] = {
710
- { simde_mm_set_pi32(INT32_C( -883578301), INT32_C( 417988218)),
711
- simde_mm_set_pi32(INT32_C( 450681489), INT32_C( 406936792)),
712
- simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) },
713
- { simde_mm_set_pi32(INT32_C( 1860119652), INT32_C( 934622249)),
714
- simde_mm_set_pi32(INT32_C( 2099328110), INT32_C( 368512983)),
715
- simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) },
716
- { simde_mm_set_pi32(INT32_C( -102694706), INT32_C( 1190575359)),
717
- simde_mm_set_pi32(INT32_C( 2008250167), INT32_C( -274890785)),
718
- simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) },
719
- { simde_mm_set_pi32(INT32_C( 126096531), INT32_C( -1641991199)),
720
- simde_mm_set_pi32(INT32_C( 126096531), INT32_C( -1641991199)),
721
- simde_mm_set_pi32(INT32_C( -1), INT32_C( -1)) },
722
- { simde_mm_set_pi32(INT32_C( -1202101442), INT32_C( -1016086014)),
723
- simde_mm_set_pi32(INT32_C( -1034786090), INT32_C( -993100857)),
724
- simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) },
725
- { simde_mm_set_pi32(INT32_C( 1495225233), INT32_C( 629670210)),
726
- simde_mm_set_pi32(INT32_C( -2075632239), INT32_C( 2096947846)),
727
- simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) },
728
- { simde_mm_set_pi32(INT32_C( -60309626), INT32_C( 1208779331)),
729
- simde_mm_set_pi32(INT32_C( -1842161764), INT32_C( -461808135)),
730
- simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) },
731
- { simde_mm_set_pi32(INT32_C( -1693648021), INT32_C( 1449776663)),
732
- simde_mm_set_pi32(INT32_C( 833512818), INT32_C( 777741136)),
733
- simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) }
734
- };
735
-
736
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
737
- simde__m64 r = simde_mm_cmpeq_pi32(test_vec[i].a, test_vec[i].b);
738
- simde_mm_empty();
739
- simde_assert_m64_i32(r, ==, test_vec[i].r);
740
- }
741
-
742
- simde_mm_empty();
743
- return MUNIT_OK;
744
- }
745
-
746
- static MunitResult
747
- test_simde_mm_cmpgt_pi8(const MunitParameter params[], void* data) {
748
- (void) params;
749
- (void) data;
750
-
751
- const struct {
752
- simde__m64 a;
753
- simde__m64 b;
754
- simde__m64 r;
755
- } test_vec[8] = {
756
- { simde_mm_set_pi8(INT8_C( -77), INT8_C( 29), INT8_C( -34), INT8_C(-110), INT8_C( -78), INT8_C( -8), INT8_C( 92), INT8_C( 44)),
757
- simde_mm_set_pi8(INT8_C( -57), INT8_C( 99), INT8_C( -10), INT8_C( 28), INT8_C( 46), INT8_C( 79), INT8_C( -76), INT8_C( 59)),
758
- simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0)) },
759
- { simde_mm_set_pi8(INT8_C( 96), INT8_C( -9), INT8_C( -61), INT8_C( 46), INT8_C( 104), INT8_C(-105), INT8_C( 89), INT8_C( 48)),
760
- simde_mm_set_pi8(INT8_C( 109), INT8_C( 70), INT8_C( 13), INT8_C( 90), INT8_C(-116), INT8_C( -23), INT8_C( 10), INT8_C( -96)),
761
- simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1)) },
762
- { simde_mm_set_pi8(INT8_C( -24), INT8_C( -2), INT8_C( 73), INT8_C( 36), INT8_C( -29), INT8_C( -70), INT8_C( 73), INT8_C(-121)),
763
- simde_mm_set_pi8(INT8_C( 17), INT8_C( -17), INT8_C( 77), INT8_C( -2), INT8_C( 111), INT8_C(-111), INT8_C( -66), INT8_C( -30)),
764
- simde_mm_set_pi8(INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0)) },
765
- { simde_mm_set_pi8(INT8_C( 72), INT8_C(-102), INT8_C(-121), INT8_C( 41), INT8_C( -29), INT8_C(-100), INT8_C( -70), INT8_C( 82)),
766
- simde_mm_set_pi8(INT8_C( 101), INT8_C( 118), INT8_C(-110), INT8_C( -74), INT8_C( -57), INT8_C( -2), INT8_C( 89), INT8_C( -16)),
767
- simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1)) },
768
- { simde_mm_set_pi8(INT8_C( 64), INT8_C( 2), INT8_C(-118), INT8_C( 23), INT8_C( -88), INT8_C(-120), INT8_C( 61), INT8_C( 114)),
769
- simde_mm_set_pi8(INT8_C( 60), INT8_C( 91), INT8_C( 96), INT8_C( -22), INT8_C( 38), INT8_C( 49), INT8_C( 80), INT8_C( -29)),
770
- simde_mm_set_pi8(INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1)) },
771
- { simde_mm_set_pi8(INT8_C( 31), INT8_C( -32), INT8_C(-121), INT8_C( 9), INT8_C( 80), INT8_C( 108), INT8_C( 29), INT8_C( 2)),
772
- simde_mm_set_pi8(INT8_C(-119), INT8_C( 33), INT8_C( 9), INT8_C( 101), INT8_C( 101), INT8_C( 79), INT8_C( 41), INT8_C( 87)),
773
- simde_mm_set_pi8(INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0)) },
774
- { simde_mm_set_pi8(INT8_C( 96), INT8_C( -75), INT8_C(-121), INT8_C(-101), INT8_C( 10), INT8_C(-126), INT8_C( 58), INT8_C( 60)),
775
- simde_mm_set_pi8(INT8_C( 101), INT8_C( -73), INT8_C( 126), INT8_C( 105), INT8_C( -48), INT8_C(-119), INT8_C( -97), INT8_C( -90)),
776
- simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1)) },
777
- { simde_mm_set_pi8(INT8_C( 118), INT8_C( 118), INT8_C( -21), INT8_C( -49), INT8_C( 85), INT8_C( 69), INT8_C( 84), INT8_C( 111)),
778
- simde_mm_set_pi8(INT8_C( -96), INT8_C( 121), INT8_C(-110), INT8_C( -87), INT8_C( -73), INT8_C( 37), INT8_C( 45), INT8_C(-120)),
779
- simde_mm_set_pi8(INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1)) }
780
- };
781
-
782
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
783
- simde__m64 r = simde_mm_cmpgt_pi8(test_vec[i].a, test_vec[i].b);
784
- simde_mm_empty();
785
- simde_assert_m64_i8(r, ==, test_vec[i].r);
786
- }
787
-
788
- simde_mm_empty();
789
- return MUNIT_OK;
790
- }
791
-
792
- static MunitResult
793
- test_simde_mm_cmpgt_pi16(const MunitParameter params[], void* data) {
794
- (void) params;
795
- (void) data;
796
-
797
- const struct {
798
- simde__m64 a;
799
- simde__m64 b;
800
- simde__m64 r;
801
- } test_vec[8] = {
802
- { simde_mm_set_pi16(INT16_C( 27287), INT16_C( -17445), INT16_C( 7868), INT16_C( 17731)),
803
- simde_mm_set_pi16(INT16_C( -32130), INT16_C( -12389), INT16_C( -15721), INT16_C( -10529)),
804
- simde_mm_set_pi16(INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1)) },
805
- { simde_mm_set_pi16(INT16_C( -23331), INT16_C( 19282), INT16_C( 27710), INT16_C( 4608)),
806
- simde_mm_set_pi16(INT16_C( -32646), INT16_C( -2319), INT16_C( 19710), INT16_C( 25425)),
807
- simde_mm_set_pi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0)) },
808
- { simde_mm_set_pi16(INT16_C( 29350), INT16_C( -12356), INT16_C( -18117), INT16_C( -29182)),
809
- simde_mm_set_pi16(INT16_C( 10015), INT16_C( -4879), INT16_C( 30741), INT16_C( -4144)),
810
- simde_mm_set_pi16(INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
811
- { simde_mm_set_pi16(INT16_C( 30697), INT16_C( -4215), INT16_C( 31556), INT16_C( 11913)),
812
- simde_mm_set_pi16(INT16_C( -27176), INT16_C( 17667), INT16_C( -30447), INT16_C( -2179)),
813
- simde_mm_set_pi16(INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1)) },
814
- { simde_mm_set_pi16(INT16_C( 9207), INT16_C( 4793), INT16_C( -24596), INT16_C( 10085)),
815
- simde_mm_set_pi16(INT16_C( -18727), INT16_C( -929), INT16_C( 7051), INT16_C( 8853)),
816
- simde_mm_set_pi16(INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1)) },
817
- { simde_mm_set_pi16(INT16_C( 22734), INT16_C( 5890), INT16_C( -3490), INT16_C( -24930)),
818
- simde_mm_set_pi16(INT16_C( 23656), INT16_C( 14548), INT16_C( 31806), INT16_C( -18379)),
819
- simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
820
- { simde_mm_set_pi16(INT16_C( -28756), INT16_C( 2211), INT16_C( -15605), INT16_C( -32010)),
821
- simde_mm_set_pi16(INT16_C( -12192), INT16_C( -10879), INT16_C( 28731), INT16_C( 7911)),
822
- simde_mm_set_pi16(INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0)) },
823
- { simde_mm_set_pi16(INT16_C( -9646), INT16_C( -8544), INT16_C( -843), INT16_C( 12140)),
824
- simde_mm_set_pi16(INT16_C( 4324), INT16_C( 29706), INT16_C( 13667), INT16_C( -9123)),
825
- simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1)) }
826
- };
827
-
828
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
829
- simde__m64 r = simde_mm_cmpgt_pi16(test_vec[i].a, test_vec[i].b);
830
- simde_mm_empty();
831
- simde_assert_m64_i16(r, ==, test_vec[i].r);
832
- }
833
-
834
- simde_mm_empty();
835
- return MUNIT_OK;
836
- }
837
-
838
- static MunitResult
839
- test_simde_mm_cmpgt_pi32(const MunitParameter params[], void* data) {
840
- (void) params;
841
- (void) data;
842
-
843
- const struct {
844
- simde__m64 a;
845
- simde__m64 b;
846
- simde__m64 r;
847
- } test_vec[8] = {
848
- { simde_mm_set_pi32(INT32_C( -1143248233), INT32_C( 1162026684)),
849
- simde_mm_set_pi32(INT32_C( -811892098), INT32_C( -689978729)),
850
- simde_mm_set_pi32(INT32_C( 0), INT32_C( -1)) },
851
- { simde_mm_set_pi32(INT32_C( 1263707357), INT32_C( 302017598)),
852
- simde_mm_set_pi32(INT32_C( -151945094), INT32_C( 1666272510)),
853
- simde_mm_set_pi32(INT32_C( -1), INT32_C( 0)) },
854
- { simde_mm_set_pi32(INT32_C( -809733466), INT32_C( -1912424133)),
855
- simde_mm_set_pi32(INT32_C( -319740129), INT32_C( -271550443)),
856
- simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) },
857
- { simde_mm_set_pi32(INT32_C( -276203543), INT32_C( 780761924)),
858
- simde_mm_set_pi32(INT32_C( 1157862872), INT32_C( -142767855)),
859
- simde_mm_set_pi32(INT32_C( 0), INT32_C( -1)) },
860
- { simde_mm_set_pi32(INT32_C( 314123255), INT32_C( 660971500)),
861
- simde_mm_set_pi32(INT32_C( -60836135), INT32_C( 580197259)),
862
- simde_mm_set_pi32(INT32_C( -1), INT32_C( -1)) },
863
- { simde_mm_set_pi32(INT32_C( 386029774), INT32_C( -1633750434)),
864
- simde_mm_set_pi32(INT32_C( 953441384), INT32_C( -1204454338)),
865
- simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) },
866
- { simde_mm_set_pi32(INT32_C( 144936876), INT32_C( -2097757429)),
867
- simde_mm_set_pi32(INT32_C( -712912800), INT32_C( 518484027)),
868
- simde_mm_set_pi32(INT32_C( -1), INT32_C( 0)) },
869
- { simde_mm_set_pi32(INT32_C( -559883694), INT32_C( 795671733)),
870
- simde_mm_set_pi32(INT32_C( 1946816740), INT32_C( -597871261)),
871
- simde_mm_set_pi32(INT32_C( 0), INT32_C( -1)) }
872
- };
873
-
874
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
875
- simde__m64 r = simde_mm_cmpgt_pi32(test_vec[i].a, test_vec[i].b);
876
- simde_mm_empty();
877
- simde_assert_m64_i32(r, ==, test_vec[i].r);
878
- }
879
-
880
- simde_mm_empty();
881
- return MUNIT_OK;
882
- }
883
-
884
- static MunitResult
885
- test_simde_mm_cvtm64_si64(const MunitParameter params[], void* data) {
886
- (void) params;
887
- (void) data;
888
-
889
- const struct {
890
- simde__m64 a;
891
- int64_t r;
892
- } test_vec[8] = {
893
- { simde_x_mm_set_pi64(INT64_C( 2133233461862191637)),
894
- INT64_C( 2133233461862191637) },
895
- { simde_x_mm_set_pi64(INT64_C(-1973285463394951226)),
896
- INT64_C(-1973285463394951226) },
897
- { simde_x_mm_set_pi64(INT64_C(-5080660655112358315)),
898
- INT64_C(-5080660655112358315) },
899
- { simde_x_mm_set_pi64(INT64_C(-2729804181976621239)),
900
- INT64_C(-2729804181976621239) },
901
- { simde_x_mm_set_pi64(INT64_C( 2995193706671491592)),
902
- INT64_C( 2995193706671491592) },
903
- { simde_x_mm_set_pi64(INT64_C( 5468114770221852232)),
904
- INT64_C( 5468114770221852232) },
905
- { simde_x_mm_set_pi64(INT64_C( 8741870191125799000)),
906
- INT64_C( 8741870191125799000) },
907
- { simde_x_mm_set_pi64(INT64_C(-2719280269483103979)),
908
- INT64_C(-2719280269483103979) }
909
- };
910
-
911
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
912
- int64_t r = simde_mm_cvtm64_si64(test_vec[i].a);
913
- munit_assert_int64(r, ==, test_vec[i].r);
914
- }
915
-
916
- return MUNIT_OK;
917
- }
918
-
919
- static MunitResult
920
- test_simde_mm_cvtsi32_si64(const MunitParameter params[], void* data) {
921
- (void) params;
922
- (void) data;
923
-
924
- const struct {
925
- int32_t a;
926
- simde__m64 r;
927
- } test_vec[8] = {
928
- { INT32_C( -1348583717), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1348583717)) },
929
- { INT32_C( -756715702), simde_mm_set_pi32(INT32_C( 0), INT32_C( -756715702)) },
930
- { INT32_C( -1433924355), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1433924355)) },
931
- { INT32_C( -1317069830), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1317069830)) },
932
- { INT32_C( 1132090539), simde_mm_set_pi32(INT32_C( 0), INT32_C( 1132090539)) },
933
- { INT32_C( -1685122075), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1685122075)) },
934
- { INT32_C( -782778794), simde_mm_set_pi32(INT32_C( 0), INT32_C( -782778794)) },
935
- { INT32_C( -1603608856), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1603608856)) }
936
- };
937
-
938
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
939
- simde__m64 r = simde_mm_cvtsi32_si64(test_vec[i].a);
940
- simde_mm_empty();
941
- simde_assert_m64_i32(r, ==, test_vec[i].r);
942
- }
943
-
944
- simde_mm_empty();
945
- return MUNIT_OK;
946
- }
947
-
948
- static MunitResult
949
- test_simde_mm_cvtsi64_m64(const MunitParameter params[], void* data) {
950
- (void) params;
951
- (void) data;
952
-
953
- const struct {
954
- int64_t a;
955
- simde__m64 r;
956
- } test_vec[8] = {
957
- { INT64_C( 2448316468135826021),
958
- simde_x_mm_set_pi64(INT64_C( 2448316468135826021)) },
959
- { INT64_C(-5945835882033612295),
960
- simde_x_mm_set_pi64(INT64_C(-5945835882033612295)) },
961
- { INT64_C( 5992090895212857513),
962
- simde_x_mm_set_pi64(INT64_C( 5992090895212857513)) },
963
- { INT64_C(-6796228402041923924),
964
- simde_x_mm_set_pi64(INT64_C(-6796228402041923924)) },
965
- { INT64_C(-8511645703056027592),
966
- simde_x_mm_set_pi64(INT64_C(-8511645703056027592)) },
967
- { INT64_C(-8723546203794185453),
968
- simde_x_mm_set_pi64(INT64_C(-8723546203794185453)) },
969
- { INT64_C( 4345402151036158873),
970
- simde_x_mm_set_pi64(INT64_C( 4345402151036158873)) },
971
- { INT64_C(-6661466122659936384),
972
- simde_x_mm_set_pi64(INT64_C(-6661466122659936384)) }
973
- };
974
-
975
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
976
- simde__m64 r = simde_mm_cvtsi64_m64(test_vec[i].a);
977
- simde_mm_empty();
978
- simde_assert_m64_i32(r, ==, test_vec[i].r);
979
- }
980
-
981
- simde_mm_empty();
982
- return MUNIT_OK;
983
- }
984
-
985
- static MunitResult
986
- test_simde_mm_cvtsi64_si32(const MunitParameter params[], void* data) {
987
- (void) params;
988
- (void) data;
989
-
990
- const struct {
991
- simde__m64 a;
992
- int32_t r;
993
- } test_vec[8] = {
994
- { simde_mm_set_pi32(INT32_C( 1382271190), INT32_C( -17653840)), INT32_C( -17653840), },
995
- { simde_mm_set_pi32(INT32_C( 2132466748), INT32_C( -1483731059)), INT32_C( -1483731059), },
996
- { simde_mm_set_pi32(INT32_C( -822228698), INT32_C( 1004225555)), INT32_C( 1004225555), },
997
- { simde_mm_set_pi32(INT32_C( 558984757), INT32_C( -1886991323)), INT32_C( -1886991323), },
998
- { simde_mm_set_pi32(INT32_C( 927499451), INT32_C( 1754078566)), INT32_C( 1754078566), },
999
- { simde_mm_set_pi32(INT32_C( -1298862100), INT32_C( -1081030334)), INT32_C( -1081030334), },
1000
- { simde_mm_set_pi32(INT32_C( -2034437538), INT32_C( 1272751087)), INT32_C( 1272751087), },
1001
- { simde_mm_set_pi32(INT32_C( -1114400737), INT32_C( 1318901980)), INT32_C( 1318901980), }
1002
- };
1003
-
1004
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1005
- int32_t r = simde_mm_cvtsi64_si32(test_vec[i].a);
1006
- simde_mm_empty();
1007
- munit_assert_int32(r, ==, test_vec[i].r);
1008
- }
1009
-
1010
- simde_mm_empty();
1011
- return MUNIT_OK;
1012
- }
1013
-
1014
- static MunitResult
1015
- test_simde_mm_madd_pi16(const MunitParameter params[], void* data) {
1016
- (void) params;
1017
- (void) data;
1018
-
1019
- const struct {
1020
- simde__m64 a;
1021
- simde__m64 b;
1022
- simde__m64 r;
1023
- } test_vec[8] = {
1024
- { simde_mm_set_pi16(INT16_C( -30343), INT16_C( -26392), INT16_C( 12299), INT16_C( 4601)),
1025
- simde_mm_set_pi16(INT16_C( 1486), INT16_C( 26809), INT16_C( 7836), INT16_C( -25805)),
1026
- simde_mm_set_pi32(INT32_C( -752632826), INT32_C( -22353841)) },
1027
- { simde_mm_set_pi16(INT16_C( 1890), INT16_C( 31305), INT16_C( -30077), INT16_C( 2552)),
1028
- simde_mm_set_pi16(INT16_C( -26920), INT16_C( -29540), INT16_C( 15300), INT16_C( 26578)),
1029
- simde_mm_set_pi32(INT32_C( -975628500), INT32_C( -392351044)) },
1030
- { simde_mm_set_pi16(INT16_C( 22384), INT16_C( 696), INT16_C( 25907), INT16_C( -24876)),
1031
- simde_mm_set_pi16(INT16_C( -11857), INT16_C( 27254), INT16_C( -31966), INT16_C( 7796)),
1032
- simde_mm_set_pi32(INT32_C( -246438304), INT32_C( -1022076458)) },
1033
- { simde_mm_set_pi16(INT16_C( 29956), INT16_C( -2269), INT16_C( 6641), INT16_C( -23007)),
1034
- simde_mm_set_pi16(INT16_C( 8143), INT16_C( 30485), INT16_C( 15411), INT16_C( -14515)),
1035
- simde_mm_set_pi32(INT32_C( 174761243), INT32_C( 436291056)) },
1036
- { simde_mm_set_pi16(INT16_C( 7615), INT16_C( 20384), INT16_C( 5326), INT16_C( -12172)),
1037
- simde_mm_set_pi16(INT16_C( 26893), INT16_C( 19452), INT16_C( 1570), INT16_C( -21018)),
1038
- simde_mm_set_pi32(INT32_C( 601299763), INT32_C( 264192916)) },
1039
- { simde_mm_set_pi16(INT16_C( 21548), INT16_C( 8299), INT16_C( -27943), INT16_C( -19629)),
1040
- simde_mm_set_pi16(INT16_C( -7799), INT16_C( -19736), INT16_C( -28205), INT16_C( 18816)),
1041
- simde_mm_set_pi32(INT32_C( -331841916), INT32_C( 418793051)) },
1042
- { simde_mm_set_pi16(INT16_C( -14814), INT16_C( -21565), INT16_C( 4061), INT16_C( 32148)),
1043
- simde_mm_set_pi16(INT16_C( 26150), INT16_C( 16339), INT16_C( -29106), INT16_C( 3765)),
1044
- simde_mm_set_pi32(INT32_C( -739736635), INT32_C( 2837754)) },
1045
- { simde_mm_set_pi16(INT16_C( -14349), INT16_C( 29040), INT16_C( 10943), INT16_C( -14909)),
1046
- simde_mm_set_pi16(INT16_C( 4672), INT16_C( 28858), INT16_C( 1393), INT16_C( 4521)),
1047
- simde_mm_set_pi32(INT32_C( 770997792), INT32_C( -52159990)) }
1048
- };
1049
-
1050
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1051
- simde__m64 r = simde_mm_madd_pi16(test_vec[i].a, test_vec[i].b);
1052
- simde_mm_empty();
1053
- simde_assert_m64_i32(r, ==, test_vec[i].r);
1054
- }
1055
-
1056
- simde_mm_empty();
1057
- return MUNIT_OK;
1058
- }
1059
-
1060
- static MunitResult
1061
- test_simde_mm_mulhi_pi16(const MunitParameter params[], void* data) {
1062
- (void) params;
1063
- (void) data;
1064
-
1065
- const struct {
1066
- simde__m64 a;
1067
- simde__m64 b;
1068
- simde__m64 r;
1069
- } test_vec[8] = {
1070
- { simde_mm_set_pi16(INT16_C( 8979), INT16_C( 5154), INT16_C( -16978), INT16_C( 30928)),
1071
- simde_mm_set_pi16(INT16_C( 131), INT16_C( -26526), INT16_C( -28508), INT16_C( 3156)),
1072
- simde_mm_set_pi16(INT16_C( 17), INT16_C( -2087), INT16_C( 7385), INT16_C( 1489)) },
1073
- { simde_mm_set_pi16(INT16_C( -20724), INT16_C( -32562), INT16_C( -4287), INT16_C( -11994)),
1074
- simde_mm_set_pi16(INT16_C( -1407), INT16_C( -20477), INT16_C( 2350), INT16_C( -5112)),
1075
- simde_mm_set_pi16(INT16_C( 444), INT16_C( 10174), INT16_C( -154), INT16_C( 935)) },
1076
- { simde_mm_set_pi16(INT16_C( -19242), INT16_C( -20442), INT16_C( -24803), INT16_C( 26694)),
1077
- simde_mm_set_pi16(INT16_C( 13233), INT16_C( -6736), INT16_C( 457), INT16_C( 16731)),
1078
- simde_mm_set_pi16(INT16_C( -3886), INT16_C( 2101), INT16_C( -173), INT16_C( 6814)) },
1079
- { simde_mm_set_pi16(INT16_C( -7830), INT16_C( 18993), INT16_C( 2047), INT16_C( 32735)),
1080
- simde_mm_set_pi16(INT16_C( 17045), INT16_C( -23188), INT16_C( -16247), INT16_C( -6369)),
1081
- simde_mm_set_pi16(INT16_C( -2037), INT16_C( -6721), INT16_C( -508), INT16_C( -3182)) },
1082
- { simde_mm_set_pi16(INT16_C( -20331), INT16_C( -1771), INT16_C( 7319), INT16_C( -2172)),
1083
- simde_mm_set_pi16(INT16_C( 27473), INT16_C( 3736), INT16_C( 26635), INT16_C( -24632)),
1084
- simde_mm_set_pi16(INT16_C( -8523), INT16_C( -101), INT16_C( 2974), INT16_C( 816)) },
1085
- { simde_mm_set_pi16(INT16_C( 18863), INT16_C( 29355), INT16_C( 22063), INT16_C( 24992)),
1086
- simde_mm_set_pi16(INT16_C( 31646), INT16_C( 10850), INT16_C( -1174), INT16_C( 6386)),
1087
- simde_mm_set_pi16(INT16_C( 9108), INT16_C( 4859), INT16_C( -396), INT16_C( 2435)) },
1088
- { simde_mm_set_pi16(INT16_C( 12919), INT16_C( 27836), INT16_C( -15473), INT16_C( 31227)),
1089
- simde_mm_set_pi16(INT16_C( -2051), INT16_C( 6265), INT16_C( -13839), INT16_C( 14795)),
1090
- simde_mm_set_pi16(INT16_C( -405), INT16_C( 2661), INT16_C( 3267), INT16_C( 7049)) },
1091
- { simde_mm_set_pi16(INT16_C( -20265), INT16_C( -2387), INT16_C( 1893), INT16_C( 16606)),
1092
- simde_mm_set_pi16(INT16_C( 31589), INT16_C( -8123), INT16_C( 26642), INT16_C( 6982)),
1093
- simde_mm_set_pi16(INT16_C( -9768), INT16_C( 295), INT16_C( 769), INT16_C( 1769)) }
1094
- };
1095
-
1096
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1097
- simde__m64 r = simde_mm_mulhi_pi16(test_vec[i].a, test_vec[i].b);
1098
- simde_mm_empty();
1099
- simde_assert_m64_i16(r, ==, test_vec[i].r);
1100
- }
1101
-
1102
- simde_mm_empty();
1103
- return MUNIT_OK;
1104
- }
1105
-
1106
- static MunitResult
1107
- test_simde_mm_mullo_pi16(const MunitParameter params[], void* data) {
1108
- (void) params;
1109
- (void) data;
1110
-
1111
- const struct {
1112
- simde__m64 a;
1113
- simde__m64 b;
1114
- simde__m64 r;
1115
- } test_vec[8] = {
1116
- { simde_mm_set_pi16(INT16_C( 1243), INT16_C( 20416), INT16_C( 15667), INT16_C( 4430)),
1117
- simde_mm_set_pi16(INT16_C( -5775), INT16_C( 26694), INT16_C( 17028), INT16_C( 23537)),
1118
- simde_mm_set_pi16(INT16_C( 30635), INT16_C( -12672), INT16_C( -19380), INT16_C( 1134)) },
1119
- { simde_mm_set_pi16(INT16_C( -5230), INT16_C( -20726), INT16_C( -32301), INT16_C( 4324)),
1120
- simde_mm_set_pi16(INT16_C( 31416), INT16_C( -24870), INT16_C( 28490), INT16_C( -28474)),
1121
- simde_mm_set_pi16(INT16_C( -6928), INT16_C( 14980), INT16_C( 1022), INT16_C( 20568)) },
1122
- { simde_mm_set_pi16(INT16_C( 359), INT16_C( 28315), INT16_C( 30109), INT16_C( 30370)),
1123
- simde_mm_set_pi16(INT16_C( 11362), INT16_C( -24534), INT16_C( -7779), INT16_C( -31174)),
1124
- simde_mm_set_pi16(INT16_C( 15726), INT16_C( 1390), INT16_C( 7753), INT16_C( -21324)) },
1125
- { simde_mm_set_pi16(INT16_C( -7682), INT16_C( -17472), INT16_C( 1125), INT16_C( -30733)),
1126
- simde_mm_set_pi16(INT16_C( 27323), INT16_C( 21286), INT16_C( 28332), INT16_C( -26848)),
1127
- simde_mm_set_pi16(INT16_C( 16522), INT16_C( 7808), INT16_C( 23004), INT16_C( 21344)) },
1128
- { simde_mm_set_pi16(INT16_C( 28468), INT16_C( -4021), INT16_C( 23325), INT16_C( -24525)),
1129
- simde_mm_set_pi16(INT16_C( 29242), INT16_C( -5135), INT16_C( 12241), INT16_C( -5671)),
1130
- simde_mm_set_pi16(INT16_C( 22984), INT16_C( 3995), INT16_C( -19027), INT16_C( 13883)) },
1131
- { simde_mm_set_pi16(INT16_C( -11233), INT16_C( -9235), INT16_C( -23340), INT16_C( -55)),
1132
- simde_mm_set_pi16(INT16_C( -21567), INT16_C( -13689), INT16_C( 21540), INT16_C( 32686)),
1133
- simde_mm_set_pi16(INT16_C( -24481), INT16_C( -1029), INT16_C( -16944), INT16_C( -28258)) },
1134
- { simde_mm_set_pi16(INT16_C( 24703), INT16_C( -27133), INT16_C( 13289), INT16_C( 20833)),
1135
- simde_mm_set_pi16(INT16_C( -32748), INT16_C( 15704), INT16_C( 10635), INT16_C( -13911)),
1136
- simde_mm_set_pi16(INT16_C( 2540), INT16_C( 18440), INT16_C( -32637), INT16_C( -7671)) },
1137
- { simde_mm_set_pi16(INT16_C( -20397), INT16_C( -17293), INT16_C( -2038), INT16_C( -24305)),
1138
- simde_mm_set_pi16(INT16_C( -25280), INT16_C( 2678), INT16_C( -17798), INT16_C( 10227)),
1139
- simde_mm_set_pi16(INT16_C( -1088), INT16_C( 23298), INT16_C( 30916), INT16_C( 10813)), }
1140
- };
1141
-
1142
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1143
- simde__m64 r = simde_mm_mullo_pi16(test_vec[i].a, test_vec[i].b);
1144
- simde_mm_empty();
1145
- simde_assert_m64_i16(r, ==, test_vec[i].r);
1146
- }
1147
-
1148
- simde_mm_empty();
1149
- return MUNIT_OK;
1150
- }
1151
-
1152
- static MunitResult
1153
- test_simde_mm_or_si64(const MunitParameter params[], void* data) {
1154
- (void) params;
1155
- (void) data;
1156
-
1157
- const struct {
1158
- simde__m64 a;
1159
- simde__m64 b;
1160
- simde__m64 r;
1161
- } test_vec[8] = {
1162
- { simde_mm_set_pi32(INT32_C( -891509218), INT32_C(-1564843089)),
1163
- simde_mm_set_pi32(INT32_C( -653544563), INT32_C(-1696113634)),
1164
- simde_mm_set_pi32(INT32_C( 332862867), INT32_C( 945635249)) },
1165
- { simde_mm_set_pi32(INT32_C( 534518332), INT32_C( 469703625)),
1166
- simde_mm_set_pi32(INT32_C( 1926733937), INT32_C(-1778281838)),
1167
- simde_mm_set_pi32(INT32_C( 1829485133), INT32_C(-1912698533)) },
1168
- { simde_mm_set_pi32(INT32_C( 1838379192), INT32_C(-1012991609)),
1169
- simde_mm_set_pi32(INT32_C( 513007439), INT32_C( -965007092)),
1170
- simde_mm_set_pi32(INT32_C( 1929424887), INT32_C( 98947211)) },
1171
- { simde_mm_set_pi32(INT32_C( -467260595), INT32_C( 1936608780)),
1172
- simde_mm_set_pi32(INT32_C(-1713951633), INT32_C(-1838143667)),
1173
- simde_mm_set_pi32(INT32_C( 2112948002), INT32_C( -518100671)) },
1174
- { simde_mm_set_pi32(INT32_C( -1074911), INT32_C( 1257024473)),
1175
- simde_mm_set_pi32(INT32_C( 690851199), INT32_C( -673662530)),
1176
- simde_mm_set_pi32(INT32_C( -691923874), INT32_C(-1657531801)) },
1177
- { simde_mm_set_pi32(INT32_C( -124691463), INT32_C( -802403954)),
1178
- simde_mm_set_pi32(INT32_C(-1515822997), INT32_C(-1488861756)),
1179
- simde_mm_set_pi32(INT32_C( 1563900818), INT32_C( 2003668042)) },
1180
- { simde_mm_set_pi32(INT32_C( -30677319), INT32_C( -381566895)),
1181
- simde_mm_set_pi32(INT32_C( 1588726708), INT32_C( 607880991)),
1182
- simde_mm_set_pi32(INT32_C(-1600525043), INT32_C( -847624370)) },
1183
- { simde_mm_set_pi32(INT32_C( 289587202), INT32_C(-1908682429)),
1184
- simde_mm_set_pi32(INT32_C( 291676112), INT32_C(-1617544418)),
1185
- simde_mm_set_pi32(INT32_C( 2121682), INT32_C( 296610397)) }
1186
- };
1187
-
1188
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1189
- simde__m64 r = simde_mm_xor_si64(test_vec[i].a, test_vec[i].b);
1190
- simde_assert_m64_i32(r, ==, test_vec[i].r);
1191
- }
1192
-
1193
- return MUNIT_OK;
1194
- }
1195
-
1196
- static MunitResult
1197
- test_simde_mm_packs_pi16(const MunitParameter params[], void* data) {
1198
- (void) params;
1199
- (void) data;
1200
-
1201
- const struct {
1202
- simde__m64 a;
1203
- simde__m64 b;
1204
- simde__m64 r;
1205
- } test_vec[8] = {
1206
- { simde_mm_set_pi16(INT16_C( -17383), INT16_C( -12181), INT16_C( -2968), INT16_C( 26626)),
1207
- simde_mm_set_pi16(INT16_C( -10040), INT16_C( 13688), INT16_C( -30953), INT16_C( -4037)),
1208
- simde_mm_set_pi8 (INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128),
1209
- INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127)) },
1210
- { simde_mm_set_pi16(INT16_C( -20194), INT16_C( 12331), INT16_C( -23109), INT16_C( 25162)),
1211
- simde_mm_set_pi16(INT16_C( -1071), INT16_C( 20521), INT16_C( 860), INT16_C( 5875)),
1212
- simde_mm_set_pi8 (INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127),
1213
- INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127)) },
1214
- { simde_mm_set_pi16(INT16_C( -12255), INT16_C( 13277), INT16_C( -28950), INT16_C( 5253)),
1215
- simde_mm_set_pi16(INT16_C( 25343), INT16_C( -1252), INT16_C( 3561), INT16_C( 7538)),
1216
- simde_mm_set_pi8 (INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127),
1217
- INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127)) },
1218
- { simde_mm_set_pi16(INT16_C( -11251), INT16_C( -21118), INT16_C( -2077), INT16_C( -20336)),
1219
- simde_mm_set_pi16(INT16_C( 23412), INT16_C( 7898), INT16_C( -3571), INT16_C( 9242)),
1220
- simde_mm_set_pi8 (INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127),
1221
- INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128)) },
1222
- { simde_mm_set_pi16(INT16_C( 28180), INT16_C( 25339), INT16_C( 20328), INT16_C( 3051)),
1223
- simde_mm_set_pi16(INT16_C( 31135), INT16_C( 3581), INT16_C( 11552), INT16_C( 25034)),
1224
- simde_mm_set_pi8 (INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127),
1225
- INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127)) },
1226
- { simde_mm_set_pi16(INT16_C( 14129), INT16_C( -2982), INT16_C( -13260), INT16_C( -12225)),
1227
- simde_mm_set_pi16(INT16_C( -557), INT16_C( -14564), INT16_C( -28065), INT16_C( 25636)),
1228
- simde_mm_set_pi8 (INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127),
1229
- INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128)) },
1230
- { simde_mm_set_pi16(INT16_C( 31333), INT16_C( 20796), INT16_C( 16795), INT16_C( -5127)),
1231
- simde_mm_set_pi16(INT16_C( 22060), INT16_C( 10681), INT16_C( 28763), INT16_C( 2847)),
1232
- simde_mm_set_pi8 (INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127),
1233
- INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C(-128)) },
1234
- { simde_mm_set_pi16(INT16_C( 167), INT16_C( 233), INT16_C( 115), INT16_C( 126)),
1235
- simde_mm_set_pi16(INT16_C( 10), INT16_C( 94), INT16_C( 181), INT16_C( 233)),
1236
- simde_mm_set_pi8 (INT8_C( 10), INT8_C( 94), INT8_C( 127), INT8_C( 127),
1237
- INT8_C( 127), INT8_C( 127), INT8_C( 115), INT8_C( 126)) }
1238
- };
1239
-
1240
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1241
- simde__m64 r = simde_mm_packs_pi16(test_vec[i].a, test_vec[i].b);
1242
- simde_mm_empty();
1243
- simde_assert_m64_i8(r, ==, test_vec[i].r);
1244
- }
1245
-
1246
- simde_mm_empty();
1247
- return MUNIT_OK;
1248
- }
1249
-
1250
- static MunitResult
1251
- test_simde_mm_packs_pi32(const MunitParameter params[], void* data) {
1252
- (void) params;
1253
- (void) data;
1254
-
1255
- const struct {
1256
- simde__m64 a;
1257
- simde__m64 b;
1258
- simde__m64 r;
1259
- } test_vec[8] = {
1260
- { simde_mm_set_pi32(INT32_C( -2875748), INT32_C( -4)),
1261
- simde_mm_set_pi32(INT32_C( -53), INT32_C( 934884)),
1262
- simde_mm_set_pi16(INT16_C( -53), INT16_C( 32767), INT16_C( -32768), INT16_C( -4)) },
1263
- { simde_mm_set_pi32(INT32_C( 1), INT32_C( -216790321)),
1264
- simde_mm_set_pi32(INT32_C( 120), INT32_C( -379925)),
1265
- simde_mm_set_pi16(INT16_C( 120), INT16_C( -32768), INT16_C( 1), INT16_C( -32768)) },
1266
- { simde_mm_set_pi32(INT32_C( -18), INT32_C( 281)),
1267
- simde_mm_set_pi32(INT32_C( -33064), INT32_C( 130)),
1268
- simde_mm_set_pi16(INT16_C( -32768), INT16_C( 130), INT16_C( -18), INT16_C( 281)) },
1269
- { simde_mm_set_pi32(INT32_C( -51729), INT32_C( 14)),
1270
- simde_mm_set_pi32(INT32_C( 6852), INT32_C( -36)),
1271
- simde_mm_set_pi16(INT16_C( 6852), INT16_C( -36), INT16_C( -32768), INT16_C( 14)) },
1272
- { simde_mm_set_pi32(INT32_C( -1), INT32_C( -210)),
1273
- simde_mm_set_pi32(INT32_C( 3024991), INT32_C( 30957735)),
1274
- simde_mm_set_pi16(INT16_C( 32767), INT16_C( 32767), INT16_C( -1), INT16_C( -210)) },
1275
- { simde_mm_set_pi32(INT32_C( 28), INT32_C( 890)),
1276
- simde_mm_set_pi32(INT32_C( -2031601), INT32_C( -5309)),
1277
- simde_mm_set_pi16(INT16_C( -32768), INT16_C( -5309), INT16_C( 28), INT16_C( 890)) },
1278
- { simde_mm_set_pi32(INT32_C( -80), INT32_C( 4267394)),
1279
- simde_mm_set_pi32(INT32_C( 34757305), INT32_C( 127105)),
1280
- simde_mm_set_pi16(INT16_C( 32767), INT16_C( 32767), INT16_C( -80), INT16_C( 32767)) },
1281
- { simde_mm_set_pi32(INT32_C( -2773123), INT32_C( -42)),
1282
- simde_mm_set_pi32(INT32_C( 33), INT32_C( 3534549)),
1283
- simde_mm_set_pi16(INT16_C( 33), INT16_C( 32767), INT16_C( -32768), INT16_C( -42)) }
1284
- };
1285
-
1286
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1287
- simde__m64 r = simde_mm_packs_pi32(test_vec[i].a, test_vec[i].b);
1288
- simde_mm_empty();
1289
- simde_assert_m64_i16(r, ==, test_vec[i].r);
1290
- }
1291
-
1292
- simde_mm_empty();
1293
- return MUNIT_OK;
1294
- }
1295
-
1296
- static MunitResult
1297
- test_simde_mm_packs_pu16(const MunitParameter params[], void* data) {
1298
- (void) params;
1299
- (void) data;
1300
-
1301
- const struct {
1302
- simde__m64 a;
1303
- simde__m64 b;
1304
- simde__m64 r;
1305
- } test_vec[8] = {
1306
- { simde_mm_set_pi16(INT16_C( -2), INT16_C( 113), INT16_C( 49), INT16_C( -647)),
1307
- simde_mm_set_pi16(INT16_C( 56), INT16_C( 5), INT16_C( 1), INT16_C( -54)),
1308
- simde_mm_set_pi8 (INT8_C( 56), INT8_C( 5), INT8_C( 1), INT8_C( 0),
1309
- INT8_C( 0), INT8_C( 113), INT8_C( 49), INT8_C( 0)) },
1310
- { simde_mm_set_pi16(INT16_C( -1), INT16_C( -206), INT16_C( -1650), INT16_C( -109)),
1311
- simde_mm_set_pi16(INT16_C( -3828), INT16_C( 2), INT16_C( 471), INT16_C( 2)),
1312
- simde_mm_set_pi8 (INT8_C( 0), INT8_C( 2), INT8_C( -1), INT8_C( 2),
1313
- INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
1314
- { simde_mm_set_pi16(INT16_C( 3), INT16_C( -2), INT16_C( 500), INT16_C( -100)),
1315
- simde_mm_set_pi16(INT16_C( -1574), INT16_C( -1), INT16_C( -1), INT16_C( 2)),
1316
- simde_mm_set_pi8 (INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 2),
1317
- INT8_C( 3), INT8_C( 0), INT8_C( -1), INT8_C( 0)) },
1318
- { simde_mm_set_pi16(INT16_C( -13), INT16_C( -217), INT16_C( 3305), INT16_C( -10)),
1319
- simde_mm_set_pi16(INT16_C( -370), INT16_C( 181), INT16_C( 1), INT16_C( -1434)),
1320
- simde_mm_set_pi8 (INT8_C( 0), INT8_C( -75), INT8_C( 1), INT8_C( 0),
1321
- INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0)) },
1322
- { simde_mm_set_pi16(INT16_C( 867), INT16_C( -63), INT16_C( -1003), INT16_C( 13)),
1323
- simde_mm_set_pi16(INT16_C( -29854), INT16_C( -6), INT16_C( 33), INT16_C( 5)),
1324
- simde_mm_set_pi8 (INT8_C( 0), INT8_C( 0), INT8_C( 33), INT8_C( 5),
1325
- INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 13)) },
1326
- { simde_mm_set_pi16(INT16_C( 771), INT16_C( -1), INT16_C( -13), INT16_C( -2)),
1327
- simde_mm_set_pi16(INT16_C( -65), INT16_C( 55), INT16_C( 295), INT16_C( 17510)),
1328
- simde_mm_set_pi8 (INT8_C( 0), INT8_C( 55), INT8_C( -1), INT8_C( -1),
1329
- INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
1330
- { simde_mm_set_pi16(INT16_C( 50), INT16_C( 32337), INT16_C( 13), INT16_C( 20449)),
1331
- simde_mm_set_pi16(INT16_C( -897), INT16_C( -113), INT16_C( -3866), INT16_C( -15759)),
1332
- simde_mm_set_pi8 (INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1333
- INT8_C( 50), INT8_C( -1), INT8_C( 13), INT8_C( -1)) },
1334
- { simde_mm_set_pi16(INT16_C( 0), INT16_C( 4501), INT16_C( 202), INT16_C( 9748)),
1335
- simde_mm_set_pi16(INT16_C( -2), INT16_C( -1), INT16_C( -16348), INT16_C( -6302)),
1336
- simde_mm_set_pi8 (INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1337
- INT8_C( 0), INT8_C( -1), INT8_C( -54), INT8_C( -1)) }
1338
- };
1339
-
1340
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1341
- simde__m64 r = simde_mm_packs_pu16(test_vec[i].a, test_vec[i].b);
1342
- simde_mm_empty();
1343
- simde_assert_m64_u8(r, ==, test_vec[i].r);
1344
- }
1345
-
1346
- simde_mm_empty();
1347
- return MUNIT_OK;
1348
- }
1349
-
1350
- static MunitResult
1351
- test_simde_mm_sll_pi16(const MunitParameter params[], void* data) {
1352
- (void) params;
1353
- (void) data;
1354
-
1355
- const struct {
1356
- simde__m64 a;
1357
- simde__m64 count;
1358
- simde__m64 r;
1359
- } test_vec[8] = {
1360
- { simde_mm_set_pi16(INT16_C( -2612), INT16_C( -7275), INT16_C( 24980), INT16_C( 12744)),
1361
- simde_mm_cvtsi64_m64(15),
1362
- simde_mm_set_pi16(INT16_C( 0), INT16_C( -32768), INT16_C( 0), INT16_C( 0)) },
1363
- { simde_mm_set_pi16(INT16_C( 17143), INT16_C( -12000), INT16_C( 32255), INT16_C( 5448)),
1364
- simde_mm_cvtsi64_m64(10),
1365
- simde_mm_set_pi16(INT16_C( -9216), INT16_C( -32768), INT16_C( -1024), INT16_C( 8192)) },
1366
- { simde_mm_set_pi16(INT16_C( 1219), INT16_C( -18409), INT16_C( 24763), INT16_C( 13023)),
1367
- simde_mm_cvtsi64_m64(3),
1368
- simde_mm_set_pi16(INT16_C( 9752), INT16_C( -16200), INT16_C( 1496), INT16_C( -26888)) },
1369
- { simde_mm_set_pi16(INT16_C( -30853), INT16_C( -438), INT16_C( -13150), INT16_C( -2468)),
1370
- simde_mm_cvtsi64_m64(10),
1371
- simde_mm_set_pi16(INT16_C( -5120), INT16_C( 10240), INT16_C( -30720), INT16_C( 28672)) },
1372
- { simde_mm_set_pi16(INT16_C( -20343), INT16_C( 30713), INT16_C( 26566), INT16_C( 9213)),
1373
- simde_mm_cvtsi64_m64(7),
1374
- simde_mm_set_pi16(INT16_C( 17536), INT16_C( -896), INT16_C( -7424), INT16_C( -384)) },
1375
- { simde_mm_set_pi16(INT16_C( -14337), INT16_C( -4898), INT16_C( 32658), INT16_C( -4944)),
1376
- simde_mm_cvtsi64_m64(5),
1377
- simde_mm_set_pi16(INT16_C( -32), INT16_C( -25664), INT16_C( -3520), INT16_C( -27136)) },
1378
- { simde_mm_set_pi16(INT16_C( 21648), INT16_C( 25416), INT16_C( 19921), INT16_C( -16738)),
1379
- simde_mm_cvtsi64_m64(0),
1380
- simde_mm_set_pi16(INT16_C( 21648), INT16_C( 25416), INT16_C( 19921), INT16_C( -16738)) },
1381
- { simde_mm_set_pi16(INT16_C( -10368), INT16_C( -19483), INT16_C( -15412), INT16_C( -29979)),
1382
- simde_mm_cvtsi64_m64(9),
1383
- simde_mm_set_pi16(INT16_C( 0), INT16_C( -13824), INT16_C( -26624), INT16_C( -13824)) }
1384
- };
1385
-
1386
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1387
- simde__m64 r = simde_mm_sll_pi16(test_vec[i].a, test_vec[i].count);
1388
- simde_mm_empty();
1389
- simde_assert_m64_i8(r, ==, test_vec[i].r);
1390
- }
1391
-
1392
- simde_mm_empty();
1393
- return MUNIT_OK;
1394
- }
1395
-
1396
- static MunitResult
1397
- test_simde_mm_sll_pi32(const MunitParameter params[], void* data) {
1398
- (void) params;
1399
- (void) data;
1400
-
1401
- const struct {
1402
- simde__m64 a;
1403
- simde__m64 count;
1404
- simde__m64 r;
1405
- } test_vec[8] = {
1406
- { simde_mm_set_pi32(INT32_C( 2135609954), INT32_C( -1662756041)),
1407
- simde_mm_cvtsi64_m64(10),
1408
- simde_mm_set_pi32(INT32_C( 726239232), INT32_C( -1855136768)) },
1409
- { simde_mm_set_pi32(INT32_C( 1984991847), INT32_C( -75949890)),
1410
- simde_mm_cvtsi64_m64(14),
1411
- simde_mm_set_pi32(INT32_C( 614055936), INT32_C( 1177518080)) },
1412
- { simde_mm_set_pi32(INT32_C( -1315562518), INT32_C( -1717142831)),
1413
- simde_mm_cvtsi64_m64(22),
1414
- simde_mm_set_pi32(INT32_C( -92274688), INT32_C( -1270874112)) },
1415
- { simde_mm_set_pi32(INT32_C( -814215595), INT32_C( 805054469)),
1416
- simde_mm_cvtsi64_m64(9),
1417
- simde_mm_set_pi32(INT32_C( -266556928), INT32_C( -128972288)) },
1418
- { simde_mm_set_pi32(INT32_C( -1588862908), INT32_C( 2132697891)),
1419
- simde_mm_cvtsi64_m64(1),
1420
- simde_mm_set_pi32(INT32_C( 1117241480), INT32_C( -29571514)) },
1421
- { simde_mm_set_pi32(INT32_C( 782274620), INT32_C( -2120419106)),
1422
- simde_mm_cvtsi64_m64(16),
1423
- simde_mm_set_pi32(INT32_C( -1875116032), INT32_C( -119668736)) },
1424
- { simde_mm_set_pi32(INT32_C( -1687581332), INT32_C( -1263634481)),
1425
- simde_mm_cvtsi64_m64(16),
1426
- simde_mm_set_pi32(INT32_C( -1922301952), INT32_C( 2010054656)) },
1427
- { simde_mm_set_pi32(INT32_C( -1258319564), INT32_C( 975343739)),
1428
- simde_mm_cvtsi64_m64(5),
1429
- simde_mm_set_pi32(INT32_C( -1611520384), INT32_C( 1146228576)) }
1430
- };
1431
-
1432
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1433
- simde__m64 r = simde_mm_sll_pi32(test_vec[i].a, test_vec[i].count);
1434
- simde_mm_empty();
1435
- simde_assert_m64_i8(r, ==, test_vec[i].r);
1436
- }
1437
-
1438
- simde_mm_empty();
1439
- return MUNIT_OK;
1440
- }
1441
-
1442
- static MunitResult
1443
- test_simde_mm_sll_si64(const MunitParameter params[], void* data) {
1444
- (void) params;
1445
- (void) data;
1446
-
1447
- const struct {
1448
- simde__m64 a;
1449
- simde__m64 count;
1450
- simde__m64 r;
1451
- } test_vec[8] = {
1452
- { simde_mm_cvtsi64_m64(INT64_C( 3171924675130206313)),
1453
- simde_mm_cvtsi64_m64(26),
1454
- simde_mm_cvtsi64_m64(INT64_C( -4688886433618853888)) },
1455
- { simde_mm_cvtsi64_m64(INT64_C( 8810857393431583130)),
1456
- simde_mm_cvtsi64_m64(35),
1457
- simde_mm_cvtsi64_m64(INT64_C( 8135977920570064896)) },
1458
- { simde_mm_cvtsi64_m64(INT64_C( 8253138385445189600)),
1459
- simde_mm_cvtsi64_m64(60),
1460
- simde_mm_cvtsi64_m64(INT64_C( 0)) },
1461
- { simde_mm_cvtsi64_m64(INT64_C( -109691783123384247)),
1462
- simde_mm_cvtsi64_m64(35),
1463
- simde_mm_cvtsi64_m64(INT64_C( 4699016138212769792)) },
1464
- { simde_mm_cvtsi64_m64(INT64_C( 797909880260215132)),
1465
- simde_mm_cvtsi64_m64(19),
1466
- simde_mm_cvtsi64_m64(INT64_C( -686801717540421632)) },
1467
- { simde_mm_cvtsi64_m64(INT64_C( -2366434973696685665)),
1468
- simde_mm_cvtsi64_m64(28),
1469
- simde_mm_cvtsi64_m64(INT64_C( -2410559835486552064)) },
1470
- { simde_mm_cvtsi64_m64(INT64_C( 3032641446696114060)),
1471
- simde_mm_cvtsi64_m64(28),
1472
- simde_mm_cvtsi64_m64(INT64_C( 4618209939532283904)) },
1473
- { simde_mm_cvtsi64_m64(INT64_C( 5741540145978860560)),
1474
- simde_mm_cvtsi64_m64(44),
1475
- simde_mm_cvtsi64_m64(INT64_C( 7944631217658265600)) }
1476
- };
1477
-
1478
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1479
- simde__m64 r = simde_mm_sll_si64(test_vec[i].a, test_vec[i].count);
1480
- simde_mm_empty();
1481
- simde_assert_m64_i8(r, ==, test_vec[i].r);
1482
- }
1483
-
1484
- simde_mm_empty();
1485
- return MUNIT_OK;
1486
- }
1487
-
1488
- static MunitResult
1489
- test_simde_mm_slli_pi16(const MunitParameter params[], void* data) {
1490
- (void) params;
1491
- (void) data;
1492
-
1493
- const struct {
1494
- simde__m64 a;
1495
- int count;
1496
- simde__m64 r;
1497
- } test_vec[8] = {
1498
- { simde_mm_set_pi16(INT16_C( -13543), INT16_C( 6360), INT16_C( -1306), INT16_C( -5948)),
1499
- 6,
1500
- simde_mm_set_pi16(INT16_C( -14784), INT16_C( 13824), INT16_C( -18048), INT16_C( 12544)) },
1501
- { simde_mm_set_pi16(INT16_C( 6506), INT16_C( -28533), INT16_C( 3988), INT16_C( -31210)),
1502
- 10,
1503
- simde_mm_set_pi16(INT16_C( -22528), INT16_C( 11264), INT16_C( 20480), INT16_C( 22528)) },
1504
- { simde_mm_set_pi16(INT16_C( 19388), INT16_C( -4520), INT16_C( 9582), INT16_C( 11067)),
1505
- 9,
1506
- simde_mm_set_pi16(INT16_C( 30720), INT16_C( -20480), INT16_C( -9216), INT16_C( 30208)) },
1507
- { simde_mm_set_pi16(INT16_C( 12000), INT16_C( 28876), INT16_C( 29834), INT16_C( -13742)),
1508
- 13,
1509
- simde_mm_set_pi16(INT16_C( 0), INT16_C( -32768), INT16_C( 16384), INT16_C( 16384)) },
1510
- { simde_mm_set_pi16(INT16_C( 4648), INT16_C( -2151), INT16_C( -26641), INT16_C( -27659)),
1511
- 13,
1512
- simde_mm_set_pi16(INT16_C( 0), INT16_C( 8192), INT16_C( -8192), INT16_C( -24576)) },
1513
- { simde_mm_set_pi16(INT16_C( -2353), INT16_C( 20317), INT16_C( 7426), INT16_C( 24788)),
1514
- 8,
1515
- simde_mm_set_pi16(INT16_C( -12544), INT16_C( 23808), INT16_C( 512), INT16_C( -11264)) },
1516
- { simde_mm_set_pi16(INT16_C( -6174), INT16_C( 31492), INT16_C( 28575), INT16_C( -20383)),
1517
- 1,
1518
- simde_mm_set_pi16(INT16_C( -12348), INT16_C( -2552), INT16_C( -8386), INT16_C( 24770)) },
1519
- { simde_mm_set_pi16(INT16_C( -30371), INT16_C( 17334), INT16_C( 2428), INT16_C( -4558)),
1520
- 5,
1521
- simde_mm_set_pi16(INT16_C( 11168), INT16_C( 30400), INT16_C( 12160), INT16_C( -14784)) }
1522
- };
1523
-
1524
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1525
- simde__m64 r = simde_mm_slli_pi16(test_vec[i].a, test_vec[i].count);
1526
- simde_mm_empty();
1527
- simde_assert_m64_i16(r, ==, test_vec[i].r);
1528
- }
1529
-
1530
- simde_mm_empty();
1531
- return MUNIT_OK;
1532
- }
1533
-
1534
- static MunitResult
1535
- test_simde_mm_slli_pi32(const MunitParameter params[], void* data) {
1536
- (void) params;
1537
- (void) data;
1538
-
1539
- const struct {
1540
- simde__m64 a;
1541
- int count;
1542
- simde__m64 r;
1543
- } test_vec[8] = {
1544
- { simde_mm_set_pi32(INT32_C( 83881529), INT32_C( 1357951601)),
1545
- 27,
1546
- simde_mm_set_pi32(INT32_C( -939524096), INT32_C( -2013265920)) },
1547
- { simde_mm_set_pi32(INT32_C( -2138298674), INT32_C( -2019079679)),
1548
- 21,
1549
- simde_mm_set_pi32(INT32_C( -641728512), INT32_C( 1075838976)) },
1550
- { simde_mm_set_pi32(INT32_C( -281448798), INT32_C( -1557273316)),
1551
- 27,
1552
- simde_mm_set_pi32(INT32_C( 268435456), INT32_C( -536870912)) },
1553
- { simde_mm_set_pi32(INT32_C( -1207542290), INT32_C( -694741539)),
1554
- 7,
1555
- simde_mm_set_pi32(INT32_C( 53409536), INT32_C( 1267396224)) },
1556
- { simde_mm_set_pi32(INT32_C( 902716495), INT32_C( 943182057)),
1557
- 20,
1558
- simde_mm_set_pi32(INT32_C( -990904320), INT32_C( 244318208)) },
1559
- { simde_mm_set_pi32(INT32_C( 7423865), INT32_C( -1974692036)),
1560
- 5,
1561
- simde_mm_set_pi32(INT32_C( 237563680), INT32_C( 1234364288)) },
1562
- { simde_mm_set_pi32(INT32_C( 174727032), INT32_C( -891064659)),
1563
- 23,
1564
- simde_mm_set_pi32(INT32_C( -1140850688), INT32_C( 1451229184)) },
1565
- { simde_mm_set_pi32(INT32_C( 134754342), INT32_C( -1894000042)),
1566
- 27,
1567
- simde_mm_set_pi32(INT32_C( 805306368), INT32_C( -1342177280)) }
1568
- };
1569
-
1570
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1571
- simde__m64 r = simde_mm_slli_pi32(test_vec[i].a, test_vec[i].count);
1572
- simde_mm_empty();
1573
- simde_assert_m64_i32(r, ==, test_vec[i].r);
1574
- }
1575
-
1576
- simde_mm_empty();
1577
- return MUNIT_OK;
1578
- }
1579
-
1580
- static MunitResult
1581
- test_simde_mm_slli_si64(const MunitParameter params[], void* data) {
1582
- (void) params;
1583
- (void) data;
1584
-
1585
- const struct {
1586
- simde__m64 a;
1587
- int count;
1588
- simde__m64 r;
1589
- } test_vec[8] = {
1590
- { simde_mm_cvtsi64_m64(INT64_C( -3655983719573882447)),
1591
- 37,
1592
- simde_mm_cvtsi64_m64(INT64_C( 5043809618745098240)) },
1593
- { simde_mm_cvtsi64_m64(INT64_C( 5373634195600553823)),
1594
- 49,
1595
- simde_mm_cvtsi64_m64(INT64_C( 7979815589747097600)) },
1596
- { simde_mm_cvtsi64_m64(INT64_C( 955832682335824267)),
1597
- 11,
1598
- simde_mm_cvtsi64_m64(INT64_C( 2190461610555627520)) },
1599
- { simde_mm_cvtsi64_m64(INT64_C( 4435237962953354472)),
1600
- 32,
1601
- simde_mm_cvtsi64_m64(INT64_C( -6041177681452597248)) },
1602
- { simde_mm_cvtsi64_m64(INT64_C( 509713568463920999)),
1603
- 0,
1604
- simde_mm_cvtsi64_m64(INT64_C( 509713568463920999)) },
1605
- { simde_mm_cvtsi64_m64(INT64_C( 3092984209993521199)),
1606
- 24,
1607
- simde_mm_cvtsi64_m64(INT64_C( -4581130211545841664)) },
1608
- { simde_mm_cvtsi64_m64(INT64_C( -9034725437056781767)),
1609
- 38,
1610
- simde_mm_cvtsi64_m64(INT64_C( 4817882106908639232)) },
1611
- { simde_mm_cvtsi64_m64(INT64_C( 8352260709189542260)),
1612
- 34,
1613
- simde_mm_cvtsi64_m64(INT64_C( -8446635447710384128)) }
1614
- };
1615
-
1616
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1617
- simde__m64 r = simde_mm_slli_si64(test_vec[i].a, test_vec[i].count);
1618
- simde_mm_empty();
1619
- simde_assert_m64_i64(r, ==, test_vec[i].r);
1620
- }
1621
-
1622
- simde_mm_empty();
1623
- return MUNIT_OK;
1624
- }
1625
-
1626
- static MunitResult
1627
- test_simde_mm_srl_pi16(const MunitParameter params[], void* data) {
1628
- (void) params;
1629
- (void) data;
1630
-
1631
- const struct {
1632
- simde__m64 a;
1633
- simde__m64 count;
1634
- simde__m64 r;
1635
- } test_vec[8] = {
1636
- { simde_mm_set_pi16(INT16_C( -3979), INT16_C( -30013), INT16_C( 22836), INT16_C( 7438)),
1637
- simde_mm_cvtsi64_m64(5),
1638
- simde_mm_set_pi16(INT16_C( 1923), INT16_C( 1110), INT16_C( 713), INT16_C( 232)) },
1639
- { simde_mm_set_pi16(INT16_C( -17889), INT16_C( -31199), INT16_C( 2233), INT16_C( 29176)),
1640
- simde_mm_cvtsi64_m64(9),
1641
- simde_mm_set_pi16(INT16_C( 93), INT16_C( 67), INT16_C( 4), INT16_C( 56)) },
1642
- { simde_mm_set_pi16(INT16_C( -14320), INT16_C( -29349), INT16_C( -4712), INT16_C( 3031)),
1643
- simde_mm_cvtsi64_m64(6),
1644
- simde_mm_set_pi16(INT16_C( 800), INT16_C( 565), INT16_C( 950), INT16_C( 47)) },
1645
- { simde_mm_set_pi16(INT16_C( 28706), INT16_C( -15113), INT16_C( -3287), INT16_C( -13609)),
1646
- simde_mm_cvtsi64_m64(13),
1647
- simde_mm_set_pi16(INT16_C( 3), INT16_C( 6), INT16_C( 7), INT16_C( 6)) },
1648
- { simde_mm_set_pi16(INT16_C( -4348), INT16_C( 14324), INT16_C( 12491), INT16_C( -32763)),
1649
- simde_mm_cvtsi64_m64(2),
1650
- simde_mm_set_pi16(INT16_C( 15297), INT16_C( 3581), INT16_C( 3122), INT16_C( 8193)) },
1651
- { simde_mm_set_pi16(INT16_C( -1454), INT16_C( -3136), INT16_C( 16900), INT16_C( -26266)),
1652
- simde_mm_cvtsi64_m64(11),
1653
- simde_mm_set_pi16(INT16_C( 31), INT16_C( 30), INT16_C( 8), INT16_C( 19)) },
1654
- { simde_mm_set_pi16(INT16_C( 23032), INT16_C( 21033), INT16_C( 2074), INT16_C( -30320)),
1655
- simde_mm_cvtsi64_m64(9),
1656
- simde_mm_set_pi16(INT16_C( 44), INT16_C( 41), INT16_C( 4), INT16_C( 68)) },
1657
- { simde_mm_set_pi16(INT16_C( 2403), INT16_C( 6070), INT16_C( -16381), INT16_C( 15198)),
1658
- simde_mm_cvtsi64_m64(10),
1659
- simde_mm_set_pi16(INT16_C( 2), INT16_C( 5), INT16_C( 48), INT16_C( 14)) }
1660
- };
1661
-
1662
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1663
- simde__m64 r = simde_mm_srl_pi16(test_vec[i].a, test_vec[i].count);
1664
- simde_mm_empty();
1665
- simde_assert_m64_i16(r, ==, test_vec[i].r);
1666
- }
1667
-
1668
- simde_mm_empty();
1669
- return MUNIT_OK;
1670
- }
1671
-
1672
- static MunitResult
1673
- test_simde_mm_srl_pi32(const MunitParameter params[], void* data) {
1674
- (void) params;
1675
- (void) data;
1676
-
1677
- const struct {
1678
- simde__m64 a;
1679
- simde__m64 count;
1680
- simde__m64 r;
1681
- } test_vec[8] = {
1682
- { simde_mm_set_pi32(INT32_C( 1162874425), INT32_C( 701403552)),
1683
- simde_mm_cvtsi64_m64(12),
1684
- simde_mm_set_pi32(INT32_C( 283904), INT32_C( 171241)) },
1685
- { simde_mm_set_pi32(INT32_C( -1730008971), INT32_C( 1480718473)),
1686
- simde_mm_cvtsi64_m64(7),
1687
- simde_mm_set_pi32(INT32_C( 20038736), INT32_C( 11568113)) },
1688
- { simde_mm_set_pi32(INT32_C( -2020652937), INT32_C( -14094139)),
1689
- simde_mm_cvtsi64_m64(9),
1690
- simde_mm_set_pi32(INT32_C( 4442020), INT32_C( 8361080)) },
1691
- { simde_mm_set_pi32(INT32_C( 1211264864), INT32_C( -549692031)),
1692
- simde_mm_cvtsi64_m64(13),
1693
- simde_mm_set_pi32(INT32_C( 147859), INT32_C( 457186)) },
1694
- { simde_mm_set_pi32(INT32_C( 526771625), INT32_C( -1372326605)),
1695
- simde_mm_cvtsi64_m64(6),
1696
- simde_mm_set_pi32(INT32_C( 8230806), INT32_C( 45666260)) },
1697
- { simde_mm_set_pi32(INT32_C( 257774375), INT32_C( 1425803958)),
1698
- simde_mm_cvtsi64_m64(0),
1699
- simde_mm_set_pi32(INT32_C( 257774375), INT32_C( 1425803958)) },
1700
- { simde_mm_set_pi32(INT32_C( 751075720), INT32_C( -1937798467)),
1701
- simde_mm_cvtsi64_m64(6),
1702
- simde_mm_set_pi32(INT32_C( 11735558), INT32_C( 36830762)) },
1703
- { simde_mm_set_pi32(INT32_C( -703624712), INT32_C( 1484883517)),
1704
- simde_mm_cvtsi64_m64(14),
1705
- simde_mm_set_pi32(INT32_C( 219198), INT32_C( 90630)) }
1706
- };
1707
-
1708
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1709
- simde__m64 r = simde_mm_srl_pi32(test_vec[i].a, test_vec[i].count);
1710
- simde_mm_empty();
1711
- simde_assert_m64_i32(r, ==, test_vec[i].r);
1712
- }
1713
-
1714
- simde_mm_empty();
1715
- return MUNIT_OK;
1716
- }
1717
-
1718
- static MunitResult
1719
- test_simde_mm_srl_si64(const MunitParameter params[], void* data) {
1720
- (void) params;
1721
- (void) data;
1722
-
1723
- const struct {
1724
- simde__m64 a;
1725
- simde__m64 count;
1726
- simde__m64 r;
1727
- } test_vec[8] = {
1728
- { simde_mm_cvtsi64_m64(INT64_C( -1550745422537000797)),
1729
- simde_mm_cvtsi64_m64(27),
1730
- simde_mm_cvtsi64_m64(INT64_C( 125884999716)) },
1731
- { simde_mm_cvtsi64_m64(INT64_C( -4905487896917789484)),
1732
- simde_mm_cvtsi64_m64(51),
1733
- simde_mm_cvtsi64_m64(INT64_C( 6013)) },
1734
- { simde_mm_cvtsi64_m64(INT64_C( 784798283774789910)),
1735
- simde_mm_cvtsi64_m64(61),
1736
- simde_mm_cvtsi64_m64(INT64_C( 0)) },
1737
- { simde_mm_cvtsi64_m64(INT64_C( -7160969444731528566)),
1738
- simde_mm_cvtsi64_m64(36),
1739
- simde_mm_cvtsi64_m64(INT64_C( 164229635)) },
1740
- { simde_mm_cvtsi64_m64(INT64_C( -123534753035910002)),
1741
- simde_mm_cvtsi64_m64(20),
1742
- simde_mm_cvtsi64_m64(INT64_C( 17474374123262)) },
1743
- { simde_mm_cvtsi64_m64(INT64_C( 5720385725637272506)),
1744
- simde_mm_cvtsi64_m64(33),
1745
- simde_mm_cvtsi64_m64(INT64_C( 665940545)) },
1746
- { simde_mm_cvtsi64_m64(INT64_C( -3398235017645277558)),
1747
- simde_mm_cvtsi64_m64(63),
1748
- simde_mm_cvtsi64_m64(INT64_C( 1)) },
1749
- { simde_mm_cvtsi64_m64(INT64_C( -5355948413550293775)),
1750
- simde_mm_cvtsi64_m64(7),
1751
- simde_mm_cvtsi64_m64(INT64_C( 102271841094994201)) }
1752
- };
1753
-
1754
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1755
- simde__m64 r = simde_mm_srl_si64(test_vec[i].a, test_vec[i].count);
1756
- simde_mm_empty();
1757
- simde_assert_m64_i64(r, ==, test_vec[i].r);
1758
- }
1759
-
1760
- simde_mm_empty();
1761
- return MUNIT_OK;
1762
- }
1763
-
1764
- static MunitResult
1765
- test_simde_mm_srli_pi16(const MunitParameter params[], void* data) {
1766
- (void) params;
1767
- (void) data;
1768
-
1769
- const struct {
1770
- simde__m64 a;
1771
- int count;
1772
- simde__m64 r;
1773
- } test_vec[8] = {
1774
- { simde_mm_set_pi16(INT16_C( -15698), INT16_C( -32310), INT16_C( 339), INT16_C( 3496)),
1775
- 10,
1776
- simde_mm_set_pi16(INT16_C( 48), INT16_C( 32), INT16_C( 0), INT16_C( 3)) },
1777
- { simde_mm_set_pi16(INT16_C( -27263), INT16_C( -18160), INT16_C( -20487), INT16_C( -21173)),
1778
- 6,
1779
- simde_mm_set_pi16(INT16_C( 598), INT16_C( 740), INT16_C( 703), INT16_C( 693)) },
1780
- { simde_mm_set_pi16(INT16_C( 23805), INT16_C( -14941), INT16_C( 6558), INT16_C( -23896)),
1781
- 6,
1782
- simde_mm_set_pi16(INT16_C( 371), INT16_C( 790), INT16_C( 102), INT16_C( 650)) },
1783
- { simde_mm_set_pi16(INT16_C( 22534), INT16_C( -27358), INT16_C( -9489), INT16_C( -15972)),
1784
- 7,
1785
- simde_mm_set_pi16(INT16_C( 176), INT16_C( 298), INT16_C( 437), INT16_C( 387)) },
1786
- { simde_mm_set_pi16(INT16_C( 2212), INT16_C( -29223), INT16_C( -19783), INT16_C( -4105)),
1787
- 0,
1788
- simde_mm_set_pi16(INT16_C( 2212), INT16_C( -29223), INT16_C( -19783), INT16_C( -4105)) },
1789
- { simde_mm_set_pi16(INT16_C( 24559), INT16_C( -21850), INT16_C( -30646), INT16_C( 21423)),
1790
- 14,
1791
- simde_mm_set_pi16(INT16_C( 1), INT16_C( 2), INT16_C( 2), INT16_C( 1)) },
1792
- { simde_mm_set_pi16(INT16_C( -3241), INT16_C( -31506), INT16_C( 3662), INT16_C( 16805)),
1793
- 5,
1794
- simde_mm_set_pi16(INT16_C( 1946), INT16_C( 1063), INT16_C( 114), INT16_C( 525)) },
1795
- { simde_mm_set_pi16(INT16_C( -13677), INT16_C( 7117), INT16_C( -15559), INT16_C( -8368)),
1796
- 14,
1797
- simde_mm_set_pi16(INT16_C( 3), INT16_C( 0), INT16_C( 3), INT16_C( 3)) }
1798
- };
1799
-
1800
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1801
- simde__m64 r = simde_mm_srli_pi16(test_vec[i].a, test_vec[i].count);
1802
- simde_mm_empty();
1803
- simde_assert_m64_i16(r, ==, test_vec[i].r);
1804
- }
1805
-
1806
- simde_mm_empty();
1807
- return MUNIT_OK;
1808
- }
1809
-
1810
- static MunitResult
1811
- test_simde_mm_srli_pi32(const MunitParameter params[], void* data) {
1812
- (void) params;
1813
- (void) data;
1814
-
1815
- const struct {
1816
- simde__m64 a;
1817
- int count;
1818
- simde__m64 r;
1819
- } test_vec[8] = {
1820
- { simde_mm_set_pi32(INT32_C( 116105102), INT32_C( -612588364)),
1821
- 14,
1822
- simde_mm_set_pi32(INT32_C( 7086), INT32_C( 224754)) },
1823
- { simde_mm_set_pi32(INT32_C( -569249998), INT32_C( 1055993616)),
1824
- 8,
1825
- simde_mm_set_pi32(INT32_C( 14553583), INT32_C( 4124975)) },
1826
- { simde_mm_set_pi32(INT32_C( 851549428), INT32_C( -1334511981)),
1827
- 15,
1828
- simde_mm_set_pi32(INT32_C( 25987), INT32_C( 90345)) },
1829
- { simde_mm_set_pi32(INT32_C( -1526427094), INT32_C( 130645372)),
1830
- 14,
1831
- simde_mm_set_pi32(INT32_C( 168978), INT32_C( 7973)) },
1832
- { simde_mm_set_pi32(INT32_C( -1832776933), INT32_C( -28796512)),
1833
- 0,
1834
- simde_mm_set_pi32(INT32_C( -1832776933), INT32_C( -28796512)) },
1835
- { simde_mm_set_pi32(INT32_C( -1521422315), INT32_C( 230241179)),
1836
- 4,
1837
- simde_mm_set_pi32(INT32_C( 173346561), INT32_C( 14390073)) },
1838
- { simde_mm_set_pi32(INT32_C( 981909051), INT32_C( -764766890)),
1839
- 15,
1840
- simde_mm_set_pi32(INT32_C( 29965), INT32_C( 107733)) },
1841
- { simde_mm_set_pi32(INT32_C( -1889202569), INT32_C( 1472716773)),
1842
- 10,
1843
- simde_mm_set_pi32(INT32_C( 2349379), INT32_C( 1438199)) }
1844
- };
1845
-
1846
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1847
- simde__m64 r = simde_mm_srli_pi32(test_vec[i].a, test_vec[i].count);
1848
- simde_mm_empty();
1849
- simde_assert_m64_i32(r, ==, test_vec[i].r);
1850
- }
1851
-
1852
- simde_mm_empty();
1853
- return MUNIT_OK;
1854
- }
1855
-
1856
- static MunitResult
1857
- test_simde_mm_srli_si64(const MunitParameter params[], void* data) {
1858
- (void) params;
1859
- (void) data;
1860
-
1861
- const struct {
1862
- simde__m64 a;
1863
- int count;
1864
- simde__m64 r;
1865
- } test_vec[8] = {
1866
- { simde_mm_cvtsi64_m64(INT64_C( -8294501885901195762)),
1867
- 62,
1868
- simde_mm_cvtsi64_m64(INT64_C( 2)) },
1869
- { simde_mm_cvtsi64_m64(INT64_C( 7027314223871146181)),
1870
- 7,
1871
- simde_mm_cvtsi64_m64(INT64_C( 54900892373993329)) },
1872
- { simde_mm_cvtsi64_m64(INT64_C( 2649805052949317833)),
1873
- 19,
1874
- simde_mm_cvtsi64_m64(INT64_C( 5054102044962)) },
1875
- { simde_mm_cvtsi64_m64(INT64_C( 778555941675423413)),
1876
- 12,
1877
- simde_mm_cvtsi64_m64(INT64_C( 190077134198101)) },
1878
- { simde_mm_cvtsi64_m64(INT64_C( 1453695186595163432)),
1879
- 17,
1880
- simde_mm_cvtsi64_m64(INT64_C( 11090814106713)) },
1881
- { simde_mm_cvtsi64_m64(INT64_C( 834539484136231083)),
1882
- 22,
1883
- simde_mm_cvtsi64_m64(INT64_C( 198969718011)) },
1884
- { simde_mm_cvtsi64_m64(INT64_C( 1883775849744838333)),
1885
- 12,
1886
- simde_mm_cvtsi64_m64(INT64_C( 459906213316610)) },
1887
- { simde_mm_cvtsi64_m64(INT64_C( 7946503469684399228)),
1888
- 61,
1889
- simde_mm_cvtsi64_m64(INT64_C( 3)) }
1890
- };
1891
-
1892
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1893
- simde__m64 r = simde_mm_srli_si64(test_vec[i].a, test_vec[i].count);
1894
- simde_mm_empty();
1895
- simde_assert_m64_i64(r, ==, test_vec[i].r);
1896
- }
1897
-
1898
- simde_mm_empty();
1899
- return MUNIT_OK;
1900
- }
1901
-
1902
- static MunitResult
1903
- test_simde_mm_srai_pi16(const MunitParameter params[], void* data) {
1904
- (void) params;
1905
- (void) data;
1906
-
1907
- const struct {
1908
- simde__m64 a;
1909
- int count;
1910
- simde__m64 r;
1911
- } test_vec[8] = {
1912
- { simde_mm_set_pi16(INT16_C( -32259), INT16_C( -10390), INT16_C( 28627), INT16_C( 18747)),
1913
- 6,
1914
- simde_mm_set_pi16(INT16_C( -505), INT16_C( -163), INT16_C( 447), INT16_C( 292)) },
1915
- { simde_mm_set_pi16(INT16_C( -300), INT16_C( -3262), INT16_C( -2861), INT16_C( -11389)),
1916
- 15,
1917
- simde_mm_set_pi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1)) },
1918
- { simde_mm_set_pi16(INT16_C( 6480), INT16_C( -15684), INT16_C( 3587), INT16_C( 7844)),
1919
- 2,
1920
- simde_mm_set_pi16(INT16_C( 1620), INT16_C( -3921), INT16_C( 896), INT16_C( 1961)) },
1921
- { simde_mm_set_pi16(INT16_C( -6044), INT16_C( -15946), INT16_C( 1721), INT16_C( -30273)),
1922
- 5,
1923
- simde_mm_set_pi16(INT16_C( -189), INT16_C( -499), INT16_C( 53), INT16_C( -947)) },
1924
- { simde_mm_set_pi16(INT16_C( 24609), INT16_C( 14431), INT16_C( 1917), INT16_C( -13176)),
1925
- 13,
1926
- simde_mm_set_pi16(INT16_C( 3), INT16_C( 1), INT16_C( 0), INT16_C( -2)) },
1927
- { simde_mm_set_pi16(INT16_C( 13575), INT16_C( 32610), INT16_C( -4763), INT16_C( 10748)),
1928
- 12,
1929
- simde_mm_set_pi16(INT16_C( 3), INT16_C( 7), INT16_C( -2), INT16_C( 2)) },
1930
- { simde_mm_set_pi16(INT16_C( -2824), INT16_C( 28483), INT16_C( -23495), INT16_C( -17241)),
1931
- 11,
1932
- simde_mm_set_pi16(INT16_C( -2), INT16_C( 13), INT16_C( -12), INT16_C( -9)) },
1933
- { simde_mm_set_pi16(INT16_C( -5294), INT16_C( 29284), INT16_C( -3542), INT16_C( 21806)),
1934
- 10,
1935
- simde_mm_set_pi16(INT16_C( -6), INT16_C( 28), INT16_C( -4), INT16_C( 21)) }
1936
- };
1937
-
1938
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1939
- simde__m64 r = simde_mm_srai_pi16(test_vec[i].a, test_vec[i].count);
1940
- simde_mm_empty();
1941
- simde_assert_m64_i16(r, ==, test_vec[i].r);
1942
- }
1943
-
1944
- simde_mm_empty();
1945
- return MUNIT_OK;
1946
- }
1947
-
1948
- static MunitResult
1949
- test_simde_mm_srai_pi32(const MunitParameter params[], void* data) {
1950
- (void) params;
1951
- (void) data;
1952
-
1953
- const struct {
1954
- simde__m64 a;
1955
- int count;
1956
- simde__m64 r;
1957
- } test_vec[8] = {
1958
- { simde_mm_set_pi32(INT32_C( -2114070678), INT32_C( 1876117819)),
1959
- 6,
1960
- simde_mm_set_pi32(INT32_C( -33032355), INT32_C( 29314340)) },
1961
- { simde_mm_set_pi32(INT32_C( -19598526), INT32_C( -187444349)),
1962
- 15,
1963
- simde_mm_set_pi32(INT32_C( -599), INT32_C( -5721)) },
1964
- { simde_mm_set_pi32(INT32_C( 424723132), INT32_C( 235085476)),
1965
- 2,
1966
- simde_mm_set_pi32(INT32_C( 106180783), INT32_C( 58771369)) },
1967
- { simde_mm_set_pi32(INT32_C( -396049994), INT32_C( 112822719)),
1968
- 5,
1969
- simde_mm_set_pi32(INT32_C( -12376563), INT32_C( 3525709)) },
1970
- { simde_mm_set_pi32(INT32_C( 1612789855), INT32_C( 125684872)),
1971
- 13,
1972
- simde_mm_set_pi32(INT32_C( 196873), INT32_C( 15342)) },
1973
- { simde_mm_set_pi32(INT32_C( 889683810), INT32_C( -312137220)),
1974
- 12,
1975
- simde_mm_set_pi32(INT32_C( 217207), INT32_C( -76206)) },
1976
- { simde_mm_set_pi32(INT32_C( -185045181), INT32_C( -1539720025)),
1977
- 11,
1978
- simde_mm_set_pi32(INT32_C( -90355), INT32_C( -751817)) },
1979
- { simde_mm_set_pi32(INT32_C( -346918300), INT32_C( -232106706)),
1980
- 10,
1981
- simde_mm_set_pi32(INT32_C( -338788), INT32_C( -226667)) }
1982
- };
1983
-
1984
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1985
- simde__m64 r = simde_mm_srai_pi32(test_vec[i].a, test_vec[i].count);
1986
- simde_mm_empty();
1987
- simde_assert_m64_i32(r, ==, test_vec[i].r);
1988
- }
1989
-
1990
- simde_mm_empty();
1991
- return MUNIT_OK;
1992
- }
1993
-
1994
- static MunitResult
1995
- test_simde_mm_sra_pi16(const MunitParameter params[], void* data) {
1996
- (void) params;
1997
- (void) data;
1998
-
1999
- const struct {
2000
- simde__m64 a;
2001
- simde__m64 count;
2002
- simde__m64 r;
2003
- } test_vec[8] = {
2004
- { simde_mm_set_pi16(INT16_C( 17561), INT16_C( 10489), INT16_C( -28823), INT16_C( -32541)),
2005
- simde_mm_cvtsi64_m64(11),
2006
- simde_mm_set_pi16(INT16_C( 8), INT16_C( 5), INT16_C( -15), INT16_C( -16)) },
2007
- { simde_mm_set_pi16(INT16_C( -23916), INT16_C( 22319), INT16_C( -24731), INT16_C( -24948)),
2008
- simde_mm_cvtsi64_m64(6),
2009
- simde_mm_set_pi16(INT16_C( -374), INT16_C( 348), INT16_C( -387), INT16_C( -390)) },
2010
- { simde_mm_set_pi16(INT16_C( 10305), INT16_C( -29863), INT16_C( -25929), INT16_C( 26582)),
2011
- simde_mm_cvtsi64_m64(4),
2012
- simde_mm_set_pi16(INT16_C( 644), INT16_C( -1867), INT16_C( -1621), INT16_C( 1661)) },
2013
- { simde_mm_set_pi16(INT16_C( -11917), INT16_C( 7165), INT16_C( 860), INT16_C( -7108)),
2014
- simde_mm_cvtsi64_m64(3),
2015
- simde_mm_set_pi16(INT16_C( -1490), INT16_C( 895), INT16_C( 107), INT16_C( -889)) },
2016
- { simde_mm_set_pi16(INT16_C( 30600), INT16_C( 3146), INT16_C( -22841), INT16_C( -27601)),
2017
- simde_mm_cvtsi64_m64(0),
2018
- simde_mm_set_pi16(INT16_C( 30600), INT16_C( 3146), INT16_C( -22841), INT16_C( -27601)) },
2019
- { simde_mm_set_pi16(INT16_C( 7952), INT16_C( 8542), INT16_C( -27736), INT16_C( 20289)),
2020
- simde_mm_cvtsi64_m64(1),
2021
- simde_mm_set_pi16(INT16_C( 3976), INT16_C( 4271), INT16_C( -13868), INT16_C( 10144)) },
2022
- { simde_mm_set_pi16(INT16_C( -24594), INT16_C( -8796), INT16_C( -25195), INT16_C( 300)),
2023
- simde_mm_cvtsi64_m64(10),
2024
- simde_mm_set_pi16(INT16_C( -25), INT16_C( -9), INT16_C( -25), INT16_C( 0)) },
2025
- { simde_mm_set_pi16(INT16_C( 9552), INT16_C( 20569), INT16_C( 1838), INT16_C( 26385)),
2026
- simde_mm_cvtsi64_m64(9),
2027
- simde_mm_set_pi16(INT16_C( 18), INT16_C( 40), INT16_C( 3), INT16_C( 51)) }
2028
- };
2029
-
2030
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2031
- simde__m64 r = simde_mm_sra_pi16(test_vec[i].a, test_vec[i].count);
2032
- simde_mm_empty();
2033
- simde_assert_m64_i16(r, ==, test_vec[i].r);
2034
- }
2035
-
2036
- simde_mm_empty();
2037
- return MUNIT_OK;
2038
- }
2039
-
2040
- static MunitResult
2041
- test_simde_mm_sra_pi32(const MunitParameter params[], void* data) {
2042
- (void) params;
2043
- (void) data;
2044
-
2045
- const struct {
2046
- simde__m64 a;
2047
- simde__m64 count;
2048
- simde__m64 r;
2049
- } test_vec[8] = {
2050
- { simde_mm_set_pi32(INT32_C( -1917317013), INT32_C( -1383526879)),
2051
- simde_mm_cvtsi64_m64(15),
2052
- simde_mm_set_pi32(INT32_C( -58512), INT32_C( -42222)) },
2053
- { simde_mm_set_pi32(INT32_C( -736945287), INT32_C( 858975517)),
2054
- simde_mm_cvtsi64_m64(18),
2055
- simde_mm_set_pi32(INT32_C( -2812), INT32_C( 3276)) },
2056
- { simde_mm_set_pi32(INT32_C( 1016725733), INT32_C( -1716419270)),
2057
- simde_mm_cvtsi64_m64(20),
2058
- simde_mm_set_pi32(INT32_C( 969), INT32_C( -1637)) },
2059
- { simde_mm_set_pi32(INT32_C( 884929023), INT32_C( -2109726169)),
2060
- simde_mm_cvtsi64_m64(17),
2061
- simde_mm_set_pi32(INT32_C( 6751), INT32_C( -16096)) },
2062
- { simde_mm_set_pi32(INT32_C( 1766981669), INT32_C( 1505895116)),
2063
- simde_mm_cvtsi64_m64(8),
2064
- simde_mm_set_pi32(INT32_C( 6902272), INT32_C( 5882402)) },
2065
- { simde_mm_set_pi32(INT32_C( 1732469741), INT32_C( -2109399559)),
2066
- simde_mm_cvtsi64_m64(22),
2067
- simde_mm_set_pi32(INT32_C( 413), INT32_C( -503)) },
2068
- { simde_mm_set_pi32(INT32_C( -1207208411), INT32_C( 962459192)),
2069
- simde_mm_cvtsi64_m64(24),
2070
- simde_mm_set_pi32(INT32_C( -72), INT32_C( 57)) },
2071
- { simde_mm_set_pi32(INT32_C( 519578965), INT32_C( 1181576220)),
2072
- simde_mm_cvtsi64_m64(2),
2073
- simde_mm_set_pi32(INT32_C( 129894741), INT32_C( 295394055)) }
2074
- };
2075
-
2076
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2077
- simde__m64 r = simde_mm_sra_pi32(test_vec[i].a, test_vec[i].count);
2078
- simde_mm_empty();
2079
- simde_assert_m64_i32(r, ==, test_vec[i].r);
2080
- }
2081
-
2082
- simde_mm_empty();
2083
- return MUNIT_OK;
2084
- }
2085
-
2086
- static MunitResult
2087
- test_simde_mm_sub_pi8(const MunitParameter params[], void* data) {
2088
- (void) params;
2089
- (void) data;
2090
-
2091
- const struct {
2092
- simde__m64 a;
2093
- simde__m64 b;
2094
- simde__m64 r;
2095
- } test_vec[8] = {
2096
- { simde_mm_set_pi8(INT8_C( -68), INT8_C(-128), INT8_C(-110), INT8_C( -46), INT8_C( 64), INT8_C( -87), INT8_C( 123), INT8_C( 83)),
2097
- simde_mm_set_pi8(INT8_C( -44), INT8_C( -27), INT8_C(-126), INT8_C( 47), INT8_C( -5), INT8_C( 124), INT8_C(-111), INT8_C( 88)),
2098
- simde_mm_set_pi8(INT8_C( -24), INT8_C(-101), INT8_C( 16), INT8_C( -93), INT8_C( 69), INT8_C( 45), INT8_C( -22), INT8_C( -5)) },
2099
- { simde_mm_set_pi8(INT8_C( 7), INT8_C( 68), INT8_C( -53), INT8_C( -82), INT8_C( 50), INT8_C(-107), INT8_C( 109), INT8_C( 33)),
2100
- simde_mm_set_pi8(INT8_C( 121), INT8_C( 50), INT8_C( 31), INT8_C( 80), INT8_C( 118), INT8_C( 8), INT8_C( -73), INT8_C( 38)),
2101
- simde_mm_set_pi8(INT8_C(-114), INT8_C( 18), INT8_C( -84), INT8_C( 94), INT8_C( -68), INT8_C(-115), INT8_C( -74), INT8_C( -5)) },
2102
- { simde_mm_set_pi8(INT8_C( 60), INT8_C( 5), INT8_C( 4), INT8_C( -85), INT8_C( -61), INT8_C( 71), INT8_C( -19), INT8_C( -92)),
2103
- simde_mm_set_pi8(INT8_C( -51), INT8_C( 118), INT8_C( 99), INT8_C( 14), INT8_C( 124), INT8_C(-115), INT8_C( 49), INT8_C( 19)),
2104
- simde_mm_set_pi8(INT8_C( 111), INT8_C(-113), INT8_C( -95), INT8_C( -99), INT8_C( 71), INT8_C( -70), INT8_C( -68), INT8_C(-111)) },
2105
- { simde_mm_set_pi8(INT8_C( 80), INT8_C( 47), INT8_C( 46), INT8_C( -13), INT8_C( 94), INT8_C( -69), INT8_C( -72), INT8_C( -28)),
2106
- simde_mm_set_pi8(INT8_C( 45), INT8_C( 99), INT8_C( 14), INT8_C( 4), INT8_C( 89), INT8_C( -77), INT8_C( -4), INT8_C( 109)),
2107
- simde_mm_set_pi8(INT8_C( 35), INT8_C( -52), INT8_C( 32), INT8_C( -17), INT8_C( 5), INT8_C( 8), INT8_C( -68), INT8_C( 119)) },
2108
- { simde_mm_set_pi8(INT8_C( 117), INT8_C(-101), INT8_C( -54), INT8_C( -50), INT8_C( 55), INT8_C( -97), INT8_C( -74), INT8_C( 79)),
2109
- simde_mm_set_pi8(INT8_C( 116), INT8_C( 19), INT8_C( 84), INT8_C( 90), INT8_C( -15), INT8_C( -49), INT8_C( 34), INT8_C(-124)),
2110
- simde_mm_set_pi8(INT8_C( 1), INT8_C(-120), INT8_C( 118), INT8_C( 116), INT8_C( 70), INT8_C( -48), INT8_C(-108), INT8_C( -53)) },
2111
- { simde_mm_set_pi8(INT8_C( 43), INT8_C( -88), INT8_C( 7), INT8_C( -31), INT8_C( -45), INT8_C( -6), INT8_C( -61), INT8_C( -47)),
2112
- simde_mm_set_pi8(INT8_C(-110), INT8_C( 87), INT8_C(-102), INT8_C( -63), INT8_C( -35), INT8_C( 78), INT8_C( 96), INT8_C( 51)),
2113
- simde_mm_set_pi8(INT8_C(-103), INT8_C( 81), INT8_C( 109), INT8_C( 32), INT8_C( -10), INT8_C( -84), INT8_C( 99), INT8_C( -98)) },
2114
- { simde_mm_set_pi8(INT8_C(-113), INT8_C( -62), INT8_C(-117), INT8_C( 34), INT8_C( -40), INT8_C( 24), INT8_C( -20), INT8_C( 52)),
2115
- simde_mm_set_pi8(INT8_C( 53), INT8_C( -16), INT8_C( 75), INT8_C( 38), INT8_C( 2), INT8_C( -75), INT8_C( -51), INT8_C( 92)),
2116
- simde_mm_set_pi8(INT8_C( 90), INT8_C( -46), INT8_C( 64), INT8_C( -4), INT8_C( -42), INT8_C( 99), INT8_C( 31), INT8_C( -40)) },
2117
- { simde_mm_set_pi8(INT8_C( -94), INT8_C( -1), INT8_C( -70), INT8_C( 90), INT8_C(-105), INT8_C( -20), INT8_C( -71), INT8_C( -95)),
2118
- simde_mm_set_pi8(INT8_C( -97), INT8_C( 49), INT8_C( 71), INT8_C( 69), INT8_C( -48), INT8_C( 31), INT8_C( -19), INT8_C( 28)),
2119
- simde_mm_set_pi8(INT8_C( 3), INT8_C( -50), INT8_C( 115), INT8_C( 21), INT8_C( -57), INT8_C( -51), INT8_C( -52), INT8_C(-123)) }
2120
- };
2121
-
2122
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2123
- simde__m64 r = simde_mm_sub_pi8(test_vec[i].a, test_vec[i].b);
2124
- simde_mm_empty();
2125
- simde_assert_m64_i8(r, ==, test_vec[i].r);
2126
- }
2127
-
2128
- simde_mm_empty();
2129
- return MUNIT_OK;
2130
- }
2131
-
2132
- static MunitResult
2133
- test_simde_mm_sub_pi16(const MunitParameter params[], void* data) {
2134
- (void) params;
2135
- (void) data;
2136
-
2137
- const struct {
2138
- simde__m64 a;
2139
- simde__m64 b;
2140
- simde__m64 r;
2141
- } test_vec[8] = {
2142
- { simde_mm_set_pi16(INT16_C( -19579), INT16_C( 12561), INT16_C( 18345), INT16_C( 16319)),
2143
- simde_mm_set_pi16(INT16_C( -28282), INT16_C( 12811), INT16_C( -17042), INT16_C( 32694)),
2144
- simde_mm_set_pi16(INT16_C( 8703), INT16_C( -250), INT16_C( -30149), INT16_C( -16375)) },
2145
- { simde_mm_set_pi16(INT16_C( 26389), INT16_C( -16440), INT16_C( 31193), INT16_C( 17420)),
2146
- simde_mm_set_pi16(INT16_C( -16772), INT16_C( -30407), INT16_C( 13204), INT16_C( -3950)),
2147
- simde_mm_set_pi16(INT16_C( -22375), INT16_C( 13967), INT16_C( 17989), INT16_C( 21370)) },
2148
- { simde_mm_set_pi16(INT16_C( 27021), INT16_C( -21341), INT16_C( -29765), INT16_C( -27825)),
2149
- simde_mm_set_pi16(INT16_C( 32255), INT16_C( -11881), INT16_C( -17239), INT16_C( 17727)),
2150
- simde_mm_set_pi16(INT16_C( -5234), INT16_C( -9460), INT16_C( -12526), INT16_C( 19984)) },
2151
- { simde_mm_set_pi16(INT16_C( -1061), INT16_C( 10691), INT16_C( 5402), INT16_C( -29779)),
2152
- simde_mm_set_pi16(INT16_C( -3105), INT16_C( 17443), INT16_C( 29683), INT16_C( -4669)),
2153
- simde_mm_set_pi16(INT16_C( 2044), INT16_C( -6752), INT16_C( -24281), INT16_C( -25110)) },
2154
- { simde_mm_set_pi16(INT16_C( -27429), INT16_C( -24038), INT16_C( -27170), INT16_C( 23974)),
2155
- simde_mm_set_pi16(INT16_C( 26527), INT16_C( -23757), INT16_C( 12822), INT16_C( 25106)),
2156
- simde_mm_set_pi16(INT16_C( 11580), INT16_C( -281), INT16_C( 25544), INT16_C( -1132)) },
2157
- { simde_mm_set_pi16(INT16_C( -22000), INT16_C( 31301), INT16_C( 3019), INT16_C( 5319)),
2158
- simde_mm_set_pi16(INT16_C( 17233), INT16_C( -4995), INT16_C( -32364), INT16_C( 13233)),
2159
- simde_mm_set_pi16(INT16_C( 26303), INT16_C( -29240), INT16_C( -30153), INT16_C( -7914)) },
2160
- { simde_mm_set_pi16(INT16_C( -3486), INT16_C( -1801), INT16_C( 6573), INT16_C( -2443)),
2161
- simde_mm_set_pi16(INT16_C( 12310), INT16_C( 34), INT16_C( -20082), INT16_C( -25128)),
2162
- simde_mm_set_pi16(INT16_C( -15796), INT16_C( -1835), INT16_C( 26655), INT16_C( 22685)) },
2163
- { simde_mm_set_pi16(INT16_C( -4220), INT16_C( 17506), INT16_C( 6973), INT16_C( -8771)),
2164
- simde_mm_set_pi16(INT16_C( -28953), INT16_C( 20334), INT16_C( 30681), INT16_C( -3329)),
2165
- simde_mm_set_pi16(INT16_C( 24733), INT16_C( -2828), INT16_C( -23708), INT16_C( -5442)) }
2166
- };
2167
-
2168
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2169
- simde__m64 r = simde_mm_sub_pi16(test_vec[i].a, test_vec[i].b);
2170
- simde_mm_empty();
2171
- simde_assert_m64_i16(r, ==, test_vec[i].r);
2172
- }
2173
-
2174
- simde_mm_empty();
2175
- return MUNIT_OK;
2176
- }
2177
-
2178
- static MunitResult
2179
- test_simde_mm_sub_pi32(const MunitParameter params[], void* data) {
2180
- (void) params;
2181
- (void) data;
2182
-
2183
- const struct {
2184
- simde__m64 a;
2185
- simde__m64 b;
2186
- simde__m64 r;
2187
- } test_vec[8] = {
2188
- { simde_mm_set_pi32(INT32_C( 1529386227), INT32_C( -668097316)),
2189
- simde_mm_set_pi32(INT32_C( 473233841), INT32_C( -90529672)),
2190
- simde_mm_set_pi32(INT32_C( 1056152386), INT32_C( -577567644)) },
2191
- { simde_mm_set_pi32(INT32_C( -1931729107), INT32_C( -722204778)),
2192
- simde_mm_set_pi32(INT32_C( -1308867233), INT32_C( -379543807)),
2193
- simde_mm_set_pi32(INT32_C( -622861874), INT32_C( -342660971)) },
2194
- { simde_mm_set_pi32(INT32_C( -291860960), INT32_C( -190367090)),
2195
- simde_mm_set_pi32(INT32_C( 135041259), INT32_C( 1788100299)),
2196
- simde_mm_set_pi32(INT32_C( -426902219), INT32_C( -1978467389)) },
2197
- { simde_mm_set_pi32(INT32_C( 1883589163), INT32_C( 323765200)),
2198
- simde_mm_set_pi32(INT32_C( 645555820), INT32_C( 651498122)),
2199
- simde_mm_set_pi32(INT32_C( 1238033343), INT32_C( -327732922)) },
2200
- { simde_mm_set_pi32(INT32_C( 1636190981), INT32_C( -1768384078)),
2201
- simde_mm_set_pi32(INT32_C( 292739084), INT32_C( -81452554)),
2202
- simde_mm_set_pi32(INT32_C( 1343451897), INT32_C( -1686931524)) },
2203
- { simde_mm_set_pi32(INT32_C( -1203362066), INT32_C( 1430164168)),
2204
- simde_mm_set_pi32(INT32_C( 1181972217), INT32_C( -1859714213)),
2205
- simde_mm_set_pi32(INT32_C( 1909633013), INT32_C( -1005088915)) },
2206
- { simde_mm_set_pi32(INT32_C( -81132926), INT32_C( 156813953)),
2207
- simde_mm_set_pi32(INT32_C( 1408689560), INT32_C( -1315494890)),
2208
- simde_mm_set_pi32(INT32_C( -1489822486), INT32_C( 1472308843)) },
2209
- { simde_mm_set_pi32(INT32_C( -99259746), INT32_C( -1543487401)),
2210
- simde_mm_set_pi32(INT32_C( 1211860803), INT32_C( 322815885)),
2211
- simde_mm_set_pi32(INT32_C( -1311120549), INT32_C( -1866303286)) }
2212
- };
2213
-
2214
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2215
- simde__m64 r = simde_mm_sub_pi32(test_vec[i].a, test_vec[i].b);
2216
- simde_mm_empty();
2217
- simde_assert_m64_i32(r, ==, test_vec[i].r);
2218
- }
2219
-
2220
- simde_mm_empty();
2221
- return MUNIT_OK;
2222
- }
2223
-
2224
- static MunitResult
2225
- test_simde_mm_subs_pi8(const MunitParameter params[], void* data) {
2226
- (void) params;
2227
- (void) data;
2228
-
2229
- const struct {
2230
- simde__m64 a;
2231
- simde__m64 b;
2232
- simde__m64 r;
2233
- } test_vec[8] = {
2234
- { simde_mm_set_pi8(INT8_C( 47), INT8_C( -51), INT8_C( -9), INT8_C( 46), INT8_C( 37), INT8_C( 125), INT8_C(-121), INT8_C( 88)),
2235
- simde_mm_set_pi8(INT8_C( -37), INT8_C( -59), INT8_C( -18), INT8_C( -39), INT8_C( -68), INT8_C( 127), INT8_C( -66), INT8_C( -14)),
2236
- simde_mm_set_pi8(INT8_C( 84), INT8_C( 8), INT8_C( 9), INT8_C( 85), INT8_C( 105), INT8_C( -2), INT8_C( -55), INT8_C( 102)) },
2237
- { simde_mm_set_pi8(INT8_C( -68), INT8_C( 111), INT8_C( 54), INT8_C( -2), INT8_C( -96), INT8_C( -30), INT8_C( 7), INT8_C( -1)),
2238
- simde_mm_set_pi8(INT8_C( 71), INT8_C( 109), INT8_C( 43), INT8_C( -28), INT8_C(-128), INT8_C( -98), INT8_C( 65), INT8_C( -86)),
2239
- simde_mm_set_pi8(INT8_C(-128), INT8_C( 2), INT8_C( 11), INT8_C( 26), INT8_C( 32), INT8_C( 68), INT8_C( -58), INT8_C( 85)) },
2240
- { simde_mm_set_pi8(INT8_C(-124), INT8_C(-105), INT8_C( 39), INT8_C( 68), INT8_C( -44), INT8_C( -60), INT8_C( -44), INT8_C( -99)),
2241
- simde_mm_set_pi8(INT8_C( -9), INT8_C(-127), INT8_C( 77), INT8_C( -14), INT8_C( -70), INT8_C( -39), INT8_C( -18), INT8_C( -40)),
2242
- simde_mm_set_pi8(INT8_C(-115), INT8_C( 22), INT8_C( -38), INT8_C( 82), INT8_C( 26), INT8_C( -21), INT8_C( -26), INT8_C( -59)) },
2243
- { simde_mm_set_pi8(INT8_C( 52), INT8_C( 33), INT8_C( 97), INT8_C( 39), INT8_C(-126), INT8_C( -11), INT8_C( 17), INT8_C( 108)),
2244
- simde_mm_set_pi8(INT8_C( 65), INT8_C( 112), INT8_C( 108), INT8_C( 33), INT8_C( 68), INT8_C(-103), INT8_C( -45), INT8_C( 7)),
2245
- simde_mm_set_pi8(INT8_C( -13), INT8_C( -79), INT8_C( -11), INT8_C( 6), INT8_C(-128), INT8_C( 92), INT8_C( 62), INT8_C( 101)) },
2246
- { simde_mm_set_pi8(INT8_C(-105), INT8_C( 75), INT8_C( 127), INT8_C( -57), INT8_C( 88), INT8_C( -25), INT8_C( -75), INT8_C( -74)),
2247
- simde_mm_set_pi8(INT8_C(-125), INT8_C( -81), INT8_C( 60), INT8_C(-108), INT8_C( 78), INT8_C( -60), INT8_C( 88), INT8_C( 30)),
2248
- simde_mm_set_pi8(INT8_C( 20), INT8_C( 127), INT8_C( 67), INT8_C( 51), INT8_C( 10), INT8_C( 35), INT8_C(-128), INT8_C(-104)) },
2249
- { simde_mm_set_pi8(INT8_C( -28), INT8_C( -97), INT8_C( 80), INT8_C( -43), INT8_C( -70), INT8_C( 45), INT8_C( 10), INT8_C( -67)),
2250
- simde_mm_set_pi8(INT8_C(-109), INT8_C( 97), INT8_C( 25), INT8_C( 63), INT8_C( -65), INT8_C( -95), INT8_C(-111), INT8_C( -39)),
2251
- simde_mm_set_pi8(INT8_C( 81), INT8_C(-128), INT8_C( 55), INT8_C(-106), INT8_C( -5), INT8_C( 127), INT8_C( 121), INT8_C( -28)) },
2252
- { simde_mm_set_pi8(INT8_C( 52), INT8_C( -18), INT8_C( -86), INT8_C( -29), INT8_C( 69), INT8_C( 92), INT8_C( 89), INT8_C( -66)),
2253
- simde_mm_set_pi8(INT8_C( 16), INT8_C( 0), INT8_C( 95), INT8_C( 95), INT8_C( 115), INT8_C( -53), INT8_C( 55), INT8_C( 75)),
2254
- simde_mm_set_pi8(INT8_C( 36), INT8_C( -18), INT8_C(-128), INT8_C(-124), INT8_C( -46), INT8_C( 127), INT8_C( 34), INT8_C(-128)) },
2255
- { simde_mm_set_pi8(INT8_C( 99), INT8_C( -48), INT8_C( 16), INT8_C( 126), INT8_C(-110), INT8_C(-111), INT8_C( -66), INT8_C( 83)),
2256
- simde_mm_set_pi8(INT8_C(-118), INT8_C( 118), INT8_C( 100), INT8_C(-121), INT8_C( -17), INT8_C( 74), INT8_C( -47), INT8_C( -77)),
2257
- simde_mm_set_pi8(INT8_C( 127), INT8_C(-128), INT8_C( -84), INT8_C( 127), INT8_C( -93), INT8_C(-128), INT8_C( -19), INT8_C( 127)) }
2258
- };
2259
-
2260
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2261
- simde__m64 r = simde_mm_subs_pi8(test_vec[i].a, test_vec[i].b);
2262
- simde_mm_empty();
2263
- simde_assert_m64_i8(r, ==, test_vec[i].r);
2264
- }
2265
-
2266
- simde_mm_empty();
2267
- return MUNIT_OK;
2268
- }
2269
-
2270
- static MunitResult
2271
- test_simde_mm_subs_pu8(const MunitParameter params[], void* data) {
2272
- (void) params;
2273
- (void) data;
2274
-
2275
- const struct {
2276
- simde__m64 a;
2277
- simde__m64 b;
2278
- simde__m64 r;
2279
- } test_vec[8] = {
2280
- { simde_mm_set_pi8(INT8_C( 108), INT8_C(-104), INT8_C( 106), INT8_C( 91), INT8_C( 54), INT8_C( 95), INT8_C( -86), INT8_C( -68)),
2281
- simde_mm_set_pi8(INT8_C( -73), INT8_C( -12), INT8_C( 13), INT8_C( -7), INT8_C(-102), INT8_C( -27), INT8_C( -93), INT8_C( -1)),
2282
- simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 93), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 7), INT8_C( 0)) },
2283
- { simde_mm_set_pi8(INT8_C( -3), INT8_C(-122), INT8_C(-110), INT8_C( -87), INT8_C( -28), INT8_C( -38), INT8_C( 30), INT8_C( -22)),
2284
- simde_mm_set_pi8(INT8_C( -24), INT8_C( 92), INT8_C( -83), INT8_C( -90), INT8_C(-108), INT8_C(-117), INT8_C( 101), INT8_C( -58)),
2285
- simde_mm_set_pi8(INT8_C( 21), INT8_C( 42), INT8_C( 0), INT8_C( 3), INT8_C( 80), INT8_C( 79), INT8_C( 0), INT8_C( 36)) },
2286
- { simde_mm_set_pi8(INT8_C( -50), INT8_C( 51), INT8_C( -7), INT8_C( -68), INT8_C( -7), INT8_C( 11), INT8_C( 15), INT8_C( 2)),
2287
- simde_mm_set_pi8(INT8_C( -4), INT8_C(-101), INT8_C(-106), INT8_C( -43), INT8_C(-124), INT8_C( 1), INT8_C( -19), INT8_C( 18)),
2288
- simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 99), INT8_C( 0), INT8_C( 117), INT8_C( 10), INT8_C( 0), INT8_C( 0)) },
2289
- { simde_mm_set_pi8(INT8_C( 20), INT8_C( -33), INT8_C( -99), INT8_C( -4), INT8_C(-119), INT8_C( 72), INT8_C( 104), INT8_C( -43)),
2290
- simde_mm_set_pi8(INT8_C( -27), INT8_C( 116), INT8_C( 127), INT8_C( 71), INT8_C( 110), INT8_C( 47), INT8_C( 56), INT8_C( -18)),
2291
- simde_mm_set_pi8(INT8_C( 0), INT8_C( 107), INT8_C( 30), INT8_C( -75), INT8_C( 27), INT8_C( 25), INT8_C( 48), INT8_C( 0)) },
2292
- { simde_mm_set_pi8(INT8_C( 24), INT8_C( 44), INT8_C( 126), INT8_C( -16), INT8_C( 48), INT8_C( 119), INT8_C( 122), INT8_C( 92)),
2293
- simde_mm_set_pi8(INT8_C( -53), INT8_C( 93), INT8_C( 123), INT8_C( 43), INT8_C( -1), INT8_C( -86), INT8_C( 12), INT8_C( -40)),
2294
- simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 3), INT8_C( -59), INT8_C( 0), INT8_C( 0), INT8_C( 110), INT8_C( 0)) },
2295
- { simde_mm_set_pi8(INT8_C( 43), INT8_C( -29), INT8_C( 72), INT8_C( -16), INT8_C( 73), INT8_C( 36), INT8_C( 38), INT8_C(-122)),
2296
- simde_mm_set_pi8(INT8_C( 68), INT8_C( 17), INT8_C(-105), INT8_C( 112), INT8_C( 123), INT8_C(-118), INT8_C( 37), INT8_C( 35)),
2297
- simde_mm_set_pi8(INT8_C( 0), INT8_C( -46), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( 99)) },
2298
- { simde_mm_set_pi8(INT8_C( 78), INT8_C( 25), INT8_C(-123), INT8_C(-114), INT8_C( 56), INT8_C( 33), INT8_C( -54), INT8_C( 46)),
2299
- simde_mm_set_pi8(INT8_C( -71), INT8_C( 113), INT8_C( -52), INT8_C( -21), INT8_C(-112), INT8_C( -45), INT8_C( 117), INT8_C( -91)),
2300
- simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 85), INT8_C( 0)) },
2301
- { simde_mm_set_pi8(INT8_C( 35), INT8_C( 56), INT8_C( 106), INT8_C( 118), INT8_C( -12), INT8_C( -92), INT8_C( -24), INT8_C( 93)),
2302
- simde_mm_set_pi8(INT8_C(-118), INT8_C( -26), INT8_C( -47), INT8_C( 86), INT8_C( -69), INT8_C( 43), INT8_C( 117), INT8_C( 101)),
2303
- simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 32), INT8_C( 57), INT8_C( 121), INT8_C( 115), INT8_C( 0)) }
2304
- };
2305
-
2306
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2307
- simde__m64 r = simde_mm_subs_pu8(test_vec[i].a, test_vec[i].b);
2308
- simde_mm_empty();
2309
- simde_assert_m64_i8(r, ==, test_vec[i].r);
2310
- }
2311
-
2312
- simde_mm_empty();
2313
- return MUNIT_OK;
2314
- }
2315
-
2316
- static MunitResult
2317
- test_simde_mm_subs_pi16(const MunitParameter params[], void* data) {
2318
- (void) params;
2319
- (void) data;
2320
-
2321
- const struct {
2322
- simde__m64 a;
2323
- simde__m64 b;
2324
- simde__m64 r;
2325
- } test_vec[8] = {
2326
- { simde_mm_set_pi16(INT16_C( 997), INT16_C( -2676), INT16_C( -29256), INT16_C( -21534)),
2327
- simde_mm_set_pi16(INT16_C( 25057), INT16_C( -8634), INT16_C( 14564), INT16_C( 23460)),
2328
- simde_mm_set_pi16(INT16_C( -24060), INT16_C( 5958), INT16_C( -32768), INT16_C( -32768)) },
2329
- { simde_mm_set_pi16(INT16_C( 17773), INT16_C( -21379), INT16_C( -10016), INT16_C( -25057)),
2330
- simde_mm_set_pi16(INT16_C( -17494), INT16_C( -5727), INT16_C( -23865), INT16_C( -12297)),
2331
- simde_mm_set_pi16(INT16_C( 32767), INT16_C( -15652), INT16_C( 13849), INT16_C( -12760)) },
2332
- { simde_mm_set_pi16(INT16_C( -18595), INT16_C( -25519), INT16_C( 25647), INT16_C( 18081)),
2333
- simde_mm_set_pi16(INT16_C( -16730), INT16_C( 8578), INT16_C( -24195), INT16_C( -23138)),
2334
- simde_mm_set_pi16(INT16_C( -1865), INT16_C( -32768), INT16_C( 32767), INT16_C( 32767)) },
2335
- { simde_mm_set_pi16(INT16_C( 30835), INT16_C( -1900), INT16_C( -12465), INT16_C( -32273)),
2336
- simde_mm_set_pi16(INT16_C( 22212), INT16_C( 29314), INT16_C( 30369), INT16_C( -7474)),
2337
- simde_mm_set_pi16(INT16_C( 8623), INT16_C( -31214), INT16_C( -32768), INT16_C( -24799)) },
2338
- { simde_mm_set_pi16(INT16_C( -4511), INT16_C( -11707), INT16_C( -456), INT16_C( 4939)),
2339
- simde_mm_set_pi16(INT16_C( 9564), INT16_C( -6551), INT16_C( 15884), INT16_C( 25916)),
2340
- simde_mm_set_pi16(INT16_C( -14075), INT16_C( -5156), INT16_C( -16340), INT16_C( -20977)) },
2341
- { simde_mm_set_pi16(INT16_C( 16747), INT16_C( 26115), INT16_C( 28725), INT16_C( -9489)),
2342
- simde_mm_set_pi16(INT16_C( 18589), INT16_C( 10790), INT16_C( 16046), INT16_C( 7670)),
2343
- simde_mm_set_pi16(INT16_C( -1842), INT16_C( 15325), INT16_C( 12679), INT16_C( -17159)) },
2344
- { simde_mm_set_pi16(INT16_C( 12230), INT16_C( 31818), INT16_C( -20400), INT16_C( 29194)),
2345
- simde_mm_set_pi16(INT16_C( 13624), INT16_C( -27762), INT16_C( -3717), INT16_C( 9357)),
2346
- simde_mm_set_pi16(INT16_C( -1394), INT16_C( 32767), INT16_C( -16683), INT16_C( 19837)) },
2347
- { simde_mm_set_pi16(INT16_C( 4223), INT16_C( 22129), INT16_C( 27682), INT16_C( 6112)),
2348
- simde_mm_set_pi16(INT16_C( 25462), INT16_C( 1497), INT16_C( -20195), INT16_C( -31363)),
2349
- simde_mm_set_pi16(INT16_C( -21239), INT16_C( 20632), INT16_C( 32767), INT16_C( 32767)) }
2350
- };
2351
-
2352
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2353
- simde__m64 r = simde_mm_subs_pi16(test_vec[i].a, test_vec[i].b);
2354
- simde_mm_empty();
2355
- simde_assert_m64_i16(r, ==, test_vec[i].r);
2356
- }
2357
-
2358
- simde_mm_empty();
2359
- return MUNIT_OK;
2360
- }
2361
-
2362
- static MunitResult
2363
- test_simde_mm_subs_pu16(const MunitParameter params[], void* data) {
2364
- (void) params;
2365
- (void) data;
2366
-
2367
- const struct {
2368
- simde__m64 a;
2369
- simde__m64 b;
2370
- simde__m64 r;
2371
- } test_vec[8] = {
2372
- { simde_mm_set_pi16(INT16_C( -14933), INT16_C( 874), INT16_C( -12812), INT16_C( -23674)),
2373
- simde_mm_set_pi16(INT16_C( 10199), INT16_C( -21634), INT16_C( -16349), INT16_C( -2233)),
2374
- simde_mm_set_pi16(INT16_C( -25132), INT16_C( 0), INT16_C( 3537), INT16_C( 0)) },
2375
- { simde_mm_set_pi16(INT16_C( -30411), INT16_C( 14403), INT16_C( 16019), INT16_C( -7235)),
2376
- simde_mm_set_pi16(INT16_C( 20809), INT16_C( 30553), INT16_C( -13348), INT16_C( -9019)),
2377
- simde_mm_set_pi16(INT16_C( 14316), INT16_C( 0), INT16_C( 0), INT16_C( 1784)) },
2378
- { simde_mm_set_pi16(INT16_C( -3263), INT16_C( 17129), INT16_C( 7120), INT16_C( 17541)),
2379
- simde_mm_set_pi16(INT16_C( 17758), INT16_C( -24273), INT16_C( -16817), INT16_C( -26381)),
2380
- simde_mm_set_pi16(INT16_C( -21021), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
2381
- { simde_mm_set_pi16(INT16_C( 28253), INT16_C( -27429), INT16_C( -2971), INT16_C( -25455)),
2382
- simde_mm_set_pi16(INT16_C( -28858), INT16_C( 23971), INT16_C( 30194), INT16_C( 29959)),
2383
- simde_mm_set_pi16(INT16_C( 0), INT16_C( 14136), INT16_C( 32371), INT16_C( 10122)) },
2384
- { simde_mm_set_pi16(INT16_C( -5264), INT16_C( -5469), INT16_C( 2876), INT16_C( 12913)),
2385
- simde_mm_set_pi16(INT16_C( -25438), INT16_C( -13476), INT16_C( -20493), INT16_C( 9684)),
2386
- simde_mm_set_pi16(INT16_C( 20174), INT16_C( 8007), INT16_C( 0), INT16_C( 3229)) },
2387
- { simde_mm_set_pi16(INT16_C( -6406), INT16_C( 29502), INT16_C( -32502), INT16_C( 29440)),
2388
- simde_mm_set_pi16(INT16_C( 24669), INT16_C( 29936), INT16_C( -12635), INT16_C( -28492)),
2389
- simde_mm_set_pi16(INT16_C( -31075), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
2390
- { simde_mm_set_pi16(INT16_C( 1295), INT16_C( 17975), INT16_C( -25873), INT16_C( -8332)),
2391
- simde_mm_set_pi16(INT16_C( -30157), INT16_C( 122), INT16_C( -20762), INT16_C( 12983)),
2392
- simde_mm_set_pi16(INT16_C( 0), INT16_C( 17853), INT16_C( 0), INT16_C( -21315)) },
2393
- { simde_mm_set_pi16(INT16_C( -17654), INT16_C( -28720), INT16_C( -25036), INT16_C( -2408)),
2394
- simde_mm_set_pi16(INT16_C( 32575), INT16_C( 13887), INT16_C( 23741), INT16_C( -32273)),
2395
- simde_mm_set_pi16(INT16_C( 15307), INT16_C( 22929), INT16_C( 16759), INT16_C( 29865)) }
2396
- };
2397
-
2398
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2399
- simde__m64 r = simde_mm_subs_pu16(test_vec[i].a, test_vec[i].b);
2400
- simde_mm_empty();
2401
- simde_assert_m64_i16(r, ==, test_vec[i].r);
2402
- }
2403
-
2404
- simde_mm_empty();
2405
- return MUNIT_OK;
2406
- }
2407
-
2408
- static MunitResult
2409
- test_simde_mm_unpackhi_pi8(const MunitParameter params[], void* data) {
2410
- (void) params;
2411
- (void) data;
2412
-
2413
- const struct {
2414
- simde__m64 a;
2415
- simde__m64 b;
2416
- simde__m64 r;
2417
- } test_vec[8] = {
2418
- { simde_mm_set_pi8(INT8_C(-127), INT8_C( 48), INT8_C( 42), INT8_C( 115), INT8_C( -77), INT8_C( 4), INT8_C( 25), INT8_C( -42)),
2419
- simde_mm_set_pi8(INT8_C( 57), INT8_C( 92), INT8_C( -39), INT8_C( -42), INT8_C( 73), INT8_C( 4), INT8_C( 41), INT8_C( 118)),
2420
- simde_mm_set_pi8(INT8_C( 57), INT8_C(-127), INT8_C( 92), INT8_C( 48), INT8_C( -39), INT8_C( 42), INT8_C( -42), INT8_C( 115)) },
2421
- { simde_mm_set_pi8(INT8_C( -16), INT8_C( 120), INT8_C( 16), INT8_C( 116), INT8_C( -35), INT8_C(-100), INT8_C( 0), INT8_C( -39)),
2422
- simde_mm_set_pi8(INT8_C( 63), INT8_C( -73), INT8_C( 48), INT8_C( -66), INT8_C( -33), INT8_C(-102), INT8_C( -62), INT8_C( 118)),
2423
- simde_mm_set_pi8(INT8_C( 63), INT8_C( -16), INT8_C( -73), INT8_C( 120), INT8_C( 48), INT8_C( 16), INT8_C( -66), INT8_C( 116)) },
2424
- { simde_mm_set_pi8(INT8_C( -24), INT8_C( -49), INT8_C( 20), INT8_C( 34), INT8_C( -1), INT8_C( 63), INT8_C( 11), INT8_C( -36)),
2425
- simde_mm_set_pi8(INT8_C( -97), INT8_C( 52), INT8_C( 62), INT8_C( -48), INT8_C( -15), INT8_C( 24), INT8_C( 18), INT8_C( -28)),
2426
- simde_mm_set_pi8(INT8_C( -97), INT8_C( -24), INT8_C( 52), INT8_C( -49), INT8_C( 62), INT8_C( 20), INT8_C( -48), INT8_C( 34)) },
2427
- { simde_mm_set_pi8(INT8_C( 34), INT8_C( -74), INT8_C( -88), INT8_C( -68), INT8_C( 80), INT8_C( 80), INT8_C( -27), INT8_C(-109)),
2428
- simde_mm_set_pi8(INT8_C( -14), INT8_C( 17), INT8_C( -50), INT8_C( 50), INT8_C( -72), INT8_C(-111), INT8_C( -32), INT8_C(-114)),
2429
- simde_mm_set_pi8(INT8_C( -14), INT8_C( 34), INT8_C( 17), INT8_C( -74), INT8_C( -50), INT8_C( -88), INT8_C( 50), INT8_C( -68)) },
2430
- { simde_mm_set_pi8(INT8_C( -82), INT8_C( 34), INT8_C( 79), INT8_C( 75), INT8_C( -45), INT8_C( 43), INT8_C( -97), INT8_C( 55)),
2431
- simde_mm_set_pi8(INT8_C( 126), INT8_C( 126), INT8_C( 113), INT8_C( 122), INT8_C( 7), INT8_C( 69), INT8_C( 31), INT8_C( 83)),
2432
- simde_mm_set_pi8(INT8_C( 126), INT8_C( -82), INT8_C( 126), INT8_C( 34), INT8_C( 113), INT8_C( 79), INT8_C( 122), INT8_C( 75)) },
2433
- { simde_mm_set_pi8(INT8_C( -4), INT8_C( -98), INT8_C( 7), INT8_C( 88), INT8_C( -93), INT8_C( 56), INT8_C( -38), INT8_C( -15)),
2434
- simde_mm_set_pi8(INT8_C( 75), INT8_C( 97), INT8_C( 76), INT8_C( 26), INT8_C(-119), INT8_C( -96), INT8_C( -74), INT8_C( -24)),
2435
- simde_mm_set_pi8(INT8_C( 75), INT8_C( -4), INT8_C( 97), INT8_C( -98), INT8_C( 76), INT8_C( 7), INT8_C( 26), INT8_C( 88)) },
2436
- { simde_mm_set_pi8(INT8_C( 124), INT8_C( 71), INT8_C( -14), INT8_C( 19), INT8_C( -69), INT8_C( -31), INT8_C( 35), INT8_C( -82)),
2437
- simde_mm_set_pi8(INT8_C( -31), INT8_C( 125), INT8_C( 35), INT8_C( 84), INT8_C( 105), INT8_C(-115), INT8_C( 11), INT8_C( -12)),
2438
- simde_mm_set_pi8(INT8_C( -31), INT8_C( 124), INT8_C( 125), INT8_C( 71), INT8_C( 35), INT8_C( -14), INT8_C( 84), INT8_C( 19)) },
2439
- { simde_mm_set_pi8(INT8_C( 45), INT8_C( -51), INT8_C( -71), INT8_C( -47), INT8_C( -27), INT8_C( 20), INT8_C(-117), INT8_C( -5)),
2440
- simde_mm_set_pi8(INT8_C( -92), INT8_C( -74), INT8_C( 58), INT8_C( 117), INT8_C( -53), INT8_C( 43), INT8_C( 66), INT8_C( -55)),
2441
- simde_mm_set_pi8(INT8_C( -92), INT8_C( 45), INT8_C( -74), INT8_C( -51), INT8_C( 58), INT8_C( -71), INT8_C( 117), INT8_C( -47)) }
2442
- };
2443
-
2444
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2445
- simde__m64 r = simde_mm_unpackhi_pi8(test_vec[i].a, test_vec[i].b);
2446
- simde_mm_empty();
2447
- simde_assert_m64_i8(r, ==, test_vec[i].r);
2448
- }
2449
-
2450
- simde_mm_empty();
2451
- return MUNIT_OK;
2452
- }
2453
-
2454
- static MunitResult
2455
- test_simde_mm_unpackhi_pi16(const MunitParameter params[], void* data) {
2456
- (void) params;
2457
- (void) data;
2458
-
2459
- const struct {
2460
- simde__m64 a;
2461
- simde__m64 b;
2462
- simde__m64 r;
2463
- } test_vec[8] = {
2464
- { simde_mm_set_pi16(INT16_C( -14965), INT16_C( 28080), INT16_C( -15604), INT16_C( -10099)),
2465
- simde_mm_set_pi16(INT16_C( 16538), INT16_C( -18813), INT16_C( -254), INT16_C( -23207)),
2466
- simde_mm_set_pi16(INT16_C( 16538), INT16_C( -14965), INT16_C( -18813), INT16_C( 28080)) },
2467
- { simde_mm_set_pi16(INT16_C( -4346), INT16_C( -29603), INT16_C( 1361), INT16_C( 16092)),
2468
- simde_mm_set_pi16(INT16_C( -4428), INT16_C( -25960), INT16_C( 7111), INT16_C( 29823)),
2469
- simde_mm_set_pi16(INT16_C( -4428), INT16_C( -4346), INT16_C( -25960), INT16_C( -29603)) },
2470
- { simde_mm_set_pi16(INT16_C( -22197), INT16_C( -13478), INT16_C( 29243), INT16_C( -7146)),
2471
- simde_mm_set_pi16(INT16_C( -6022), INT16_C( -10408), INT16_C( -5121), INT16_C( -15640)),
2472
- simde_mm_set_pi16(INT16_C( -6022), INT16_C( -22197), INT16_C( -10408), INT16_C( -13478)) },
2473
- { simde_mm_set_pi16(INT16_C( -21336), INT16_C( 14878), INT16_C( 14164), INT16_C( 2727)),
2474
- simde_mm_set_pi16(INT16_C( 12579), INT16_C( -20797), INT16_C( 18011), INT16_C( 5438)),
2475
- simde_mm_set_pi16(INT16_C( 12579), INT16_C( -21336), INT16_C( -20797), INT16_C( 14878)) },
2476
- { simde_mm_set_pi16(INT16_C( -20790), INT16_C( -21719), INT16_C( -12256), INT16_C( -17410)),
2477
- simde_mm_set_pi16(INT16_C( 4576), INT16_C( 6842), INT16_C( -12668), INT16_C( -11854)),
2478
- simde_mm_set_pi16(INT16_C( 4576), INT16_C( -20790), INT16_C( 6842), INT16_C( -21719)) },
2479
- { simde_mm_set_pi16(INT16_C( -12751), INT16_C( 22951), INT16_C( -11466), INT16_C( -26387)),
2480
- simde_mm_set_pi16(INT16_C( -27771), INT16_C( -31462), INT16_C( 14453), INT16_C( -2204)),
2481
- simde_mm_set_pi16(INT16_C( -27771), INT16_C( -12751), INT16_C( -31462), INT16_C( 22951)) },
2482
- { simde_mm_set_pi16(INT16_C( -15685), INT16_C( 13196), INT16_C( 17198), INT16_C( 29713)),
2483
- simde_mm_set_pi16(INT16_C( 29600), INT16_C( -21832), INT16_C( -7500), INT16_C( 31712)),
2484
- simde_mm_set_pi16(INT16_C( 29600), INT16_C( -15685), INT16_C( -21832), INT16_C( 13196)) },
2485
- { simde_mm_set_pi16(INT16_C( -16681), INT16_C( -16529), INT16_C( 32728), INT16_C( 31459)),
2486
- simde_mm_set_pi16(INT16_C( 20407), INT16_C( -12854), INT16_C( 18433), INT16_C( 3119)),
2487
- simde_mm_set_pi16(INT16_C( 20407), INT16_C( -16681), INT16_C( -12854), INT16_C( -16529)) }
2488
- };
2489
-
2490
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2491
- simde__m64 r = simde_mm_unpackhi_pi16(test_vec[i].a, test_vec[i].b);
2492
- simde_mm_empty();
2493
- simde_assert_m64_i16(r, ==, test_vec[i].r);
2494
- }
2495
-
2496
- simde_mm_empty();
2497
- return MUNIT_OK;
2498
- }
2499
-
2500
- static MunitResult
2501
- test_simde_mm_unpackhi_pi32(const MunitParameter params[], void* data) {
2502
- (void) params;
2503
- (void) data;
2504
-
2505
- const struct {
2506
- simde__m64 a;
2507
- simde__m64 b;
2508
- simde__m64 r;
2509
- } test_vec[8] = {
2510
- { simde_mm_set_pi32(INT32_C( -1658263771), INT32_C( -1249023590)),
2511
- simde_mm_set_pi32(INT32_C( -1692091894), INT32_C( 429039047)),
2512
- simde_mm_set_pi32(INT32_C( -1692091894), INT32_C( -1658263771)) },
2513
- { simde_mm_set_pi32(INT32_C( 900819254), INT32_C( -1069899126)),
2514
- simde_mm_set_pi32(INT32_C( -400543833), INT32_C( -2013963668)),
2515
- simde_mm_set_pi32(INT32_C( -400543833), INT32_C( 900819254)) },
2516
- { simde_mm_set_pi32(INT32_C( -1005749657), INT32_C( -188276900)),
2517
- simde_mm_set_pi32(INT32_C( 810155385), INT32_C( -436942778)),
2518
- simde_mm_set_pi32(INT32_C( 810155385), INT32_C( -1005749657)) },
2519
- { simde_mm_set_pi32(INT32_C( 43596265), INT32_C( -1556778284)),
2520
- simde_mm_set_pi32(INT32_C( -1634766739), INT32_C( -297104207)),
2521
- simde_mm_set_pi32(INT32_C( -1634766739), INT32_C( 43596265)) },
2522
- { simde_mm_set_pi32(INT32_C( 820557065), INT32_C( 2171)),
2523
- simde_mm_set_pi32(INT32_C( 1748389432), INT32_C( 1779087168)),
2524
- simde_mm_set_pi32(INT32_C( 1748389432), INT32_C( 820557065)) },
2525
- { simde_mm_set_pi32(INT32_C( -106826552), INT32_C( -791842435)),
2526
- simde_mm_set_pi32(INT32_C( 2006847448), INT32_C( 484681450)),
2527
- simde_mm_set_pi32(INT32_C( 2006847448), INT32_C( -106826552)) },
2528
- { simde_mm_set_pi32(INT32_C( 1892029634), INT32_C( -899748289)),
2529
- simde_mm_set_pi32(INT32_C( 1496471605), INT32_C( 840905121)),
2530
- simde_mm_set_pi32(INT32_C( 1496471605), INT32_C( 1892029634)) },
2531
- { simde_mm_set_pi32(INT32_C( 1293223526), INT32_C( -574905244)),
2532
- simde_mm_set_pi32(INT32_C( 57909389), INT32_C( -70830945)),
2533
- simde_mm_set_pi32(INT32_C( 57909389), INT32_C( 1293223526)) }
2534
- };
2535
-
2536
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2537
- simde__m64 r = simde_mm_unpackhi_pi32(test_vec[i].a, test_vec[i].b);
2538
- simde_mm_empty();
2539
- simde_assert_m64_i32(r, ==, test_vec[i].r);
2540
- }
2541
-
2542
- simde_mm_empty();
2543
- return MUNIT_OK;
2544
- }
2545
-
2546
- static MunitResult
2547
- test_simde_mm_unpacklo_pi8(const MunitParameter params[], void* data) {
2548
- (void) params;
2549
- (void) data;
2550
-
2551
- const struct {
2552
- simde__m64 a;
2553
- simde__m64 b;
2554
- simde__m64 r;
2555
- } test_vec[8] = {
2556
- { simde_mm_set_pi8(INT8_C( -15), INT8_C( -27), INT8_C( -29), INT8_C( 2), INT8_C( 11), INT8_C( 105), INT8_C( -49), INT8_C( 15)),
2557
- simde_mm_set_pi8(INT8_C( -90), INT8_C( 43), INT8_C( 55), INT8_C( 50), INT8_C(-102), INT8_C( 25), INT8_C( -40), INT8_C( 47)),
2558
- simde_mm_set_pi8(INT8_C(-102), INT8_C( 11), INT8_C( 25), INT8_C( 105), INT8_C( -40), INT8_C( -49), INT8_C( 47), INT8_C( 15)) },
2559
- { simde_mm_set_pi8(INT8_C( 1), INT8_C( 83), INT8_C(-101), INT8_C( 117), INT8_C( -52), INT8_C( -74), INT8_C( -59), INT8_C( 121)),
2560
- simde_mm_set_pi8(INT8_C(-102), INT8_C( 12), INT8_C( -28), INT8_C( 82), INT8_C(-122), INT8_C( 94), INT8_C( 127), INT8_C( -48)),
2561
- simde_mm_set_pi8(INT8_C(-122), INT8_C( -52), INT8_C( 94), INT8_C( -74), INT8_C( 127), INT8_C( -59), INT8_C( -48), INT8_C( 121)) },
2562
- { simde_mm_set_pi8(INT8_C( 13), INT8_C( 67), INT8_C( -73), INT8_C( -36), INT8_C( -93), INT8_C( 101), INT8_C(-107), INT8_C( 118)),
2563
- simde_mm_set_pi8(INT8_C( 46), INT8_C( -72), INT8_C( -50), INT8_C( 34), INT8_C(-111), INT8_C( -17), INT8_C(-128), INT8_C(-126)),
2564
- simde_mm_set_pi8(INT8_C(-111), INT8_C( -93), INT8_C( -17), INT8_C( 101), INT8_C(-128), INT8_C(-107), INT8_C(-126), INT8_C( 118)) },
2565
- { simde_mm_set_pi8(INT8_C( 4), INT8_C( -40), INT8_C( -73), INT8_C( 122), INT8_C( 85), INT8_C( 7), INT8_C( -54), INT8_C(-119)),
2566
- simde_mm_set_pi8(INT8_C( -37), INT8_C( -80), INT8_C(-128), INT8_C( 69), INT8_C( 112), INT8_C( 50), INT8_C( 44), INT8_C( -11)),
2567
- simde_mm_set_pi8(INT8_C( 112), INT8_C( 85), INT8_C( 50), INT8_C( 7), INT8_C( 44), INT8_C( -54), INT8_C( -11), INT8_C(-119)) },
2568
- { simde_mm_set_pi8(INT8_C(-113), INT8_C( 30), INT8_C( 68), INT8_C( 96), INT8_C( -94), INT8_C( -13), INT8_C( -38), INT8_C( -63)),
2569
- simde_mm_set_pi8(INT8_C( -9), INT8_C( 29), INT8_C( 5), INT8_C( -22), INT8_C( 66), INT8_C( 94), INT8_C( -79), INT8_C( -1)),
2570
- simde_mm_set_pi8(INT8_C( 66), INT8_C( -94), INT8_C( 94), INT8_C( -13), INT8_C( -79), INT8_C( -38), INT8_C( -1), INT8_C( -63)) },
2571
- { simde_mm_set_pi8(INT8_C( -42), INT8_C( -42), INT8_C( 41), INT8_C( -13), INT8_C( -41), INT8_C( -33), INT8_C( -24), INT8_C( -5)),
2572
- simde_mm_set_pi8(INT8_C( 85), INT8_C( 79), INT8_C( 19), INT8_C( -95), INT8_C( 42), INT8_C(-124), INT8_C( -96), INT8_C(-122)),
2573
- simde_mm_set_pi8(INT8_C( 42), INT8_C( -41), INT8_C(-124), INT8_C( -33), INT8_C( -96), INT8_C( -24), INT8_C(-122), INT8_C( -5)) },
2574
- { simde_mm_set_pi8(INT8_C( 28), INT8_C( 99), INT8_C( -57), INT8_C( 79), INT8_C( 40), INT8_C( -97), INT8_C( -80), INT8_C( 16)),
2575
- simde_mm_set_pi8(INT8_C( 60), INT8_C( 0), INT8_C( -13), INT8_C( -90), INT8_C( 17), INT8_C( 14), INT8_C(-115), INT8_C( 116)),
2576
- simde_mm_set_pi8(INT8_C( 17), INT8_C( 40), INT8_C( 14), INT8_C( -97), INT8_C(-115), INT8_C( -80), INT8_C( 116), INT8_C( 16)) },
2577
- { simde_mm_set_pi8(INT8_C( -26), INT8_C( -43), INT8_C( -21), INT8_C( 73), INT8_C( 83), INT8_C( 33), INT8_C( 105), INT8_C( 57)),
2578
- simde_mm_set_pi8(INT8_C( -29), INT8_C( 84), INT8_C( 15), INT8_C( -83), INT8_C( -51), INT8_C( 60), INT8_C( -18), INT8_C( 19)),
2579
- simde_mm_set_pi8(INT8_C( -51), INT8_C( 83), INT8_C( 60), INT8_C( 33), INT8_C( -18), INT8_C( 105), INT8_C( 19), INT8_C( 57)) }
2580
- };
2581
-
2582
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2583
- simde__m64 r = simde_mm_unpacklo_pi8(test_vec[i].a, test_vec[i].b);
2584
- simde_mm_empty();
2585
- simde_assert_m64_i8(r, ==, test_vec[i].r);
2586
- }
2587
-
2588
- simde_mm_empty();
2589
- return MUNIT_OK;
2590
- }
2591
-
2592
- static MunitResult
2593
- test_simde_mm_unpacklo_pi16(const MunitParameter params[], void* data) {
2594
- (void) params;
2595
- (void) data;
2596
-
2597
- const struct {
2598
- simde__m64 a;
2599
- simde__m64 b;
2600
- simde__m64 r;
2601
- } test_vec[8] = {
2602
- { simde_mm_set_pi16(INT16_C( 14920), INT16_C( -14108), INT16_C( -18371), INT16_C( 4165)),
2603
- simde_mm_set_pi16(INT16_C( 24125), INT16_C( 29535), INT16_C( 14450), INT16_C( 764)),
2604
- simde_mm_set_pi16(INT16_C( 14450), INT16_C( -18371), INT16_C( 764), INT16_C( 4165)) },
2605
- { simde_mm_set_pi16(INT16_C( -29305), INT16_C( -20968), INT16_C( -31863), INT16_C( 1945)),
2606
- simde_mm_set_pi16(INT16_C( 22380), INT16_C( -9274), INT16_C( -14525), INT16_C( 28073)),
2607
- simde_mm_set_pi16(INT16_C( -14525), INT16_C( -31863), INT16_C( 28073), INT16_C( 1945)) },
2608
- { simde_mm_set_pi16(INT16_C( 29396), INT16_C( -4481), INT16_C( 16009), INT16_C( -7692)),
2609
- simde_mm_set_pi16(INT16_C( 19262), INT16_C( -10592), INT16_C( 1200), INT16_C( -22541)),
2610
- simde_mm_set_pi16(INT16_C( 1200), INT16_C( 16009), INT16_C( -22541), INT16_C( -7692)) },
2611
- { simde_mm_set_pi16(INT16_C( 10778), INT16_C( -30276), INT16_C( 31580), INT16_C( 4144)),
2612
- simde_mm_set_pi16(INT16_C( -15899), INT16_C( 20583), INT16_C( -12863), INT16_C( 13808)),
2613
- simde_mm_set_pi16(INT16_C( -12863), INT16_C( 31580), INT16_C( 13808), INT16_C( 4144)) },
2614
- { simde_mm_set_pi16(INT16_C( -30267), INT16_C( -14054), INT16_C( 22036), INT16_C( -6987)),
2615
- simde_mm_set_pi16(INT16_C( -22296), INT16_C( 22035), INT16_C( -11029), INT16_C( 3882)),
2616
- simde_mm_set_pi16(INT16_C( -11029), INT16_C( 22036), INT16_C( 3882), INT16_C( -6987)) },
2617
- { simde_mm_set_pi16(INT16_C( 1373), INT16_C( 25788), INT16_C( -14639), INT16_C( 18996)),
2618
- simde_mm_set_pi16(INT16_C( 6580), INT16_C( 13730), INT16_C( -12979), INT16_C( -26646)),
2619
- simde_mm_set_pi16(INT16_C( -12979), INT16_C( -14639), INT16_C( -26646), INT16_C( 18996)) },
2620
- { simde_mm_set_pi16(INT16_C( 27110), INT16_C( 18497), INT16_C( -15879), INT16_C( -18233)),
2621
- simde_mm_set_pi16(INT16_C( -26068), INT16_C( -29214), INT16_C( 32362), INT16_C( -26103)),
2622
- simde_mm_set_pi16(INT16_C( 32362), INT16_C( -15879), INT16_C( -26103), INT16_C( -18233)) },
2623
- { simde_mm_set_pi16(INT16_C( -3448), INT16_C( 28151), INT16_C( 21394), INT16_C( 2546)),
2624
- simde_mm_set_pi16(INT16_C( 30183), INT16_C( -1624), INT16_C( 11589), INT16_C( 23080)),
2625
- simde_mm_set_pi16(INT16_C( 11589), INT16_C( 21394), INT16_C( 23080), INT16_C( 2546)) }
2626
- };
2627
-
2628
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2629
- simde__m64 r = simde_mm_unpacklo_pi16(test_vec[i].a, test_vec[i].b);
2630
- simde_mm_empty();
2631
- simde_assert_m64_i16(r, ==, test_vec[i].r);
2632
- }
2633
-
2634
- simde_mm_empty();
2635
- return MUNIT_OK;
2636
- }
2637
-
2638
- static MunitResult
2639
- test_simde_mm_unpacklo_pi32(const MunitParameter params[], void* data) {
2640
- (void) params;
2641
- (void) data;
2642
-
2643
- const struct {
2644
- simde__m64 a;
2645
- simde__m64 b;
2646
- simde__m64 r;
2647
- } test_vec[8] = {
2648
- { simde_mm_set_pi32(INT32_C( -996466818), INT32_C( 42237187)),
2649
- simde_mm_set_pi32(INT32_C( -37002499), INT32_C( -1170856260)),
2650
- simde_mm_set_pi32(INT32_C( -1170856260), INT32_C( 42237187)) },
2651
- { simde_mm_set_pi32(INT32_C( 2063937130), INT32_C( 491318053)),
2652
- simde_mm_set_pi32(INT32_C( -1702472225), INT32_C( 404431239)),
2653
- simde_mm_set_pi32(INT32_C( 404431239), INT32_C( 491318053)) },
2654
- { simde_mm_set_pi32(INT32_C( 482157619), INT32_C( 2096228641)),
2655
- simde_mm_set_pi32(INT32_C( 1577000773), INT32_C( -1308575062)),
2656
- simde_mm_set_pi32(INT32_C( -1308575062), INT32_C( 2096228641)) },
2657
- { simde_mm_set_pi32(INT32_C( -296283078), INT32_C( -1136099560)),
2658
- simde_mm_set_pi32(INT32_C( 813050106), INT32_C( 140703223)),
2659
- simde_mm_set_pi32(INT32_C( 140703223), INT32_C( -1136099560)) },
2660
- { simde_mm_set_pi32(INT32_C( -1874282519), INT32_C( 1046328641)),
2661
- simde_mm_set_pi32(INT32_C( 1711474246), INT32_C( 663714514)),
2662
- simde_mm_set_pi32(INT32_C( 663714514), INT32_C( 1046328641)) },
2663
- { simde_mm_set_pi32(INT32_C( 414254548), INT32_C( -1137400610)),
2664
- simde_mm_set_pi32(INT32_C( 1336205549), INT32_C( -1985285725)),
2665
- simde_mm_set_pi32(INT32_C( -1985285725), INT32_C( -1137400610)) },
2666
- { simde_mm_set_pi32(INT32_C( -1928184284), INT32_C( 711404402)),
2667
- simde_mm_set_pi32(INT32_C( 894723783), INT32_C( -331643442)),
2668
- simde_mm_set_pi32(INT32_C( -331643442), INT32_C( 711404402)) },
2669
- { simde_mm_set_pi32(INT32_C( -1171624194), INT32_C( -943645737)),
2670
- simde_mm_set_pi32(INT32_C( -1212436628), INT32_C( -1787000320)),
2671
- simde_mm_set_pi32(INT32_C( -1787000320), INT32_C( -943645737)) }
2672
- };
2673
-
2674
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2675
- simde__m64 r = simde_mm_unpacklo_pi32(test_vec[i].a, test_vec[i].b);
2676
- simde_mm_empty();
2677
- simde_assert_m64_i32(r, ==, test_vec[i].r);
2678
- }
2679
-
2680
- simde_mm_empty();
2681
- return MUNIT_OK;
2682
- }
2683
-
2684
- static MunitResult
2685
- test_simde_mm_xor_si64(const MunitParameter params[], void* data) {
2686
- (void) params;
2687
- (void) data;
2688
-
2689
- const struct {
2690
- simde__m64 a;
2691
- simde__m64 b;
2692
- simde__m64 r;
2693
- } test_vec[8] = {
2694
- { simde_mm_cvtsi64_m64(INT64_C( 3540462192578516470)),
2695
- simde_mm_cvtsi64_m64(INT64_C( 7953957601195225655)),
2696
- simde_mm_cvtsi64_m64(INT64_C( 6863518614534072257)) },
2697
- { simde_mm_cvtsi64_m64(INT64_C( 3280097856998777041)),
2698
- simde_mm_cvtsi64_m64(INT64_C( 7227524436289590224)),
2699
- simde_mm_cvtsi64_m64(INT64_C( 5316618871007982337)) },
2700
- { simde_mm_cvtsi64_m64(INT64_C( -73768962290391525)),
2701
- simde_mm_cvtsi64_m64(INT64_C( -8786938381172726443)),
2702
- simde_mm_cvtsi64_m64(INT64_C( 8716556128933069646)) },
2703
- { simde_mm_cvtsi64_m64(INT64_C( -3834999859910724293)),
2704
- simde_mm_cvtsi64_m64(INT64_C( 1473106142712794056)),
2705
- simde_mm_cvtsi64_m64(INT64_C( -2398499088890937613)) },
2706
- { simde_mm_cvtsi64_m64(INT64_C( -2129742113263669437)),
2707
- simde_mm_cvtsi64_m64(INT64_C( 8747348426473787001)),
2708
- simde_mm_cvtsi64_m64(INT64_C( -7271780848289947334)) },
2709
- { simde_mm_cvtsi64_m64(INT64_C( 3415454954475332549)),
2710
- simde_mm_cvtsi64_m64(INT64_C( -4751919769270097997)),
2711
- simde_mm_cvtsi64_m64(INT64_C( -7968019982084324234)) },
2712
- { simde_mm_cvtsi64_m64(INT64_C( 2939655727369393330)),
2713
- simde_mm_cvtsi64_m64(INT64_C( -201574666518844870)),
2714
- simde_mm_cvtsi64_m64(INT64_C( -3028638143195201912)) },
2715
- { simde_mm_cvtsi64_m64(INT64_C( 2745915445215058834)),
2716
- simde_mm_cvtsi64_m64(INT64_C( 3063327936426889284)),
2717
- simde_mm_cvtsi64_m64(INT64_C( 907566634544925654)) }
2718
- };
2719
-
2720
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2721
- simde__m64 r = simde_mm_xor_si64(test_vec[i].a, test_vec[i].b);
2722
- simde_mm_empty();
2723
- simde_assert_m64_i64(r, ==, test_vec[i].r);
2724
- }
2725
-
2726
- simde_mm_empty();
2727
- return MUNIT_OK;
2728
- }
2729
-
2730
- static MunitResult
2731
- test_simde_m_to_int(const MunitParameter params[], void* data) {
2732
- (void) params;
2733
- (void) data;
2734
-
2735
- const struct {
2736
- simde__m64 a;
2737
- int32_t r;
2738
- } test_vec[8] = {
2739
- { simde_mm_set_pi32(INT32_C( 187717888), INT32_C( 752961943)), INT32_C( 752961943) },
2740
- { simde_mm_set_pi32(INT32_C( 1573710578), INT32_C( 101880394)), INT32_C( 101880394) },
2741
- { simde_mm_set_pi32(INT32_C( 1011596849), INT32_C( 885891666)), INT32_C( 885891666) },
2742
- { simde_mm_set_pi32(INT32_C( -1107434699), INT32_C( -838173825)), INT32_C( -838173825) },
2743
- { simde_mm_set_pi32(INT32_C( 1945069486), INT32_C( 466583902)), INT32_C( 466583902) },
2744
- { simde_mm_set_pi32(INT32_C( 458761181), INT32_C( 257379889)), INT32_C( 257379889) },
2745
- { simde_mm_set_pi32(INT32_C( 848486959), INT32_C( -1415343346)), INT32_C( -1415343346) },
2746
- { simde_mm_set_pi32(INT32_C( -1452285617), INT32_C( -1697816479)), INT32_C( -1697816479) }
2747
- };
2748
-
2749
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2750
- int32_t r = simde_m_to_int(test_vec[i].a);
2751
- simde_mm_empty();
2752
- munit_assert_int32(r, ==, test_vec[i].r);
2753
- }
2754
-
2755
- simde_mm_empty();
2756
- return MUNIT_OK;
2757
- }
2758
-
2759
- static MunitResult
2760
- test_simde_m_to_int64(const MunitParameter params[], void* data) {
2761
- (void) params;
2762
- (void) data;
2763
-
2764
- const struct {
2765
- simde__m64 a;
2766
- int64_t r;
2767
- } test_vec[8] = {
2768
- { simde_mm_cvtsi64_m64(INT64_C( -2003895301208818234)), INT64_C( -2003895301208818234) },
2769
- { simde_mm_cvtsi64_m64(INT64_C( -372926738147273591)), INT64_C( -372926738147273591) },
2770
- { simde_mm_cvtsi64_m64(INT64_C( -3656592147926155100)), INT64_C( -3656592147926155100) },
2771
- { simde_mm_cvtsi64_m64(INT64_C( 5100863564862776395)), INT64_C( 5100863564862776395) },
2772
- { simde_mm_cvtsi64_m64(INT64_C( -214027610699488575)), INT64_C( -214027610699488575) },
2773
- { simde_mm_cvtsi64_m64(INT64_C( -7630939822071486777)), INT64_C( -7630939822071486777) },
2774
- { simde_mm_cvtsi64_m64(INT64_C( 9123236376678660233)), INT64_C( 9123236376678660233) },
2775
- { simde_mm_cvtsi64_m64(INT64_C( 3260252501062812952)), INT64_C( 3260252501062812952) }
2776
- };
2777
-
2778
- for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2779
- int64_t r = simde_m_to_int64(test_vec[i].a);
2780
- simde_mm_empty();
2781
- munit_assert_int64(r, ==, test_vec[i].r);
2782
- }
2783
-
2784
- simde_mm_empty();
2785
- return MUNIT_OK;
2786
- }
2787
-
2788
- #endif /* defined(SIMDE_MMX_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
2789
-
2790
- HEDLEY_DIAGNOSTIC_PUSH
2791
- HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL
2792
-
2793
- static MunitTest test_suite_tests[] = {
2794
- #if defined(SIMDE_MMX_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)
2795
- SIMDE_TESTS_DEFINE_TEST(mm_set1_pi8),
2796
- SIMDE_TESTS_DEFINE_TEST(mm_set1_pi16),
2797
- SIMDE_TESTS_DEFINE_TEST(mm_set1_pi32),
2798
- SIMDE_TESTS_DEFINE_TEST(mm_setr_pi8),
2799
- SIMDE_TESTS_DEFINE_TEST(mm_setr_pi16),
2800
- SIMDE_TESTS_DEFINE_TEST(mm_setr_pi32),
2801
- SIMDE_TESTS_DEFINE_TEST(mm_add_pi8),
2802
- SIMDE_TESTS_DEFINE_TEST(mm_add_pi16),
2803
- SIMDE_TESTS_DEFINE_TEST(mm_add_pi32),
2804
- SIMDE_TESTS_DEFINE_TEST(mm_adds_pi8),
2805
- SIMDE_TESTS_DEFINE_TEST(mm_adds_pi16),
2806
- SIMDE_TESTS_DEFINE_TEST(mm_adds_pu8),
2807
- SIMDE_TESTS_DEFINE_TEST(mm_adds_pu16),
2808
- SIMDE_TESTS_DEFINE_TEST(mm_and_si64),
2809
- SIMDE_TESTS_DEFINE_TEST(mm_andnot_si64),
2810
- SIMDE_TESTS_DEFINE_TEST(mm_cmpeq_pi8),
2811
- SIMDE_TESTS_DEFINE_TEST(mm_cmpeq_pi16),
2812
- SIMDE_TESTS_DEFINE_TEST(mm_cmpeq_pi32),
2813
- SIMDE_TESTS_DEFINE_TEST(mm_cmpgt_pi8),
2814
- SIMDE_TESTS_DEFINE_TEST(mm_cmpgt_pi16),
2815
- SIMDE_TESTS_DEFINE_TEST(mm_cmpgt_pi32),
2816
- SIMDE_TESTS_DEFINE_TEST(mm_cvtm64_si64),
2817
- SIMDE_TESTS_DEFINE_TEST(mm_cvtsi32_si64),
2818
- SIMDE_TESTS_DEFINE_TEST(mm_cvtsi64_m64),
2819
- SIMDE_TESTS_DEFINE_TEST(mm_cvtsi64_si32),
2820
- SIMDE_TESTS_DEFINE_TEST(mm_madd_pi16),
2821
- SIMDE_TESTS_DEFINE_TEST(mm_mulhi_pi16),
2822
- SIMDE_TESTS_DEFINE_TEST(mm_mullo_pi16),
2823
- SIMDE_TESTS_DEFINE_TEST(mm_or_si64),
2824
- SIMDE_TESTS_DEFINE_TEST(mm_packs_pi16),
2825
- SIMDE_TESTS_DEFINE_TEST(mm_packs_pi32),
2826
- SIMDE_TESTS_DEFINE_TEST(mm_packs_pu16),
2827
- SIMDE_TESTS_DEFINE_TEST(mm_sll_pi16),
2828
- SIMDE_TESTS_DEFINE_TEST(mm_sll_pi32),
2829
- SIMDE_TESTS_DEFINE_TEST(mm_sll_si64),
2830
- SIMDE_TESTS_DEFINE_TEST(mm_slli_pi16),
2831
- SIMDE_TESTS_DEFINE_TEST(mm_slli_pi32),
2832
- SIMDE_TESTS_DEFINE_TEST(mm_slli_si64),
2833
- SIMDE_TESTS_DEFINE_TEST(mm_srl_pi16),
2834
- SIMDE_TESTS_DEFINE_TEST(mm_srl_pi32),
2835
- SIMDE_TESTS_DEFINE_TEST(mm_srl_si64),
2836
- SIMDE_TESTS_DEFINE_TEST(mm_srli_pi16),
2837
- SIMDE_TESTS_DEFINE_TEST(mm_srli_pi32),
2838
- SIMDE_TESTS_DEFINE_TEST(mm_srli_si64),
2839
- SIMDE_TESTS_DEFINE_TEST(mm_srai_pi16),
2840
- SIMDE_TESTS_DEFINE_TEST(mm_srai_pi32),
2841
- SIMDE_TESTS_DEFINE_TEST(mm_sra_pi16),
2842
- SIMDE_TESTS_DEFINE_TEST(mm_sra_pi32),
2843
- SIMDE_TESTS_DEFINE_TEST(mm_sub_pi8),
2844
- SIMDE_TESTS_DEFINE_TEST(mm_sub_pi16),
2845
- SIMDE_TESTS_DEFINE_TEST(mm_sub_pi32),
2846
- SIMDE_TESTS_DEFINE_TEST(mm_subs_pi8),
2847
- SIMDE_TESTS_DEFINE_TEST(mm_subs_pi16),
2848
- SIMDE_TESTS_DEFINE_TEST(mm_subs_pu8),
2849
- SIMDE_TESTS_DEFINE_TEST(mm_subs_pu16),
2850
- SIMDE_TESTS_DEFINE_TEST(mm_unpackhi_pi8),
2851
- SIMDE_TESTS_DEFINE_TEST(mm_unpackhi_pi16),
2852
- SIMDE_TESTS_DEFINE_TEST(mm_unpackhi_pi32),
2853
- SIMDE_TESTS_DEFINE_TEST(mm_unpacklo_pi8),
2854
- SIMDE_TESTS_DEFINE_TEST(mm_unpacklo_pi16),
2855
- SIMDE_TESTS_DEFINE_TEST(mm_unpacklo_pi32),
2856
- SIMDE_TESTS_DEFINE_TEST(mm_xor_si64),
2857
- SIMDE_TESTS_DEFINE_TEST(m_to_int),
2858
- SIMDE_TESTS_DEFINE_TEST(m_to_int64),
2859
- #endif /* defined(SIMDE_MMX_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
2860
-
2861
- { NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }
2862
- };
2863
-
2864
- HEDLEY_C_DECL MunitSuite* SIMDE_TESTS_GENERATE_SYMBOL(suite)(void) {
2865
- static MunitSuite suite = { (char*) "/" HEDLEY_STRINGIFY(SIMDE_TESTS_CURRENT_ISAX), test_suite_tests, NULL, 1, MUNIT_SUITE_OPTION_NONE };
2866
-
2867
- return &suite;
2868
- }
2869
-
2870
- #if defined(SIMDE_TESTS_SINGLE_ISAX)
2871
- int main(int argc, char* argv[HEDLEY_ARRAY_PARAM(argc + 1)]) {
2872
- static MunitSuite suite = { "", test_suite_tests, NULL, 1, MUNIT_SUITE_OPTION_NONE };
2873
-
2874
- return munit_suite_main(&suite, NULL, argc, argv);
2875
- }
2876
- #endif /* defined(SIMDE_TESTS_SINGLE_ISAX) */
2877
-
2878
- HEDLEY_DIAGNOSTIC_POP