minimap2 0.2.25.0 → 0.2.25.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -3
- data/ext/minimap2/Makefile +6 -2
- data/ext/minimap2/NEWS.md +38 -0
- data/ext/minimap2/README.md +9 -3
- data/ext/minimap2/align.c +5 -3
- data/ext/minimap2/cookbook.md +2 -2
- data/ext/minimap2/format.c +7 -4
- data/ext/minimap2/kalloc.c +20 -1
- data/ext/minimap2/kalloc.h +13 -2
- data/ext/minimap2/ksw2.h +1 -0
- data/ext/minimap2/ksw2_extd2_sse.c +1 -1
- data/ext/minimap2/ksw2_exts2_sse.c +79 -40
- data/ext/minimap2/ksw2_extz2_sse.c +1 -1
- data/ext/minimap2/lchain.c +15 -16
- data/ext/minimap2/lib/simde/CONTRIBUTING.md +114 -0
- data/ext/minimap2/lib/simde/COPYING +20 -0
- data/ext/minimap2/lib/simde/README.md +333 -0
- data/ext/minimap2/lib/simde/amalgamate.py +58 -0
- data/ext/minimap2/lib/simde/meson.build +33 -0
- data/ext/minimap2/lib/simde/netlify.toml +20 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +140 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +137 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +142 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +145 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +140 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +145 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +140 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +143 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +137 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +141 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +147 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +141 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +134 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +138 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +134 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +137 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +131 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +135 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +141 -0
- data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +135 -0
- data/ext/minimap2/lib/simde/simde/arm/neon.h +97 -0
- data/ext/minimap2/lib/simde/simde/check.h +267 -0
- data/ext/minimap2/lib/simde/simde/debug-trap.h +83 -0
- data/ext/minimap2/lib/simde/simde/hedley.h +1899 -0
- data/ext/minimap2/lib/simde/simde/simde-arch.h +445 -0
- data/ext/minimap2/lib/simde/simde/simde-common.h +697 -0
- data/ext/minimap2/lib/simde/simde/x86/avx.h +5385 -0
- data/ext/minimap2/lib/simde/simde/x86/avx2.h +2402 -0
- data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +391 -0
- data/ext/minimap2/lib/simde/simde/x86/avx512f.h +3389 -0
- data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +112 -0
- data/ext/minimap2/lib/simde/simde/x86/fma.h +659 -0
- data/ext/minimap2/lib/simde/simde/x86/mmx.h +2210 -0
- data/ext/minimap2/lib/simde/simde/x86/sse.h +3696 -0
- data/ext/minimap2/lib/simde/simde/x86/sse2.h +5991 -0
- data/ext/minimap2/lib/simde/simde/x86/sse3.h +343 -0
- data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +1783 -0
- data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +105 -0
- data/ext/minimap2/lib/simde/simde/x86/ssse3.h +1053 -0
- data/ext/minimap2/lib/simde/simde/x86/svml.h +543 -0
- data/ext/minimap2/lib/simde/test/CMakeLists.txt +166 -0
- data/ext/minimap2/lib/simde/test/arm/meson.build +4 -0
- data/ext/minimap2/lib/simde/test/arm/neon/meson.build +23 -0
- data/ext/minimap2/lib/simde/test/arm/neon/skel.c +871 -0
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +134 -0
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +39 -0
- data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +10 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +1260 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +873 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +1084 -0
- data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +1260 -0
- data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +18 -0
- data/ext/minimap2/lib/simde/test/arm/test-arm.c +20 -0
- data/ext/minimap2/lib/simde/test/arm/test-arm.h +8 -0
- data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +171 -0
- data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +68 -0
- data/ext/minimap2/lib/simde/test/meson.build +64 -0
- data/ext/minimap2/lib/simde/test/munit/COPYING +21 -0
- data/ext/minimap2/lib/simde/test/munit/Makefile +55 -0
- data/ext/minimap2/lib/simde/test/munit/README.md +54 -0
- data/ext/minimap2/lib/simde/test/munit/example.c +351 -0
- data/ext/minimap2/lib/simde/test/munit/meson.build +37 -0
- data/ext/minimap2/lib/simde/test/munit/munit.c +2055 -0
- data/ext/minimap2/lib/simde/test/munit/munit.h +535 -0
- data/ext/minimap2/lib/simde/test/run-tests.c +20 -0
- data/ext/minimap2/lib/simde/test/run-tests.h +260 -0
- data/ext/minimap2/lib/simde/test/x86/avx.c +13752 -0
- data/ext/minimap2/lib/simde/test/x86/avx2.c +9977 -0
- data/ext/minimap2/lib/simde/test/x86/avx512bw.c +2664 -0
- data/ext/minimap2/lib/simde/test/x86/avx512f.c +10416 -0
- data/ext/minimap2/lib/simde/test/x86/avx512vl.c +210 -0
- data/ext/minimap2/lib/simde/test/x86/fma.c +2557 -0
- data/ext/minimap2/lib/simde/test/x86/meson.build +33 -0
- data/ext/minimap2/lib/simde/test/x86/mmx.c +2878 -0
- data/ext/minimap2/lib/simde/test/x86/skel.c +2984 -0
- data/ext/minimap2/lib/simde/test/x86/sse.c +5121 -0
- data/ext/minimap2/lib/simde/test/x86/sse2.c +9860 -0
- data/ext/minimap2/lib/simde/test/x86/sse3.c +486 -0
- data/ext/minimap2/lib/simde/test/x86/sse4.1.c +3446 -0
- data/ext/minimap2/lib/simde/test/x86/sse4.2.c +101 -0
- data/ext/minimap2/lib/simde/test/x86/ssse3.c +2084 -0
- data/ext/minimap2/lib/simde/test/x86/svml.c +1545 -0
- data/ext/minimap2/lib/simde/test/x86/test-avx.h +16 -0
- data/ext/minimap2/lib/simde/test/x86/test-avx512.h +25 -0
- data/ext/minimap2/lib/simde/test/x86/test-mmx.h +13 -0
- data/ext/minimap2/lib/simde/test/x86/test-sse.h +13 -0
- data/ext/minimap2/lib/simde/test/x86/test-sse2.h +13 -0
- data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +196 -0
- data/ext/minimap2/lib/simde/test/x86/test-x86.c +48 -0
- data/ext/minimap2/lib/simde/test/x86/test-x86.h +8 -0
- data/ext/minimap2/main.c +13 -6
- data/ext/minimap2/map.c +0 -5
- data/ext/minimap2/minimap.h +40 -31
- data/ext/minimap2/minimap2.1 +19 -5
- data/ext/minimap2/misc/paftools.js +545 -24
- data/ext/minimap2/options.c +1 -1
- data/ext/minimap2/pyproject.toml +2 -0
- data/ext/minimap2/python/mappy.pyx +3 -1
- data/ext/minimap2/seed.c +1 -1
- data/ext/minimap2/setup.py +32 -22
- data/lib/minimap2/version.rb +1 -1
- metadata +100 -3
@@ -0,0 +1,2084 @@
|
|
1
|
+
/* Copyright (c) 2017 Evan Nemerson <evan@nemerson.com>
|
2
|
+
*
|
3
|
+
* Permission is hereby granted, free of charge, to any person
|
4
|
+
* obtaining a copy of this software and associated documentation
|
5
|
+
* files (the "Software"), to deal in the Software without
|
6
|
+
* restriction, including without limitation the rights to use, copy,
|
7
|
+
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
8
|
+
* of the Software, and to permit persons to whom the Software is
|
9
|
+
* furnished to do so, subject to the following conditions:
|
10
|
+
*
|
11
|
+
* The above copyright notice and this permission notice shall be
|
12
|
+
* included in all copies or substantial portions of the Software.
|
13
|
+
*
|
14
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
18
|
+
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
19
|
+
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
20
|
+
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
* SOFTWARE.
|
22
|
+
*/
|
23
|
+
|
24
|
+
#define SIMDE_TESTS_CURRENT_ISAX ssse3
|
25
|
+
#include <test/x86/test-x86-internal.h>
|
26
|
+
#include <simde/x86/ssse3.h>
|
27
|
+
|
28
|
+
#if defined(SIMDE_SSSE3_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)
|
29
|
+
|
30
|
+
static MunitResult
|
31
|
+
test_simde_mm_abs_epi8(const MunitParameter params[], void* data) {
|
32
|
+
(void) params;
|
33
|
+
(void) data;
|
34
|
+
|
35
|
+
const struct {
|
36
|
+
simde__m128i a;
|
37
|
+
simde__m128i r;
|
38
|
+
} test_vec[8] = {
|
39
|
+
{ simde_mm_set_epi8(INT8_C(-128), INT8_C( 127), INT8_C( 0), INT8_C( -1),
|
40
|
+
INT8_C( 125), INT8_C( -56), INT8_C(-120), INT8_C( -18),
|
41
|
+
INT8_C( -45), INT8_C( 42), INT8_C( 62), INT8_C( -99),
|
42
|
+
INT8_C( -57), INT8_C( 32), INT8_C( -68), INT8_C( 66)),
|
43
|
+
simde_x_mm_set_epu8(UINT8_C( 128), UINT8_C( 127), UINT8_C( 0), UINT8_C( 1),
|
44
|
+
UINT8_C( 125), UINT8_C( 56), UINT8_C( 120), UINT8_C( 18),
|
45
|
+
UINT8_C( 45), UINT8_C( 42), UINT8_C( 62), UINT8_C( 99),
|
46
|
+
UINT8_C( 57), UINT8_C( 32), UINT8_C( 68), UINT8_C( 66)) },
|
47
|
+
{ simde_mm_set_epi8(INT8_C( 113), INT8_C( -60), INT8_C( 1), INT8_C( 32),
|
48
|
+
INT8_C( 41), INT8_C( 40), INT8_C( 112), INT8_C( -39),
|
49
|
+
INT8_C( -65), INT8_C( 54), INT8_C(-116), INT8_C( -97),
|
50
|
+
INT8_C( -18), INT8_C( 78), INT8_C( -84), INT8_C( 94)),
|
51
|
+
simde_x_mm_set_epu8(UINT8_C( 113), UINT8_C( 60), UINT8_C( 1), UINT8_C( 32),
|
52
|
+
UINT8_C( 41), UINT8_C( 40), UINT8_C( 112), UINT8_C( 39),
|
53
|
+
UINT8_C( 65), UINT8_C( 54), UINT8_C( 116), UINT8_C( 97),
|
54
|
+
UINT8_C( 18), UINT8_C( 78), UINT8_C( 84), UINT8_C( 94)) },
|
55
|
+
{ simde_mm_set_epi8(INT8_C( 126), INT8_C( -67), INT8_C( -75), INT8_C( 48),
|
56
|
+
INT8_C( -49), INT8_C( -8), INT8_C( 105), INT8_C( -28),
|
57
|
+
INT8_C(-100), INT8_C( -1), INT8_C( 112), INT8_C( -27),
|
58
|
+
INT8_C( -35), INT8_C( 114), INT8_C( -81), INT8_C( 121)),
|
59
|
+
simde_x_mm_set_epu8(UINT8_C( 126), UINT8_C( 67), UINT8_C( 75), UINT8_C( 48),
|
60
|
+
UINT8_C( 49), UINT8_C( 8), UINT8_C( 105), UINT8_C( 28),
|
61
|
+
UINT8_C( 100), UINT8_C( 1), UINT8_C( 112), UINT8_C( 27),
|
62
|
+
UINT8_C( 35), UINT8_C( 114), UINT8_C( 81), UINT8_C( 121)) },
|
63
|
+
{ simde_mm_set_epi8(INT8_C( 94), INT8_C( 74), INT8_C( 0), INT8_C( -58),
|
64
|
+
INT8_C(-112), INT8_C( 29), INT8_C(-113), INT8_C( -48),
|
65
|
+
INT8_C( 92), INT8_C( -26), INT8_C( -61), INT8_C( 19),
|
66
|
+
INT8_C( -82), INT8_C( -78), INT8_C( -59), INT8_C( 102)),
|
67
|
+
simde_x_mm_set_epu8(UINT8_C( 94), UINT8_C( 74), UINT8_C( 0), UINT8_C( 58),
|
68
|
+
UINT8_C( 112), UINT8_C( 29), UINT8_C( 113), UINT8_C( 48),
|
69
|
+
UINT8_C( 92), UINT8_C( 26), UINT8_C( 61), UINT8_C( 19),
|
70
|
+
UINT8_C( 82), UINT8_C( 78), UINT8_C( 59), UINT8_C( 102)) },
|
71
|
+
{ simde_mm_set_epi8(INT8_C( -47), INT8_C( 2), INT8_C( 14), INT8_C( 29),
|
72
|
+
INT8_C( 46), INT8_C( 102), INT8_C(-121), INT8_C( 118),
|
73
|
+
INT8_C( 113), INT8_C( 31), INT8_C( 96), INT8_C( -45),
|
74
|
+
INT8_C( -4), INT8_C( 59), INT8_C( -14), INT8_C(-113)),
|
75
|
+
simde_x_mm_set_epu8(UINT8_C( 47), UINT8_C( 2), UINT8_C( 14), UINT8_C( 29),
|
76
|
+
UINT8_C( 46), UINT8_C( 102), UINT8_C( 121), UINT8_C( 118),
|
77
|
+
UINT8_C( 113), UINT8_C( 31), UINT8_C( 96), UINT8_C( 45),
|
78
|
+
UINT8_C( 4), UINT8_C( 59), UINT8_C( 14), UINT8_C( 113)) },
|
79
|
+
{ simde_mm_set_epi8(INT8_C( -4), INT8_C( -36), INT8_C( -71), INT8_C( 103),
|
80
|
+
INT8_C(-106), INT8_C( 36), INT8_C( -43), INT8_C( 119),
|
81
|
+
INT8_C( 62), INT8_C( 74), INT8_C( 88), INT8_C( 28),
|
82
|
+
INT8_C( 5), INT8_C( 31), INT8_C( -84), INT8_C( -65)),
|
83
|
+
simde_x_mm_set_epu8(UINT8_C( 4), UINT8_C( 36), UINT8_C( 71), UINT8_C( 103),
|
84
|
+
UINT8_C( 106), UINT8_C( 36), UINT8_C( 43), UINT8_C( 119),
|
85
|
+
UINT8_C( 62), UINT8_C( 74), UINT8_C( 88), UINT8_C( 28),
|
86
|
+
UINT8_C( 5), UINT8_C( 31), UINT8_C( 84), UINT8_C( 65)) },
|
87
|
+
{ simde_mm_set_epi8(INT8_C( -76), INT8_C( 66), INT8_C(-116), INT8_C( 14),
|
88
|
+
INT8_C( 42), INT8_C( -27), INT8_C( 102), INT8_C( 115),
|
89
|
+
INT8_C( -18), INT8_C( 33), INT8_C( 48), INT8_C( 113),
|
90
|
+
INT8_C( 64), INT8_C( 25), INT8_C(-128), INT8_C(-121)),
|
91
|
+
simde_x_mm_set_epu8(UINT8_C( 76), UINT8_C( 66), UINT8_C( 116), UINT8_C( 14),
|
92
|
+
UINT8_C( 42), UINT8_C( 27), UINT8_C( 102), UINT8_C( 115),
|
93
|
+
UINT8_C( 18), UINT8_C( 33), UINT8_C( 48), UINT8_C( 113),
|
94
|
+
UINT8_C( 64), UINT8_C( 25), UINT8_C( 128), UINT8_C( 121)) },
|
95
|
+
{ simde_mm_set_epi8(INT8_C( 83), INT8_C( 29), INT8_C( -57), INT8_C(-122),
|
96
|
+
INT8_C( -78), INT8_C( -6), INT8_C( 104), INT8_C( -66),
|
97
|
+
INT8_C( -96), INT8_C( -79), INT8_C( -74), INT8_C( -82),
|
98
|
+
INT8_C( -64), INT8_C( 4), INT8_C( 64), INT8_C( -63)),
|
99
|
+
simde_x_mm_set_epu8(UINT8_C( 83), UINT8_C( 29), UINT8_C( 57), UINT8_C( 122),
|
100
|
+
UINT8_C( 78), UINT8_C( 6), UINT8_C( 104), UINT8_C( 66),
|
101
|
+
UINT8_C( 96), UINT8_C( 79), UINT8_C( 74), UINT8_C( 82),
|
102
|
+
UINT8_C( 64), UINT8_C( 4), UINT8_C( 64), UINT8_C( 63)) }
|
103
|
+
};
|
104
|
+
|
105
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
106
|
+
simde__m128i r = simde_mm_abs_epi8(test_vec[i].a);
|
107
|
+
simde_assert_m128i_i8(r, ==, test_vec[i].r);
|
108
|
+
}
|
109
|
+
|
110
|
+
return MUNIT_OK;
|
111
|
+
}
|
112
|
+
|
113
|
+
static MunitResult
|
114
|
+
test_simde_mm_abs_epi16(const MunitParameter params[], void* data) {
|
115
|
+
(void) params;
|
116
|
+
(void) data;
|
117
|
+
|
118
|
+
const struct {
|
119
|
+
simde__m128i a;
|
120
|
+
simde__m128i r;
|
121
|
+
} test_vec[8] = {
|
122
|
+
{ simde_mm_set_epi16(INT16_C(-32768), INT16_C( 32767), INT16_C(0), INT16_C(-1),
|
123
|
+
INT16_C( 17002), INT16_C(-16782), INT16_C( 31724), INT16_C( 17895)),
|
124
|
+
simde_x_mm_set_epu16(UINT16_C(32768), UINT16_C(32767), UINT16_C( 0), UINT16_C( 1),
|
125
|
+
UINT16_C(17002), UINT16_C(16782), UINT16_C(31724), UINT16_C(17895)) },
|
126
|
+
{ simde_mm_set_epi16(INT16_C( 516), INT16_C(-21666), INT16_C( 7667), INT16_C( 17413),
|
127
|
+
INT16_C(-25469), INT16_C( 28873), INT16_C(-13553), INT16_C(-30319)),
|
128
|
+
simde_x_mm_set_epu16(UINT16_C( 516), UINT16_C(21666), UINT16_C( 7667), UINT16_C(17413),
|
129
|
+
UINT16_C(25469), UINT16_C(28873), UINT16_C(13553), UINT16_C(30319)) },
|
130
|
+
{ simde_mm_set_epi16(INT16_C(-19076), INT16_C(-28644), INT16_C( -3095), INT16_C( -4676),
|
131
|
+
INT16_C( 7446), INT16_C( -2630), INT16_C( 16197), INT16_C(-16562)),
|
132
|
+
simde_x_mm_set_epu16(UINT16_C(19076), UINT16_C(28644), UINT16_C( 3095), UINT16_C( 4676),
|
133
|
+
UINT16_C( 7446), UINT16_C( 2630), UINT16_C(16197), UINT16_C(16562)) },
|
134
|
+
{ simde_mm_set_epi16(INT16_C( 17533), INT16_C( 20338), INT16_C( 8248), INT16_C( 6751),
|
135
|
+
INT16_C( -3126), INT16_C( 26964), INT16_C( 14690), INT16_C(-25810)),
|
136
|
+
simde_x_mm_set_epu16(UINT16_C(17533), UINT16_C(20338), UINT16_C( 8248), UINT16_C( 6751),
|
137
|
+
UINT16_C( 3126), UINT16_C(26964), UINT16_C(14690), UINT16_C(25810)) },
|
138
|
+
{ simde_mm_set_epi16(INT16_C( 18848), INT16_C( 6581), INT16_C(-32132), INT16_C( 14259),
|
139
|
+
INT16_C( 20181), INT16_C( 8393), INT16_C( 8677), INT16_C( 24318)),
|
140
|
+
simde_x_mm_set_epu16(UINT16_C(18848), UINT16_C( 6581), UINT16_C(32132), UINT16_C(14259),
|
141
|
+
UINT16_C(20181), UINT16_C( 8393), UINT16_C( 8677), UINT16_C(24318)) },
|
142
|
+
{ simde_mm_set_epi16(INT16_C(-16277), INT16_C(-19021), INT16_C( -2631), INT16_C( 6570),
|
143
|
+
INT16_C( 17968), INT16_C(-24371), INT16_C(-26844), INT16_C( -2593)),
|
144
|
+
simde_x_mm_set_epu16(UINT16_C(16277), UINT16_C(19021), UINT16_C( 2631), UINT16_C( 6570),
|
145
|
+
UINT16_C(17968), UINT16_C(24371), UINT16_C(26844), UINT16_C( 2593)) },
|
146
|
+
{ simde_mm_set_epi16(INT16_C( 23202), INT16_C(-30664), INT16_C( 14496), INT16_C(-10863),
|
147
|
+
INT16_C(-12787), INT16_C( -4044), INT16_C( 13497), INT16_C( 6178)),
|
148
|
+
simde_x_mm_set_epu16(UINT16_C(23202), UINT16_C(30664), UINT16_C(14496), UINT16_C(10863),
|
149
|
+
UINT16_C(12787), UINT16_C( 4044), UINT16_C(13497), UINT16_C( 6178)) },
|
150
|
+
{ simde_mm_set_epi16(INT16_C(-16084), INT16_C( 24093), INT16_C( -9776), INT16_C( 28468),
|
151
|
+
INT16_C( -9561), INT16_C( -3016), INT16_C( -8976), INT16_C(-19890)),
|
152
|
+
simde_x_mm_set_epu16(UINT16_C(16084), UINT16_C(24093), UINT16_C( 9776), UINT16_C(28468),
|
153
|
+
UINT16_C( 9561), UINT16_C( 3016), UINT16_C( 8976), UINT16_C(19890)) }
|
154
|
+
};
|
155
|
+
|
156
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
157
|
+
simde__m128i r = simde_mm_abs_epi16(test_vec[i].a);
|
158
|
+
simde_assert_m128i_u16(r, ==, test_vec[i].r);
|
159
|
+
}
|
160
|
+
|
161
|
+
return MUNIT_OK;
|
162
|
+
}
|
163
|
+
|
164
|
+
static MunitResult
|
165
|
+
test_simde_mm_abs_epi32(const MunitParameter params[], void* data) {
|
166
|
+
(void) params;
|
167
|
+
(void) data;
|
168
|
+
|
169
|
+
const struct {
|
170
|
+
simde__m128i a;
|
171
|
+
simde__m128i r;
|
172
|
+
} test_vec[8] = {
|
173
|
+
{ simde_mm_set_epi32( INT32_MIN , INT32_C(2147483647), INT32_C(0), INT32_C(-1)),
|
174
|
+
simde_x_mm_set_epu32(UINT32_C(2147483648), UINT32_C(2147483647), UINT32_C(0), UINT32_C(1)) },
|
175
|
+
{ simde_mm_set_epi32(INT32_C(-1840848639), INT32_C( 1050450514), INT32_C( -157155149), INT32_C(-1343526078)),
|
176
|
+
simde_x_mm_set_epu32(UINT32_C(1840848639), UINT32_C(1050450514), UINT32_C( 157155149), UINT32_C(1343526078)) },
|
177
|
+
{ simde_mm_set_epi32(INT32_C( 1334116049), INT32_C( 2129925302), INT32_C( 23778640), INT32_C( 713371303)),
|
178
|
+
simde_x_mm_set_epu32(UINT32_C(1334116049), UINT32_C(2129925302), UINT32_C( 23778640), UINT32_C( 713371303)) },
|
179
|
+
{ simde_mm_set_epi32(INT32_C( -302860244), INT32_C( 2030687021), INT32_C( 1060978877), INT32_C( -670900580)),
|
180
|
+
simde_x_mm_set_epu32(UINT32_C( 302860244), UINT32_C(2030687021), UINT32_C(1060978877), UINT32_C( 670900580)) },
|
181
|
+
{ simde_mm_set_epi32(INT32_C( 2040528386), INT32_C( 1361895717), INT32_C( 147208745), INT32_C( 773158561)),
|
182
|
+
simde_x_mm_set_epu32(UINT32_C(2040528386), UINT32_C(1361895717), UINT32_C( 147208745), UINT32_C( 773158561)) },
|
183
|
+
{ simde_mm_set_epi32(INT32_C(-1860066775), INT32_C( 109120839), INT32_C( 825660888), INT32_C( 1402710636)),
|
184
|
+
simde_x_mm_set_epu32(UINT32_C(1860066775), UINT32_C( 109120839), UINT32_C( 825660888), UINT32_C(1402710636)) },
|
185
|
+
{ simde_mm_set_epi32(INT32_C( 1113257677), INT32_C( 2062218865), INT32_C( 1785064575), INT32_C( 1289174686)),
|
186
|
+
simde_x_mm_set_epu32(UINT32_C(1113257677), UINT32_C(2062218865), UINT32_C(1785064575), UINT32_C(1289174686)) },
|
187
|
+
{ simde_mm_set_epi32(INT32_C(-2072383870), INT32_C( 1611206266), INT32_C( 1414397723), INT32_C(-1863310079)),
|
188
|
+
simde_x_mm_set_epu32(UINT32_C(2072383870), UINT32_C(1611206266), UINT32_C(1414397723), UINT32_C(1863310079)) }
|
189
|
+
};
|
190
|
+
|
191
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
192
|
+
simde__m128i r = simde_mm_abs_epi32(test_vec[i].a);
|
193
|
+
simde_assert_m128i_u32(r, ==, test_vec[i].r);
|
194
|
+
}
|
195
|
+
|
196
|
+
return MUNIT_OK;
|
197
|
+
}
|
198
|
+
|
199
|
+
static MunitResult
|
200
|
+
test_simde_mm_abs_pi8(const MunitParameter params[], void* data) {
|
201
|
+
(void) params;
|
202
|
+
(void) data;
|
203
|
+
|
204
|
+
const struct {
|
205
|
+
simde__m64 a;
|
206
|
+
simde__m64 r;
|
207
|
+
} test_vec[8] = {
|
208
|
+
{ simde_mm_set_pi8(INT8_C( 38), INT8_C( 28), INT8_C( -38), INT8_C(-113),
|
209
|
+
INT8_C(-109), INT8_C( -88), INT8_C( 99), INT8_C( -40)),
|
210
|
+
simde_x_mm_set_pu8(UINT8_C( 38), UINT8_C( 28), UINT8_C( 38), UINT8_C(113),
|
211
|
+
UINT8_C(109), UINT8_C( 88), UINT8_C( 99), UINT8_C( 40)) },
|
212
|
+
{ simde_mm_set_pi8(INT8_C( 57), INT8_C( 21), INT8_C( 63), INT8_C( 38),
|
213
|
+
INT8_C( 75), INT8_C( -74), INT8_C( -71), INT8_C( 58)),
|
214
|
+
simde_x_mm_set_pu8(UINT8_C( 57), UINT8_C( 21), UINT8_C( 63), UINT8_C( 38),
|
215
|
+
UINT8_C( 75), UINT8_C( 74), UINT8_C( 71), UINT8_C( 58)) },
|
216
|
+
{ simde_mm_set_pi8(INT8_C( 107), INT8_C(-123), INT8_C( -46), INT8_C( 116),
|
217
|
+
INT8_C( 49), INT8_C(-110), INT8_C( -27), INT8_C( -14)),
|
218
|
+
simde_x_mm_set_pu8(UINT8_C(107), UINT8_C(123), UINT8_C( 46), UINT8_C(116),
|
219
|
+
UINT8_C( 49), UINT8_C(110), UINT8_C( 27), UINT8_C( 14)) },
|
220
|
+
{ simde_mm_set_pi8(INT8_C( 94), INT8_C( -17), INT8_C(-121), INT8_C( -59),
|
221
|
+
INT8_C( -39), INT8_C(-120), INT8_C( -6), INT8_C(-128)),
|
222
|
+
simde_x_mm_set_pu8(UINT8_C( 94), UINT8_C( 17), UINT8_C(121), UINT8_C( 59),
|
223
|
+
UINT8_C( 39), UINT8_C(120), UINT8_C( 6), UINT8_C(128)) },
|
224
|
+
{ simde_mm_set_pi8(INT8_C(-113), INT8_C( -83), INT8_C( 56), INT8_C( 12),
|
225
|
+
INT8_C( 114), INT8_C( 46), INT8_C( -44), INT8_C( 75)),
|
226
|
+
simde_x_mm_set_pu8(UINT8_C(113), UINT8_C( 83), UINT8_C( 56), UINT8_C( 12),
|
227
|
+
UINT8_C(114), UINT8_C( 46), UINT8_C( 44), UINT8_C( 75)) },
|
228
|
+
{ simde_mm_set_pi8(INT8_C( -28), INT8_C( 63), INT8_C( 103), INT8_C(-127),
|
229
|
+
INT8_C( 94), INT8_C( 94), INT8_C( 64), INT8_C( 107)),
|
230
|
+
simde_x_mm_set_pu8(UINT8_C( 28), UINT8_C( 63), UINT8_C(103), UINT8_C(127),
|
231
|
+
UINT8_C( 94), UINT8_C( 94), UINT8_C( 64), UINT8_C(107)) },
|
232
|
+
{ simde_mm_set_pi8(INT8_C( -42), INT8_C( 122), INT8_C( 121), INT8_C( 5),
|
233
|
+
INT8_C( 93), INT8_C( -41), INT8_C( -24), INT8_C( 13)),
|
234
|
+
simde_x_mm_set_pu8(UINT8_C( 42), UINT8_C(122), UINT8_C(121), UINT8_C( 5),
|
235
|
+
UINT8_C( 93), UINT8_C( 41), UINT8_C( 24), UINT8_C( 13)) },
|
236
|
+
{ simde_mm_set_pi8(INT8_C( -78), INT8_C( -48), INT8_C( 35), INT8_C( 4),
|
237
|
+
INT8_C( -62), INT8_C( -9), INT8_C( 70), INT8_C( 20)),
|
238
|
+
simde_x_mm_set_pu8(UINT8_C( 78), UINT8_C( 48), UINT8_C( 35), UINT8_C( 4),
|
239
|
+
UINT8_C( 62), UINT8_C( 9), UINT8_C( 70), UINT8_C( 20)) }
|
240
|
+
};
|
241
|
+
|
242
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
243
|
+
simde__m64 r = simde_mm_abs_pi8(test_vec[i].a);
|
244
|
+
simde_assert_m64_u8(r, ==, test_vec[i].r);
|
245
|
+
}
|
246
|
+
|
247
|
+
return MUNIT_OK;
|
248
|
+
}
|
249
|
+
|
250
|
+
static MunitResult
|
251
|
+
test_simde_mm_abs_pi16(const MunitParameter params[], void* data) {
|
252
|
+
(void) params;
|
253
|
+
(void) data;
|
254
|
+
|
255
|
+
const struct {
|
256
|
+
simde__m64 a;
|
257
|
+
simde__m64 r;
|
258
|
+
} test_vec[8] = {
|
259
|
+
{ simde_mm_set_pi16(INT16_C( 22656), INT16_C( -516), INT16_C( 11935), INT16_C(-27223)),
|
260
|
+
simde_x_mm_set_pu16(UINT16_C(22656), UINT16_C( 516), UINT16_C(11935), UINT16_C(27223)) },
|
261
|
+
{ simde_mm_set_pi16(INT16_C(-22991), INT16_C( -6351), INT16_C(-10389), INT16_C( -8080)),
|
262
|
+
simde_x_mm_set_pu16(UINT16_C(22991), UINT16_C( 6351), UINT16_C(10389), UINT16_C( 8080)) },
|
263
|
+
{ simde_mm_set_pi16(INT16_C( 30466), INT16_C(-32585), INT16_C( 19645), INT16_C(-10576)),
|
264
|
+
simde_x_mm_set_pu16(UINT16_C(30466), UINT16_C(32585), UINT16_C(19645), UINT16_C(10576)) },
|
265
|
+
{ simde_mm_set_pi16(INT16_C(-16502), INT16_C( -5192), INT16_C( 4129), INT16_C( 3864)),
|
266
|
+
simde_x_mm_set_pu16(UINT16_C(16502), UINT16_C( 5192), UINT16_C( 4129), UINT16_C( 3864)) },
|
267
|
+
{ simde_mm_set_pi16(INT16_C( 21069), INT16_C( 17958), INT16_C(-13493), INT16_C( 9609)),
|
268
|
+
simde_x_mm_set_pu16(UINT16_C(21069), UINT16_C(17958), UINT16_C(13493), UINT16_C( 9609)) },
|
269
|
+
{ simde_mm_set_pi16(INT16_C( 400), INT16_C( 20835), INT16_C( 20896), INT16_C( 11278)),
|
270
|
+
simde_x_mm_set_pu16(UINT16_C( 400), UINT16_C(20835), UINT16_C(20896), UINT16_C(11278)) },
|
271
|
+
{ simde_mm_set_pi16(INT16_C(-12492), INT16_C(-12858), INT16_C( 23414), INT16_C(-21576)),
|
272
|
+
simde_x_mm_set_pu16(UINT16_C(12492), UINT16_C(12858), UINT16_C(23414), UINT16_C(21576)) },
|
273
|
+
{ simde_mm_set_pi16(INT16_C( 6654), INT16_C(-24897), INT16_C(-24943), INT16_C(-25087)),
|
274
|
+
simde_x_mm_set_pu16(UINT16_C( 6654), UINT16_C(24897), UINT16_C(24943), UINT16_C(25087)) }
|
275
|
+
};
|
276
|
+
|
277
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
278
|
+
simde__m64 r = simde_mm_abs_pi16(test_vec[i].a);
|
279
|
+
simde_assert_m64_u16(r, ==, test_vec[i].r);
|
280
|
+
}
|
281
|
+
|
282
|
+
return MUNIT_OK;
|
283
|
+
}
|
284
|
+
|
285
|
+
static MunitResult
|
286
|
+
test_simde_mm_abs_pi32(const MunitParameter params[], void* data) {
|
287
|
+
(void) params;
|
288
|
+
(void) data;
|
289
|
+
|
290
|
+
const struct {
|
291
|
+
simde__m64 a;
|
292
|
+
simde__m64 r;
|
293
|
+
} test_vec[8] = {
|
294
|
+
{ simde_mm_set_pi32(INT32_C( -13878279), INT32_C(-1713620712)),
|
295
|
+
simde_x_mm_set_pu32(UINT32_C( 13878279), UINT32_C(1713620712)) },
|
296
|
+
{ simde_mm_set_pi32(INT32_C( -727247206), INT32_C( -746817076)),
|
297
|
+
simde_x_mm_set_pu32(UINT32_C( 727247206), UINT32_C( 746817076)) },
|
298
|
+
{ simde_mm_set_pi32(INT32_C( 850085177), INT32_C( -729101966)),
|
299
|
+
simde_x_mm_set_pu32(UINT32_C( 850085177), UINT32_C( 729101966)) },
|
300
|
+
{ simde_mm_set_pi32(INT32_C( 64469638), INT32_C( 403976835)),
|
301
|
+
simde_x_mm_set_pu32(UINT32_C( 64469638), UINT32_C( 403976835)) },
|
302
|
+
{ simde_mm_set_pi32(INT32_C( 1585672991), INT32_C( 1784425824)),
|
303
|
+
simde_x_mm_set_pu32(UINT32_C(1585672991), UINT32_C(1784425824)) },
|
304
|
+
{ simde_mm_set_pi32(INT32_C( -137548456), INT32_C(-1080835717)),
|
305
|
+
simde_x_mm_set_pu32(UINT32_C( 137548456), UINT32_C(1080835717)) },
|
306
|
+
{ simde_mm_set_pi32(INT32_C( -942357541), INT32_C( 223746416)),
|
307
|
+
simde_x_mm_set_pu32(UINT32_C( 942357541), UINT32_C( 223746416)) },
|
308
|
+
{ simde_mm_set_pi32(INT32_C( -21393113), INT32_C( -293603855)),
|
309
|
+
simde_x_mm_set_pu32(UINT32_C( 21393113), UINT32_C( 293603855)) }
|
310
|
+
};
|
311
|
+
|
312
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
313
|
+
simde__m64 r = simde_mm_abs_pi32(test_vec[i].a);
|
314
|
+
simde_assert_m64_u32(r, ==, test_vec[i].r);
|
315
|
+
}
|
316
|
+
|
317
|
+
return MUNIT_OK;
|
318
|
+
}
|
319
|
+
|
320
|
+
static MunitResult
|
321
|
+
test_simde_mm_alignr_epi8(const MunitParameter params[], void* data) {
|
322
|
+
(void) params;
|
323
|
+
(void) data;
|
324
|
+
|
325
|
+
const struct {
|
326
|
+
simde__m128i a;
|
327
|
+
simde__m128i b;
|
328
|
+
simde__m128i r;
|
329
|
+
} test_vec[8] = {
|
330
|
+
{ simde_mm_set_epi8(INT8_C(-128), INT8_C( 23), INT8_C( -27), INT8_C( -85),
|
331
|
+
INT8_C( -37), INT8_C( 81), INT8_C( 72), INT8_C(-127),
|
332
|
+
INT8_C( -15), INT8_C( 26), INT8_C( -79), INT8_C( 98),
|
333
|
+
INT8_C(-124), INT8_C( 117), INT8_C( -8), INT8_C( -81)),
|
334
|
+
simde_mm_set_epi8(INT8_C( 118), INT8_C( 101), INT8_C( 36), INT8_C( -21),
|
335
|
+
INT8_C( 52), INT8_C( 25), INT8_C( 8), INT8_C( 93),
|
336
|
+
INT8_C( 72), INT8_C( 68), INT8_C(-102), INT8_C( -5),
|
337
|
+
INT8_C( 84), INT8_C( 53), INT8_C( 93), INT8_C( 54)),
|
338
|
+
simde_mm_set_epi8(INT8_C( 0), INT8_C(-128), INT8_C( 23), INT8_C( -27),
|
339
|
+
INT8_C( -85), INT8_C( -37), INT8_C( 81), INT8_C( 72),
|
340
|
+
INT8_C(-127), INT8_C( -15), INT8_C( 26), INT8_C( -79),
|
341
|
+
INT8_C( 98), INT8_C(-124), INT8_C( 117), INT8_C( -8)) },
|
342
|
+
{ simde_mm_set_epi8(INT8_C( 108), INT8_C( -43), INT8_C( 92), INT8_C( 111),
|
343
|
+
INT8_C( 46), INT8_C(-106), INT8_C( 85), INT8_C(-119),
|
344
|
+
INT8_C( -59), INT8_C(-124), INT8_C( -31), INT8_C( -23),
|
345
|
+
INT8_C( -83), INT8_C( 5), INT8_C( -9), INT8_C( -71)),
|
346
|
+
simde_mm_set_epi8(INT8_C( -43), INT8_C(-125), INT8_C( -60), INT8_C( 52),
|
347
|
+
INT8_C( -5), INT8_C( -49), INT8_C( -67), INT8_C( 42),
|
348
|
+
INT8_C( -81), INT8_C( -53), INT8_C( 4), INT8_C( -98),
|
349
|
+
INT8_C( 10), INT8_C( -82), INT8_C( 22), INT8_C(-123)),
|
350
|
+
simde_mm_set_epi8(INT8_C( 0), INT8_C( 108), INT8_C( -43), INT8_C( 92),
|
351
|
+
INT8_C( 111), INT8_C( 46), INT8_C(-106), INT8_C( 85),
|
352
|
+
INT8_C(-119), INT8_C( -59), INT8_C(-124), INT8_C( -31),
|
353
|
+
INT8_C( -23), INT8_C( -83), INT8_C( 5), INT8_C( -9)) },
|
354
|
+
{ simde_mm_set_epi8(INT8_C( -58), INT8_C( -34), INT8_C( 101), INT8_C( 7),
|
355
|
+
INT8_C( -30), INT8_C( 8), INT8_C(-113), INT8_C( 116),
|
356
|
+
INT8_C( 85), INT8_C( -83), INT8_C( 67), INT8_C( 48),
|
357
|
+
INT8_C( -3), INT8_C( -74), INT8_C( -8), INT8_C( -33)),
|
358
|
+
simde_mm_set_epi8(INT8_C( 84), INT8_C( -61), INT8_C( -49), INT8_C(-106),
|
359
|
+
INT8_C( 2), INT8_C( -67), INT8_C( 116), INT8_C( -32),
|
360
|
+
INT8_C( -64), INT8_C( 87), INT8_C( -97), INT8_C( 116),
|
361
|
+
INT8_C( 121), INT8_C( -49), INT8_C( 50), INT8_C( -62)),
|
362
|
+
simde_mm_set_epi8(INT8_C( 0), INT8_C( -58), INT8_C( -34), INT8_C( 101),
|
363
|
+
INT8_C( 7), INT8_C( -30), INT8_C( 8), INT8_C(-113),
|
364
|
+
INT8_C( 116), INT8_C( 85), INT8_C( -83), INT8_C( 67),
|
365
|
+
INT8_C( 48), INT8_C( -3), INT8_C( -74), INT8_C( -8)) },
|
366
|
+
{ simde_mm_set_epi8(INT8_C( 16), INT8_C( 76), INT8_C( -67), INT8_C( 79),
|
367
|
+
INT8_C( 123), INT8_C( 26), INT8_C( -95), INT8_C( 65),
|
368
|
+
INT8_C( 32), INT8_C( -65), INT8_C( 40), INT8_C( -43),
|
369
|
+
INT8_C( 17), INT8_C( -86), INT8_C( -57), INT8_C( 69)),
|
370
|
+
simde_mm_set_epi8(INT8_C(-126), INT8_C( -50), INT8_C( -8), INT8_C( -49),
|
371
|
+
INT8_C( 97), INT8_C( 107), INT8_C( 63), INT8_C(-118),
|
372
|
+
INT8_C( -16), INT8_C( 51), INT8_C( -64), INT8_C( 0),
|
373
|
+
INT8_C( -55), INT8_C( -33), INT8_C( -97), INT8_C( -81)),
|
374
|
+
simde_mm_set_epi8(INT8_C( 0), INT8_C( 16), INT8_C( 76), INT8_C( -67),
|
375
|
+
INT8_C( 79), INT8_C( 123), INT8_C( 26), INT8_C( -95),
|
376
|
+
INT8_C( 65), INT8_C( 32), INT8_C( -65), INT8_C( 40),
|
377
|
+
INT8_C( -43), INT8_C( 17), INT8_C( -86), INT8_C( -57)) },
|
378
|
+
{ simde_mm_set_epi8(INT8_C( 117), INT8_C( -66), INT8_C(-127), INT8_C( -49),
|
379
|
+
INT8_C( -11), INT8_C(-128), INT8_C( -58), INT8_C(-105),
|
380
|
+
INT8_C( 101), INT8_C( -9), INT8_C( 2), INT8_C(-109),
|
381
|
+
INT8_C( 92), INT8_C( 127), INT8_C( -78), INT8_C( 72)),
|
382
|
+
simde_mm_set_epi8(INT8_C( -69), INT8_C(-103), INT8_C( -23), INT8_C( 107),
|
383
|
+
INT8_C( 20), INT8_C( 30), INT8_C( 9), INT8_C( -73),
|
384
|
+
INT8_C( -22), INT8_C( -35), INT8_C( -54), INT8_C( 18),
|
385
|
+
INT8_C( 116), INT8_C( 41), INT8_C(-104), INT8_C( 80)),
|
386
|
+
simde_mm_set_epi8(INT8_C( 0), INT8_C( 117), INT8_C( -66), INT8_C(-127),
|
387
|
+
INT8_C( -49), INT8_C( -11), INT8_C(-128), INT8_C( -58),
|
388
|
+
INT8_C(-105), INT8_C( 101), INT8_C( -9), INT8_C( 2),
|
389
|
+
INT8_C(-109), INT8_C( 92), INT8_C( 127), INT8_C( -78)) },
|
390
|
+
{ simde_mm_set_epi8(INT8_C( -86), INT8_C( -74), INT8_C( 68), INT8_C( 36),
|
391
|
+
INT8_C( 83), INT8_C( 30), INT8_C( 41), INT8_C( 15),
|
392
|
+
INT8_C( -91), INT8_C( -31), INT8_C( -42), INT8_C( 75),
|
393
|
+
INT8_C( -87), INT8_C( 44), INT8_C(-103), INT8_C( 119)),
|
394
|
+
simde_mm_set_epi8(INT8_C( -30), INT8_C(-115), INT8_C( -83), INT8_C( 6),
|
395
|
+
INT8_C( -57), INT8_C( -30), INT8_C( 102), INT8_C( -2),
|
396
|
+
INT8_C( 71), INT8_C( 87), INT8_C( 98), INT8_C( -52),
|
397
|
+
INT8_C(-103), INT8_C( 31), INT8_C( 47), INT8_C( 121)),
|
398
|
+
simde_mm_set_epi8(INT8_C( 0), INT8_C( -86), INT8_C( -74), INT8_C( 68),
|
399
|
+
INT8_C( 36), INT8_C( 83), INT8_C( 30), INT8_C( 41),
|
400
|
+
INT8_C( 15), INT8_C( -91), INT8_C( -31), INT8_C( -42),
|
401
|
+
INT8_C( 75), INT8_C( -87), INT8_C( 44), INT8_C(-103)) },
|
402
|
+
{ simde_mm_set_epi8(INT8_C(-102), INT8_C( 15), INT8_C( -29), INT8_C( -33),
|
403
|
+
INT8_C( -49), INT8_C( 106), INT8_C( 89), INT8_C( -38),
|
404
|
+
INT8_C( 120), INT8_C( -29), INT8_C( 49), INT8_C(-121),
|
405
|
+
INT8_C( 8), INT8_C( 114), INT8_C( 123), INT8_C( 61)),
|
406
|
+
simde_mm_set_epi8(INT8_C(-116), INT8_C( -97), INT8_C( 62), INT8_C( 16),
|
407
|
+
INT8_C( 41), INT8_C( 3), INT8_C( -42), INT8_C( 67),
|
408
|
+
INT8_C( -15), INT8_C( 2), INT8_C( 36), INT8_C( -89),
|
409
|
+
INT8_C( 9), INT8_C( -85), INT8_C( 112), INT8_C( 119)),
|
410
|
+
simde_mm_set_epi8(INT8_C( 0), INT8_C(-102), INT8_C( 15), INT8_C( -29),
|
411
|
+
INT8_C( -33), INT8_C( -49), INT8_C( 106), INT8_C( 89),
|
412
|
+
INT8_C( -38), INT8_C( 120), INT8_C( -29), INT8_C( 49),
|
413
|
+
INT8_C(-121), INT8_C( 8), INT8_C( 114), INT8_C( 123)) },
|
414
|
+
{ simde_mm_set_epi8(INT8_C( 70), INT8_C( -49), INT8_C( 28), INT8_C( 74),
|
415
|
+
INT8_C( -43), INT8_C( 112), INT8_C( 4), INT8_C(-116),
|
416
|
+
INT8_C( 119), INT8_C( -9), INT8_C( 40), INT8_C( -29),
|
417
|
+
INT8_C( -45), INT8_C( -41), INT8_C( -23), INT8_C( -89)),
|
418
|
+
simde_mm_set_epi8(INT8_C( -31), INT8_C( -20), INT8_C(-110), INT8_C( 17),
|
419
|
+
INT8_C( -97), INT8_C( -81), INT8_C( -3), INT8_C( -59),
|
420
|
+
INT8_C( 65), INT8_C( 4), INT8_C( -5), INT8_C(-109),
|
421
|
+
INT8_C( 58), INT8_C( 126), INT8_C(-116), INT8_C(-106)),
|
422
|
+
simde_mm_set_epi8(INT8_C( 0), INT8_C( 70), INT8_C( -49), INT8_C( 28),
|
423
|
+
INT8_C( 74), INT8_C( -43), INT8_C( 112), INT8_C( 4),
|
424
|
+
INT8_C(-116), INT8_C( 119), INT8_C( -9), INT8_C( 40),
|
425
|
+
INT8_C( -29), INT8_C( -45), INT8_C( -41), INT8_C( -23)) }
|
426
|
+
};
|
427
|
+
|
428
|
+
// printf("\n");
|
429
|
+
// for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
|
430
|
+
// simde__m128i_private a, b, r;
|
431
|
+
|
432
|
+
// munit_rand_memory(sizeof(a), (uint8_t*) &a);
|
433
|
+
// munit_rand_memory(sizeof(b), (uint8_t*) &b);
|
434
|
+
|
435
|
+
// r = simde__m128i_to_private(simde_mm_alignr_epi8(simde__m128i_from_private(a), simde__m128i_from_private(b), 17));
|
436
|
+
|
437
|
+
// printf(" { simde_mm_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
|
438
|
+
// " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
|
439
|
+
// " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
|
440
|
+
// " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")),\n",
|
441
|
+
// a.i8[15], a.i8[14], a.i8[13], a.i8[12], a.i8[11], a.i8[10], a.i8[ 9], a.i8[ 8],
|
442
|
+
// a.i8[ 7], a.i8[ 6], a.i8[ 5], a.i8[ 4], a.i8[ 3], a.i8[ 2], a.i8[ 1], a.i8[ 0]);
|
443
|
+
// printf(" simde_mm_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
|
444
|
+
// " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
|
445
|
+
// " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
|
446
|
+
// " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")),\n",
|
447
|
+
// b.i8[15], b.i8[14], b.i8[13], b.i8[12], b.i8[11], b.i8[10], b.i8[ 9], b.i8[ 8],
|
448
|
+
// b.i8[ 7], b.i8[ 6], b.i8[ 5], b.i8[ 4], b.i8[ 3], b.i8[ 2], b.i8[ 1], b.i8[ 0]);
|
449
|
+
// printf(" simde_mm_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
|
450
|
+
// " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
|
451
|
+
// " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
|
452
|
+
// " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")) },\n",
|
453
|
+
// r.i8[15], r.i8[14], r.i8[13], r.i8[12], r.i8[11], r.i8[10], r.i8[ 9], r.i8[ 8],
|
454
|
+
// r.i8[ 7], r.i8[ 6], r.i8[ 5], r.i8[ 4], r.i8[ 3], r.i8[ 2], r.i8[ 1], r.i8[ 0]);
|
455
|
+
// }
|
456
|
+
// return MUNIT_FAIL;
|
457
|
+
|
458
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
459
|
+
simde__m128i r = simde_mm_alignr_epi8(test_vec[i].a, test_vec[i].b, 17);
|
460
|
+
simde_assert_m128i_i8(r, ==, test_vec[i].r);
|
461
|
+
}
|
462
|
+
|
463
|
+
return MUNIT_OK;
|
464
|
+
}
|
465
|
+
|
466
|
+
static MunitResult
|
467
|
+
test_simde_mm_alignr_pi8(const MunitParameter params[], void* data) {
|
468
|
+
(void) params;
|
469
|
+
(void) data;
|
470
|
+
|
471
|
+
const struct {
|
472
|
+
simde__m64 a;
|
473
|
+
simde__m64 b;
|
474
|
+
simde__m64 r3;
|
475
|
+
simde__m64 r12;
|
476
|
+
} test_vec[8] = {
|
477
|
+
{ simde_mm_set_pi8(INT8_C( -39), INT8_C(-110), INT8_C( 56), INT8_C( 87),
|
478
|
+
INT8_C( 10), INT8_C( -78), INT8_C( 61), INT8_C( -21)),
|
479
|
+
simde_mm_set_pi8(INT8_C( 13), INT8_C( -51), INT8_C( 6), INT8_C( -66),
|
480
|
+
INT8_C( -73), INT8_C( 87), INT8_C( -77), INT8_C( 108)),
|
481
|
+
simde_mm_set_pi8(INT8_C( -78), INT8_C( 61), INT8_C( -21), INT8_C( 13),
|
482
|
+
INT8_C( -51), INT8_C( 6), INT8_C( -66), INT8_C( -73)),
|
483
|
+
simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
|
484
|
+
INT8_C( -39), INT8_C(-110), INT8_C( 56), INT8_C( 87)) },
|
485
|
+
{ simde_mm_set_pi8(INT8_C( 51), INT8_C( -90), INT8_C(-118), INT8_C( -36),
|
486
|
+
INT8_C( 81), INT8_C( 52), INT8_C( 14), INT8_C( 46)),
|
487
|
+
simde_mm_set_pi8(INT8_C( 26), INT8_C( -56), INT8_C( -35), INT8_C( -50),
|
488
|
+
INT8_C( 106), INT8_C( 71), INT8_C( 68), INT8_C( 40)),
|
489
|
+
simde_mm_set_pi8(INT8_C( 52), INT8_C( 14), INT8_C( 46), INT8_C( 26),
|
490
|
+
INT8_C( -56), INT8_C( -35), INT8_C( -50), INT8_C( 106)),
|
491
|
+
simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
|
492
|
+
INT8_C( 51), INT8_C( -90), INT8_C(-118), INT8_C( -36)) },
|
493
|
+
{ simde_mm_set_pi8(INT8_C( -8), INT8_C( -77), INT8_C(-125), INT8_C( -59),
|
494
|
+
INT8_C( 8), INT8_C( -89), INT8_C( -90), INT8_C( -97)),
|
495
|
+
simde_mm_set_pi8(INT8_C( -59), INT8_C( -51), INT8_C( -30), INT8_C( -57),
|
496
|
+
INT8_C( 35), INT8_C(-105), INT8_C( -5), INT8_C( -3)),
|
497
|
+
simde_mm_set_pi8(INT8_C( -89), INT8_C( -90), INT8_C( -97), INT8_C( -59),
|
498
|
+
INT8_C( -51), INT8_C( -30), INT8_C( -57), INT8_C( 35)),
|
499
|
+
simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
|
500
|
+
INT8_C( -8), INT8_C( -77), INT8_C(-125), INT8_C( -59)) },
|
501
|
+
{ simde_mm_set_pi8(INT8_C( 67), INT8_C( 48), INT8_C( -81), INT8_C( -50),
|
502
|
+
INT8_C( 41), INT8_C( -92), INT8_C( -5), INT8_C( 14)),
|
503
|
+
simde_mm_set_pi8(INT8_C( -86), INT8_C( -71), INT8_C( 17), INT8_C( 108),
|
504
|
+
INT8_C( -44), INT8_C( 60), INT8_C( 44), INT8_C( 75)),
|
505
|
+
simde_mm_set_pi8(INT8_C( -92), INT8_C( -5), INT8_C( 14), INT8_C( -86),
|
506
|
+
INT8_C( -71), INT8_C( 17), INT8_C( 108), INT8_C( -44)),
|
507
|
+
simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
|
508
|
+
INT8_C( 67), INT8_C( 48), INT8_C( -81), INT8_C( -50)) },
|
509
|
+
{ simde_mm_set_pi8(INT8_C( -89), INT8_C(-124), INT8_C(-127), INT8_C( 44),
|
510
|
+
INT8_C( 127), INT8_C( 11), INT8_C(-119), INT8_C( -70)),
|
511
|
+
simde_mm_set_pi8(INT8_C( 39), INT8_C( -13), INT8_C( 68), INT8_C( -96),
|
512
|
+
INT8_C(-112), INT8_C(-118), INT8_C( 122), INT8_C( -32)),
|
513
|
+
simde_mm_set_pi8(INT8_C( 11), INT8_C(-119), INT8_C( -70), INT8_C( 39),
|
514
|
+
INT8_C( -13), INT8_C( 68), INT8_C( -96), INT8_C(-112)),
|
515
|
+
simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
|
516
|
+
INT8_C( -89), INT8_C(-124), INT8_C(-127), INT8_C( 44)) },
|
517
|
+
{ simde_mm_set_pi8(INT8_C(-100), INT8_C( -55), INT8_C( 7), INT8_C( -95),
|
518
|
+
INT8_C( -19), INT8_C(-101), INT8_C( 80), INT8_C( -82)),
|
519
|
+
simde_mm_set_pi8(INT8_C( -54), INT8_C( 2), INT8_C( 109), INT8_C( 126),
|
520
|
+
INT8_C(-123), INT8_C( -75), INT8_C( -35), INT8_C(-107)),
|
521
|
+
simde_mm_set_pi8(INT8_C(-101), INT8_C( 80), INT8_C( -82), INT8_C( -54),
|
522
|
+
INT8_C( 2), INT8_C( 109), INT8_C( 126), INT8_C(-123)),
|
523
|
+
simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
|
524
|
+
INT8_C(-100), INT8_C( -55), INT8_C( 7), INT8_C( -95)) },
|
525
|
+
{ simde_mm_set_pi8(INT8_C( -17), INT8_C( 109), INT8_C(-102), INT8_C( -75),
|
526
|
+
INT8_C( -61), INT8_C( 83), INT8_C( 8), INT8_C( -7)),
|
527
|
+
simde_mm_set_pi8(INT8_C( 94), INT8_C(-110), INT8_C( 105), INT8_C( 1),
|
528
|
+
INT8_C( 125), INT8_C( 57), INT8_C( -29), INT8_C( 60)),
|
529
|
+
simde_mm_set_pi8(INT8_C( 83), INT8_C( 8), INT8_C( -7), INT8_C( 94),
|
530
|
+
INT8_C(-110), INT8_C( 105), INT8_C( 1), INT8_C( 125)),
|
531
|
+
simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
|
532
|
+
INT8_C( -17), INT8_C( 109), INT8_C(-102), INT8_C( -75)) },
|
533
|
+
{ simde_mm_set_pi8(INT8_C( 127), INT8_C(-126), INT8_C( -37), INT8_C( -53),
|
534
|
+
INT8_C( 30), INT8_C( 85), INT8_C( -75), INT8_C( 62)),
|
535
|
+
simde_mm_set_pi8(INT8_C( 72), INT8_C( 61), INT8_C(-110), INT8_C( 76),
|
536
|
+
INT8_C( 26), INT8_C(-125), INT8_C( -54), INT8_C( -64)),
|
537
|
+
simde_mm_set_pi8(INT8_C( 85), INT8_C( -75), INT8_C( 62), INT8_C( 72),
|
538
|
+
INT8_C( 61), INT8_C(-110), INT8_C( 76), INT8_C( 26)),
|
539
|
+
simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
|
540
|
+
INT8_C( 127), INT8_C(-126), INT8_C( -37), INT8_C( -53)) }
|
541
|
+
};
|
542
|
+
|
543
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
544
|
+
simde__m64 r = simde_mm_alignr_pi8(test_vec[i].a, test_vec[i].b, 3);
|
545
|
+
simde_assert_m64_i8(r, ==, test_vec[i].r3);
|
546
|
+
r = simde_mm_alignr_pi8(test_vec[i].a, test_vec[i].b, 12);
|
547
|
+
simde_assert_m64_i8(r, ==, test_vec[i].r12);
|
548
|
+
}
|
549
|
+
|
550
|
+
return MUNIT_OK;
|
551
|
+
}
|
552
|
+
|
553
|
+
static MunitResult
|
554
|
+
test_simde_mm_hadd_epi16(const MunitParameter params[], void* data) {
|
555
|
+
(void) params;
|
556
|
+
(void) data;
|
557
|
+
|
558
|
+
const struct {
|
559
|
+
simde__m128i a;
|
560
|
+
simde__m128i b;
|
561
|
+
simde__m128i r;
|
562
|
+
} test_vec[] = {
|
563
|
+
{ simde_mm_set_epi16(INT16_C(7), INT16_C(6), INT16_C(5), INT16_C(4),
|
564
|
+
INT16_C(3), INT16_C(2), INT16_C(1), INT16_C(0)),
|
565
|
+
simde_mm_set_epi16(INT16_C(15), INT16_C(14), INT16_C(13), INT16_C(12),
|
566
|
+
INT16_C(11), INT16_C(10), INT16_C(9), INT16_C(8)),
|
567
|
+
simde_mm_set_epi16(INT16_C(29), INT16_C(25), INT16_C(21), INT16_C(17),
|
568
|
+
INT16_C(13), INT16_C(9), INT16_C(5), INT16_C(1)) },
|
569
|
+
{ simde_mm_set_epi16(INT16_C( 16862), INT16_C(-22769), INT16_C( 1276), INT16_C(-11614),
|
570
|
+
INT16_C( 27365), INT16_C(-21745), INT16_C(-20072), INT16_C( 24895)),
|
571
|
+
simde_mm_set_epi16(INT16_C( 27022), INT16_C(-16957), INT16_C( -577), INT16_C( 5907),
|
572
|
+
INT16_C( 27331), INT16_C(-14214), INT16_C( 23860), INT16_C( -4618)),
|
573
|
+
simde_mm_set_epi16(INT16_C( 10065), INT16_C( 5330), INT16_C( 13117), INT16_C( 19242),
|
574
|
+
INT16_C( -5907), INT16_C(-10338), INT16_C( 5620), INT16_C( 4823)) },
|
575
|
+
{ simde_mm_set_epi16(INT16_C( 10296), INT16_C( 16929), INT16_C( -7697), INT16_C(-29772),
|
576
|
+
INT16_C( 8760), INT16_C( 11055), INT16_C(-21639), INT16_C( -9735)),
|
577
|
+
simde_mm_set_epi16(INT16_C( 17587), INT16_C( 2522), INT16_C( 12430), INT16_C(-26697),
|
578
|
+
INT16_C( 10766), INT16_C( 15055), INT16_C(-19640), INT16_C( 28548)),
|
579
|
+
simde_mm_set_epi16(INT16_C( 20109), INT16_C(-14267), INT16_C( 25821), INT16_C( 8908),
|
580
|
+
INT16_C( 27225), INT16_C( 28067), INT16_C( 19815), INT16_C(-31374)) },
|
581
|
+
{ simde_mm_set_epi16(INT16_C( 25001), INT16_C( 2984), INT16_C( 25634), INT16_C( 18284),
|
582
|
+
INT16_C( 332), INT16_C( 30339), INT16_C( -8894), INT16_C( 21932)),
|
583
|
+
simde_mm_set_epi16(INT16_C(-29538), INT16_C( -9241), INT16_C(-32628), INT16_C(-14450),
|
584
|
+
INT16_C( 29835), INT16_C( 605), INT16_C( -3960), INT16_C( -9885)),
|
585
|
+
simde_mm_set_epi16(INT16_C( 26757), INT16_C( 18458), INT16_C( 30440), INT16_C(-13845),
|
586
|
+
INT16_C( 27985), INT16_C(-21618), INT16_C( 30671), INT16_C( 13038)) },
|
587
|
+
{ simde_mm_set_epi16(INT16_C( 9544), INT16_C( 1869), INT16_C( 10876), INT16_C( 18425),
|
588
|
+
INT16_C(-23507), INT16_C( -6113), INT16_C(-14498), INT16_C( 22949)),
|
589
|
+
simde_mm_set_epi16(INT16_C(-18197), INT16_C(-29870), INT16_C(-22608), INT16_C( 17229),
|
590
|
+
INT16_C(-25091), INT16_C( 26338), INT16_C( 15760), INT16_C(-13942)),
|
591
|
+
simde_mm_set_epi16(INT16_C( 17469), INT16_C( -5379), INT16_C( 1247), INT16_C( 1818),
|
592
|
+
INT16_C( 11413), INT16_C( 29301), INT16_C(-29620), INT16_C( 8451)) },
|
593
|
+
{ simde_mm_set_epi16(INT16_C(-12014), INT16_C( -2859), INT16_C(-10534), INT16_C( -75),
|
594
|
+
INT16_C( 11851), INT16_C( 10033), INT16_C(-13975), INT16_C( 29701)),
|
595
|
+
simde_mm_set_epi16(INT16_C( 32129), INT16_C(-30871), INT16_C(-23818), INT16_C(-30018),
|
596
|
+
INT16_C( -9498), INT16_C(-14851), INT16_C(-11614), INT16_C( -9352)),
|
597
|
+
simde_mm_set_epi16(INT16_C( 1258), INT16_C( 11700), INT16_C(-24349), INT16_C(-20966),
|
598
|
+
INT16_C(-14873), INT16_C(-10609), INT16_C( 21884), INT16_C( 15726)) },
|
599
|
+
{ simde_mm_set_epi16(INT16_C(-21544), INT16_C(-15577), INT16_C(-26221), INT16_C( -9036),
|
600
|
+
INT16_C( 27367), INT16_C( 25240), INT16_C( 27963), INT16_C( 16531)),
|
601
|
+
simde_mm_set_epi16(INT16_C(-17413), INT16_C( -3083), INT16_C( 7975), INT16_C( 3251),
|
602
|
+
INT16_C(-12476), INT16_C(-31198), INT16_C(-31819), INT16_C( 23479)),
|
603
|
+
simde_mm_set_epi16(INT16_C(-20496), INT16_C( 11226), INT16_C( 21862), INT16_C( -8340),
|
604
|
+
INT16_C( 28415), INT16_C( 30279), INT16_C(-12929), INT16_C(-21042)) },
|
605
|
+
{ simde_mm_set_epi16(INT16_C(-26423), INT16_C( 20632), INT16_C(-27879), INT16_C( 26257),
|
606
|
+
INT16_C(-14251), INT16_C(-18865), INT16_C( -651), INT16_C(-29238)),
|
607
|
+
simde_mm_set_epi16(INT16_C( -3019), INT16_C( 26530), INT16_C(-15590), INT16_C( -2378),
|
608
|
+
INT16_C( 9416), INT16_C(-20831), INT16_C(-30518), INT16_C(-20357)),
|
609
|
+
simde_mm_set_epi16(INT16_C( 23511), INT16_C(-17968), INT16_C(-11415), INT16_C( 14661),
|
610
|
+
INT16_C( -5791), INT16_C( -1622), INT16_C( 32420), INT16_C(-29889)) },
|
611
|
+
{ simde_mm_set_epi16(INT16_C( 31117), INT16_C( 717), INT16_C( -4833), INT16_C(-22028),
|
612
|
+
INT16_C(-11773), INT16_C( -2769), INT16_C(-29232), INT16_C( 7017)),
|
613
|
+
simde_mm_set_epi16(INT16_C( 29029), INT16_C(-19941), INT16_C( 32677), INT16_C( 1632),
|
614
|
+
INT16_C( 3095), INT16_C( 13355), INT16_C( 25984), INT16_C( 16468)),
|
615
|
+
simde_mm_set_epi16(INT16_C( 9088), INT16_C(-31227), INT16_C( 16450), INT16_C(-23084),
|
616
|
+
INT16_C( 31834), INT16_C(-26861), INT16_C(-14542), INT16_C(-22215)) }
|
617
|
+
};
|
618
|
+
|
619
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
620
|
+
simde__m128i r = simde_mm_hadd_epi16(test_vec[i].a, test_vec[i].b);
|
621
|
+
simde_assert_m128i_i16(r, ==, test_vec[i].r);
|
622
|
+
}
|
623
|
+
|
624
|
+
return MUNIT_OK;
|
625
|
+
}
|
626
|
+
|
627
|
+
static MunitResult
|
628
|
+
test_simde_mm_hadd_epi32(const MunitParameter params[], void* data) {
|
629
|
+
(void) params;
|
630
|
+
(void) data;
|
631
|
+
|
632
|
+
const struct {
|
633
|
+
simde__m128i a;
|
634
|
+
simde__m128i b;
|
635
|
+
simde__m128i r;
|
636
|
+
} test_vec[] = {
|
637
|
+
{ simde_mm_set_epi32(INT32_C(65535), INT32_C(0), INT32_C(1), INT32_C(-1)),
|
638
|
+
simde_mm_set_epi32(INT32_C(-32), INT32_C(128), INT32_C(0), INT32_C(-65535)),
|
639
|
+
simde_mm_set_epi32(INT32_C(96), INT32_C(-65535), INT32_C(65535), INT32_C(0)) },
|
640
|
+
{ simde_mm_set_epi32(INT32_C( 935437342), INT32_C( -511860991), INT32_C(-1852765223), INT32_C( -899948884)),
|
641
|
+
simde_mm_set_epi32(INT32_C( -691754121), INT32_C(-1808366785), INT32_C( 1428445569), INT32_C( 851103099)),
|
642
|
+
simde_mm_set_epi32(INT32_C( 1794846390), INT32_C(-2015418628), INT32_C( 423576351), INT32_C( 1542253189)) },
|
643
|
+
{ simde_mm_set_epi32(INT32_C( 1769801323), INT32_C( -534202186), INT32_C( 93006588), INT32_C( -39722690)),
|
644
|
+
simde_mm_set_epi32(INT32_C(-1819301058), INT32_C( 1538855279), INT32_C(-1162552057), INT32_C( 1560248404)),
|
645
|
+
simde_mm_set_epi32(INT32_C( -280445779), INT32_C( 397696347), INT32_C( 1235599137), INT32_C( 53283898)) },
|
646
|
+
{ simde_mm_set_epi32(INT32_C( -236962758), INT32_C( 1371307856), INT32_C(-1291984296), INT32_C(-1633301517)),
|
647
|
+
simde_mm_set_epi32(INT32_C( 553343851), INT32_C( 1511250694), INT32_C( 2041109339), INT32_C( 952253154)),
|
648
|
+
simde_mm_set_epi32(INT32_C( 2064594545), INT32_C(-1301604803), INT32_C( 1134345098), INT32_C( 1369681483)) },
|
649
|
+
{ simde_mm_set_epi32(INT32_C(-2133195983), INT32_C(-1476381094), INT32_C( -832093237), INT32_C( 2141200401)),
|
650
|
+
simde_mm_set_epi32(INT32_C( -724061580), INT32_C( 1346631337), INT32_C( 1990809669), INT32_C( -817015526)),
|
651
|
+
simde_mm_set_epi32(INT32_C( 622569757), INT32_C( 1173794143), INT32_C( 685390219), INT32_C( 1309107164)) },
|
652
|
+
{ simde_mm_set_epi32(INT32_C(-1087680391), INT32_C( 1056661878), INT32_C( -847385783), INT32_C( 1172111556)),
|
653
|
+
simde_mm_set_epi32(INT32_C( 1557644526), INT32_C( -640816363), INT32_C( 1498664548), INT32_C( -26761096)),
|
654
|
+
simde_mm_set_epi32(INT32_C( 916828163), INT32_C( 1471903452), INT32_C( -31018513), INT32_C( 324725773)) },
|
655
|
+
{ simde_mm_set_epi32(INT32_C( -253519263), INT32_C(-1665749954), INT32_C(-1108971746), INT32_C( -797234951)),
|
656
|
+
simde_mm_set_epi32(INT32_C(-1588481692), INT32_C( 1983191492), INT32_C( 429969831), INT32_C( 285907454)),
|
657
|
+
simde_mm_set_epi32(INT32_C( 394709800), INT32_C( 715877285), INT32_C(-1919269217), INT32_C(-1906206697)) },
|
658
|
+
{ simde_mm_set_epi32(INT32_C( 1025048627), INT32_C( 475870360), INT32_C( 1357422197), INT32_C(-1008236470)),
|
659
|
+
simde_mm_set_epi32(INT32_C(-1420053195), INT32_C( 1212624672), INT32_C( 209820777), INT32_C( -555572396)),
|
660
|
+
simde_mm_set_epi32(INT32_C( -207428523), INT32_C( -345751619), INT32_C( 1500918987), INT32_C( 349185727)) },
|
661
|
+
{ simde_mm_set_epi32(INT32_C( 64730366), INT32_C( 1169670008), INT32_C( 1098115199), INT32_C( -738462226)),
|
662
|
+
simde_mm_set_epi32(INT32_C( -991739835), INT32_C( 26982665), INT32_C(-1747857410), INT32_C( -648157645)),
|
663
|
+
simde_mm_set_epi32(INT32_C( -964757170), INT32_C( 1898952241), INT32_C( 1234400374), INT32_C( 359652973)) }
|
664
|
+
};
|
665
|
+
|
666
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
667
|
+
simde__m128i r = simde_mm_hadd_epi32(test_vec[i].a, test_vec[i].b);
|
668
|
+
simde_assert_m128i_i32(r, ==, test_vec[i].r);
|
669
|
+
}
|
670
|
+
|
671
|
+
return MUNIT_OK;
|
672
|
+
}
|
673
|
+
|
674
|
+
static MunitResult
|
675
|
+
test_simde_mm_hadd_pi16(const MunitParameter params[], void* data) {
|
676
|
+
(void) params;
|
677
|
+
(void) data;
|
678
|
+
|
679
|
+
const struct {
|
680
|
+
simde__m64 a;
|
681
|
+
simde__m64 b;
|
682
|
+
simde__m64 r;
|
683
|
+
} test_vec[] = {
|
684
|
+
{ simde_mm_set_pi16(INT16_C( 30239), INT16_C( 25845), INT16_C( -9936), INT16_C(-27213)),
|
685
|
+
simde_mm_set_pi16(INT16_C( -1015), INT16_C(-20873), INT16_C( -7356), INT16_C(-31477)),
|
686
|
+
simde_mm_set_pi16(INT16_C(-21888), INT16_C( 26703), INT16_C( -9452), INT16_C( 28387)) },
|
687
|
+
{ simde_mm_set_pi16(INT16_C(-21918), INT16_C(-12935), INT16_C(-22245), INT16_C(-19824)),
|
688
|
+
simde_mm_set_pi16(INT16_C(-12980), INT16_C( 31520), INT16_C(-30945), INT16_C( 24768)),
|
689
|
+
simde_mm_set_pi16(INT16_C( 18540), INT16_C( -6177), INT16_C( 30683), INT16_C( 23467)) },
|
690
|
+
{ simde_mm_set_pi16(INT16_C( 17846), INT16_C(-20510), INT16_C( 28205), INT16_C( 22502)),
|
691
|
+
simde_mm_set_pi16(INT16_C( 30262), INT16_C( 19390), INT16_C(-31190), INT16_C(-31299)),
|
692
|
+
simde_mm_set_pi16(INT16_C(-15884), INT16_C( 3047), INT16_C( -2664), INT16_C(-14829)) },
|
693
|
+
{ simde_mm_set_pi16(INT16_C( 4995), INT16_C( 5024), INT16_C(-16313), INT16_C( 32041)),
|
694
|
+
simde_mm_set_pi16(INT16_C( 28210), INT16_C( -6335), INT16_C( 24660), INT16_C( 8387)),
|
695
|
+
simde_mm_set_pi16(INT16_C( 21875), INT16_C(-32489), INT16_C( 10019), INT16_C( 15728)) },
|
696
|
+
{ simde_mm_set_pi16(INT16_C( 18074), INT16_C( 21929), INT16_C(-17147), INT16_C( 5980)),
|
697
|
+
simde_mm_set_pi16(INT16_C(-29293), INT16_C(-14044), INT16_C( -7765), INT16_C( 15197)),
|
698
|
+
simde_mm_set_pi16(INT16_C( 22199), INT16_C( 7432), INT16_C(-25533), INT16_C(-11167)) },
|
699
|
+
{ simde_mm_set_pi16(INT16_C( 9742), INT16_C( 25346), INT16_C(-16677), INT16_C(-18703)),
|
700
|
+
simde_mm_set_pi16(INT16_C( 12116), INT16_C( -6252), INT16_C(-29587), INT16_C( -2727)),
|
701
|
+
simde_mm_set_pi16(INT16_C( 5864), INT16_C(-32314), INT16_C(-30448), INT16_C( 30156)) },
|
702
|
+
{ simde_mm_set_pi16(INT16_C( -2031), INT16_C( 13829), INT16_C( 22178), INT16_C( 10932)),
|
703
|
+
simde_mm_set_pi16(INT16_C( -3860), INT16_C(-21638), INT16_C( 11349), INT16_C( 24248)),
|
704
|
+
simde_mm_set_pi16(INT16_C(-25498), INT16_C(-29939), INT16_C( 11798), INT16_C(-32426)) },
|
705
|
+
{ simde_mm_set_pi16(INT16_C( 14725), INT16_C(-26631), INT16_C( 3352), INT16_C( -9709)),
|
706
|
+
simde_mm_set_pi16(INT16_C(-29523), INT16_C( -5771), INT16_C(-32233), INT16_C( 27043)),
|
707
|
+
simde_mm_set_pi16(INT16_C( 30242), INT16_C( -5190), INT16_C(-11906), INT16_C( -6357)) }
|
708
|
+
};
|
709
|
+
|
710
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
711
|
+
simde__m64 r = simde_mm_hadd_pi16(test_vec[i].a, test_vec[i].b);
|
712
|
+
simde_assert_m64_i16(r, ==, test_vec[i].r);
|
713
|
+
}
|
714
|
+
|
715
|
+
return MUNIT_OK;
|
716
|
+
}
|
717
|
+
|
718
|
+
static MunitResult
|
719
|
+
test_simde_mm_hadd_pi32(const MunitParameter params[], void* data) {
|
720
|
+
(void) params;
|
721
|
+
(void) data;
|
722
|
+
|
723
|
+
const struct {
|
724
|
+
simde__m64 a;
|
725
|
+
simde__m64 b;
|
726
|
+
simde__m64 r;
|
727
|
+
} test_vec[8] = {
|
728
|
+
{ simde_mm_set_pi32(INT32_C( 1862318516), INT32_C( -295748827)),
|
729
|
+
simde_mm_set_pi32(INT32_C( -228228131), INT32_C( 379370564)),
|
730
|
+
simde_mm_set_pi32(INT32_C( 151142433), INT32_C( 1566569689)) },
|
731
|
+
{ simde_mm_set_pi32(INT32_C(-1716341852), INT32_C(-1567376242)),
|
732
|
+
simde_mm_set_pi32(INT32_C( 627426320), INT32_C(-1742027445)),
|
733
|
+
simde_mm_set_pi32(INT32_C(-1114601125), INT32_C( 1011249202)) },
|
734
|
+
{ simde_mm_set_pi32(INT32_C( -8890841), INT32_C( -182169327)),
|
735
|
+
simde_mm_set_pi32(INT32_C( 1909098453), INT32_C( -755712802)),
|
736
|
+
simde_mm_set_pi32(INT32_C( 1153385651), INT32_C( -191060168)) },
|
737
|
+
{ simde_mm_set_pi32(INT32_C( 1354356939), INT32_C( 781213984)),
|
738
|
+
simde_mm_set_pi32(INT32_C( 1569601432), INT32_C( 249731348)),
|
739
|
+
simde_mm_set_pi32(INT32_C( 1819332780), INT32_C( 2135570923)) },
|
740
|
+
{ simde_mm_set_pi32(INT32_C( -205846038), INT32_C( 760290342)),
|
741
|
+
simde_mm_set_pi32(INT32_C( 261312612), INT32_C( 953650902)),
|
742
|
+
simde_mm_set_pi32(INT32_C( 1214963514), INT32_C( 554444304)) },
|
743
|
+
{ simde_mm_set_pi32(INT32_C( 2091951994), INT32_C( -868981806)),
|
744
|
+
simde_mm_set_pi32(INT32_C( 908827748), INT32_C( 1571868066)),
|
745
|
+
simde_mm_set_pi32(INT32_C(-1814271482), INT32_C( 1222970188)) },
|
746
|
+
{ simde_mm_set_pi32(INT32_C( -4189226), INT32_C( 874353707)),
|
747
|
+
simde_mm_set_pi32(INT32_C( -220724007), INT32_C(-1561593917)),
|
748
|
+
simde_mm_set_pi32(INT32_C(-1782317924), INT32_C( 870164481)) },
|
749
|
+
{ simde_mm_set_pi32(INT32_C(-2051599335), INT32_C( 1030472719)),
|
750
|
+
simde_mm_set_pi32(INT32_C( 98310968), INT32_C(-1800274139)),
|
751
|
+
simde_mm_set_pi32(INT32_C(-1701963171), INT32_C(-1021126616)) }
|
752
|
+
};
|
753
|
+
|
754
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
755
|
+
simde__m64 r = simde_mm_hadd_pi32(test_vec[i].a, test_vec[i].b);
|
756
|
+
simde_assert_m64_i32(r, ==, test_vec[i].r);
|
757
|
+
}
|
758
|
+
|
759
|
+
return MUNIT_OK;
|
760
|
+
}
|
761
|
+
|
762
|
+
static MunitResult
|
763
|
+
test_simde_mm_hadds_epi16(const MunitParameter params[], void* data) {
|
764
|
+
(void) params;
|
765
|
+
(void) data;
|
766
|
+
|
767
|
+
const struct {
|
768
|
+
simde__m128i a;
|
769
|
+
simde__m128i b;
|
770
|
+
simde__m128i r;
|
771
|
+
} test_vec[8] = {
|
772
|
+
{ simde_mm_set_epi16(INT16_C( 10807), INT16_C(-14198), INT16_C(-16689), INT16_C( 21115),
|
773
|
+
INT16_C( -910), INT16_C( 23363), INT16_C( -393), INT16_C( 24341)),
|
774
|
+
simde_mm_set_epi16(INT16_C( -1948), INT16_C( 14976), INT16_C(-17960), INT16_C(-14860),
|
775
|
+
INT16_C( 77), INT16_C( 22746), INT16_C( 31556), INT16_C(-27912)),
|
776
|
+
simde_mm_set_epi16(INT16_C( 13028), INT16_C(-32768), INT16_C( 22823), INT16_C( 3644),
|
777
|
+
INT16_C( -3391), INT16_C( 4426), INT16_C( 22453), INT16_C( 23948)) },
|
778
|
+
{ simde_mm_set_epi16(INT16_C( 16701), INT16_C( 18413), INT16_C( 19864), INT16_C(-30571),
|
779
|
+
INT16_C( -296), INT16_C( 12833), INT16_C(-21580), INT16_C( 1624)),
|
780
|
+
simde_mm_set_epi16(INT16_C( 8006), INT16_C( 10793), INT16_C( 9022), INT16_C( 6615),
|
781
|
+
INT16_C( 21610), INT16_C( 15062), INT16_C( -1089), INT16_C( 11200)),
|
782
|
+
simde_mm_set_epi16(INT16_C( 18799), INT16_C( 15637), INT16_C( 32767), INT16_C( 10111),
|
783
|
+
INT16_C( 32767), INT16_C(-10707), INT16_C( 12537), INT16_C(-19956)) },
|
784
|
+
{ simde_mm_set_epi16(INT16_C( 2796), INT16_C( 22044), INT16_C( -3038), INT16_C( -2824),
|
785
|
+
INT16_C(-32407), INT16_C(-18333), INT16_C( 4913), INT16_C(-15422)),
|
786
|
+
simde_mm_set_epi16(INT16_C(-26141), INT16_C(-23205), INT16_C( 4999), INT16_C( 14674),
|
787
|
+
INT16_C( 14693), INT16_C( 30409), INT16_C( -432), INT16_C( -2349)),
|
788
|
+
simde_mm_set_epi16(INT16_C(-32768), INT16_C( 19673), INT16_C( 32767), INT16_C( -2781),
|
789
|
+
INT16_C( 24840), INT16_C( -5862), INT16_C(-32768), INT16_C(-10509)) },
|
790
|
+
{ simde_mm_set_epi16(INT16_C( -4935), INT16_C( -6562), INT16_C( 11817), INT16_C(-25630),
|
791
|
+
INT16_C( -5658), INT16_C( 9543), INT16_C( 31115), INT16_C(-17967)),
|
792
|
+
simde_mm_set_epi16(INT16_C(-31577), INT16_C( 203), INT16_C( 2138), INT16_C(-23787),
|
793
|
+
INT16_C( -59), INT16_C( 6347), INT16_C( -3043), INT16_C( 3327)),
|
794
|
+
simde_mm_set_epi16(INT16_C(-31374), INT16_C(-21649), INT16_C( 6288), INT16_C( 284),
|
795
|
+
INT16_C(-11497), INT16_C(-13813), INT16_C( 3885), INT16_C( 13148)) },
|
796
|
+
{ simde_mm_set_epi16(INT16_C( 23859), INT16_C(-22406), INT16_C( 18603), INT16_C(-25617),
|
797
|
+
INT16_C( 15903), INT16_C(-31522), INT16_C( 6447), INT16_C(-30595)),
|
798
|
+
simde_mm_set_epi16(INT16_C(-12207), INT16_C(-15255), INT16_C( 20239), INT16_C(-16128),
|
799
|
+
INT16_C( 1700), INT16_C( -5295), INT16_C(-26969), INT16_C( 1404)),
|
800
|
+
simde_mm_set_epi16(INT16_C(-27462), INT16_C( 4111), INT16_C( -3595), INT16_C(-25565),
|
801
|
+
INT16_C( 1453), INT16_C( -7014), INT16_C(-15619), INT16_C(-24148)) },
|
802
|
+
{ simde_mm_set_epi16(INT16_C(-23882), INT16_C( 20710), INT16_C(-17743), INT16_C(-12929),
|
803
|
+
INT16_C(-26672), INT16_C( 7004), INT16_C( -7681), INT16_C(-15327)),
|
804
|
+
simde_mm_set_epi16(INT16_C( 9169), INT16_C(-20513), INT16_C( 32026), INT16_C(-32016),
|
805
|
+
INT16_C( 11237), INT16_C( 13212), INT16_C( 5807), INT16_C(-22948)),
|
806
|
+
simde_mm_set_epi16(INT16_C(-11344), INT16_C( 10), INT16_C( 24449), INT16_C(-17141),
|
807
|
+
INT16_C( -3172), INT16_C(-30672), INT16_C(-19668), INT16_C(-23008)) },
|
808
|
+
{ simde_mm_set_epi16(INT16_C( -1265), INT16_C(-31938), INT16_C( 205), INT16_C( -2042),
|
809
|
+
INT16_C( 8951), INT16_C( -9923), INT16_C( -5120), INT16_C( 21641)),
|
810
|
+
simde_mm_set_epi16(INT16_C( 29316), INT16_C(-11244), INT16_C(-12652), INT16_C( 22497),
|
811
|
+
INT16_C(-29075), INT16_C(-14808), INT16_C(-17541), INT16_C( -2061)),
|
812
|
+
simde_mm_set_epi16(INT16_C( 18072), INT16_C( 9845), INT16_C(-32768), INT16_C(-19602),
|
813
|
+
INT16_C(-32768), INT16_C( -1837), INT16_C( -972), INT16_C( 16521)) },
|
814
|
+
{ simde_mm_set_epi16(INT16_C( 4920), INT16_C( 25576), INT16_C(-20472), INT16_C( 4642),
|
815
|
+
INT16_C( 22777), INT16_C( 17155), INT16_C(-22672), INT16_C( 7646)),
|
816
|
+
simde_mm_set_epi16(INT16_C(-28362), INT16_C( 2256), INT16_C( 1158), INT16_C(-32564),
|
817
|
+
INT16_C( 15997), INT16_C( 8308), INT16_C( -8252), INT16_C( 27950)),
|
818
|
+
simde_mm_set_epi16(INT16_C(-26106), INT16_C(-31406), INT16_C( 24305), INT16_C( 19698),
|
819
|
+
INT16_C( 30496), INT16_C(-15830), INT16_C( 32767), INT16_C(-15026)) }
|
820
|
+
};
|
821
|
+
|
822
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
823
|
+
simde__m128i r = simde_mm_hadds_epi16(test_vec[i].a, test_vec[i].b);
|
824
|
+
simde_assert_m128i_i16(r, ==, test_vec[i].r);
|
825
|
+
}
|
826
|
+
|
827
|
+
return MUNIT_OK;
|
828
|
+
}
|
829
|
+
|
830
|
+
static MunitResult
|
831
|
+
test_simde_mm_hadds_pi16(const MunitParameter params[], void* data) {
|
832
|
+
(void) params;
|
833
|
+
(void) data;
|
834
|
+
|
835
|
+
const struct {
|
836
|
+
simde__m64 a;
|
837
|
+
simde__m64 b;
|
838
|
+
simde__m64 r;
|
839
|
+
} test_vec[8] = {
|
840
|
+
{ simde_mm_set_pi16(INT16_C( -2215), INT16_C( -3731), INT16_C( 30165), INT16_C(-21248)),
|
841
|
+
simde_mm_set_pi16(INT16_C( 21370), INT16_C( 7316), INT16_C(-29830), INT16_C( -6216)),
|
842
|
+
simde_mm_set_pi16(INT16_C( 28686), INT16_C(-32768), INT16_C( -5946), INT16_C( 8917)) },
|
843
|
+
{ simde_mm_set_pi16(INT16_C( -8625), INT16_C( -7885), INT16_C( 3501), INT16_C( -8065)),
|
844
|
+
simde_mm_set_pi16(INT16_C( -9522), INT16_C(-24178), INT16_C( 32056), INT16_C( 21007)),
|
845
|
+
simde_mm_set_pi16(INT16_C(-32768), INT16_C( 32767), INT16_C(-16510), INT16_C( -4564)) },
|
846
|
+
{ simde_mm_set_pi16(INT16_C(-17715), INT16_C( 16677), INT16_C( 21397), INT16_C(-29267)),
|
847
|
+
simde_mm_set_pi16(INT16_C( -49), INT16_C(-24556), INT16_C( 4165), INT16_C( 5183)),
|
848
|
+
simde_mm_set_pi16(INT16_C(-24605), INT16_C( 9348), INT16_C( -1038), INT16_C( -7870)) },
|
849
|
+
{ simde_mm_set_pi16(INT16_C( 18073), INT16_C( 17217), INT16_C(-23152), INT16_C( -2700)),
|
850
|
+
simde_mm_set_pi16(INT16_C( 24553), INT16_C(-26123), INT16_C( 21553), INT16_C(-14922)),
|
851
|
+
simde_mm_set_pi16(INT16_C( -1570), INT16_C( 6631), INT16_C( 32767), INT16_C(-25852)) },
|
852
|
+
{ simde_mm_set_pi16(INT16_C( -8686), INT16_C(-29702), INT16_C( 977), INT16_C( -5566)),
|
853
|
+
simde_mm_set_pi16(INT16_C( 10007), INT16_C( 21523), INT16_C(-17042), INT16_C( 8204)),
|
854
|
+
simde_mm_set_pi16(INT16_C( 31530), INT16_C( -8838), INT16_C(-32768), INT16_C( -4589)) },
|
855
|
+
{ simde_mm_set_pi16(INT16_C( 20389), INT16_C( 12774), INT16_C( 24895), INT16_C(-10733)),
|
856
|
+
simde_mm_set_pi16(INT16_C( 4070), INT16_C( 21710), INT16_C(-25629), INT16_C( 32624)),
|
857
|
+
simde_mm_set_pi16(INT16_C( 25780), INT16_C( 6995), INT16_C( 32767), INT16_C( 14162)) },
|
858
|
+
{ simde_mm_set_pi16(INT16_C(-26507), INT16_C(-18711), INT16_C( -30), INT16_C(-27258)),
|
859
|
+
simde_mm_set_pi16(INT16_C( 30229), INT16_C(-20659), INT16_C(-12166), INT16_C( 11942)),
|
860
|
+
simde_mm_set_pi16(INT16_C( 9570), INT16_C( -224), INT16_C(-32768), INT16_C(-27288)) },
|
861
|
+
{ simde_mm_set_pi16(INT16_C( 8239), INT16_C( 4822), INT16_C( -7926), INT16_C( 5523)),
|
862
|
+
simde_mm_set_pi16(INT16_C( 4807), INT16_C(-26347), INT16_C( 27248), INT16_C(-14552)),
|
863
|
+
simde_mm_set_pi16(INT16_C(-21540), INT16_C( 12696), INT16_C( 13061), INT16_C( -2403)) }
|
864
|
+
};
|
865
|
+
|
866
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
867
|
+
simde__m64 r = simde_mm_hadds_pi16(test_vec[i].a, test_vec[i].b);
|
868
|
+
simde_assert_m64_i16(r, ==, test_vec[i].r);
|
869
|
+
}
|
870
|
+
|
871
|
+
return MUNIT_OK;
|
872
|
+
}
|
873
|
+
|
874
|
+
static MunitResult
|
875
|
+
test_simde_mm_hsub_epi16(const MunitParameter params[], void* data) {
|
876
|
+
(void) params;
|
877
|
+
(void) data;
|
878
|
+
|
879
|
+
const struct {
|
880
|
+
simde__m128i a;
|
881
|
+
simde__m128i b;
|
882
|
+
simde__m128i r;
|
883
|
+
} test_vec[8] = {
|
884
|
+
{ simde_mm_set_epi16(INT16_C(-23858), INT16_C( -4873), INT16_C( 25529), INT16_C( 28813),
|
885
|
+
INT16_C( 32687), INT16_C( 7141), INT16_C(-18881), INT16_C(-22018)),
|
886
|
+
simde_mm_set_epi16(INT16_C( 18662), INT16_C( -3351), INT16_C(-22586), INT16_C( 17125),
|
887
|
+
INT16_C( 13505), INT16_C( 8156), INT16_C(-22303), INT16_C( 260)),
|
888
|
+
simde_mm_set_epi16(INT16_C(-22013), INT16_C(-25825), INT16_C( -5349), INT16_C( 22563),
|
889
|
+
INT16_C( 18985), INT16_C( 3284), INT16_C(-25546), INT16_C( -3137)) },
|
890
|
+
{ simde_mm_set_epi16(INT16_C(-16905), INT16_C(-23899), INT16_C( 22124), INT16_C( -9244),
|
891
|
+
INT16_C( -8704), INT16_C(-14521), INT16_C( 29325), INT16_C( 9647)),
|
892
|
+
simde_mm_set_epi16(INT16_C(-14947), INT16_C( 11007), INT16_C(-31280), INT16_C(-24736),
|
893
|
+
INT16_C( 12124), INT16_C( 14146), INT16_C( 2823), INT16_C( 31264)),
|
894
|
+
simde_mm_set_epi16(INT16_C( 25954), INT16_C( 6544), INT16_C( 2022), INT16_C( 28441),
|
895
|
+
INT16_C( -6994), INT16_C(-31368), INT16_C( -5817), INT16_C(-19678)) },
|
896
|
+
{ simde_mm_set_epi16(INT16_C( 20412), INT16_C(-23342), INT16_C(-11221), INT16_C( 22543),
|
897
|
+
INT16_C( 31042), INT16_C( 25207), INT16_C( 22911), INT16_C( 28646)),
|
898
|
+
simde_mm_set_epi16(INT16_C( 20001), INT16_C(-23713), INT16_C( -2107), INT16_C( -9501),
|
899
|
+
INT16_C(-11898), INT16_C( 13884), INT16_C(-15609), INT16_C( 29959)),
|
900
|
+
simde_mm_set_epi16(INT16_C( 21822), INT16_C( -7394), INT16_C( 25782), INT16_C(-19968),
|
901
|
+
INT16_C( 21782), INT16_C(-31772), INT16_C( -5835), INT16_C( 5735)) },
|
902
|
+
{ simde_mm_set_epi16(INT16_C(-21670), INT16_C( 22724), INT16_C(-29935), INT16_C(-11113),
|
903
|
+
INT16_C(-27399), INT16_C( 1226), INT16_C(-32445), INT16_C( 25109)),
|
904
|
+
simde_mm_set_epi16(INT16_C( 6376), INT16_C( 14599), INT16_C(-22701), INT16_C( 30172),
|
905
|
+
INT16_C( 12539), INT16_C(-29335), INT16_C(-14900), INT16_C( 5269)),
|
906
|
+
simde_mm_set_epi16(INT16_C( 8223), INT16_C(-12663), INT16_C( 23662), INT16_C( 20169),
|
907
|
+
INT16_C(-21142), INT16_C( 18822), INT16_C( 28625), INT16_C( -7982)) },
|
908
|
+
{ simde_mm_set_epi16(INT16_C( 12440), INT16_C(-24086), INT16_C( 16668), INT16_C( -2312),
|
909
|
+
INT16_C( 5277), INT16_C( 2451), INT16_C( 29149), INT16_C( 16634)),
|
910
|
+
simde_mm_set_epi16(INT16_C(-13694), INT16_C( 20767), INT16_C( 26711), INT16_C(-18489),
|
911
|
+
INT16_C( 7419), INT16_C( 20190), INT16_C( 6918), INT16_C(-26228)),
|
912
|
+
simde_mm_set_epi16(INT16_C(-31075), INT16_C( 20336), INT16_C( 12771), INT16_C( 32390),
|
913
|
+
INT16_C( 29010), INT16_C(-18980), INT16_C( -2826), INT16_C(-12515)) },
|
914
|
+
{ simde_mm_set_epi16(INT16_C(-18064), INT16_C(-22080), INT16_C( 31211), INT16_C(-24234),
|
915
|
+
INT16_C( 17815), INT16_C( 19504), INT16_C(-18918), INT16_C(-22478)),
|
916
|
+
simde_mm_set_epi16(INT16_C( 1846), INT16_C( 29599), INT16_C( -8713), INT16_C(-21885),
|
917
|
+
INT16_C(-32548), INT16_C( 8452), INT16_C( -6739), INT16_C(-17538)),
|
918
|
+
simde_mm_set_epi16(INT16_C( 27753), INT16_C(-13172), INT16_C(-24536), INT16_C(-10799),
|
919
|
+
INT16_C( -4016), INT16_C( 10091), INT16_C( 1689), INT16_C( -3560)) },
|
920
|
+
{ simde_mm_set_epi16(INT16_C(-26649), INT16_C( 24453), INT16_C( 28697), INT16_C( 11094),
|
921
|
+
INT16_C( 15458), INT16_C(-20509), INT16_C( 21060), INT16_C( 31323)),
|
922
|
+
simde_mm_set_epi16(INT16_C( 13710), INT16_C(-23834), INT16_C( 8691), INT16_C( 6543),
|
923
|
+
INT16_C( 24367), INT16_C( 6903), INT16_C( -2074), INT16_C(-12200)),
|
924
|
+
simde_mm_set_epi16(INT16_C( 27992), INT16_C( -2148), INT16_C(-17464), INT16_C(-10126),
|
925
|
+
INT16_C(-14434), INT16_C(-17603), INT16_C( 29569), INT16_C( 10263)) },
|
926
|
+
{ simde_mm_set_epi16(INT16_C( 30864), INT16_C( 20241), INT16_C( 16902), INT16_C( 20663),
|
927
|
+
INT16_C(-20841), INT16_C(-32594), INT16_C(-27087), INT16_C( 20516)),
|
928
|
+
simde_mm_set_epi16(INT16_C( 20660), INT16_C( -7551), INT16_C(-22127), INT16_C( 27693),
|
929
|
+
INT16_C( -9668), INT16_C(-19341), INT16_C( 24481), INT16_C(-15352)),
|
930
|
+
simde_mm_set_epi16(INT16_C(-28211), INT16_C(-15716), INT16_C( -9673), INT16_C( 25703),
|
931
|
+
INT16_C(-10623), INT16_C( 3761), INT16_C(-11753), INT16_C(-17933)) }
|
932
|
+
};
|
933
|
+
|
934
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
935
|
+
simde__m128i r = simde_mm_hsub_epi16(test_vec[i].a, test_vec[i].b);
|
936
|
+
simde_assert_m128i_i16(r, ==, test_vec[i].r);
|
937
|
+
}
|
938
|
+
|
939
|
+
return MUNIT_OK;
|
940
|
+
}
|
941
|
+
|
942
|
+
static MunitResult
|
943
|
+
test_simde_mm_hsub_epi32(const MunitParameter params[], void* data) {
|
944
|
+
(void) params;
|
945
|
+
(void) data;
|
946
|
+
|
947
|
+
const struct {
|
948
|
+
simde__m128i a;
|
949
|
+
simde__m128i b;
|
950
|
+
simde__m128i r;
|
951
|
+
} test_vec[8] = {
|
952
|
+
{ simde_mm_set_epi32(INT32_C( -479907977), INT32_C( -696539570), INT32_C(-1398601783), INT32_C( 536165734)),
|
953
|
+
simde_mm_set_epi32(INT32_C( 5565618), INT32_C( -381463958), INT32_C( 42537933), INT32_C( -711333282)),
|
954
|
+
simde_mm_set_epi32(INT32_C( -387029576), INT32_C( -753871215), INT32_C( -216631593), INT32_C( 1934767517)) },
|
955
|
+
{ simde_mm_set_epi32(INT32_C( 818650962), INT32_C( 824145104), INT32_C(-1844195506), INT32_C(-1825969675)),
|
956
|
+
simde_mm_set_epi32(INT32_C( 2083359703), INT32_C(-2060086869), INT32_C( 1669409252), INT32_C(-1777354497)),
|
957
|
+
simde_mm_set_epi32(INT32_C( 151520724), INT32_C( 848203547), INT32_C( 5494142), INT32_C( 18225831)) },
|
958
|
+
{ simde_mm_set_epi32(INT32_C(-1587888977), INT32_C( -591401974), INT32_C( -147902824), INT32_C( 1830002670)),
|
959
|
+
simde_mm_set_epi32(INT32_C( 399705498), INT32_C(-1859698091), INT32_C( 1482907200), INT32_C( 1392342146)),
|
960
|
+
simde_mm_set_epi32(INT32_C( 2035563707), INT32_C( -90565054), INT32_C( 996487003), INT32_C( 1977905494)) },
|
961
|
+
{ simde_mm_set_epi32(INT32_C(-1814888934), INT32_C(-1949362692), INT32_C( 1433235748), INT32_C( -537299867)),
|
962
|
+
simde_mm_set_epi32(INT32_C( -273399868), INT32_C( 1316606109), INT32_C( 1234934819), INT32_C( 2143067132)),
|
963
|
+
simde_mm_set_epi32(INT32_C( 1590005977), INT32_C( 908132313), INT32_C( -134473758), INT32_C(-1970535615)) },
|
964
|
+
{ simde_mm_set_epi32(INT32_C(-2102962425), INT32_C( -70755375), INT32_C(-2145299060), INT32_C( 305166546)),
|
965
|
+
simde_mm_set_epi32(INT32_C( -140541285), INT32_C(-1875101359), INT32_C(-1065221069), INT32_C( 2018475568)),
|
966
|
+
simde_mm_set_epi32(INT32_C(-1734560074), INT32_C(-1211270659), INT32_C( 2032207050), INT32_C(-1844501690)) },
|
967
|
+
{ simde_mm_set_epi32(INT32_C( -996291163), INT32_C( -192227992), INT32_C(-1330314074), INT32_C( -761581524)),
|
968
|
+
simde_mm_set_epi32(INT32_C(-1442240024), INT32_C(-1983945374), INT32_C( 1651588405), INT32_C(-2097842058)),
|
969
|
+
simde_mm_set_epi32(INT32_C( -541705350), INT32_C( 545536833), INT32_C( 804063171), INT32_C( 568732550)) },
|
970
|
+
{ simde_mm_set_epi32(INT32_C(-1780627386), INT32_C( 769411793), INT32_C( 1731523542), INT32_C( 660129568)),
|
971
|
+
simde_mm_set_epi32(INT32_C( -469002090), INT32_C( -974572540), INT32_C( 829774662), INT32_C(-1194362973)),
|
972
|
+
simde_mm_set_epi32(INT32_C( -505570450), INT32_C(-2024137635), INT32_C(-1744928117), INT32_C(-1071393974)) },
|
973
|
+
{ simde_mm_set_epi32(INT32_C( -87824878), INT32_C(-2145465811), INT32_C( 968986944), INT32_C( 617458465)),
|
974
|
+
simde_mm_set_epi32(INT32_C( 1888345900), INT32_C( 1243027853), INT32_C(-2115148095), INT32_C( -711680076)),
|
975
|
+
simde_mm_set_epi32(INT32_C( -645318047), INT32_C( 1403468019), INT32_C(-2057640933), INT32_C( -351528479)) }
|
976
|
+
};
|
977
|
+
|
978
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
979
|
+
simde__m128i r = simde_mm_hsub_epi32(test_vec[i].a, test_vec[i].b);
|
980
|
+
simde_assert_m128i_i32(r, ==, test_vec[i].r);
|
981
|
+
}
|
982
|
+
|
983
|
+
return MUNIT_OK;
|
984
|
+
}
|
985
|
+
|
986
|
+
static MunitResult
|
987
|
+
test_simde_mm_hsub_pi16(const MunitParameter params[], void* data) {
|
988
|
+
(void) params;
|
989
|
+
(void) data;
|
990
|
+
|
991
|
+
const struct {
|
992
|
+
simde__m64 a;
|
993
|
+
simde__m64 b;
|
994
|
+
simde__m64 r;
|
995
|
+
} test_vec[8] = {
|
996
|
+
{ simde_mm_set_pi16(INT16_C(-21341), INT16_C( 1993), INT16_C( 8181), INT16_C( 15718)),
|
997
|
+
simde_mm_set_pi16(INT16_C( -7323), INT16_C( 12151), INT16_C(-10629), INT16_C(-22962)),
|
998
|
+
simde_mm_set_pi16(INT16_C( 19474), INT16_C(-12333), INT16_C( 23334), INT16_C( 7537)) },
|
999
|
+
{ simde_mm_set_pi16(INT16_C( 649), INT16_C( 5069), INT16_C(-10855), INT16_C( -5538)),
|
1000
|
+
simde_mm_set_pi16(INT16_C( 84), INT16_C( -4942), INT16_C( -5821), INT16_C( 21098)),
|
1001
|
+
simde_mm_set_pi16(INT16_C( -5026), INT16_C( 26919), INT16_C( 4420), INT16_C( 5317)) },
|
1002
|
+
{ simde_mm_set_pi16(INT16_C(-28141), INT16_C(-12466), INT16_C(-27863), INT16_C( -5643)),
|
1003
|
+
simde_mm_set_pi16(INT16_C( 12491), INT16_C(-24750), INT16_C( 12575), INT16_C( 29904)),
|
1004
|
+
simde_mm_set_pi16(INT16_C( 28295), INT16_C( 17329), INT16_C( 15675), INT16_C( 22220)) },
|
1005
|
+
{ simde_mm_set_pi16(INT16_C( 25473), INT16_C( 10724), INT16_C(-27121), INT16_C(-18177)),
|
1006
|
+
simde_mm_set_pi16(INT16_C( 31789), INT16_C(-29737), INT16_C(-31435), INT16_C(-28245)),
|
1007
|
+
simde_mm_set_pi16(INT16_C( 4010), INT16_C( 3190), INT16_C(-14749), INT16_C( 8944)) },
|
1008
|
+
{ simde_mm_set_pi16(INT16_C( -2257), INT16_C( 11928), INT16_C( 27923), INT16_C(-24594)),
|
1009
|
+
simde_mm_set_pi16(INT16_C(-24230), INT16_C(-17233), INT16_C( -9025), INT16_C( -5110)),
|
1010
|
+
simde_mm_set_pi16(INT16_C( 6997), INT16_C( 3915), INT16_C( 14185), INT16_C( 13019)) },
|
1011
|
+
{ simde_mm_set_pi16(INT16_C( 22627), INT16_C( 24128), INT16_C( 21245), INT16_C( 29826)),
|
1012
|
+
simde_mm_set_pi16(INT16_C( 6099), INT16_C( 1434), INT16_C(-28377), INT16_C( 16981)),
|
1013
|
+
simde_mm_set_pi16(INT16_C( -4665), INT16_C(-20178), INT16_C( 1501), INT16_C( 8581)) },
|
1014
|
+
{ simde_mm_set_pi16(INT16_C( 21869), INT16_C( 28964), INT16_C( -8199), INT16_C( 29797)),
|
1015
|
+
simde_mm_set_pi16(INT16_C(-27694), INT16_C( -486), INT16_C(-29745), INT16_C( 5628)),
|
1016
|
+
simde_mm_set_pi16(INT16_C( 27208), INT16_C(-30163), INT16_C( 7095), INT16_C(-27540)) },
|
1017
|
+
{ simde_mm_set_pi16(INT16_C( 18843), INT16_C(-25565), INT16_C( 32700), INT16_C(-25604)),
|
1018
|
+
simde_mm_set_pi16(INT16_C( -4172), INT16_C( 16324), INT16_C( 20089), INT16_C(-12131)),
|
1019
|
+
simde_mm_set_pi16(INT16_C( 20496), INT16_C(-32220), INT16_C( 21128), INT16_C( 7232)) }
|
1020
|
+
};
|
1021
|
+
|
1022
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
1023
|
+
simde__m64 r = simde_mm_hsub_pi16(test_vec[i].a, test_vec[i].b);
|
1024
|
+
simde_assert_m64_i16(r, ==, test_vec[i].r);
|
1025
|
+
}
|
1026
|
+
|
1027
|
+
return MUNIT_OK;
|
1028
|
+
}
|
1029
|
+
|
1030
|
+
static MunitResult
|
1031
|
+
test_simde_mm_hsub_pi32(const MunitParameter params[], void* data) {
|
1032
|
+
(void) params;
|
1033
|
+
(void) data;
|
1034
|
+
|
1035
|
+
const struct {
|
1036
|
+
simde__m64 a;
|
1037
|
+
simde__m64 b;
|
1038
|
+
simde__m64 r;
|
1039
|
+
} test_vec[8] = {
|
1040
|
+
{ simde_mm_set_pi32(INT32_C(-1398601783), INT32_C( 536165734)),
|
1041
|
+
simde_mm_set_pi32(INT32_C( -479907977), INT32_C( -696539570)),
|
1042
|
+
simde_mm_set_pi32(INT32_C( -216631593), INT32_C( 1934767517)) },
|
1043
|
+
{ simde_mm_set_pi32(INT32_C( 42537933), INT32_C( -711333282)),
|
1044
|
+
simde_mm_set_pi32(INT32_C( 5565618), INT32_C( -381463958)),
|
1045
|
+
simde_mm_set_pi32(INT32_C( -387029576), INT32_C( -753871215)) },
|
1046
|
+
{ simde_mm_set_pi32(INT32_C(-1844195506), INT32_C(-1825969675)),
|
1047
|
+
simde_mm_set_pi32(INT32_C( 818650962), INT32_C( 824145104)),
|
1048
|
+
simde_mm_set_pi32(INT32_C( 5494142), INT32_C( 18225831)) },
|
1049
|
+
{ simde_mm_set_pi32(INT32_C( 1669409252), INT32_C(-1777354497)),
|
1050
|
+
simde_mm_set_pi32(INT32_C( 2083359703), INT32_C(-2060086869)),
|
1051
|
+
simde_mm_set_pi32(INT32_C( 151520724), INT32_C( 848203547)) },
|
1052
|
+
{ simde_mm_set_pi32(INT32_C( -147902824), INT32_C( 1830002670)),
|
1053
|
+
simde_mm_set_pi32(INT32_C(-1587888977), INT32_C( -591401974)),
|
1054
|
+
simde_mm_set_pi32(INT32_C( 996487003), INT32_C( 1977905494)) },
|
1055
|
+
{ simde_mm_set_pi32(INT32_C( 1482907200), INT32_C( 1392342146)),
|
1056
|
+
simde_mm_set_pi32(INT32_C( 399705498), INT32_C(-1859698091)),
|
1057
|
+
simde_mm_set_pi32(INT32_C( 2035563707), INT32_C( -90565054)) },
|
1058
|
+
{ simde_mm_set_pi32(INT32_C( 1433235748), INT32_C( -537299867)),
|
1059
|
+
simde_mm_set_pi32(INT32_C(-1814888934), INT32_C(-1949362692)),
|
1060
|
+
simde_mm_set_pi32(INT32_C( -134473758), INT32_C(-1970535615)) },
|
1061
|
+
{ simde_mm_set_pi32(INT32_C( 1234934819), INT32_C( 2143067132)),
|
1062
|
+
simde_mm_set_pi32(INT32_C( -273399868), INT32_C( 1316606109)),
|
1063
|
+
simde_mm_set_pi32(INT32_C( 1590005977), INT32_C( 908132313)) }
|
1064
|
+
};
|
1065
|
+
|
1066
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
1067
|
+
simde__m64 r = simde_mm_hsub_pi32(test_vec[i].a, test_vec[i].b);
|
1068
|
+
simde_assert_m64_i32(r, ==, test_vec[i].r);
|
1069
|
+
}
|
1070
|
+
|
1071
|
+
return MUNIT_OK;
|
1072
|
+
}
|
1073
|
+
|
1074
|
+
static MunitResult
|
1075
|
+
test_simde_mm_hsubs_epi16(const MunitParameter params[], void* data) {
|
1076
|
+
(void) params;
|
1077
|
+
(void) data;
|
1078
|
+
|
1079
|
+
const struct {
|
1080
|
+
simde__m128i a;
|
1081
|
+
simde__m128i b;
|
1082
|
+
simde__m128i r;
|
1083
|
+
} test_vec[8] = {
|
1084
|
+
{ simde_mm_set_epi16(INT16_C( 13774), INT16_C(-29689), INT16_C( 10999), INT16_C( -6147),
|
1085
|
+
INT16_C(-29800), INT16_C( 18242), INT16_C( 25067), INT16_C( -9729)),
|
1086
|
+
simde_mm_set_epi16(INT16_C( 12949), INT16_C( 20521), INT16_C(-28557), INT16_C(-22012),
|
1087
|
+
INT16_C(-30915), INT16_C( -6461), INT16_C( 26181), INT16_C( 4500)),
|
1088
|
+
simde_mm_set_epi16(INT16_C( 7572), INT16_C( 6545), INT16_C( 24454), INT16_C(-21681),
|
1089
|
+
INT16_C(-32768), INT16_C(-17146), INT16_C( 32767), INT16_C(-32768)) },
|
1090
|
+
{ simde_mm_set_epi16(INT16_C( 29988), INT16_C(-16786), INT16_C( 24082), INT16_C(-25968),
|
1091
|
+
INT16_C( 12374), INT16_C( 28178), INT16_C( 2246), INT16_C(-16612)),
|
1092
|
+
simde_mm_set_epi16(INT16_C( 21001), INT16_C( 21933), INT16_C(-28339), INT16_C(-21262),
|
1093
|
+
INT16_C(-18820), INT16_C( -7213), INT16_C(-31232), INT16_C( -7091)),
|
1094
|
+
simde_mm_set_epi16(INT16_C( 932), INT16_C( 7077), INT16_C( 11607), INT16_C( 24141),
|
1095
|
+
INT16_C(-32768), INT16_C(-32768), INT16_C( 15804), INT16_C(-18858)) },
|
1096
|
+
{ simde_mm_set_epi16(INT16_C( -2178), INT16_C( 7231), INT16_C( 1749), INT16_C(-11837),
|
1097
|
+
INT16_C( 29652), INT16_C(-23237), INT16_C( -3549), INT16_C(-11367)),
|
1098
|
+
simde_mm_set_epi16(INT16_C(-28533), INT16_C( 19954), INT16_C( 15418), INT16_C( 23837),
|
1099
|
+
INT16_C( 27116), INT16_C( -2562), INT16_C(-12163), INT16_C( 27809)),
|
1100
|
+
simde_mm_set_epi16(INT16_C( 32767), INT16_C( 8419), INT16_C(-29678), INT16_C( 32767),
|
1101
|
+
INT16_C( 9409), INT16_C(-13586), INT16_C(-32768), INT16_C( -7818)) },
|
1102
|
+
{ simde_mm_set_epi16(INT16_C( 9359), INT16_C( 10457), INT16_C( -9481), INT16_C( 7337),
|
1103
|
+
INT16_C( -7865), INT16_C( -8059), INT16_C(-27714), INT16_C(-26625)),
|
1104
|
+
simde_mm_set_epi16(INT16_C( 1382), INT16_C( 7017), INT16_C( 1603), INT16_C( 12659),
|
1105
|
+
INT16_C(-15886), INT16_C( 13804), INT16_C( 23429), INT16_C(-30064)),
|
1106
|
+
simde_mm_set_epi16(INT16_C( 5635), INT16_C( 11056), INT16_C( 29690), INT16_C(-32768),
|
1107
|
+
INT16_C( 1098), INT16_C( 16818), INT16_C( -194), INT16_C( 1089)) },
|
1108
|
+
{ simde_mm_set_epi16(INT16_C( -6864), INT16_C( 32077), INT16_C(-12988), INT16_C(-19165),
|
1109
|
+
INT16_C(-26014), INT16_C( 8246), INT16_C( 27640), INT16_C( 25410)),
|
1110
|
+
simde_mm_set_epi16(INT16_C( 19800), INT16_C(-22857), INT16_C( -2668), INT16_C( 12159),
|
1111
|
+
INT16_C( 9895), INT16_C( -9099), INT16_C(-14776), INT16_C( 4666)),
|
1112
|
+
simde_mm_set_epi16(INT16_C(-32768), INT16_C( 14827), INT16_C(-18994), INT16_C( 19442),
|
1113
|
+
INT16_C( 32767), INT16_C( -6177), INT16_C( 32767), INT16_C( -2230)) },
|
1114
|
+
{ simde_mm_set_epi16(INT16_C( 19814), INT16_C( 25204), INT16_C( 1688), INT16_C(-25917),
|
1115
|
+
INT16_C( -4068), INT16_C(-22336), INT16_C( 14502), INT16_C( 27222)),
|
1116
|
+
simde_mm_set_epi16(INT16_C(-18197), INT16_C( 12530), INT16_C( 8023), INT16_C( 5629),
|
1117
|
+
INT16_C( 32454), INT16_C( 4791), INT16_C( -4481), INT16_C( 19744)),
|
1118
|
+
simde_mm_set_epi16(INT16_C( 30727), INT16_C( -2394), INT16_C(-27663), INT16_C( 24225),
|
1119
|
+
INT16_C( 5390), INT16_C(-27605), INT16_C(-18268), INT16_C( 12720)) },
|
1120
|
+
{ simde_mm_set_epi16(INT16_C( -7171), INT16_C( 12346), INT16_C( 491), INT16_C( -3389),
|
1121
|
+
INT16_C(-18032), INT16_C( -9295), INT16_C(-27339), INT16_C( 18234)),
|
1122
|
+
simde_mm_set_epi16(INT16_C( -7625), INT16_C( 17942), INT16_C(-16463), INT16_C( 19005),
|
1123
|
+
INT16_C(-15122), INT16_C( 15452), INT16_C( 28218), INT16_C( 7688)),
|
1124
|
+
simde_mm_set_epi16(INT16_C( 25567), INT16_C( 32767), INT16_C( 30574), INT16_C(-20530),
|
1125
|
+
INT16_C( 19517), INT16_C( -3880), INT16_C( 8737), INT16_C( 32767)) },
|
1126
|
+
{ simde_mm_set_epi16(INT16_C( 680), INT16_C( 3648), INT16_C( 9121), INT16_C( 26085),
|
1127
|
+
INT16_C( 21203), INT16_C( -8528), INT16_C( 17475), INT16_C(-10092)),
|
1128
|
+
simde_mm_set_epi16(INT16_C( 9701), INT16_C( 32227), INT16_C( -294), INT16_C(-17758),
|
1129
|
+
INT16_C( 13795), INT16_C(-28706), INT16_C( 28077), INT16_C(-10927)),
|
1130
|
+
simde_mm_set_epi16(INT16_C( 22526), INT16_C(-17464), INT16_C(-32768), INT16_C(-32768),
|
1131
|
+
INT16_C( 2968), INT16_C( 16964), INT16_C(-29731), INT16_C(-27567)) }
|
1132
|
+
};
|
1133
|
+
|
1134
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
1135
|
+
simde__m128i r = simde_mm_hsubs_epi16(test_vec[i].a, test_vec[i].b);
|
1136
|
+
simde_assert_m128i_i16(r, ==, test_vec[i].r);
|
1137
|
+
}
|
1138
|
+
|
1139
|
+
return MUNIT_OK;
|
1140
|
+
}
|
1141
|
+
|
1142
|
+
static MunitResult
|
1143
|
+
test_simde_mm_hsubs_pi16(const MunitParameter params[], void* data) {
|
1144
|
+
(void) params;
|
1145
|
+
(void) data;
|
1146
|
+
|
1147
|
+
const struct {
|
1148
|
+
simde__m64 a;
|
1149
|
+
simde__m64 b;
|
1150
|
+
simde__m64 r;
|
1151
|
+
} test_vec[8] = {
|
1152
|
+
{ simde_mm_set_pi16(INT16_C(-29800), INT16_C( 18242), INT16_C( 25067), INT16_C( -9729)),
|
1153
|
+
simde_mm_set_pi16(INT16_C( 13774), INT16_C(-29689), INT16_C( 10999), INT16_C( -6147)),
|
1154
|
+
simde_mm_set_pi16(INT16_C(-32768), INT16_C(-17146), INT16_C( 32767), INT16_C(-32768)) },
|
1155
|
+
{ simde_mm_set_pi16(INT16_C(-30915), INT16_C( -6461), INT16_C( 26181), INT16_C( 4500)),
|
1156
|
+
simde_mm_set_pi16(INT16_C( 12949), INT16_C( 20521), INT16_C(-28557), INT16_C(-22012)),
|
1157
|
+
simde_mm_set_pi16(INT16_C( 7572), INT16_C( 6545), INT16_C( 24454), INT16_C(-21681)) },
|
1158
|
+
{ simde_mm_set_pi16(INT16_C( 12374), INT16_C( 28178), INT16_C( 2246), INT16_C(-16612)),
|
1159
|
+
simde_mm_set_pi16(INT16_C( 29988), INT16_C(-16786), INT16_C( 24082), INT16_C(-25968)),
|
1160
|
+
simde_mm_set_pi16(INT16_C(-32768), INT16_C(-32768), INT16_C( 15804), INT16_C(-18858)) },
|
1161
|
+
{ simde_mm_set_pi16(INT16_C(-18820), INT16_C( -7213), INT16_C(-31232), INT16_C( -7091)),
|
1162
|
+
simde_mm_set_pi16(INT16_C( 21001), INT16_C( 21933), INT16_C(-28339), INT16_C(-21262)),
|
1163
|
+
simde_mm_set_pi16(INT16_C( 932), INT16_C( 7077), INT16_C( 11607), INT16_C( 24141)) },
|
1164
|
+
{ simde_mm_set_pi16(INT16_C( 29652), INT16_C(-23237), INT16_C( -3549), INT16_C(-11367)),
|
1165
|
+
simde_mm_set_pi16(INT16_C( -2178), INT16_C( 7231), INT16_C( 1749), INT16_C(-11837)),
|
1166
|
+
simde_mm_set_pi16(INT16_C( 9409), INT16_C(-13586), INT16_C(-32768), INT16_C( -7818)) },
|
1167
|
+
{ simde_mm_set_pi16(INT16_C( 27116), INT16_C( -2562), INT16_C(-12163), INT16_C( 27809)),
|
1168
|
+
simde_mm_set_pi16(INT16_C(-28533), INT16_C( 19954), INT16_C( 15418), INT16_C( 23837)),
|
1169
|
+
simde_mm_set_pi16(INT16_C( 32767), INT16_C( 8419), INT16_C(-29678), INT16_C( 32767)) },
|
1170
|
+
{ simde_mm_set_pi16(INT16_C( -7865), INT16_C( -8059), INT16_C(-27714), INT16_C(-26625)),
|
1171
|
+
simde_mm_set_pi16(INT16_C( 9359), INT16_C( 10457), INT16_C( -9481), INT16_C( 7337)),
|
1172
|
+
simde_mm_set_pi16(INT16_C( 1098), INT16_C( 16818), INT16_C( -194), INT16_C( 1089)) },
|
1173
|
+
{ simde_mm_set_pi16(INT16_C(-15886), INT16_C( 13804), INT16_C( 23429), INT16_C(-30064)),
|
1174
|
+
simde_mm_set_pi16(INT16_C( 1382), INT16_C( 7017), INT16_C( 1603), INT16_C( 12659)),
|
1175
|
+
simde_mm_set_pi16(INT16_C( 5635), INT16_C( 11056), INT16_C( 29690), INT16_C(-32768)) }
|
1176
|
+
};
|
1177
|
+
|
1178
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
1179
|
+
simde__m64 r = simde_mm_hsubs_pi16(test_vec[i].a, test_vec[i].b);
|
1180
|
+
simde_assert_m64_i16(r, ==, test_vec[i].r);
|
1181
|
+
}
|
1182
|
+
|
1183
|
+
return MUNIT_OK;
|
1184
|
+
}
|
1185
|
+
|
1186
|
+
static MunitResult
|
1187
|
+
test_simde_mm_maddubs_epi16(const MunitParameter params[], void* data) {
|
1188
|
+
(void) params;
|
1189
|
+
(void) data;
|
1190
|
+
|
1191
|
+
const struct {
|
1192
|
+
simde__m128i a;
|
1193
|
+
simde__m128i b;
|
1194
|
+
simde__m128i r;
|
1195
|
+
} test_vec[8] = {
|
1196
|
+
{ simde_x_mm_set_epu8(UINT8_C( 51), UINT8_C( 0), UINT8_C(149), UINT8_C(143),
|
1197
|
+
UINT8_C( 51), UINT8_C( 92), UINT8_C(224), UINT8_C( 61),
|
1198
|
+
UINT8_C(140), UINT8_C(247), UINT8_C( 97), UINT8_C( 44),
|
1199
|
+
UINT8_C(110), UINT8_C( 56), UINT8_C(160), UINT8_C( 0)),
|
1200
|
+
simde_mm_set_epi8 (INT8_C( -33), INT8_C( -19), INT8_C( 30), INT8_C(-109),
|
1201
|
+
INT8_C( -30), INT8_C( 17), INT8_C( 4), INT8_C( 76),
|
1202
|
+
INT8_C( 94), INT8_C( 61), INT8_C( 21), INT8_C( 25),
|
1203
|
+
INT8_C(-111), INT8_C( -97), INT8_C( 96), INT8_C( 46)),
|
1204
|
+
simde_mm_set_epi16(INT16_C( -1683), INT16_C(-11117), INT16_C( 34), INT16_C( 5532),
|
1205
|
+
INT16_C( 28227), INT16_C( 3137), INT16_C(-17642), INT16_C( 15360)) },
|
1206
|
+
{ simde_x_mm_set_epu8(UINT8_C(202), UINT8_C(213), UINT8_C( 43), UINT8_C( 75),
|
1207
|
+
UINT8_C(134), UINT8_C(185), UINT8_C(251), UINT8_C( 80),
|
1208
|
+
UINT8_C(190), UINT8_C( 88), UINT8_C(158), UINT8_C(164),
|
1209
|
+
UINT8_C(129), UINT8_C( 83), UINT8_C( 80), UINT8_C(190)),
|
1210
|
+
simde_mm_set_epi8 (INT8_C( -88), INT8_C( 38), INT8_C( 2), INT8_C( 99),
|
1211
|
+
INT8_C( 71), INT8_C( 74), INT8_C( 0), INT8_C(-104),
|
1212
|
+
INT8_C( -7), INT8_C( -22), INT8_C( -85), INT8_C( -84),
|
1213
|
+
INT8_C( 70), INT8_C( 111), INT8_C( -89), INT8_C( -1)),
|
1214
|
+
simde_mm_set_epi16(INT16_C( -9682), INT16_C( 7511), INT16_C( 23204), INT16_C( -8320),
|
1215
|
+
INT16_C( -3266), INT16_C(-27206), INT16_C( 18243), INT16_C( -7310)) },
|
1216
|
+
{ simde_x_mm_set_epu8(UINT8_C( 33), UINT8_C( 10), UINT8_C(112), UINT8_C(214),
|
1217
|
+
UINT8_C(240), UINT8_C( 83), UINT8_C(196), UINT8_C( 31),
|
1218
|
+
UINT8_C(227), UINT8_C( 73), UINT8_C(215), UINT8_C(104),
|
1219
|
+
UINT8_C(224), UINT8_C( 75), UINT8_C(136), UINT8_C( 7)),
|
1220
|
+
simde_mm_set_epi8 (INT8_C(-103), INT8_C( 58), INT8_C( -91), INT8_C( -44),
|
1221
|
+
INT8_C( 16), INT8_C( -73), INT8_C( -19), INT8_C( -50),
|
1222
|
+
INT8_C( 72), INT8_C( -54), INT8_C( -57), INT8_C(-103),
|
1223
|
+
INT8_C(-118), INT8_C( 121), INT8_C( 123), INT8_C( -74)),
|
1224
|
+
simde_mm_set_epi16(INT16_C( -2819), INT16_C(-19608), INT16_C( -2219), INT16_C( -5274),
|
1225
|
+
INT16_C( 12402), INT16_C(-22967), INT16_C(-17357), INT16_C( 16210)) },
|
1226
|
+
{ simde_x_mm_set_epu8(UINT8_C(252), UINT8_C( 31), UINT8_C(134), UINT8_C( 31),
|
1227
|
+
UINT8_C(122), UINT8_C(123), UINT8_C(150), UINT8_C(213),
|
1228
|
+
UINT8_C(153), UINT8_C(103), UINT8_C(181), UINT8_C(195),
|
1229
|
+
UINT8_C(170), UINT8_C( 58), UINT8_C(240), UINT8_C( 47)),
|
1230
|
+
simde_mm_set_epi8 (INT8_C( -25), INT8_C( 64), INT8_C(-100), INT8_C( -3),
|
1231
|
+
INT8_C( 4), INT8_C( -66), INT8_C( 118), INT8_C( 79),
|
1232
|
+
INT8_C(-101), INT8_C( -55), INT8_C( -4), INT8_C( -13),
|
1233
|
+
INT8_C( -64), INT8_C( 101), INT8_C( -30), INT8_C( 104)),
|
1234
|
+
simde_mm_set_epi16(INT16_C( -4316), INT16_C(-13493), INT16_C( -7630), INT16_C( 32767),
|
1235
|
+
INT16_C(-21118), INT16_C( -3259), INT16_C( -5022), INT16_C( -2312)) },
|
1236
|
+
{ simde_x_mm_set_epu8(UINT8_C(195), UINT8_C( 70), UINT8_C(169), UINT8_C( 25),
|
1237
|
+
UINT8_C( 44), UINT8_C(147), UINT8_C(212), UINT8_C(247),
|
1238
|
+
UINT8_C(193), UINT8_C(226), UINT8_C( 3), UINT8_C( 32),
|
1239
|
+
UINT8_C(176), UINT8_C(206), UINT8_C(162), UINT8_C(147)),
|
1240
|
+
simde_mm_set_epi8 (INT8_C( -49), INT8_C(-123), INT8_C( 64), INT8_C( -72),
|
1241
|
+
INT8_C( -48), INT8_C( 90), INT8_C( 7), INT8_C( 111),
|
1242
|
+
INT8_C( 51), INT8_C( -54), INT8_C( 46), INT8_C( 30),
|
1243
|
+
INT8_C( 16), INT8_C( 116), INT8_C( 86), INT8_C( 72)),
|
1244
|
+
simde_mm_set_epi16(INT16_C(-18165), INT16_C( 9016), INT16_C( 11118), INT16_C( 28901),
|
1245
|
+
INT16_C( -2361), INT16_C( 1098), INT16_C( 26712), INT16_C( 24516)) },
|
1246
|
+
{ simde_x_mm_set_epu8(UINT8_C(170), UINT8_C(171), UINT8_C(184), UINT8_C(175),
|
1247
|
+
UINT8_C(121), UINT8_C(154), UINT8_C(221), UINT8_C(215),
|
1248
|
+
UINT8_C( 81), UINT8_C(122), UINT8_C( 48), UINT8_C(175),
|
1249
|
+
UINT8_C(206), UINT8_C(142), UINT8_C( 87), UINT8_C(151)),
|
1250
|
+
simde_mm_set_epi8 (INT8_C( -17), INT8_C( 73), INT8_C( -38), INT8_C( -41),
|
1251
|
+
INT8_C( 82), INT8_C( 87), INT8_C( 108), INT8_C( 26),
|
1252
|
+
INT8_C( 57), INT8_C( 110), INT8_C( 70), INT8_C( 0),
|
1253
|
+
INT8_C( 68), INT8_C( -35), INT8_C( -9), INT8_C( -4)),
|
1254
|
+
simde_mm_set_epi16(INT16_C( 9593), INT16_C(-14167), INT16_C( 23320), INT16_C( 29458),
|
1255
|
+
INT16_C( 18037), INT16_C( 3360), INT16_C( 9038), INT16_C( -1387)) },
|
1256
|
+
{ simde_x_mm_set_epu8(UINT8_C(107), UINT8_C( 23), UINT8_C(200), UINT8_C(241),
|
1257
|
+
UINT8_C(184), UINT8_C( 99), UINT8_C(201), UINT8_C(184),
|
1258
|
+
UINT8_C( 6), UINT8_C(119), UINT8_C(156), UINT8_C(238),
|
1259
|
+
UINT8_C( 57), UINT8_C(220), UINT8_C( 31), UINT8_C( 63)),
|
1260
|
+
simde_mm_set_epi8 (INT8_C( 55), INT8_C( -43), INT8_C( -4), INT8_C( -8),
|
1261
|
+
INT8_C( 98), INT8_C(-128), INT8_C( 34), INT8_C( 78),
|
1262
|
+
INT8_C( -53), INT8_C( 85), INT8_C(-119), INT8_C(-112),
|
1263
|
+
INT8_C( 64), INT8_C( 97), INT8_C( -95), INT8_C( -65)),
|
1264
|
+
simde_mm_set_epi16(INT16_C( 4896), INT16_C( -2728), INT16_C( 5360), INT16_C( 21186),
|
1265
|
+
INT16_C( 9797), INT16_C(-32768), INT16_C( 24988), INT16_C( -7040)) },
|
1266
|
+
{ simde_x_mm_set_epu8(UINT8_C(240), UINT8_C(158), UINT8_C( 50), UINT8_C(200),
|
1267
|
+
UINT8_C(127), UINT8_C(117), UINT8_C(116), UINT8_C(126),
|
1268
|
+
UINT8_C( 28), UINT8_C( 15), UINT8_C(211), UINT8_C(171),
|
1269
|
+
UINT8_C( 26), UINT8_C(129), UINT8_C( 21), UINT8_C(147)),
|
1270
|
+
simde_mm_set_epi8 (INT8_C( 1), INT8_C( -35), INT8_C( 16), INT8_C( 63),
|
1271
|
+
INT8_C( 10), INT8_C( -86), INT8_C( -27), INT8_C( -9),
|
1272
|
+
INT8_C( -69), INT8_C( 85), INT8_C( -80), INT8_C( 80),
|
1273
|
+
INT8_C( 15), INT8_C( 55), INT8_C( 36), INT8_C( -33)),
|
1274
|
+
simde_mm_set_epi16(INT16_C( -5290), INT16_C( 13400), INT16_C( -8792), INT16_C( -4266),
|
1275
|
+
INT16_C( -657), INT16_C( -3200), INT16_C( 7485), INT16_C( -4095)) }
|
1276
|
+
};
|
1277
|
+
|
1278
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
1279
|
+
simde__m128i r = simde_mm_maddubs_epi16(test_vec[i].a, test_vec[i].b);
|
1280
|
+
simde_assert_m128i_i16(r, ==, test_vec[i].r);
|
1281
|
+
}
|
1282
|
+
|
1283
|
+
return MUNIT_OK;
|
1284
|
+
}
|
1285
|
+
|
1286
|
+
static MunitResult
|
1287
|
+
test_simde_mm_maddubs_pi16(const MunitParameter params[], void* data) {
|
1288
|
+
(void) params;
|
1289
|
+
(void) data;
|
1290
|
+
|
1291
|
+
const struct {
|
1292
|
+
simde__m64 a;
|
1293
|
+
simde__m64 b;
|
1294
|
+
simde__m64 r;
|
1295
|
+
} test_vec[8] = {
|
1296
|
+
{ simde_x_mm_set_pu8(UINT8_C( 226), UINT8_C( 60), UINT8_C( 180), UINT8_C( 253),
|
1297
|
+
UINT8_C( 116), UINT8_C( 114), UINT8_C( 202), UINT8_C( 153)),
|
1298
|
+
simde_mm_set_pi8( INT8_C( -83), INT8_C( 11), INT8_C( 102), INT8_C( -8),
|
1299
|
+
INT8_C( 96), INT8_C( 7), INT8_C( -47), INT8_C( 11)),
|
1300
|
+
simde_mm_set_pi16(INT16_C(-18098), INT16_C( 16336), INT16_C( 11934), INT16_C( -7811)) },
|
1301
|
+
{ simde_x_mm_set_pu8(UINT8_C( 204), UINT8_C( 170), UINT8_C( 75), UINT8_C( 84),
|
1302
|
+
UINT8_C( 124), UINT8_C( 190), UINT8_C( 100), UINT8_C( 219)),
|
1303
|
+
simde_mm_set_pi8( INT8_C( -34), INT8_C( -62), INT8_C( -50), INT8_C( -16),
|
1304
|
+
INT8_C( 75), INT8_C( 6), INT8_C( 33), INT8_C( 1)),
|
1305
|
+
simde_mm_set_pi16(INT16_C(-17476), INT16_C( -5094), INT16_C( 10440), INT16_C( 3519)) },
|
1306
|
+
{ simde_x_mm_set_pu8(UINT8_C( 41), UINT8_C( 19), UINT8_C( 177), UINT8_C( 122),
|
1307
|
+
UINT8_C( 36), UINT8_C( 42), UINT8_C( 63), UINT8_C( 32)),
|
1308
|
+
simde_mm_set_pi8( INT8_C( 42), INT8_C( -60), INT8_C( 43), INT8_C( 108),
|
1309
|
+
INT8_C( -92), INT8_C( 62), INT8_C( -84), INT8_C(-109)),
|
1310
|
+
simde_mm_set_pi16(INT16_C( 582), INT16_C( 20787), INT16_C( -708), INT16_C( -8780)) },
|
1311
|
+
{ simde_x_mm_set_pu8(UINT8_C( 251), UINT8_C( 60), UINT8_C( 216), UINT8_C( 235),
|
1312
|
+
UINT8_C( 217), UINT8_C( 226), UINT8_C( 248), UINT8_C( 212)),
|
1313
|
+
simde_mm_set_pi8( INT8_C( 39), INT8_C( 2), INT8_C( -42), INT8_C(-128),
|
1314
|
+
INT8_C( 5), INT8_C( 35), INT8_C(-117), INT8_C( 123)),
|
1315
|
+
simde_mm_set_pi16(INT16_C( 9909), INT16_C(-32768), INT16_C( 8995), INT16_C( -2940)) },
|
1316
|
+
{ simde_x_mm_set_pu8(UINT8_C( 8), UINT8_C( 71), UINT8_C( 143), UINT8_C( 51),
|
1317
|
+
UINT8_C( 192), UINT8_C( 71), UINT8_C( 71), UINT8_C( 112)),
|
1318
|
+
simde_mm_set_pi8( INT8_C( 67), INT8_C( -27), INT8_C( 86), INT8_C( -45),
|
1319
|
+
INT8_C( -88), INT8_C( -88), INT8_C(-120), INT8_C( -56)),
|
1320
|
+
simde_mm_set_pi16(INT16_C( -1381), INT16_C( 10003), INT16_C(-23144), INT16_C(-14792)) },
|
1321
|
+
{ simde_x_mm_set_pu8(UINT8_C( 146), UINT8_C( 253), UINT8_C( 229), UINT8_C( 229),
|
1322
|
+
UINT8_C( 1), UINT8_C( 245), UINT8_C( 28), UINT8_C( 5)),
|
1323
|
+
simde_mm_set_pi8( INT8_C( 17), INT8_C( 111), INT8_C( -69), INT8_C(-110),
|
1324
|
+
INT8_C( -81), INT8_C( 80), INT8_C( 52), INT8_C( -25)),
|
1325
|
+
simde_mm_set_pi16(INT16_C( 30565), INT16_C(-32768), INT16_C( 19519), INT16_C( 1331)) },
|
1326
|
+
{ simde_x_mm_set_pu8(UINT8_C( 37), UINT8_C( 74), UINT8_C( 250), UINT8_C( 7),
|
1327
|
+
UINT8_C( 132), UINT8_C( 86), UINT8_C( 80), UINT8_C( 39)),
|
1328
|
+
simde_mm_set_pi8( INT8_C(-119), INT8_C( -25), INT8_C( 75), INT8_C( 100),
|
1329
|
+
INT8_C( -82), INT8_C( -30), INT8_C(-102), INT8_C( -17)),
|
1330
|
+
simde_mm_set_pi16(INT16_C( -6253), INT16_C( 19450), INT16_C(-13404), INT16_C( -8823)) },
|
1331
|
+
{ simde_x_mm_set_pu8(UINT8_C( 23), UINT8_C( 177), UINT8_C( 190), UINT8_C( 68),
|
1332
|
+
UINT8_C( 193), UINT8_C( 21), UINT8_C( 108), UINT8_C( 80)),
|
1333
|
+
simde_mm_set_pi8( INT8_C( -20), INT8_C(-112), INT8_C( -45), INT8_C( -43),
|
1334
|
+
INT8_C( 85), INT8_C( 92), INT8_C( -55), INT8_C( -79)),
|
1335
|
+
simde_mm_set_pi16(INT16_C(-20284), INT16_C(-11474), INT16_C( 18337), INT16_C(-12260)) }
|
1336
|
+
};
|
1337
|
+
|
1338
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
1339
|
+
simde__m64 r = simde_mm_maddubs_pi16(test_vec[i].a, test_vec[i].b);
|
1340
|
+
simde_assert_m64_i16(r, ==, test_vec[i].r);
|
1341
|
+
}
|
1342
|
+
|
1343
|
+
return MUNIT_OK;
|
1344
|
+
}
|
1345
|
+
|
1346
|
+
static MunitResult
|
1347
|
+
test_simde_mm_mulhrs_epi16(const MunitParameter params[], void* data) {
|
1348
|
+
(void) params;
|
1349
|
+
(void) data;
|
1350
|
+
|
1351
|
+
const struct {
|
1352
|
+
simde__m128i a;
|
1353
|
+
simde__m128i b;
|
1354
|
+
simde__m128i r;
|
1355
|
+
} test_vec[8] = {
|
1356
|
+
{ simde_mm_set_epi16(INT16_C(-22170), INT16_C( 3338), INT16_C( 16927), INT16_C(-30825),
|
1357
|
+
INT16_C( -5333), INT16_C(-26742), INT16_C( 73), INT16_C( 12836)),
|
1358
|
+
simde_mm_set_epi16(INT16_C(-20552), INT16_C( 2947), INT16_C( 22103), INT16_C( -1959),
|
1359
|
+
INT16_C( 9399), INT16_C(-17978), INT16_C( 24358), INT16_C( -8616)),
|
1360
|
+
simde_mm_set_epi16(INT16_C( 13905), INT16_C( 300), INT16_C( 11418), INT16_C( 1843),
|
1361
|
+
INT16_C( -1530), INT16_C( 14672), INT16_C( 54), INT16_C( -3375)) },
|
1362
|
+
{ simde_mm_set_epi16(INT16_C( 13543), INT16_C(-11087), INT16_C( -705), INT16_C( -8016),
|
1363
|
+
INT16_C( 5997), INT16_C(-15738), INT16_C(-26326), INT16_C(-25662)),
|
1364
|
+
simde_mm_set_epi16(INT16_C( 6685), INT16_C(-23579), INT16_C( 14091), INT16_C( -6956),
|
1365
|
+
INT16_C(-18113), INT16_C( 23901), INT16_C( 31684), INT16_C( 2110)),
|
1366
|
+
simde_mm_set_epi16(INT16_C( 2763), INT16_C( 7978), INT16_C( -303), INT16_C( 1702),
|
1367
|
+
INT16_C( -3315), INT16_C(-11479), INT16_C(-25455), INT16_C( -1652)) },
|
1368
|
+
{ simde_mm_set_epi16(INT16_C( 2735), INT16_C(-11575), INT16_C(-10031), INT16_C( 17488),
|
1369
|
+
INT16_C(-28766), INT16_C( -754), INT16_C( 27755), INT16_C(-14828)),
|
1370
|
+
simde_mm_set_epi16(INT16_C(-18261), INT16_C( -2558), INT16_C( 20250), INT16_C(-32745),
|
1371
|
+
INT16_C( 26468), INT16_C( 12462), INT16_C( -600), INT16_C( 27533)),
|
1372
|
+
simde_mm_set_epi16(INT16_C( -1524), INT16_C( 904), INT16_C( -6199), INT16_C(-17476),
|
1373
|
+
INT16_C(-23235), INT16_C( -287), INT16_C( -508), INT16_C(-12459)) },
|
1374
|
+
{ simde_mm_set_epi16(INT16_C( 18118), INT16_C( 19684), INT16_C( 21218), INT16_C( 11012),
|
1375
|
+
INT16_C( 4581), INT16_C( 31051), INT16_C(-22370), INT16_C( 18948)),
|
1376
|
+
simde_mm_set_epi16(INT16_C( 4495), INT16_C(-26951), INT16_C( 10375), INT16_C( 11197),
|
1377
|
+
INT16_C(-22121), INT16_C( 27826), INT16_C(-13849), INT16_C( 15915)),
|
1378
|
+
simde_mm_set_epi16(INT16_C( 2485), INT16_C(-16190), INT16_C( 6718), INT16_C( 3763),
|
1379
|
+
INT16_C( -3093), INT16_C( 26368), INT16_C( 9454), INT16_C( 9203)) },
|
1380
|
+
{ simde_mm_set_epi16(INT16_C( -8749), INT16_C( 9365), INT16_C( -7001), INT16_C( 29368),
|
1381
|
+
INT16_C( 22035), INT16_C( 22575), INT16_C( 15151), INT16_C( 8924)),
|
1382
|
+
simde_mm_set_epi16(INT16_C( -5832), INT16_C(-10538), INT16_C(-26586), INT16_C(-26292),
|
1383
|
+
INT16_C( 24365), INT16_C( 31099), INT16_C( -6578), INT16_C(-26373)),
|
1384
|
+
simde_mm_set_epi16(INT16_C( 1557), INT16_C( -3012), INT16_C( 5680), INT16_C(-23564),
|
1385
|
+
INT16_C( 16384), INT16_C( 21425), INT16_C( -3041), INT16_C( -7182)) },
|
1386
|
+
{ simde_mm_set_epi16(INT16_C(-16986), INT16_C(-15026), INT16_C( 27907), INT16_C( 22865),
|
1387
|
+
INT16_C( 12487), INT16_C( -3271), INT16_C(-20289), INT16_C(-18773)),
|
1388
|
+
simde_mm_set_epi16(INT16_C( 12991), INT16_C( -159), INT16_C( -6884), INT16_C(-32273),
|
1389
|
+
INT16_C( 15955), INT16_C( 23229), INT16_C(-21266), INT16_C( 5578)),
|
1390
|
+
simde_mm_set_epi16(INT16_C( -6734), INT16_C( 73), INT16_C( -5863), INT16_C(-22520),
|
1391
|
+
INT16_C( 6080), INT16_C( -2319), INT16_C( 13167), INT16_C( -3196)) },
|
1392
|
+
{ simde_mm_set_epi16(INT16_C( -4913), INT16_C(-31702), INT16_C( -5693), INT16_C( 25724),
|
1393
|
+
INT16_C( 8769), INT16_C( -4014), INT16_C(-21883), INT16_C(-17971)),
|
1394
|
+
simde_mm_set_epi16(INT16_C( 5100), INT16_C( -4154), INT16_C( 5428), INT16_C( 15121),
|
1395
|
+
INT16_C( 10050), INT16_C( -9982), INT16_C(-14810), INT16_C( 21413)),
|
1396
|
+
simde_mm_set_epi16(INT16_C( -765), INT16_C( 4019), INT16_C( -943), INT16_C( 11871),
|
1397
|
+
INT16_C( 2689), INT16_C( 1223), INT16_C( 9890), INT16_C(-11744)) },
|
1398
|
+
{ simde_mm_set_epi16(INT16_C(-31657), INT16_C(-25785), INT16_C( -931), INT16_C( 4611),
|
1399
|
+
INT16_C(-30993), INT16_C(-28215), INT16_C( 22556), INT16_C( 13103)),
|
1400
|
+
simde_mm_set_epi16(INT16_C( 16378), INT16_C( 14367), INT16_C(-20270), INT16_C( 6205),
|
1401
|
+
INT16_C( 3145), INT16_C( 1055), INT16_C( -2582), INT16_C(-27163)),
|
1402
|
+
simde_mm_set_epi16(INT16_C(-15823), INT16_C(-11305), INT16_C( 576), INT16_C( 873),
|
1403
|
+
INT16_C( -2975), INT16_C( -908), INT16_C( -1777), INT16_C(-10862)) }
|
1404
|
+
};
|
1405
|
+
|
1406
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
1407
|
+
simde__m128i r = simde_mm_mulhrs_epi16(test_vec[i].a, test_vec[i].b);
|
1408
|
+
simde_assert_m128i_i16(r, ==, test_vec[i].r);
|
1409
|
+
}
|
1410
|
+
|
1411
|
+
return MUNIT_OK;
|
1412
|
+
}
|
1413
|
+
|
1414
|
+
static MunitResult
|
1415
|
+
test_simde_mm_mulhrs_pi16(const MunitParameter params[], void* data) {
|
1416
|
+
(void) params;
|
1417
|
+
(void) data;
|
1418
|
+
|
1419
|
+
const struct {
|
1420
|
+
simde__m64 a;
|
1421
|
+
simde__m64 b;
|
1422
|
+
simde__m64 r;
|
1423
|
+
} test_vec[8] = {
|
1424
|
+
{ simde_mm_set_pi16(INT16_C( 10873), INT16_C( 20470), INT16_C(-22699), INT16_C( -8299)),
|
1425
|
+
simde_mm_set_pi16(INT16_C( 2803), INT16_C( 17070), INT16_C( 3401), INT16_C( 5228)),
|
1426
|
+
simde_mm_set_pi16(INT16_C( 930), INT16_C( 10664), INT16_C( -2356), INT16_C( -1324)) },
|
1427
|
+
{ simde_mm_set_pi16(INT16_C(-25757), INT16_C(-26678), INT16_C(-28851), INT16_C( -6466)),
|
1428
|
+
simde_mm_set_pi16(INT16_C( 14685), INT16_C(-28969), INT16_C(-23351), INT16_C(-21231)),
|
1429
|
+
simde_mm_set_pi16(INT16_C(-11543), INT16_C( 23585), INT16_C( 20560), INT16_C( 4189)) },
|
1430
|
+
{ simde_mm_set_pi16(INT16_C(-15507), INT16_C( 30038), INT16_C( 30487), INT16_C( 19821)),
|
1431
|
+
simde_mm_set_pi16(INT16_C( 12343), INT16_C(-21596), INT16_C(-10324), INT16_C(-29925)),
|
1432
|
+
simde_mm_set_pi16(INT16_C( -5841), INT16_C(-19797), INT16_C( -9605), INT16_C(-18101)) },
|
1433
|
+
{ simde_mm_set_pi16(INT16_C( -7595), INT16_C( 32328), INT16_C( -530), INT16_C( 25195)),
|
1434
|
+
simde_mm_set_pi16(INT16_C( 24804), INT16_C( 24789), INT16_C( 13974), INT16_C(-29149)),
|
1435
|
+
simde_mm_set_pi16(INT16_C( -5749), INT16_C( 24456), INT16_C( -226), INT16_C(-22412)) },
|
1436
|
+
{ simde_mm_set_pi16(INT16_C(-25620), INT16_C(-10236), INT16_C( -5862), INT16_C( -6331)),
|
1437
|
+
simde_mm_set_pi16(INT16_C( 850), INT16_C(-23039), INT16_C(-12194), INT16_C( 18653)),
|
1438
|
+
simde_mm_set_pi16(INT16_C( -665), INT16_C( 7197), INT16_C( 2181), INT16_C( -3604)) },
|
1439
|
+
{ simde_mm_set_pi16(INT16_C( -3270), INT16_C( 28976), INT16_C(-17448), INT16_C(-17812)),
|
1440
|
+
simde_mm_set_pi16(INT16_C( 22918), INT16_C(-11286), INT16_C(-17728), INT16_C( 18555)),
|
1441
|
+
simde_mm_set_pi16(INT16_C( -2287), INT16_C( -9980), INT16_C( 9440), INT16_C(-10086)) },
|
1442
|
+
{ simde_mm_set_pi16(INT16_C( -7562), INT16_C(-25114), INT16_C(-28747), INT16_C( 15932)),
|
1443
|
+
simde_mm_set_pi16(INT16_C( 19935), INT16_C( -3041), INT16_C( -4324), INT16_C( 19473)),
|
1444
|
+
simde_mm_set_pi16(INT16_C( -4600), INT16_C( 2331), INT16_C( 3793), INT16_C( 9468)) },
|
1445
|
+
{ simde_mm_set_pi16(INT16_C( 2783), INT16_C( -5706), INT16_C( 21220), INT16_C(-16928)),
|
1446
|
+
simde_mm_set_pi16(INT16_C( 5658), INT16_C( 25482), INT16_C( -693), INT16_C( 7606)),
|
1447
|
+
simde_mm_set_pi16(INT16_C( 481), INT16_C( -4437), INT16_C( -449), INT16_C( -3929)) }
|
1448
|
+
};
|
1449
|
+
|
1450
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
1451
|
+
simde__m64 r = simde_mm_mulhrs_pi16(test_vec[i].a, test_vec[i].b);
|
1452
|
+
simde_assert_m64_i16(r, ==, test_vec[i].r);
|
1453
|
+
}
|
1454
|
+
|
1455
|
+
return MUNIT_OK;
|
1456
|
+
}
|
1457
|
+
|
1458
|
+
static MunitResult
|
1459
|
+
test_simde_mm_shuffle_epi8(const MunitParameter params[], void* data) {
|
1460
|
+
(void) params;
|
1461
|
+
(void) data;
|
1462
|
+
|
1463
|
+
const struct {
|
1464
|
+
simde__m128i a;
|
1465
|
+
simde__m128i b;
|
1466
|
+
simde__m128i r;
|
1467
|
+
} test_vec[8] = {
|
1468
|
+
{ simde_mm_set_epi8(INT8_C( 44), INT8_C( -68), INT8_C( 109), INT8_C( -92),
|
1469
|
+
INT8_C( -84), INT8_C( 83), INT8_C( -49), INT8_C( -4),
|
1470
|
+
INT8_C( 73), INT8_C( -26), INT8_C( 49), INT8_C( 23),
|
1471
|
+
INT8_C( 14), INT8_C( 33), INT8_C( 30), INT8_C( 21)),
|
1472
|
+
simde_mm_set_epi8(INT8_C(-115), INT8_C( 102), INT8_C( 8), INT8_C(-108),
|
1473
|
+
INT8_C(-116), INT8_C( 49), INT8_C( 91), INT8_C( 2),
|
1474
|
+
INT8_C( 32), INT8_C( 37), INT8_C( -49), INT8_C( -84),
|
1475
|
+
INT8_C( -92), INT8_C( -12), INT8_C( 37), INT8_C( -66)),
|
1476
|
+
simde_mm_set_epi8(INT8_C( 0), INT8_C( -26), INT8_C( -4), INT8_C( 0),
|
1477
|
+
INT8_C( 0), INT8_C( 30), INT8_C( -84), INT8_C( 33),
|
1478
|
+
INT8_C( 21), INT8_C( 49), INT8_C( 0), INT8_C( 0),
|
1479
|
+
INT8_C( 0), INT8_C( 0), INT8_C( 49), INT8_C( 0)) },
|
1480
|
+
{ simde_mm_set_epi8(INT8_C( 89), INT8_C( -43), INT8_C( 48), INT8_C( -87),
|
1481
|
+
INT8_C(-114), INT8_C( 41), INT8_C( 3), INT8_C( -2),
|
1482
|
+
INT8_C( 123), INT8_C( -90), INT8_C( 32), INT8_C( 61),
|
1483
|
+
INT8_C( 41), INT8_C( 90), INT8_C( 114), INT8_C( -84)),
|
1484
|
+
simde_mm_set_epi8(INT8_C( 51), INT8_C( 83), INT8_C( 120), INT8_C( 6),
|
1485
|
+
INT8_C( 44), INT8_C( -77), INT8_C( -77), INT8_C( -60),
|
1486
|
+
INT8_C(-100), INT8_C( -13), INT8_C( 38), INT8_C( 107),
|
1487
|
+
INT8_C( -16), INT8_C( 22), INT8_C( 88), INT8_C( -72)),
|
1488
|
+
simde_mm_set_epi8(INT8_C( 41), INT8_C( 41), INT8_C( -2), INT8_C( -90),
|
1489
|
+
INT8_C( -87), INT8_C( 0), INT8_C( 0), INT8_C( 0),
|
1490
|
+
INT8_C( 0), INT8_C( 0), INT8_C( -90), INT8_C(-114),
|
1491
|
+
INT8_C( 0), INT8_C( -90), INT8_C( -2), INT8_C( 0)) },
|
1492
|
+
{ simde_mm_set_epi8(INT8_C( -82), INT8_C( 4), INT8_C( -8), INT8_C( 75),
|
1493
|
+
INT8_C( -46), INT8_C( -97), INT8_C( 124), INT8_C( -42),
|
1494
|
+
INT8_C( -19), INT8_C( -88), INT8_C( 82), INT8_C( -56),
|
1495
|
+
INT8_C( -19), INT8_C( 12), INT8_C( -51), INT8_C( -26)),
|
1496
|
+
simde_mm_set_epi8(INT8_C(-105), INT8_C( -81), INT8_C( -16), INT8_C( -8),
|
1497
|
+
INT8_C( 85), INT8_C( 99), INT8_C( -25), INT8_C(-111),
|
1498
|
+
INT8_C(-109), INT8_C( -84), INT8_C( -46), INT8_C( 92),
|
1499
|
+
INT8_C( -64), INT8_C( 32), INT8_C( 127), INT8_C( 4)),
|
1500
|
+
simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
|
1501
|
+
INT8_C( 82), INT8_C( -19), INT8_C( 0), INT8_C( 0),
|
1502
|
+
INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 75),
|
1503
|
+
INT8_C( 0), INT8_C( -26), INT8_C( -82), INT8_C( -56)) },
|
1504
|
+
{ simde_mm_set_epi8(INT8_C( 46), INT8_C( 56), INT8_C( 81), INT8_C( 110),
|
1505
|
+
INT8_C( -13), INT8_C( -23), INT8_C( -16), INT8_C( 99),
|
1506
|
+
INT8_C( 80), INT8_C( -49), INT8_C( 127), INT8_C( 115),
|
1507
|
+
INT8_C( -66), INT8_C( 50), INT8_C( 102), INT8_C(-123)),
|
1508
|
+
simde_mm_set_epi8(INT8_C(-104), INT8_C(-119), INT8_C( -63), INT8_C( 97),
|
1509
|
+
INT8_C( 38), INT8_C( 25), INT8_C( -72), INT8_C(-100),
|
1510
|
+
INT8_C( 24), INT8_C( -38), INT8_C( 119), INT8_C( -8),
|
1511
|
+
INT8_C( -44), INT8_C( -42), INT8_C( 68), INT8_C( -82)),
|
1512
|
+
simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 102),
|
1513
|
+
INT8_C( -49), INT8_C( -16), INT8_C( 0), INT8_C( 0),
|
1514
|
+
INT8_C( 99), INT8_C( 0), INT8_C( 80), INT8_C( 0),
|
1515
|
+
INT8_C( 0), INT8_C( 0), INT8_C( 115), INT8_C( 0)) },
|
1516
|
+
{ simde_mm_set_epi8(INT8_C(-115), INT8_C( -99), INT8_C( -23), INT8_C( -12),
|
1517
|
+
INT8_C( -38), INT8_C( -56), INT8_C( -78), INT8_C( -83),
|
1518
|
+
INT8_C( 114), INT8_C( 18), INT8_C( -67), INT8_C( -35),
|
1519
|
+
INT8_C( 83), INT8_C( -4), INT8_C(-107), INT8_C( 44)),
|
1520
|
+
simde_mm_set_epi8(INT8_C( 9), INT8_C(-115), INT8_C( -83), INT8_C( 52),
|
1521
|
+
INT8_C( -91), INT8_C( -50), INT8_C( -37), INT8_C( -26),
|
1522
|
+
INT8_C( -47), INT8_C( -5), INT8_C( 109), INT8_C( 26),
|
1523
|
+
INT8_C( 107), INT8_C( 65), INT8_C( -20), INT8_C(-121)),
|
1524
|
+
simde_mm_set_epi8(INT8_C( -78), INT8_C( 0), INT8_C( 0), INT8_C( -35),
|
1525
|
+
INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
|
1526
|
+
INT8_C( 0), INT8_C( 0), INT8_C( -23), INT8_C( -56),
|
1527
|
+
INT8_C( -38), INT8_C(-107), INT8_C( 0), INT8_C( 0)) },
|
1528
|
+
{ simde_mm_set_epi8(INT8_C(-115), INT8_C( -8), INT8_C( 39), INT8_C( -2),
|
1529
|
+
INT8_C( 29), INT8_C( 101), INT8_C( 79), INT8_C( 16),
|
1530
|
+
INT8_C( -89), INT8_C( 91), INT8_C( 104), INT8_C( -22),
|
1531
|
+
INT8_C( -92), INT8_C(-127), INT8_C( -33), INT8_C( -57)),
|
1532
|
+
simde_mm_set_epi8(INT8_C( 39), INT8_C( -83), INT8_C( -41), INT8_C( -20),
|
1533
|
+
INT8_C( 45), INT8_C( 94), INT8_C(-102), INT8_C( 66),
|
1534
|
+
INT8_C( -26), INT8_C( 50), INT8_C( -29), INT8_C( -46),
|
1535
|
+
INT8_C( -77), INT8_C( 42), INT8_C( 100), INT8_C( 57)),
|
1536
|
+
simde_mm_set_epi8(INT8_C( -89), INT8_C( 0), INT8_C( 0), INT8_C( 0),
|
1537
|
+
INT8_C( 39), INT8_C( -8), INT8_C( 0), INT8_C(-127),
|
1538
|
+
INT8_C( 0), INT8_C(-127), INT8_C( 0), INT8_C( 0),
|
1539
|
+
INT8_C( 0), INT8_C( 101), INT8_C( -22), INT8_C( 79)) },
|
1540
|
+
{ simde_mm_set_epi8(INT8_C(-112), INT8_C( 100), INT8_C( -34), INT8_C( -39),
|
1541
|
+
INT8_C( 81), INT8_C( -76), INT8_C( 99), INT8_C(-116),
|
1542
|
+
INT8_C( -50), INT8_C( -98), INT8_C( 118), INT8_C( -34),
|
1543
|
+
INT8_C( -35), INT8_C( -13), INT8_C( -56), INT8_C( -73)),
|
1544
|
+
simde_mm_set_epi8(INT8_C(-108), INT8_C( 119), INT8_C( 11), INT8_C( -79),
|
1545
|
+
INT8_C( 109), INT8_C( -42), INT8_C( 22), INT8_C( 52),
|
1546
|
+
INT8_C(-123), INT8_C( -66), INT8_C( 127), INT8_C( 2),
|
1547
|
+
INT8_C( -56), INT8_C( 51), INT8_C( 46), INT8_C(-126)),
|
1548
|
+
simde_mm_set_epi8(INT8_C( 0), INT8_C( -50), INT8_C( 81), INT8_C( 0),
|
1549
|
+
INT8_C( -34), INT8_C( 0), INT8_C( -98), INT8_C( -34),
|
1550
|
+
INT8_C( 0), INT8_C( 0), INT8_C(-112), INT8_C( -13),
|
1551
|
+
INT8_C( 0), INT8_C( -35), INT8_C( 100), INT8_C( 0)) },
|
1552
|
+
{ simde_mm_set_epi8(INT8_C(-112), INT8_C( -70), INT8_C( 75), INT8_C( 43),
|
1553
|
+
INT8_C( 119), INT8_C( -79), INT8_C( -68), INT8_C( 101),
|
1554
|
+
INT8_C( -26), INT8_C( 20), INT8_C( -43), INT8_C( -70),
|
1555
|
+
INT8_C( -5), INT8_C( 99), INT8_C( -4), INT8_C( -98)),
|
1556
|
+
simde_mm_set_epi8(INT8_C( 92), INT8_C( 83), INT8_C( 95), INT8_C( 6),
|
1557
|
+
INT8_C( -16), INT8_C( 77), INT8_C( -25), INT8_C( 16),
|
1558
|
+
INT8_C( 51), INT8_C( 91), INT8_C( 21), INT8_C(-110),
|
1559
|
+
INT8_C( -9), INT8_C( -12), INT8_C( -77), INT8_C(-112)),
|
1560
|
+
simde_mm_set_epi8(INT8_C( 43), INT8_C( -5), INT8_C(-112), INT8_C( 20),
|
1561
|
+
INT8_C( 0), INT8_C( 75), INT8_C( 0), INT8_C( -98),
|
1562
|
+
INT8_C( -5), INT8_C( 119), INT8_C( -43), INT8_C( 0),
|
1563
|
+
INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }
|
1564
|
+
};
|
1565
|
+
|
1566
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
1567
|
+
simde__m128i r = simde_mm_shuffle_epi8(test_vec[i].a, test_vec[i].b);
|
1568
|
+
simde_assert_m128i_i8(r, ==, test_vec[i].r);
|
1569
|
+
}
|
1570
|
+
|
1571
|
+
return MUNIT_OK;
|
1572
|
+
}
|
1573
|
+
|
1574
|
+
static MunitResult
|
1575
|
+
test_simde_mm_shuffle_pi8(const MunitParameter params[], void* data) {
|
1576
|
+
(void) params;
|
1577
|
+
(void) data;
|
1578
|
+
|
1579
|
+
const struct {
|
1580
|
+
simde__m64 a;
|
1581
|
+
simde__m64 b;
|
1582
|
+
simde__m64 r;
|
1583
|
+
} test_vec[8] = {
|
1584
|
+
{ simde_mm_set_pi8(INT8_C( 74), INT8_C(-101), INT8_C( 15), INT8_C( -66),
|
1585
|
+
INT8_C( -20), INT8_C( 87), INT8_C( 16), INT8_C( -79)),
|
1586
|
+
simde_mm_set_pi8(INT8_C( 29), INT8_C( -65), INT8_C( 38), INT8_C( -40),
|
1587
|
+
INT8_C( -70), INT8_C( 88), INT8_C( -38), INT8_C(-118)),
|
1588
|
+
simde_mm_set_pi8(INT8_C( 15), INT8_C( 0), INT8_C(-101), INT8_C( 0),
|
1589
|
+
INT8_C( 0), INT8_C( -79), INT8_C( 0), INT8_C( 0)) },
|
1590
|
+
{ simde_mm_set_pi8(INT8_C( 72), INT8_C( -15), INT8_C( -72), INT8_C( -84),
|
1591
|
+
INT8_C( -21), INT8_C( -95), INT8_C( -76), INT8_C( -92)),
|
1592
|
+
simde_mm_set_pi8(INT8_C( -94), INT8_C( -81), INT8_C( 23), INT8_C( 85),
|
1593
|
+
INT8_C(-111), INT8_C( 24), INT8_C(-116), INT8_C( 34)),
|
1594
|
+
simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 72), INT8_C( -72),
|
1595
|
+
INT8_C( 0), INT8_C( -92), INT8_C( 0), INT8_C( -95)) },
|
1596
|
+
{ simde_mm_set_pi8(INT8_C( 72), INT8_C( 95), INT8_C( 109), INT8_C( -45),
|
1597
|
+
INT8_C( 11), INT8_C( -2), INT8_C( -6), INT8_C( 80)),
|
1598
|
+
simde_mm_set_pi8(INT8_C( -77), INT8_C(-102), INT8_C( 57), INT8_C( -50),
|
1599
|
+
INT8_C( 85), INT8_C( -92), INT8_C( 102), INT8_C( -10)),
|
1600
|
+
simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( -6), INT8_C( 0),
|
1601
|
+
INT8_C( 109), INT8_C( 0), INT8_C( 95), INT8_C( 0)) },
|
1602
|
+
{ simde_mm_set_pi8(INT8_C( -13), INT8_C( -1), INT8_C( -80), INT8_C( 110),
|
1603
|
+
INT8_C( -12), INT8_C( -38), INT8_C( 43), INT8_C(-126)),
|
1604
|
+
simde_mm_set_pi8(INT8_C( 94), INT8_C( -52), INT8_C( -58), INT8_C( 119),
|
1605
|
+
INT8_C( -62), INT8_C( 119), INT8_C( -96), INT8_C(-113)),
|
1606
|
+
simde_mm_set_pi8(INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -13),
|
1607
|
+
INT8_C( 0), INT8_C( -13), INT8_C( 0), INT8_C( 0)) },
|
1608
|
+
{ simde_mm_set_pi8(INT8_C(-107), INT8_C(-111), INT8_C( 58), INT8_C( -52),
|
1609
|
+
INT8_C( -31), INT8_C( -75), INT8_C(-114), INT8_C( 50)),
|
1610
|
+
simde_mm_set_pi8(INT8_C( -40), INT8_C( -66), INT8_C( 33), INT8_C( 101),
|
1611
|
+
INT8_C( 11), INT8_C( -41), INT8_C( 77), INT8_C( 71)),
|
1612
|
+
simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C(-114), INT8_C( 58),
|
1613
|
+
INT8_C( -31), INT8_C( 0), INT8_C( 58), INT8_C(-107)) },
|
1614
|
+
{ simde_mm_set_pi8(INT8_C( -92), INT8_C( -56), INT8_C( 22), INT8_C( -56),
|
1615
|
+
INT8_C( -63), INT8_C( -78), INT8_C( 106), INT8_C( -87)),
|
1616
|
+
simde_mm_set_pi8(INT8_C( 95), INT8_C( 65), INT8_C( 46), INT8_C( 64),
|
1617
|
+
INT8_C( 39), INT8_C( -81), INT8_C( 83), INT8_C( -54)),
|
1618
|
+
simde_mm_set_pi8(INT8_C( -92), INT8_C( 106), INT8_C( -56), INT8_C( -87),
|
1619
|
+
INT8_C( -92), INT8_C( 0), INT8_C( -63), INT8_C( 0)) },
|
1620
|
+
{ simde_mm_set_pi8(INT8_C( 33), INT8_C(-110), INT8_C( 35), INT8_C( -83),
|
1621
|
+
INT8_C( -76), INT8_C( 59), INT8_C( 45), INT8_C( -42)),
|
1622
|
+
simde_mm_set_pi8(INT8_C( 73), INT8_C( -44), INT8_C( 97), INT8_C( -65),
|
1623
|
+
INT8_C( -88), INT8_C( -50), INT8_C( 19), INT8_C( -79)),
|
1624
|
+
simde_mm_set_pi8(INT8_C( 45), INT8_C( 0), INT8_C( 45), INT8_C( 0),
|
1625
|
+
INT8_C( 0), INT8_C( 0), INT8_C( -76), INT8_C( 0)) },
|
1626
|
+
{ simde_mm_set_pi8(INT8_C( -28), INT8_C( -77), INT8_C( 105), INT8_C( 105),
|
1627
|
+
INT8_C( 22), INT8_C( 1), INT8_C( 100), INT8_C( -15)),
|
1628
|
+
simde_mm_set_pi8(INT8_C( 115), INT8_C( -11), INT8_C( 20), INT8_C( 80),
|
1629
|
+
INT8_C( 40), INT8_C(-114), INT8_C( -49), INT8_C(-108)),
|
1630
|
+
simde_mm_set_pi8(INT8_C( 22), INT8_C( 0), INT8_C( 105), INT8_C( -15),
|
1631
|
+
INT8_C( -15), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }
|
1632
|
+
};
|
1633
|
+
|
1634
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
1635
|
+
simde__m64 r = simde_mm_shuffle_pi8(test_vec[i].a, test_vec[i].b);
|
1636
|
+
simde_assert_m64_i8(r, ==, test_vec[i].r);
|
1637
|
+
}
|
1638
|
+
|
1639
|
+
return MUNIT_OK;
|
1640
|
+
}
|
1641
|
+
|
1642
|
+
static MunitResult
|
1643
|
+
test_simde_mm_sign_epi8(const MunitParameter params[], void* data) {
|
1644
|
+
(void) params;
|
1645
|
+
(void) data;
|
1646
|
+
|
1647
|
+
const struct {
|
1648
|
+
simde__m128i a;
|
1649
|
+
simde__m128i b;
|
1650
|
+
simde__m128i r;
|
1651
|
+
} test_vec[8] = {
|
1652
|
+
{ simde_mm_set_epi8(INT8_C( 37), INT8_C( -69), INT8_C( -16), INT8_C( -99),
|
1653
|
+
INT8_C( 36), INT8_C( 10), INT8_C( 56), INT8_C( -63),
|
1654
|
+
INT8_C(-117), INT8_C( 66), INT8_C( 76), INT8_C( 31),
|
1655
|
+
INT8_C( 61), INT8_C( -83), INT8_C( 44), INT8_C( -66)),
|
1656
|
+
simde_mm_set_epi8(INT8_C( -96), INT8_C( 30), INT8_C( 67), INT8_C( 75),
|
1657
|
+
INT8_C( 24), INT8_C( 56), INT8_C( 14), INT8_C( 75),
|
1658
|
+
INT8_C( -85), INT8_C(-103), INT8_C( 1), INT8_C( -80),
|
1659
|
+
INT8_C( -93), INT8_C( 37), INT8_C( -46), INT8_C( 95)),
|
1660
|
+
simde_mm_set_epi8(INT8_C( -37), INT8_C( -69), INT8_C( -16), INT8_C( -99),
|
1661
|
+
INT8_C( 36), INT8_C( 10), INT8_C( 56), INT8_C( -63),
|
1662
|
+
INT8_C( 117), INT8_C( -66), INT8_C( 76), INT8_C( -31),
|
1663
|
+
INT8_C( -61), INT8_C( -83), INT8_C( -44), INT8_C( -66)) },
|
1664
|
+
{ simde_mm_set_epi8(INT8_C( -16), INT8_C( 66), INT8_C( 6), INT8_C( 126),
|
1665
|
+
INT8_C( -12), INT8_C( -78), INT8_C(-121), INT8_C( -64),
|
1666
|
+
INT8_C( 49), INT8_C( -16), INT8_C( 89), INT8_C( -61),
|
1667
|
+
INT8_C( 60), INT8_C( 17), INT8_C( -94), INT8_C( 113)),
|
1668
|
+
simde_mm_set_epi8(INT8_C( 70), INT8_C( 27), INT8_C( 101), INT8_C( 119),
|
1669
|
+
INT8_C( -80), INT8_C( 103), INT8_C( -28), INT8_C( 79),
|
1670
|
+
INT8_C( 90), INT8_C(-127), INT8_C( -36), INT8_C( 57),
|
1671
|
+
INT8_C( -22), INT8_C( -74), INT8_C( 75), INT8_C( 106)),
|
1672
|
+
simde_mm_set_epi8(INT8_C( -16), INT8_C( 66), INT8_C( 6), INT8_C( 126),
|
1673
|
+
INT8_C( 12), INT8_C( -78), INT8_C( 121), INT8_C( -64),
|
1674
|
+
INT8_C( 49), INT8_C( 16), INT8_C( -89), INT8_C( -61),
|
1675
|
+
INT8_C( -60), INT8_C( -17), INT8_C( -94), INT8_C( 113)) },
|
1676
|
+
{ simde_mm_set_epi8(INT8_C( 99), INT8_C( 38), INT8_C(-110), INT8_C( 26),
|
1677
|
+
INT8_C( 106), INT8_C( 50), INT8_C( -36), INT8_C(-109),
|
1678
|
+
INT8_C( -69), INT8_C( -52), INT8_C( 61), INT8_C( -24),
|
1679
|
+
INT8_C( -63), INT8_C( 96), INT8_C( 45), INT8_C( 113)),
|
1680
|
+
simde_mm_set_epi8(INT8_C( 91), INT8_C( -69), INT8_C( 13), INT8_C( 48),
|
1681
|
+
INT8_C( -63), INT8_C( -35), INT8_C( 91), INT8_C(-109),
|
1682
|
+
INT8_C( -12), INT8_C( -94), INT8_C( 121), INT8_C( -64),
|
1683
|
+
INT8_C( -56), INT8_C( -95), INT8_C( 123), INT8_C( -38)),
|
1684
|
+
simde_mm_set_epi8(INT8_C( 99), INT8_C( -38), INT8_C(-110), INT8_C( 26),
|
1685
|
+
INT8_C(-106), INT8_C( -50), INT8_C( -36), INT8_C( 109),
|
1686
|
+
INT8_C( 69), INT8_C( 52), INT8_C( 61), INT8_C( 24),
|
1687
|
+
INT8_C( 63), INT8_C( -96), INT8_C( 45), INT8_C(-113)) },
|
1688
|
+
{ simde_mm_set_epi8(INT8_C( -46), INT8_C( -25), INT8_C( -91), INT8_C( -54),
|
1689
|
+
INT8_C( 77), INT8_C( -42), INT8_C( -7), INT8_C( -4),
|
1690
|
+
INT8_C( -52), INT8_C( 81), INT8_C( -53), INT8_C( -30),
|
1691
|
+
INT8_C( 73), INT8_C( 25), INT8_C( -34), INT8_C( 101)),
|
1692
|
+
simde_mm_set_epi8(INT8_C( 122), INT8_C( -35), INT8_C( -5), INT8_C( -48),
|
1693
|
+
INT8_C( -70), INT8_C( 5), INT8_C( 5), INT8_C( -1),
|
1694
|
+
INT8_C( 84), INT8_C( 42), INT8_C( 8), INT8_C( 11),
|
1695
|
+
INT8_C( -3), INT8_C(-102), INT8_C( -19), INT8_C( 36)),
|
1696
|
+
simde_mm_set_epi8(INT8_C( -46), INT8_C( 25), INT8_C( 91), INT8_C( 54),
|
1697
|
+
INT8_C( -77), INT8_C( -42), INT8_C( -7), INT8_C( 4),
|
1698
|
+
INT8_C( -52), INT8_C( 81), INT8_C( -53), INT8_C( -30),
|
1699
|
+
INT8_C( -73), INT8_C( -25), INT8_C( 34), INT8_C( 101)) },
|
1700
|
+
{ simde_mm_set_epi8(INT8_C( 100), INT8_C(-100), INT8_C( 72), INT8_C( 21),
|
1701
|
+
INT8_C( 116), INT8_C( 44), INT8_C(-111), INT8_C( 65),
|
1702
|
+
INT8_C( -91), INT8_C( 30), INT8_C( -90), INT8_C( -7),
|
1703
|
+
INT8_C( -19), INT8_C( 82), INT8_C(-121), INT8_C(-102)),
|
1704
|
+
simde_mm_set_epi8(INT8_C( 52), INT8_C( 93), INT8_C(-112), INT8_C( -21),
|
1705
|
+
INT8_C( 14), INT8_C( -56), INT8_C( -64), INT8_C( 86),
|
1706
|
+
INT8_C( 73), INT8_C( 115), INT8_C(-120), INT8_C( -32),
|
1707
|
+
INT8_C( 80), INT8_C( -83), INT8_C( 50), INT8_C( 58)),
|
1708
|
+
simde_mm_set_epi8(INT8_C( 100), INT8_C(-100), INT8_C( -72), INT8_C( -21),
|
1709
|
+
INT8_C( 116), INT8_C( -44), INT8_C( 111), INT8_C( 65),
|
1710
|
+
INT8_C( -91), INT8_C( 30), INT8_C( 90), INT8_C( 7),
|
1711
|
+
INT8_C( -19), INT8_C( -82), INT8_C(-121), INT8_C(-102)) },
|
1712
|
+
{ simde_mm_set_epi8(INT8_C(-106), INT8_C( 24), INT8_C( -64), INT8_C(-116),
|
1713
|
+
INT8_C( 54), INT8_C( 12), INT8_C( 9), INT8_C( 21),
|
1714
|
+
INT8_C( -21), INT8_C( 44), INT8_C( -75), INT8_C( 4),
|
1715
|
+
INT8_C(-124), INT8_C( -51), INT8_C( -45), INT8_C( 84)),
|
1716
|
+
simde_mm_set_epi8(INT8_C( -44), INT8_C( -17), INT8_C( 49), INT8_C( 10),
|
1717
|
+
INT8_C( 41), INT8_C( -6), INT8_C( -23), INT8_C( -29),
|
1718
|
+
INT8_C( 18), INT8_C( -37), INT8_C( -56), INT8_C( -37),
|
1719
|
+
INT8_C( -49), INT8_C( 7), INT8_C( 101), INT8_C( -47)),
|
1720
|
+
simde_mm_set_epi8(INT8_C( 106), INT8_C( -24), INT8_C( -64), INT8_C(-116),
|
1721
|
+
INT8_C( 54), INT8_C( -12), INT8_C( -9), INT8_C( -21),
|
1722
|
+
INT8_C( -21), INT8_C( -44), INT8_C( 75), INT8_C( -4),
|
1723
|
+
INT8_C( 124), INT8_C( -51), INT8_C( -45), INT8_C( -84)) },
|
1724
|
+
{ simde_mm_set_epi8(INT8_C( 54), INT8_C( -94), INT8_C( 102), INT8_C( 77),
|
1725
|
+
INT8_C( 43), INT8_C( 70), INT8_C( -80), INT8_C( 96),
|
1726
|
+
INT8_C( -60), INT8_C( -75), INT8_C( 42), INT8_C( -31),
|
1727
|
+
INT8_C( -2), INT8_C( 121), INT8_C( 29), INT8_C( 54)),
|
1728
|
+
simde_mm_set_epi8(INT8_C( 44), INT8_C( 49), INT8_C( 46), INT8_C( 120),
|
1729
|
+
INT8_C( -14), INT8_C( 89), INT8_C( 3), INT8_C(-114),
|
1730
|
+
INT8_C( -54), INT8_C( -45), INT8_C( 113), INT8_C( -76),
|
1731
|
+
INT8_C( 25), INT8_C( -90), INT8_C( -10), INT8_C( 65)),
|
1732
|
+
simde_mm_set_epi8(INT8_C( 54), INT8_C( -94), INT8_C( 102), INT8_C( 77),
|
1733
|
+
INT8_C( -43), INT8_C( 70), INT8_C( -80), INT8_C( -96),
|
1734
|
+
INT8_C( 60), INT8_C( 75), INT8_C( 42), INT8_C( 31),
|
1735
|
+
INT8_C( -2), INT8_C(-121), INT8_C( -29), INT8_C( 54)) },
|
1736
|
+
{ simde_mm_set_epi8(INT8_C( -18), INT8_C( -38), INT8_C( -9), INT8_C( -44),
|
1737
|
+
INT8_C( -53), INT8_C( -14), INT8_C( -61), INT8_C( 59),
|
1738
|
+
INT8_C( 58), INT8_C( -68), INT8_C( -23), INT8_C( -51),
|
1739
|
+
INT8_C( 43), INT8_C( -1), INT8_C( -6), INT8_C( 20)),
|
1740
|
+
simde_mm_set_epi8(INT8_C( 9), INT8_C( -16), INT8_C( -71), INT8_C( -90),
|
1741
|
+
INT8_C( -92), INT8_C( 58), INT8_C( -15), INT8_C( 115),
|
1742
|
+
INT8_C( -11), INT8_C( 34), INT8_C( 122), INT8_C( -11),
|
1743
|
+
INT8_C( 46), INT8_C( -86), INT8_C( 51), INT8_C( -3)),
|
1744
|
+
simde_mm_set_epi8(INT8_C( -18), INT8_C( 38), INT8_C( 9), INT8_C( 44),
|
1745
|
+
INT8_C( 53), INT8_C( -14), INT8_C( 61), INT8_C( 59),
|
1746
|
+
INT8_C( -58), INT8_C( -68), INT8_C( -23), INT8_C( 51),
|
1747
|
+
INT8_C( 43), INT8_C( 1), INT8_C( -6), INT8_C( -20)) }
|
1748
|
+
};
|
1749
|
+
|
1750
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
1751
|
+
simde__m128i r = simde_mm_sign_epi8(test_vec[i].a, test_vec[i].b);
|
1752
|
+
simde_assert_m128i_i8(r, ==, test_vec[i].r);
|
1753
|
+
}
|
1754
|
+
|
1755
|
+
return MUNIT_OK;
|
1756
|
+
}
|
1757
|
+
|
1758
|
+
static MunitResult
|
1759
|
+
test_simde_mm_sign_epi16(const MunitParameter params[], void* data) {
|
1760
|
+
(void) params;
|
1761
|
+
(void) data;
|
1762
|
+
|
1763
|
+
const struct {
|
1764
|
+
simde__m128i a;
|
1765
|
+
simde__m128i b;
|
1766
|
+
simde__m128i r;
|
1767
|
+
} test_vec[8] = {
|
1768
|
+
{ simde_mm_set_epi16(INT16_C(-15759), INT16_C( 25741), INT16_C( 3088), INT16_C(-12147),
|
1769
|
+
INT16_C( 11906), INT16_C( 15052), INT16_C( 20544), INT16_C(-28879)),
|
1770
|
+
simde_mm_set_epi16(INT16_C( 31434), INT16_C(-10402), INT16_C( 15330), INT16_C(-15336),
|
1771
|
+
INT16_C(-27365), INT16_C( 7051), INT16_C(-15134), INT16_C( 30438)),
|
1772
|
+
simde_mm_set_epi16(INT16_C(-15759), INT16_C(-25741), INT16_C( 3088), INT16_C( 12147),
|
1773
|
+
INT16_C(-11906), INT16_C( 15052), INT16_C(-20544), INT16_C(-28879)) },
|
1774
|
+
{ simde_mm_set_epi16(INT16_C( 25362), INT16_C( 8410), INT16_C( -5772), INT16_C( 11356),
|
1775
|
+
INT16_C( -5145), INT16_C( -528), INT16_C( -3800), INT16_C(-22538)),
|
1776
|
+
simde_mm_set_epi16(INT16_C( 28705), INT16_C( 22529), INT16_C(-24415), INT16_C( 10605),
|
1777
|
+
INT16_C( 8581), INT16_C( 348), INT16_C( 17147), INT16_C(-28679)),
|
1778
|
+
simde_mm_set_epi16(INT16_C( 25362), INT16_C( 8410), INT16_C( 5772), INT16_C( 11356),
|
1779
|
+
INT16_C( -5145), INT16_C( -528), INT16_C( -3800), INT16_C( 22538)) },
|
1780
|
+
{ simde_mm_set_epi16(INT16_C(-15793), INT16_C(-15692), INT16_C(-32639), INT16_C( 2140),
|
1781
|
+
INT16_C( -7199), INT16_C(-11564), INT16_C( 8190), INT16_C( 1872)),
|
1782
|
+
simde_mm_set_epi16(INT16_C(-14750), INT16_C( -5416), INT16_C( -2422), INT16_C(-28769),
|
1783
|
+
INT16_C( 5810), INT16_C( 4853), INT16_C(-22556), INT16_C( 2950)),
|
1784
|
+
simde_mm_set_epi16(INT16_C( 15793), INT16_C( 15692), INT16_C( 32639), INT16_C( -2140),
|
1785
|
+
INT16_C( -7199), INT16_C(-11564), INT16_C( -8190), INT16_C( 1872)) },
|
1786
|
+
{ simde_mm_set_epi16(INT16_C( -6949), INT16_C( 14948), INT16_C(-19128), INT16_C( 99),
|
1787
|
+
INT16_C( -9508), INT16_C( 29377), INT16_C( 26021), INT16_C( 21262)),
|
1788
|
+
simde_mm_set_epi16(INT16_C( 31478), INT16_C(-23157), INT16_C( 27993), INT16_C( 18819),
|
1789
|
+
INT16_C( 21708), INT16_C( 11006), INT16_C(-32250), INT16_C( 28821)),
|
1790
|
+
simde_mm_set_epi16(INT16_C( -6949), INT16_C(-14948), INT16_C(-19128), INT16_C( 99),
|
1791
|
+
INT16_C( -9508), INT16_C( 29377), INT16_C(-26021), INT16_C( 21262)) },
|
1792
|
+
{ simde_mm_set_epi16(INT16_C(-27259), INT16_C( 15633), INT16_C( 24307), INT16_C( 25640),
|
1793
|
+
INT16_C( 23376), INT16_C(-30654), INT16_C( 19896), INT16_C(-14888)),
|
1794
|
+
simde_mm_set_epi16(INT16_C(-26725), INT16_C( -6818), INT16_C( 478), INT16_C( 25662),
|
1795
|
+
INT16_C( 26003), INT16_C( 21963), INT16_C( 1012), INT16_C( 16019)),
|
1796
|
+
simde_mm_set_epi16(INT16_C( 27259), INT16_C(-15633), INT16_C( 24307), INT16_C( 25640),
|
1797
|
+
INT16_C( 23376), INT16_C(-30654), INT16_C( 19896), INT16_C(-14888)) },
|
1798
|
+
{ simde_mm_set_epi16(INT16_C(-18809), INT16_C( 2505), INT16_C(-14233), INT16_C( 26092),
|
1799
|
+
INT16_C( 30746), INT16_C( 286), INT16_C(-10480), INT16_C( 18834)),
|
1800
|
+
simde_mm_set_epi16(INT16_C( 32423), INT16_C(-20791), INT16_C( -741), INT16_C( 17070),
|
1801
|
+
INT16_C( -8670), INT16_C( 1759), INT16_C(-27846), INT16_C( 12891)),
|
1802
|
+
simde_mm_set_epi16(INT16_C(-18809), INT16_C( -2505), INT16_C( 14233), INT16_C( 26092),
|
1803
|
+
INT16_C(-30746), INT16_C( 286), INT16_C( 10480), INT16_C( 18834)) },
|
1804
|
+
{ simde_mm_set_epi16(INT16_C( 10084), INT16_C(-24493), INT16_C( 7465), INT16_C( 3573),
|
1805
|
+
INT16_C(-29669), INT16_C( -5452), INT16_C( 25244), INT16_C( 30808)),
|
1806
|
+
simde_mm_set_epi16(INT16_C( 7390), INT16_C( 4062), INT16_C( -2410), INT16_C(-18994),
|
1807
|
+
INT16_C( 4689), INT16_C( 1376), INT16_C(-23142), INT16_C( 31884)),
|
1808
|
+
simde_mm_set_epi16(INT16_C( 10084), INT16_C(-24493), INT16_C( -7465), INT16_C( -3573),
|
1809
|
+
INT16_C(-29669), INT16_C( -5452), INT16_C(-25244), INT16_C( 30808)) },
|
1810
|
+
{ simde_mm_set_epi16(INT16_C( 11692), INT16_C( 22876), INT16_C(-18223), INT16_C(-22058),
|
1811
|
+
INT16_C(-12080), INT16_C( 30075), INT16_C( 16936), INT16_C(-31252)),
|
1812
|
+
simde_mm_set_epi16(INT16_C( -4320), INT16_C(-15410), INT16_C( 21240), INT16_C(-12323),
|
1813
|
+
INT16_C( -6944), INT16_C(-29167), INT16_C( 21220), INT16_C( 24483)),
|
1814
|
+
simde_mm_set_epi16(INT16_C(-11692), INT16_C(-22876), INT16_C(-18223), INT16_C( 22058),
|
1815
|
+
INT16_C( 12080), INT16_C(-30075), INT16_C( 16936), INT16_C(-31252)) }
|
1816
|
+
};
|
1817
|
+
|
1818
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
1819
|
+
simde__m128i r = simde_mm_sign_epi16(test_vec[i].a, test_vec[i].b);
|
1820
|
+
simde_assert_m128i_i16(r, ==, test_vec[i].r);
|
1821
|
+
}
|
1822
|
+
|
1823
|
+
return MUNIT_OK;
|
1824
|
+
}
|
1825
|
+
|
1826
|
+
static MunitResult
|
1827
|
+
test_simde_mm_sign_epi32(const MunitParameter params[], void* data) {
|
1828
|
+
(void) params;
|
1829
|
+
(void) data;
|
1830
|
+
|
1831
|
+
const struct {
|
1832
|
+
simde__m128i a;
|
1833
|
+
simde__m128i b;
|
1834
|
+
simde__m128i r;
|
1835
|
+
} test_vec[8] = {
|
1836
|
+
{ simde_mm_set_epi32(INT32_C( 633073821), INT32_C( 604649665), INT32_C(-1958589409), INT32_C( 1034759358)),
|
1837
|
+
simde_mm_set_epi32(INT32_C(-1608629429), INT32_C( 406326859), INT32_C(-1416035920), INT32_C(-1557802401)),
|
1838
|
+
simde_mm_set_epi32(INT32_C( -633073821), INT32_C( 604649665), INT32_C( 1958589409), INT32_C(-1034759358)) },
|
1839
|
+
{ simde_mm_set_epi32(INT32_C( -264108418), INT32_C( -189626432), INT32_C( 837835203), INT32_C( 1007788657)),
|
1840
|
+
simde_mm_set_epi32(INT32_C( 1176200567), INT32_C(-1335368625), INT32_C( 1518459961), INT32_C( -357151894)),
|
1841
|
+
simde_mm_set_epi32(INT32_C( -264108418), INT32_C( 189626432), INT32_C( 837835203), INT32_C(-1007788657)) },
|
1842
|
+
{ simde_mm_set_epi32(INT32_C( 1663472154), INT32_C( 1781718163), INT32_C(-1144242712), INT32_C(-1050661519)),
|
1843
|
+
simde_mm_set_epi32(INT32_C( 1538985264), INT32_C(-1042457709), INT32_C( -190678592), INT32_C( -928941094)),
|
1844
|
+
simde_mm_set_epi32(INT32_C( 1663472154), INT32_C(-1781718163), INT32_C( 1144242712), INT32_C( 1050661519)) },
|
1845
|
+
{ simde_mm_set_epi32(INT32_C( -756570678), INT32_C( 1305934332), INT32_C( -867054622), INT32_C( 1226432101)),
|
1846
|
+
simde_mm_set_epi32(INT32_C( 2061368272), INT32_C(-1174075905), INT32_C( 1412040715), INT32_C( -40178396)),
|
1847
|
+
simde_mm_set_epi32(INT32_C( -756570678), INT32_C(-1305934332), INT32_C( -867054622), INT32_C(-1226432101)) },
|
1848
|
+
{ simde_mm_set_epi32(INT32_C( 1687963669), INT32_C( 1949077825), INT32_C(-1524717831), INT32_C( -313358438)),
|
1849
|
+
simde_mm_set_epi32(INT32_C( 878547179), INT32_C( 248037462), INT32_C( 1232308448), INT32_C( 1353527866)),
|
1850
|
+
simde_mm_set_epi32(INT32_C( 1687963669), INT32_C( 1949077825), INT32_C(-1524717831), INT32_C( -313358438)) },
|
1851
|
+
{ simde_mm_set_epi32(INT32_C(-1776762740), INT32_C( 906758421), INT32_C( -349391612), INT32_C(-2066885804)),
|
1852
|
+
simde_mm_set_epi32(INT32_C( -722521846), INT32_C( 704309731), INT32_C( 316393691), INT32_C( -821598767)),
|
1853
|
+
simde_mm_set_epi32(INT32_C( 1776762740), INT32_C( 906758421), INT32_C( -349391612), INT32_C( 2066885804)) },
|
1854
|
+
{ simde_mm_set_epi32(INT32_C( 916612685), INT32_C( 726052960), INT32_C( -994759967), INT32_C( -25617098)),
|
1855
|
+
simde_mm_set_epi32(INT32_C( 741420664), INT32_C( -229047410), INT32_C( -892112460), INT32_C( 430372417)),
|
1856
|
+
simde_mm_set_epi32(INT32_C( 916612685), INT32_C( -726052960), INT32_C( 994759967), INT32_C( -25617098)) },
|
1857
|
+
{ simde_mm_set_epi32(INT32_C( -287639596), INT32_C( -873282757), INT32_C( 985459149), INT32_C( 738195988)),
|
1858
|
+
simde_mm_set_epi32(INT32_C( 166771110), INT32_C(-1539640973), INT32_C( -182289675), INT32_C( 782906365)),
|
1859
|
+
simde_mm_set_epi32(INT32_C( -287639596), INT32_C( 873282757), INT32_C( -985459149), INT32_C( 738195988)) }
|
1860
|
+
};
|
1861
|
+
|
1862
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
1863
|
+
simde__m128i r = simde_mm_sign_epi32(test_vec[i].a, test_vec[i].b);
|
1864
|
+
simde_assert_m128i_i32(r, ==, test_vec[i].r);
|
1865
|
+
}
|
1866
|
+
|
1867
|
+
return MUNIT_OK;
|
1868
|
+
}
|
1869
|
+
|
1870
|
+
static MunitResult
|
1871
|
+
test_simde_mm_sign_pi8(const MunitParameter params[], void* data) {
|
1872
|
+
(void) params;
|
1873
|
+
(void) data;
|
1874
|
+
|
1875
|
+
const struct {
|
1876
|
+
simde__m64 a;
|
1877
|
+
simde__m64 b;
|
1878
|
+
simde__m64 r;
|
1879
|
+
} test_vec[8] = {
|
1880
|
+
{ simde_mm_set_pi8(INT8_C( 41), INT8_C( -70), INT8_C( 22), INT8_C( -60),
|
1881
|
+
INT8_C( -48), INT8_C( -81), INT8_C( 97), INT8_C(-114)),
|
1882
|
+
simde_mm_set_pi8(INT8_C( 12), INT8_C( 54), INT8_C( 116), INT8_C( 111),
|
1883
|
+
INT8_C( 3), INT8_C( 16), INT8_C(-115), INT8_C( 56)),
|
1884
|
+
simde_mm_set_pi8(INT8_C( 41), INT8_C( -70), INT8_C( 22), INT8_C( -60),
|
1885
|
+
INT8_C( -48), INT8_C( -81), INT8_C( -97), INT8_C(-114)) },
|
1886
|
+
{ simde_mm_set_pi8(INT8_C( -31), INT8_C( -22), INT8_C( 55), INT8_C(-115),
|
1887
|
+
INT8_C( -14), INT8_C( -2), INT8_C( 92), INT8_C( 11)),
|
1888
|
+
simde_mm_set_pi8(INT8_C( 67), INT8_C( -80), INT8_C( 19), INT8_C( -63),
|
1889
|
+
INT8_C( -35), INT8_C( -59), INT8_C( -4), INT8_C( 14)),
|
1890
|
+
simde_mm_set_pi8(INT8_C( -31), INT8_C( 22), INT8_C( 55), INT8_C( 115),
|
1891
|
+
INT8_C( 14), INT8_C( 2), INT8_C( -92), INT8_C( 11)) },
|
1892
|
+
{ simde_mm_set_pi8(INT8_C( 123), INT8_C( 72), INT8_C( 109), INT8_C( 92),
|
1893
|
+
INT8_C( -54), INT8_C( 74), INT8_C( 42), INT8_C( 113)),
|
1894
|
+
simde_mm_set_pi8(INT8_C( 49), INT8_C( -54), INT8_C( 108), INT8_C( 45),
|
1895
|
+
INT8_C( 63), INT8_C(-116), INT8_C( 46), INT8_C( 95)),
|
1896
|
+
simde_mm_set_pi8(INT8_C( 123), INT8_C( -72), INT8_C( 109), INT8_C( 92),
|
1897
|
+
INT8_C( -54), INT8_C( -74), INT8_C( 42), INT8_C( 113)) },
|
1898
|
+
{ simde_mm_set_pi8(INT8_C( 51), INT8_C( 95), INT8_C( 90), INT8_C( -33),
|
1899
|
+
INT8_C( 15), INT8_C(-121), INT8_C( -5), INT8_C( 54)),
|
1900
|
+
simde_mm_set_pi8(INT8_C( 88), INT8_C( 89), INT8_C( -32), INT8_C( 32),
|
1901
|
+
INT8_C( 7), INT8_C( 21), INT8_C( -69), INT8_C( 56)),
|
1902
|
+
simde_mm_set_pi8(INT8_C( 51), INT8_C( 95), INT8_C( -90), INT8_C( -33),
|
1903
|
+
INT8_C( 15), INT8_C(-121), INT8_C( 5), INT8_C( 54)) },
|
1904
|
+
{ simde_mm_set_pi8(INT8_C( 10), INT8_C( 79), INT8_C( -53), INT8_C( 15),
|
1905
|
+
INT8_C( 22), INT8_C( -53), INT8_C( -60), INT8_C( -88)),
|
1906
|
+
simde_mm_set_pi8(INT8_C( 40), INT8_C(-102), INT8_C( -67), INT8_C( 115),
|
1907
|
+
INT8_C( -77), INT8_C( -15), INT8_C( -36), INT8_C( -80)),
|
1908
|
+
simde_mm_set_pi8(INT8_C( 10), INT8_C( -79), INT8_C( 53), INT8_C( 15),
|
1909
|
+
INT8_C( -22), INT8_C( 53), INT8_C( 60), INT8_C( 88)) },
|
1910
|
+
{ simde_mm_set_pi8(INT8_C(-120), INT8_C(-122), INT8_C( -65), INT8_C( 22),
|
1911
|
+
INT8_C(-100), INT8_C( 44), INT8_C( -91), INT8_C( 42)),
|
1912
|
+
simde_mm_set_pi8(INT8_C( -87), INT8_C( 50), INT8_C( 14), INT8_C( 36),
|
1913
|
+
INT8_C( -95), INT8_C( 69), INT8_C( -70), INT8_C( 38)),
|
1914
|
+
simde_mm_set_pi8(INT8_C( 120), INT8_C(-122), INT8_C( -65), INT8_C( 22),
|
1915
|
+
INT8_C( 100), INT8_C( 44), INT8_C( 91), INT8_C( 42)) },
|
1916
|
+
{ simde_mm_set_pi8(INT8_C( 71), INT8_C( -24), INT8_C( 115), INT8_C( 90),
|
1917
|
+
INT8_C( 52), INT8_C( 52), INT8_C( -42), INT8_C( 119)),
|
1918
|
+
simde_mm_set_pi8(INT8_C( -78), INT8_C( -10), INT8_C( 31), INT8_C( 106),
|
1919
|
+
INT8_C( -76), INT8_C( -74), INT8_C( 82), INT8_C( 103)),
|
1920
|
+
simde_mm_set_pi8(INT8_C( -71), INT8_C( 24), INT8_C( 115), INT8_C( 90),
|
1921
|
+
INT8_C( -52), INT8_C( -52), INT8_C( -42), INT8_C( 119)) },
|
1922
|
+
{ simde_mm_set_pi8(INT8_C( -29), INT8_C( -55), INT8_C(-107), INT8_C( -94),
|
1923
|
+
INT8_C(-100), INT8_C(-105), INT8_C( 110), INT8_C( 49)),
|
1924
|
+
simde_mm_set_pi8(INT8_C( 116), INT8_C( -73), INT8_C(-114), INT8_C( -3),
|
1925
|
+
INT8_C( 58), INT8_C( 101), INT8_C(-111), INT8_C(-116)),
|
1926
|
+
simde_mm_set_pi8(INT8_C( -29), INT8_C( 55), INT8_C( 107), INT8_C( 94),
|
1927
|
+
INT8_C(-100), INT8_C(-105), INT8_C(-110), INT8_C( -49)) }
|
1928
|
+
};
|
1929
|
+
|
1930
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
1931
|
+
simde__m64 r = simde_mm_sign_pi8(test_vec[i].a, test_vec[i].b);
|
1932
|
+
simde_assert_m64_i8(r, ==, test_vec[i].r);
|
1933
|
+
}
|
1934
|
+
|
1935
|
+
return MUNIT_OK;
|
1936
|
+
}
|
1937
|
+
|
1938
|
+
static MunitResult
|
1939
|
+
test_simde_mm_sign_pi16(const MunitParameter params[], void* data) {
|
1940
|
+
(void) params;
|
1941
|
+
(void) data;
|
1942
|
+
|
1943
|
+
const struct {
|
1944
|
+
simde__m64 a;
|
1945
|
+
simde__m64 b;
|
1946
|
+
simde__m64 r;
|
1947
|
+
} test_vec[8] = {
|
1948
|
+
{ simde_mm_set_pi16(INT16_C( 10682), INT16_C( 5828), INT16_C(-12113), INT16_C( 24974)),
|
1949
|
+
simde_mm_set_pi16(INT16_C( 3126), INT16_C( 29807), INT16_C( 784), INT16_C(-29384)),
|
1950
|
+
simde_mm_set_pi16(INT16_C( 10682), INT16_C( 5828), INT16_C(-12113), INT16_C(-24974)) },
|
1951
|
+
{ simde_mm_set_pi16(INT16_C( -7702), INT16_C( 14221), INT16_C( -3330), INT16_C( 23563)),
|
1952
|
+
simde_mm_set_pi16(INT16_C( 17328), INT16_C( 5057), INT16_C( -8763), INT16_C( -1010)),
|
1953
|
+
simde_mm_set_pi16(INT16_C( -7702), INT16_C( 14221), INT16_C( 3330), INT16_C(-23563)) },
|
1954
|
+
{ simde_mm_set_pi16(INT16_C( 31560), INT16_C( 27996), INT16_C(-13750), INT16_C( 10865)),
|
1955
|
+
simde_mm_set_pi16(INT16_C( 12746), INT16_C( 27693), INT16_C( 16268), INT16_C( 11871)),
|
1956
|
+
simde_mm_set_pi16(INT16_C( 31560), INT16_C( 27996), INT16_C(-13750), INT16_C( 10865)) },
|
1957
|
+
{ simde_mm_set_pi16(INT16_C( 13151), INT16_C( 23263), INT16_C( 3975), INT16_C( -1226)),
|
1958
|
+
simde_mm_set_pi16(INT16_C( 22617), INT16_C( -8160), INT16_C( 1813), INT16_C(-17608)),
|
1959
|
+
simde_mm_set_pi16(INT16_C( 13151), INT16_C(-23263), INT16_C( 3975), INT16_C( 1226)) },
|
1960
|
+
{ simde_mm_set_pi16(INT16_C( 2639), INT16_C(-13553), INT16_C( 5835), INT16_C(-15192)),
|
1961
|
+
simde_mm_set_pi16(INT16_C( 10394), INT16_C(-17037), INT16_C(-19471), INT16_C( -9040)),
|
1962
|
+
simde_mm_set_pi16(INT16_C( 2639), INT16_C( 13553), INT16_C( -5835), INT16_C( 15192)) },
|
1963
|
+
{ simde_mm_set_pi16(INT16_C(-30586), INT16_C(-16618), INT16_C(-25556), INT16_C(-23254)),
|
1964
|
+
simde_mm_set_pi16(INT16_C(-22222), INT16_C( 3620), INT16_C(-24251), INT16_C(-17882)),
|
1965
|
+
simde_mm_set_pi16(INT16_C( 30586), INT16_C(-16618), INT16_C( 25556), INT16_C( 23254)) },
|
1966
|
+
{ simde_mm_set_pi16(INT16_C( 18408), INT16_C( 29530), INT16_C( 13364), INT16_C(-10633)),
|
1967
|
+
simde_mm_set_pi16(INT16_C(-19722), INT16_C( 8042), INT16_C(-19274), INT16_C( 21095)),
|
1968
|
+
simde_mm_set_pi16(INT16_C(-18408), INT16_C( 29530), INT16_C(-13364), INT16_C(-10633)) },
|
1969
|
+
{ simde_mm_set_pi16(INT16_C( -7223), INT16_C(-27230), INT16_C(-25449), INT16_C( 28209)),
|
1970
|
+
simde_mm_set_pi16(INT16_C( 29879), INT16_C(-28931), INT16_C( 14949), INT16_C(-28276)),
|
1971
|
+
simde_mm_set_pi16(INT16_C( -7223), INT16_C( 27230), INT16_C(-25449), INT16_C(-28209)) }
|
1972
|
+
};
|
1973
|
+
|
1974
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
1975
|
+
simde__m64 r = simde_mm_sign_pi16(test_vec[i].a, test_vec[i].b);
|
1976
|
+
simde_assert_m64_i16(r, ==, test_vec[i].r);
|
1977
|
+
}
|
1978
|
+
|
1979
|
+
return MUNIT_OK;
|
1980
|
+
}
|
1981
|
+
|
1982
|
+
static MunitResult
|
1983
|
+
test_simde_mm_sign_pi32(const MunitParameter params[], void* data) {
|
1984
|
+
(void) params;
|
1985
|
+
(void) data;
|
1986
|
+
|
1987
|
+
const struct {
|
1988
|
+
simde__m64 a;
|
1989
|
+
simde__m64 b;
|
1990
|
+
simde__m64 r;
|
1991
|
+
} test_vec[8] = {
|
1992
|
+
{ simde_mm_set_pi32(INT32_C( 700061380), INT32_C( -793812594)),
|
1993
|
+
simde_mm_set_pi32(INT32_C( 204895343), INT32_C( 51416376)),
|
1994
|
+
simde_mm_set_pi32(INT32_C( 700061380), INT32_C( -793812594)) },
|
1995
|
+
{ simde_mm_set_pi32(INT32_C( -504744051), INT32_C( -218211317)),
|
1996
|
+
simde_mm_set_pi32(INT32_C( 1135612865), INT32_C( -574227442)),
|
1997
|
+
simde_mm_set_pi32(INT32_C( -504744051), INT32_C( 218211317)) },
|
1998
|
+
{ simde_mm_set_pi32(INT32_C( 2068344156), INT32_C( -901109135)),
|
1999
|
+
simde_mm_set_pi32(INT32_C( 835349549), INT32_C( 1066151519)),
|
2000
|
+
simde_mm_set_pi32(INT32_C( 2068344156), INT32_C( -901109135)) },
|
2001
|
+
{ simde_mm_set_pi32(INT32_C( 861887199), INT32_C( 260569910)),
|
2002
|
+
simde_mm_set_pi32(INT32_C( 1482285088), INT32_C( 118864696)),
|
2003
|
+
simde_mm_set_pi32(INT32_C( 861887199), INT32_C( 260569910)) },
|
2004
|
+
{ simde_mm_set_pi32(INT32_C( 173001487), INT32_C( 382452904)),
|
2005
|
+
simde_mm_set_pi32(INT32_C( 681229683), INT32_C(-1275994960)),
|
2006
|
+
simde_mm_set_pi32(INT32_C( 173001487), INT32_C( -382452904)) },
|
2007
|
+
{ simde_mm_set_pi32(INT32_C(-2004435178), INT32_C(-1674795734)),
|
2008
|
+
simde_mm_set_pi32(INT32_C(-1456337372), INT32_C(-1589265882)),
|
2009
|
+
simde_mm_set_pi32(INT32_C( 2004435178), INT32_C( 1674795734)) },
|
2010
|
+
{ simde_mm_set_pi32(INT32_C( 1206416218), INT32_C( 875878007)),
|
2011
|
+
simde_mm_set_pi32(INT32_C(-1292492950), INT32_C(-1263119769)),
|
2012
|
+
simde_mm_set_pi32(INT32_C(-1206416218), INT32_C( -875878007)) },
|
2013
|
+
{ simde_mm_set_pi32(INT32_C( -473328222), INT32_C(-1667797455)),
|
2014
|
+
simde_mm_set_pi32(INT32_C( 1958186749), INT32_C( 979734924)),
|
2015
|
+
simde_mm_set_pi32(INT32_C( -473328222), INT32_C(-1667797455)) }
|
2016
|
+
};
|
2017
|
+
|
2018
|
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
|
2019
|
+
simde__m64 r = simde_mm_sign_pi32(test_vec[i].a, test_vec[i].b);
|
2020
|
+
simde_assert_m64_i32(r, ==, test_vec[i].r);
|
2021
|
+
}
|
2022
|
+
|
2023
|
+
return MUNIT_OK;
|
2024
|
+
}
|
2025
|
+
|
2026
|
+
#endif /* defined(SIMDE_SSSE3_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
|
2027
|
+
|
2028
|
+
HEDLEY_DIAGNOSTIC_PUSH
|
2029
|
+
HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL
|
2030
|
+
|
2031
|
+
static MunitTest test_suite_tests[] = {
|
2032
|
+
#if defined(SIMDE_SSSE3_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)
|
2033
|
+
SIMDE_TESTS_DEFINE_TEST(mm_abs_epi8),
|
2034
|
+
SIMDE_TESTS_DEFINE_TEST(mm_abs_epi16),
|
2035
|
+
SIMDE_TESTS_DEFINE_TEST(mm_abs_epi32),
|
2036
|
+
SIMDE_TESTS_DEFINE_TEST(mm_abs_pi8),
|
2037
|
+
SIMDE_TESTS_DEFINE_TEST(mm_abs_pi16),
|
2038
|
+
SIMDE_TESTS_DEFINE_TEST(mm_abs_pi32),
|
2039
|
+
SIMDE_TESTS_DEFINE_TEST(mm_alignr_epi8),
|
2040
|
+
SIMDE_TESTS_DEFINE_TEST(mm_alignr_pi8),
|
2041
|
+
SIMDE_TESTS_DEFINE_TEST(mm_hadd_epi16),
|
2042
|
+
SIMDE_TESTS_DEFINE_TEST(mm_hadd_epi32),
|
2043
|
+
SIMDE_TESTS_DEFINE_TEST(mm_hadd_pi16),
|
2044
|
+
SIMDE_TESTS_DEFINE_TEST(mm_hadd_pi32),
|
2045
|
+
SIMDE_TESTS_DEFINE_TEST(mm_hadds_epi16),
|
2046
|
+
SIMDE_TESTS_DEFINE_TEST(mm_hadds_pi16),
|
2047
|
+
SIMDE_TESTS_DEFINE_TEST(mm_hsub_epi16),
|
2048
|
+
SIMDE_TESTS_DEFINE_TEST(mm_hsub_epi32),
|
2049
|
+
SIMDE_TESTS_DEFINE_TEST(mm_hsub_pi16),
|
2050
|
+
SIMDE_TESTS_DEFINE_TEST(mm_hsub_pi32),
|
2051
|
+
SIMDE_TESTS_DEFINE_TEST(mm_hsubs_epi16),
|
2052
|
+
SIMDE_TESTS_DEFINE_TEST(mm_hsubs_pi16),
|
2053
|
+
SIMDE_TESTS_DEFINE_TEST(mm_maddubs_epi16),
|
2054
|
+
SIMDE_TESTS_DEFINE_TEST(mm_maddubs_pi16),
|
2055
|
+
SIMDE_TESTS_DEFINE_TEST(mm_mulhrs_epi16),
|
2056
|
+
SIMDE_TESTS_DEFINE_TEST(mm_mulhrs_pi16),
|
2057
|
+
SIMDE_TESTS_DEFINE_TEST(mm_shuffle_epi8),
|
2058
|
+
SIMDE_TESTS_DEFINE_TEST(mm_shuffle_pi8),
|
2059
|
+
SIMDE_TESTS_DEFINE_TEST(mm_sign_epi8),
|
2060
|
+
SIMDE_TESTS_DEFINE_TEST(mm_sign_epi16),
|
2061
|
+
SIMDE_TESTS_DEFINE_TEST(mm_sign_epi32),
|
2062
|
+
SIMDE_TESTS_DEFINE_TEST(mm_sign_pi8),
|
2063
|
+
SIMDE_TESTS_DEFINE_TEST(mm_sign_pi16),
|
2064
|
+
SIMDE_TESTS_DEFINE_TEST(mm_sign_pi32),
|
2065
|
+
#endif /* defined(SIMDE_SSSE3_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
|
2066
|
+
|
2067
|
+
{ NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }
|
2068
|
+
};
|
2069
|
+
|
2070
|
+
/* Returns a pointer to this file's suite object. The object is a
 * function-local static, so every call yields the same address.
 * Its first member is the string "/" followed by the stringified
 * SIMDE_TESTS_CURRENT_ISAX; its second is test_suite_tests.
 * NOTE(review): the remaining positional values (NULL, 1,
 * MUNIT_SUITE_OPTION_NONE) follow munit's MunitSuite member order —
 * confirm their meaning against munit.h. */
HEDLEY_C_DECL MunitSuite* SIMDE_TESTS_GENERATE_SYMBOL(suite)(void) {
  static MunitSuite isax_suite = {
    (char*) "/" HEDLEY_STRINGIFY(SIMDE_TESTS_CURRENT_ISAX),
    test_suite_tests,
    NULL,
    1,
    MUNIT_SUITE_OPTION_NONE
  };

  return &isax_suite;
}
|
2075
|
+
|
2076
|
+
#if defined(SIMDE_TESTS_SINGLE_ISAX)
|
2077
|
+
int main(int argc, char* argv[HEDLEY_ARRAY_PARAM(argc + 1)]) {
|
2078
|
+
static MunitSuite suite = { "", test_suite_tests, NULL, 1, MUNIT_SUITE_OPTION_NONE };
|
2079
|
+
|
2080
|
+
return munit_suite_main(&suite, NULL, argc, argv);
|
2081
|
+
}
|
2082
|
+
#endif /* defined(SIMDE_TESTS_SINGLE_ISAX) */
|
2083
|
+
|
2084
|
+
HEDLEY_DIAGNOSTIC_POP
|