minimap2 0.2.24.3 → 0.2.24.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (101)
  1. checksums.yaml +4 -4
  2. data/ext/minimap2/lib/simde/CONTRIBUTING.md +114 -0
  3. data/ext/minimap2/lib/simde/COPYING +20 -0
  4. data/ext/minimap2/lib/simde/README.md +333 -0
  5. data/ext/minimap2/lib/simde/amalgamate.py +58 -0
  6. data/ext/minimap2/lib/simde/meson.build +33 -0
  7. data/ext/minimap2/lib/simde/netlify.toml +20 -0
  8. data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +140 -0
  9. data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +137 -0
  10. data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +142 -0
  11. data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +145 -0
  12. data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +140 -0
  13. data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +145 -0
  14. data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +140 -0
  15. data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +143 -0
  16. data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +137 -0
  17. data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +141 -0
  18. data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +147 -0
  19. data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +141 -0
  20. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +134 -0
  21. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +138 -0
  22. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +134 -0
  23. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +137 -0
  24. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +131 -0
  25. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +135 -0
  26. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +141 -0
  27. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +135 -0
  28. data/ext/minimap2/lib/simde/simde/arm/neon.h +97 -0
  29. data/ext/minimap2/lib/simde/simde/check.h +267 -0
  30. data/ext/minimap2/lib/simde/simde/debug-trap.h +83 -0
  31. data/ext/minimap2/lib/simde/simde/hedley.h +1899 -0
  32. data/ext/minimap2/lib/simde/simde/simde-arch.h +445 -0
  33. data/ext/minimap2/lib/simde/simde/simde-common.h +697 -0
  34. data/ext/minimap2/lib/simde/simde/x86/avx.h +5385 -0
  35. data/ext/minimap2/lib/simde/simde/x86/avx2.h +2402 -0
  36. data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +391 -0
  37. data/ext/minimap2/lib/simde/simde/x86/avx512f.h +3389 -0
  38. data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +112 -0
  39. data/ext/minimap2/lib/simde/simde/x86/fma.h +659 -0
  40. data/ext/minimap2/lib/simde/simde/x86/mmx.h +2210 -0
  41. data/ext/minimap2/lib/simde/simde/x86/sse.h +3696 -0
  42. data/ext/minimap2/lib/simde/simde/x86/sse2.h +5991 -0
  43. data/ext/minimap2/lib/simde/simde/x86/sse3.h +343 -0
  44. data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +1783 -0
  45. data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +105 -0
  46. data/ext/minimap2/lib/simde/simde/x86/ssse3.h +1053 -0
  47. data/ext/minimap2/lib/simde/simde/x86/svml.h +543 -0
  48. data/ext/minimap2/lib/simde/test/CMakeLists.txt +166 -0
  49. data/ext/minimap2/lib/simde/test/arm/meson.build +4 -0
  50. data/ext/minimap2/lib/simde/test/arm/neon/meson.build +23 -0
  51. data/ext/minimap2/lib/simde/test/arm/neon/skel.c +871 -0
  52. data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +134 -0
  53. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +39 -0
  54. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +10 -0
  55. data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +1260 -0
  56. data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +873 -0
  57. data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +1084 -0
  58. data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +1260 -0
  59. data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +18 -0
  60. data/ext/minimap2/lib/simde/test/arm/test-arm.c +20 -0
  61. data/ext/minimap2/lib/simde/test/arm/test-arm.h +8 -0
  62. data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +171 -0
  63. data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +68 -0
  64. data/ext/minimap2/lib/simde/test/meson.build +64 -0
  65. data/ext/minimap2/lib/simde/test/munit/COPYING +21 -0
  66. data/ext/minimap2/lib/simde/test/munit/Makefile +55 -0
  67. data/ext/minimap2/lib/simde/test/munit/README.md +54 -0
  68. data/ext/minimap2/lib/simde/test/munit/example.c +351 -0
  69. data/ext/minimap2/lib/simde/test/munit/meson.build +37 -0
  70. data/ext/minimap2/lib/simde/test/munit/munit.c +2055 -0
  71. data/ext/minimap2/lib/simde/test/munit/munit.h +535 -0
  72. data/ext/minimap2/lib/simde/test/run-tests.c +20 -0
  73. data/ext/minimap2/lib/simde/test/run-tests.h +260 -0
  74. data/ext/minimap2/lib/simde/test/x86/avx.c +13752 -0
  75. data/ext/minimap2/lib/simde/test/x86/avx2.c +9977 -0
  76. data/ext/minimap2/lib/simde/test/x86/avx512bw.c +2664 -0
  77. data/ext/minimap2/lib/simde/test/x86/avx512f.c +10416 -0
  78. data/ext/minimap2/lib/simde/test/x86/avx512vl.c +210 -0
  79. data/ext/minimap2/lib/simde/test/x86/fma.c +2557 -0
  80. data/ext/minimap2/lib/simde/test/x86/meson.build +33 -0
  81. data/ext/minimap2/lib/simde/test/x86/mmx.c +2878 -0
  82. data/ext/minimap2/lib/simde/test/x86/skel.c +2984 -0
  83. data/ext/minimap2/lib/simde/test/x86/sse.c +5121 -0
  84. data/ext/minimap2/lib/simde/test/x86/sse2.c +9860 -0
  85. data/ext/minimap2/lib/simde/test/x86/sse3.c +486 -0
  86. data/ext/minimap2/lib/simde/test/x86/sse4.1.c +3446 -0
  87. data/ext/minimap2/lib/simde/test/x86/sse4.2.c +101 -0
  88. data/ext/minimap2/lib/simde/test/x86/ssse3.c +2084 -0
  89. data/ext/minimap2/lib/simde/test/x86/svml.c +1545 -0
  90. data/ext/minimap2/lib/simde/test/x86/test-avx.h +16 -0
  91. data/ext/minimap2/lib/simde/test/x86/test-avx512.h +25 -0
  92. data/ext/minimap2/lib/simde/test/x86/test-mmx.h +13 -0
  93. data/ext/minimap2/lib/simde/test/x86/test-sse.h +13 -0
  94. data/ext/minimap2/lib/simde/test/x86/test-sse2.h +13 -0
  95. data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +196 -0
  96. data/ext/minimap2/lib/simde/test/x86/test-x86.c +48 -0
  97. data/ext/minimap2/lib/simde/test/x86/test-x86.h +8 -0
  98. data/lib/minimap2/aligner.rb +2 -2
  99. data/lib/minimap2/ffi/constants.rb +3 -0
  100. data/lib/minimap2/version.rb +1 -1
  101. metadata +99 -3
@@ -0,0 +1,486 @@
1
+ /* Copyright (c) 2017 Evan Nemerson <evan@nemerson.com>
2
+ *
3
+ * Permission is hereby granted, free of charge, to any person
4
+ * obtaining a copy of this software and associated documentation
5
+ * files (the "Software"), to deal in the Software without
6
+ * restriction, including without limitation the rights to use, copy,
7
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
8
+ * of the Software, and to permit persons to whom the Software is
9
+ * furnished to do so, subject to the following conditions:
10
+ *
11
+ * The above copyright notice and this permission notice shall be
12
+ * included in all copies or substantial portions of the Software.
13
+ *
14
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ * SOFTWARE.
22
+ */
23
+
24
+ #define SIMDE_TESTS_CURRENT_ISAX sse3
25
+ #include <simde/x86/sse3.h>
26
+ #include <test/x86/test-sse2.h>
27
+
28
+ #if defined(SIMDE_SSE3_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)
29
+
30
+ static MunitResult
31
+ test_simde_mm_addsub_pd(const MunitParameter params[], void* data) {
32
+ (void) params;
33
+ (void) data;
34
+
35
+ const struct {
36
+ simde__m128d a;
37
+ simde__m128d b;
38
+ simde__m128d r;
39
+ } test_vec[8] = {
40
+ { simde_mm_set_pd( 476.02, -639.97),
41
+ simde_mm_set_pd( 710.19, -41.14),
42
+ simde_mm_set_pd( 1186.21, -598.83) },
43
+ { simde_mm_set_pd( 650.79, -848.27),
44
+ simde_mm_set_pd( 773.15, 711.98),
45
+ simde_mm_set_pd( 1423.94, -1560.25) },
46
+ { simde_mm_set_pd( -904.77, -447.30),
47
+ simde_mm_set_pd( -414.59, -690.17),
48
+ simde_mm_set_pd(-1319.36, 242.87) },
49
+ { simde_mm_set_pd( 727.10, -46.44),
50
+ simde_mm_set_pd( -635.38, 20.27),
51
+ simde_mm_set_pd( 91.72, -66.71) },
52
+ { simde_mm_set_pd( 74.87, -444.69),
53
+ simde_mm_set_pd( -222.00, 809.16),
54
+ simde_mm_set_pd( -147.13, -1253.85) },
55
+ { simde_mm_set_pd( 468.30, -546.58),
56
+ simde_mm_set_pd( 629.89, 504.95),
57
+ simde_mm_set_pd( 1098.19, -1051.53) },
58
+ { simde_mm_set_pd( 908.04, -977.41),
59
+ simde_mm_set_pd( 521.23, -249.10),
60
+ simde_mm_set_pd( 1429.27, -728.31) },
61
+ { simde_mm_set_pd( 107.41, -431.12),
62
+ simde_mm_set_pd( 91.73, 142.37),
63
+ simde_mm_set_pd( 199.14, -573.49) }
64
+ };
65
+
66
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
67
+ simde__m128d r = simde_mm_addsub_pd(test_vec[i].a, test_vec[i].b);
68
+ simde_assert_m128d_close(r, test_vec[i].r, 1);
69
+ }
70
+
71
+ return MUNIT_OK;
72
+ }
73
+
74
+ static MunitResult
75
+ test_simde_mm_addsub_ps(const MunitParameter params[], void* data) {
76
+ (void) params;
77
+ (void) data;
78
+
79
+ const struct {
80
+ simde__m128 a;
81
+ simde__m128 b;
82
+ simde__m128 r;
83
+ } test_vec[8] = {
84
+ { simde_mm_set_ps( 827.09f, 888.55f, 270.24f, 512.98f),
85
+ simde_mm_set_ps( 691.09f, 805.07f, 343.35f, 695.79f),
86
+ simde_mm_set_ps( 1518.18f, 83.48f, 613.59f, -182.81f) },
87
+ { simde_mm_set_ps( -122.09f, 678.17f, -910.24f, -995.98f),
88
+ simde_mm_set_ps( -197.90f, 177.04f, -469.81f, -451.24f),
89
+ simde_mm_set_ps( -319.99f, 501.13f, -1380.05f, -544.74f) },
90
+ { simde_mm_set_ps( 589.86f, -922.72f, 221.54f, -598.55f),
91
+ simde_mm_set_ps( -751.93f, 480.30f, 218.06f, 103.71f),
92
+ simde_mm_set_ps( -162.07f, -1403.02f, 439.60f, -702.26f) },
93
+ { simde_mm_set_ps( -375.10f, 590.75f, 672.39f, 216.94f),
94
+ simde_mm_set_ps( 184.12f, 575.54f, -189.52f, 591.53f),
95
+ simde_mm_set_ps( -190.98f, 15.21f, 482.87f, -374.59f) },
96
+ { simde_mm_set_ps( 838.92f, -777.48f, -357.82f, 473.60f),
97
+ simde_mm_set_ps( 655.27f, -960.61f, 194.84f, 470.24f),
98
+ simde_mm_set_ps( 1494.19f, 183.13f, -162.98f, 3.36f) },
99
+ { simde_mm_set_ps( 141.50f, 865.93f, 836.92f, 780.12f),
100
+ simde_mm_set_ps( 237.78f, -664.15f, 934.51f, 175.34f),
101
+ simde_mm_set_ps( 379.28f, 1530.08f, 1771.43f, 604.78f) },
102
+ { simde_mm_set_ps( -146.63f, 845.58f, -575.02f, -435.05f),
103
+ simde_mm_set_ps( 46.98f, 315.33f, -622.74f, -392.97f),
104
+ simde_mm_set_ps( -99.65f, 530.25f, -1197.76f, -42.08f) },
105
+ { simde_mm_set_ps( -588.54f, 208.80f, 44.42f, -534.81f),
106
+ simde_mm_set_ps( 849.82f, -315.73f, -758.03f, 754.33f),
107
+ simde_mm_set_ps( 261.28f, 524.53f, -713.61f, -1289.14f) }
108
+ };
109
+
110
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
111
+ simde__m128 r = simde_mm_addsub_ps(test_vec[i].a, test_vec[i].b);
112
+ simde_assert_m128_close(r, test_vec[i].r, 1);
113
+ }
114
+
115
+ return MUNIT_OK;
116
+ }
117
+
118
+ static MunitResult
119
+ test_simde_mm_hadd_pd(const MunitParameter params[], void* data) {
120
+ (void) params;
121
+ (void) data;
122
+
123
+ const struct {
124
+ simde__m128d a;
125
+ simde__m128d b;
126
+ simde__m128d r;
127
+ } test_vec[8] = {
128
+ { simde_mm_set_pd( 44.10, -542.35),
129
+ simde_mm_set_pd( -346.60, -427.89),
130
+ simde_mm_set_pd( -774.49, -498.25) },
131
+ { simde_mm_set_pd( 716.10, 840.74),
132
+ simde_mm_set_pd( -654.24, -672.74),
133
+ simde_mm_set_pd(-1326.98, 1556.84) },
134
+ { simde_mm_set_pd( -397.69, 265.98),
135
+ simde_mm_set_pd( -595.53, 562.15),
136
+ simde_mm_set_pd( -33.38, -131.71) },
137
+ { simde_mm_set_pd( 416.44, 929.19),
138
+ simde_mm_set_pd( -225.30, -546.63),
139
+ simde_mm_set_pd( -771.93, 1345.63) },
140
+ { simde_mm_set_pd( 506.73, 886.11),
141
+ simde_mm_set_pd( 344.49, 957.84),
142
+ simde_mm_set_pd( 1302.33, 1392.84) },
143
+ { simde_mm_set_pd( 886.60, -404.84),
144
+ simde_mm_set_pd( 386.06, -275.34),
145
+ simde_mm_set_pd( 110.72, 481.76) },
146
+ { simde_mm_set_pd( 4.86, 401.30),
147
+ simde_mm_set_pd( 316.75, 350.77),
148
+ simde_mm_set_pd( 667.52, 406.16) },
149
+ { simde_mm_set_pd( -409.95, 357.27),
150
+ simde_mm_set_pd( -949.43, -786.56),
151
+ simde_mm_set_pd(-1735.99, -52.68) }
152
+ };
153
+
154
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
155
+ simde__m128d r = simde_mm_hadd_pd(test_vec[i].a, test_vec[i].b);
156
+ simde_assert_m128d_close(r, test_vec[i].r, 1);
157
+ }
158
+
159
+ return MUNIT_OK;
160
+ }
161
+
162
+ static MunitResult
163
+ test_simde_mm_hadd_ps(const MunitParameter params[], void* data) {
164
+ (void) params;
165
+ (void) data;
166
+
167
+ const struct {
168
+ simde__m128 a;
169
+ simde__m128 b;
170
+ simde__m128 r;
171
+ } test_vec[8] = {
172
+ { simde_mm_set_ps( 732.68f, -915.27f, -191.77f, -862.58f),
173
+ simde_mm_set_ps( 81.80f, 547.56f, 259.82f, 55.94f),
174
+ simde_mm_set_ps( 629.36f, 315.76f, -182.59f, -1054.35f) },
175
+ { simde_mm_set_ps( 429.35f, -314.15f, -691.69f, -113.96f),
176
+ simde_mm_set_ps( -636.15f, 881.85f, 515.05f, -694.57f),
177
+ simde_mm_set_ps( 245.70f, -179.52f, 115.20f, -805.65f) },
178
+ { simde_mm_set_ps( 163.17f, 585.35f, 889.94f, 989.94f),
179
+ simde_mm_set_ps( 558.88f, -287.71f, 978.54f, -729.07f),
180
+ simde_mm_set_ps( 271.17f, 249.47f, 748.52f, 1879.88f) },
181
+ { simde_mm_set_ps( 396.52f, 255.51f, 531.47f, -510.49f),
182
+ simde_mm_set_ps( -162.17f, 929.03f, -176.85f, 827.75f),
183
+ simde_mm_set_ps( 766.86f, 650.90f, 652.03f, 20.98f) },
184
+ { simde_mm_set_ps( 348.14f, -946.97f, -177.74f, 520.68f),
185
+ simde_mm_set_ps( 339.94f, 653.25f, 168.00f, 216.81f),
186
+ simde_mm_set_ps( 993.19f, 384.81f, -598.83f, 342.94f) },
187
+ { simde_mm_set_ps( -341.20f, -395.72f, -751.71f, 483.71f),
188
+ simde_mm_set_ps( 214.25f, 187.29f, 627.65f, -993.70f),
189
+ simde_mm_set_ps( 401.54f, -366.05f, -736.92f, -268.00f) },
190
+ { simde_mm_set_ps( -117.08f, -155.79f, 327.94f, -604.45f),
191
+ simde_mm_set_ps( -924.11f, -3.93f, -496.48f, -281.24f),
192
+ simde_mm_set_ps( -928.04f, -777.72f, -272.87f, -276.51f) },
193
+ { simde_mm_set_ps( -207.92f, 955.09f, 949.83f, -476.81f),
194
+ simde_mm_set_ps( -883.98f, 810.86f, 947.09f, -558.58f),
195
+ simde_mm_set_ps( -73.12f, 388.51f, 747.17f, 473.02f) }
196
+ };
197
+
198
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
199
+ simde__m128 r = simde_mm_hadd_ps(test_vec[i].a, test_vec[i].b);
200
+ simde_assert_m128_close(r, test_vec[i].r, 1);
201
+ }
202
+
203
+ return MUNIT_OK;
204
+ }
205
+
206
+ static MunitResult
207
+ test_simde_mm_hsub_pd(const MunitParameter params[], void* data) {
208
+ (void) params;
209
+ (void) data;
210
+
211
+ const struct {
212
+ simde__m128d a;
213
+ simde__m128d b;
214
+ simde__m128d r;
215
+ } test_vec[8] = {
216
+ { simde_mm_set_pd( -15.50, 258.33),
217
+ simde_mm_set_pd( 484.94, -432.56),
218
+ simde_mm_set_pd( -917.50, 273.83) },
219
+ { simde_mm_set_pd( 50.11, -735.38),
220
+ simde_mm_set_pd( 70.36, 538.50),
221
+ simde_mm_set_pd( 468.14, -785.49) },
222
+ { simde_mm_set_pd( 140.13, -672.00),
223
+ simde_mm_set_pd( -602.17, -745.12),
224
+ simde_mm_set_pd( -142.95, -812.13) },
225
+ { simde_mm_set_pd( 1.89, -114.93),
226
+ simde_mm_set_pd( 125.81, 137.32),
227
+ simde_mm_set_pd( 11.51, -116.82) },
228
+ { simde_mm_set_pd( -579.13, -899.36),
229
+ simde_mm_set_pd( 893.51, 328.15),
230
+ simde_mm_set_pd( -565.36, -320.23) },
231
+ { simde_mm_set_pd( -275.68, -217.61),
232
+ simde_mm_set_pd( 167.25, -93.39),
233
+ simde_mm_set_pd( -260.64, 58.07) },
234
+ { simde_mm_set_pd( 312.59, 137.63),
235
+ simde_mm_set_pd( 589.59, 751.69),
236
+ simde_mm_set_pd( 162.10, -174.96) },
237
+ { simde_mm_set_pd( 359.94, -880.43),
238
+ simde_mm_set_pd( 239.69, -581.16),
239
+ simde_mm_set_pd( -820.85, -1240.37) }
240
+ };
241
+
242
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
243
+ simde__m128d r = simde_mm_hsub_pd(test_vec[i].a, test_vec[i].b);
244
+ simde_assert_m128d_close(r, test_vec[i].r, 1);
245
+ }
246
+
247
+ return MUNIT_OK;
248
+ }
249
+
250
+ static MunitResult
251
+ test_simde_mm_hsub_ps(const MunitParameter params[], void* data) {
252
+ (void) params;
253
+ (void) data;
254
+
255
+ const struct {
256
+ simde__m128 a;
257
+ simde__m128 b;
258
+ simde__m128 r;
259
+ } test_vec[8] = {
260
+ { simde_mm_set_ps( 50.11f, -735.38f, -15.50f, 258.33f),
261
+ simde_mm_set_ps( 70.36f, 538.50f, 484.94f, -432.56f),
262
+ simde_mm_set_ps( 468.14f, -917.50f, -785.49f, 273.83f) },
263
+ { simde_mm_set_ps( 1.89f, -114.93f, 140.13f, -672.00f),
264
+ simde_mm_set_ps( 125.81f, 137.32f, -602.17f, -745.12f),
265
+ simde_mm_set_ps( 11.51f, -142.95f, -116.82f, -812.13f) },
266
+ { simde_mm_set_ps( -275.68f, -217.61f, -579.13f, -899.36f),
267
+ simde_mm_set_ps( 167.25f, -93.39f, 893.51f, 328.15f),
268
+ simde_mm_set_ps( -260.64f, -565.36f, 58.07f, -320.23f) },
269
+ { simde_mm_set_ps( 359.94f, -880.43f, 312.59f, 137.63f),
270
+ simde_mm_set_ps( 239.69f, -581.16f, 589.59f, 751.69f),
271
+ simde_mm_set_ps( -820.85f, 162.10f, -1240.37f, -174.96f) },
272
+ { simde_mm_set_ps( 923.43f, 905.56f, -615.92f, 454.60f),
273
+ simde_mm_set_ps( 375.63f, 326.29f, -819.79f, -550.42f),
274
+ simde_mm_set_ps( -49.34f, 269.37f, -17.87f, 1070.52f) },
275
+ { simde_mm_set_ps( 344.96f, -84.73f, -925.77f, 984.26f),
276
+ simde_mm_set_ps( 584.98f, 981.58f, -824.48f, 268.25f),
277
+ simde_mm_set_ps( 396.60f, 1092.73f, -429.69f, 1910.03f) },
278
+ { simde_mm_set_ps( 405.32f, -74.19f, 712.30f, 820.93f),
279
+ simde_mm_set_ps( -939.26f, -768.80f, -854.21f, -69.68f),
280
+ simde_mm_set_ps( 170.46f, 784.53f, -479.51f, 108.63f) },
281
+ { simde_mm_set_ps( -199.94f, 783.57f, 779.03f, 578.25f),
282
+ simde_mm_set_ps( 177.19f, -819.96f, -14.40f, 361.82f),
283
+ simde_mm_set_ps( -997.15f, 376.22f, 983.51f, -200.78f) }
284
+ };
285
+
286
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
287
+ simde__m128 r = simde_mm_hsub_ps(test_vec[i].a, test_vec[i].b);
288
+ simde_assert_m128_close(r, test_vec[i].r, 1);
289
+ }
290
+
291
+ return MUNIT_OK;
292
+ }
293
+
294
+ static MunitResult
295
+ test_simde_mm_lddqu_si128(const MunitParameter params[], void* data) {
296
+ (void) params;
297
+ (void) data;
298
+
299
+ const struct {
300
+ simde__m128i a;
301
+ simde__m128i r;
302
+ } test_vec[8] = {
303
+ { simde_mm_set_epi8(INT8_C(-112), INT8_C( 117), INT8_C( -34), INT8_C( 65), INT8_C( -1), INT8_C( 38), INT8_C( 89), INT8_C(-126),
304
+ INT8_C( 67), INT8_C( -47), INT8_C( -14), INT8_C( -14), INT8_C( -36), INT8_C( 93), INT8_C( 67), INT8_C( -57)),
305
+ simde_mm_set_epi8(INT8_C(-112), INT8_C( 117), INT8_C( -34), INT8_C( 65), INT8_C( -1), INT8_C( 38), INT8_C( 89), INT8_C(-126),
306
+ INT8_C( 67), INT8_C( -47), INT8_C( -14), INT8_C( -14), INT8_C( -36), INT8_C( 93), INT8_C( 67), INT8_C( -57)) },
307
+ { simde_mm_set_epi8(INT8_C( 35), INT8_C( -25), INT8_C( 29), INT8_C(-117), INT8_C( -37), INT8_C( 120), INT8_C(-105), INT8_C( 106),
308
+ INT8_C( 4), INT8_C( 73), INT8_C( -55), INT8_C( -70), INT8_C( 11), INT8_C( -15), INT8_C( -35), INT8_C(-116)),
309
+ simde_mm_set_epi8(INT8_C( 35), INT8_C( -25), INT8_C( 29), INT8_C(-117), INT8_C( -37), INT8_C( 120), INT8_C(-105), INT8_C( 106),
310
+ INT8_C( 4), INT8_C( 73), INT8_C( -55), INT8_C( -70), INT8_C( 11), INT8_C( -15), INT8_C( -35), INT8_C(-116)) },
311
+ { simde_mm_set_epi8(INT8_C(-101), INT8_C(-119), INT8_C( 63), INT8_C(-115), INT8_C( -96), INT8_C( -31), INT8_C( -21), INT8_C( 40),
312
+ INT8_C( 85), INT8_C( 109), INT8_C(-125), INT8_C( -15), INT8_C( 21), INT8_C( -59), INT8_C( -50), INT8_C( 101)),
313
+ simde_mm_set_epi8(INT8_C(-101), INT8_C(-119), INT8_C( 63), INT8_C(-115), INT8_C( -96), INT8_C( -31), INT8_C( -21), INT8_C( 40),
314
+ INT8_C( 85), INT8_C( 109), INT8_C(-125), INT8_C( -15), INT8_C( 21), INT8_C( -59), INT8_C( -50), INT8_C( 101)) },
315
+ { simde_mm_set_epi8(INT8_C( -59), INT8_C( 124), INT8_C( 14), INT8_C( -11), INT8_C( 3), INT8_C( -21), INT8_C( 36), INT8_C(-103),
316
+ INT8_C( -34), INT8_C( -66), INT8_C( 35), INT8_C( 90), INT8_C( 43), INT8_C( -21), INT8_C( -53), INT8_C( -61)),
317
+ simde_mm_set_epi8(INT8_C( -59), INT8_C( 124), INT8_C( 14), INT8_C( -11), INT8_C( 3), INT8_C( -21), INT8_C( 36), INT8_C(-103),
318
+ INT8_C( -34), INT8_C( -66), INT8_C( 35), INT8_C( 90), INT8_C( 43), INT8_C( -21), INT8_C( -53), INT8_C( -61)) },
319
+ { simde_mm_set_epi8(INT8_C( -66), INT8_C( -33), INT8_C( 33), INT8_C( -43), INT8_C( 92), INT8_C( -19), INT8_C( -42), INT8_C(-112),
320
+ INT8_C( -49), INT8_C( 23), INT8_C( 30), INT8_C( 67), INT8_C( -77), INT8_C( 104), INT8_C( 55), INT8_C( -77)),
321
+ simde_mm_set_epi8(INT8_C( -66), INT8_C( -33), INT8_C( 33), INT8_C( -43), INT8_C( 92), INT8_C( -19), INT8_C( -42), INT8_C(-112),
322
+ INT8_C( -49), INT8_C( 23), INT8_C( 30), INT8_C( 67), INT8_C( -77), INT8_C( 104), INT8_C( 55), INT8_C( -77)) },
323
+ { simde_mm_set_epi8(INT8_C(-109), INT8_C( -50), INT8_C(-103), INT8_C( -95), INT8_C( 10), INT8_C( 39), INT8_C( -20), INT8_C( -38),
324
+ INT8_C( -87), INT8_C( -89), INT8_C(-100), INT8_C( -30), INT8_C( 0), INT8_C( 13), INT8_C( 36), INT8_C(-101)),
325
+ simde_mm_set_epi8(INT8_C(-109), INT8_C( -50), INT8_C(-103), INT8_C( -95), INT8_C( 10), INT8_C( 39), INT8_C( -20), INT8_C( -38),
326
+ INT8_C( -87), INT8_C( -89), INT8_C(-100), INT8_C( -30), INT8_C( 0), INT8_C( 13), INT8_C( 36), INT8_C(-101)) },
327
+ { simde_mm_set_epi8(INT8_C( 112), INT8_C( 112), INT8_C( -55), INT8_C( -93), INT8_C( -81), INT8_C( 57), INT8_C( 84), INT8_C( -3),
328
+ INT8_C( -51), INT8_C( -7), INT8_C( 0), INT8_C(-102), INT8_C( 82), INT8_C( -68), INT8_C( 109), INT8_C( 126)),
329
+ simde_mm_set_epi8(INT8_C( 112), INT8_C( 112), INT8_C( -55), INT8_C( -93), INT8_C( -81), INT8_C( 57), INT8_C( 84), INT8_C( -3),
330
+ INT8_C( -51), INT8_C( -7), INT8_C( 0), INT8_C(-102), INT8_C( 82), INT8_C( -68), INT8_C( 109), INT8_C( 126)) },
331
+ { simde_mm_set_epi8(INT8_C( 85), INT8_C( 18), INT8_C( 96), INT8_C( -54), INT8_C( -78), INT8_C( 122), INT8_C(-109), INT8_C( 31),
332
+ INT8_C( 104), INT8_C( -42), INT8_C( 93), INT8_C( -40), INT8_C( -73), INT8_C( 110), INT8_C( -72), INT8_C( -16)),
333
+ simde_mm_set_epi8(INT8_C( 85), INT8_C( 18), INT8_C( 96), INT8_C( -54), INT8_C( -78), INT8_C( 122), INT8_C(-109), INT8_C( 31),
334
+ INT8_C( 104), INT8_C( -42), INT8_C( 93), INT8_C( -40), INT8_C( -73), INT8_C( 110), INT8_C( -72), INT8_C( -16)) }
335
+ };
336
+
337
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
338
+ simde__m128i r = simde_mm_lddqu_si128(&test_vec[i].a);
339
+ simde_assert_m128i_i8(r, ==, test_vec[i].r);
340
+ }
341
+
342
+ return MUNIT_OK;
343
+ }
344
+
345
+ static MunitResult
346
+ test_simde_mm_movedup_pd(const MunitParameter params[], void* data) {
347
+ (void) params;
348
+ (void) data;
349
+
350
+ const struct {
351
+ simde__m128d a;
352
+ simde__m128d r;
353
+ } test_vec[8] = {
354
+ { simde_mm_set_pd( 850.06, 701.47),
355
+ simde_mm_set_pd( 701.47, 701.47) },
356
+ { simde_mm_set_pd( -959.23, 823.21),
357
+ simde_mm_set_pd( 823.21, 823.21) },
358
+ { simde_mm_set_pd( 37.96, 501.12),
359
+ simde_mm_set_pd( 501.12, 501.12) },
360
+ { simde_mm_set_pd( 288.76, -831.45),
361
+ simde_mm_set_pd( -831.45, -831.45) },
362
+ { simde_mm_set_pd( -93.81, 587.70),
363
+ simde_mm_set_pd( 587.70, 587.70) },
364
+ { simde_mm_set_pd( 524.72, 282.96),
365
+ simde_mm_set_pd( 282.96, 282.96) },
366
+ { simde_mm_set_pd( -824.72, 818.07),
367
+ simde_mm_set_pd( 818.07, 818.07) },
368
+ { simde_mm_set_pd( 136.95, -565.46),
369
+ simde_mm_set_pd( -565.46, -565.46) }
370
+ };
371
+
372
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
373
+ simde__m128d r = simde_mm_movedup_pd(test_vec[i].a);
374
+ simde_assert_m128d_close(r, test_vec[i].r, 1);
375
+ }
376
+
377
+ return MUNIT_OK;
378
+ }
379
+
380
+ static MunitResult
381
+ test_simde_mm_movehdup_ps(const MunitParameter params[], void* data) {
382
+ (void) params;
383
+ (void) data;
384
+
385
+ const struct {
386
+ simde__m128 a;
387
+ simde__m128 r;
388
+ } test_vec[8] = {
389
+ { simde_mm_set_ps( -122.14f, 610.86f, -649.87f, 155.05f),
390
+ simde_mm_set_ps( -122.14f, -122.14f, -649.87f, -649.87f) },
391
+ { simde_mm_set_ps( 559.30f, 847.22f, 946.27f, 786.62f),
392
+ simde_mm_set_ps( 559.30f, 559.30f, 946.27f, 946.27f) },
393
+ { simde_mm_set_ps( -110.32f, 87.26f, -69.05f, -39.46f),
394
+ simde_mm_set_ps( -110.32f, -110.32f, -69.05f, -69.05f) },
395
+ { simde_mm_set_ps( -91.69f, -770.73f, 838.47f, 700.02f),
396
+ simde_mm_set_ps( -91.69f, -91.69f, 838.47f, 838.47f) },
397
+ { simde_mm_set_ps( 54.77f, -632.77f, -6.45f, -696.48f),
398
+ simde_mm_set_ps( 54.77f, 54.77f, -6.45f, -6.45f) },
399
+ { simde_mm_set_ps( -313.08f, 792.67f, -389.34f, -153.47f),
400
+ simde_mm_set_ps( -313.08f, -313.08f, -389.34f, -389.34f) },
401
+ { simde_mm_set_ps( -873.54f, 935.41f, -178.48f, 320.54f),
402
+ simde_mm_set_ps( -873.54f, -873.54f, -178.48f, -178.48f) },
403
+ { simde_mm_set_ps( 886.69f, -558.71f, 768.00f, 565.76f),
404
+ simde_mm_set_ps( 886.69f, 886.69f, 768.00f, 768.00f) }
405
+ };
406
+
407
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
408
+ simde__m128 r = simde_mm_movehdup_ps(test_vec[i].a);
409
+ simde_assert_m128_close(r, test_vec[i].r, 1);
410
+ }
411
+
412
+ return MUNIT_OK;
413
+ }
414
+
415
+ static MunitResult
416
+ test_simde_mm_moveldup_ps(const MunitParameter params[], void* data) {
417
+ (void) params;
418
+ (void) data;
419
+
420
+ const struct {
421
+ simde__m128 a;
422
+ simde__m128 r;
423
+ } test_vec[8] = {
424
+ { simde_mm_set_ps( -122.14f, 610.86f, -649.87f, 155.05f),
425
+ simde_mm_set_ps( 610.86f, 610.86f, 155.05f, 155.05f) },
426
+ { simde_mm_set_ps( 559.30f, 847.22f, 946.27f, 786.62f),
427
+ simde_mm_set_ps( 847.22f, 847.22f, 786.62f, 786.62f) },
428
+ { simde_mm_set_ps( -110.32f, 87.26f, -69.05f, -39.46f),
429
+ simde_mm_set_ps( 87.26f, 87.26f, -39.46f, -39.46f) },
430
+ { simde_mm_set_ps( -91.69f, -770.73f, 838.47f, 700.02f),
431
+ simde_mm_set_ps( -770.73f, -770.73f, 700.02f, 700.02f) },
432
+ { simde_mm_set_ps( 54.77f, -632.77f, -6.45f, -696.48f),
433
+ simde_mm_set_ps( -632.77f, -632.77f, -696.48f, -696.48f) },
434
+ { simde_mm_set_ps( -313.08f, 792.67f, -389.34f, -153.47f),
435
+ simde_mm_set_ps( 792.67f, 792.67f, -153.47f, -153.47f) },
436
+ { simde_mm_set_ps( -873.54f, 935.41f, -178.48f, 320.54f),
437
+ simde_mm_set_ps( 935.41f, 935.41f, 320.54f, 320.54f) },
438
+ { simde_mm_set_ps( 886.69f, -558.71f, 768.00f, 565.76f),
439
+ simde_mm_set_ps( -558.71f, -558.71f, 565.76f, 565.76f) }
440
+ };
441
+
442
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
443
+ simde__m128 r = simde_mm_moveldup_ps(test_vec[i].a);
444
+ simde_assert_m128_close(r, test_vec[i].r, 1);
445
+ }
446
+
447
+ return MUNIT_OK;
448
+ }
449
+
450
+ #endif /* defined(SIMDE_SSE3_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
451
+
452
+ HEDLEY_DIAGNOSTIC_PUSH
453
+ HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL
454
+
455
+ static MunitTest test_suite_tests[] = {
456
+ #if defined(SIMDE_SSE3_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)
457
+ SIMDE_TESTS_DEFINE_TEST(mm_addsub_pd),
458
+ SIMDE_TESTS_DEFINE_TEST(mm_addsub_ps),
459
+ SIMDE_TESTS_DEFINE_TEST(mm_hadd_pd),
460
+ SIMDE_TESTS_DEFINE_TEST(mm_hadd_ps),
461
+ SIMDE_TESTS_DEFINE_TEST(mm_hsub_pd),
462
+ SIMDE_TESTS_DEFINE_TEST(mm_hsub_ps),
463
+ SIMDE_TESTS_DEFINE_TEST(mm_lddqu_si128),
464
+ SIMDE_TESTS_DEFINE_TEST(mm_movedup_pd),
465
+ SIMDE_TESTS_DEFINE_TEST(mm_movehdup_ps),
466
+ SIMDE_TESTS_DEFINE_TEST(mm_moveldup_ps),
467
+ #endif /* defined(SIMDE_SSE3_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
468
+
469
+ { NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }
470
+ };
471
+
472
+ HEDLEY_C_DECL MunitSuite* SIMDE_TESTS_GENERATE_SYMBOL(suite)(void) {
473
+ static MunitSuite suite = { (char*) "/" HEDLEY_STRINGIFY(SIMDE_TESTS_CURRENT_ISAX), test_suite_tests, NULL, 1, MUNIT_SUITE_OPTION_NONE };
474
+
475
+ return &suite;
476
+ }
477
+
478
+ #if defined(SIMDE_TESTS_SINGLE_ISAX)
479
+ int main(int argc, char* argv[HEDLEY_ARRAY_PARAM(argc + 1)]) {
480
+ static MunitSuite suite = { "", test_suite_tests, NULL, 1, MUNIT_SUITE_OPTION_NONE };
481
+
482
+ return munit_suite_main(&suite, NULL, argc, argv);
483
+ }
484
+ #endif /* defined(SIMDE_TESTS_SINGLE_ISAX) */
485
+
486
+ HEDLEY_DIAGNOSTIC_POP