minimap2 0.2.24.3 → 0.2.24.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/ext/minimap2/lib/simde/CONTRIBUTING.md +114 -0
  3. data/ext/minimap2/lib/simde/COPYING +20 -0
  4. data/ext/minimap2/lib/simde/README.md +333 -0
  5. data/ext/minimap2/lib/simde/amalgamate.py +58 -0
  6. data/ext/minimap2/lib/simde/meson.build +33 -0
  7. data/ext/minimap2/lib/simde/netlify.toml +20 -0
  8. data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +140 -0
  9. data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +137 -0
  10. data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +142 -0
  11. data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +145 -0
  12. data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +140 -0
  13. data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +145 -0
  14. data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +140 -0
  15. data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +143 -0
  16. data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +137 -0
  17. data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +141 -0
  18. data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +147 -0
  19. data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +141 -0
  20. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +134 -0
  21. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +138 -0
  22. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +134 -0
  23. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +137 -0
  24. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +131 -0
  25. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +135 -0
  26. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +141 -0
  27. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +135 -0
  28. data/ext/minimap2/lib/simde/simde/arm/neon.h +97 -0
  29. data/ext/minimap2/lib/simde/simde/check.h +267 -0
  30. data/ext/minimap2/lib/simde/simde/debug-trap.h +83 -0
  31. data/ext/minimap2/lib/simde/simde/hedley.h +1899 -0
  32. data/ext/minimap2/lib/simde/simde/simde-arch.h +445 -0
  33. data/ext/minimap2/lib/simde/simde/simde-common.h +697 -0
  34. data/ext/minimap2/lib/simde/simde/x86/avx.h +5385 -0
  35. data/ext/minimap2/lib/simde/simde/x86/avx2.h +2402 -0
  36. data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +391 -0
  37. data/ext/minimap2/lib/simde/simde/x86/avx512f.h +3389 -0
  38. data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +112 -0
  39. data/ext/minimap2/lib/simde/simde/x86/fma.h +659 -0
  40. data/ext/minimap2/lib/simde/simde/x86/mmx.h +2210 -0
  41. data/ext/minimap2/lib/simde/simde/x86/sse.h +3696 -0
  42. data/ext/minimap2/lib/simde/simde/x86/sse2.h +5991 -0
  43. data/ext/minimap2/lib/simde/simde/x86/sse3.h +343 -0
  44. data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +1783 -0
  45. data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +105 -0
  46. data/ext/minimap2/lib/simde/simde/x86/ssse3.h +1053 -0
  47. data/ext/minimap2/lib/simde/simde/x86/svml.h +543 -0
  48. data/ext/minimap2/lib/simde/test/CMakeLists.txt +166 -0
  49. data/ext/minimap2/lib/simde/test/arm/meson.build +4 -0
  50. data/ext/minimap2/lib/simde/test/arm/neon/meson.build +23 -0
  51. data/ext/minimap2/lib/simde/test/arm/neon/skel.c +871 -0
  52. data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +134 -0
  53. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +39 -0
  54. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +10 -0
  55. data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +1260 -0
  56. data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +873 -0
  57. data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +1084 -0
  58. data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +1260 -0
  59. data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +18 -0
  60. data/ext/minimap2/lib/simde/test/arm/test-arm.c +20 -0
  61. data/ext/minimap2/lib/simde/test/arm/test-arm.h +8 -0
  62. data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +171 -0
  63. data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +68 -0
  64. data/ext/minimap2/lib/simde/test/meson.build +64 -0
  65. data/ext/minimap2/lib/simde/test/munit/COPYING +21 -0
  66. data/ext/minimap2/lib/simde/test/munit/Makefile +55 -0
  67. data/ext/minimap2/lib/simde/test/munit/README.md +54 -0
  68. data/ext/minimap2/lib/simde/test/munit/example.c +351 -0
  69. data/ext/minimap2/lib/simde/test/munit/meson.build +37 -0
  70. data/ext/minimap2/lib/simde/test/munit/munit.c +2055 -0
  71. data/ext/minimap2/lib/simde/test/munit/munit.h +535 -0
  72. data/ext/minimap2/lib/simde/test/run-tests.c +20 -0
  73. data/ext/minimap2/lib/simde/test/run-tests.h +260 -0
  74. data/ext/minimap2/lib/simde/test/x86/avx.c +13752 -0
  75. data/ext/minimap2/lib/simde/test/x86/avx2.c +9977 -0
  76. data/ext/minimap2/lib/simde/test/x86/avx512bw.c +2664 -0
  77. data/ext/minimap2/lib/simde/test/x86/avx512f.c +10416 -0
  78. data/ext/minimap2/lib/simde/test/x86/avx512vl.c +210 -0
  79. data/ext/minimap2/lib/simde/test/x86/fma.c +2557 -0
  80. data/ext/minimap2/lib/simde/test/x86/meson.build +33 -0
  81. data/ext/minimap2/lib/simde/test/x86/mmx.c +2878 -0
  82. data/ext/minimap2/lib/simde/test/x86/skel.c +2984 -0
  83. data/ext/minimap2/lib/simde/test/x86/sse.c +5121 -0
  84. data/ext/minimap2/lib/simde/test/x86/sse2.c +9860 -0
  85. data/ext/minimap2/lib/simde/test/x86/sse3.c +486 -0
  86. data/ext/minimap2/lib/simde/test/x86/sse4.1.c +3446 -0
  87. data/ext/minimap2/lib/simde/test/x86/sse4.2.c +101 -0
  88. data/ext/minimap2/lib/simde/test/x86/ssse3.c +2084 -0
  89. data/ext/minimap2/lib/simde/test/x86/svml.c +1545 -0
  90. data/ext/minimap2/lib/simde/test/x86/test-avx.h +16 -0
  91. data/ext/minimap2/lib/simde/test/x86/test-avx512.h +25 -0
  92. data/ext/minimap2/lib/simde/test/x86/test-mmx.h +13 -0
  93. data/ext/minimap2/lib/simde/test/x86/test-sse.h +13 -0
  94. data/ext/minimap2/lib/simde/test/x86/test-sse2.h +13 -0
  95. data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +196 -0
  96. data/ext/minimap2/lib/simde/test/x86/test-x86.c +48 -0
  97. data/ext/minimap2/lib/simde/test/x86/test-x86.h +8 -0
  98. data/lib/minimap2/aligner.rb +2 -2
  99. data/lib/minimap2/ffi/constants.rb +3 -0
  100. data/lib/minimap2/version.rb +1 -1
  101. metadata +99 -3
@@ -0,0 +1,659 @@
1
+ /* Permission is hereby granted, free of charge, to any person
2
+ * obtaining a copy of this software and associated documentation
3
+ * files (the "Software"), to deal in the Software without
4
+ * restriction, including without limitation the rights to use, copy,
5
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
6
+ * of the Software, and to permit persons to whom the Software is
7
+ * furnished to do so, subject to the following conditions:
8
+ *
9
+ * The above copyright notice and this permission notice shall be
10
+ * included in all copies or substantial portions of the Software.
11
+ *
12
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
14
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
16
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
17
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ * SOFTWARE.
20
+ *
21
+ * Copyright:
22
+ * 2019 Evan Nemerson <evan@nemerson.com>
23
+ */
24
+
25
+ #include "sse.h"
26
+ #include "sse2.h"
27
+ #if !defined(SIMDE__FMA_H)
28
+ # if !defined(SIMDE__FMA_H)
29
+ # define SIMDE__FMA_H
30
+ # endif
31
+ # include "avx.h"
32
+
33
+ HEDLEY_DIAGNOSTIC_PUSH
34
+ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
35
+
36
/* Select the FMA backend for this header.
 *
 * Any SIMDE_FMA_NATIVE left over from elsewhere is dropped first so the
 * decision below is made from the architecture and opt-out macros alone. */
#  if defined(SIMDE_FMA_NATIVE)
#    undef SIMDE_FMA_NATIVE
#  endif
#  if defined(SIMDE_ARCH_X86_FMA) && !defined(SIMDE_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
#    define SIMDE_FMA_NATIVE
#  elif defined(SIMDE_ARCH_ARM_NEON) && !defined(SIMDE_FMA_NO_NEON) && !defined(SIMDE_NO_NEON)
#    define SIMDE_FMA_NEON
#  endif

/* The chosen FMA backend is only kept when the matching AVX backend is also
 * active: native FMA without native AVX is a hard error if it was forced and
 * is otherwise dropped with a warning; FMA NEON without AVX NEON is dropped
 * with a warning. */
#  if defined(SIMDE_FMA_NATIVE) && !defined(SIMDE_AVX_NATIVE)
#    if defined(SIMDE_FMA_FORCE_NATIVE)
#      error Native FMA support requires native AVX support
#    else
HEDLEY_WARNING("Native FMA support requires native AVX support, disabling")
#      undef SIMDE_FMA_NATIVE
#    endif
#  elif defined(SIMDE_FMA_NEON) && !defined(SIMDE_AVX_NEON)
HEDLEY_WARNING("FMA NEON support requires AVX NEON support, disabling")
#    undef SIMDE_FMA_NEON
#  endif

/* The native intrinsics used by the functions below come from <immintrin.h>. */
#  if defined(SIMDE_FMA_NATIVE)
#    include <immintrin.h>
#  endif

/* The _mm_* / _mm256_* alias macros are only emitted when the native
 * intrinsics are not in use and the user asked for native aliases. */
#  if !defined(SIMDE_FMA_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
#    define SIMDE_FMA_ENABLE_NATIVE_ALIASES
#  endif
64
+
65
+ SIMDE__BEGIN_DECLS
66
+
67
+ SIMDE__FUNCTION_ATTRIBUTES
68
+ simde__m128d
69
+ simde_mm_fmadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) {
70
+ #if defined(SIMDE_FMA_NATIVE)
71
+ return _mm_fmadd_pd(a, b, c);
72
+ #else
73
+ return simde_mm_add_pd(simde_mm_mul_pd(a, b), c);
74
+ #endif
75
+ }
76
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
77
+ # define _mm_fmadd_pd(a, b, c) simde_mm_fmadd_pd(a, b, c)
78
+ #endif
79
+
80
+ SIMDE__FUNCTION_ATTRIBUTES
81
+ simde__m256d
82
+ simde_mm256_fmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
83
+ #if defined(SIMDE_FMA_NATIVE)
84
+ return _mm256_fmadd_pd(a, b, c);
85
+ #else
86
+ return simde_mm256_add_pd(simde_mm256_mul_pd(a, b), c);
87
+ #endif
88
+ }
89
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
90
+ # define _mm_fmadd_pd(a, b, c) simde_mm_fmadd_pd(a, b, c)
91
+ #endif
92
+
93
+ SIMDE__FUNCTION_ATTRIBUTES
94
+ simde__m128
95
+ simde_mm_fmadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) {
96
+ #if defined(SIMDE_FMA_NATIVE)
97
+ return _mm_fmadd_ps(a, b, c);
98
+ #else
99
+ return simde_mm_add_ps(simde_mm_mul_ps(a, b), c);
100
+ #endif
101
+ }
102
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
103
+ # define _mm_fmadd_ps(a, b, c) simde_mm_fmadd_ps(a, b, c)
104
+ #endif
105
+
106
+ SIMDE__FUNCTION_ATTRIBUTES
107
+ simde__m256
108
+ simde_mm256_fmadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
109
+ #if defined(SIMDE_FMA_NATIVE)
110
+ return _mm256_fmadd_ps(a, b, c);
111
+ #else
112
+ return simde_mm256_add_ps(simde_mm256_mul_ps(a, b), c);
113
+ #endif
114
+ }
115
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
116
+ # define _mm_fmadd_ps(a, b, c) simde_mm_fmadd_ps(a, b, c)
117
+ #endif
118
+
119
+ SIMDE__FUNCTION_ATTRIBUTES
120
+ simde__m128d
121
+ simde_mm_fmadd_sd (simde__m128d a, simde__m128d b, simde__m128d c) {
122
+ #if defined(SIMDE_FMA_NATIVE)
123
+ return _mm_fmadd_sd(a, b, c);
124
+ #else
125
+ return simde_mm_add_sd(simde_mm_mul_sd(a, b), c);
126
+ #endif
127
+ }
128
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
129
+ # define _mm_fmadd_sd(a, b, c) simde_mm_fmadd_sd(a, b, c)
130
+ #endif
131
+
132
+ SIMDE__FUNCTION_ATTRIBUTES
133
+ simde__m128
134
+ simde_mm_fmadd_ss (simde__m128 a, simde__m128 b, simde__m128 c) {
135
+ #if defined(SIMDE_FMA_NATIVE)
136
+ return _mm_fmadd_ss(a, b, c);
137
+ #else
138
+ return simde_mm_add_ss(simde_mm_mul_ss(a, b), c);
139
+ #endif
140
+ }
141
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
142
+ # define _mm_fmadd_ss(a, b, c) simde_mm_fmadd_ss(a, b, c)
143
+ #endif
144
+
145
+ SIMDE__FUNCTION_ATTRIBUTES
146
+ simde__m128d
147
+ simde_mm_fmaddsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) {
148
+ #if defined(SIMDE_FMA_NATIVE)
149
+ return _mm_fmaddsub_pd(a, b, c);
150
+ #else
151
+ return simde_mm_addsub_pd(simde_mm_mul_pd(a, b), c);
152
+ #endif
153
+ }
154
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
155
+ # define _mm_fmaddsub_pd(a, b, c) simde_mm_fmaddsub_pd(a, b, c)
156
+ #endif
157
+
158
+ SIMDE__FUNCTION_ATTRIBUTES
159
+ simde__m256d
160
+ simde_mm256_fmaddsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
161
+ #if defined(SIMDE_FMA_NATIVE)
162
+ return _mm256_fmaddsub_pd(a, b, c);
163
+ #else
164
+ return simde_mm256_addsub_pd(simde_mm256_mul_pd(a, b), c);
165
+ #endif
166
+ }
167
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
168
+ # define _mm_fmaddsub_pd(a, b, c) simde_mm_fmaddsub_pd(a, b, c)
169
+ #endif
170
+
171
+ SIMDE__FUNCTION_ATTRIBUTES
172
+ simde__m128
173
+ simde_mm_fmaddsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) {
174
+ #if defined(SIMDE_FMA_NATIVE)
175
+ return _mm_fmaddsub_ps(a, b, c);
176
+ #else
177
+ return simde_mm_addsub_ps(simde_mm_mul_ps(a, b), c);
178
+ #endif
179
+ }
180
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
181
+ # define _mm_fmaddsub_ps(a, b, c) simde_mm_fmaddsub_ps(a, b, c)
182
+ #endif
183
+
184
+ SIMDE__FUNCTION_ATTRIBUTES
185
+ simde__m256
186
+ simde_mm256_fmaddsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
187
+ #if defined(SIMDE_FMA_NATIVE)
188
+ return _mm256_fmaddsub_ps(a, b, c);
189
+ #else
190
+ return simde_mm256_addsub_ps(simde_mm256_mul_ps(a, b), c);
191
+ #endif
192
+ }
193
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
194
+ # define _mm_fmaddsub_ps(a, b, c) simde_mm_fmaddsub_ps(a, b, c)
195
+ #endif
196
+
197
+ SIMDE__FUNCTION_ATTRIBUTES
198
+ simde__m128d
199
+ simde_mm_fmsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) {
200
+ #if defined(SIMDE_FMA_NATIVE)
201
+ return _mm_fmsub_pd(a, b, c);
202
+ #else
203
+ return simde_mm_sub_pd(simde_mm_mul_pd(a, b), c);
204
+ #endif
205
+ }
206
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
207
+ # define _mm_fmsub_pd(a, b, c) simde_mm_fmsub_pd(a, b, c)
208
+ #endif
209
+
210
+ SIMDE__FUNCTION_ATTRIBUTES
211
+ simde__m256d
212
+ simde_mm256_fmsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
213
+ #if defined(SIMDE_FMA_NATIVE)
214
+ return _mm256_fmsub_pd(a, b, c);
215
+ #else
216
+ return simde_mm256_sub_pd(simde_mm256_mul_pd(a, b), c);
217
+ #endif
218
+ }
219
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
220
+ # define _mm_fmsub_pd(a, b, c) simde_mm_fmsub_pd(a, b, c)
221
+ #endif
222
+
223
+ SIMDE__FUNCTION_ATTRIBUTES
224
+ simde__m128
225
+ simde_mm_fmsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) {
226
+ #if defined(SIMDE_FMA_NATIVE)
227
+ return _mm_fmsub_ps(a, b, c);
228
+ #else
229
+ return simde_mm_sub_ps(simde_mm_mul_ps(a, b), c);
230
+ #endif
231
+ }
232
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
233
+ # define _mm_fmsub_ps(a, b, c) simde_mm_fmsub_ps(a, b, c)
234
+ #endif
235
+
236
+ SIMDE__FUNCTION_ATTRIBUTES
237
+ simde__m256
238
+ simde_mm256_fmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
239
+ #if defined(SIMDE_FMA_NATIVE)
240
+ return _mm256_fmsub_ps(a, b, c);
241
+ #else
242
+ return simde_mm256_sub_ps(simde_mm256_mul_ps(a, b), c);
243
+ #endif
244
+ }
245
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
246
+ # define _mm_fmsub_ps(a, b, c) simde_mm_fmsub_ps(a, b, c)
247
+ #endif
248
+
249
+ SIMDE__FUNCTION_ATTRIBUTES
250
+ simde__m128d
251
+ simde_mm_fmsub_sd (simde__m128d a, simde__m128d b, simde__m128d c) {
252
+ #if defined(SIMDE_FMA_NATIVE)
253
+ return _mm_fmsub_sd(a, b, c);
254
+ #else
255
+ return simde_mm_sub_sd(simde_mm_mul_sd(a, b), c);
256
+ #endif
257
+ }
258
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
259
+ # define _mm_fmsub_sd(a, b, c) simde_mm_fmsub_sd(a, b, c)
260
+ #endif
261
+
262
+ SIMDE__FUNCTION_ATTRIBUTES
263
+ simde__m128
264
+ simde_mm_fmsub_ss (simde__m128 a, simde__m128 b, simde__m128 c) {
265
+ #if defined(SIMDE_FMA_NATIVE)
266
+ return _mm_fmsub_ss(a, b, c);
267
+ #else
268
+ return simde_mm_sub_ss(simde_mm_mul_ss(a, b), c);
269
+ #endif
270
+ }
271
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
272
+ # define _mm_fmsub_ss(a, b, c) simde_mm_fmsub_ss(a, b, c)
273
+ #endif
274
+
275
+ SIMDE__FUNCTION_ATTRIBUTES
276
+ simde__m128d
277
+ simde_mm_fmsubadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) {
278
+ #if defined(SIMDE_FMA_NATIVE)
279
+ return _mm_fmsubadd_pd(a, b, c);
280
+ #else
281
+ simde__m128d_private
282
+ r_,
283
+ a_ = simde__m128d_to_private(a),
284
+ b_ = simde__m128d_to_private(b),
285
+ c_ = simde__m128d_to_private(c);
286
+
287
+ SIMDE__VECTORIZE
288
+ for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) {
289
+ r_.f64[ i ] = (a_.f64[ i ] * b_.f64[ i ]) + c_.f64[ i ];
290
+ r_.f64[i + 1] = (a_.f64[i + 1] * b_.f64[i + 1]) - c_.f64[i + 1];
291
+ }
292
+
293
+ return simde__m128d_from_private(r_);
294
+ #endif
295
+ }
296
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
297
+ # define _mm_fmsubadd_pd(a, b, c) simde_mm_fmsubadd_pd(a, b, c)
298
+ #endif
299
+
300
+ SIMDE__FUNCTION_ATTRIBUTES
301
+ simde__m256d
302
+ simde_mm256_fmsubadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
303
+ #if defined(SIMDE_FMA_NATIVE)
304
+ return _mm256_fmsubadd_pd(a, b, c);
305
+ #else
306
+ simde__m256d_private
307
+ r_,
308
+ a_ = simde__m256d_to_private(a),
309
+ b_ = simde__m256d_to_private(b),
310
+ c_ = simde__m256d_to_private(c);
311
+
312
+ SIMDE__VECTORIZE
313
+ for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) {
314
+ r_.f64[ i ] = (a_.f64[ i ] * b_.f64[ i ]) + c_.f64[ i ];
315
+ r_.f64[i + 1] = (a_.f64[i + 1] * b_.f64[i + 1]) - c_.f64[i + 1];
316
+ }
317
+
318
+ return simde__m256d_from_private(r_);
319
+ #endif
320
+ }
321
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
322
+ # define _mm_fmsubadd_pd(a, b, c) simde_mm_fmsubadd_pd(a, b, c)
323
+ #endif
324
+
325
+ SIMDE__FUNCTION_ATTRIBUTES
326
+ simde__m128
327
+ simde_mm_fmsubadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) {
328
+ #if defined(SIMDE_FMA_NATIVE)
329
+ return _mm_fmsubadd_ps(a, b, c);
330
+ #else
331
+ simde__m128_private
332
+ r_,
333
+ a_ = simde__m128_to_private(a),
334
+ b_ = simde__m128_to_private(b),
335
+ c_ = simde__m128_to_private(c);
336
+
337
+ SIMDE__VECTORIZE
338
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) {
339
+ r_.f32[ i ] = (a_.f32[ i ] * b_.f32[ i ]) + c_.f32[ i ];
340
+ r_.f32[i + 1] = (a_.f32[i + 1] * b_.f32[i + 1]) - c_.f32[i + 1];
341
+ }
342
+
343
+ return simde__m128_from_private(r_);
344
+ #endif
345
+ }
346
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
347
+ # define _mm_fmsubadd_ps(a, b, c) simde_mm_fmsubadd_ps(a, b, c)
348
+ #endif
349
+
350
+ SIMDE__FUNCTION_ATTRIBUTES
351
+ simde__m256
352
+ simde_mm256_fmsubadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
353
+ #if defined(SIMDE_FMA_NATIVE)
354
+ return _mm256_fmsubadd_ps(a, b, c);
355
+ #else
356
+ simde__m256_private
357
+ r_,
358
+ a_ = simde__m256_to_private(a),
359
+ b_ = simde__m256_to_private(b),
360
+ c_ = simde__m256_to_private(c);
361
+
362
+ SIMDE__VECTORIZE
363
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) {
364
+ r_.f32[ i ] = (a_.f32[ i ] * b_.f32[ i ]) + c_.f32[ i ];
365
+ r_.f32[i + 1] = (a_.f32[i + 1] * b_.f32[i + 1]) - c_.f32[i + 1];
366
+ }
367
+
368
+ return simde__m256_from_private(r_);
369
+ #endif
370
+ }
371
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
372
+ # define _mm_fmsubadd_ps(a, b, c) simde_mm_fmsubadd_ps(a, b, c)
373
+ #endif
374
+
375
+ SIMDE__FUNCTION_ATTRIBUTES
376
+ simde__m128d
377
+ simde_mm_fnmadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) {
378
+ #if defined(SIMDE_FMA_NATIVE)
379
+ return _mm_fnmadd_pd(a, b, c);
380
+ #else
381
+ simde__m128d_private
382
+ r_,
383
+ a_ = simde__m128d_to_private(a),
384
+ b_ = simde__m128d_to_private(b),
385
+ c_ = simde__m128d_to_private(c);
386
+
387
+ SIMDE__VECTORIZE
388
+ for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
389
+ r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i];
390
+ }
391
+
392
+ return simde__m128d_from_private(r_);
393
+ #endif
394
+ }
395
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
396
+ # define _mm_fnmadd_pd(a, b, c) simde_mm_fnmadd_pd(a, b, c)
397
+ #endif
398
+
399
+ SIMDE__FUNCTION_ATTRIBUTES
400
+ simde__m256d
401
+ simde_mm256_fnmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
402
+ #if defined(SIMDE_FMA_NATIVE)
403
+ return _mm256_fnmadd_pd(a, b, c);
404
+ #else
405
+ simde__m256d_private
406
+ r_,
407
+ a_ = simde__m256d_to_private(a),
408
+ b_ = simde__m256d_to_private(b),
409
+ c_ = simde__m256d_to_private(c);
410
+
411
+ SIMDE__VECTORIZE
412
+ for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
413
+ r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i];
414
+ }
415
+
416
+ return simde__m256d_from_private(r_);
417
+ #endif
418
+ }
419
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
420
+ # define _mm_fnmadd_pd(a, b, c) simde_mm_fnmadd_pd(a, b, c)
421
+ #endif
422
+
423
+ SIMDE__FUNCTION_ATTRIBUTES
424
+ simde__m128
425
+ simde_mm_fnmadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) {
426
+ #if defined(SIMDE_FMA_NATIVE)
427
+ return _mm_fnmadd_ps(a, b, c);
428
+ #else
429
+ simde__m128_private
430
+ r_,
431
+ a_ = simde__m128_to_private(a),
432
+ b_ = simde__m128_to_private(b),
433
+ c_ = simde__m128_to_private(c);
434
+
435
+ SIMDE__VECTORIZE
436
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
437
+ r_.f32[i] = -(a_.f32[i] * b_.f32[i]) + c_.f32[i];
438
+ }
439
+
440
+ return simde__m128_from_private(r_);
441
+ #endif
442
+ }
443
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
444
+ # define _mm_fnmadd_ps(a, b, c) simde_mm_fnmadd_ps(a, b, c)
445
+ #endif
446
+
447
+ SIMDE__FUNCTION_ATTRIBUTES
448
+ simde__m256
449
+ simde_mm256_fnmadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
450
+ #if defined(SIMDE_FMA_NATIVE)
451
+ return _mm256_fnmadd_ps(a, b, c);
452
+ #else
453
+ simde__m256_private
454
+ r_,
455
+ a_ = simde__m256_to_private(a),
456
+ b_ = simde__m256_to_private(b),
457
+ c_ = simde__m256_to_private(c);
458
+
459
+ SIMDE__VECTORIZE
460
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
461
+ r_.f32[i] = -(a_.f32[i] * b_.f32[i]) + c_.f32[i];
462
+ }
463
+
464
+ return simde__m256_from_private(r_);
465
+ #endif
466
+ }
467
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
468
+ # define _mm_fnmadd_ps(a, b, c) simde_mm_fnmadd_ps(a, b, c)
469
+ #endif
470
+
471
+ SIMDE__FUNCTION_ATTRIBUTES
472
+ simde__m128d
473
+ simde_mm_fnmadd_sd (simde__m128d a, simde__m128d b, simde__m128d c) {
474
+ #if defined(SIMDE_FMA_NATIVE)
475
+ return _mm_fnmadd_sd(a, b, c);
476
+ #else
477
+ simde__m128d_private
478
+ r_,
479
+ a_ = simde__m128d_to_private(a),
480
+ b_ = simde__m128d_to_private(b),
481
+ c_ = simde__m128d_to_private(c);
482
+
483
+ r_ = a_;
484
+ r_.f64[0] = -(a_.f64[0] * b_.f64[0]) + c_.f64[0];
485
+
486
+ return simde__m128d_from_private(r_);
487
+ #endif
488
+ }
489
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
490
+ # define _mm_fnmadd_sd(a, b, c) simde_mm_fnmadd_sd(a, b, c)
491
+ #endif
492
+
493
+ SIMDE__FUNCTION_ATTRIBUTES
494
+ simde__m128
495
+ simde_mm_fnmadd_ss (simde__m128 a, simde__m128 b, simde__m128 c) {
496
+ #if defined(SIMDE_FMA_NATIVE)
497
+ return _mm_fnmadd_ss(a, b, c);
498
+ #else
499
+ simde__m128_private
500
+ r_,
501
+ a_ = simde__m128_to_private(a),
502
+ b_ = simde__m128_to_private(b),
503
+ c_ = simde__m128_to_private(c);
504
+
505
+ r_ = a_;
506
+ r_.f32[0] = -(a_.f32[0] * b_.f32[0]) + c_.f32[0];
507
+
508
+ return simde__m128_from_private(r_);
509
+ #endif
510
+ }
511
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
512
+ # define _mm_fnmadd_ss(a, b, c) simde_mm_fnmadd_ss(a, b, c)
513
+ #endif
514
+
515
+ SIMDE__FUNCTION_ATTRIBUTES
516
+ simde__m128d
517
+ simde_mm_fnmsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) {
518
+ #if defined(SIMDE_FMA_NATIVE)
519
+ return _mm_fnmsub_pd(a, b, c);
520
+ #else
521
+ simde__m128d_private
522
+ r_,
523
+ a_ = simde__m128d_to_private(a),
524
+ b_ = simde__m128d_to_private(b),
525
+ c_ = simde__m128d_to_private(c);
526
+
527
+ SIMDE__VECTORIZE
528
+ for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
529
+ r_.f64[i] = -(a_.f64[i] * b_.f64[i]) - c_.f64[i];
530
+ }
531
+
532
+ return simde__m128d_from_private(r_);
533
+ #endif
534
+ }
535
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
536
+ # define _mm_fnmsub_pd(a, b, c) simde_mm_fnmsub_pd(a, b, c)
537
+ #endif
538
+
539
+ SIMDE__FUNCTION_ATTRIBUTES
540
+ simde__m256d
541
+ simde_mm256_fnmsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
542
+ #if defined(SIMDE_FMA_NATIVE)
543
+ return _mm256_fnmsub_pd(a, b, c);
544
+ #else
545
+ simde__m256d_private
546
+ r_,
547
+ a_ = simde__m256d_to_private(a),
548
+ b_ = simde__m256d_to_private(b),
549
+ c_ = simde__m256d_to_private(c);
550
+
551
+ SIMDE__VECTORIZE
552
+ for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
553
+ r_.f64[i] = -(a_.f64[i] * b_.f64[i]) - c_.f64[i];
554
+ }
555
+
556
+ return simde__m256d_from_private(r_);
557
+ #endif
558
+ }
559
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
560
+ # define _mm_fnmsub_pd(a, b, c) simde_mm_fnmsub_pd(a, b, c)
561
+ #endif
562
+
563
+ SIMDE__FUNCTION_ATTRIBUTES
564
+ simde__m128
565
+ simde_mm_fnmsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) {
566
+ #if defined(SIMDE_FMA_NATIVE)
567
+ return _mm_fnmsub_ps(a, b, c);
568
+ #else
569
+ simde__m128_private
570
+ r_,
571
+ a_ = simde__m128_to_private(a),
572
+ b_ = simde__m128_to_private(b),
573
+ c_ = simde__m128_to_private(c);
574
+
575
+ SIMDE__VECTORIZE
576
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
577
+ r_.f32[i] = -(a_.f32[i] * b_.f32[i]) - c_.f32[i];
578
+ }
579
+
580
+ return simde__m128_from_private(r_);
581
+ #endif
582
+ }
583
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
584
+ # define _mm_fnmsub_ps(a, b, c) simde_mm_fnmsub_ps(a, b, c)
585
+ #endif
586
+
587
+ SIMDE__FUNCTION_ATTRIBUTES
588
+ simde__m256
589
+ simde_mm256_fnmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
590
+ #if defined(SIMDE_FMA_NATIVE)
591
+ return _mm256_fnmsub_ps(a, b, c);
592
+ #else
593
+ simde__m256_private
594
+ r_,
595
+ a_ = simde__m256_to_private(a),
596
+ b_ = simde__m256_to_private(b),
597
+ c_ = simde__m256_to_private(c);
598
+
599
+ SIMDE__VECTORIZE
600
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
601
+ r_.f32[i] = -(a_.f32[i] * b_.f32[i]) - c_.f32[i];
602
+ }
603
+
604
+ return simde__m256_from_private(r_);
605
+ #endif
606
+ }
607
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
608
+ # define _mm_fnmsub_ps(a, b, c) simde_mm_fnmsub_ps(a, b, c)
609
+ #endif
610
+
611
+ SIMDE__FUNCTION_ATTRIBUTES
612
+ simde__m128d
613
+ simde_mm_fnmsub_sd (simde__m128d a, simde__m128d b, simde__m128d c) {
614
+ #if defined(SIMDE_FMA_NATIVE)
615
+ return _mm_fnmsub_sd(a, b, c);
616
+ #else
617
+ simde__m128d_private
618
+ r_,
619
+ a_ = simde__m128d_to_private(a),
620
+ b_ = simde__m128d_to_private(b),
621
+ c_ = simde__m128d_to_private(c);
622
+
623
+ r_ = a_;
624
+ r_.f64[0] = -(a_.f64[0] * b_.f64[0]) - c_.f64[0];
625
+
626
+ return simde__m128d_from_private(r_);
627
+ #endif
628
+ }
629
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
630
+ # define _mm_fnmsub_sd(a, b, c) simde_mm_fnmsub_sd(a, b, c)
631
+ #endif
632
+
633
+ SIMDE__FUNCTION_ATTRIBUTES
634
+ simde__m128
635
+ simde_mm_fnmsub_ss (simde__m128 a, simde__m128 b, simde__m128 c) {
636
+ #if defined(SIMDE_FMA_NATIVE)
637
+ return _mm_fnmsub_ss(a, b, c);
638
+ #else
639
+ simde__m128_private
640
+ r_,
641
+ a_ = simde__m128_to_private(a),
642
+ b_ = simde__m128_to_private(b),
643
+ c_ = simde__m128_to_private(c);
644
+
645
+ r_ = simde__m128_to_private(a);
646
+ r_.f32[0] = -(a_.f32[0] * b_.f32[0]) - c_.f32[0];
647
+
648
+ return simde__m128_from_private(r_);
649
+ #endif
650
+ }
651
+ #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
652
+ # define _mm_fnmsub_ss(a, b, c) simde_mm_fnmsub_ss(a, b, c)
653
+ #endif
654
+
655
+ SIMDE__END_DECLS
656
+
657
+ HEDLEY_DIAGNOSTIC_POP
658
+
659
+ #endif /* !defined(SIMDE__FMA_H) */