minimap2 0.2.25.0 → 0.2.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123):
  1. checksums.yaml +4 -4
  2. data/README.md +2 -3
  3. data/ext/minimap2/Makefile +6 -2
  4. data/ext/minimap2/NEWS.md +38 -0
  5. data/ext/minimap2/README.md +9 -3
  6. data/ext/minimap2/align.c +5 -3
  7. data/ext/minimap2/cookbook.md +2 -2
  8. data/ext/minimap2/format.c +7 -4
  9. data/ext/minimap2/kalloc.c +20 -1
  10. data/ext/minimap2/kalloc.h +13 -2
  11. data/ext/minimap2/ksw2.h +1 -0
  12. data/ext/minimap2/ksw2_extd2_sse.c +1 -1
  13. data/ext/minimap2/ksw2_exts2_sse.c +79 -40
  14. data/ext/minimap2/ksw2_extz2_sse.c +1 -1
  15. data/ext/minimap2/lchain.c +15 -16
  16. data/ext/minimap2/lib/simde/CONTRIBUTING.md +114 -0
  17. data/ext/minimap2/lib/simde/COPYING +20 -0
  18. data/ext/minimap2/lib/simde/README.md +333 -0
  19. data/ext/minimap2/lib/simde/amalgamate.py +58 -0
  20. data/ext/minimap2/lib/simde/meson.build +33 -0
  21. data/ext/minimap2/lib/simde/netlify.toml +20 -0
  22. data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +140 -0
  23. data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +137 -0
  24. data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +142 -0
  25. data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +145 -0
  26. data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +140 -0
  27. data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +145 -0
  28. data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +140 -0
  29. data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +143 -0
  30. data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +137 -0
  31. data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +141 -0
  32. data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +147 -0
  33. data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +141 -0
  34. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +134 -0
  35. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +138 -0
  36. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +134 -0
  37. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +137 -0
  38. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +131 -0
  39. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +135 -0
  40. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +141 -0
  41. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +135 -0
  42. data/ext/minimap2/lib/simde/simde/arm/neon.h +97 -0
  43. data/ext/minimap2/lib/simde/simde/check.h +267 -0
  44. data/ext/minimap2/lib/simde/simde/debug-trap.h +83 -0
  45. data/ext/minimap2/lib/simde/simde/hedley.h +1899 -0
  46. data/ext/minimap2/lib/simde/simde/simde-arch.h +445 -0
  47. data/ext/minimap2/lib/simde/simde/simde-common.h +697 -0
  48. data/ext/minimap2/lib/simde/simde/x86/avx.h +5385 -0
  49. data/ext/minimap2/lib/simde/simde/x86/avx2.h +2402 -0
  50. data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +391 -0
  51. data/ext/minimap2/lib/simde/simde/x86/avx512f.h +3389 -0
  52. data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +112 -0
  53. data/ext/minimap2/lib/simde/simde/x86/fma.h +659 -0
  54. data/ext/minimap2/lib/simde/simde/x86/mmx.h +2210 -0
  55. data/ext/minimap2/lib/simde/simde/x86/sse.h +3696 -0
  56. data/ext/minimap2/lib/simde/simde/x86/sse2.h +5991 -0
  57. data/ext/minimap2/lib/simde/simde/x86/sse3.h +343 -0
  58. data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +1783 -0
  59. data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +105 -0
  60. data/ext/minimap2/lib/simde/simde/x86/ssse3.h +1053 -0
  61. data/ext/minimap2/lib/simde/simde/x86/svml.h +543 -0
  62. data/ext/minimap2/lib/simde/test/CMakeLists.txt +166 -0
  63. data/ext/minimap2/lib/simde/test/arm/meson.build +4 -0
  64. data/ext/minimap2/lib/simde/test/arm/neon/meson.build +23 -0
  65. data/ext/minimap2/lib/simde/test/arm/neon/skel.c +871 -0
  66. data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +134 -0
  67. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +39 -0
  68. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +10 -0
  69. data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +1260 -0
  70. data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +873 -0
  71. data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +1084 -0
  72. data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +1260 -0
  73. data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +18 -0
  74. data/ext/minimap2/lib/simde/test/arm/test-arm.c +20 -0
  75. data/ext/minimap2/lib/simde/test/arm/test-arm.h +8 -0
  76. data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +171 -0
  77. data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +68 -0
  78. data/ext/minimap2/lib/simde/test/meson.build +64 -0
  79. data/ext/minimap2/lib/simde/test/munit/COPYING +21 -0
  80. data/ext/minimap2/lib/simde/test/munit/Makefile +55 -0
  81. data/ext/minimap2/lib/simde/test/munit/README.md +54 -0
  82. data/ext/minimap2/lib/simde/test/munit/example.c +351 -0
  83. data/ext/minimap2/lib/simde/test/munit/meson.build +37 -0
  84. data/ext/minimap2/lib/simde/test/munit/munit.c +2055 -0
  85. data/ext/minimap2/lib/simde/test/munit/munit.h +535 -0
  86. data/ext/minimap2/lib/simde/test/run-tests.c +20 -0
  87. data/ext/minimap2/lib/simde/test/run-tests.h +260 -0
  88. data/ext/minimap2/lib/simde/test/x86/avx.c +13752 -0
  89. data/ext/minimap2/lib/simde/test/x86/avx2.c +9977 -0
  90. data/ext/minimap2/lib/simde/test/x86/avx512bw.c +2664 -0
  91. data/ext/minimap2/lib/simde/test/x86/avx512f.c +10416 -0
  92. data/ext/minimap2/lib/simde/test/x86/avx512vl.c +210 -0
  93. data/ext/minimap2/lib/simde/test/x86/fma.c +2557 -0
  94. data/ext/minimap2/lib/simde/test/x86/meson.build +33 -0
  95. data/ext/minimap2/lib/simde/test/x86/mmx.c +2878 -0
  96. data/ext/minimap2/lib/simde/test/x86/skel.c +2984 -0
  97. data/ext/minimap2/lib/simde/test/x86/sse.c +5121 -0
  98. data/ext/minimap2/lib/simde/test/x86/sse2.c +9860 -0
  99. data/ext/minimap2/lib/simde/test/x86/sse3.c +486 -0
  100. data/ext/minimap2/lib/simde/test/x86/sse4.1.c +3446 -0
  101. data/ext/minimap2/lib/simde/test/x86/sse4.2.c +101 -0
  102. data/ext/minimap2/lib/simde/test/x86/ssse3.c +2084 -0
  103. data/ext/minimap2/lib/simde/test/x86/svml.c +1545 -0
  104. data/ext/minimap2/lib/simde/test/x86/test-avx.h +16 -0
  105. data/ext/minimap2/lib/simde/test/x86/test-avx512.h +25 -0
  106. data/ext/minimap2/lib/simde/test/x86/test-mmx.h +13 -0
  107. data/ext/minimap2/lib/simde/test/x86/test-sse.h +13 -0
  108. data/ext/minimap2/lib/simde/test/x86/test-sse2.h +13 -0
  109. data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +196 -0
  110. data/ext/minimap2/lib/simde/test/x86/test-x86.c +48 -0
  111. data/ext/minimap2/lib/simde/test/x86/test-x86.h +8 -0
  112. data/ext/minimap2/main.c +13 -6
  113. data/ext/minimap2/map.c +0 -5
  114. data/ext/minimap2/minimap.h +40 -31
  115. data/ext/minimap2/minimap2.1 +19 -5
  116. data/ext/minimap2/misc/paftools.js +545 -24
  117. data/ext/minimap2/options.c +1 -1
  118. data/ext/minimap2/pyproject.toml +2 -0
  119. data/ext/minimap2/python/mappy.pyx +3 -1
  120. data/ext/minimap2/seed.c +1 -1
  121. data/ext/minimap2/setup.py +32 -22
  122. data/lib/minimap2/version.rb +1 -1
  123. metadata +100 -3
@@ -0,0 +1,2210 @@
1
+ /* Copyright (c) 2017-2020 Evan Nemerson <evan@nemerson.com>
2
+ *
3
+ * Permission is hereby granted, free of charge, to any person
4
+ * obtaining a copy of this software and associated documentation
5
+ * files (the "Software"), to deal in the Software without
6
+ * restriction, including without limitation the rights to use, copy,
7
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
8
+ * of the Software, and to permit persons to whom the Software is
9
+ * furnished to do so, subject to the following conditions:
10
+ *
11
+ * The above copyright notice and this permission notice shall be
12
+ * included in all copies or substantial portions of the Software.
13
+ *
14
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ * SOFTWARE.
22
+ */
23
+
24
+ #if !defined(SIMDE__MMX_H)
25
+ # if !defined(SIMDE__MMX_H)
26
+ # define SIMDE__MMX_H
27
+ # endif
28
+ # include "../simde-common.h"
29
+
30
+ HEDLEY_DIAGNOSTIC_PUSH
31
+ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
32
+
33
+ # if defined(SIMDE_MMX_FORCE_NATIVE)
34
+ # define SIMDE_MMX_NATIVE
35
+ # elif defined(SIMDE_ARCH_X86_MMX) && !defined(SIMDE_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
36
+ # define SIMDE_MMX_NATIVE
37
+ # elif defined(SIMDE_ARCH_ARM_NEON) && !defined(SIMDE_MMX_NO_NEON) && !defined(SIMDE_NO_NEON)
38
+ # define SIMDE_MMX_NEON
39
+ # endif
40
+
41
+ # if defined(SIMDE_MMX_NATIVE)
42
+ # define SIMDE_MMX_USE_NATIVE_TYPE
43
+ # elif defined(SIMDE_ARCH_X86_SSE)
44
+ # define SIMDE_MMX_USE_NATIVE_TYPE
45
+ # endif
46
+
47
+ # if defined(SIMDE_MMX_USE_NATIVE_TYPE)
48
+ # include <mmintrin.h>
49
+ # else
50
+ # if defined(SIMDE_MMX_NEON)
51
+ # include <arm_neon.h>
52
+ # endif
53
+ # endif
54
+ # include <stdint.h>
55
+ # include <limits.h>
56
+
57
+ SIMDE__BEGIN_DECLS
58
+
59
/* simde__m64_private: union giving lane-wise views of one 64-bit MMX value.
 * A static assertion later in this file checks that sizeof() of this type is 8.
 * Every member overlays the same storage; the i8/i16/i32/i64 and u8/u16/u32/u64
 * members expose signed and unsigned integer lanes, f32 exposes simde_float32
 * lanes, and i32f/u32f expose int_fast32_t / uint_fast32_t lanes. */
+ typedef union {
59
/* With SIMDE_VECTOR_SUBSCRIPT the lanes are declared through SIMDE_VECTOR(8);
 * otherwise they are plain fixed-size arrays (see the #else branch). */
+ #if defined(SIMDE_VECTOR_SUBSCRIPT)
61
+ SIMDE_ALIGN(8) int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
62
+ SIMDE_ALIGN(8) int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
63
+ SIMDE_ALIGN(8) int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
64
+ SIMDE_ALIGN(8) int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
65
+ SIMDE_ALIGN(8) uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
66
+ SIMDE_ALIGN(8) uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
67
+ SIMDE_ALIGN(8) uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
68
+ SIMDE_ALIGN(8) uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
69
+ SIMDE_ALIGN(8) simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
70
+ SIMDE_ALIGN(8) int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
71
+ SIMDE_ALIGN(8) uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
72
+ #else
73
+ SIMDE_ALIGN(8) int8_t i8[8];
74
+ SIMDE_ALIGN(8) int16_t i16[4];
75
+ SIMDE_ALIGN(8) int32_t i32[2];
76
+ SIMDE_ALIGN(8) int64_t i64[1];
77
+ SIMDE_ALIGN(8) uint8_t u8[8];
78
+ SIMDE_ALIGN(8) uint16_t u16[4];
79
+ SIMDE_ALIGN(8) uint32_t u32[2];
80
+ SIMDE_ALIGN(8) uint64_t u64[1];
81
+ SIMDE_ALIGN(8) simde_float32 f32[2];
/* The number of fast-int lanes depends on the platform's sizeof(int_fast32_t). */
82
+ SIMDE_ALIGN(8) int_fast32_t i32f[8 / sizeof(int_fast32_t)];
83
+ SIMDE_ALIGN(8) uint_fast32_t u32f[8 / sizeof(uint_fast32_t)];
84
+ #endif
85
+
86
/* Native __m64 view, present only when <mmintrin.h> was included above. */
+ #if defined(SIMDE_MMX_USE_NATIVE_TYPE)
87
+ __m64 n;
88
+ #endif
89
/* NEON 64-bit vector views, present only when the NEON backend is selected. */
+ #if defined(SIMDE_MMX_NEON)
90
+ int8x8_t neon_i8;
91
+ int16x4_t neon_i16;
92
+ int32x2_t neon_i32;
93
+ int64x1_t neon_i64;
94
+ uint8x8_t neon_u8;
95
+ uint16x4_t neon_u16;
96
+ uint32x2_t neon_u32;
97
+ uint64x1_t neon_u64;
98
+ float32x2_t neon_f32;
99
+ #endif
100
+ } simde__m64_private;
101
+
102
/* Public simde__m64 type. Selection order: the native __m64 when the native
 * type is in use, int32x2_t under the NEON backend, an int32_t vector declared
 * with SIMDE_VECTOR(8) when SIMDE_VECTOR_SUBSCRIPT is defined, and otherwise
 * the simde__m64_private union itself. */
+ #if defined(SIMDE_MMX_USE_NATIVE_TYPE)
103
+ typedef __m64 simde__m64;
104
+ #elif defined(SIMDE_MMX_NEON)
105
+ typedef int32x2_t simde__m64;
106
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT)
107
+ typedef int32_t simde__m64 SIMDE_ALIGN(8) SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
108
+ #else
109
+ typedef simde__m64_private simde__m64;
110
+ #endif
111
+
112
/* When the caller defined SIMDE_ENABLE_NATIVE_ALIASES and no native __m64 type
 * is in use, define SIMDE_MMX_ENABLE_NATIVE_ALIASES (which gates the _mm_* /
 * _m_* macro aliases in this file) and provide __m64 as an alias of simde__m64. */
+ #if !defined(SIMDE_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
113
+ #define SIMDE_MMX_ENABLE_NATIVE_ALIASES
114
+ typedef simde__m64 __m64;
115
+ #endif
116
+
117
/* Compile-time checks: both the public and the private type must be 8 bytes. */
+ HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect");
118
+ HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect");
119
/* Alignment is verified only when SIMDE_CHECK_ALIGNMENT and SIMDE_ALIGN_OF are both defined. */
+ #if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)
120
+ HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned");
121
+ HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned");
122
+ #endif
123
+
124
+ SIMDE__FUNCTION_ATTRIBUTES
125
+ simde__m64
126
+ simde__m64_from_private(simde__m64_private v) {
127
+ simde__m64 r;
128
+ simde_memcpy(&r, &v, sizeof(r));
129
+ return r;
130
+ }
131
+
132
+ SIMDE__FUNCTION_ATTRIBUTES
133
+ simde__m64_private
134
+ simde__m64_to_private(simde__m64 v) {
135
+ simde__m64_private r;
136
+ simde_memcpy(&r, &v, sizeof(r));
137
+ return r;
138
+ }
139
+
140
+ SIMDE__FUNCTION_ATTRIBUTES
141
+ simde__m64
142
+ simde_mm_add_pi8 (simde__m64 a, simde__m64 b) {
143
+ #if defined(SIMDE_MMX_NATIVE)
144
+ return _mm_add_pi8(a, b);
145
+ #else
146
+ simde__m64_private r_;
147
+ simde__m64_private a_ = simde__m64_to_private(a);
148
+ simde__m64_private b_ = simde__m64_to_private(b);
149
+
150
+ #if defined(SIMDE_MMX_NEON)
151
+ r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8);
152
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
153
+ r_.i8 = a_.i8 + b_.i8;
154
+ #else
155
+ SIMDE__VECTORIZE
156
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
157
+ r_.i8[i] = a_.i8[i] + b_.i8[i];
158
+ }
159
+ #endif
160
+
161
+ return simde__m64_from_private(r_);
162
+ #endif
163
+ }
164
+ #define simde_m_paddb(a, b) simde_mm_add_pi8(a, b)
165
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
166
+ # define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b)
167
+ # define _m_paddb(a, b) simde_m_paddb(a, b)
168
+ #endif
169
+
170
+ SIMDE__FUNCTION_ATTRIBUTES
171
+ simde__m64
172
+ simde_mm_add_pi16 (simde__m64 a, simde__m64 b) {
173
+ #if defined(SIMDE_MMX_NATIVE)
174
+ return _mm_add_pi16(a, b);
175
+ #else
176
+ simde__m64_private r_;
177
+ simde__m64_private a_ = simde__m64_to_private(a);
178
+ simde__m64_private b_ = simde__m64_to_private(b);
179
+
180
+ #if defined(SIMDE_MMX_NEON)
181
+ r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16);
182
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
183
+ r_.i16 = a_.i16 + b_.i16;
184
+ #else
185
+ SIMDE__VECTORIZE
186
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
187
+ r_.i16[i] = a_.i16[i] + b_.i16[i];
188
+ }
189
+ #endif
190
+
191
+ return simde__m64_from_private(r_);
192
+ #endif
193
+ }
194
+ #define simde_m_paddw(a, b) simde_mm_add_pi16(a, b)
195
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
196
+ # define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b)
197
+ # define _m_add_paddw(a, b) simde_mm_add_pi16(a, b)
198
+ #endif
199
+
200
+ SIMDE__FUNCTION_ATTRIBUTES
201
+ simde__m64
202
+ simde_mm_add_pi32 (simde__m64 a, simde__m64 b) {
203
+ #if defined(SIMDE_MMX_NATIVE)
204
+ return _mm_add_pi32(a, b);
205
+ #else
206
+ simde__m64_private r_;
207
+ simde__m64_private a_ = simde__m64_to_private(a);
208
+ simde__m64_private b_ = simde__m64_to_private(b);
209
+
210
+ #if defined(SIMDE_MMX_NEON)
211
+ r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32);
212
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
213
+ r_.i32 = a_.i32 + b_.i32;
214
+ #else
215
+ SIMDE__VECTORIZE
216
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
217
+ r_.i32[i] = a_.i32[i] + b_.i32[i];
218
+ }
219
+ #endif
220
+
221
+ return simde__m64_from_private(r_);
222
+ #endif
223
+ }
224
+ #define simde_m_paddd(a, b) simde_mm_add_pi32(a, b)
225
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
226
+ # define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b)
227
+ # define _m_add_paddd(a, b) simde_mm_add_pi32(a, b)
228
+ #endif
229
+
230
+ SIMDE__FUNCTION_ATTRIBUTES
231
+ simde__m64
232
+ simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) {
233
+ #if defined(SIMDE_MMX_NATIVE)
234
+ return _mm_adds_pi8(a, b);
235
+ #else
236
+ simde__m64_private r_;
237
+ simde__m64_private a_ = simde__m64_to_private(a);
238
+ simde__m64_private b_ = simde__m64_to_private(b);
239
+
240
+ #if defined(SIMDE_MMX_NEON)
241
+ r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8);
242
+ #else
243
+ SIMDE__VECTORIZE
244
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
245
+ if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) {
246
+ r_.i8[i] = INT8_MAX;
247
+ } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) {
248
+ r_.i8[i] = INT8_MIN;
249
+ } else {
250
+ r_.i8[i] = (a_.i8[i]) + (b_.i8[i]);
251
+ }
252
+ }
253
+ #endif
254
+
255
+ return simde__m64_from_private(r_);
256
+ #endif
257
+ }
258
+ #define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b)
259
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
260
+ # define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b)
261
+ # define _m_add_paddsb(a, b) simde_mm_adds_pi8(a, b)
262
+ #endif
263
+
264
+ SIMDE__FUNCTION_ATTRIBUTES
265
+ simde__m64
266
+ simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) {
267
+ #if defined(SIMDE_MMX_NATIVE)
268
+ return _mm_adds_pu8(a, b);
269
+ #else
270
+ simde__m64_private r_;
271
+ simde__m64_private a_ = simde__m64_to_private(a);
272
+ simde__m64_private b_ = simde__m64_to_private(b);
273
+
274
+ #if defined(SIMDE_MMX_NEON)
275
+ r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8);
276
+ #else
277
+ SIMDE__VECTORIZE
278
+ for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
279
+ const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]);
280
+ if (x > UINT8_MAX)
281
+ r_.u8[i] = UINT8_MAX;
282
+ else
283
+ r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x);
284
+ }
285
+ #endif
286
+
287
+ return simde__m64_from_private(r_);
288
+ #endif
289
+ }
290
+ #define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b)
291
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
292
+ # define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b)
293
+ # define _m_paddusb(a, b) simde_mm_adds_pu8(a, b)
294
+ #endif
295
+
296
+ SIMDE__FUNCTION_ATTRIBUTES
297
+ simde__m64
298
+ simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) {
299
+ #if defined(SIMDE_MMX_NATIVE)
300
+ return _mm_adds_pi16(a, b);
301
+ #else
302
+ simde__m64_private r_;
303
+ simde__m64_private a_ = simde__m64_to_private(a);
304
+ simde__m64_private b_ = simde__m64_to_private(b);
305
+
306
+ #if defined(SIMDE_MMX_NEON)
307
+ r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16);
308
+ #else
309
+ SIMDE__VECTORIZE
310
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
311
+ if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) {
312
+ r_.i16[i] = INT16_MAX;
313
+ } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) {
314
+ r_.i16[i] = SHRT_MIN;
315
+ } else {
316
+ r_.i16[i] = (a_.i16[i]) + (b_.i16[i]);
317
+ }
318
+ }
319
+ #endif
320
+
321
+ return simde__m64_from_private(r_);
322
+ #endif
323
+ }
324
+ #define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b)
325
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
326
+ # define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b)
327
+ # define _m_paddsw(a, b) simde_mm_adds_pi16(a, b)
328
+ #endif
329
+
330
+ SIMDE__FUNCTION_ATTRIBUTES
331
+ simde__m64
332
+ simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) {
333
+ #if defined(SIMDE_MMX_NATIVE)
334
+ return _mm_adds_pu16(a, b);
335
+ #else
336
+ simde__m64_private r_;
337
+ simde__m64_private a_ = simde__m64_to_private(a);
338
+ simde__m64_private b_ = simde__m64_to_private(b);
339
+
340
+ #if defined(SIMDE_MMX_NEON)
341
+ r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16);
342
+ #else
343
+ SIMDE__VECTORIZE
344
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
345
+ const uint32_t x = a_.u16[i] + b_.u16[i];
346
+ if (x > UINT16_MAX)
347
+ r_.u16[i] = UINT16_MAX;
348
+ else
349
+ r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x);
350
+ }
351
+ #endif
352
+
353
+ return simde__m64_from_private(r_);
354
+ #endif
355
+ }
356
+ #define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b)
357
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
358
+ # define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b)
359
+ # define _m_paddusw(a, b) simde_mm_adds_pu16(a, b)
360
+ #endif
361
+
362
+ SIMDE__FUNCTION_ATTRIBUTES
363
+ simde__m64
364
+ simde_mm_and_si64 (simde__m64 a, simde__m64 b) {
365
+ #if defined(SIMDE_MMX_NATIVE)
366
+ return _mm_and_si64(a, b);
367
+ #else
368
+ simde__m64_private r_;
369
+ simde__m64_private a_ = simde__m64_to_private(a);
370
+ simde__m64_private b_ = simde__m64_to_private(b);
371
+
372
+ #if defined(SIMDE_MMX_NEON)
373
+ r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32);
374
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
375
+ r_.i64 = a_.i64 & b_.i64;
376
+ #else
377
+ r_.i64[0] = a_.i64[0] & b_.i64[0];
378
+ #endif
379
+
380
+ return simde__m64_from_private(r_);
381
+ #endif
382
+ }
383
+ #define simde_m_pand(a, b) simde_mm_and_si64(a, b)
384
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
385
+ # define _mm_and_si64(a, b) simde_mm_and_si64(a, b)
386
+ # define _m_pand(a, b) simde_mm_and_si64(a, b)
387
+ #endif
388
+
389
+ SIMDE__FUNCTION_ATTRIBUTES
390
+ simde__m64
391
+ simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) {
392
+ #if defined(SIMDE_MMX_NATIVE)
393
+ return _mm_andnot_si64(a, b);
394
+ #else
395
+ simde__m64_private r_;
396
+ simde__m64_private a_ = simde__m64_to_private(a);
397
+ simde__m64_private b_ = simde__m64_to_private(b);
398
+
399
+ #if defined(SIMDE_MMX_NEON)
400
+ r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32);
401
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
402
+ r_.i32f = ~a_.i32f & b_.i32f;
403
+ #else
404
+ r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]);
405
+ #endif
406
+
407
+ return simde__m64_from_private(r_);
408
+ #endif
409
+ }
410
+ #define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b)
411
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
412
+ # define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b)
413
+ # define _m_pandn(a, b) simde_mm_andnot_si64(a, b)
414
+ #endif
415
+
416
+ SIMDE__FUNCTION_ATTRIBUTES
417
+ simde__m64
418
+ simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) {
419
+ #if defined(SIMDE_MMX_NATIVE)
420
+ return _mm_cmpeq_pi8(a, b);
421
+ #else
422
+ simde__m64_private r_;
423
+ simde__m64_private a_ = simde__m64_to_private(a);
424
+ simde__m64_private b_ = simde__m64_to_private(b);
425
+
426
+ #if defined(SIMDE_MMX_NEON)
427
+ r_.neon_i8 = vreinterpret_s8_u8(vceq_s8(a_.neon_i8, b_.neon_i8));
428
+ #else
429
+ SIMDE__VECTORIZE
430
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
431
+ r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
432
+ }
433
+ #endif
434
+
435
+ return simde__m64_from_private(r_);
436
+ #endif
437
+ }
438
+ #define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b)
439
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
440
+ # define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b)
441
+ # define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b)
442
+ #endif
443
+
444
+ SIMDE__FUNCTION_ATTRIBUTES
445
+ simde__m64
446
+ simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) {
447
+ #if defined(SIMDE_MMX_NATIVE)
448
+ return _mm_cmpeq_pi16(a, b);
449
+ #else
450
+ simde__m64_private r_;
451
+ simde__m64_private a_ = simde__m64_to_private(a);
452
+ simde__m64_private b_ = simde__m64_to_private(b);
453
+
454
+ #if defined(SIMDE_MMX_NEON)
455
+ r_.neon_i16 = vreinterpret_s16_u16(vceq_s16(a_.neon_i16, b_.neon_i16));
456
+ #else
457
+ SIMDE__VECTORIZE
458
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
459
+ r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
460
+ }
461
+ #endif
462
+
463
+ return simde__m64_from_private(r_);
464
+ #endif
465
+ }
466
+ #define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b)
467
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
468
+ # define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b)
469
+ # define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b)
470
+ #endif
471
+
472
+ SIMDE__FUNCTION_ATTRIBUTES
473
+ simde__m64
474
+ simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) {
475
+ #if defined(SIMDE_MMX_NATIVE)
476
+ return _mm_cmpeq_pi32(a, b);
477
+ #else
478
+ simde__m64_private r_;
479
+ simde__m64_private a_ = simde__m64_to_private(a);
480
+ simde__m64_private b_ = simde__m64_to_private(b);
481
+
482
+ #if defined(SIMDE_MMX_NEON)
483
+ r_.neon_i32 = vreinterpret_s32_u32(vceq_s32(a_.neon_i32, b_.neon_i32));
484
+ #else
485
+ SIMDE__VECTORIZE
486
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
487
+ r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
488
+ }
489
+ #endif
490
+
491
+ return simde__m64_from_private(r_);
492
+ #endif
493
+ }
494
+ #define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b)
495
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
496
+ # define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b)
497
+ # define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b)
498
+ #endif
499
+
500
+ SIMDE__FUNCTION_ATTRIBUTES
501
+ simde__m64
502
+ simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) {
503
+ #if defined(SIMDE_MMX_NATIVE)
504
+ return _mm_cmpgt_pi8(a, b);
505
+ #else
506
+ simde__m64_private r_;
507
+ simde__m64_private a_ = simde__m64_to_private(a);
508
+ simde__m64_private b_ = simde__m64_to_private(b);
509
+
510
+ #if defined(SIMDE_MMX_NEON)
511
+ r_.neon_i8 = vreinterpret_s8_u8(vcgt_s8(a_.neon_i8, b_.neon_i8));
512
+ #else
513
+ SIMDE__VECTORIZE
514
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
515
+ r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
516
+ }
517
+ #endif
518
+
519
+ return simde__m64_from_private(r_);
520
+ #endif
521
+ }
522
+ #define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b)
523
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
524
+ # define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b)
525
+ # define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b)
526
+ #endif
527
+
528
+ SIMDE__FUNCTION_ATTRIBUTES
529
+ simde__m64
530
+ simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) {
531
+ #if defined(SIMDE_MMX_NATIVE)
532
+ return _mm_cmpgt_pi16(a, b);
533
+ #else
534
+ simde__m64_private r_;
535
+ simde__m64_private a_ = simde__m64_to_private(a);
536
+ simde__m64_private b_ = simde__m64_to_private(b);
537
+
538
+ #if defined(SIMDE_MMX_NEON)
539
+ r_.neon_i16 = vreinterpret_s16_u16(vcgt_s16(a_.neon_i16, b_.neon_i16));
540
+ #else
541
+ SIMDE__VECTORIZE
542
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
543
+ r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
544
+ }
545
+ #endif
546
+
547
+ return simde__m64_from_private(r_);
548
+ #endif
549
+ }
550
+ #define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b)
551
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
552
+ # define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b)
553
+ # define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b)
554
+ #endif
555
+
556
+ SIMDE__FUNCTION_ATTRIBUTES
557
+ simde__m64
558
+ simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) {
559
+ #if defined(SIMDE_MMX_NATIVE)
560
+ return _mm_cmpgt_pi32(a, b);
561
+ #else
562
+ simde__m64_private r_;
563
+ simde__m64_private a_ = simde__m64_to_private(a);
564
+ simde__m64_private b_ = simde__m64_to_private(b);
565
+
566
+ #if defined(SIMDE_MMX_NEON)
567
+ r_.neon_i32 = vreinterpret_s32_u32(vcgt_s32(a_.neon_i32, b_.neon_i32));
568
+ #else
569
+ SIMDE__VECTORIZE
570
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
571
+ r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
572
+ }
573
+ #endif
574
+
575
+ return simde__m64_from_private(r_);
576
+ #endif
577
+ }
578
+ #define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b)
579
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
580
+ # define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b)
581
+ # define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b)
582
+ #endif
583
+
584
+ SIMDE__FUNCTION_ATTRIBUTES
585
+ int64_t
586
+ simde_mm_cvtm64_si64 (simde__m64 a) {
587
+ #if defined(SIMDE_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI)
588
+ return _mm_cvtm64_si64(a);
589
+ #else
590
+ simde__m64_private a_ = simde__m64_to_private(a);
591
+
592
+ #if defined(SIMDE_MMX_NEON)
593
+ return vget_lane_s64(a_.neon_i64, 0);
594
+ #else
595
+ return a_.i64[0];
596
+ #endif
597
+ #endif
598
+ }
599
+ #define simde_m_to_int64(a) simde_mm_cvtm64_si64(a)
600
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
601
+ # define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a)
602
+ # define _m_to_int64(a) simde_mm_cvtm64_si64(a)
603
+ #endif
604
+
605
+ SIMDE__FUNCTION_ATTRIBUTES
606
+ simde__m64
607
+ simde_mm_cvtsi32_si64 (int32_t a) {
608
+ #if defined(SIMDE_MMX_NATIVE)
609
+ return _mm_cvtsi32_si64(a);
610
+ #else
611
+ simde__m64_private r_;
612
+
613
+ #if defined(SIMDE_MMX_NEON)
614
+ const int32_t av[sizeof(r_.neon_i32) / sizeof(r_.neon_i32[0])] = { a, 0 };
615
+ r_.neon_i32 = vld1_s32(av);
616
+ #else
617
+ r_.i32[0] = a;
618
+ r_.i32[1] = 0;
619
+ #endif
620
+
621
+ return simde__m64_from_private(r_);
622
+ #endif
623
+ }
624
+ #define simde_m_from_int(a) simde_mm_cvtsi32_si64(a)
625
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
626
+ # define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a)
627
+ # define _m_from_int(a) simde_mm_cvtsi32_si64(a)
628
+ #endif
629
+
630
+ SIMDE__FUNCTION_ATTRIBUTES
631
+ simde__m64
632
+ simde_mm_cvtsi64_m64 (int64_t a) {
633
+ #if defined(SIMDE_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI)
634
+ return _mm_cvtsi64_m64(a);
635
+ #else
636
+ simde__m64_private r_;
637
+
638
+ #if defined(SIMDE_MMX_NEON)
639
+ r_.neon_i64 = vld1_s64(&a);
640
+ #else
641
+ r_.i64[0] = a;
642
+ #endif
643
+
644
+ return simde__m64_from_private(r_);
645
+ #endif
646
+ }
647
+ #define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a)
648
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
649
+ # define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a)
650
+ # define _m_from_int64(a) simde_mm_cvtsi64_m64(a)
651
+ #endif
652
+
653
+ SIMDE__FUNCTION_ATTRIBUTES
654
+ int32_t
655
+ simde_mm_cvtsi64_si32 (simde__m64 a) {
656
+ #if defined(SIMDE_MMX_NATIVE)
657
+ return _mm_cvtsi64_si32(a);
658
+ #else
659
+ simde__m64_private a_ = simde__m64_to_private(a);
660
+
661
+ #if defined(SIMDE_MMX_NEON)
662
+ return vget_lane_s32(a_.neon_i32, 0);
663
+ #else
664
+ return a_.i32[0];
665
+ #endif
666
+ #endif
667
+ }
668
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
669
+ # define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a)
670
+ #endif
671
+
672
+ SIMDE__FUNCTION_ATTRIBUTES
673
+ void
674
+ simde_mm_empty (void) {
675
+ #if defined(SIMDE_MMX_NATIVE)
676
+ _mm_empty();
677
+ #else
678
+ #endif
679
+ }
680
+ #define simde_m_empty() simde_mm_empty()
681
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
682
+ # define _mm_empty() simde_mm_empty()
683
+ # define _m_empty() simde_mm_empty()
684
+ #endif
685
+
686
+ SIMDE__FUNCTION_ATTRIBUTES
687
+ simde__m64
688
+ simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) {
689
+ #if defined(SIMDE_MMX_NATIVE)
690
+ return _mm_madd_pi16(a, b);
691
+ #else
692
+ simde__m64_private r_;
693
+ simde__m64_private a_ = simde__m64_to_private(a);
694
+ simde__m64_private b_ = simde__m64_to_private(b);
695
+
696
+ #if defined(SIMDE_MMX_NEON)
697
+ int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16);
698
+ r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1));
699
+ #else
700
+ SIMDE__VECTORIZE
701
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) {
702
+ r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]);
703
+ }
704
+ #endif
705
+
706
+ return simde__m64_from_private(r_);
707
+ #endif
708
+ }
709
+ #define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b)
710
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
711
+ # define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b)
712
+ # define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b)
713
+ #endif
714
+
715
+ SIMDE__FUNCTION_ATTRIBUTES
716
+ simde__m64
717
+ simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) {
718
+ #if defined(SIMDE_MMX_NATIVE)
719
+ return _mm_mulhi_pi16(a, b);
720
+ #else
721
+ simde__m64_private r_;
722
+ simde__m64_private a_ = simde__m64_to_private(a);
723
+ simde__m64_private b_ = simde__m64_to_private(b);
724
+
725
+ #if defined(SIMDE_MMX_NEON)
726
+ const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16);
727
+ const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16);
728
+ const uint16x4_t t3 = vmovn_u32(t2);
729
+ r_.neon_i16 = vreinterpret_s16_u16(t3);
730
+ #else
731
+ SIMDE__VECTORIZE
732
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
733
+ r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16));
734
+ }
735
+ #endif
736
+
737
+ return simde__m64_from_private(r_);
738
+ #endif
739
+ }
740
+ #define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b)
741
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
742
+ # define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b)
743
+ # define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b)
744
+ #endif
745
+
746
+ SIMDE__FUNCTION_ATTRIBUTES
747
+ simde__m64
748
+ simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) {
749
+ #if defined(SIMDE_MMX_NATIVE)
750
+ return _mm_mullo_pi16(a, b);
751
+ #else
752
+ simde__m64_private r_;
753
+ simde__m64_private a_ = simde__m64_to_private(a);
754
+ simde__m64_private b_ = simde__m64_to_private(b);
755
+
756
+ #if defined(SIMDE_MMX_NEON)
757
+ const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16);
758
+ const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1));
759
+ r_.neon_i16 = vreinterpret_s16_u16(t2);
760
+ #else
761
+ SIMDE__VECTORIZE
762
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
763
+ r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff));
764
+ }
765
+ #endif
766
+
767
+ return simde__m64_from_private(r_);
768
+ #endif
769
+ }
770
+ #define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b)
771
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
772
+ # define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b)
773
+ # define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b)
774
+ #endif
775
+
776
+ SIMDE__FUNCTION_ATTRIBUTES
777
+ simde__m64
778
+ simde_mm_or_si64 (simde__m64 a, simde__m64 b) {
779
+ #if defined(SIMDE_MMX_NATIVE)
780
+ return _mm_or_si64(a, b);
781
+ #else
782
+ simde__m64_private r_;
783
+ simde__m64_private a_ = simde__m64_to_private(a);
784
+ simde__m64_private b_ = simde__m64_to_private(b);
785
+
786
+ #if defined(SIMDE_MMX_NEON)
787
+ r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32);
788
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
789
+ r_.i64 = a_.i64 | b_.i64;
790
+ #else
791
+ r_.i64[0] = a_.i64[0] | b_.i64[0];
792
+ #endif
793
+
794
+ return simde__m64_from_private(r_);
795
+ #endif
796
+ }
797
+ #define simde_m_por(a, b) simde_mm_or_si64(a, b)
798
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
799
+ # define _mm_or_si64(a, b) simde_mm_or_si64(a, b)
800
+ # define _m_por(a, b) simde_mm_or_si64(a, b)
801
+ #endif
802
+
803
+ SIMDE__FUNCTION_ATTRIBUTES
804
+ simde__m64
805
+ simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) {
806
+ #if defined(SIMDE_MMX_NATIVE)
807
+ return _mm_packs_pi16(a, b);
808
+ #else
809
+ simde__m64_private r_;
810
+ simde__m64_private a_ = simde__m64_to_private(a);
811
+ simde__m64_private b_ = simde__m64_to_private(b);
812
+
813
+ #if defined(SIMDE_MMX_NEON)
814
+ r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16));
815
+ #else
816
+ SIMDE__VECTORIZE
817
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
818
+ if (a_.i16[i] < INT8_MIN) {
819
+ r_.i8[i] = INT8_MIN;
820
+ } else if (a_.i16[i] > INT8_MAX) {
821
+ r_.i8[i] = INT8_MAX;
822
+ } else {
823
+ r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]);
824
+ }
825
+ }
826
+
827
+ SIMDE__VECTORIZE
828
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
829
+ if (b_.i16[i] < INT8_MIN) {
830
+ r_.i8[i + 4] = INT8_MIN;
831
+ } else if (b_.i16[i] > INT8_MAX) {
832
+ r_.i8[i + 4] = INT8_MAX;
833
+ } else {
834
+ r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]);
835
+ }
836
+ }
837
+ #endif
838
+
839
+ return simde__m64_from_private(r_);
840
+ #endif
841
+ }
842
+ #define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b)
843
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
844
+ # define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b)
845
+ # define _m_packsswb(a, b) mm_packs_pi16(a, b)
846
+ #endif
847
+
848
+ SIMDE__FUNCTION_ATTRIBUTES
849
+ simde__m64
850
+ simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) {
851
+ #if defined(SIMDE_MMX_NATIVE)
852
+ return _mm_packs_pi32(a, b);
853
+ #else
854
+ simde__m64_private r_;
855
+ simde__m64_private a_ = simde__m64_to_private(a);
856
+ simde__m64_private b_ = simde__m64_to_private(b);
857
+
858
+ #if defined(SIMDE_MMX_NEON)
859
+ r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32));
860
+ #else
861
+ SIMDE__VECTORIZE
862
+ for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) {
863
+ if (a_.i32[i] < SHRT_MIN) {
864
+ r_.i16[i] = SHRT_MIN;
865
+ } else if (a_.i32[i] > INT16_MAX) {
866
+ r_.i16[i] = INT16_MAX;
867
+ } else {
868
+ r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]);
869
+ }
870
+ }
871
+
872
+ SIMDE__VECTORIZE
873
+ for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) {
874
+ if (b_.i32[i] < SHRT_MIN) {
875
+ r_.i16[i + 2] = SHRT_MIN;
876
+ } else if (b_.i32[i] > INT16_MAX) {
877
+ r_.i16[i + 2] = INT16_MAX;
878
+ } else {
879
+ r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]);
880
+ }
881
+ }
882
+ #endif
883
+
884
+ return simde__m64_from_private(r_);
885
+ #endif
886
+ }
887
+ #define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b)
888
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
889
+ # define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b)
890
+ # define _m_packssdw(a, b) simde_mm_packs_pi32(a, b)
891
+ #endif
892
+
893
+ SIMDE__FUNCTION_ATTRIBUTES
894
+ simde__m64
895
+ simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) {
896
+ #if defined(SIMDE_MMX_NATIVE)
897
+ return _mm_packs_pu16(a, b);
898
+ #else
899
+ simde__m64_private r_;
900
+ simde__m64_private a_ = simde__m64_to_private(a);
901
+ simde__m64_private b_ = simde__m64_to_private(b);
902
+
903
+ #if defined(SIMDE_MMX_NEON) && defined(SIMDE_ARCH_AARCH64)
904
+ const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16);
905
+
906
+ /* Set elements which are < 0 to 0 */
907
+ const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1)));
908
+
909
+ /* Vector with all s16 elements set to UINT8_MAX */
910
+ const int16x8_t vmax = vmovq_n_s16((int16_t) UINT8_MAX);
911
+
912
+ /* Elements which are within the acceptable range */
913
+ const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax)));
914
+ const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax)));
915
+
916
+ /* Final values as 16-bit integers */
917
+ const int16x8_t values = vorrq_s16(le_max, gt_max);
918
+
919
+ r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values));
920
+ #else
921
+ SIMDE__VECTORIZE
922
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
923
+ if (a_.i16[i] > UINT8_MAX) {
924
+ r_.u8[i] = UINT8_MAX;
925
+ } else if (a_.i16[i] < 0) {
926
+ r_.u8[i] = 0;
927
+ } else {
928
+ r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]);
929
+ }
930
+ }
931
+
932
+ SIMDE__VECTORIZE
933
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
934
+ if (b_.i16[i] > UINT8_MAX) {
935
+ r_.u8[i + 4] = UINT8_MAX;
936
+ } else if (b_.i16[i] < 0) {
937
+ r_.u8[i + 4] = 0;
938
+ } else {
939
+ r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]);
940
+ }
941
+ }
942
+ #endif
943
+
944
+ return simde__m64_from_private(r_);
945
+ #endif
946
+ }
947
+ #define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b)
948
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
949
+ # define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b)
950
+ # define _m_packuswb(a, b) simde_mm_packs_pu16(a, b)
951
+ #endif
952
+
953
+ SIMDE__FUNCTION_ATTRIBUTES
954
+ simde__m64
955
+ simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) {
956
+ #if defined(SIMDE_MMX_NATIVE)
957
+ return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0);
958
+ #else
959
+ simde__m64_private r_;
960
+
961
+ #if defined(SIMDE_MMX_NEON)
962
+ const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 };
963
+ r_.neon_i8 = vld1_s8(v);
964
+ #else
965
+ r_.i8[0] = e0;
966
+ r_.i8[1] = e1;
967
+ r_.i8[2] = e2;
968
+ r_.i8[3] = e3;
969
+ r_.i8[4] = e4;
970
+ r_.i8[5] = e5;
971
+ r_.i8[6] = e6;
972
+ r_.i8[7] = e7;
973
+ #endif
974
+
975
+ return simde__m64_from_private(r_);
976
+ #endif
977
+ }
978
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
979
+ # define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0)
980
+ #endif
981
+
982
+ SIMDE__FUNCTION_ATTRIBUTES
983
+ simde__m64
984
+ simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) {
985
+ simde__m64_private r_;
986
+
987
+ #if defined(SIMDE_MMX_NATIVE)
988
+ r_.n = _mm_set_pi8(
989
+ HEDLEY_STATIC_CAST(int8_t, e7),
990
+ HEDLEY_STATIC_CAST(int8_t, e6),
991
+ HEDLEY_STATIC_CAST(int8_t, e5),
992
+ HEDLEY_STATIC_CAST(int8_t, e4),
993
+ HEDLEY_STATIC_CAST(int8_t, e3),
994
+ HEDLEY_STATIC_CAST(int8_t, e2),
995
+ HEDLEY_STATIC_CAST(int8_t, e1),
996
+ HEDLEY_STATIC_CAST(int8_t, e0));
997
+ #elif defined(SIMDE_MMX_NEON)
998
+ const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 };
999
+ r_.neon_u8 = vld1_u8(v);
1000
+ #else
1001
+ r_.u8[0] = e0;
1002
+ r_.u8[1] = e1;
1003
+ r_.u8[2] = e2;
1004
+ r_.u8[3] = e3;
1005
+ r_.u8[4] = e4;
1006
+ r_.u8[5] = e5;
1007
+ r_.u8[6] = e6;
1008
+ r_.u8[7] = e7;
1009
+ #endif
1010
+
1011
+ return simde__m64_from_private(r_);
1012
+ }
1013
+
1014
+ SIMDE__FUNCTION_ATTRIBUTES
1015
+ simde__m64
1016
+ simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) {
1017
+ #if defined(SIMDE_MMX_NATIVE)
1018
+ return _mm_set_pi16(e3, e2, e1, e0);
1019
+ #else
1020
+ simde__m64_private r_;
1021
+
1022
+ #if defined(SIMDE_MMX_NEON)
1023
+ const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 };
1024
+ r_.neon_i16 = vld1_s16(v);
1025
+ #else
1026
+ r_.i16[0] = e0;
1027
+ r_.i16[1] = e1;
1028
+ r_.i16[2] = e2;
1029
+ r_.i16[3] = e3;
1030
+ #endif
1031
+ return simde__m64_from_private(r_);
1032
+ #endif
1033
+ }
1034
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1035
+ # define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0)
1036
+ #endif
1037
+
1038
+ SIMDE__FUNCTION_ATTRIBUTES
1039
+ simde__m64
1040
+ simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) {
1041
+ simde__m64_private r_;
1042
+
1043
+ #if defined(SIMDE_MMX_NATIVE)
1044
+ r_.n = _mm_set_pi16(
1045
+ HEDLEY_STATIC_CAST(int16_t, e3),
1046
+ HEDLEY_STATIC_CAST(int16_t, e2),
1047
+ HEDLEY_STATIC_CAST(int16_t, e1),
1048
+ HEDLEY_STATIC_CAST(int16_t, e0)
1049
+ );
1050
+ #elif defined(SIMDE_MMX_NEON)
1051
+ const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 };
1052
+ r_.neon_u16 = vld1_u16(v);
1053
+ #else
1054
+ r_.u16[0] = e0;
1055
+ r_.u16[1] = e1;
1056
+ r_.u16[2] = e2;
1057
+ r_.u16[3] = e3;
1058
+ #endif
1059
+
1060
+ return simde__m64_from_private(r_);
1061
+ }
1062
+
1063
+ SIMDE__FUNCTION_ATTRIBUTES
1064
+ simde__m64
1065
+ simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) {
1066
+ simde__m64_private r_;
1067
+
1068
+ #if defined(SIMDE_MMX_NATIVE)
1069
+ r_.n = _mm_set_pi32(
1070
+ HEDLEY_STATIC_CAST(int32_t, e1),
1071
+ HEDLEY_STATIC_CAST(int32_t, e0));
1072
+ #elif defined(SIMDE_MMX_NEON)
1073
+ const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 };
1074
+ r_.neon_u32 = vld1_u32(v);
1075
+ #else
1076
+ r_.u32[0] = e0;
1077
+ r_.u32[1] = e1;
1078
+ #endif
1079
+
1080
+ return simde__m64_from_private(r_);
1081
+ }
1082
+
1083
+ SIMDE__FUNCTION_ATTRIBUTES
1084
+ simde__m64
1085
+ simde_mm_set_pi32 (int32_t e1, int32_t e0) {
1086
+ simde__m64_private r_;
1087
+
1088
+ #if defined(SIMDE_MMX_NATIVE)
1089
+ r_.n = _mm_set_pi32(e1, e0);
1090
+ #elif defined(SIMDE_MMX_NEON)
1091
+ const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 };
1092
+ r_.neon_i32 = vld1_s32(v);
1093
+ #else
1094
+ r_.i32[0] = e0;
1095
+ r_.i32[1] = e1;
1096
+ #endif
1097
+
1098
+ return simde__m64_from_private(r_);
1099
+ }
1100
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1101
+ # define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0)
1102
+ #endif
1103
+
1104
+ SIMDE__FUNCTION_ATTRIBUTES
1105
+ simde__m64
1106
+ simde_x_mm_set_pi64 (int64_t e0) {
1107
+ simde__m64_private r_;
1108
+
1109
+ #if defined(SIMDE_MMX_NEON)
1110
+ const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 };
1111
+ r_.neon_i64 = vld1_s64(v);
1112
+ #else
1113
+ r_.i64[0] = e0;
1114
+ #endif
1115
+
1116
+ return simde__m64_from_private(r_);
1117
+ }
1118
+
1119
+
1120
+ SIMDE__FUNCTION_ATTRIBUTES
1121
+ simde__m64
1122
+ simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) {
1123
+ simde__m64_private r_;
1124
+
1125
+ #if defined(SIMDE_MMX_NEON)
1126
+ const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 };
1127
+ r_.neon_f32 = vld1_f32(v);
1128
+ #else
1129
+ r_.f32[0] = e0;
1130
+ r_.f32[1] = e1;
1131
+ #endif
1132
+
1133
+ return simde__m64_from_private(r_);
1134
+ }
1135
+
1136
+ SIMDE__FUNCTION_ATTRIBUTES
1137
+ simde__m64
1138
+ simde_mm_set1_pi8 (int8_t a) {
1139
+ #if defined(SIMDE_MMX_NATIVE)
1140
+ return _mm_set1_pi8(a);
1141
+ #elif defined(SIMDE_MMX_NEON)
1142
+ simde__m64_private r_;
1143
+ r_.neon_i8 = vmov_n_s8(a);
1144
+ return simde__m64_from_private(r_);
1145
+ #else
1146
+ return simde_mm_set_pi8(a, a, a, a, a, a, a, a);
1147
+ #endif
1148
+ }
1149
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1150
+ # define _mm_set1_pi8(a) simde_mm_set1_pi8(a)
1151
+ #endif
1152
+
1153
+ SIMDE__FUNCTION_ATTRIBUTES
1154
+ simde__m64
1155
+ simde_mm_set1_pi16 (int16_t a) {
1156
+ #if defined(SIMDE_MMX_NATIVE)
1157
+ return _mm_set1_pi16(a);
1158
+ #elif defined(SIMDE_MMX_NEON)
1159
+ simde__m64_private r_;
1160
+ r_.neon_i16 = vmov_n_s16(a);
1161
+ return simde__m64_from_private(r_);
1162
+ #else
1163
+ return simde_mm_set_pi16(a, a, a, a);
1164
+ #endif
1165
+ }
1166
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1167
+ # define _mm_set1_pi16(a) simde_mm_set1_pi16(a)
1168
+ #endif
1169
+
1170
+ SIMDE__FUNCTION_ATTRIBUTES
1171
+ simde__m64
1172
+ simde_mm_set1_pi32 (int32_t a) {
1173
+ #if defined(SIMDE_MMX_NATIVE)
1174
+ return _mm_set1_pi32(a);
1175
+ #elif defined(SIMDE_MMX_NEON)
1176
+ simde__m64_private r_;
1177
+ r_.neon_i32 = vmov_n_s32(a);
1178
+ return simde__m64_from_private(r_);
1179
+ #else
1180
+ return simde_mm_set_pi32(a, a);
1181
+ #endif
1182
+ }
1183
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1184
+ # define _mm_set1_pi32(a) simde_mm_set1_pi32(a)
1185
+ #endif
1186
+
1187
+ SIMDE__FUNCTION_ATTRIBUTES
1188
+ simde__m64
1189
+ simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) {
1190
+ #if defined(SIMDE_MMX_NATIVE)
1191
+ return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0);
1192
+ #else
1193
+ return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7);
1194
+ #endif
1195
+ }
1196
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1197
+ # define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0)
1198
+ #endif
1199
+
1200
+ SIMDE__FUNCTION_ATTRIBUTES
1201
+ simde__m64
1202
+ simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) {
1203
+ #if defined(SIMDE_MMX_NATIVE)
1204
+ return _mm_setr_pi16(e3, e2, e1, e0);
1205
+ #else
1206
+ return simde_mm_set_pi16(e0, e1, e2, e3);
1207
+ #endif
1208
+ }
1209
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1210
+ # define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0)
1211
+ #endif
1212
+
1213
+ SIMDE__FUNCTION_ATTRIBUTES
1214
+ simde__m64
1215
+ simde_mm_setr_pi32 (int32_t e1, int32_t e0) {
1216
+ #if defined(SIMDE_MMX_NATIVE)
1217
+ return _mm_setr_pi32(e1, e0);
1218
+ #else
1219
+ return simde_mm_set_pi32(e0, e1);
1220
+ #endif
1221
+ }
1222
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1223
+ # define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0)
1224
+ #endif
1225
+
1226
+ SIMDE__FUNCTION_ATTRIBUTES
1227
+ simde__m64
1228
+ simde_mm_setzero_si64 (void) {
1229
+ #if defined(SIMDE_MMX_NATIVE)
1230
+ return _mm_setzero_si64();
1231
+ #elif defined(SIMDE_MMX_NEON)
1232
+ simde__m64_private r_;
1233
+ r_.neon_u32 = vmov_n_u32(0);
1234
+ return simde__m64_from_private(r_);
1235
+ #else
1236
+ return simde_mm_set_pi32(0, 0);
1237
+ #endif
1238
+ }
1239
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1240
+ # define _mm_setzero_si64() simde_mm_setzero_si64()
1241
+ #endif
1242
+
1243
+ SIMDE__FUNCTION_ATTRIBUTES
1244
+ simde__m64
1245
+ simde_mm_setone_si64 (void) {
1246
+ #if defined(SIMDE_SSE_NATIVE)
1247
+ __m64 t = _mm_undefined_ps();
1248
+ return _mm_andnot_ps(t, t);
1249
+ #else
1250
+ simde__m64 r;
1251
+ simde_memset(&r, ~0, sizeof(r));
1252
+ return r;
1253
+ #endif
1254
+ }
1255
+
1256
+ SIMDE__FUNCTION_ATTRIBUTES
1257
+ simde__m64
1258
+ simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) {
1259
+ #if defined(SIMDE_MMX_NATIVE)
1260
+ return _mm_sll_pi16(a, count);
1261
+ #else
1262
+ simde__m64_private r_;
1263
+ simde__m64_private a_ = simde__m64_to_private(a);
1264
+ simde__m64_private count_ = simde__m64_to_private(count);
1265
+
1266
+ #if defined(SIMDE_MMX_NEON)
1267
+ r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) vget_lane_u64(count_.neon_u64, 0)));
1268
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1269
+ r_.i16 = a_.i16 << count_.u64[0];
1270
+ #else
1271
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) {
1272
+ simde_memset(&r_, 0, sizeof(r_));
1273
+ return simde__m64_from_private(r_);
1274
+ }
1275
+
1276
+ SIMDE__VECTORIZE
1277
+ for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
1278
+ r_.u16[i] = (uint16_t) (a_.u16[i] << count_.u64[0]);
1279
+ }
1280
+ #endif
1281
+
1282
+ return simde__m64_from_private(r_);
1283
+ #endif
1284
+ }
1285
+ #define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count)
1286
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1287
+ # define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count)
1288
+ # define _m_psllw(a, count) simde_mm_sll_pi16(a, count)
1289
+ #endif
1290
+
1291
+ SIMDE__FUNCTION_ATTRIBUTES
1292
+ simde__m64
1293
+ simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) {
1294
+ #if defined(SIMDE_MMX_NATIVE)
1295
+ return _mm_sll_pi32(a, count);
1296
+ #else
1297
+ simde__m64_private r_;
1298
+ simde__m64_private a_ = simde__m64_to_private(a);
1299
+ simde__m64_private count_ = simde__m64_to_private(count);
1300
+
1301
+ #if defined(SIMDE_MMX_NEON)
1302
+ r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) vget_lane_u64(count_.neon_u64, 0)));
1303
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1304
+ r_.i32 = a_.i32 << count_.u64[0];
1305
+ #else
1306
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) {
1307
+ simde_memset(&r_, 0, sizeof(r_));
1308
+ return simde__m64_from_private(r_);
1309
+ }
1310
+
1311
+ SIMDE__VECTORIZE
1312
+ for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
1313
+ r_.u32[i] = a_.u32[i] << count_.u64[0];
1314
+ }
1315
+ #endif
1316
+
1317
+ return simde__m64_from_private(r_);
1318
+ #endif
1319
+ }
1320
+ #define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count)
1321
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1322
+ # define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count)
1323
+ # define _m_pslld(a, count) simde_mm_sll_pi32(a, count)
1324
+ #endif
1325
+
1326
+ SIMDE__FUNCTION_ATTRIBUTES
1327
+ simde__m64
1328
+ simde_mm_slli_pi16 (simde__m64 a, int count) {
1329
+ #if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
1330
+ return _mm_slli_pi16(a, count);
1331
+ #else
1332
+ simde__m64_private r_;
1333
+ simde__m64_private a_ = simde__m64_to_private(a);
1334
+
1335
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1336
+ r_.i16 = a_.i16 << count;
1337
+ #elif defined(SIMDE_MMX_NEON)
1338
+ r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count));
1339
+ #else
1340
+ SIMDE__VECTORIZE
1341
+ for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
1342
+ r_.u16[i] = (uint16_t) (a_.u16[i] << count);
1343
+ }
1344
+ #endif
1345
+
1346
+ return simde__m64_from_private(r_);
1347
+ #endif
1348
+ }
1349
+ #define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count)
1350
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1351
+ # define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count)
1352
+ # define _m_psllwi(a, count) simde_mm_slli_pi16(a, count)
1353
+ #endif
1354
+
1355
+ SIMDE__FUNCTION_ATTRIBUTES
1356
+ simde__m64
1357
+ simde_mm_slli_pi32 (simde__m64 a, int count) {
1358
+ #if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
1359
+ return _mm_slli_pi32(a, count);
1360
+ #else
1361
+ simde__m64_private r_;
1362
+ simde__m64_private a_ = simde__m64_to_private(a);
1363
+
1364
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1365
+ r_.i32 = a_.i32 << count;
1366
+ #elif defined(SIMDE_MMX_NEON)
1367
+ r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count));
1368
+ #else
1369
+ SIMDE__VECTORIZE
1370
+ for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
1371
+ r_.u32[i] = a_.u32[i] << count;
1372
+ }
1373
+ #endif
1374
+
1375
+ return simde__m64_from_private(r_);
1376
+ #endif
1377
+ }
1378
+ #define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b)
1379
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1380
+ # define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count)
1381
+ # define _m_pslldi(a, count) simde_mm_slli_pi32(a, count)
1382
+ #endif
1383
+
1384
+ SIMDE__FUNCTION_ATTRIBUTES
1385
+ simde__m64
1386
+ simde_mm_slli_si64 (simde__m64 a, int count) {
1387
+ #if defined(SIMDE_MMX_NATIVE)
1388
+ return _mm_slli_si64(a, count);
1389
+ #else
1390
+ simde__m64_private r_;
1391
+ simde__m64_private a_ = simde__m64_to_private(a);
1392
+
1393
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1394
+ r_.i64 = a_.i64 << count;
1395
+ #elif defined(SIMDE_MMX_NEON)
1396
+ r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count));
1397
+ #else
1398
+ r_.u64[0] = a_.u64[0] << count;
1399
+ #endif
1400
+
1401
+ return simde__m64_from_private(r_);
1402
+ #endif
1403
+ }
1404
+ #define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count)
1405
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1406
+ # define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count)
1407
+ # define _m_psllqi(a, count) simde_mm_slli_si64(a, count)
1408
+ #endif
1409
+
1410
+ SIMDE__FUNCTION_ATTRIBUTES
1411
+ simde__m64
1412
+ simde_mm_sll_si64 (simde__m64 a, simde__m64 count) {
1413
+ #if defined(SIMDE_MMX_NATIVE)
1414
+ return _mm_sll_si64(a, count);
1415
+ #else
1416
+ simde__m64_private r_;
1417
+ simde__m64_private a_ = simde__m64_to_private(a);
1418
+ simde__m64_private count_ = simde__m64_to_private(count);
1419
+
1420
+ #if defined(SIMDE_MMX_NEON)
1421
+ r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64);
1422
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
1423
+ r_.i64 = a_.i64 << count_.i64;
1424
+ #else
1425
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) {
1426
+ simde_memset(&r_, 0, sizeof(r_));
1427
+ return simde__m64_from_private(r_);
1428
+ }
1429
+
1430
+ r_.u64[0] = a_.u64[0] << count_.u64[0];
1431
+ #endif
1432
+
1433
+ return simde__m64_from_private(r_);
1434
+ #endif
1435
+ }
1436
+ #define simde_m_psllq(a, count) simde_mm_sll_si64(a, count)
1437
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1438
+ # define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count)
1439
+ # define _m_psllq(a, count) simde_mm_sll_si64(a, count)
1440
+ #endif
1441
+
1442
+ SIMDE__FUNCTION_ATTRIBUTES
1443
+ simde__m64
1444
+ simde_mm_srl_pi16 (simde__m64 a, simde__m64 count) {
1445
+ #if defined(SIMDE_MMX_NATIVE)
1446
+ return _mm_srl_pi16(a, count);
1447
+ #else
1448
+ simde__m64_private r_;
1449
+ simde__m64_private a_ = simde__m64_to_private(a);
1450
+ simde__m64_private count_ = simde__m64_to_private(count);
1451
+
1452
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1453
+ r_.u16 = a_.u16 >> count_.u64[0];
1454
+ #elif defined(SIMDE_MMX_NEON)
1455
+ r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0))));
1456
+ #else
1457
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) {
1458
+ simde_memset(&r_, 0, sizeof(r_));
1459
+ return simde__m64_from_private(r_);
1460
+ }
1461
+
1462
+ SIMDE__VECTORIZE
1463
+ for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) {
1464
+ r_.u16[i] = a_.u16[i] >> count_.u64[0];
1465
+ }
1466
+ #endif
1467
+
1468
+ return simde__m64_from_private(r_);
1469
+ #endif
1470
+ }
1471
+ #define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count)
1472
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1473
+ # define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count)
1474
+ # define _m_psrlw(a, count) simde_mm_srl_pi16(a, count)
1475
+ #endif
1476
+
1477
+ SIMDE__FUNCTION_ATTRIBUTES
1478
+ simde__m64
1479
+ simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) {
1480
+ #if defined(SIMDE_MMX_NATIVE)
1481
+ return _mm_srl_pi32(a, count);
1482
+ #else
1483
+ simde__m64_private r_;
1484
+ simde__m64_private a_ = simde__m64_to_private(a);
1485
+ simde__m64_private count_ = simde__m64_to_private(count);
1486
+
1487
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1488
+ r_.u32 = a_.u32 >> count_.u64[0];
1489
+ #elif defined(SIMDE_MMX_NEON)
1490
+ r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0))));
1491
+ #else
1492
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) {
1493
+ simde_memset(&r_, 0, sizeof(r_));
1494
+ return simde__m64_from_private(r_);
1495
+ }
1496
+
1497
+ SIMDE__VECTORIZE
1498
+ for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) {
1499
+ r_.u32[i] = a_.u32[i] >> count_.u64[0];
1500
+ }
1501
+ #endif
1502
+
1503
+ return simde__m64_from_private(r_);
1504
+ #endif
1505
+ }
1506
+ #define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count)
1507
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1508
+ # define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count)
1509
+ # define _m_psrld(a, count) simde_mm_srl_pi32(a, count)
1510
+ #endif
1511
+
1512
+ SIMDE__FUNCTION_ATTRIBUTES
1513
+ simde__m64
1514
+ simde_mm_srli_pi16 (simde__m64 a, int count) {
1515
+ #if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
1516
+ return _mm_srli_pi16(a, count);
1517
+ #else
1518
+ simde__m64_private r_;
1519
+ simde__m64_private a_ = simde__m64_to_private(a);
1520
+
1521
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1522
+ r_.u16 = a_.u16 >> count;
1523
+ #elif defined(SIMDE_MMX_NEON)
1524
+ r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count)));
1525
+ #else
1526
+ SIMDE__VECTORIZE
1527
+ for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
1528
+ r_.u16[i] = a_.u16[i] >> count;
1529
+ }
1530
+ #endif
1531
+
1532
+ return simde__m64_from_private(r_);
1533
+ #endif
1534
+ }
1535
+ #define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count)
1536
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1537
+ # define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count)
1538
+ # define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count)
1539
+ #endif
1540
+
1541
+ SIMDE__FUNCTION_ATTRIBUTES
1542
+ simde__m64
1543
+ simde_mm_srli_pi32 (simde__m64 a, int count) {
1544
+ #if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
1545
+ return _mm_srli_pi32(a, count);
1546
+ #else
1547
+ simde__m64_private r_;
1548
+ simde__m64_private a_ = simde__m64_to_private(a);
1549
+
1550
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1551
+ r_.u32 = a_.u32 >> count;
1552
+ #elif defined(SIMDE_MMX_NEON)
1553
+ r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count)));
1554
+ #else
1555
+ SIMDE__VECTORIZE
1556
+ for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
1557
+ r_.u32[i] = a_.u32[i] >> count;
1558
+ }
1559
+ #endif
1560
+
1561
+ return simde__m64_from_private(r_);
1562
+ #endif
1563
+ }
1564
+ #define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count)
1565
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1566
+ # define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count)
1567
+ # define _m_psrldi(a, count) simde_mm_srli_pi32(a, count)
1568
+ #endif
1569
+
1570
+ SIMDE__FUNCTION_ATTRIBUTES
1571
+ simde__m64
1572
+ simde_mm_srli_si64 (simde__m64 a, int count) {
1573
+ #if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
1574
+ return _mm_srli_si64(a, count);
1575
+ #else
1576
+ simde__m64_private r_;
1577
+ simde__m64_private a_ = simde__m64_to_private(a);
1578
+
1579
+ #if defined(SIMDE_MMX_NEON)
1580
+ r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count));
1581
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1582
+ r_.u64 = a_.u64 >> count;
1583
+ #else
1584
+ r_.u64[0] = a_.u64[0] >> count;
1585
+ #endif
1586
+
1587
+ return simde__m64_from_private(r_);
1588
+ #endif
1589
+ }
1590
+ #define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count)
1591
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1592
+ # define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count)
1593
+ # define _m_psrlqi(a, count) simde_mm_srli_si64(a, count)
1594
+ #endif
1595
+
1596
+ SIMDE__FUNCTION_ATTRIBUTES
1597
+ simde__m64
1598
+ simde_mm_srl_si64 (simde__m64 a, simde__m64 count) {
1599
+ #if defined(SIMDE_MMX_NATIVE)
1600
+ return _mm_srl_si64(a, count);
1601
+ #else
1602
+ simde__m64_private r_;
1603
+ simde__m64_private a_ = simde__m64_to_private(a);
1604
+ simde__m64_private count_ = simde__m64_to_private(count);
1605
+
1606
+ #if defined(SIMDE_MMX_NEON) && defined(SIMDE_ARCH_AARCH64)
1607
+ r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64));
1608
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
1609
+ r_.u64 = a_.u64 >> count_.u64;
1610
+ #else
1611
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) {
1612
+ simde_memset(&r_, 0, sizeof(r_));
1613
+ return simde__m64_from_private(r_);
1614
+ }
1615
+
1616
+ r_.u64[0] = a_.u64[0] >> count_.u64[0];
1617
+ #endif
1618
+
1619
+ return simde__m64_from_private(r_);
1620
+ #endif
1621
+ }
1622
+ #define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count)
1623
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1624
+ # define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count)
1625
+ # define _m_psrlq(a, count) simde_mm_srl_si64(a, count)
1626
+ #endif
1627
+
1628
+ SIMDE__FUNCTION_ATTRIBUTES
1629
+ simde__m64
1630
+ simde_mm_srai_pi16 (simde__m64 a, int count) {
1631
+ #if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
1632
+ return _mm_srai_pi16(a, count);
1633
+ #else
1634
+ simde__m64_private r_;
1635
+ simde__m64_private a_ = simde__m64_to_private(a);
1636
+
1637
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1638
+ r_.i16 = a_.i16 >> (count & 0xff);
1639
+ #elif defined(SIMDE_MMX_NEON)
1640
+ r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count));
1641
+ #else
1642
+ SIMDE__VECTORIZE
1643
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
1644
+ r_.i16[i] = a_.i16[i] >> (count & 0xff);
1645
+ }
1646
+ #endif
1647
+
1648
+ return simde__m64_from_private(r_);
1649
+ #endif
1650
+ }
1651
+ #define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count)
1652
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1653
+ # define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count)
1654
+ # define _m_psrawi(a, count) simde_mm_srai_pi16(a, count)
1655
+ #endif
1656
+
1657
+ SIMDE__FUNCTION_ATTRIBUTES
1658
+ simde__m64
1659
+ simde_mm_srai_pi32 (simde__m64 a, int count) {
1660
+ #if defined(SIMDE_MMX_NATIVE) && !defined(__PGI)
1661
+ return _mm_srai_pi32(a, count);
1662
+ #else
1663
+ simde__m64_private r_;
1664
+ simde__m64_private a_ = simde__m64_to_private(a);
1665
+
1666
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1667
+ r_.i32 = a_.i32 >> (count & 0xff);
1668
+ #elif defined(SIMDE_MMX_NEON)
1669
+ r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count)));
1670
+ #else
1671
+ SIMDE__VECTORIZE
1672
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1673
+ r_.i32[i] = a_.i32[i] >> (count & 0xff);
1674
+ }
1675
+ #endif
1676
+
1677
+ return simde__m64_from_private(r_);
1678
+ #endif
1679
+ }
1680
+ #define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count)
1681
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1682
+ # define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count)
1683
+ # define _m_srai_pi32(a, count) simde_mm_srai_pi32(a, count)
1684
+ #endif
1685
+
1686
+ SIMDE__FUNCTION_ATTRIBUTES
1687
+ simde__m64
1688
+ simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) {
1689
+ #if defined(SIMDE_MMX_NATIVE)
1690
+ return _mm_sra_pi16(a, count);
1691
+ #else
1692
+ simde__m64_private r_;
1693
+ simde__m64_private a_ = simde__m64_to_private(a);
1694
+ simde__m64_private count_ = simde__m64_to_private(count);
1695
+ const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0]));
1696
+
1697
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1698
+ r_.i16 = a_.i16 >> cnt;
1699
+ #elif defined(SIMDE_MMX_NEON)
1700
+ r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0))));
1701
+ #else
1702
+ SIMDE__VECTORIZE
1703
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
1704
+ r_.i16[i] = a_.i16[i] >> cnt;
1705
+ }
1706
+ #endif
1707
+
1708
+ return simde__m64_from_private(r_);
1709
+ #endif
1710
+ }
1711
+ #define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count)
1712
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1713
+ # define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count)
1714
+ # define _m_psraw(a, count) simde_mm_sra_pi16(a, count)
1715
+ #endif
1716
+
1717
+ SIMDE__FUNCTION_ATTRIBUTES
1718
+ simde__m64
1719
+ simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) {
1720
+ #if defined(SIMDE_MMX_NATIVE)
1721
+ return _mm_sra_pi32(a, count);
1722
+ #else
1723
+ simde__m64_private r_;
1724
+ simde__m64_private a_ = simde__m64_to_private(a);
1725
+ simde__m64_private count_ = simde__m64_to_private(count);
1726
+ const int32_t cnt = (count_.u64[0] > 31) ? 31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]);
1727
+
1728
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1729
+ r_.i32 = a_.i32 >> cnt;
1730
+ #elif defined(SIMDE_MMX_NEON)
1731
+ r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0))));
1732
+ #else
1733
+ SIMDE__VECTORIZE
1734
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1735
+ r_.i32[i] = a_.i32[i] >> cnt;
1736
+ }
1737
+ #endif
1738
+
1739
+ return simde__m64_from_private(r_);
1740
+ #endif
1741
+ }
1742
+ #define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b)
1743
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1744
+ # define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count)
1745
+ # define _m_psrad(a, count) simde_mm_sra_pi32(a, count)
1746
+ #endif
1747
+
1748
+ SIMDE__FUNCTION_ATTRIBUTES
1749
+ simde__m64
1750
+ simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) {
1751
+ #if defined(SIMDE_MMX_NATIVE)
1752
+ return _mm_sub_pi8(a, b);
1753
+ #else
1754
+ simde__m64_private r_;
1755
+ simde__m64_private a_ = simde__m64_to_private(a);
1756
+ simde__m64_private b_ = simde__m64_to_private(b);
1757
+
1758
+ #if defined(SIMDE_MMX_NEON)
1759
+ r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8);
1760
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
1761
+ r_.i8 = a_.i8 - b_.i8;
1762
+ #else
1763
+ SIMDE__VECTORIZE
1764
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
1765
+ r_.i8[i] = a_.i8[i] - b_.i8[i];
1766
+ }
1767
+ #endif
1768
+
1769
+ return simde__m64_from_private(r_);
1770
+ #endif
1771
+ }
1772
+ #define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b)
1773
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1774
+ # define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b)
1775
+ # define _m_psubb(a, b) simde_mm_sub_pi8(a, b)
1776
+ #endif
1777
+
1778
+ SIMDE__FUNCTION_ATTRIBUTES
1779
+ simde__m64
1780
+ simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) {
1781
+ #if defined(SIMDE_MMX_NATIVE)
1782
+ return _mm_sub_pi16(a, b);
1783
+ #else
1784
+ simde__m64_private r_;
1785
+ simde__m64_private a_ = simde__m64_to_private(a);
1786
+ simde__m64_private b_ = simde__m64_to_private(b);
1787
+
1788
+ #if defined(SIMDE_MMX_NEON)
1789
+ r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16);
1790
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
1791
+ r_.i16 = a_.i16 - b_.i16;
1792
+ #else
1793
+ SIMDE__VECTORIZE
1794
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
1795
+ r_.i16[i] = a_.i16[i] - b_.i16[i];
1796
+ }
1797
+ #endif
1798
+
1799
+ return simde__m64_from_private(r_);
1800
+ #endif
1801
+ }
1802
+ #define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b)
1803
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1804
+ # define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b)
1805
+ # define _m_psubw(a, b) simde_mm_sub_pi16(a, b)
1806
+ #endif
1807
+
1808
+ SIMDE__FUNCTION_ATTRIBUTES
1809
+ simde__m64
1810
+ simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) {
1811
+ #if defined(SIMDE_MMX_NATIVE)
1812
+ return _mm_sub_pi32(a, b);
1813
+ #else
1814
+ simde__m64_private r_;
1815
+ simde__m64_private a_ = simde__m64_to_private(a);
1816
+ simde__m64_private b_ = simde__m64_to_private(b);
1817
+
1818
+ #if defined(SIMDE_MMX_NEON)
1819
+ r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32);
1820
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
1821
+ r_.i32 = a_.i32 - b_.i32;
1822
+ #else
1823
+ SIMDE__VECTORIZE
1824
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1825
+ r_.i32[i] = a_.i32[i] - b_.i32[i];
1826
+ }
1827
+ #endif
1828
+
1829
+ return simde__m64_from_private(r_);
1830
+ #endif
1831
+ }
1832
+ #define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b)
1833
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1834
+ # define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b)
1835
+ # define _m_psubd(a, b) simde_mm_sub_pi32(a, b)
1836
+ #endif
1837
+
1838
+ SIMDE__FUNCTION_ATTRIBUTES
1839
+ simde__m64
1840
+ simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) {
1841
+ #if defined(SIMDE_MMX_NATIVE)
1842
+ return _mm_subs_pi8(a, b);
1843
+ #else
1844
+ simde__m64_private r_;
1845
+ simde__m64_private a_ = simde__m64_to_private(a);
1846
+ simde__m64_private b_ = simde__m64_to_private(b);
1847
+
1848
+ #if defined(SIMDE_MMX_NEON)
1849
+ r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8);
1850
+ #else
1851
+ SIMDE__VECTORIZE
1852
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
1853
+ if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) {
1854
+ r_.i8[i] = INT8_MIN;
1855
+ } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) {
1856
+ r_.i8[i] = INT8_MAX;
1857
+ } else {
1858
+ r_.i8[i] = (a_.i8[i]) - (b_.i8[i]);
1859
+ }
1860
+ }
1861
+ #endif
1862
+
1863
+ return simde__m64_from_private(r_);
1864
+ #endif
1865
+ }
1866
+ #define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b)
1867
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1868
+ # define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b)
1869
+ # define _m_psubsb(a, b) simde_mm_subs_pi8(a, b)
1870
+ #endif
1871
+
1872
+ SIMDE__FUNCTION_ATTRIBUTES
1873
+ simde__m64
1874
+ simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) {
1875
+ #if defined(SIMDE_MMX_NATIVE)
1876
+ return _mm_subs_pu8(a, b);
1877
+ #else
1878
+ simde__m64_private r_;
1879
+ simde__m64_private a_ = simde__m64_to_private(a);
1880
+ simde__m64_private b_ = simde__m64_to_private(b);
1881
+
1882
+ #if defined(SIMDE_MMX_NEON)
1883
+ r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8);
1884
+ #else
1885
+ SIMDE__VECTORIZE
1886
+ for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
1887
+ const int32_t x = a_.u8[i] - b_.u8[i];
1888
+ if (x < 0) {
1889
+ r_.u8[i] = 0;
1890
+ } else if (x > UINT8_MAX) {
1891
+ r_.u8[i] = UINT8_MAX;
1892
+ } else {
1893
+ r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x);
1894
+ }
1895
+ }
1896
+ #endif
1897
+
1898
+ return simde__m64_from_private(r_);
1899
+ #endif
1900
+ }
1901
+ #define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b)
1902
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1903
+ # define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b)
1904
+ # define _m_psubusb(a, b) simde_mm_subs_pu8(a, b)
1905
+ #endif
1906
+
1907
+ SIMDE__FUNCTION_ATTRIBUTES
1908
+ simde__m64
1909
+ simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) {
1910
+ #if defined(SIMDE_MMX_NATIVE)
1911
+ return _mm_subs_pi16(a, b);
1912
+ #else
1913
+ simde__m64_private r_;
1914
+ simde__m64_private a_ = simde__m64_to_private(a);
1915
+ simde__m64_private b_ = simde__m64_to_private(b);
1916
+
1917
+ #if defined(SIMDE_MMX_NEON)
1918
+ r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16);
1919
+ #else
1920
+ SIMDE__VECTORIZE
1921
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
1922
+ if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) {
1923
+ r_.i16[i] = SHRT_MIN;
1924
+ } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) {
1925
+ r_.i16[i] = INT16_MAX;
1926
+ } else {
1927
+ r_.i16[i] = (a_.i16[i]) - (b_.i16[i]);
1928
+ }
1929
+ }
1930
+ #endif
1931
+
1932
+ return simde__m64_from_private(r_);
1933
+ #endif
1934
+ }
1935
+ #define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b)
1936
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1937
+ # define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b)
1938
+ # define _m_psubsw(a, b) simde_mm_subs_pi16(a, b)
1939
+ #endif
1940
+
1941
+ SIMDE__FUNCTION_ATTRIBUTES
1942
+ simde__m64
1943
+ simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) {
1944
+ #if defined(SIMDE_MMX_NATIVE)
1945
+ return _mm_subs_pu16(a, b);
1946
+ #else
1947
+ simde__m64_private r_;
1948
+ simde__m64_private a_ = simde__m64_to_private(a);
1949
+ simde__m64_private b_ = simde__m64_to_private(b);
1950
+
1951
+ #if defined(SIMDE_MMX_NEON)
1952
+ r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16);
1953
+ #else
1954
+ SIMDE__VECTORIZE
1955
+ for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
1956
+ const int x = a_.u16[i] - b_.u16[i];
1957
+ if (x < 0) {
1958
+ r_.u16[i] = 0;
1959
+ } else if (x > UINT16_MAX) {
1960
+ r_.u16[i] = UINT16_MAX;
1961
+ } else {
1962
+ r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x);
1963
+ }
1964
+ }
1965
+ #endif
1966
+
1967
+ return simde__m64_from_private(r_);
1968
+ #endif
1969
+ }
1970
+ #define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b)
1971
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
1972
+ # define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b)
1973
+ # define _m_psubusw(a, b) simde_mm_subs_pu16(a, b)
1974
+ #endif
1975
+
1976
+ SIMDE__FUNCTION_ATTRIBUTES
1977
+ simde__m64
1978
+ simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) {
1979
+ #if defined(SIMDE_MMX_NATIVE)
1980
+ return _mm_unpackhi_pi8(a, b);
1981
+ #else
1982
+ simde__m64_private r_;
1983
+ simde__m64_private a_ = simde__m64_to_private(a);
1984
+ simde__m64_private b_ = simde__m64_to_private(b);
1985
+
1986
+ #if defined(SIMDE_MMX_NEON) && defined(SIMDE_ARCH_AARCH64)
1987
+ r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8);
1988
+ #elif defined(SIMDE__SHUFFLE_VECTOR)
1989
+ r_.i8 = SIMDE__SHUFFLE_VECTOR(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15);
1990
+ #else
1991
+ r_.i8[0] = a_.i8[4];
1992
+ r_.i8[1] = b_.i8[4];
1993
+ r_.i8[2] = a_.i8[5];
1994
+ r_.i8[3] = b_.i8[5];
1995
+ r_.i8[4] = a_.i8[6];
1996
+ r_.i8[5] = b_.i8[6];
1997
+ r_.i8[6] = a_.i8[7];
1998
+ r_.i8[7] = b_.i8[7];
1999
+ #endif
2000
+
2001
+ return simde__m64_from_private(r_);
2002
+ #endif
2003
+ }
2004
+ #define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b)
2005
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
2006
+ # define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b)
2007
+ # define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b)
2008
+ #endif
2009
+
2010
+ SIMDE__FUNCTION_ATTRIBUTES
2011
+ simde__m64
2012
+ simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) {
2013
+ #if defined(SIMDE_MMX_NATIVE)
2014
+ return _mm_unpackhi_pi16(a, b);
2015
+ #else
2016
+ simde__m64_private r_;
2017
+ simde__m64_private a_ = simde__m64_to_private(a);
2018
+ simde__m64_private b_ = simde__m64_to_private(b);
2019
+
2020
+ #if defined(SIMDE_MMX_NEON) && defined(SIMDE_ARCH_AARCH64)
2021
+ r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16);
2022
+ #elif defined(SIMDE__SHUFFLE_VECTOR)
2023
+ r_.i16 = SIMDE__SHUFFLE_VECTOR(16, 8, a_.i16, b_.i16, 2, 6, 3, 7);
2024
+ #else
2025
+ r_.i16[0] = a_.i16[2];
2026
+ r_.i16[1] = b_.i16[2];
2027
+ r_.i16[2] = a_.i16[3];
2028
+ r_.i16[3] = b_.i16[3];
2029
+ #endif
2030
+
2031
+ return simde__m64_from_private(r_);
2032
+ #endif
2033
+ }
2034
+ #define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b)
2035
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
2036
+ # define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b)
2037
+ # define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b)
2038
+ #endif
2039
+
2040
+ SIMDE__FUNCTION_ATTRIBUTES
2041
+ simde__m64
2042
+ simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) {
2043
+ #if defined(SIMDE_MMX_NATIVE)
2044
+ return _mm_unpackhi_pi32(a, b);
2045
+ #else
2046
+ simde__m64_private r_;
2047
+ simde__m64_private a_ = simde__m64_to_private(a);
2048
+ simde__m64_private b_ = simde__m64_to_private(b);
2049
+
2050
+ #if defined(SIMDE_MMX_NEON) && defined(SIMDE_ARCH_AARCH64)
2051
+ r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32);
2052
+ #elif defined(SIMDE__SHUFFLE_VECTOR)
2053
+ r_.i32 = SIMDE__SHUFFLE_VECTOR(32, 8, a_.i32, b_.i32, 1, 3);
2054
+ #else
2055
+ r_.i32[0] = a_.i32[1];
2056
+ r_.i32[1] = b_.i32[1];
2057
+ #endif
2058
+
2059
+ return simde__m64_from_private(r_);
2060
+ #endif
2061
+ }
2062
+ #define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b)
2063
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
2064
+ # define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b)
2065
+ # define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b)
2066
+ #endif
2067
+
2068
+ SIMDE__FUNCTION_ATTRIBUTES
2069
+ simde__m64
2070
+ simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) {
2071
+ #if defined(SIMDE_MMX_NATIVE)
2072
+ return _mm_unpacklo_pi8(a, b);
2073
+ #else
2074
+ simde__m64_private r_;
2075
+ simde__m64_private a_ = simde__m64_to_private(a);
2076
+ simde__m64_private b_ = simde__m64_to_private(b);
2077
+
2078
+ #if defined(SIMDE_MMX_NEON) && defined(SIMDE_ARCH_AARCH64)
2079
+ r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8);
2080
+ #elif defined(SIMDE__SHUFFLE_VECTOR)
2081
+ r_.i8 = SIMDE__SHUFFLE_VECTOR(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11);
2082
+ #else
2083
+ r_.i8[0] = a_.i8[0];
2084
+ r_.i8[1] = b_.i8[0];
2085
+ r_.i8[2] = a_.i8[1];
2086
+ r_.i8[3] = b_.i8[1];
2087
+ r_.i8[4] = a_.i8[2];
2088
+ r_.i8[5] = b_.i8[2];
2089
+ r_.i8[6] = a_.i8[3];
2090
+ r_.i8[7] = b_.i8[3];
2091
+ #endif
2092
+
2093
+ return simde__m64_from_private(r_);
2094
+ #endif
2095
+ }
2096
+ #define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b)
2097
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
2098
+ # define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b)
2099
+ # define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b)
2100
+ #endif
2101
+
2102
+ SIMDE__FUNCTION_ATTRIBUTES
2103
+ simde__m64
2104
+ simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) {
2105
+ #if defined(SIMDE_MMX_NATIVE)
2106
+ return _mm_unpacklo_pi16(a, b);
2107
+ #else
2108
+ simde__m64_private r_;
2109
+ simde__m64_private a_ = simde__m64_to_private(a);
2110
+ simde__m64_private b_ = simde__m64_to_private(b);
2111
+
2112
+ #if defined(SIMDE_MMX_NEON) && defined(SIMDE_ARCH_AARCH64)
2113
+ r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16);
2114
+ #elif defined(SIMDE__SHUFFLE_VECTOR)
2115
+ r_.i16 = SIMDE__SHUFFLE_VECTOR(16, 8, a_.i16, b_.i16, 0, 4, 1, 5);
2116
+ #else
2117
+ r_.i16[0] = a_.i16[0];
2118
+ r_.i16[1] = b_.i16[0];
2119
+ r_.i16[2] = a_.i16[1];
2120
+ r_.i16[3] = b_.i16[1];
2121
+ #endif
2122
+
2123
+ return simde__m64_from_private(r_);
2124
+ #endif
2125
+ }
2126
+ #define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b)
2127
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
2128
+ # define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b)
2129
+ # define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b)
2130
+ #endif
2131
+
2132
+ SIMDE__FUNCTION_ATTRIBUTES
2133
+ simde__m64
2134
+ simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) {
2135
+ #if defined(SIMDE_MMX_NATIVE)
2136
+ return _mm_unpacklo_pi32(a, b);
2137
+ #else
2138
+ simde__m64_private r_;
2139
+ simde__m64_private a_ = simde__m64_to_private(a);
2140
+ simde__m64_private b_ = simde__m64_to_private(b);
2141
+
2142
+ #if defined(SIMDE_MMX_NEON) && defined(SIMDE_ARCH_AARCH64)
2143
+ r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32);
2144
+ #elif defined(SIMDE__SHUFFLE_VECTOR)
2145
+ r_.i32 = SIMDE__SHUFFLE_VECTOR(32, 8, a_.i32, b_.i32, 0, 2);
2146
+ #else
2147
+ r_.i32[0] = a_.i32[0];
2148
+ r_.i32[1] = b_.i32[0];
2149
+ #endif
2150
+
2151
+ return simde__m64_from_private(r_);
2152
+ #endif
2153
+ }
2154
+ #define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b)
2155
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
2156
+ # define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b)
2157
+ # define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b)
2158
+ #endif
2159
+
2160
+ SIMDE__FUNCTION_ATTRIBUTES
2161
+ simde__m64
2162
+ simde_mm_xor_si64 (simde__m64 a, simde__m64 b) {
2163
+ #if defined(SIMDE_MMX_NATIVE)
2164
+ return _mm_xor_si64(a, b);
2165
+ #else
2166
+ simde__m64_private r_;
2167
+ simde__m64_private a_ = simde__m64_to_private(a);
2168
+ simde__m64_private b_ = simde__m64_to_private(b);
2169
+
2170
+ #if defined(SIMDE_MMX_NEON)
2171
+ r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32);
2172
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2173
+ r_.i32f = a_.i32f ^ b_.i32f;
2174
+ #else
2175
+ r_.u64[0] = a_.u64[0] ^ b_.u64[0];
2176
+ #endif
2177
+
2178
+ return simde__m64_from_private(r_);
2179
+ #endif
2180
+ }
2181
+ #define simde_m_pxor(a, b) simde_mm_xor_si64(a, b)
2182
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
2183
+ # define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b)
2184
+ # define _m_pxor(a, b) simde_mm_xor_si64(a, b)
2185
+ #endif
2186
+
2187
+ SIMDE__FUNCTION_ATTRIBUTES
2188
+ int32_t
2189
+ simde_m_to_int (simde__m64 a) {
2190
+ #if defined(SIMDE_MMX_NATIVE)
2191
+ return _m_to_int(a);
2192
+ #else
2193
+ simde__m64_private a_ = simde__m64_to_private(a);
2194
+
2195
+ #if defined(SIMDE_MMX_NEON)
2196
+ return vget_lane_s32(a_.neon_i32, 0);
2197
+ #else
2198
+ return a_.i32[0];
2199
+ #endif
2200
+ #endif
2201
+ }
2202
+ #if defined(SIMDE_MMX_ENABLE_NATIVE_ALIASES)
2203
+ # define _m_to_int(a) simde_m_to_int(a)
2204
+ #endif
2205
+
2206
+ SIMDE__END_DECLS
2207
+
2208
+ HEDLEY_DIAGNOSTIC_POP
2209
+
2210
+ #endif /* !defined(SIMDE__MMX_H) */