noobs 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. package/COPYING +339 -0
  2. package/README.md +46 -0
  3. package/bin/64bit/obs.lib +0 -0
  4. package/binding.gyp +23 -0
  5. package/dist/bin/Qt6Core.dll +0 -0
  6. package/dist/bin/Qt6Gui.dll +0 -0
  7. package/dist/bin/Qt6Network.dll +0 -0
  8. package/dist/bin/Qt6Svg.dll +0 -0
  9. package/dist/bin/Qt6Widgets.dll +0 -0
  10. package/dist/bin/Qt6Xml.dll +0 -0
  11. package/dist/bin/avcodec-61.dll +0 -0
  12. package/dist/bin/avdevice-61.dll +0 -0
  13. package/dist/bin/avfilter-10.dll +0 -0
  14. package/dist/bin/avformat-61.dll +0 -0
  15. package/dist/bin/avutil-59.dll +0 -0
  16. package/dist/bin/datachannel.dll +0 -0
  17. package/dist/bin/libcurl.dll +0 -0
  18. package/dist/bin/libobs-d3d11.dll +0 -0
  19. package/dist/bin/libobs-opengl.dll +0 -0
  20. package/dist/bin/libobs-winrt.dll +0 -0
  21. package/dist/bin/librist.dll +0 -0
  22. package/dist/bin/libx264-164.dll +0 -0
  23. package/dist/bin/lua51.dll +0 -0
  24. package/dist/bin/obs-amf-test.exe +0 -0
  25. package/dist/bin/obs-ffmpeg-mux.exe +0 -0
  26. package/dist/bin/obs-frontend-api.dll +0 -0
  27. package/dist/bin/obs-scripting.dll +0 -0
  28. package/dist/bin/obs.dll +0 -0
  29. package/dist/bin/srt.dll +0 -0
  30. package/dist/bin/swresample-5.dll +0 -0
  31. package/dist/bin/swscale-8.dll +0 -0
  32. package/dist/bin/w32-pthreads.dll +0 -0
  33. package/dist/bin/zlib.dll +0 -0
  34. package/dist/effects/area.effect +250 -0
  35. package/dist/effects/bicubic_scale.effect +236 -0
  36. package/dist/effects/bilinear_lowres_scale.effect +123 -0
  37. package/dist/effects/color.effect +172 -0
  38. package/dist/effects/default.effect +254 -0
  39. package/dist/effects/default_rect.effect +84 -0
  40. package/dist/effects/deinterlace_base.effect +325 -0
  41. package/dist/effects/deinterlace_blend.effect +21 -0
  42. package/dist/effects/deinterlace_blend_2x.effect +21 -0
  43. package/dist/effects/deinterlace_discard.effect +21 -0
  44. package/dist/effects/deinterlace_discard_2x.effect +21 -0
  45. package/dist/effects/deinterlace_linear.effect +21 -0
  46. package/dist/effects/deinterlace_linear_2x.effect +21 -0
  47. package/dist/effects/deinterlace_yadif.effect +21 -0
  48. package/dist/effects/deinterlace_yadif_2x.effect +21 -0
  49. package/dist/effects/format_conversion.effect +1823 -0
  50. package/dist/effects/lanczos_scale.effect +292 -0
  51. package/dist/effects/opaque.effect +159 -0
  52. package/dist/effects/premultiplied_alpha.effect +38 -0
  53. package/dist/effects/repeat.effect +36 -0
  54. package/dist/effects/solid.effect +80 -0
  55. package/dist/noobs.node +0 -0
  56. package/dist/plugins/obs-ffmpeg.dll +0 -0
  57. package/dist/plugins/obs-x264.dll +0 -0
  58. package/dist/plugins/win-capture.dll +0 -0
  59. package/include/audio-monitoring/osx/mac-helpers.h +13 -0
  60. package/include/audio-monitoring/pulse/pulseaudio-wrapper.h +212 -0
  61. package/include/audio-monitoring/win32/wasapi-output.h +22 -0
  62. package/include/callback/calldata.h +195 -0
  63. package/include/callback/decl.h +61 -0
  64. package/include/callback/proc.h +52 -0
  65. package/include/callback/signal.h +73 -0
  66. package/include/graphics/axisang.h +65 -0
  67. package/include/graphics/bounds.h +108 -0
  68. package/include/graphics/device-exports.h +177 -0
  69. package/include/graphics/effect-parser.h +290 -0
  70. package/include/graphics/effect.h +190 -0
  71. package/include/graphics/graphics-internal.h +335 -0
  72. package/include/graphics/graphics.h +1024 -0
  73. package/include/graphics/half.h +100 -0
  74. package/include/graphics/image-file.h +124 -0
  75. package/include/graphics/input.h +34 -0
  76. package/include/graphics/libnsgif/libnsgif.h +142 -0
  77. package/include/graphics/math-defs.h +45 -0
  78. package/include/graphics/math-extra.h +61 -0
  79. package/include/graphics/matrix3.h +98 -0
  80. package/include/graphics/matrix4.h +102 -0
  81. package/include/graphics/plane.h +85 -0
  82. package/include/graphics/quat.h +170 -0
  83. package/include/graphics/shader-parser.h +273 -0
  84. package/include/graphics/srgb.h +177 -0
  85. package/include/graphics/vec2.h +148 -0
  86. package/include/graphics/vec3.h +224 -0
  87. package/include/graphics/vec4.h +241 -0
  88. package/include/media-io/audio-io.h +228 -0
  89. package/include/media-io/audio-math.h +43 -0
  90. package/include/media-io/audio-resampler.h +44 -0
  91. package/include/media-io/format-conversion.h +50 -0
  92. package/include/media-io/frame-rate.h +29 -0
  93. package/include/media-io/media-io-defs.h +20 -0
  94. package/include/media-io/media-remux.h +37 -0
  95. package/include/media-io/video-frame.h +64 -0
  96. package/include/media-io/video-io.h +338 -0
  97. package/include/media-io/video-scaler.h +43 -0
  98. package/include/obs-audio-controls.h +250 -0
  99. package/include/obs-av1.h +47 -0
  100. package/include/obs-avc.h +55 -0
  101. package/include/obs-config.h +52 -0
  102. package/include/obs-data.h +311 -0
  103. package/include/obs-defs.h +52 -0
  104. package/include/obs-encoder.h +361 -0
  105. package/include/obs-ffmpeg-compat.h +13 -0
  106. package/include/obs-hevc.h +81 -0
  107. package/include/obs-hotkey.h +271 -0
  108. package/include/obs-hotkeys.h +653 -0
  109. package/include/obs-interaction.h +56 -0
  110. package/include/obs-internal.h +1459 -0
  111. package/include/obs-missing-files.h +53 -0
  112. package/include/obs-module.h +181 -0
  113. package/include/obs-nal.h +37 -0
  114. package/include/obs-nix-platform.h +53 -0
  115. package/include/obs-nix-wayland.h +24 -0
  116. package/include/obs-nix-x11.h +22 -0
  117. package/include/obs-nix.h +42 -0
  118. package/include/obs-output.h +96 -0
  119. package/include/obs-properties.h +364 -0
  120. package/include/obs-scene.h +127 -0
  121. package/include/obs-service.h +115 -0
  122. package/include/obs-source.h +568 -0
  123. package/include/obs.h +2608 -0
  124. package/include/obsconfig.h +13 -0
  125. package/include/obsversion.h +5 -0
  126. package/include/util/apple/cfstring-utils.h +16 -0
  127. package/include/util/array-serializer.h +37 -0
  128. package/include/util/base.h +97 -0
  129. package/include/util/bitstream.h +28 -0
  130. package/include/util/bmem.h +94 -0
  131. package/include/util/buffered-file-serializer.h +32 -0
  132. package/include/util/c99defs.h +75 -0
  133. package/include/util/cf-lexer.h +199 -0
  134. package/include/util/cf-parser.h +281 -0
  135. package/include/util/circlebuf.h +319 -0
  136. package/include/util/config-file.h +103 -0
  137. package/include/util/crc32.h +29 -0
  138. package/include/util/curl/curl-helper.h +35 -0
  139. package/include/util/darray.h +606 -0
  140. package/include/util/deque.h +319 -0
  141. package/include/util/dstr.h +320 -0
  142. package/include/util/file-serializer.h +34 -0
  143. package/include/util/lexer.h +273 -0
  144. package/include/util/pipe.h +52 -0
  145. package/include/util/platform.h +223 -0
  146. package/include/util/profiler.h +97 -0
  147. package/include/util/serializer.h +158 -0
  148. package/include/util/simde/check.h +285 -0
  149. package/include/util/simde/debug-trap.h +117 -0
  150. package/include/util/simde/hedley.h +2123 -0
  151. package/include/util/simde/simde-align.h +481 -0
  152. package/include/util/simde/simde-arch.h +537 -0
  153. package/include/util/simde/simde-common.h +918 -0
  154. package/include/util/simde/simde-constify.h +925 -0
  155. package/include/util/simde/simde-detect-clang.h +114 -0
  156. package/include/util/simde/simde-diagnostic.h +447 -0
  157. package/include/util/simde/simde-features.h +550 -0
  158. package/include/util/simde/simde-math.h +1858 -0
  159. package/include/util/simde/x86/mmx.h +2456 -0
  160. package/include/util/simde/x86/sse.h +4479 -0
  161. package/include/util/simde/x86/sse2.h +7549 -0
  162. package/include/util/source-profiler.h +66 -0
  163. package/include/util/sse-intrin.h +32 -0
  164. package/include/util/task.h +22 -0
  165. package/include/util/text-lookup.h +45 -0
  166. package/include/util/threading-posix.h +77 -0
  167. package/include/util/threading-windows.h +142 -0
  168. package/include/util/threading.h +103 -0
  169. package/include/util/utf8.h +35 -0
  170. package/include/util/uthash.h +34 -0
  171. package/include/util/util_uint128.h +108 -0
  172. package/include/util/util_uint64.h +34 -0
  173. package/include/util/windows/device-enum.h +14 -0
  174. package/include/util/windows/obfuscate.h +16 -0
  175. package/include/util/windows/win-registry.h +37 -0
  176. package/include/util/windows/win-version.h +57 -0
  177. package/include/util/windows/window-helpers.h +47 -0
  178. package/index.d.ts +38 -0
  179. package/index.js +8 -0
  180. package/package.json +31 -0
  181. package/src/main.cpp +321 -0
  182. package/src/obs_interface.cpp +605 -0
  183. package/src/obs_interface.h +74 -0
  184. package/src/utils.cpp +80 -0
  185. package/src/utils.h +3 -0
@@ -0,0 +1,2456 @@
1
+ /* SPDX-License-Identifier: MIT
2
+ *
3
+ * Permission is hereby granted, free of charge, to any person
4
+ * obtaining a copy of this software and associated documentation
5
+ * files (the "Software"), to deal in the Software without
6
+ * restriction, including without limitation the rights to use, copy,
7
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
8
+ * of the Software, and to permit persons to whom the Software is
9
+ * furnished to do so, subject to the following conditions:
10
+ *
11
+ * The above copyright notice and this permission notice shall be
12
+ * included in all copies or substantial portions of the Software.
13
+ *
14
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ * SOFTWARE.
22
+ *
23
+ * Copyright:
24
+ * 2017-2020 Evan Nemerson <evan@nemerson.com>
25
+ */
26
+
27
+ #if !defined(SIMDE_X86_MMX_H)
28
+ #define SIMDE_X86_MMX_H
29
+
30
+ #include "../simde-common.h"
31
+
32
+ HEDLEY_DIAGNOSTIC_PUSH
33
+ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
34
+
35
+ #if defined(SIMDE_X86_MMX_NATIVE)
36
+ #define SIMDE_X86_MMX_USE_NATIVE_TYPE
37
+ #elif defined(SIMDE_X86_SSE_NATIVE)
38
+ #define SIMDE_X86_MMX_USE_NATIVE_TYPE
39
+ #endif
40
+
41
+ #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)
42
+ #include <mmintrin.h>
43
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
44
+ #include <arm_neon.h>
45
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
46
+ #include <loongson-mmiintrin.h>
47
+ #endif
48
+
49
+ #include <stdint.h>
50
+ #include <limits.h>
51
+
52
+ SIMDE_BEGIN_DECLS_
53
+
/*
 * simde__m64_private: union giving several views of one 64-bit MMX value.
 *
 * The members fall into two groups:
 *   - generic lane views (i8/i16/i32/i64, u8/u16/u32/u64, f32, i32f, u32f),
 *     declared either as SIMDE_VECTOR(8) types or as plain arrays;
 *   - native views (n, neon_*, mmi_*) that exist only when the matching
 *     SIMDE_*_NATIVE / SIMDE_X86_MMX_USE_NATIVE_TYPE macro is defined.
 *
 * A static assertion later in this header requires sizeof() of this union
 * to be exactly 8.
 */
54
+ typedef union {
/* Vector-typed lane views; SIMDE_VECTOR(8) is presumably an 8-byte vector
 * attribute -- confirm against simde-common.h. */
55
+ #if defined(SIMDE_VECTOR_SUBSCRIPT)
56
+ SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
57
+ SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
58
+ SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
59
+ SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
60
+ SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
61
+ SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
62
+ SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
63
+ SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
64
+ SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
65
+ SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
66
+ SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
/* Plain-array lane views: 8x8-bit, 4x16-bit, 2x32-bit, 1x64-bit. The
 * i32f/u32f arrays are sized as 8 / sizeof(element). */
67
+ #else
68
+ SIMDE_ALIGN_TO_8 int8_t i8[8];
69
+ SIMDE_ALIGN_TO_8 int16_t i16[4];
70
+ SIMDE_ALIGN_TO_8 int32_t i32[2];
71
+ SIMDE_ALIGN_TO_8 int64_t i64[1];
72
+ SIMDE_ALIGN_TO_8 uint8_t u8[8];
73
+ SIMDE_ALIGN_TO_8 uint16_t u16[4];
74
+ SIMDE_ALIGN_TO_8 uint32_t u32[2];
75
+ SIMDE_ALIGN_TO_8 uint64_t u64[1];
76
+ SIMDE_ALIGN_TO_8 simde_float32 f32[2];
77
+ SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)];
78
+ SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)];
79
+ #endif
80
+
/* Native x86 view, present when <mmintrin.h> types are in use. */
81
+ #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)
82
+ __m64 n;
83
+ #endif
/* ARM NEON 64-bit vector views. */
84
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
85
+ int8x8_t neon_i8;
86
+ int16x4_t neon_i16;
87
+ int32x2_t neon_i32;
88
+ int64x1_t neon_i64;
89
+ uint8x8_t neon_u8;
90
+ uint16x4_t neon_u16;
91
+ uint32x2_t neon_u32;
92
+ uint64x1_t neon_u64;
93
+ float32x2_t neon_f32;
94
+ #endif
/* Loongson MMI views; the 64-bit lanes are plain int64_t / uint64_t here. */
95
+ #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
96
+ int8x8_t mmi_i8;
97
+ int16x4_t mmi_i16;
98
+ int32x2_t mmi_i32;
99
+ int64_t mmi_i64;
100
+ uint8x8_t mmi_u8;
101
+ uint16x4_t mmi_u16;
102
+ uint32x2_t mmi_u32;
103
+ uint64_t mmi_u64;
104
+ #endif
105
+ } simde__m64_private;
106
+
/*
 * simde__m64: the public 64-bit vector type. The first matching branch wins:
 * native __m64, then int32x2_t (NEON or Loongson MMI), then an 8-byte
 * int32_t vector, and finally the private union itself.
 */
107
+ #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)
108
+ typedef __m64 simde__m64;
109
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
110
+ typedef int32x2_t simde__m64;
111
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
112
+ typedef int32x2_t simde__m64;
113
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT)
114
+ typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
115
+ #else
116
+ typedef simde__m64_private simde__m64;
117
+ #endif
118
+
/*
 * When the native type is not in use and the user asked for native aliases,
 * define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES (which gates the _mm_* / _m_*
 * macro aliases in this header) and provide __m64 as an alias of simde__m64.
 */
119
+ #if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && \
120
+ defined(SIMDE_ENABLE_NATIVE_ALIASES)
121
+ #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES
122
+ typedef simde__m64 __m64;
123
+ #endif
124
+
/*
 * Compile-time checks: both the public and the private type must be exactly
 * 8 bytes. The 8-byte alignment checks are compiled only when
 * SIMDE_CHECK_ALIGNMENT and SIMDE_ALIGN_OF are both defined.
 */
125
+ HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect");
126
+ HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect");
127
+ #if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)
128
+ HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8,
129
+ "simde__m64 is not 8-byte aligned");
130
+ HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8,
131
+ "simde__m64_private is not 8-byte aligned");
132
+ #endif
133
+
134
+ SIMDE_FUNCTION_ATTRIBUTES
135
+ simde__m64 simde__m64_from_private(simde__m64_private v)
136
+ {
137
+ simde__m64 r;
138
+ simde_memcpy(&r, &v, sizeof(r));
139
+ return r;
140
+ }
141
+
142
+ SIMDE_FUNCTION_ATTRIBUTES
143
+ simde__m64_private simde__m64_to_private(simde__m64 v)
144
+ {
145
+ simde__m64_private r;
146
+ simde_memcpy(&r, &v, sizeof(r));
147
+ return r;
148
+ }
149
+
/*
 * SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment)
 *
 * Expands to two functions that move a value between a native vector type
 * and the public simde__<simde_type> type:
 *
 *   simde__<simde_type> simde__<simde_type>_from_<isax>_<fragment>(source_type value)
 *       stores `value` into the private union member <isax>_<fragment> and
 *       converts the union with simde__<simde_type>_from_private().
 *
 *   source_type simde__<simde_type>_to_<isax>_<fragment>(simde__<simde_type> value)
 *       converts `value` with simde__<simde_type>_to_private() and returns
 *       the union member <isax>_<fragment>.
 *
 * The private union must therefore have a member named <isax>_<fragment>
 * of type source_type.
 */
150
+ #define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, \
151
+ fragment) \
152
+ SIMDE_FUNCTION_ATTRIBUTES \
153
+ simde__##simde_type simde__##simde_type##_from_##isax##_##fragment( \
154
+ source_type value) \
155
+ { \
156
+ simde__##simde_type##_private r_; \
157
+ r_.isax##_##fragment = value; \
158
+ return simde__##simde_type##_from_private(r_); \
159
+ } \
160
+ \
161
+ SIMDE_FUNCTION_ATTRIBUTES \
162
+ source_type simde__##simde_type##_to_##isax##_##fragment( \
163
+ simde__##simde_type value) \
164
+ { \
165
+ simde__##simde_type##_private r_ = \
166
+ simde__##simde_type##_to_private(value); \
167
+ return r_.isax##_##fragment; \
168
+ }
169
+
/* NEON converters: simde__m64_from_neon_<lane>() / simde__m64_to_neon_<lane>(),
 * one pair per neon_* member of simde__m64_private. */
170
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
171
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8)
172
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16)
173
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32)
174
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64)
175
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8)
176
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16)
177
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32)
178
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64)
179
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32)
180
+ #endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */
181
+
/* Loongson MMI converters: simde__m64_from_mmi_<lane>() / simde__m64_to_mmi_<lane>(),
 * one pair per mmi_* member of simde__m64_private. */
182
+ #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
183
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8)
184
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16)
185
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32)
186
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64)
187
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8)
188
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16)
189
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32)
190
+ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64)
191
+ #endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */
192
+
193
+ SIMDE_FUNCTION_ATTRIBUTES
194
+ simde__m64 simde_mm_add_pi8(simde__m64 a, simde__m64 b)
195
+ {
196
+ #if defined(SIMDE_X86_MMX_NATIVE)
197
+ return _mm_add_pi8(a, b);
198
+ #else
199
+ simde__m64_private r_;
200
+ simde__m64_private a_ = simde__m64_to_private(a);
201
+ simde__m64_private b_ = simde__m64_to_private(b);
202
+
203
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
204
+ r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8);
205
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
206
+ r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8);
207
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
208
+ r_.i8 = a_.i8 + b_.i8;
209
+ #else
210
+ SIMDE_VECTORIZE
211
+ for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
212
+ r_.i8[i] = a_.i8[i] + b_.i8[i];
213
+ }
214
+ #endif
215
+
216
+ return simde__m64_from_private(r_);
217
+ #endif
218
+ }
219
+ #define simde_m_paddb(a, b) simde_mm_add_pi8(a, b)
220
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
221
+ #define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b)
222
+ #define _m_paddb(a, b) simde_m_paddb(a, b)
223
+ #endif
224
+
225
+ SIMDE_FUNCTION_ATTRIBUTES
226
+ simde__m64 simde_mm_add_pi16(simde__m64 a, simde__m64 b)
227
+ {
228
+ #if defined(SIMDE_X86_MMX_NATIVE)
229
+ return _mm_add_pi16(a, b);
230
+ #else
231
+ simde__m64_private r_;
232
+ simde__m64_private a_ = simde__m64_to_private(a);
233
+ simde__m64_private b_ = simde__m64_to_private(b);
234
+
235
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
236
+ r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16);
237
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
238
+ r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16);
239
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
240
+ r_.i16 = a_.i16 + b_.i16;
241
+ #else
242
+ SIMDE_VECTORIZE
243
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
244
+ r_.i16[i] = a_.i16[i] + b_.i16[i];
245
+ }
246
+ #endif
247
+
248
+ return simde__m64_from_private(r_);
249
+ #endif
250
+ }
251
+ #define simde_m_paddw(a, b) simde_mm_add_pi16(a, b)
252
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
253
+ #define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b)
254
+ #define _m_paddw(a, b) simde_mm_add_pi16(a, b)
255
+ #endif
256
+
257
+ SIMDE_FUNCTION_ATTRIBUTES
258
+ simde__m64 simde_mm_add_pi32(simde__m64 a, simde__m64 b)
259
+ {
260
+ #if defined(SIMDE_X86_MMX_NATIVE)
261
+ return _mm_add_pi32(a, b);
262
+ #else
263
+ simde__m64_private r_;
264
+ simde__m64_private a_ = simde__m64_to_private(a);
265
+ simde__m64_private b_ = simde__m64_to_private(b);
266
+
267
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
268
+ r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32);
269
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
270
+ r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32);
271
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
272
+ r_.i32 = a_.i32 + b_.i32;
273
+ #else
274
+ SIMDE_VECTORIZE
275
+ for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
276
+ r_.i32[i] = a_.i32[i] + b_.i32[i];
277
+ }
278
+ #endif
279
+
280
+ return simde__m64_from_private(r_);
281
+ #endif
282
+ }
283
+ #define simde_m_paddd(a, b) simde_mm_add_pi32(a, b)
284
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
285
+ #define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b)
286
+ #define _m_paddd(a, b) simde_mm_add_pi32(a, b)
287
+ #endif
288
+
289
+ SIMDE_FUNCTION_ATTRIBUTES
290
+ simde__m64 simde_mm_adds_pi8(simde__m64 a, simde__m64 b)
291
+ {
292
+ #if defined(SIMDE_X86_MMX_NATIVE)
293
+ return _mm_adds_pi8(a, b);
294
+ #else
295
+ simde__m64_private r_, a_ = simde__m64_to_private(a),
296
+ b_ = simde__m64_to_private(b);
297
+
298
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
299
+ r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8);
300
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
301
+ r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8);
302
+ #else
303
+ SIMDE_VECTORIZE
304
+ for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
305
+ if ((((b_.i8[i]) > 0) &&
306
+ ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) {
307
+ r_.i8[i] = INT8_MAX;
308
+ } else if ((((b_.i8[i]) < 0) &&
309
+ ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) {
310
+ r_.i8[i] = INT8_MIN;
311
+ } else {
312
+ r_.i8[i] = (a_.i8[i]) + (b_.i8[i]);
313
+ }
314
+ }
315
+ #endif
316
+
317
+ return simde__m64_from_private(r_);
318
+ #endif
319
+ }
320
+ #define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b)
321
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
322
+ #define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b)
323
+ #define _m_paddsb(a, b) simde_mm_adds_pi8(a, b)
324
+ #endif
325
+
326
+ SIMDE_FUNCTION_ATTRIBUTES
327
+ simde__m64 simde_mm_adds_pu8(simde__m64 a, simde__m64 b)
328
+ {
329
+ #if defined(SIMDE_X86_MMX_NATIVE)
330
+ return _mm_adds_pu8(a, b);
331
+ #else
332
+ simde__m64_private r_;
333
+ simde__m64_private a_ = simde__m64_to_private(a);
334
+ simde__m64_private b_ = simde__m64_to_private(b);
335
+
336
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
337
+ r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8);
338
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
339
+ r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8);
340
+ #else
341
+ SIMDE_VECTORIZE
342
+ for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
343
+ const uint_fast16_t x =
344
+ HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) +
345
+ HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]);
346
+ if (x > UINT8_MAX)
347
+ r_.u8[i] = UINT8_MAX;
348
+ else
349
+ r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x);
350
+ }
351
+ #endif
352
+
353
+ return simde__m64_from_private(r_);
354
+ #endif
355
+ }
356
+ #define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b)
357
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
358
+ #define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b)
359
+ #define _m_paddusb(a, b) simde_mm_adds_pu8(a, b)
360
+ #endif
361
+
362
+ SIMDE_FUNCTION_ATTRIBUTES
363
+ simde__m64 simde_mm_adds_pi16(simde__m64 a, simde__m64 b)
364
+ {
365
+ #if defined(SIMDE_X86_MMX_NATIVE)
366
+ return _mm_adds_pi16(a, b);
367
+ #else
368
+ simde__m64_private r_;
369
+ simde__m64_private a_ = simde__m64_to_private(a);
370
+ simde__m64_private b_ = simde__m64_to_private(b);
371
+
372
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
373
+ r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16);
374
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
375
+ r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16);
376
+ #else
377
+ SIMDE_VECTORIZE
378
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
379
+ if ((((b_.i16[i]) > 0) &&
380
+ ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) {
381
+ r_.i16[i] = INT16_MAX;
382
+ } else if ((((b_.i16[i]) < 0) &&
383
+ ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) {
384
+ r_.i16[i] = SHRT_MIN;
385
+ } else {
386
+ r_.i16[i] = (a_.i16[i]) + (b_.i16[i]);
387
+ }
388
+ }
389
+ #endif
390
+
391
+ return simde__m64_from_private(r_);
392
+ #endif
393
+ }
394
+ #define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b)
395
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
396
+ #define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b)
397
+ #define _m_paddsw(a, b) simde_mm_adds_pi16(a, b)
398
+ #endif
399
+
400
+ SIMDE_FUNCTION_ATTRIBUTES
401
+ simde__m64 simde_mm_adds_pu16(simde__m64 a, simde__m64 b)
402
+ {
403
+ #if defined(SIMDE_X86_MMX_NATIVE)
404
+ return _mm_adds_pu16(a, b);
405
+ #else
406
+ simde__m64_private r_;
407
+ simde__m64_private a_ = simde__m64_to_private(a);
408
+ simde__m64_private b_ = simde__m64_to_private(b);
409
+
410
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
411
+ r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16);
412
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
413
+ r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16);
414
+ #else
415
+ SIMDE_VECTORIZE
416
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
417
+ const uint32_t x = a_.u16[i] + b_.u16[i];
418
+ if (x > UINT16_MAX)
419
+ r_.u16[i] = UINT16_MAX;
420
+ else
421
+ r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x);
422
+ }
423
+ #endif
424
+
425
+ return simde__m64_from_private(r_);
426
+ #endif
427
+ }
428
+ #define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b)
429
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
430
+ #define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b)
431
+ #define _m_paddusw(a, b) simde_mm_adds_pu16(a, b)
432
+ #endif
433
+
434
+ SIMDE_FUNCTION_ATTRIBUTES
435
+ simde__m64 simde_mm_and_si64(simde__m64 a, simde__m64 b)
436
+ {
437
+ #if defined(SIMDE_X86_MMX_NATIVE)
438
+ return _mm_and_si64(a, b);
439
+ #else
440
+ simde__m64_private r_;
441
+ simde__m64_private a_ = simde__m64_to_private(a);
442
+ simde__m64_private b_ = simde__m64_to_private(b);
443
+
444
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
445
+ r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32);
446
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
447
+ r_.i64 = a_.i64 & b_.i64;
448
+ #else
449
+ r_.i64[0] = a_.i64[0] & b_.i64[0];
450
+ #endif
451
+
452
+ return simde__m64_from_private(r_);
453
+ #endif
454
+ }
455
+ #define simde_m_pand(a, b) simde_mm_and_si64(a, b)
456
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
457
+ #define _mm_and_si64(a, b) simde_mm_and_si64(a, b)
458
+ #define _m_pand(a, b) simde_mm_and_si64(a, b)
459
+ #endif
460
+
461
+ SIMDE_FUNCTION_ATTRIBUTES
462
+ simde__m64 simde_mm_andnot_si64(simde__m64 a, simde__m64 b)
463
+ {
464
+ #if defined(SIMDE_X86_MMX_NATIVE)
465
+ return _mm_andnot_si64(a, b);
466
+ #else
467
+ simde__m64_private r_;
468
+ simde__m64_private a_ = simde__m64_to_private(a);
469
+ simde__m64_private b_ = simde__m64_to_private(b);
470
+
471
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
472
+ r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32);
473
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
474
+ r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32);
475
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
476
+ r_.i32f = ~a_.i32f & b_.i32f;
477
+ #else
478
+ r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]);
479
+ #endif
480
+
481
+ return simde__m64_from_private(r_);
482
+ #endif
483
+ }
484
+ #define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b)
485
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
486
+ #define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b)
487
+ #define _m_pandn(a, b) simde_mm_andnot_si64(a, b)
488
+ #endif
489
+
490
+ SIMDE_FUNCTION_ATTRIBUTES
491
+ simde__m64 simde_mm_cmpeq_pi8(simde__m64 a, simde__m64 b)
492
+ {
493
+ #if defined(SIMDE_X86_MMX_NATIVE)
494
+ return _mm_cmpeq_pi8(a, b);
495
+ #else
496
+ simde__m64_private r_;
497
+ simde__m64_private a_ = simde__m64_to_private(a);
498
+ simde__m64_private b_ = simde__m64_to_private(b);
499
+
500
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
501
+ r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8);
502
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
503
+ r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8);
504
+ #else
505
+ SIMDE_VECTORIZE
506
+ for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
507
+ r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
508
+ }
509
+ #endif
510
+
511
+ return simde__m64_from_private(r_);
512
+ #endif
513
+ }
514
+ #define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b)
515
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
516
+ #define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b)
517
+ #define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b)
518
+ #endif
519
+
520
+ SIMDE_FUNCTION_ATTRIBUTES
521
+ simde__m64 simde_mm_cmpeq_pi16(simde__m64 a, simde__m64 b)
522
+ {
523
+ #if defined(SIMDE_X86_MMX_NATIVE)
524
+ return _mm_cmpeq_pi16(a, b);
525
+ #else
526
+ simde__m64_private r_;
527
+ simde__m64_private a_ = simde__m64_to_private(a);
528
+ simde__m64_private b_ = simde__m64_to_private(b);
529
+
530
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
531
+ r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16);
532
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
533
+ r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16);
534
+ #else
535
+ SIMDE_VECTORIZE
536
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
537
+ r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
538
+ }
539
+ #endif
540
+
541
+ return simde__m64_from_private(r_);
542
+ #endif
543
+ }
544
+ #define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b)
545
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
546
+ #define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b)
547
+ #define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b)
548
+ #endif
549
+
550
+ SIMDE_FUNCTION_ATTRIBUTES
551
+ simde__m64 simde_mm_cmpeq_pi32(simde__m64 a, simde__m64 b)
552
+ {
553
+ #if defined(SIMDE_X86_MMX_NATIVE)
554
+ return _mm_cmpeq_pi32(a, b);
555
+ #else
556
+ simde__m64_private r_;
557
+ simde__m64_private a_ = simde__m64_to_private(a);
558
+ simde__m64_private b_ = simde__m64_to_private(b);
559
+
560
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
561
+ r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32);
562
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
563
+ r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32);
564
+ #else
565
+ SIMDE_VECTORIZE
566
+ for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
567
+ r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
568
+ }
569
+ #endif
570
+
571
+ return simde__m64_from_private(r_);
572
+ #endif
573
+ }
574
+ #define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b)
575
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
576
+ #define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b)
577
+ #define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b)
578
+ #endif
579
+
580
+ SIMDE_FUNCTION_ATTRIBUTES
581
+ simde__m64 simde_mm_cmpgt_pi8(simde__m64 a, simde__m64 b)
582
+ {
583
+ #if defined(SIMDE_X86_MMX_NATIVE)
584
+ return _mm_cmpgt_pi8(a, b);
585
+ #else
586
+ simde__m64_private r_;
587
+ simde__m64_private a_ = simde__m64_to_private(a);
588
+ simde__m64_private b_ = simde__m64_to_private(b);
589
+
590
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
591
+ r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8);
592
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
593
+ r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8);
594
+ #else
595
+ SIMDE_VECTORIZE
596
+ for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
597
+ r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
598
+ }
599
+ #endif
600
+
601
+ return simde__m64_from_private(r_);
602
+ #endif
603
+ }
604
+ #define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b)
605
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
606
+ #define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b)
607
+ #define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b)
608
+ #endif
609
+
610
+ SIMDE_FUNCTION_ATTRIBUTES
611
+ simde__m64 simde_mm_cmpgt_pi16(simde__m64 a, simde__m64 b)
612
+ {
613
+ #if defined(SIMDE_X86_MMX_NATIVE)
614
+ return _mm_cmpgt_pi16(a, b);
615
+ #else
616
+ simde__m64_private r_;
617
+ simde__m64_private a_ = simde__m64_to_private(a);
618
+ simde__m64_private b_ = simde__m64_to_private(b);
619
+
620
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
621
+ r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16);
622
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
623
+ r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16);
624
+ #else
625
+ SIMDE_VECTORIZE
626
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
627
+ r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
628
+ }
629
+ #endif
630
+
631
+ return simde__m64_from_private(r_);
632
+ #endif
633
+ }
634
+ #define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b)
635
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
636
+ #define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b)
637
+ #define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b)
638
+ #endif
639
+
640
+ SIMDE_FUNCTION_ATTRIBUTES
641
+ simde__m64 simde_mm_cmpgt_pi32(simde__m64 a, simde__m64 b)
642
+ {
643
+ #if defined(SIMDE_X86_MMX_NATIVE)
644
+ return _mm_cmpgt_pi32(a, b);
645
+ #else
646
+ simde__m64_private r_;
647
+ simde__m64_private a_ = simde__m64_to_private(a);
648
+ simde__m64_private b_ = simde__m64_to_private(b);
649
+
650
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
651
+ r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32);
652
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
653
+ r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32);
654
+ #else
655
+ SIMDE_VECTORIZE
656
+ for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
657
+ r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
658
+ }
659
+ #endif
660
+
661
+ return simde__m64_from_private(r_);
662
+ #endif
663
+ }
664
+ #define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b)
665
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
666
+ #define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b)
667
+ #define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b)
668
+ #endif
669
+
670
+ SIMDE_FUNCTION_ATTRIBUTES
671
+ int64_t simde_mm_cvtm64_si64(simde__m64 a)
672
+ {
673
+ #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \
674
+ !defined(__PGI)
675
+ return _mm_cvtm64_si64(a);
676
+ #else
677
+ simde__m64_private a_ = simde__m64_to_private(a);
678
+
679
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
680
+ HEDLEY_DIAGNOSTIC_PUSH
681
+ #if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
682
+ SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
683
+ #pragma clang diagnostic ignored "-Wvector-conversion"
684
+ #endif
685
+ return vget_lane_s64(a_.neon_i64, 0);
686
+ HEDLEY_DIAGNOSTIC_POP
687
+ #else
688
+ return a_.i64[0];
689
+ #endif
690
+ #endif
691
+ }
692
+ #define simde_m_to_int64(a) simde_mm_cvtm64_si64(a)
693
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
694
+ #define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a)
695
+ #define _m_to_int64(a) simde_mm_cvtm64_si64(a)
696
+ #endif
697
+
698
+ SIMDE_FUNCTION_ATTRIBUTES
699
+ simde__m64 simde_mm_cvtsi32_si64(int32_t a)
700
+ {
701
+ #if defined(SIMDE_X86_MMX_NATIVE)
702
+ return _mm_cvtsi32_si64(a);
703
+ #else
704
+ simde__m64_private r_;
705
+
706
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
707
+ const int32_t av[sizeof(r_.neon_i32) / sizeof(r_.neon_i32[0])] = {a, 0};
708
+ r_.neon_i32 = vld1_s32(av);
709
+ #else
710
+ r_.i32[0] = a;
711
+ r_.i32[1] = 0;
712
+ #endif
713
+
714
+ return simde__m64_from_private(r_);
715
+ #endif
716
+ }
717
+ #define simde_m_from_int(a) simde_mm_cvtsi32_si64(a)
718
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
719
+ #define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a)
720
+ #define _m_from_int(a) simde_mm_cvtsi32_si64(a)
721
+ #endif
722
+
723
+ SIMDE_FUNCTION_ATTRIBUTES
724
+ simde__m64 simde_mm_cvtsi64_m64(int64_t a)
725
+ {
726
+ #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \
727
+ !defined(__PGI)
728
+ return _mm_cvtsi64_m64(a);
729
+ #else
730
+ simde__m64_private r_;
731
+
732
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
733
+ r_.neon_i64 = vld1_s64(&a);
734
+ #else
735
+ r_.i64[0] = a;
736
+ #endif
737
+
738
+ return simde__m64_from_private(r_);
739
+ #endif
740
+ }
741
+ #define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a)
742
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
743
+ #define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a)
744
+ #define _m_from_int64(a) simde_mm_cvtsi64_m64(a)
745
+ #endif
746
+
747
+ SIMDE_FUNCTION_ATTRIBUTES
748
+ int32_t simde_mm_cvtsi64_si32(simde__m64 a)
749
+ {
750
+ #if defined(SIMDE_X86_MMX_NATIVE)
751
+ return _mm_cvtsi64_si32(a);
752
+ #else
753
+ simde__m64_private a_ = simde__m64_to_private(a);
754
+
755
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
756
+ HEDLEY_DIAGNOSTIC_PUSH
757
+ #if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
758
+ SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
759
+ #pragma clang diagnostic ignored "-Wvector-conversion"
760
+ #endif
761
+ return vget_lane_s32(a_.neon_i32, 0);
762
+ HEDLEY_DIAGNOSTIC_POP
763
+ #else
764
+ return a_.i32[0];
765
+ #endif
766
+ #endif
767
+ }
768
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
769
+ #define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a)
770
+ #endif
771
+
772
+ SIMDE_FUNCTION_ATTRIBUTES
773
+ void simde_mm_empty(void)
774
+ {
775
+ #if defined(SIMDE_X86_MMX_NATIVE)
776
+ _mm_empty();
777
+ #else
778
+ /* noop */
779
+ #endif
780
+ }
781
+ #define simde_m_empty() simde_mm_empty()
782
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
783
+ #define _mm_empty() simde_mm_empty()
784
+ #define _m_empty() simde_mm_empty()
785
+ #endif
786
+
787
+ SIMDE_FUNCTION_ATTRIBUTES
788
+ simde__m64 simde_mm_madd_pi16(simde__m64 a, simde__m64 b)
789
+ {
790
+ #if defined(SIMDE_X86_MMX_NATIVE)
791
+ return _mm_madd_pi16(a, b);
792
+ #else
793
+ simde__m64_private r_;
794
+ simde__m64_private a_ = simde__m64_to_private(a);
795
+ simde__m64_private b_ = simde__m64_to_private(b);
796
+
797
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
798
+ int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16);
799
+ r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1));
800
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
801
+ r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16);
802
+ #else
803
+ SIMDE_VECTORIZE
804
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i += 2) {
805
+ r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) +
806
+ (a_.i16[i + 1] * b_.i16[i + 1]);
807
+ }
808
+ #endif
809
+
810
+ return simde__m64_from_private(r_);
811
+ #endif
812
+ }
813
+ #define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b)
814
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
815
+ #define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b)
816
+ #define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b)
817
+ #endif
818
+
819
+ SIMDE_FUNCTION_ATTRIBUTES
820
+ simde__m64 simde_mm_mulhi_pi16(simde__m64 a, simde__m64 b)
821
+ {
822
+ #if defined(SIMDE_X86_MMX_NATIVE)
823
+ return _mm_mulhi_pi16(a, b);
824
+ #else
825
+ simde__m64_private r_;
826
+ simde__m64_private a_ = simde__m64_to_private(a);
827
+ simde__m64_private b_ = simde__m64_to_private(b);
828
+
829
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
830
+ const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16);
831
+ const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16);
832
+ const uint16x4_t t3 = vmovn_u32(t2);
833
+ r_.neon_u16 = t3;
834
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
835
+ r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16);
836
+ #else
837
+ SIMDE_VECTORIZE
838
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
839
+ r_.i16[i] = HEDLEY_STATIC_CAST(int16_t,
840
+ ((a_.i16[i] * b_.i16[i]) >> 16));
841
+ }
842
+ #endif
843
+
844
+ return simde__m64_from_private(r_);
845
+ #endif
846
+ }
847
+ #define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b)
848
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
849
+ #define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b)
850
+ #define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b)
851
+ #endif
852
+
853
+ SIMDE_FUNCTION_ATTRIBUTES
854
+ simde__m64 simde_mm_mullo_pi16(simde__m64 a, simde__m64 b)
855
+ {
856
+ #if defined(SIMDE_X86_MMX_NATIVE)
857
+ return _mm_mullo_pi16(a, b);
858
+ #else
859
+ simde__m64_private r_;
860
+ simde__m64_private a_ = simde__m64_to_private(a);
861
+ simde__m64_private b_ = simde__m64_to_private(b);
862
+
863
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
864
+ const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16);
865
+ const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1));
866
+ r_.neon_u16 = t2;
867
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
868
+ r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16);
869
+ #else
870
+ SIMDE_VECTORIZE
871
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
872
+ r_.i16[i] = HEDLEY_STATIC_CAST(
873
+ int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff));
874
+ }
875
+ #endif
876
+
877
+ return simde__m64_from_private(r_);
878
+ #endif
879
+ }
880
+ #define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b)
881
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
882
+ #define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b)
883
+ #define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b)
884
+ #endif
885
+
886
+ SIMDE_FUNCTION_ATTRIBUTES
887
+ simde__m64 simde_mm_or_si64(simde__m64 a, simde__m64 b)
888
+ {
889
+ #if defined(SIMDE_X86_MMX_NATIVE)
890
+ return _mm_or_si64(a, b);
891
+ #else
892
+ simde__m64_private r_;
893
+ simde__m64_private a_ = simde__m64_to_private(a);
894
+ simde__m64_private b_ = simde__m64_to_private(b);
895
+
896
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
897
+ r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32);
898
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
899
+ r_.i64 = a_.i64 | b_.i64;
900
+ #else
901
+ r_.i64[0] = a_.i64[0] | b_.i64[0];
902
+ #endif
903
+
904
+ return simde__m64_from_private(r_);
905
+ #endif
906
+ }
907
+ #define simde_m_por(a, b) simde_mm_or_si64(a, b)
908
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
909
+ #define _mm_or_si64(a, b) simde_mm_or_si64(a, b)
910
+ #define _m_por(a, b) simde_mm_or_si64(a, b)
911
+ #endif
912
+
913
+ SIMDE_FUNCTION_ATTRIBUTES
914
+ simde__m64 simde_mm_packs_pi16(simde__m64 a, simde__m64 b)
915
+ {
916
+ #if defined(SIMDE_X86_MMX_NATIVE)
917
+ return _mm_packs_pi16(a, b);
918
+ #else
919
+ simde__m64_private r_;
920
+ simde__m64_private a_ = simde__m64_to_private(a);
921
+ simde__m64_private b_ = simde__m64_to_private(b);
922
+
923
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
924
+ r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16));
925
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
926
+ r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16);
927
+ #else
928
+ SIMDE_VECTORIZE
929
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
930
+ if (a_.i16[i] < INT8_MIN) {
931
+ r_.i8[i] = INT8_MIN;
932
+ } else if (a_.i16[i] > INT8_MAX) {
933
+ r_.i8[i] = INT8_MAX;
934
+ } else {
935
+ r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]);
936
+ }
937
+ }
938
+
939
+ SIMDE_VECTORIZE
940
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
941
+ if (b_.i16[i] < INT8_MIN) {
942
+ r_.i8[i + 4] = INT8_MIN;
943
+ } else if (b_.i16[i] > INT8_MAX) {
944
+ r_.i8[i + 4] = INT8_MAX;
945
+ } else {
946
+ r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]);
947
+ }
948
+ }
949
+ #endif
950
+
951
+ return simde__m64_from_private(r_);
952
+ #endif
953
+ }
954
+ #define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b)
955
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
956
+ #define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b)
957
+ #define _m_packsswb(a, b) simde_mm_packs_pi16(a, b)
958
+ #endif
959
+
960
+ SIMDE_FUNCTION_ATTRIBUTES
961
+ simde__m64 simde_mm_packs_pi32(simde__m64 a, simde__m64 b)
962
+ {
963
+ #if defined(SIMDE_X86_MMX_NATIVE)
964
+ return _mm_packs_pi32(a, b);
965
+ #else
966
+ simde__m64_private r_;
967
+ simde__m64_private a_ = simde__m64_to_private(a);
968
+ simde__m64_private b_ = simde__m64_to_private(b);
969
+
970
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
971
+ r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32));
972
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
973
+ r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32);
974
+ #else
975
+ SIMDE_VECTORIZE
976
+ for (size_t i = 0; i < (8 / sizeof(a_.i32[0])); i++) {
977
+ if (a_.i32[i] < SHRT_MIN) {
978
+ r_.i16[i] = SHRT_MIN;
979
+ } else if (a_.i32[i] > INT16_MAX) {
980
+ r_.i16[i] = INT16_MAX;
981
+ } else {
982
+ r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]);
983
+ }
984
+ }
985
+
986
+ SIMDE_VECTORIZE
987
+ for (size_t i = 0; i < (8 / sizeof(b_.i32[0])); i++) {
988
+ if (b_.i32[i] < SHRT_MIN) {
989
+ r_.i16[i + 2] = SHRT_MIN;
990
+ } else if (b_.i32[i] > INT16_MAX) {
991
+ r_.i16[i + 2] = INT16_MAX;
992
+ } else {
993
+ r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]);
994
+ }
995
+ }
996
+ #endif
997
+
998
+ return simde__m64_from_private(r_);
999
+ #endif
1000
+ }
1001
+ #define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b)
1002
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1003
+ #define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b)
1004
+ #define _m_packssdw(a, b) simde_mm_packs_pi32(a, b)
1005
+ #endif
1006
+
1007
+ SIMDE_FUNCTION_ATTRIBUTES
1008
+ simde__m64 simde_mm_packs_pu16(simde__m64 a, simde__m64 b)
1009
+ {
1010
+ #if defined(SIMDE_X86_MMX_NATIVE)
1011
+ return _mm_packs_pu16(a, b);
1012
+ #else
1013
+ simde__m64_private r_;
1014
+ simde__m64_private a_ = simde__m64_to_private(a);
1015
+ simde__m64_private b_ = simde__m64_to_private(b);
1016
+
1017
+ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
1018
+ const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16);
1019
+
1020
+ /* Set elements which are < 0 to 0 */
1021
+ const int16x8_t t2 =
1022
+ vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1)));
1023
+
1024
+ /* Vector with all s16 elements set to UINT8_MAX */
1025
+ const int16x8_t vmax =
1026
+ vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX));
1027
+
1028
+ /* Elements which are within the acceptable range */
1029
+ const int16x8_t le_max =
1030
+ vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax)));
1031
+ const int16x8_t gt_max =
1032
+ vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax)));
1033
+
1034
+ /* Final values as 16-bit integers */
1035
+ const int16x8_t values = vorrq_s16(le_max, gt_max);
1036
+
1037
+ r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values));
1038
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
1039
+ r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16);
1040
+ #else
1041
+ SIMDE_VECTORIZE
1042
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
1043
+ if (a_.i16[i] > UINT8_MAX) {
1044
+ r_.u8[i] = UINT8_MAX;
1045
+ } else if (a_.i16[i] < 0) {
1046
+ r_.u8[i] = 0;
1047
+ } else {
1048
+ r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]);
1049
+ }
1050
+ }
1051
+
1052
+ SIMDE_VECTORIZE
1053
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
1054
+ if (b_.i16[i] > UINT8_MAX) {
1055
+ r_.u8[i + 4] = UINT8_MAX;
1056
+ } else if (b_.i16[i] < 0) {
1057
+ r_.u8[i + 4] = 0;
1058
+ } else {
1059
+ r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]);
1060
+ }
1061
+ }
1062
+ #endif
1063
+
1064
+ return simde__m64_from_private(r_);
1065
+ #endif
1066
+ }
1067
+ #define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b)
1068
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1069
+ #define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b)
1070
+ #define _m_packuswb(a, b) simde_mm_packs_pu16(a, b)
1071
+ #endif
1072
+
1073
+ SIMDE_FUNCTION_ATTRIBUTES
1074
+ simde__m64 simde_mm_set_pi8(int8_t e7, int8_t e6, int8_t e5, int8_t e4,
1075
+ int8_t e3, int8_t e2, int8_t e1, int8_t e0)
1076
+ {
1077
+ #if defined(SIMDE_X86_MMX_NATIVE)
1078
+ return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0);
1079
+ #else
1080
+ simde__m64_private r_;
1081
+
1082
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1083
+ const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = {e0, e1, e2, e3,
1084
+ e4, e5, e6, e7};
1085
+ r_.neon_i8 = vld1_s8(v);
1086
+ #else
1087
+ r_.i8[0] = e0;
1088
+ r_.i8[1] = e1;
1089
+ r_.i8[2] = e2;
1090
+ r_.i8[3] = e3;
1091
+ r_.i8[4] = e4;
1092
+ r_.i8[5] = e5;
1093
+ r_.i8[6] = e6;
1094
+ r_.i8[7] = e7;
1095
+ #endif
1096
+
1097
+ return simde__m64_from_private(r_);
1098
+ #endif
1099
+ }
1100
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1101
+ #define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) \
1102
+ simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0)
1103
+ #endif
1104
+
1105
+ SIMDE_FUNCTION_ATTRIBUTES
1106
+ simde__m64 simde_x_mm_set_pu8(uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4,
1107
+ uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0)
1108
+ {
1109
+ simde__m64_private r_;
1110
+
1111
+ #if defined(SIMDE_X86_MMX_NATIVE)
1112
+ r_.n = _mm_set_pi8(
1113
+ HEDLEY_STATIC_CAST(int8_t, e7), HEDLEY_STATIC_CAST(int8_t, e6),
1114
+ HEDLEY_STATIC_CAST(int8_t, e5), HEDLEY_STATIC_CAST(int8_t, e4),
1115
+ HEDLEY_STATIC_CAST(int8_t, e3), HEDLEY_STATIC_CAST(int8_t, e2),
1116
+ HEDLEY_STATIC_CAST(int8_t, e1), HEDLEY_STATIC_CAST(int8_t, e0));
1117
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1118
+ const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = {e0, e1, e2, e3,
1119
+ e4, e5, e6, e7};
1120
+ r_.neon_u8 = vld1_u8(v);
1121
+ #else
1122
+ r_.u8[0] = e0;
1123
+ r_.u8[1] = e1;
1124
+ r_.u8[2] = e2;
1125
+ r_.u8[3] = e3;
1126
+ r_.u8[4] = e4;
1127
+ r_.u8[5] = e5;
1128
+ r_.u8[6] = e6;
1129
+ r_.u8[7] = e7;
1130
+ #endif
1131
+
1132
+ return simde__m64_from_private(r_);
1133
+ }
1134
+
1135
+ SIMDE_FUNCTION_ATTRIBUTES
1136
+ simde__m64 simde_mm_set_pi16(int16_t e3, int16_t e2, int16_t e1, int16_t e0)
1137
+ {
1138
+ #if defined(SIMDE_X86_MMX_NATIVE)
1139
+ return _mm_set_pi16(e3, e2, e1, e0);
1140
+ #else
1141
+ simde__m64_private r_;
1142
+
1143
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1144
+ const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = {e0, e1, e2, e3};
1145
+ r_.neon_i16 = vld1_s16(v);
1146
+ #else
1147
+ r_.i16[0] = e0;
1148
+ r_.i16[1] = e1;
1149
+ r_.i16[2] = e2;
1150
+ r_.i16[3] = e3;
1151
+ #endif
1152
+
1153
+ return simde__m64_from_private(r_);
1154
+ #endif
1155
+ }
1156
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1157
+ #define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0)
1158
+ #endif
1159
+
1160
+ SIMDE_FUNCTION_ATTRIBUTES
1161
+ simde__m64 simde_x_mm_set_pu16(uint16_t e3, uint16_t e2, uint16_t e1,
1162
+ uint16_t e0)
1163
+ {
1164
+ simde__m64_private r_;
1165
+
1166
+ #if defined(SIMDE_X86_MMX_NATIVE)
1167
+ r_.n = _mm_set_pi16(HEDLEY_STATIC_CAST(int16_t, e3),
1168
+ HEDLEY_STATIC_CAST(int16_t, e2),
1169
+ HEDLEY_STATIC_CAST(int16_t, e1),
1170
+ HEDLEY_STATIC_CAST(int16_t, e0));
1171
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1172
+ const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = {e0, e1, e2, e3};
1173
+ r_.neon_u16 = vld1_u16(v);
1174
+ #else
1175
+ r_.u16[0] = e0;
1176
+ r_.u16[1] = e1;
1177
+ r_.u16[2] = e2;
1178
+ r_.u16[3] = e3;
1179
+ #endif
1180
+
1181
+ return simde__m64_from_private(r_);
1182
+ }
1183
+
1184
+ SIMDE_FUNCTION_ATTRIBUTES
1185
+ simde__m64 simde_x_mm_set_pu32(uint32_t e1, uint32_t e0)
1186
+ {
1187
+ simde__m64_private r_;
1188
+
1189
+ #if defined(SIMDE_X86_MMX_NATIVE)
1190
+ r_.n = _mm_set_pi32(HEDLEY_STATIC_CAST(int32_t, e1),
1191
+ HEDLEY_STATIC_CAST(int32_t, e0));
1192
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1193
+ const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = {e0, e1};
1194
+ r_.neon_u32 = vld1_u32(v);
1195
+ #else
1196
+ r_.u32[0] = e0;
1197
+ r_.u32[1] = e1;
1198
+ #endif
1199
+
1200
+ return simde__m64_from_private(r_);
1201
+ }
1202
+
1203
+ SIMDE_FUNCTION_ATTRIBUTES
1204
+ simde__m64 simde_mm_set_pi32(int32_t e1, int32_t e0)
1205
+ {
1206
+ simde__m64_private r_;
1207
+
1208
+ #if defined(SIMDE_X86_MMX_NATIVE)
1209
+ r_.n = _mm_set_pi32(e1, e0);
1210
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1211
+ const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = {e0, e1};
1212
+ r_.neon_i32 = vld1_s32(v);
1213
+ #else
1214
+ r_.i32[0] = e0;
1215
+ r_.i32[1] = e1;
1216
+ #endif
1217
+
1218
+ return simde__m64_from_private(r_);
1219
+ }
1220
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1221
+ #define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0)
1222
+ #endif
1223
+
1224
+ SIMDE_FUNCTION_ATTRIBUTES
1225
+ simde__m64 simde_x_mm_set_pi64(int64_t e0)
1226
+ {
1227
+ simde__m64_private r_;
1228
+
1229
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1230
+ const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = {e0};
1231
+ r_.neon_i64 = vld1_s64(v);
1232
+ #else
1233
+ r_.i64[0] = e0;
1234
+ #endif
1235
+
1236
+ return simde__m64_from_private(r_);
1237
+ }
1238
+
1239
+ SIMDE_FUNCTION_ATTRIBUTES
1240
+ simde__m64 simde_x_mm_set_f32x2(simde_float32 e1, simde_float32 e0)
1241
+ {
1242
+ simde__m64_private r_;
1243
+
1244
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1245
+ const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = {e0, e1};
1246
+ r_.neon_f32 = vld1_f32(v);
1247
+ #else
1248
+ r_.f32[0] = e0;
1249
+ r_.f32[1] = e1;
1250
+ #endif
1251
+
1252
+ return simde__m64_from_private(r_);
1253
+ }
1254
+
1255
+ SIMDE_FUNCTION_ATTRIBUTES
1256
+ simde__m64 simde_mm_set1_pi8(int8_t a)
1257
+ {
1258
+ #if defined(SIMDE_X86_MMX_NATIVE)
1259
+ return _mm_set1_pi8(a);
1260
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1261
+ simde__m64_private r_;
1262
+ r_.neon_i8 = vmov_n_s8(a);
1263
+ return simde__m64_from_private(r_);
1264
+ #else
1265
+ return simde_mm_set_pi8(a, a, a, a, a, a, a, a);
1266
+ #endif
1267
+ }
1268
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1269
+ #define _mm_set1_pi8(a) simde_mm_set1_pi8(a)
1270
+ #endif
1271
+
1272
+ SIMDE_FUNCTION_ATTRIBUTES
1273
+ simde__m64 simde_mm_set1_pi16(int16_t a)
1274
+ {
1275
+ #if defined(SIMDE_X86_MMX_NATIVE)
1276
+ return _mm_set1_pi16(a);
1277
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1278
+ simde__m64_private r_;
1279
+ r_.neon_i16 = vmov_n_s16(a);
1280
+ return simde__m64_from_private(r_);
1281
+ #else
1282
+ return simde_mm_set_pi16(a, a, a, a);
1283
+ #endif
1284
+ }
1285
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1286
+ #define _mm_set1_pi16(a) simde_mm_set1_pi16(a)
1287
+ #endif
1288
+
1289
+ SIMDE_FUNCTION_ATTRIBUTES
1290
+ simde__m64 simde_mm_set1_pi32(int32_t a)
1291
+ {
1292
+ #if defined(SIMDE_X86_MMX_NATIVE)
1293
+ return _mm_set1_pi32(a);
1294
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1295
+ simde__m64_private r_;
1296
+ r_.neon_i32 = vmov_n_s32(a);
1297
+ return simde__m64_from_private(r_);
1298
+ #else
1299
+ return simde_mm_set_pi32(a, a);
1300
+ #endif
1301
+ }
1302
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1303
+ #define _mm_set1_pi32(a) simde_mm_set1_pi32(a)
1304
+ #endif
1305
+
1306
+ SIMDE_FUNCTION_ATTRIBUTES
1307
+ simde__m64 simde_mm_setr_pi8(int8_t e7, int8_t e6, int8_t e5, int8_t e4,
1308
+ int8_t e3, int8_t e2, int8_t e1, int8_t e0)
1309
+ {
1310
+ #if defined(SIMDE_X86_MMX_NATIVE)
1311
+ return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0);
1312
+ #else
1313
+ return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7);
1314
+ #endif
1315
+ }
1316
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1317
+ #define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) \
1318
+ simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0)
1319
+ #endif
1320
+
1321
+ SIMDE_FUNCTION_ATTRIBUTES
1322
+ simde__m64 simde_mm_setr_pi16(int16_t e3, int16_t e2, int16_t e1, int16_t e0)
1323
+ {
1324
+ #if defined(SIMDE_X86_MMX_NATIVE)
1325
+ return _mm_setr_pi16(e3, e2, e1, e0);
1326
+ #else
1327
+ return simde_mm_set_pi16(e0, e1, e2, e3);
1328
+ #endif
1329
+ }
1330
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1331
+ #define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0)
1332
+ #endif
1333
+
1334
+ SIMDE_FUNCTION_ATTRIBUTES
1335
+ simde__m64 simde_mm_setr_pi32(int32_t e1, int32_t e0)
1336
+ {
1337
+ #if defined(SIMDE_X86_MMX_NATIVE)
1338
+ return _mm_setr_pi32(e1, e0);
1339
+ #else
1340
+ return simde_mm_set_pi32(e0, e1);
1341
+ #endif
1342
+ }
1343
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1344
+ #define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0)
1345
+ #endif
1346
+
1347
+ SIMDE_FUNCTION_ATTRIBUTES
1348
+ simde__m64 simde_mm_setzero_si64(void)
1349
+ {
1350
+ #if defined(SIMDE_X86_MMX_NATIVE)
1351
+ return _mm_setzero_si64();
1352
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1353
+ simde__m64_private r_;
1354
+ r_.neon_u32 = vmov_n_u32(0);
1355
+ return simde__m64_from_private(r_);
1356
+ #else
1357
+ return simde_mm_set_pi32(0, 0);
1358
+ #endif
1359
+ }
1360
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1361
+ #define _mm_setzero_si64() simde_mm_setzero_si64()
1362
+ #endif
1363
+
1364
+ SIMDE_FUNCTION_ATTRIBUTES
1365
+ simde__m64 simde_x_mm_load_si64(const void *mem_addr)
1366
+ {
1367
+ simde__m64 r;
1368
+ simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64),
1369
+ sizeof(r));
1370
+ return r;
1371
+ }
1372
+
1373
+ SIMDE_FUNCTION_ATTRIBUTES
1374
+ simde__m64 simde_x_mm_loadu_si64(const void *mem_addr)
1375
+ {
1376
+ simde__m64 r;
1377
+ simde_memcpy(&r, mem_addr, sizeof(r));
1378
+ return r;
1379
+ }
1380
+
1381
+ SIMDE_FUNCTION_ATTRIBUTES
1382
+ void simde_x_mm_store_si64(void *mem_addr, simde__m64 value)
1383
+ {
1384
+ simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value,
1385
+ sizeof(value));
1386
+ }
1387
+
1388
+ SIMDE_FUNCTION_ATTRIBUTES
1389
+ void simde_x_mm_storeu_si64(void *mem_addr, simde__m64 value)
1390
+ {
1391
+ simde_memcpy(mem_addr, &value, sizeof(value));
1392
+ }
1393
+
1394
+ SIMDE_FUNCTION_ATTRIBUTES
1395
+ simde__m64 simde_x_mm_setone_si64(void)
1396
+ {
1397
+ return simde_mm_set1_pi32(~INT32_C(0));
1398
+ }
1399
+
1400
+ SIMDE_FUNCTION_ATTRIBUTES
1401
+ simde__m64 simde_mm_sll_pi16(simde__m64 a, simde__m64 count)
1402
+ {
1403
+ #if defined(SIMDE_X86_MMX_NATIVE)
1404
+ return _mm_sll_pi16(a, count);
1405
+ #else
1406
+ simde__m64_private r_;
1407
+ simde__m64_private a_ = simde__m64_to_private(a);
1408
+ simde__m64_private count_ = simde__m64_to_private(count);
1409
+
1410
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1411
+ HEDLEY_DIAGNOSTIC_PUSH
1412
+ #if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
1413
+ SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
1414
+ #pragma clang diagnostic ignored "-Wvector-conversion"
1415
+ #endif
1416
+ r_.neon_i16 =
1417
+ vshl_s16(a_.neon_i16,
1418
+ vmov_n_s16(HEDLEY_STATIC_CAST(
1419
+ int16_t, vget_lane_u64(count_.neon_u64, 0))));
1420
+ HEDLEY_DIAGNOSTIC_POP
1421
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \
1422
+ defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT)
1423
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 15))
1424
+ return simde_mm_setzero_si64();
1425
+
1426
+ r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]);
1427
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1428
+ r_.i16 = a_.i16 << count_.u64[0];
1429
+ #else
1430
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) {
1431
+ simde_memset(&r_, 0, sizeof(r_));
1432
+ return simde__m64_from_private(r_);
1433
+ }
1434
+
1435
+ SIMDE_VECTORIZE
1436
+ for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
1437
+ r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t,
1438
+ a_.u16[i] << count_.u64[0]);
1439
+ }
1440
+ #endif
1441
+
1442
+ return simde__m64_from_private(r_);
1443
+ #endif
1444
+ }
1445
+ #define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count)
1446
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1447
+ #define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count)
1448
+ #define _m_psllw(a, count) simde_mm_sll_pi16(a, count)
1449
+ #endif
1450
+
1451
+ SIMDE_FUNCTION_ATTRIBUTES
1452
+ simde__m64 simde_mm_sll_pi32(simde__m64 a, simde__m64 count)
1453
+ {
1454
+ #if defined(SIMDE_X86_MMX_NATIVE)
1455
+ return _mm_sll_pi32(a, count);
1456
+ #else
1457
+ simde__m64_private r_;
1458
+ simde__m64_private a_ = simde__m64_to_private(a);
1459
+ simde__m64_private count_ = simde__m64_to_private(count);
1460
+
1461
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1462
+ HEDLEY_DIAGNOSTIC_PUSH
1463
+ #if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
1464
+ SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
1465
+ #pragma clang diagnostic ignored "-Wvector-conversion"
1466
+ #endif
1467
+ r_.neon_i32 =
1468
+ vshl_s32(a_.neon_i32,
1469
+ vmov_n_s32(HEDLEY_STATIC_CAST(
1470
+ int32_t, vget_lane_u64(count_.neon_u64, 0))));
1471
+ HEDLEY_DIAGNOSTIC_POP
1472
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1473
+ r_.i32 = a_.i32 << count_.u64[0];
1474
+ #else
1475
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) {
1476
+ simde_memset(&r_, 0, sizeof(r_));
1477
+ return simde__m64_from_private(r_);
1478
+ }
1479
+
1480
+ SIMDE_VECTORIZE
1481
+ for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
1482
+ r_.u32[i] = a_.u32[i] << count_.u64[0];
1483
+ }
1484
+ #endif
1485
+
1486
+ return simde__m64_from_private(r_);
1487
+ #endif
1488
+ }
1489
+ #define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count)
1490
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1491
+ #define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count)
1492
+ #define _m_pslld(a, count) simde_mm_sll_pi32(a, count)
1493
+ #endif
1494
+
1495
+ SIMDE_FUNCTION_ATTRIBUTES
1496
+ simde__m64 simde_mm_slli_pi16(simde__m64 a, int count)
1497
+ {
1498
+ #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
1499
+ return _mm_slli_pi16(a, count);
1500
+ #else
1501
+ simde__m64_private r_;
1502
+ simde__m64_private a_ = simde__m64_to_private(a);
1503
+
1504
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \
1505
+ defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT)
1506
+ if (HEDLEY_UNLIKELY(count > 15))
1507
+ return simde_mm_setzero_si64();
1508
+
1509
+ r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count);
1510
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1511
+ r_.i16 = a_.i16 << count;
1512
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1513
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1514
+ r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t)count));
1515
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
1516
+ r_.mmi_i16 = psllh_s(a_.mmi_i16, b_.mmi_i16);
1517
+ #else
1518
+ SIMDE_VECTORIZE
1519
+ for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
1520
+ r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count);
1521
+ }
1522
+ #endif
1523
+
1524
+ return simde__m64_from_private(r_);
1525
+ #endif
1526
+ }
1527
+ #define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count)
1528
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1529
+ #define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count)
1530
+ #define _m_psllwi(a, count) simde_mm_slli_pi16(a, count)
1531
+ #endif
1532
+
1533
+ SIMDE_FUNCTION_ATTRIBUTES
1534
+ simde__m64 simde_mm_slli_pi32(simde__m64 a, int count)
1535
+ {
1536
+ #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
1537
+ return _mm_slli_pi32(a, count);
1538
+ #else
1539
+ simde__m64_private r_;
1540
+ simde__m64_private a_ = simde__m64_to_private(a);
1541
+
1542
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1543
+ r_.i32 = a_.i32 << count;
1544
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1545
+ r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t)count));
1546
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
1547
+ r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32);
1548
+ #else
1549
+ SIMDE_VECTORIZE
1550
+ for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
1551
+ r_.u32[i] = a_.u32[i] << count;
1552
+ }
1553
+ #endif
1554
+
1555
+ return simde__m64_from_private(r_);
1556
+ #endif
1557
+ }
1558
+ #define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b)
1559
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1560
+ #define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count)
1561
+ #define _m_pslldi(a, count) simde_mm_slli_pi32(a, count)
1562
+ #endif
1563
+
1564
+ SIMDE_FUNCTION_ATTRIBUTES
1565
+ simde__m64 simde_mm_slli_si64(simde__m64 a, int count)
1566
+ {
1567
+ #if defined(SIMDE_X86_MMX_NATIVE)
1568
+ return _mm_slli_si64(a, count);
1569
+ #else
1570
+ simde__m64_private r_;
1571
+ simde__m64_private a_ = simde__m64_to_private(a);
1572
+
1573
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1574
+ r_.i64 = a_.i64 << count;
1575
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1576
+ r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t)count));
1577
+ #else
1578
+ r_.u64[0] = a_.u64[0] << count;
1579
+ #endif
1580
+
1581
+ return simde__m64_from_private(r_);
1582
+ #endif
1583
+ }
1584
+ #define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count)
1585
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1586
+ #define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count)
1587
+ #define _m_psllqi(a, count) simde_mm_slli_si64(a, count)
1588
+ #endif
1589
+
1590
+ SIMDE_FUNCTION_ATTRIBUTES
1591
+ simde__m64 simde_mm_sll_si64(simde__m64 a, simde__m64 count)
1592
+ {
1593
+ #if defined(SIMDE_X86_MMX_NATIVE)
1594
+ return _mm_sll_si64(a, count);
1595
+ #else
1596
+ simde__m64_private r_;
1597
+ simde__m64_private a_ = simde__m64_to_private(a);
1598
+ simde__m64_private count_ = simde__m64_to_private(count);
1599
+
1600
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1601
+ r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64);
1602
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
1603
+ r_.i64 = a_.i64 << count_.i64;
1604
+ #else
1605
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) {
1606
+ simde_memset(&r_, 0, sizeof(r_));
1607
+ return simde__m64_from_private(r_);
1608
+ }
1609
+
1610
+ r_.u64[0] = a_.u64[0] << count_.u64[0];
1611
+ #endif
1612
+
1613
+ return simde__m64_from_private(r_);
1614
+ #endif
1615
+ }
1616
+ #define simde_m_psllq(a, count) simde_mm_sll_si64(a, count)
1617
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1618
+ #define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count)
1619
+ #define _m_psllq(a, count) simde_mm_sll_si64(a, count)
1620
+ #endif
1621
+
1622
+ SIMDE_FUNCTION_ATTRIBUTES
1623
+ simde__m64 simde_mm_srl_pi16(simde__m64 a, simde__m64 count)
1624
+ {
1625
+ #if defined(SIMDE_X86_MMX_NATIVE)
1626
+ return _mm_srl_pi16(a, count);
1627
+ #else
1628
+ simde__m64_private r_;
1629
+ simde__m64_private a_ = simde__m64_to_private(a);
1630
+ simde__m64_private count_ = simde__m64_to_private(count);
1631
+
1632
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \
1633
+ defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT)
1634
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 15))
1635
+ return simde_mm_setzero_si64();
1636
+
1637
+ r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, count_.u64[0]);
1638
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1639
+ r_.u16 = a_.u16 >> count_.u64[0];
1640
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1641
+ r_.neon_u16 = vshl_u16(
1642
+ a_.neon_u16,
1643
+ vmov_n_s16(-((int16_t)vget_lane_u64(count_.neon_u64, 0))));
1644
+ #else
1645
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) {
1646
+ simde_memset(&r_, 0, sizeof(r_));
1647
+ return simde__m64_from_private(r_);
1648
+ }
1649
+
1650
+ SIMDE_VECTORIZE
1651
+ for (size_t i = 0; i < sizeof(r_.u16) / sizeof(r_.u16[0]); i++) {
1652
+ r_.u16[i] = a_.u16[i] >> count_.u64[0];
1653
+ }
1654
+ #endif
1655
+
1656
+ return simde__m64_from_private(r_);
1657
+ #endif
1658
+ }
1659
+ #define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count)
1660
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1661
+ #define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count)
1662
+ #define _m_psrlw(a, count) simde_mm_srl_pi16(a, count)
1663
+ #endif
1664
+
1665
+ SIMDE_FUNCTION_ATTRIBUTES
1666
+ simde__m64 simde_mm_srl_pi32(simde__m64 a, simde__m64 count)
1667
+ {
1668
+ #if defined(SIMDE_X86_MMX_NATIVE)
1669
+ return _mm_srl_pi32(a, count);
1670
+ #else
1671
+ simde__m64_private r_;
1672
+ simde__m64_private a_ = simde__m64_to_private(a);
1673
+ simde__m64_private count_ = simde__m64_to_private(count);
1674
+
1675
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1676
+ r_.u32 = a_.u32 >> count_.u64[0];
1677
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1678
+ r_.neon_u32 = vshl_u32(
1679
+ a_.neon_u32,
1680
+ vmov_n_s32(-((int32_t)vget_lane_u64(count_.neon_u64, 0))));
1681
+ #else
1682
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) {
1683
+ simde_memset(&r_, 0, sizeof(r_));
1684
+ return simde__m64_from_private(r_);
1685
+ }
1686
+
1687
+ SIMDE_VECTORIZE
1688
+ for (size_t i = 0; i < sizeof(r_.u32) / sizeof(r_.u32[0]); i++) {
1689
+ r_.u32[i] = a_.u32[i] >> count_.u64[0];
1690
+ }
1691
+ #endif
1692
+
1693
+ return simde__m64_from_private(r_);
1694
+ #endif
1695
+ }
1696
+ #define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count)
1697
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1698
+ #define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count)
1699
+ #define _m_psrld(a, count) simde_mm_srl_pi32(a, count)
1700
+ #endif
1701
+
1702
+ SIMDE_FUNCTION_ATTRIBUTES
1703
+ simde__m64 simde_mm_srli_pi16(simde__m64 a, int count)
1704
+ {
1705
+ #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
1706
+ return _mm_srli_pi16(a, count);
1707
+ #else
1708
+ simde__m64_private r_;
1709
+ simde__m64_private a_ = simde__m64_to_private(a);
1710
+
1711
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1712
+ r_.u16 = a_.u16 >> count;
1713
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1714
+ r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t)count)));
1715
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
1716
+ r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16);
1717
+ #else
1718
+ SIMDE_VECTORIZE
1719
+ for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
1720
+ r_.u16[i] = a_.u16[i] >> count;
1721
+ }
1722
+ #endif
1723
+
1724
+ return simde__m64_from_private(r_);
1725
+ #endif
1726
+ }
1727
+ #define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count)
1728
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1729
+ #define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count)
1730
+ #define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count)
1731
+ #endif
1732
+
1733
+ SIMDE_FUNCTION_ATTRIBUTES
1734
+ simde__m64 simde_mm_srli_pi32(simde__m64 a, int count)
1735
+ {
1736
+ #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
1737
+ return _mm_srli_pi32(a, count);
1738
+ #else
1739
+ simde__m64_private r_;
1740
+ simde__m64_private a_ = simde__m64_to_private(a);
1741
+
1742
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1743
+ r_.u32 = a_.u32 >> count;
1744
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1745
+ r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t)count)));
1746
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
1747
+ r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32);
1748
+ #else
1749
+ SIMDE_VECTORIZE
1750
+ for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
1751
+ r_.u32[i] = a_.u32[i] >> count;
1752
+ }
1753
+ #endif
1754
+
1755
+ return simde__m64_from_private(r_);
1756
+ #endif
1757
+ }
1758
+ #define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count)
1759
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1760
+ #define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count)
1761
+ #define _m_psrldi(a, count) simde_mm_srli_pi32(a, count)
1762
+ #endif
1763
+
1764
+ SIMDE_FUNCTION_ATTRIBUTES
1765
+ simde__m64 simde_mm_srli_si64(simde__m64 a, int count)
1766
+ {
1767
+ #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
1768
+ return _mm_srli_si64(a, count);
1769
+ #else
1770
+ simde__m64_private r_;
1771
+ simde__m64_private a_ = simde__m64_to_private(a);
1772
+
1773
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1774
+ r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count));
1775
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1776
+ r_.u64 = a_.u64 >> count;
1777
+ #else
1778
+ r_.u64[0] = a_.u64[0] >> count;
1779
+ #endif
1780
+
1781
+ return simde__m64_from_private(r_);
1782
+ #endif
1783
+ }
1784
+ #define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count)
1785
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1786
+ #define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count)
1787
+ #define _m_psrlqi(a, count) simde_mm_srli_si64(a, count)
1788
+ #endif
1789
+
1790
+ SIMDE_FUNCTION_ATTRIBUTES
1791
+ simde__m64 simde_mm_srl_si64(simde__m64 a, simde__m64 count)
1792
+ {
1793
+ #if defined(SIMDE_X86_MMX_NATIVE)
1794
+ return _mm_srl_si64(a, count);
1795
+ #else
1796
+ simde__m64_private r_;
1797
+ simde__m64_private a_ = simde__m64_to_private(a);
1798
+ simde__m64_private count_ = simde__m64_to_private(count);
1799
+
1800
+ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
1801
+ r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64));
1802
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
1803
+ r_.u64 = a_.u64 >> count_.u64;
1804
+ #else
1805
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) {
1806
+ simde_memset(&r_, 0, sizeof(r_));
1807
+ return simde__m64_from_private(r_);
1808
+ }
1809
+
1810
+ r_.u64[0] = a_.u64[0] >> count_.u64[0];
1811
+ #endif
1812
+
1813
+ return simde__m64_from_private(r_);
1814
+ #endif
1815
+ }
1816
+ #define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count)
1817
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1818
+ #define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count)
1819
+ #define _m_psrlq(a, count) simde_mm_srl_si64(a, count)
1820
+ #endif
1821
+
1822
+ SIMDE_FUNCTION_ATTRIBUTES
1823
+ simde__m64 simde_mm_srai_pi16(simde__m64 a, int count)
1824
+ {
1825
+ #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
1826
+ return _mm_srai_pi16(a, count);
1827
+ #else
1828
+ simde__m64_private r_;
1829
+ simde__m64_private a_ = simde__m64_to_private(a);
1830
+
1831
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1832
+ r_.i16 = a_.i16 >> (count & 0xff);
1833
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1834
+ r_.neon_i16 = vshl_s16(a_.neon_i16,
1835
+ vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count)));
1836
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
1837
+ r_.mmi_i16 = psrah_s(a_.mmi_i16, count);
1838
+ #else
1839
+ SIMDE_VECTORIZE
1840
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
1841
+ r_.i16[i] = a_.i16[i] >> (count & 0xff);
1842
+ }
1843
+ #endif
1844
+
1845
+ return simde__m64_from_private(r_);
1846
+ #endif
1847
+ }
1848
+ #define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count)
1849
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1850
+ #define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count)
1851
+ #define _m_psrawi(a, count) simde_mm_srai_pi16(a, count)
1852
+ #endif
1853
+
1854
+ SIMDE_FUNCTION_ATTRIBUTES
1855
+ simde__m64 simde_mm_srai_pi32(simde__m64 a, int count)
1856
+ {
1857
+ #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
1858
+ return _mm_srai_pi32(a, count);
1859
+ #else
1860
+ simde__m64_private r_;
1861
+ simde__m64_private a_ = simde__m64_to_private(a);
1862
+
1863
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1864
+ r_.i32 = a_.i32 >> (count & 0xff);
1865
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1866
+ r_.neon_i32 = vshl_s32(a_.neon_i32,
1867
+ vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count)));
1868
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
1869
+ r_.mmi_i32 = psraw_s(a_.mmi_i32, count);
1870
+ #else
1871
+ SIMDE_VECTORIZE
1872
+ for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
1873
+ r_.i32[i] = a_.i32[i] >> (count & 0xff);
1874
+ }
1875
+ #endif
1876
+
1877
+ return simde__m64_from_private(r_);
1878
+ #endif
1879
+ }
1880
+ #define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count)
1881
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1882
+ #define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count)
1883
+ #define _m_psradi(a, count) simde_mm_srai_pi32(a, count)
1884
+ #endif
1885
+
1886
+ SIMDE_FUNCTION_ATTRIBUTES
1887
+ simde__m64 simde_mm_sra_pi16(simde__m64 a, simde__m64 count)
1888
+ {
1889
+ #if defined(SIMDE_X86_MMX_NATIVE)
1890
+ return _mm_sra_pi16(a, count);
1891
+ #else
1892
+ simde__m64_private r_;
1893
+ simde__m64_private a_ = simde__m64_to_private(a);
1894
+ simde__m64_private count_ = simde__m64_to_private(count);
1895
+ const int cnt = HEDLEY_STATIC_CAST(
1896
+ int, (count_.i64[0] > 15 ? 15 : count_.i64[0]));
1897
+
1898
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1899
+ r_.i16 = a_.i16 >> cnt;
1900
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1901
+ r_.neon_i16 =
1902
+ vshl_s16(a_.neon_i16,
1903
+ vmov_n_s16(-HEDLEY_STATIC_CAST(
1904
+ int16_t, vget_lane_u64(count_.neon_u64, 0))));
1905
+ #else
1906
+ SIMDE_VECTORIZE
1907
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
1908
+ r_.i16[i] = a_.i16[i] >> cnt;
1909
+ }
1910
+ #endif
1911
+
1912
+ return simde__m64_from_private(r_);
1913
+ #endif
1914
+ }
1915
+ #define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count)
1916
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1917
+ #define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count)
1918
+ #define _m_psraw(a, count) simde_mm_sra_pi16(a, count)
1919
+ #endif
1920
+
1921
+ SIMDE_FUNCTION_ATTRIBUTES
1922
+ simde__m64 simde_mm_sra_pi32(simde__m64 a, simde__m64 count)
1923
+ {
1924
+ #if defined(SIMDE_X86_MMX_NATIVE)
1925
+ return _mm_sra_pi32(a, count);
1926
+ #else
1927
+ simde__m64_private r_;
1928
+ simde__m64_private a_ = simde__m64_to_private(a);
1929
+ simde__m64_private count_ = simde__m64_to_private(count);
1930
+ const int32_t cnt =
1931
+ (count_.u64[0] > 31)
1932
+ ? 31
1933
+ : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]);
1934
+
1935
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
1936
+ r_.i32 = a_.i32 >> cnt;
1937
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1938
+ r_.neon_i32 =
1939
+ vshl_s32(a_.neon_i32,
1940
+ vmov_n_s32(-HEDLEY_STATIC_CAST(
1941
+ int32_t, vget_lane_u64(count_.neon_u64, 0))));
1942
+ #else
1943
+ SIMDE_VECTORIZE
1944
+ for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
1945
+ r_.i32[i] = a_.i32[i] >> cnt;
1946
+ }
1947
+ #endif
1948
+
1949
+ return simde__m64_from_private(r_);
1950
+ #endif
1951
+ }
1952
+ #define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b)
1953
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1954
+ #define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count)
1955
+ #define _m_psrad(a, count) simde_mm_sra_pi32(a, count)
1956
+ #endif
1957
+
1958
+ SIMDE_FUNCTION_ATTRIBUTES
1959
+ simde__m64 simde_mm_sub_pi8(simde__m64 a, simde__m64 b)
1960
+ {
1961
+ #if defined(SIMDE_X86_MMX_NATIVE)
1962
+ return _mm_sub_pi8(a, b);
1963
+ #else
1964
+ simde__m64_private r_;
1965
+ simde__m64_private a_ = simde__m64_to_private(a);
1966
+ simde__m64_private b_ = simde__m64_to_private(b);
1967
+
1968
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
1969
+ r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8);
1970
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
1971
+ r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8);
1972
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
1973
+ r_.i8 = a_.i8 - b_.i8;
1974
+ #else
1975
+ SIMDE_VECTORIZE
1976
+ for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
1977
+ r_.i8[i] = a_.i8[i] - b_.i8[i];
1978
+ }
1979
+ #endif
1980
+
1981
+ return simde__m64_from_private(r_);
1982
+ #endif
1983
+ }
1984
+ #define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b)
1985
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
1986
+ #define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b)
1987
+ #define _m_psubb(a, b) simde_mm_sub_pi8(a, b)
1988
+ #endif
1989
+
1990
+ SIMDE_FUNCTION_ATTRIBUTES
1991
+ simde__m64 simde_mm_sub_pi16(simde__m64 a, simde__m64 b)
1992
+ {
1993
+ #if defined(SIMDE_X86_MMX_NATIVE)
1994
+ return _mm_sub_pi16(a, b);
1995
+ #else
1996
+ simde__m64_private r_;
1997
+ simde__m64_private a_ = simde__m64_to_private(a);
1998
+ simde__m64_private b_ = simde__m64_to_private(b);
1999
+
2000
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2001
+ r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16);
2002
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
2003
+ r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16);
2004
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2005
+ r_.i16 = a_.i16 - b_.i16;
2006
+ #else
2007
+ SIMDE_VECTORIZE
2008
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
2009
+ r_.i16[i] = a_.i16[i] - b_.i16[i];
2010
+ }
2011
+ #endif
2012
+
2013
+ return simde__m64_from_private(r_);
2014
+ #endif
2015
+ }
2016
+ #define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b)
2017
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
2018
+ #define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b)
2019
+ #define _m_psubw(a, b) simde_mm_sub_pi16(a, b)
2020
+ #endif
2021
+
2022
+ SIMDE_FUNCTION_ATTRIBUTES
2023
+ simde__m64 simde_mm_sub_pi32(simde__m64 a, simde__m64 b)
2024
+ {
2025
+ #if defined(SIMDE_X86_MMX_NATIVE)
2026
+ return _mm_sub_pi32(a, b);
2027
+ #else
2028
+ simde__m64_private r_;
2029
+ simde__m64_private a_ = simde__m64_to_private(a);
2030
+ simde__m64_private b_ = simde__m64_to_private(b);
2031
+
2032
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2033
+ r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32);
2034
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
2035
+ r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32);
2036
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2037
+ r_.i32 = a_.i32 - b_.i32;
2038
+ #else
2039
+ SIMDE_VECTORIZE
2040
+ for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
2041
+ r_.i32[i] = a_.i32[i] - b_.i32[i];
2042
+ }
2043
+ #endif
2044
+
2045
+ return simde__m64_from_private(r_);
2046
+ #endif
2047
+ }
2048
+ #define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b)
2049
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
2050
+ #define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b)
2051
+ #define _m_psubd(a, b) simde_mm_sub_pi32(a, b)
2052
+ #endif
2053
+
2054
+ SIMDE_FUNCTION_ATTRIBUTES
2055
+ simde__m64 simde_mm_subs_pi8(simde__m64 a, simde__m64 b)
2056
+ {
2057
+ #if defined(SIMDE_X86_MMX_NATIVE)
2058
+ return _mm_subs_pi8(a, b);
2059
+ #else
2060
+ simde__m64_private r_;
2061
+ simde__m64_private a_ = simde__m64_to_private(a);
2062
+ simde__m64_private b_ = simde__m64_to_private(b);
2063
+
2064
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2065
+ r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8);
2066
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
2067
+ r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8);
2068
+ #else
2069
+ SIMDE_VECTORIZE
2070
+ for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
2071
+ if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) {
2072
+ r_.i8[i] = INT8_MIN;
2073
+ } else if ((b_.i8[i]) < 0 &&
2074
+ (a_.i8[i]) > INT8_MAX + (b_.i8[i])) {
2075
+ r_.i8[i] = INT8_MAX;
2076
+ } else {
2077
+ r_.i8[i] = (a_.i8[i]) - (b_.i8[i]);
2078
+ }
2079
+ }
2080
+ #endif
2081
+
2082
+ return simde__m64_from_private(r_);
2083
+ #endif
2084
+ }
2085
+ #define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b)
2086
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
2087
+ #define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b)
2088
+ #define _m_psubsb(a, b) simde_mm_subs_pi8(a, b)
2089
+ #endif
2090
+
2091
+ SIMDE_FUNCTION_ATTRIBUTES
2092
+ simde__m64 simde_mm_subs_pu8(simde__m64 a, simde__m64 b)
2093
+ {
2094
+ #if defined(SIMDE_X86_MMX_NATIVE)
2095
+ return _mm_subs_pu8(a, b);
2096
+ #else
2097
+ simde__m64_private r_;
2098
+ simde__m64_private a_ = simde__m64_to_private(a);
2099
+ simde__m64_private b_ = simde__m64_to_private(b);
2100
+
2101
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2102
+ r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8);
2103
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
2104
+ r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8);
2105
+ #else
2106
+ SIMDE_VECTORIZE
2107
+ for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
2108
+ const int32_t x = a_.u8[i] - b_.u8[i];
2109
+ if (x < 0) {
2110
+ r_.u8[i] = 0;
2111
+ } else if (x > UINT8_MAX) {
2112
+ r_.u8[i] = UINT8_MAX;
2113
+ } else {
2114
+ r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x);
2115
+ }
2116
+ }
2117
+ #endif
2118
+
2119
+ return simde__m64_from_private(r_);
2120
+ #endif
2121
+ }
2122
+ #define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b)
2123
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
2124
+ #define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b)
2125
+ #define _m_psubusb(a, b) simde_mm_subs_pu8(a, b)
2126
+ #endif
2127
+
2128
+ SIMDE_FUNCTION_ATTRIBUTES
2129
+ simde__m64 simde_mm_subs_pi16(simde__m64 a, simde__m64 b)
2130
+ {
2131
+ #if defined(SIMDE_X86_MMX_NATIVE)
2132
+ return _mm_subs_pi16(a, b);
2133
+ #else
2134
+ simde__m64_private r_;
2135
+ simde__m64_private a_ = simde__m64_to_private(a);
2136
+ simde__m64_private b_ = simde__m64_to_private(b);
2137
+
2138
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2139
+ r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16);
2140
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
2141
+ r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16);
2142
+ #else
2143
+ SIMDE_VECTORIZE
2144
+ for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
2145
+ if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) {
2146
+ r_.i16[i] = SHRT_MIN;
2147
+ } else if ((b_.i16[i]) < 0 &&
2148
+ (a_.i16[i]) > INT16_MAX + (b_.i16[i])) {
2149
+ r_.i16[i] = INT16_MAX;
2150
+ } else {
2151
+ r_.i16[i] = (a_.i16[i]) - (b_.i16[i]);
2152
+ }
2153
+ }
2154
+ #endif
2155
+
2156
+ return simde__m64_from_private(r_);
2157
+ #endif
2158
+ }
2159
+ #define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b)
2160
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
2161
+ #define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b)
2162
+ #define _m_psubsw(a, b) simde_mm_subs_pi16(a, b)
2163
+ #endif
2164
+
2165
+ SIMDE_FUNCTION_ATTRIBUTES
2166
+ simde__m64 simde_mm_subs_pu16(simde__m64 a, simde__m64 b)
2167
+ {
2168
+ #if defined(SIMDE_X86_MMX_NATIVE)
2169
+ return _mm_subs_pu16(a, b);
2170
+ #else
2171
+ simde__m64_private r_;
2172
+ simde__m64_private a_ = simde__m64_to_private(a);
2173
+ simde__m64_private b_ = simde__m64_to_private(b);
2174
+
2175
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2176
+ r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16);
2177
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
2178
+ r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16);
2179
+ #else
2180
+ SIMDE_VECTORIZE
2181
+ for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
2182
+ const int x = a_.u16[i] - b_.u16[i];
2183
+ if (x < 0) {
2184
+ r_.u16[i] = 0;
2185
+ } else if (x > UINT16_MAX) {
2186
+ r_.u16[i] = UINT16_MAX;
2187
+ } else {
2188
+ r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x);
2189
+ }
2190
+ }
2191
+ #endif
2192
+
2193
+ return simde__m64_from_private(r_);
2194
+ #endif
2195
+ }
2196
+ #define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b)
2197
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
2198
+ #define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b)
2199
+ #define _m_psubusw(a, b) simde_mm_subs_pu16(a, b)
2200
+ #endif
2201
+
2202
+ SIMDE_FUNCTION_ATTRIBUTES
2203
+ simde__m64 simde_mm_unpackhi_pi8(simde__m64 a, simde__m64 b)
2204
+ {
2205
+ #if defined(SIMDE_X86_MMX_NATIVE)
2206
+ return _mm_unpackhi_pi8(a, b);
2207
+ #else
2208
+ simde__m64_private r_;
2209
+ simde__m64_private a_ = simde__m64_to_private(a);
2210
+ simde__m64_private b_ = simde__m64_to_private(b);
2211
+
2212
+ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
2213
+ r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8);
2214
+ #elif defined(SIMDE_SHUFFLE_VECTOR_)
2215
+ r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14,
2216
+ 7, 15);
2217
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
2218
+ r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8);
2219
+ #else
2220
+ r_.i8[0] = a_.i8[4];
2221
+ r_.i8[1] = b_.i8[4];
2222
+ r_.i8[2] = a_.i8[5];
2223
+ r_.i8[3] = b_.i8[5];
2224
+ r_.i8[4] = a_.i8[6];
2225
+ r_.i8[5] = b_.i8[6];
2226
+ r_.i8[6] = a_.i8[7];
2227
+ r_.i8[7] = b_.i8[7];
2228
+ #endif
2229
+
2230
+ return simde__m64_from_private(r_);
2231
+ #endif
2232
+ }
2233
+ #define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b)
2234
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
2235
+ #define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b)
2236
+ #define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b)
2237
+ #endif
2238
+
2239
+ SIMDE_FUNCTION_ATTRIBUTES
2240
+ simde__m64 simde_mm_unpackhi_pi16(simde__m64 a, simde__m64 b)
2241
+ {
2242
+ #if defined(SIMDE_X86_MMX_NATIVE)
2243
+ return _mm_unpackhi_pi16(a, b);
2244
+ #else
2245
+ simde__m64_private r_;
2246
+ simde__m64_private a_ = simde__m64_to_private(a);
2247
+ simde__m64_private b_ = simde__m64_to_private(b);
2248
+
2249
+ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
2250
+ r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16);
2251
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
2252
+ r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16);
2253
+ #elif defined(SIMDE_SHUFFLE_VECTOR_)
2254
+ r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7);
2255
+ #else
2256
+ r_.i16[0] = a_.i16[2];
2257
+ r_.i16[1] = b_.i16[2];
2258
+ r_.i16[2] = a_.i16[3];
2259
+ r_.i16[3] = b_.i16[3];
2260
+ #endif
2261
+
2262
+ return simde__m64_from_private(r_);
2263
+ #endif
2264
+ }
2265
+ #define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b)
2266
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
2267
+ #define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b)
2268
+ #define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b)
2269
+ #endif
2270
+
2271
+ SIMDE_FUNCTION_ATTRIBUTES
2272
+ simde__m64 simde_mm_unpackhi_pi32(simde__m64 a, simde__m64 b)
2273
+ {
2274
+ #if defined(SIMDE_X86_MMX_NATIVE)
2275
+ return _mm_unpackhi_pi32(a, b);
2276
+ #else
2277
+ simde__m64_private r_;
2278
+ simde__m64_private a_ = simde__m64_to_private(a);
2279
+ simde__m64_private b_ = simde__m64_to_private(b);
2280
+
2281
+ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
2282
+ r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32);
2283
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
2284
+ r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32);
2285
+ #elif defined(SIMDE_SHUFFLE_VECTOR_)
2286
+ r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3);
2287
+ #else
2288
+ r_.i32[0] = a_.i32[1];
2289
+ r_.i32[1] = b_.i32[1];
2290
+ #endif
2291
+
2292
+ return simde__m64_from_private(r_);
2293
+ #endif
2294
+ }
2295
+ #define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b)
2296
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
2297
+ #define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b)
2298
+ #define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b)
2299
+ #endif
2300
+
2301
+ SIMDE_FUNCTION_ATTRIBUTES
2302
+ simde__m64 simde_mm_unpacklo_pi8(simde__m64 a, simde__m64 b)
2303
+ {
2304
+ #if defined(SIMDE_X86_MMX_NATIVE)
2305
+ return _mm_unpacklo_pi8(a, b);
2306
+ #else
2307
+ simde__m64_private r_;
2308
+ simde__m64_private a_ = simde__m64_to_private(a);
2309
+ simde__m64_private b_ = simde__m64_to_private(b);
2310
+
2311
+ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
2312
+ r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8);
2313
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
2314
+ r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8);
2315
+ #elif defined(SIMDE_SHUFFLE_VECTOR_)
2316
+ r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3,
2317
+ 11);
2318
+ #else
2319
+ r_.i8[0] = a_.i8[0];
2320
+ r_.i8[1] = b_.i8[0];
2321
+ r_.i8[2] = a_.i8[1];
2322
+ r_.i8[3] = b_.i8[1];
2323
+ r_.i8[4] = a_.i8[2];
2324
+ r_.i8[5] = b_.i8[2];
2325
+ r_.i8[6] = a_.i8[3];
2326
+ r_.i8[7] = b_.i8[3];
2327
+ #endif
2328
+
2329
+ return simde__m64_from_private(r_);
2330
+ #endif
2331
+ }
2332
+ #define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b)
2333
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
2334
+ #define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b)
2335
+ #define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b)
2336
+ #endif
2337
+
2338
+ SIMDE_FUNCTION_ATTRIBUTES
2339
+ simde__m64 simde_mm_unpacklo_pi16(simde__m64 a, simde__m64 b)
2340
+ {
2341
+ #if defined(SIMDE_X86_MMX_NATIVE)
2342
+ return _mm_unpacklo_pi16(a, b);
2343
+ #else
2344
+ simde__m64_private r_;
2345
+ simde__m64_private a_ = simde__m64_to_private(a);
2346
+ simde__m64_private b_ = simde__m64_to_private(b);
2347
+
2348
+ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
2349
+ r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16);
2350
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
2351
+ r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16);
2352
+ #elif defined(SIMDE_SHUFFLE_VECTOR_)
2353
+ r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5);
2354
+ #else
2355
+ r_.i16[0] = a_.i16[0];
2356
+ r_.i16[1] = b_.i16[0];
2357
+ r_.i16[2] = a_.i16[1];
2358
+ r_.i16[3] = b_.i16[1];
2359
+ #endif
2360
+
2361
+ return simde__m64_from_private(r_);
2362
+ #endif
2363
+ }
2364
+ #define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b)
2365
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
2366
+ #define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b)
2367
+ #define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b)
2368
+ #endif
2369
+
2370
+ SIMDE_FUNCTION_ATTRIBUTES
2371
+ simde__m64 simde_mm_unpacklo_pi32(simde__m64 a, simde__m64 b)
2372
+ {
2373
+ #if defined(SIMDE_X86_MMX_NATIVE)
2374
+ return _mm_unpacklo_pi32(a, b);
2375
+ #else
2376
+ simde__m64_private r_;
2377
+ simde__m64_private a_ = simde__m64_to_private(a);
2378
+ simde__m64_private b_ = simde__m64_to_private(b);
2379
+
2380
+ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
2381
+ r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32);
2382
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
2383
+ r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32);
2384
+ #elif defined(SIMDE_SHUFFLE_VECTOR_)
2385
+ r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2);
2386
+ #else
2387
+ r_.i32[0] = a_.i32[0];
2388
+ r_.i32[1] = b_.i32[0];
2389
+ #endif
2390
+
2391
+ return simde__m64_from_private(r_);
2392
+ #endif
2393
+ }
2394
+ #define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b)
2395
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
2396
+ #define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b)
2397
+ #define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b)
2398
+ #endif
2399
+
2400
+ SIMDE_FUNCTION_ATTRIBUTES
2401
+ simde__m64 simde_mm_xor_si64(simde__m64 a, simde__m64 b)
2402
+ {
2403
+ #if defined(SIMDE_X86_MMX_NATIVE)
2404
+ return _mm_xor_si64(a, b);
2405
+ #else
2406
+ simde__m64_private r_;
2407
+ simde__m64_private a_ = simde__m64_to_private(a);
2408
+ simde__m64_private b_ = simde__m64_to_private(b);
2409
+
2410
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2411
+ r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32);
2412
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2413
+ r_.i32f = a_.i32f ^ b_.i32f;
2414
+ #else
2415
+ r_.u64[0] = a_.u64[0] ^ b_.u64[0];
2416
+ #endif
2417
+
2418
+ return simde__m64_from_private(r_);
2419
+ #endif
2420
+ }
2421
+ #define simde_m_pxor(a, b) simde_mm_xor_si64(a, b)
2422
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
2423
+ #define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b)
2424
+ #define _m_pxor(a, b) simde_mm_xor_si64(a, b)
2425
+ #endif
2426
+
2427
+ SIMDE_FUNCTION_ATTRIBUTES
2428
+ int32_t simde_m_to_int(simde__m64 a)
2429
+ {
2430
+ #if defined(SIMDE_X86_MMX_NATIVE)
2431
+ return _m_to_int(a);
2432
+ #else
2433
+ simde__m64_private a_ = simde__m64_to_private(a);
2434
+
2435
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2436
+ HEDLEY_DIAGNOSTIC_PUSH
2437
+ #if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
2438
+ SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
2439
+ #pragma clang diagnostic ignored "-Wvector-conversion"
2440
+ #endif
2441
+ return vget_lane_s32(a_.neon_i32, 0);
2442
+ HEDLEY_DIAGNOSTIC_POP
2443
+ #else
2444
+ return a_.i32[0];
2445
+ #endif
2446
+ #endif
2447
+ }
2448
+ #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
2449
+ #define _m_to_int(a) simde_m_to_int(a)
2450
+ #endif
2451
+
2452
+ SIMDE_END_DECLS_
2453
+
2454
+ HEDLEY_DIAGNOSTIC_POP
2455
+
2456
+ #endif /* !defined(SIMDE_X86_MMX_H) */