noobs 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/COPYING +339 -0
- package/README.md +46 -0
- package/bin/64bit/obs.lib +0 -0
- package/binding.gyp +23 -0
- package/dist/bin/Qt6Core.dll +0 -0
- package/dist/bin/Qt6Gui.dll +0 -0
- package/dist/bin/Qt6Network.dll +0 -0
- package/dist/bin/Qt6Svg.dll +0 -0
- package/dist/bin/Qt6Widgets.dll +0 -0
- package/dist/bin/Qt6Xml.dll +0 -0
- package/dist/bin/avcodec-61.dll +0 -0
- package/dist/bin/avdevice-61.dll +0 -0
- package/dist/bin/avfilter-10.dll +0 -0
- package/dist/bin/avformat-61.dll +0 -0
- package/dist/bin/avutil-59.dll +0 -0
- package/dist/bin/datachannel.dll +0 -0
- package/dist/bin/libcurl.dll +0 -0
- package/dist/bin/libobs-d3d11.dll +0 -0
- package/dist/bin/libobs-opengl.dll +0 -0
- package/dist/bin/libobs-winrt.dll +0 -0
- package/dist/bin/librist.dll +0 -0
- package/dist/bin/libx264-164.dll +0 -0
- package/dist/bin/lua51.dll +0 -0
- package/dist/bin/obs-amf-test.exe +0 -0
- package/dist/bin/obs-ffmpeg-mux.exe +0 -0
- package/dist/bin/obs-frontend-api.dll +0 -0
- package/dist/bin/obs-scripting.dll +0 -0
- package/dist/bin/obs.dll +0 -0
- package/dist/bin/srt.dll +0 -0
- package/dist/bin/swresample-5.dll +0 -0
- package/dist/bin/swscale-8.dll +0 -0
- package/dist/bin/w32-pthreads.dll +0 -0
- package/dist/bin/zlib.dll +0 -0
- package/dist/effects/area.effect +250 -0
- package/dist/effects/bicubic_scale.effect +236 -0
- package/dist/effects/bilinear_lowres_scale.effect +123 -0
- package/dist/effects/color.effect +172 -0
- package/dist/effects/default.effect +254 -0
- package/dist/effects/default_rect.effect +84 -0
- package/dist/effects/deinterlace_base.effect +325 -0
- package/dist/effects/deinterlace_blend.effect +21 -0
- package/dist/effects/deinterlace_blend_2x.effect +21 -0
- package/dist/effects/deinterlace_discard.effect +21 -0
- package/dist/effects/deinterlace_discard_2x.effect +21 -0
- package/dist/effects/deinterlace_linear.effect +21 -0
- package/dist/effects/deinterlace_linear_2x.effect +21 -0
- package/dist/effects/deinterlace_yadif.effect +21 -0
- package/dist/effects/deinterlace_yadif_2x.effect +21 -0
- package/dist/effects/format_conversion.effect +1823 -0
- package/dist/effects/lanczos_scale.effect +292 -0
- package/dist/effects/opaque.effect +159 -0
- package/dist/effects/premultiplied_alpha.effect +38 -0
- package/dist/effects/repeat.effect +36 -0
- package/dist/effects/solid.effect +80 -0
- package/dist/noobs.node +0 -0
- package/dist/plugins/obs-ffmpeg.dll +0 -0
- package/dist/plugins/obs-x264.dll +0 -0
- package/dist/plugins/win-capture.dll +0 -0
- package/include/audio-monitoring/osx/mac-helpers.h +13 -0
- package/include/audio-monitoring/pulse/pulseaudio-wrapper.h +212 -0
- package/include/audio-monitoring/win32/wasapi-output.h +22 -0
- package/include/callback/calldata.h +195 -0
- package/include/callback/decl.h +61 -0
- package/include/callback/proc.h +52 -0
- package/include/callback/signal.h +73 -0
- package/include/graphics/axisang.h +65 -0
- package/include/graphics/bounds.h +108 -0
- package/include/graphics/device-exports.h +177 -0
- package/include/graphics/effect-parser.h +290 -0
- package/include/graphics/effect.h +190 -0
- package/include/graphics/graphics-internal.h +335 -0
- package/include/graphics/graphics.h +1024 -0
- package/include/graphics/half.h +100 -0
- package/include/graphics/image-file.h +124 -0
- package/include/graphics/input.h +34 -0
- package/include/graphics/libnsgif/libnsgif.h +142 -0
- package/include/graphics/math-defs.h +45 -0
- package/include/graphics/math-extra.h +61 -0
- package/include/graphics/matrix3.h +98 -0
- package/include/graphics/matrix4.h +102 -0
- package/include/graphics/plane.h +85 -0
- package/include/graphics/quat.h +170 -0
- package/include/graphics/shader-parser.h +273 -0
- package/include/graphics/srgb.h +177 -0
- package/include/graphics/vec2.h +148 -0
- package/include/graphics/vec3.h +224 -0
- package/include/graphics/vec4.h +241 -0
- package/include/media-io/audio-io.h +228 -0
- package/include/media-io/audio-math.h +43 -0
- package/include/media-io/audio-resampler.h +44 -0
- package/include/media-io/format-conversion.h +50 -0
- package/include/media-io/frame-rate.h +29 -0
- package/include/media-io/media-io-defs.h +20 -0
- package/include/media-io/media-remux.h +37 -0
- package/include/media-io/video-frame.h +64 -0
- package/include/media-io/video-io.h +338 -0
- package/include/media-io/video-scaler.h +43 -0
- package/include/obs-audio-controls.h +250 -0
- package/include/obs-av1.h +47 -0
- package/include/obs-avc.h +55 -0
- package/include/obs-config.h +52 -0
- package/include/obs-data.h +311 -0
- package/include/obs-defs.h +52 -0
- package/include/obs-encoder.h +361 -0
- package/include/obs-ffmpeg-compat.h +13 -0
- package/include/obs-hevc.h +81 -0
- package/include/obs-hotkey.h +271 -0
- package/include/obs-hotkeys.h +653 -0
- package/include/obs-interaction.h +56 -0
- package/include/obs-internal.h +1459 -0
- package/include/obs-missing-files.h +53 -0
- package/include/obs-module.h +181 -0
- package/include/obs-nal.h +37 -0
- package/include/obs-nix-platform.h +53 -0
- package/include/obs-nix-wayland.h +24 -0
- package/include/obs-nix-x11.h +22 -0
- package/include/obs-nix.h +42 -0
- package/include/obs-output.h +96 -0
- package/include/obs-properties.h +364 -0
- package/include/obs-scene.h +127 -0
- package/include/obs-service.h +115 -0
- package/include/obs-source.h +568 -0
- package/include/obs.h +2608 -0
- package/include/obsconfig.h +13 -0
- package/include/obsversion.h +5 -0
- package/include/util/apple/cfstring-utils.h +16 -0
- package/include/util/array-serializer.h +37 -0
- package/include/util/base.h +97 -0
- package/include/util/bitstream.h +28 -0
- package/include/util/bmem.h +94 -0
- package/include/util/buffered-file-serializer.h +32 -0
- package/include/util/c99defs.h +75 -0
- package/include/util/cf-lexer.h +199 -0
- package/include/util/cf-parser.h +281 -0
- package/include/util/circlebuf.h +319 -0
- package/include/util/config-file.h +103 -0
- package/include/util/crc32.h +29 -0
- package/include/util/curl/curl-helper.h +35 -0
- package/include/util/darray.h +606 -0
- package/include/util/deque.h +319 -0
- package/include/util/dstr.h +320 -0
- package/include/util/file-serializer.h +34 -0
- package/include/util/lexer.h +273 -0
- package/include/util/pipe.h +52 -0
- package/include/util/platform.h +223 -0
- package/include/util/profiler.h +97 -0
- package/include/util/serializer.h +158 -0
- package/include/util/simde/check.h +285 -0
- package/include/util/simde/debug-trap.h +117 -0
- package/include/util/simde/hedley.h +2123 -0
- package/include/util/simde/simde-align.h +481 -0
- package/include/util/simde/simde-arch.h +537 -0
- package/include/util/simde/simde-common.h +918 -0
- package/include/util/simde/simde-constify.h +925 -0
- package/include/util/simde/simde-detect-clang.h +114 -0
- package/include/util/simde/simde-diagnostic.h +447 -0
- package/include/util/simde/simde-features.h +550 -0
- package/include/util/simde/simde-math.h +1858 -0
- package/include/util/simde/x86/mmx.h +2456 -0
- package/include/util/simde/x86/sse.h +4479 -0
- package/include/util/simde/x86/sse2.h +7549 -0
- package/include/util/source-profiler.h +66 -0
- package/include/util/sse-intrin.h +32 -0
- package/include/util/task.h +22 -0
- package/include/util/text-lookup.h +45 -0
- package/include/util/threading-posix.h +77 -0
- package/include/util/threading-windows.h +142 -0
- package/include/util/threading.h +103 -0
- package/include/util/utf8.h +35 -0
- package/include/util/uthash.h +34 -0
- package/include/util/util_uint128.h +108 -0
- package/include/util/util_uint64.h +34 -0
- package/include/util/windows/device-enum.h +14 -0
- package/include/util/windows/obfuscate.h +16 -0
- package/include/util/windows/win-registry.h +37 -0
- package/include/util/windows/win-version.h +57 -0
- package/include/util/windows/window-helpers.h +47 -0
- package/index.d.ts +38 -0
- package/index.js +8 -0
- package/package.json +31 -0
- package/src/main.cpp +321 -0
- package/src/obs_interface.cpp +605 -0
- package/src/obs_interface.h +74 -0
- package/src/utils.cpp +80 -0
- package/src/utils.h +3 -0
|
@@ -0,0 +1,2456 @@
|
|
|
1
|
+
/* SPDX-License-Identifier: MIT
|
|
2
|
+
*
|
|
3
|
+
* Permission is hereby granted, free of charge, to any person
|
|
4
|
+
* obtaining a copy of this software and associated documentation
|
|
5
|
+
* files (the "Software"), to deal in the Software without
|
|
6
|
+
* restriction, including without limitation the rights to use, copy,
|
|
7
|
+
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
|
8
|
+
* of the Software, and to permit persons to whom the Software is
|
|
9
|
+
* furnished to do so, subject to the following conditions:
|
|
10
|
+
*
|
|
11
|
+
* The above copyright notice and this permission notice shall be
|
|
12
|
+
* included in all copies or substantial portions of the Software.
|
|
13
|
+
*
|
|
14
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
15
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
16
|
+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
17
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
18
|
+
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
19
|
+
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
20
|
+
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
* SOFTWARE.
|
|
22
|
+
*
|
|
23
|
+
* Copyright:
|
|
24
|
+
* 2017-2020 Evan Nemerson <evan@nemerson.com>
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
#if !defined(SIMDE_X86_MMX_H)
|
|
28
|
+
#define SIMDE_X86_MMX_H
|
|
29
|
+
|
|
30
|
+
#include "../simde-common.h"
|
|
31
|
+
|
|
32
|
+
HEDLEY_DIAGNOSTIC_PUSH
|
|
33
|
+
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
|
34
|
+
|
|
35
|
+
/* Use the compiler-provided __m64 type whenever native MMX or native SSE
 * support has been detected. */
#if defined(SIMDE_X86_MMX_NATIVE) || defined(SIMDE_X86_SSE_NATIVE)
#define SIMDE_X86_MMX_USE_NATIVE_TYPE
#endif
|
|
40
|
+
|
|
41
|
+
#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)
|
|
42
|
+
#include <mmintrin.h>
|
|
43
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
44
|
+
#include <arm_neon.h>
|
|
45
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
46
|
+
#include <loongson-mmiintrin.h>
|
|
47
|
+
#endif
|
|
48
|
+
|
|
49
|
+
#include <stdint.h>
|
|
50
|
+
#include <limits.h>
|
|
51
|
+
|
|
52
|
+
SIMDE_BEGIN_DECLS_
|
|
53
|
+
|
|
54
|
+
typedef union {
|
|
55
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT)
|
|
56
|
+
SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
57
|
+
SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
58
|
+
SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
59
|
+
SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
60
|
+
SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
61
|
+
SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
62
|
+
SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
63
|
+
SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
64
|
+
SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
65
|
+
SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
66
|
+
SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
67
|
+
#else
|
|
68
|
+
SIMDE_ALIGN_TO_8 int8_t i8[8];
|
|
69
|
+
SIMDE_ALIGN_TO_8 int16_t i16[4];
|
|
70
|
+
SIMDE_ALIGN_TO_8 int32_t i32[2];
|
|
71
|
+
SIMDE_ALIGN_TO_8 int64_t i64[1];
|
|
72
|
+
SIMDE_ALIGN_TO_8 uint8_t u8[8];
|
|
73
|
+
SIMDE_ALIGN_TO_8 uint16_t u16[4];
|
|
74
|
+
SIMDE_ALIGN_TO_8 uint32_t u32[2];
|
|
75
|
+
SIMDE_ALIGN_TO_8 uint64_t u64[1];
|
|
76
|
+
SIMDE_ALIGN_TO_8 simde_float32 f32[2];
|
|
77
|
+
SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)];
|
|
78
|
+
SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)];
|
|
79
|
+
#endif
|
|
80
|
+
|
|
81
|
+
#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)
|
|
82
|
+
__m64 n;
|
|
83
|
+
#endif
|
|
84
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
85
|
+
int8x8_t neon_i8;
|
|
86
|
+
int16x4_t neon_i16;
|
|
87
|
+
int32x2_t neon_i32;
|
|
88
|
+
int64x1_t neon_i64;
|
|
89
|
+
uint8x8_t neon_u8;
|
|
90
|
+
uint16x4_t neon_u16;
|
|
91
|
+
uint32x2_t neon_u32;
|
|
92
|
+
uint64x1_t neon_u64;
|
|
93
|
+
float32x2_t neon_f32;
|
|
94
|
+
#endif
|
|
95
|
+
#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
96
|
+
int8x8_t mmi_i8;
|
|
97
|
+
int16x4_t mmi_i16;
|
|
98
|
+
int32x2_t mmi_i32;
|
|
99
|
+
int64_t mmi_i64;
|
|
100
|
+
uint8x8_t mmi_u8;
|
|
101
|
+
uint16x4_t mmi_u16;
|
|
102
|
+
uint32x2_t mmi_u32;
|
|
103
|
+
uint64_t mmi_u64;
|
|
104
|
+
#endif
|
|
105
|
+
} simde__m64_private;
|
|
106
|
+
|
|
107
|
+
#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)
|
|
108
|
+
typedef __m64 simde__m64;
|
|
109
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
110
|
+
typedef int32x2_t simde__m64;
|
|
111
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
112
|
+
typedef int32x2_t simde__m64;
|
|
113
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT)
|
|
114
|
+
typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
|
115
|
+
#else
|
|
116
|
+
typedef simde__m64_private simde__m64;
|
|
117
|
+
#endif
|
|
118
|
+
|
|
119
|
+
/* When the user asked for native aliases and no real __m64 is in use,
 * expose the portable type under the native name. */
#if defined(SIMDE_ENABLE_NATIVE_ALIASES)
#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)
#define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES
typedef simde__m64 __m64;
#endif
#endif
|
|
124
|
+
|
|
125
|
+
/*
 * Compile-time layout checks.  The public and the private representation
 * must both be exactly 8 bytes because values are converted between them
 * with a byte-wise copy.  Each message names the type it checks so a failure
 * identifies the offending type.
 */
HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "simde__m64 size incorrect");
HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private),
		     "simde__m64_private size incorrect");
#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)
/* Optional alignment checks, only when the build asks for them. */
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8,
		     "simde__m64 is not 8-byte aligned");
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8,
		     "simde__m64_private is not 8-byte aligned");
#endif
|
|
133
|
+
|
|
134
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
135
|
+
simde__m64 simde__m64_from_private(simde__m64_private v)
|
|
136
|
+
{
|
|
137
|
+
simde__m64 r;
|
|
138
|
+
simde_memcpy(&r, &v, sizeof(r));
|
|
139
|
+
return r;
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
143
|
+
simde__m64_private simde__m64_to_private(simde__m64 v)
|
|
144
|
+
{
|
|
145
|
+
simde__m64_private r;
|
|
146
|
+
simde_memcpy(&r, &v, sizeof(r));
|
|
147
|
+
return r;
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
/*
 * Generates a pair of helpers that convert between a SIMDe type and one of
 * the target-specific members of its private union:
 *
 *   simde__<simde_type>_from_<isax>_<fragment>(source_type value)
 *   simde__<simde_type>_to_<isax>_<fragment>(simde__<simde_type> value)
 *
 * The union member that is written / read is named <isax>_<fragment>.
 */
#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, \
					       fragment) \
	SIMDE_FUNCTION_ATTRIBUTES \
	simde__##simde_type simde__##simde_type##_from_##isax##_##fragment( \
		source_type value) \
	{ \
		simde__##simde_type##_private priv_; \
		priv_.isax##_##fragment = value; \
		return simde__##simde_type##_from_private(priv_); \
	} \
	\
	SIMDE_FUNCTION_ATTRIBUTES \
	source_type simde__##simde_type##_to_##isax##_##fragment( \
		simde__##simde_type value) \
	{ \
		simde__##simde_type##_private priv_ = \
			simde__##simde_type##_to_private(value); \
		return priv_.isax##_##fragment; \
	}

/* Conversions to and from the ARM NEON 64-bit vector types. */
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32)
#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */

/* Conversions to and from the Loongson MMI types. */
#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64)
#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */
|
|
192
|
+
|
|
193
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
194
|
+
simde__m64 simde_mm_add_pi8(simde__m64 a, simde__m64 b)
|
|
195
|
+
{
|
|
196
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
197
|
+
return _mm_add_pi8(a, b);
|
|
198
|
+
#else
|
|
199
|
+
simde__m64_private r_;
|
|
200
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
201
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
202
|
+
|
|
203
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
204
|
+
r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8);
|
|
205
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
206
|
+
r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8);
|
|
207
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
208
|
+
r_.i8 = a_.i8 + b_.i8;
|
|
209
|
+
#else
|
|
210
|
+
SIMDE_VECTORIZE
|
|
211
|
+
for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
|
|
212
|
+
r_.i8[i] = a_.i8[i] + b_.i8[i];
|
|
213
|
+
}
|
|
214
|
+
#endif
|
|
215
|
+
|
|
216
|
+
return simde__m64_from_private(r_);
|
|
217
|
+
#endif
|
|
218
|
+
}
|
|
219
|
+
#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b)
|
|
220
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
221
|
+
#define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b)
|
|
222
|
+
#define _m_paddb(a, b) simde_m_paddb(a, b)
|
|
223
|
+
#endif
|
|
224
|
+
|
|
225
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
226
|
+
simde__m64 simde_mm_add_pi16(simde__m64 a, simde__m64 b)
|
|
227
|
+
{
|
|
228
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
229
|
+
return _mm_add_pi16(a, b);
|
|
230
|
+
#else
|
|
231
|
+
simde__m64_private r_;
|
|
232
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
233
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
234
|
+
|
|
235
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
236
|
+
r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16);
|
|
237
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
238
|
+
r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16);
|
|
239
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
240
|
+
r_.i16 = a_.i16 + b_.i16;
|
|
241
|
+
#else
|
|
242
|
+
SIMDE_VECTORIZE
|
|
243
|
+
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
244
|
+
r_.i16[i] = a_.i16[i] + b_.i16[i];
|
|
245
|
+
}
|
|
246
|
+
#endif
|
|
247
|
+
|
|
248
|
+
return simde__m64_from_private(r_);
|
|
249
|
+
#endif
|
|
250
|
+
}
|
|
251
|
+
#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b)
|
|
252
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
253
|
+
#define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b)
|
|
254
|
+
#define _m_paddw(a, b) simde_mm_add_pi16(a, b)
|
|
255
|
+
#endif
|
|
256
|
+
|
|
257
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
258
|
+
simde__m64 simde_mm_add_pi32(simde__m64 a, simde__m64 b)
|
|
259
|
+
{
|
|
260
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
261
|
+
return _mm_add_pi32(a, b);
|
|
262
|
+
#else
|
|
263
|
+
simde__m64_private r_;
|
|
264
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
265
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
266
|
+
|
|
267
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
268
|
+
r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32);
|
|
269
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
270
|
+
r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32);
|
|
271
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
272
|
+
r_.i32 = a_.i32 + b_.i32;
|
|
273
|
+
#else
|
|
274
|
+
SIMDE_VECTORIZE
|
|
275
|
+
for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
|
|
276
|
+
r_.i32[i] = a_.i32[i] + b_.i32[i];
|
|
277
|
+
}
|
|
278
|
+
#endif
|
|
279
|
+
|
|
280
|
+
return simde__m64_from_private(r_);
|
|
281
|
+
#endif
|
|
282
|
+
}
|
|
283
|
+
#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b)
|
|
284
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
285
|
+
#define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b)
|
|
286
|
+
#define _m_paddd(a, b) simde_mm_add_pi32(a, b)
|
|
287
|
+
#endif
|
|
288
|
+
|
|
289
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
290
|
+
simde__m64 simde_mm_adds_pi8(simde__m64 a, simde__m64 b)
|
|
291
|
+
{
|
|
292
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
293
|
+
return _mm_adds_pi8(a, b);
|
|
294
|
+
#else
|
|
295
|
+
simde__m64_private r_, a_ = simde__m64_to_private(a),
|
|
296
|
+
b_ = simde__m64_to_private(b);
|
|
297
|
+
|
|
298
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
299
|
+
r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8);
|
|
300
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
301
|
+
r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8);
|
|
302
|
+
#else
|
|
303
|
+
SIMDE_VECTORIZE
|
|
304
|
+
for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
|
|
305
|
+
if ((((b_.i8[i]) > 0) &&
|
|
306
|
+
((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) {
|
|
307
|
+
r_.i8[i] = INT8_MAX;
|
|
308
|
+
} else if ((((b_.i8[i]) < 0) &&
|
|
309
|
+
((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) {
|
|
310
|
+
r_.i8[i] = INT8_MIN;
|
|
311
|
+
} else {
|
|
312
|
+
r_.i8[i] = (a_.i8[i]) + (b_.i8[i]);
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
#endif
|
|
316
|
+
|
|
317
|
+
return simde__m64_from_private(r_);
|
|
318
|
+
#endif
|
|
319
|
+
}
|
|
320
|
+
#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b)
|
|
321
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
322
|
+
#define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b)
|
|
323
|
+
#define _m_paddsb(a, b) simde_mm_adds_pi8(a, b)
|
|
324
|
+
#endif
|
|
325
|
+
|
|
326
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
327
|
+
simde__m64 simde_mm_adds_pu8(simde__m64 a, simde__m64 b)
|
|
328
|
+
{
|
|
329
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
330
|
+
return _mm_adds_pu8(a, b);
|
|
331
|
+
#else
|
|
332
|
+
simde__m64_private r_;
|
|
333
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
334
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
335
|
+
|
|
336
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
337
|
+
r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8);
|
|
338
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
339
|
+
r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8);
|
|
340
|
+
#else
|
|
341
|
+
SIMDE_VECTORIZE
|
|
342
|
+
for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
|
|
343
|
+
const uint_fast16_t x =
|
|
344
|
+
HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) +
|
|
345
|
+
HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]);
|
|
346
|
+
if (x > UINT8_MAX)
|
|
347
|
+
r_.u8[i] = UINT8_MAX;
|
|
348
|
+
else
|
|
349
|
+
r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x);
|
|
350
|
+
}
|
|
351
|
+
#endif
|
|
352
|
+
|
|
353
|
+
return simde__m64_from_private(r_);
|
|
354
|
+
#endif
|
|
355
|
+
}
|
|
356
|
+
#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b)
|
|
357
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
358
|
+
#define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b)
|
|
359
|
+
#define _m_paddusb(a, b) simde_mm_adds_pu8(a, b)
|
|
360
|
+
#endif
|
|
361
|
+
|
|
362
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
363
|
+
simde__m64 simde_mm_adds_pi16(simde__m64 a, simde__m64 b)
|
|
364
|
+
{
|
|
365
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
366
|
+
return _mm_adds_pi16(a, b);
|
|
367
|
+
#else
|
|
368
|
+
simde__m64_private r_;
|
|
369
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
370
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
371
|
+
|
|
372
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
373
|
+
r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16);
|
|
374
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
375
|
+
r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16);
|
|
376
|
+
#else
|
|
377
|
+
SIMDE_VECTORIZE
|
|
378
|
+
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
379
|
+
if ((((b_.i16[i]) > 0) &&
|
|
380
|
+
((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) {
|
|
381
|
+
r_.i16[i] = INT16_MAX;
|
|
382
|
+
} else if ((((b_.i16[i]) < 0) &&
|
|
383
|
+
((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) {
|
|
384
|
+
r_.i16[i] = SHRT_MIN;
|
|
385
|
+
} else {
|
|
386
|
+
r_.i16[i] = (a_.i16[i]) + (b_.i16[i]);
|
|
387
|
+
}
|
|
388
|
+
}
|
|
389
|
+
#endif
|
|
390
|
+
|
|
391
|
+
return simde__m64_from_private(r_);
|
|
392
|
+
#endif
|
|
393
|
+
}
|
|
394
|
+
#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b)
|
|
395
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
396
|
+
#define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b)
|
|
397
|
+
#define _m_paddsw(a, b) simde_mm_adds_pi16(a, b)
|
|
398
|
+
#endif
|
|
399
|
+
|
|
400
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
401
|
+
simde__m64 simde_mm_adds_pu16(simde__m64 a, simde__m64 b)
|
|
402
|
+
{
|
|
403
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
404
|
+
return _mm_adds_pu16(a, b);
|
|
405
|
+
#else
|
|
406
|
+
simde__m64_private r_;
|
|
407
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
408
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
409
|
+
|
|
410
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
411
|
+
r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16);
|
|
412
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
413
|
+
r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16);
|
|
414
|
+
#else
|
|
415
|
+
SIMDE_VECTORIZE
|
|
416
|
+
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
417
|
+
const uint32_t x = a_.u16[i] + b_.u16[i];
|
|
418
|
+
if (x > UINT16_MAX)
|
|
419
|
+
r_.u16[i] = UINT16_MAX;
|
|
420
|
+
else
|
|
421
|
+
r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x);
|
|
422
|
+
}
|
|
423
|
+
#endif
|
|
424
|
+
|
|
425
|
+
return simde__m64_from_private(r_);
|
|
426
|
+
#endif
|
|
427
|
+
}
|
|
428
|
+
#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b)
|
|
429
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
430
|
+
#define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b)
|
|
431
|
+
#define _m_paddusw(a, b) simde_mm_adds_pu16(a, b)
|
|
432
|
+
#endif
|
|
433
|
+
|
|
434
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
435
|
+
simde__m64 simde_mm_and_si64(simde__m64 a, simde__m64 b)
|
|
436
|
+
{
|
|
437
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
438
|
+
return _mm_and_si64(a, b);
|
|
439
|
+
#else
|
|
440
|
+
simde__m64_private r_;
|
|
441
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
442
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
443
|
+
|
|
444
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
445
|
+
r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32);
|
|
446
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
447
|
+
r_.i64 = a_.i64 & b_.i64;
|
|
448
|
+
#else
|
|
449
|
+
r_.i64[0] = a_.i64[0] & b_.i64[0];
|
|
450
|
+
#endif
|
|
451
|
+
|
|
452
|
+
return simde__m64_from_private(r_);
|
|
453
|
+
#endif
|
|
454
|
+
}
|
|
455
|
+
#define simde_m_pand(a, b) simde_mm_and_si64(a, b)
|
|
456
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
457
|
+
#define _mm_and_si64(a, b) simde_mm_and_si64(a, b)
|
|
458
|
+
#define _m_pand(a, b) simde_mm_and_si64(a, b)
|
|
459
|
+
#endif
|
|
460
|
+
|
|
461
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
462
|
+
simde__m64 simde_mm_andnot_si64(simde__m64 a, simde__m64 b)
|
|
463
|
+
{
|
|
464
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
465
|
+
return _mm_andnot_si64(a, b);
|
|
466
|
+
#else
|
|
467
|
+
simde__m64_private r_;
|
|
468
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
469
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
470
|
+
|
|
471
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
472
|
+
r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32);
|
|
473
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
474
|
+
r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32);
|
|
475
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
476
|
+
r_.i32f = ~a_.i32f & b_.i32f;
|
|
477
|
+
#else
|
|
478
|
+
r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]);
|
|
479
|
+
#endif
|
|
480
|
+
|
|
481
|
+
return simde__m64_from_private(r_);
|
|
482
|
+
#endif
|
|
483
|
+
}
|
|
484
|
+
#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b)
|
|
485
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
486
|
+
#define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b)
|
|
487
|
+
#define _m_pandn(a, b) simde_mm_andnot_si64(a, b)
|
|
488
|
+
#endif
|
|
489
|
+
|
|
490
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
491
|
+
simde__m64 simde_mm_cmpeq_pi8(simde__m64 a, simde__m64 b)
|
|
492
|
+
{
|
|
493
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
494
|
+
return _mm_cmpeq_pi8(a, b);
|
|
495
|
+
#else
|
|
496
|
+
simde__m64_private r_;
|
|
497
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
498
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
499
|
+
|
|
500
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
501
|
+
r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8);
|
|
502
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
503
|
+
r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8);
|
|
504
|
+
#else
|
|
505
|
+
SIMDE_VECTORIZE
|
|
506
|
+
for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
|
|
507
|
+
r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
|
|
508
|
+
}
|
|
509
|
+
#endif
|
|
510
|
+
|
|
511
|
+
return simde__m64_from_private(r_);
|
|
512
|
+
#endif
|
|
513
|
+
}
|
|
514
|
+
#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b)
|
|
515
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
516
|
+
#define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b)
|
|
517
|
+
#define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b)
|
|
518
|
+
#endif
|
|
519
|
+
|
|
520
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
521
|
+
simde__m64 simde_mm_cmpeq_pi16(simde__m64 a, simde__m64 b)
|
|
522
|
+
{
|
|
523
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
524
|
+
return _mm_cmpeq_pi16(a, b);
|
|
525
|
+
#else
|
|
526
|
+
simde__m64_private r_;
|
|
527
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
528
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
529
|
+
|
|
530
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
531
|
+
r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16);
|
|
532
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
533
|
+
r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16);
|
|
534
|
+
#else
|
|
535
|
+
SIMDE_VECTORIZE
|
|
536
|
+
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
537
|
+
r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
|
|
538
|
+
}
|
|
539
|
+
#endif
|
|
540
|
+
|
|
541
|
+
return simde__m64_from_private(r_);
|
|
542
|
+
#endif
|
|
543
|
+
}
|
|
544
|
+
#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b)
|
|
545
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
546
|
+
#define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b)
|
|
547
|
+
#define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b)
|
|
548
|
+
#endif
|
|
549
|
+
|
|
550
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
551
|
+
simde__m64 simde_mm_cmpeq_pi32(simde__m64 a, simde__m64 b)
|
|
552
|
+
{
|
|
553
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
554
|
+
return _mm_cmpeq_pi32(a, b);
|
|
555
|
+
#else
|
|
556
|
+
simde__m64_private r_;
|
|
557
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
558
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
559
|
+
|
|
560
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
561
|
+
r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32);
|
|
562
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
563
|
+
r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32);
|
|
564
|
+
#else
|
|
565
|
+
SIMDE_VECTORIZE
|
|
566
|
+
for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
|
|
567
|
+
r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
|
|
568
|
+
}
|
|
569
|
+
#endif
|
|
570
|
+
|
|
571
|
+
return simde__m64_from_private(r_);
|
|
572
|
+
#endif
|
|
573
|
+
}
|
|
574
|
+
#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b)
|
|
575
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
576
|
+
#define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b)
|
|
577
|
+
#define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b)
|
|
578
|
+
#endif
|
|
579
|
+
|
|
580
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
581
|
+
simde__m64 simde_mm_cmpgt_pi8(simde__m64 a, simde__m64 b)
|
|
582
|
+
{
|
|
583
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
584
|
+
return _mm_cmpgt_pi8(a, b);
|
|
585
|
+
#else
|
|
586
|
+
simde__m64_private r_;
|
|
587
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
588
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
589
|
+
|
|
590
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
591
|
+
r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8);
|
|
592
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
593
|
+
r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8);
|
|
594
|
+
#else
|
|
595
|
+
SIMDE_VECTORIZE
|
|
596
|
+
for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
|
|
597
|
+
r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
|
|
598
|
+
}
|
|
599
|
+
#endif
|
|
600
|
+
|
|
601
|
+
return simde__m64_from_private(r_);
|
|
602
|
+
#endif
|
|
603
|
+
}
|
|
604
|
+
#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b)
|
|
605
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
606
|
+
#define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b)
|
|
607
|
+
#define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b)
|
|
608
|
+
#endif
|
|
609
|
+
|
|
610
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
611
|
+
simde__m64 simde_mm_cmpgt_pi16(simde__m64 a, simde__m64 b)
|
|
612
|
+
{
|
|
613
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
614
|
+
return _mm_cmpgt_pi16(a, b);
|
|
615
|
+
#else
|
|
616
|
+
simde__m64_private r_;
|
|
617
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
618
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
619
|
+
|
|
620
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
621
|
+
r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16);
|
|
622
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
623
|
+
r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16);
|
|
624
|
+
#else
|
|
625
|
+
SIMDE_VECTORIZE
|
|
626
|
+
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
627
|
+
r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
|
|
628
|
+
}
|
|
629
|
+
#endif
|
|
630
|
+
|
|
631
|
+
return simde__m64_from_private(r_);
|
|
632
|
+
#endif
|
|
633
|
+
}
|
|
634
|
+
#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b)
|
|
635
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
636
|
+
#define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b)
|
|
637
|
+
#define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b)
|
|
638
|
+
#endif
|
|
639
|
+
|
|
640
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
641
|
+
simde__m64 simde_mm_cmpgt_pi32(simde__m64 a, simde__m64 b)
|
|
642
|
+
{
|
|
643
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
644
|
+
return _mm_cmpgt_pi32(a, b);
|
|
645
|
+
#else
|
|
646
|
+
simde__m64_private r_;
|
|
647
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
648
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
649
|
+
|
|
650
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
651
|
+
r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32);
|
|
652
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
653
|
+
r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32);
|
|
654
|
+
#else
|
|
655
|
+
SIMDE_VECTORIZE
|
|
656
|
+
for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
|
|
657
|
+
r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
|
|
658
|
+
}
|
|
659
|
+
#endif
|
|
660
|
+
|
|
661
|
+
return simde__m64_from_private(r_);
|
|
662
|
+
#endif
|
|
663
|
+
}
|
|
664
|
+
#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b)
|
|
665
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
666
|
+
#define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b)
|
|
667
|
+
#define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b)
|
|
668
|
+
#endif
|
|
669
|
+
|
|
670
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
671
|
+
int64_t simde_mm_cvtm64_si64(simde__m64 a)
|
|
672
|
+
{
|
|
673
|
+
#if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \
|
|
674
|
+
!defined(__PGI)
|
|
675
|
+
return _mm_cvtm64_si64(a);
|
|
676
|
+
#else
|
|
677
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
678
|
+
|
|
679
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
680
|
+
HEDLEY_DIAGNOSTIC_PUSH
|
|
681
|
+
#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
|
|
682
|
+
SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
|
|
683
|
+
#pragma clang diagnostic ignored "-Wvector-conversion"
|
|
684
|
+
#endif
|
|
685
|
+
return vget_lane_s64(a_.neon_i64, 0);
|
|
686
|
+
HEDLEY_DIAGNOSTIC_POP
|
|
687
|
+
#else
|
|
688
|
+
return a_.i64[0];
|
|
689
|
+
#endif
|
|
690
|
+
#endif
|
|
691
|
+
}
|
|
692
|
+
#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a)
|
|
693
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
694
|
+
#define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a)
|
|
695
|
+
#define _m_to_int64(a) simde_mm_cvtm64_si64(a)
|
|
696
|
+
#endif
|
|
697
|
+
|
|
698
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
699
|
+
simde__m64 simde_mm_cvtsi32_si64(int32_t a)
|
|
700
|
+
{
|
|
701
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
702
|
+
return _mm_cvtsi32_si64(a);
|
|
703
|
+
#else
|
|
704
|
+
simde__m64_private r_;
|
|
705
|
+
|
|
706
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
707
|
+
const int32_t av[sizeof(r_.neon_i32) / sizeof(r_.neon_i32[0])] = {a, 0};
|
|
708
|
+
r_.neon_i32 = vld1_s32(av);
|
|
709
|
+
#else
|
|
710
|
+
r_.i32[0] = a;
|
|
711
|
+
r_.i32[1] = 0;
|
|
712
|
+
#endif
|
|
713
|
+
|
|
714
|
+
return simde__m64_from_private(r_);
|
|
715
|
+
#endif
|
|
716
|
+
}
|
|
717
|
+
#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a)
|
|
718
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
719
|
+
#define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a)
|
|
720
|
+
#define _m_from_int(a) simde_mm_cvtsi32_si64(a)
|
|
721
|
+
#endif
|
|
722
|
+
|
|
723
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
724
|
+
simde__m64 simde_mm_cvtsi64_m64(int64_t a)
|
|
725
|
+
{
|
|
726
|
+
#if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \
|
|
727
|
+
!defined(__PGI)
|
|
728
|
+
return _mm_cvtsi64_m64(a);
|
|
729
|
+
#else
|
|
730
|
+
simde__m64_private r_;
|
|
731
|
+
|
|
732
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
733
|
+
r_.neon_i64 = vld1_s64(&a);
|
|
734
|
+
#else
|
|
735
|
+
r_.i64[0] = a;
|
|
736
|
+
#endif
|
|
737
|
+
|
|
738
|
+
return simde__m64_from_private(r_);
|
|
739
|
+
#endif
|
|
740
|
+
}
|
|
741
|
+
#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a)
|
|
742
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
743
|
+
#define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a)
|
|
744
|
+
#define _m_from_int64(a) simde_mm_cvtsi64_m64(a)
|
|
745
|
+
#endif
|
|
746
|
+
|
|
747
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
748
|
+
int32_t simde_mm_cvtsi64_si32(simde__m64 a)
|
|
749
|
+
{
|
|
750
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
751
|
+
return _mm_cvtsi64_si32(a);
|
|
752
|
+
#else
|
|
753
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
754
|
+
|
|
755
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
756
|
+
HEDLEY_DIAGNOSTIC_PUSH
|
|
757
|
+
#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
|
|
758
|
+
SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
|
|
759
|
+
#pragma clang diagnostic ignored "-Wvector-conversion"
|
|
760
|
+
#endif
|
|
761
|
+
return vget_lane_s32(a_.neon_i32, 0);
|
|
762
|
+
HEDLEY_DIAGNOSTIC_POP
|
|
763
|
+
#else
|
|
764
|
+
return a_.i32[0];
|
|
765
|
+
#endif
|
|
766
|
+
#endif
|
|
767
|
+
}
|
|
768
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
769
|
+
#define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a)
|
|
770
|
+
#endif
|
|
771
|
+
|
|
772
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
773
|
+
void simde_mm_empty(void)
|
|
774
|
+
{
|
|
775
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
776
|
+
_mm_empty();
|
|
777
|
+
#else
|
|
778
|
+
/* noop */
|
|
779
|
+
#endif
|
|
780
|
+
}
|
|
781
|
+
#define simde_m_empty() simde_mm_empty()
|
|
782
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
783
|
+
#define _mm_empty() simde_mm_empty()
|
|
784
|
+
#define _m_empty() simde_mm_empty()
|
|
785
|
+
#endif
|
|
786
|
+
|
|
787
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
788
|
+
simde__m64 simde_mm_madd_pi16(simde__m64 a, simde__m64 b)
|
|
789
|
+
{
|
|
790
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
791
|
+
return _mm_madd_pi16(a, b);
|
|
792
|
+
#else
|
|
793
|
+
simde__m64_private r_;
|
|
794
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
795
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
796
|
+
|
|
797
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
798
|
+
int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16);
|
|
799
|
+
r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1));
|
|
800
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
801
|
+
r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16);
|
|
802
|
+
#else
|
|
803
|
+
SIMDE_VECTORIZE
|
|
804
|
+
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i += 2) {
|
|
805
|
+
r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) +
|
|
806
|
+
(a_.i16[i + 1] * b_.i16[i + 1]);
|
|
807
|
+
}
|
|
808
|
+
#endif
|
|
809
|
+
|
|
810
|
+
return simde__m64_from_private(r_);
|
|
811
|
+
#endif
|
|
812
|
+
}
|
|
813
|
+
#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b)
|
|
814
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
815
|
+
#define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b)
|
|
816
|
+
#define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b)
|
|
817
|
+
#endif
|
|
818
|
+
|
|
819
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
820
|
+
simde__m64 simde_mm_mulhi_pi16(simde__m64 a, simde__m64 b)
|
|
821
|
+
{
|
|
822
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
823
|
+
return _mm_mulhi_pi16(a, b);
|
|
824
|
+
#else
|
|
825
|
+
simde__m64_private r_;
|
|
826
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
827
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
828
|
+
|
|
829
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
830
|
+
const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16);
|
|
831
|
+
const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16);
|
|
832
|
+
const uint16x4_t t3 = vmovn_u32(t2);
|
|
833
|
+
r_.neon_u16 = t3;
|
|
834
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
835
|
+
r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16);
|
|
836
|
+
#else
|
|
837
|
+
SIMDE_VECTORIZE
|
|
838
|
+
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
839
|
+
r_.i16[i] = HEDLEY_STATIC_CAST(int16_t,
|
|
840
|
+
((a_.i16[i] * b_.i16[i]) >> 16));
|
|
841
|
+
}
|
|
842
|
+
#endif
|
|
843
|
+
|
|
844
|
+
return simde__m64_from_private(r_);
|
|
845
|
+
#endif
|
|
846
|
+
}
|
|
847
|
+
#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b)
|
|
848
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
849
|
+
#define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b)
|
|
850
|
+
#define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b)
|
|
851
|
+
#endif
|
|
852
|
+
|
|
853
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
854
|
+
simde__m64 simde_mm_mullo_pi16(simde__m64 a, simde__m64 b)
|
|
855
|
+
{
|
|
856
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
857
|
+
return _mm_mullo_pi16(a, b);
|
|
858
|
+
#else
|
|
859
|
+
simde__m64_private r_;
|
|
860
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
861
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
862
|
+
|
|
863
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
864
|
+
const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16);
|
|
865
|
+
const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1));
|
|
866
|
+
r_.neon_u16 = t2;
|
|
867
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
868
|
+
r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16);
|
|
869
|
+
#else
|
|
870
|
+
SIMDE_VECTORIZE
|
|
871
|
+
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
872
|
+
r_.i16[i] = HEDLEY_STATIC_CAST(
|
|
873
|
+
int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff));
|
|
874
|
+
}
|
|
875
|
+
#endif
|
|
876
|
+
|
|
877
|
+
return simde__m64_from_private(r_);
|
|
878
|
+
#endif
|
|
879
|
+
}
|
|
880
|
+
#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b)
|
|
881
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
882
|
+
#define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b)
|
|
883
|
+
#define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b)
|
|
884
|
+
#endif
|
|
885
|
+
|
|
886
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
887
|
+
simde__m64 simde_mm_or_si64(simde__m64 a, simde__m64 b)
|
|
888
|
+
{
|
|
889
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
890
|
+
return _mm_or_si64(a, b);
|
|
891
|
+
#else
|
|
892
|
+
simde__m64_private r_;
|
|
893
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
894
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
895
|
+
|
|
896
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
897
|
+
r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32);
|
|
898
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
899
|
+
r_.i64 = a_.i64 | b_.i64;
|
|
900
|
+
#else
|
|
901
|
+
r_.i64[0] = a_.i64[0] | b_.i64[0];
|
|
902
|
+
#endif
|
|
903
|
+
|
|
904
|
+
return simde__m64_from_private(r_);
|
|
905
|
+
#endif
|
|
906
|
+
}
|
|
907
|
+
#define simde_m_por(a, b) simde_mm_or_si64(a, b)
|
|
908
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
909
|
+
#define _mm_or_si64(a, b) simde_mm_or_si64(a, b)
|
|
910
|
+
#define _m_por(a, b) simde_mm_or_si64(a, b)
|
|
911
|
+
#endif
|
|
912
|
+
|
|
913
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
914
|
+
simde__m64 simde_mm_packs_pi16(simde__m64 a, simde__m64 b)
|
|
915
|
+
{
|
|
916
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
917
|
+
return _mm_packs_pi16(a, b);
|
|
918
|
+
#else
|
|
919
|
+
simde__m64_private r_;
|
|
920
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
921
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
922
|
+
|
|
923
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
924
|
+
r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16));
|
|
925
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
926
|
+
r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16);
|
|
927
|
+
#else
|
|
928
|
+
SIMDE_VECTORIZE
|
|
929
|
+
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
930
|
+
if (a_.i16[i] < INT8_MIN) {
|
|
931
|
+
r_.i8[i] = INT8_MIN;
|
|
932
|
+
} else if (a_.i16[i] > INT8_MAX) {
|
|
933
|
+
r_.i8[i] = INT8_MAX;
|
|
934
|
+
} else {
|
|
935
|
+
r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]);
|
|
936
|
+
}
|
|
937
|
+
}
|
|
938
|
+
|
|
939
|
+
SIMDE_VECTORIZE
|
|
940
|
+
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
941
|
+
if (b_.i16[i] < INT8_MIN) {
|
|
942
|
+
r_.i8[i + 4] = INT8_MIN;
|
|
943
|
+
} else if (b_.i16[i] > INT8_MAX) {
|
|
944
|
+
r_.i8[i + 4] = INT8_MAX;
|
|
945
|
+
} else {
|
|
946
|
+
r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]);
|
|
947
|
+
}
|
|
948
|
+
}
|
|
949
|
+
#endif
|
|
950
|
+
|
|
951
|
+
return simde__m64_from_private(r_);
|
|
952
|
+
#endif
|
|
953
|
+
}
|
|
954
|
+
#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b)
|
|
955
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
956
|
+
#define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b)
|
|
957
|
+
#define _m_packsswb(a, b) simde_mm_packs_pi16(a, b)
|
|
958
|
+
#endif
|
|
959
|
+
|
|
960
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
961
|
+
simde__m64 simde_mm_packs_pi32(simde__m64 a, simde__m64 b)
|
|
962
|
+
{
|
|
963
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
964
|
+
return _mm_packs_pi32(a, b);
|
|
965
|
+
#else
|
|
966
|
+
simde__m64_private r_;
|
|
967
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
968
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
969
|
+
|
|
970
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
971
|
+
r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32));
|
|
972
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
973
|
+
r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32);
|
|
974
|
+
#else
|
|
975
|
+
SIMDE_VECTORIZE
|
|
976
|
+
for (size_t i = 0; i < (8 / sizeof(a_.i32[0])); i++) {
|
|
977
|
+
if (a_.i32[i] < SHRT_MIN) {
|
|
978
|
+
r_.i16[i] = SHRT_MIN;
|
|
979
|
+
} else if (a_.i32[i] > INT16_MAX) {
|
|
980
|
+
r_.i16[i] = INT16_MAX;
|
|
981
|
+
} else {
|
|
982
|
+
r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]);
|
|
983
|
+
}
|
|
984
|
+
}
|
|
985
|
+
|
|
986
|
+
SIMDE_VECTORIZE
|
|
987
|
+
for (size_t i = 0; i < (8 / sizeof(b_.i32[0])); i++) {
|
|
988
|
+
if (b_.i32[i] < SHRT_MIN) {
|
|
989
|
+
r_.i16[i + 2] = SHRT_MIN;
|
|
990
|
+
} else if (b_.i32[i] > INT16_MAX) {
|
|
991
|
+
r_.i16[i + 2] = INT16_MAX;
|
|
992
|
+
} else {
|
|
993
|
+
r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]);
|
|
994
|
+
}
|
|
995
|
+
}
|
|
996
|
+
#endif
|
|
997
|
+
|
|
998
|
+
return simde__m64_from_private(r_);
|
|
999
|
+
#endif
|
|
1000
|
+
}
|
|
1001
|
+
#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b)
|
|
1002
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1003
|
+
#define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b)
|
|
1004
|
+
#define _m_packssdw(a, b) simde_mm_packs_pi32(a, b)
|
|
1005
|
+
#endif
|
|
1006
|
+
|
|
1007
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1008
|
+
simde__m64 simde_mm_packs_pu16(simde__m64 a, simde__m64 b)
|
|
1009
|
+
{
|
|
1010
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1011
|
+
return _mm_packs_pu16(a, b);
|
|
1012
|
+
#else
|
|
1013
|
+
simde__m64_private r_;
|
|
1014
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1015
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
1016
|
+
|
|
1017
|
+
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
|
1018
|
+
const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16);
|
|
1019
|
+
|
|
1020
|
+
/* Set elements which are < 0 to 0 */
|
|
1021
|
+
const int16x8_t t2 =
|
|
1022
|
+
vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1)));
|
|
1023
|
+
|
|
1024
|
+
/* Vector with all s16 elements set to UINT8_MAX */
|
|
1025
|
+
const int16x8_t vmax =
|
|
1026
|
+
vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX));
|
|
1027
|
+
|
|
1028
|
+
/* Elements which are within the acceptable range */
|
|
1029
|
+
const int16x8_t le_max =
|
|
1030
|
+
vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax)));
|
|
1031
|
+
const int16x8_t gt_max =
|
|
1032
|
+
vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax)));
|
|
1033
|
+
|
|
1034
|
+
/* Final values as 16-bit integers */
|
|
1035
|
+
const int16x8_t values = vorrq_s16(le_max, gt_max);
|
|
1036
|
+
|
|
1037
|
+
r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values));
|
|
1038
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
1039
|
+
r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16);
|
|
1040
|
+
#else
|
|
1041
|
+
SIMDE_VECTORIZE
|
|
1042
|
+
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
1043
|
+
if (a_.i16[i] > UINT8_MAX) {
|
|
1044
|
+
r_.u8[i] = UINT8_MAX;
|
|
1045
|
+
} else if (a_.i16[i] < 0) {
|
|
1046
|
+
r_.u8[i] = 0;
|
|
1047
|
+
} else {
|
|
1048
|
+
r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]);
|
|
1049
|
+
}
|
|
1050
|
+
}
|
|
1051
|
+
|
|
1052
|
+
SIMDE_VECTORIZE
|
|
1053
|
+
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
1054
|
+
if (b_.i16[i] > UINT8_MAX) {
|
|
1055
|
+
r_.u8[i + 4] = UINT8_MAX;
|
|
1056
|
+
} else if (b_.i16[i] < 0) {
|
|
1057
|
+
r_.u8[i + 4] = 0;
|
|
1058
|
+
} else {
|
|
1059
|
+
r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]);
|
|
1060
|
+
}
|
|
1061
|
+
}
|
|
1062
|
+
#endif
|
|
1063
|
+
|
|
1064
|
+
return simde__m64_from_private(r_);
|
|
1065
|
+
#endif
|
|
1066
|
+
}
|
|
1067
|
+
#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b)
|
|
1068
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1069
|
+
#define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b)
|
|
1070
|
+
#define _m_packuswb(a, b) simde_mm_packs_pu16(a, b)
|
|
1071
|
+
#endif
|
|
1072
|
+
|
|
1073
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1074
|
+
simde__m64 simde_mm_set_pi8(int8_t e7, int8_t e6, int8_t e5, int8_t e4,
|
|
1075
|
+
int8_t e3, int8_t e2, int8_t e1, int8_t e0)
|
|
1076
|
+
{
|
|
1077
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1078
|
+
return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0);
|
|
1079
|
+
#else
|
|
1080
|
+
simde__m64_private r_;
|
|
1081
|
+
|
|
1082
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1083
|
+
const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = {e0, e1, e2, e3,
|
|
1084
|
+
e4, e5, e6, e7};
|
|
1085
|
+
r_.neon_i8 = vld1_s8(v);
|
|
1086
|
+
#else
|
|
1087
|
+
r_.i8[0] = e0;
|
|
1088
|
+
r_.i8[1] = e1;
|
|
1089
|
+
r_.i8[2] = e2;
|
|
1090
|
+
r_.i8[3] = e3;
|
|
1091
|
+
r_.i8[4] = e4;
|
|
1092
|
+
r_.i8[5] = e5;
|
|
1093
|
+
r_.i8[6] = e6;
|
|
1094
|
+
r_.i8[7] = e7;
|
|
1095
|
+
#endif
|
|
1096
|
+
|
|
1097
|
+
return simde__m64_from_private(r_);
|
|
1098
|
+
#endif
|
|
1099
|
+
}
|
|
1100
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1101
|
+
#define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) \
|
|
1102
|
+
simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0)
|
|
1103
|
+
#endif
|
|
1104
|
+
|
|
1105
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1106
|
+
simde__m64 simde_x_mm_set_pu8(uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4,
|
|
1107
|
+
uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0)
|
|
1108
|
+
{
|
|
1109
|
+
simde__m64_private r_;
|
|
1110
|
+
|
|
1111
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1112
|
+
r_.n = _mm_set_pi8(
|
|
1113
|
+
HEDLEY_STATIC_CAST(int8_t, e7), HEDLEY_STATIC_CAST(int8_t, e6),
|
|
1114
|
+
HEDLEY_STATIC_CAST(int8_t, e5), HEDLEY_STATIC_CAST(int8_t, e4),
|
|
1115
|
+
HEDLEY_STATIC_CAST(int8_t, e3), HEDLEY_STATIC_CAST(int8_t, e2),
|
|
1116
|
+
HEDLEY_STATIC_CAST(int8_t, e1), HEDLEY_STATIC_CAST(int8_t, e0));
|
|
1117
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1118
|
+
const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = {e0, e1, e2, e3,
|
|
1119
|
+
e4, e5, e6, e7};
|
|
1120
|
+
r_.neon_u8 = vld1_u8(v);
|
|
1121
|
+
#else
|
|
1122
|
+
r_.u8[0] = e0;
|
|
1123
|
+
r_.u8[1] = e1;
|
|
1124
|
+
r_.u8[2] = e2;
|
|
1125
|
+
r_.u8[3] = e3;
|
|
1126
|
+
r_.u8[4] = e4;
|
|
1127
|
+
r_.u8[5] = e5;
|
|
1128
|
+
r_.u8[6] = e6;
|
|
1129
|
+
r_.u8[7] = e7;
|
|
1130
|
+
#endif
|
|
1131
|
+
|
|
1132
|
+
return simde__m64_from_private(r_);
|
|
1133
|
+
}
|
|
1134
|
+
|
|
1135
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1136
|
+
simde__m64 simde_mm_set_pi16(int16_t e3, int16_t e2, int16_t e1, int16_t e0)
|
|
1137
|
+
{
|
|
1138
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1139
|
+
return _mm_set_pi16(e3, e2, e1, e0);
|
|
1140
|
+
#else
|
|
1141
|
+
simde__m64_private r_;
|
|
1142
|
+
|
|
1143
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1144
|
+
const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = {e0, e1, e2, e3};
|
|
1145
|
+
r_.neon_i16 = vld1_s16(v);
|
|
1146
|
+
#else
|
|
1147
|
+
r_.i16[0] = e0;
|
|
1148
|
+
r_.i16[1] = e1;
|
|
1149
|
+
r_.i16[2] = e2;
|
|
1150
|
+
r_.i16[3] = e3;
|
|
1151
|
+
#endif
|
|
1152
|
+
|
|
1153
|
+
return simde__m64_from_private(r_);
|
|
1154
|
+
#endif
|
|
1155
|
+
}
|
|
1156
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1157
|
+
#define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0)
|
|
1158
|
+
#endif
|
|
1159
|
+
|
|
1160
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1161
|
+
simde__m64 simde_x_mm_set_pu16(uint16_t e3, uint16_t e2, uint16_t e1,
|
|
1162
|
+
uint16_t e0)
|
|
1163
|
+
{
|
|
1164
|
+
simde__m64_private r_;
|
|
1165
|
+
|
|
1166
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1167
|
+
r_.n = _mm_set_pi16(HEDLEY_STATIC_CAST(int16_t, e3),
|
|
1168
|
+
HEDLEY_STATIC_CAST(int16_t, e2),
|
|
1169
|
+
HEDLEY_STATIC_CAST(int16_t, e1),
|
|
1170
|
+
HEDLEY_STATIC_CAST(int16_t, e0));
|
|
1171
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1172
|
+
const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = {e0, e1, e2, e3};
|
|
1173
|
+
r_.neon_u16 = vld1_u16(v);
|
|
1174
|
+
#else
|
|
1175
|
+
r_.u16[0] = e0;
|
|
1176
|
+
r_.u16[1] = e1;
|
|
1177
|
+
r_.u16[2] = e2;
|
|
1178
|
+
r_.u16[3] = e3;
|
|
1179
|
+
#endif
|
|
1180
|
+
|
|
1181
|
+
return simde__m64_from_private(r_);
|
|
1182
|
+
}
|
|
1183
|
+
|
|
1184
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1185
|
+
simde__m64 simde_x_mm_set_pu32(uint32_t e1, uint32_t e0)
|
|
1186
|
+
{
|
|
1187
|
+
simde__m64_private r_;
|
|
1188
|
+
|
|
1189
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1190
|
+
r_.n = _mm_set_pi32(HEDLEY_STATIC_CAST(int32_t, e1),
|
|
1191
|
+
HEDLEY_STATIC_CAST(int32_t, e0));
|
|
1192
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1193
|
+
const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = {e0, e1};
|
|
1194
|
+
r_.neon_u32 = vld1_u32(v);
|
|
1195
|
+
#else
|
|
1196
|
+
r_.u32[0] = e0;
|
|
1197
|
+
r_.u32[1] = e1;
|
|
1198
|
+
#endif
|
|
1199
|
+
|
|
1200
|
+
return simde__m64_from_private(r_);
|
|
1201
|
+
}
|
|
1202
|
+
|
|
1203
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1204
|
+
simde__m64 simde_mm_set_pi32(int32_t e1, int32_t e0)
|
|
1205
|
+
{
|
|
1206
|
+
simde__m64_private r_;
|
|
1207
|
+
|
|
1208
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1209
|
+
r_.n = _mm_set_pi32(e1, e0);
|
|
1210
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1211
|
+
const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = {e0, e1};
|
|
1212
|
+
r_.neon_i32 = vld1_s32(v);
|
|
1213
|
+
#else
|
|
1214
|
+
r_.i32[0] = e0;
|
|
1215
|
+
r_.i32[1] = e1;
|
|
1216
|
+
#endif
|
|
1217
|
+
|
|
1218
|
+
return simde__m64_from_private(r_);
|
|
1219
|
+
}
|
|
1220
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1221
|
+
#define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0)
|
|
1222
|
+
#endif
|
|
1223
|
+
|
|
1224
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1225
|
+
simde__m64 simde_x_mm_set_pi64(int64_t e0)
|
|
1226
|
+
{
|
|
1227
|
+
simde__m64_private r_;
|
|
1228
|
+
|
|
1229
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1230
|
+
const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = {e0};
|
|
1231
|
+
r_.neon_i64 = vld1_s64(v);
|
|
1232
|
+
#else
|
|
1233
|
+
r_.i64[0] = e0;
|
|
1234
|
+
#endif
|
|
1235
|
+
|
|
1236
|
+
return simde__m64_from_private(r_);
|
|
1237
|
+
}
|
|
1238
|
+
|
|
1239
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1240
|
+
simde__m64 simde_x_mm_set_f32x2(simde_float32 e1, simde_float32 e0)
|
|
1241
|
+
{
|
|
1242
|
+
simde__m64_private r_;
|
|
1243
|
+
|
|
1244
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1245
|
+
const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = {e0, e1};
|
|
1246
|
+
r_.neon_f32 = vld1_f32(v);
|
|
1247
|
+
#else
|
|
1248
|
+
r_.f32[0] = e0;
|
|
1249
|
+
r_.f32[1] = e1;
|
|
1250
|
+
#endif
|
|
1251
|
+
|
|
1252
|
+
return simde__m64_from_private(r_);
|
|
1253
|
+
}
|
|
1254
|
+
|
|
1255
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1256
|
+
simde__m64 simde_mm_set1_pi8(int8_t a)
|
|
1257
|
+
{
|
|
1258
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1259
|
+
return _mm_set1_pi8(a);
|
|
1260
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1261
|
+
simde__m64_private r_;
|
|
1262
|
+
r_.neon_i8 = vmov_n_s8(a);
|
|
1263
|
+
return simde__m64_from_private(r_);
|
|
1264
|
+
#else
|
|
1265
|
+
return simde_mm_set_pi8(a, a, a, a, a, a, a, a);
|
|
1266
|
+
#endif
|
|
1267
|
+
}
|
|
1268
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1269
|
+
#define _mm_set1_pi8(a) simde_mm_set1_pi8(a)
|
|
1270
|
+
#endif
|
|
1271
|
+
|
|
1272
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1273
|
+
simde__m64 simde_mm_set1_pi16(int16_t a)
|
|
1274
|
+
{
|
|
1275
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1276
|
+
return _mm_set1_pi16(a);
|
|
1277
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1278
|
+
simde__m64_private r_;
|
|
1279
|
+
r_.neon_i16 = vmov_n_s16(a);
|
|
1280
|
+
return simde__m64_from_private(r_);
|
|
1281
|
+
#else
|
|
1282
|
+
return simde_mm_set_pi16(a, a, a, a);
|
|
1283
|
+
#endif
|
|
1284
|
+
}
|
|
1285
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1286
|
+
#define _mm_set1_pi16(a) simde_mm_set1_pi16(a)
|
|
1287
|
+
#endif
|
|
1288
|
+
|
|
1289
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1290
|
+
simde__m64 simde_mm_set1_pi32(int32_t a)
|
|
1291
|
+
{
|
|
1292
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1293
|
+
return _mm_set1_pi32(a);
|
|
1294
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1295
|
+
simde__m64_private r_;
|
|
1296
|
+
r_.neon_i32 = vmov_n_s32(a);
|
|
1297
|
+
return simde__m64_from_private(r_);
|
|
1298
|
+
#else
|
|
1299
|
+
return simde_mm_set_pi32(a, a);
|
|
1300
|
+
#endif
|
|
1301
|
+
}
|
|
1302
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1303
|
+
#define _mm_set1_pi32(a) simde_mm_set1_pi32(a)
|
|
1304
|
+
#endif
|
|
1305
|
+
|
|
1306
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1307
|
+
simde__m64 simde_mm_setr_pi8(int8_t e7, int8_t e6, int8_t e5, int8_t e4,
|
|
1308
|
+
int8_t e3, int8_t e2, int8_t e1, int8_t e0)
|
|
1309
|
+
{
|
|
1310
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1311
|
+
return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0);
|
|
1312
|
+
#else
|
|
1313
|
+
return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7);
|
|
1314
|
+
#endif
|
|
1315
|
+
}
|
|
1316
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1317
|
+
#define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) \
|
|
1318
|
+
simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0)
|
|
1319
|
+
#endif
|
|
1320
|
+
|
|
1321
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1322
|
+
simde__m64 simde_mm_setr_pi16(int16_t e3, int16_t e2, int16_t e1, int16_t e0)
|
|
1323
|
+
{
|
|
1324
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1325
|
+
return _mm_setr_pi16(e3, e2, e1, e0);
|
|
1326
|
+
#else
|
|
1327
|
+
return simde_mm_set_pi16(e0, e1, e2, e3);
|
|
1328
|
+
#endif
|
|
1329
|
+
}
|
|
1330
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1331
|
+
#define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0)
|
|
1332
|
+
#endif
|
|
1333
|
+
|
|
1334
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1335
|
+
simde__m64 simde_mm_setr_pi32(int32_t e1, int32_t e0)
|
|
1336
|
+
{
|
|
1337
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1338
|
+
return _mm_setr_pi32(e1, e0);
|
|
1339
|
+
#else
|
|
1340
|
+
return simde_mm_set_pi32(e0, e1);
|
|
1341
|
+
#endif
|
|
1342
|
+
}
|
|
1343
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1344
|
+
#define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0)
|
|
1345
|
+
#endif
|
|
1346
|
+
|
|
1347
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1348
|
+
simde__m64 simde_mm_setzero_si64(void)
|
|
1349
|
+
{
|
|
1350
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1351
|
+
return _mm_setzero_si64();
|
|
1352
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1353
|
+
simde__m64_private r_;
|
|
1354
|
+
r_.neon_u32 = vmov_n_u32(0);
|
|
1355
|
+
return simde__m64_from_private(r_);
|
|
1356
|
+
#else
|
|
1357
|
+
return simde_mm_set_pi32(0, 0);
|
|
1358
|
+
#endif
|
|
1359
|
+
}
|
|
1360
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1361
|
+
#define _mm_setzero_si64() simde_mm_setzero_si64()
|
|
1362
|
+
#endif
|
|
1363
|
+
|
|
1364
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1365
|
+
simde__m64 simde_x_mm_load_si64(const void *mem_addr)
|
|
1366
|
+
{
|
|
1367
|
+
simde__m64 r;
|
|
1368
|
+
simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64),
|
|
1369
|
+
sizeof(r));
|
|
1370
|
+
return r;
|
|
1371
|
+
}
|
|
1372
|
+
|
|
1373
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1374
|
+
simde__m64 simde_x_mm_loadu_si64(const void *mem_addr)
|
|
1375
|
+
{
|
|
1376
|
+
simde__m64 r;
|
|
1377
|
+
simde_memcpy(&r, mem_addr, sizeof(r));
|
|
1378
|
+
return r;
|
|
1379
|
+
}
|
|
1380
|
+
|
|
1381
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1382
|
+
void simde_x_mm_store_si64(void *mem_addr, simde__m64 value)
|
|
1383
|
+
{
|
|
1384
|
+
simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value,
|
|
1385
|
+
sizeof(value));
|
|
1386
|
+
}
|
|
1387
|
+
|
|
1388
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1389
|
+
void simde_x_mm_storeu_si64(void *mem_addr, simde__m64 value)
|
|
1390
|
+
{
|
|
1391
|
+
simde_memcpy(mem_addr, &value, sizeof(value));
|
|
1392
|
+
}
|
|
1393
|
+
|
|
1394
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1395
|
+
simde__m64 simde_x_mm_setone_si64(void)
|
|
1396
|
+
{
|
|
1397
|
+
return simde_mm_set1_pi32(~INT32_C(0));
|
|
1398
|
+
}
|
|
1399
|
+
|
|
1400
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1401
|
+
simde__m64 simde_mm_sll_pi16(simde__m64 a, simde__m64 count)
|
|
1402
|
+
{
|
|
1403
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1404
|
+
return _mm_sll_pi16(a, count);
|
|
1405
|
+
#else
|
|
1406
|
+
simde__m64_private r_;
|
|
1407
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1408
|
+
simde__m64_private count_ = simde__m64_to_private(count);
|
|
1409
|
+
|
|
1410
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1411
|
+
HEDLEY_DIAGNOSTIC_PUSH
|
|
1412
|
+
#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
|
|
1413
|
+
SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
|
|
1414
|
+
#pragma clang diagnostic ignored "-Wvector-conversion"
|
|
1415
|
+
#endif
|
|
1416
|
+
r_.neon_i16 =
|
|
1417
|
+
vshl_s16(a_.neon_i16,
|
|
1418
|
+
vmov_n_s16(HEDLEY_STATIC_CAST(
|
|
1419
|
+
int16_t, vget_lane_u64(count_.neon_u64, 0))));
|
|
1420
|
+
HEDLEY_DIAGNOSTIC_POP
|
|
1421
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \
|
|
1422
|
+
defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT)
|
|
1423
|
+
if (HEDLEY_UNLIKELY(count_.u64[0] > 15))
|
|
1424
|
+
return simde_mm_setzero_si64();
|
|
1425
|
+
|
|
1426
|
+
r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]);
|
|
1427
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1428
|
+
r_.i16 = a_.i16 << count_.u64[0];
|
|
1429
|
+
#else
|
|
1430
|
+
if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) {
|
|
1431
|
+
simde_memset(&r_, 0, sizeof(r_));
|
|
1432
|
+
return simde__m64_from_private(r_);
|
|
1433
|
+
}
|
|
1434
|
+
|
|
1435
|
+
SIMDE_VECTORIZE
|
|
1436
|
+
for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
|
|
1437
|
+
r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t,
|
|
1438
|
+
a_.u16[i] << count_.u64[0]);
|
|
1439
|
+
}
|
|
1440
|
+
#endif
|
|
1441
|
+
|
|
1442
|
+
return simde__m64_from_private(r_);
|
|
1443
|
+
#endif
|
|
1444
|
+
}
|
|
1445
|
+
#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count)
|
|
1446
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1447
|
+
#define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count)
|
|
1448
|
+
#define _m_psllw(a, count) simde_mm_sll_pi16(a, count)
|
|
1449
|
+
#endif
|
|
1450
|
+
|
|
1451
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1452
|
+
simde__m64 simde_mm_sll_pi32(simde__m64 a, simde__m64 count)
|
|
1453
|
+
{
|
|
1454
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1455
|
+
return _mm_sll_pi32(a, count);
|
|
1456
|
+
#else
|
|
1457
|
+
simde__m64_private r_;
|
|
1458
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1459
|
+
simde__m64_private count_ = simde__m64_to_private(count);
|
|
1460
|
+
|
|
1461
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1462
|
+
HEDLEY_DIAGNOSTIC_PUSH
|
|
1463
|
+
#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
|
|
1464
|
+
SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
|
|
1465
|
+
#pragma clang diagnostic ignored "-Wvector-conversion"
|
|
1466
|
+
#endif
|
|
1467
|
+
r_.neon_i32 =
|
|
1468
|
+
vshl_s32(a_.neon_i32,
|
|
1469
|
+
vmov_n_s32(HEDLEY_STATIC_CAST(
|
|
1470
|
+
int32_t, vget_lane_u64(count_.neon_u64, 0))));
|
|
1471
|
+
HEDLEY_DIAGNOSTIC_POP
|
|
1472
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1473
|
+
r_.i32 = a_.i32 << count_.u64[0];
|
|
1474
|
+
#else
|
|
1475
|
+
if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) {
|
|
1476
|
+
simde_memset(&r_, 0, sizeof(r_));
|
|
1477
|
+
return simde__m64_from_private(r_);
|
|
1478
|
+
}
|
|
1479
|
+
|
|
1480
|
+
SIMDE_VECTORIZE
|
|
1481
|
+
for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
|
|
1482
|
+
r_.u32[i] = a_.u32[i] << count_.u64[0];
|
|
1483
|
+
}
|
|
1484
|
+
#endif
|
|
1485
|
+
|
|
1486
|
+
return simde__m64_from_private(r_);
|
|
1487
|
+
#endif
|
|
1488
|
+
}
|
|
1489
|
+
#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count)
|
|
1490
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1491
|
+
#define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count)
|
|
1492
|
+
#define _m_pslld(a, count) simde_mm_sll_pi32(a, count)
|
|
1493
|
+
#endif
|
|
1494
|
+
|
|
1495
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1496
|
+
simde__m64 simde_mm_slli_pi16(simde__m64 a, int count)
|
|
1497
|
+
{
|
|
1498
|
+
#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
|
|
1499
|
+
return _mm_slli_pi16(a, count);
|
|
1500
|
+
#else
|
|
1501
|
+
simde__m64_private r_;
|
|
1502
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1503
|
+
|
|
1504
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \
|
|
1505
|
+
defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT)
|
|
1506
|
+
if (HEDLEY_UNLIKELY(count > 15))
|
|
1507
|
+
return simde_mm_setzero_si64();
|
|
1508
|
+
|
|
1509
|
+
r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count);
|
|
1510
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1511
|
+
r_.i16 = a_.i16 << count;
|
|
1512
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1513
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1514
|
+
r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t)count));
|
|
1515
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
1516
|
+
r_.mmi_i16 = psllh_s(a_.mmi_i16, b_.mmi_i16);
|
|
1517
|
+
#else
|
|
1518
|
+
SIMDE_VECTORIZE
|
|
1519
|
+
for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
|
|
1520
|
+
r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count);
|
|
1521
|
+
}
|
|
1522
|
+
#endif
|
|
1523
|
+
|
|
1524
|
+
return simde__m64_from_private(r_);
|
|
1525
|
+
#endif
|
|
1526
|
+
}
|
|
1527
|
+
#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count)
|
|
1528
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1529
|
+
#define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count)
|
|
1530
|
+
#define _m_psllwi(a, count) simde_mm_slli_pi16(a, count)
|
|
1531
|
+
#endif
|
|
1532
|
+
|
|
1533
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1534
|
+
simde__m64 simde_mm_slli_pi32(simde__m64 a, int count)
|
|
1535
|
+
{
|
|
1536
|
+
#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
|
|
1537
|
+
return _mm_slli_pi32(a, count);
|
|
1538
|
+
#else
|
|
1539
|
+
simde__m64_private r_;
|
|
1540
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1541
|
+
|
|
1542
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1543
|
+
r_.i32 = a_.i32 << count;
|
|
1544
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1545
|
+
r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t)count));
|
|
1546
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
1547
|
+
r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32);
|
|
1548
|
+
#else
|
|
1549
|
+
SIMDE_VECTORIZE
|
|
1550
|
+
for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
|
|
1551
|
+
r_.u32[i] = a_.u32[i] << count;
|
|
1552
|
+
}
|
|
1553
|
+
#endif
|
|
1554
|
+
|
|
1555
|
+
return simde__m64_from_private(r_);
|
|
1556
|
+
#endif
|
|
1557
|
+
}
|
|
1558
|
+
#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b)
|
|
1559
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1560
|
+
#define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count)
|
|
1561
|
+
#define _m_pslldi(a, count) simde_mm_slli_pi32(a, count)
|
|
1562
|
+
#endif
|
|
1563
|
+
|
|
1564
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1565
|
+
simde__m64 simde_mm_slli_si64(simde__m64 a, int count)
|
|
1566
|
+
{
|
|
1567
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1568
|
+
return _mm_slli_si64(a, count);
|
|
1569
|
+
#else
|
|
1570
|
+
simde__m64_private r_;
|
|
1571
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1572
|
+
|
|
1573
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1574
|
+
r_.i64 = a_.i64 << count;
|
|
1575
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1576
|
+
r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t)count));
|
|
1577
|
+
#else
|
|
1578
|
+
r_.u64[0] = a_.u64[0] << count;
|
|
1579
|
+
#endif
|
|
1580
|
+
|
|
1581
|
+
return simde__m64_from_private(r_);
|
|
1582
|
+
#endif
|
|
1583
|
+
}
|
|
1584
|
+
#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count)
|
|
1585
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1586
|
+
#define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count)
|
|
1587
|
+
#define _m_psllqi(a, count) simde_mm_slli_si64(a, count)
|
|
1588
|
+
#endif
|
|
1589
|
+
|
|
1590
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1591
|
+
simde__m64 simde_mm_sll_si64(simde__m64 a, simde__m64 count)
|
|
1592
|
+
{
|
|
1593
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1594
|
+
return _mm_sll_si64(a, count);
|
|
1595
|
+
#else
|
|
1596
|
+
simde__m64_private r_;
|
|
1597
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1598
|
+
simde__m64_private count_ = simde__m64_to_private(count);
|
|
1599
|
+
|
|
1600
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1601
|
+
r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64);
|
|
1602
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
1603
|
+
r_.i64 = a_.i64 << count_.i64;
|
|
1604
|
+
#else
|
|
1605
|
+
if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) {
|
|
1606
|
+
simde_memset(&r_, 0, sizeof(r_));
|
|
1607
|
+
return simde__m64_from_private(r_);
|
|
1608
|
+
}
|
|
1609
|
+
|
|
1610
|
+
r_.u64[0] = a_.u64[0] << count_.u64[0];
|
|
1611
|
+
#endif
|
|
1612
|
+
|
|
1613
|
+
return simde__m64_from_private(r_);
|
|
1614
|
+
#endif
|
|
1615
|
+
}
|
|
1616
|
+
#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count)
|
|
1617
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1618
|
+
#define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count)
|
|
1619
|
+
#define _m_psllq(a, count) simde_mm_sll_si64(a, count)
|
|
1620
|
+
#endif
|
|
1621
|
+
|
|
1622
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1623
|
+
simde__m64 simde_mm_srl_pi16(simde__m64 a, simde__m64 count)
|
|
1624
|
+
{
|
|
1625
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1626
|
+
return _mm_srl_pi16(a, count);
|
|
1627
|
+
#else
|
|
1628
|
+
simde__m64_private r_;
|
|
1629
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1630
|
+
simde__m64_private count_ = simde__m64_to_private(count);
|
|
1631
|
+
|
|
1632
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \
|
|
1633
|
+
defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT)
|
|
1634
|
+
if (HEDLEY_UNLIKELY(count_.u64[0] > 15))
|
|
1635
|
+
return simde_mm_setzero_si64();
|
|
1636
|
+
|
|
1637
|
+
r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, count_.u64[0]);
|
|
1638
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1639
|
+
r_.u16 = a_.u16 >> count_.u64[0];
|
|
1640
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1641
|
+
r_.neon_u16 = vshl_u16(
|
|
1642
|
+
a_.neon_u16,
|
|
1643
|
+
vmov_n_s16(-((int16_t)vget_lane_u64(count_.neon_u64, 0))));
|
|
1644
|
+
#else
|
|
1645
|
+
if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) {
|
|
1646
|
+
simde_memset(&r_, 0, sizeof(r_));
|
|
1647
|
+
return simde__m64_from_private(r_);
|
|
1648
|
+
}
|
|
1649
|
+
|
|
1650
|
+
SIMDE_VECTORIZE
|
|
1651
|
+
for (size_t i = 0; i < sizeof(r_.u16) / sizeof(r_.u16[0]); i++) {
|
|
1652
|
+
r_.u16[i] = a_.u16[i] >> count_.u64[0];
|
|
1653
|
+
}
|
|
1654
|
+
#endif
|
|
1655
|
+
|
|
1656
|
+
return simde__m64_from_private(r_);
|
|
1657
|
+
#endif
|
|
1658
|
+
}
|
|
1659
|
+
#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count)
|
|
1660
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1661
|
+
#define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count)
|
|
1662
|
+
#define _m_psrlw(a, count) simde_mm_srl_pi16(a, count)
|
|
1663
|
+
#endif
|
|
1664
|
+
|
|
1665
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1666
|
+
simde__m64 simde_mm_srl_pi32(simde__m64 a, simde__m64 count)
|
|
1667
|
+
{
|
|
1668
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1669
|
+
return _mm_srl_pi32(a, count);
|
|
1670
|
+
#else
|
|
1671
|
+
simde__m64_private r_;
|
|
1672
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1673
|
+
simde__m64_private count_ = simde__m64_to_private(count);
|
|
1674
|
+
|
|
1675
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1676
|
+
r_.u32 = a_.u32 >> count_.u64[0];
|
|
1677
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1678
|
+
r_.neon_u32 = vshl_u32(
|
|
1679
|
+
a_.neon_u32,
|
|
1680
|
+
vmov_n_s32(-((int32_t)vget_lane_u64(count_.neon_u64, 0))));
|
|
1681
|
+
#else
|
|
1682
|
+
if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) {
|
|
1683
|
+
simde_memset(&r_, 0, sizeof(r_));
|
|
1684
|
+
return simde__m64_from_private(r_);
|
|
1685
|
+
}
|
|
1686
|
+
|
|
1687
|
+
SIMDE_VECTORIZE
|
|
1688
|
+
for (size_t i = 0; i < sizeof(r_.u32) / sizeof(r_.u32[0]); i++) {
|
|
1689
|
+
r_.u32[i] = a_.u32[i] >> count_.u64[0];
|
|
1690
|
+
}
|
|
1691
|
+
#endif
|
|
1692
|
+
|
|
1693
|
+
return simde__m64_from_private(r_);
|
|
1694
|
+
#endif
|
|
1695
|
+
}
|
|
1696
|
+
#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count)
|
|
1697
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1698
|
+
#define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count)
|
|
1699
|
+
#define _m_psrld(a, count) simde_mm_srl_pi32(a, count)
|
|
1700
|
+
#endif
|
|
1701
|
+
|
|
1702
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1703
|
+
simde__m64 simde_mm_srli_pi16(simde__m64 a, int count)
|
|
1704
|
+
{
|
|
1705
|
+
#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
|
|
1706
|
+
return _mm_srli_pi16(a, count);
|
|
1707
|
+
#else
|
|
1708
|
+
simde__m64_private r_;
|
|
1709
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1710
|
+
|
|
1711
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1712
|
+
r_.u16 = a_.u16 >> count;
|
|
1713
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1714
|
+
r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t)count)));
|
|
1715
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
1716
|
+
r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16);
|
|
1717
|
+
#else
|
|
1718
|
+
SIMDE_VECTORIZE
|
|
1719
|
+
for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
|
|
1720
|
+
r_.u16[i] = a_.u16[i] >> count;
|
|
1721
|
+
}
|
|
1722
|
+
#endif
|
|
1723
|
+
|
|
1724
|
+
return simde__m64_from_private(r_);
|
|
1725
|
+
#endif
|
|
1726
|
+
}
|
|
1727
|
+
#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count)
|
|
1728
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1729
|
+
#define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count)
|
|
1730
|
+
#define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count)
|
|
1731
|
+
#endif
|
|
1732
|
+
|
|
1733
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1734
|
+
simde__m64 simde_mm_srli_pi32(simde__m64 a, int count)
|
|
1735
|
+
{
|
|
1736
|
+
#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
|
|
1737
|
+
return _mm_srli_pi32(a, count);
|
|
1738
|
+
#else
|
|
1739
|
+
simde__m64_private r_;
|
|
1740
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1741
|
+
|
|
1742
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1743
|
+
r_.u32 = a_.u32 >> count;
|
|
1744
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1745
|
+
r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t)count)));
|
|
1746
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
1747
|
+
r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32);
|
|
1748
|
+
#else
|
|
1749
|
+
SIMDE_VECTORIZE
|
|
1750
|
+
for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
|
|
1751
|
+
r_.u32[i] = a_.u32[i] >> count;
|
|
1752
|
+
}
|
|
1753
|
+
#endif
|
|
1754
|
+
|
|
1755
|
+
return simde__m64_from_private(r_);
|
|
1756
|
+
#endif
|
|
1757
|
+
}
|
|
1758
|
+
#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count)
|
|
1759
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1760
|
+
#define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count)
|
|
1761
|
+
#define _m_psrldi(a, count) simde_mm_srli_pi32(a, count)
|
|
1762
|
+
#endif
|
|
1763
|
+
|
|
1764
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1765
|
+
simde__m64 simde_mm_srli_si64(simde__m64 a, int count)
|
|
1766
|
+
{
|
|
1767
|
+
#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
|
|
1768
|
+
return _mm_srli_si64(a, count);
|
|
1769
|
+
#else
|
|
1770
|
+
simde__m64_private r_;
|
|
1771
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1772
|
+
|
|
1773
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1774
|
+
r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count));
|
|
1775
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1776
|
+
r_.u64 = a_.u64 >> count;
|
|
1777
|
+
#else
|
|
1778
|
+
r_.u64[0] = a_.u64[0] >> count;
|
|
1779
|
+
#endif
|
|
1780
|
+
|
|
1781
|
+
return simde__m64_from_private(r_);
|
|
1782
|
+
#endif
|
|
1783
|
+
}
|
|
1784
|
+
#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count)
|
|
1785
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1786
|
+
#define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count)
|
|
1787
|
+
#define _m_psrlqi(a, count) simde_mm_srli_si64(a, count)
|
|
1788
|
+
#endif
|
|
1789
|
+
|
|
1790
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1791
|
+
simde__m64 simde_mm_srl_si64(simde__m64 a, simde__m64 count)
|
|
1792
|
+
{
|
|
1793
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1794
|
+
return _mm_srl_si64(a, count);
|
|
1795
|
+
#else
|
|
1796
|
+
simde__m64_private r_;
|
|
1797
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1798
|
+
simde__m64_private count_ = simde__m64_to_private(count);
|
|
1799
|
+
|
|
1800
|
+
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
|
1801
|
+
r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64));
|
|
1802
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
1803
|
+
r_.u64 = a_.u64 >> count_.u64;
|
|
1804
|
+
#else
|
|
1805
|
+
if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) {
|
|
1806
|
+
simde_memset(&r_, 0, sizeof(r_));
|
|
1807
|
+
return simde__m64_from_private(r_);
|
|
1808
|
+
}
|
|
1809
|
+
|
|
1810
|
+
r_.u64[0] = a_.u64[0] >> count_.u64[0];
|
|
1811
|
+
#endif
|
|
1812
|
+
|
|
1813
|
+
return simde__m64_from_private(r_);
|
|
1814
|
+
#endif
|
|
1815
|
+
}
|
|
1816
|
+
#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count)
|
|
1817
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1818
|
+
#define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count)
|
|
1819
|
+
#define _m_psrlq(a, count) simde_mm_srl_si64(a, count)
|
|
1820
|
+
#endif
|
|
1821
|
+
|
|
1822
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1823
|
+
simde__m64 simde_mm_srai_pi16(simde__m64 a, int count)
|
|
1824
|
+
{
|
|
1825
|
+
#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
|
|
1826
|
+
return _mm_srai_pi16(a, count);
|
|
1827
|
+
#else
|
|
1828
|
+
simde__m64_private r_;
|
|
1829
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1830
|
+
|
|
1831
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1832
|
+
r_.i16 = a_.i16 >> (count & 0xff);
|
|
1833
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1834
|
+
r_.neon_i16 = vshl_s16(a_.neon_i16,
|
|
1835
|
+
vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count)));
|
|
1836
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
1837
|
+
r_.mmi_i16 = psrah_s(a_.mmi_i16, count);
|
|
1838
|
+
#else
|
|
1839
|
+
SIMDE_VECTORIZE
|
|
1840
|
+
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
1841
|
+
r_.i16[i] = a_.i16[i] >> (count & 0xff);
|
|
1842
|
+
}
|
|
1843
|
+
#endif
|
|
1844
|
+
|
|
1845
|
+
return simde__m64_from_private(r_);
|
|
1846
|
+
#endif
|
|
1847
|
+
}
|
|
1848
|
+
#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count)
|
|
1849
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1850
|
+
#define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count)
|
|
1851
|
+
#define _m_psrawi(a, count) simde_mm_srai_pi16(a, count)
|
|
1852
|
+
#endif
|
|
1853
|
+
|
|
1854
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1855
|
+
simde__m64 simde_mm_srai_pi32(simde__m64 a, int count)
|
|
1856
|
+
{
|
|
1857
|
+
#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
|
|
1858
|
+
return _mm_srai_pi32(a, count);
|
|
1859
|
+
#else
|
|
1860
|
+
simde__m64_private r_;
|
|
1861
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1862
|
+
|
|
1863
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1864
|
+
r_.i32 = a_.i32 >> (count & 0xff);
|
|
1865
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1866
|
+
r_.neon_i32 = vshl_s32(a_.neon_i32,
|
|
1867
|
+
vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count)));
|
|
1868
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
1869
|
+
r_.mmi_i32 = psraw_s(a_.mmi_i32, count);
|
|
1870
|
+
#else
|
|
1871
|
+
SIMDE_VECTORIZE
|
|
1872
|
+
for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
|
|
1873
|
+
r_.i32[i] = a_.i32[i] >> (count & 0xff);
|
|
1874
|
+
}
|
|
1875
|
+
#endif
|
|
1876
|
+
|
|
1877
|
+
return simde__m64_from_private(r_);
|
|
1878
|
+
#endif
|
|
1879
|
+
}
|
|
1880
|
+
#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count)
|
|
1881
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1882
|
+
#define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count)
|
|
1883
|
+
#define _m_psradi(a, count) simde_mm_srai_pi32(a, count)
|
|
1884
|
+
#endif
|
|
1885
|
+
|
|
1886
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1887
|
+
simde__m64 simde_mm_sra_pi16(simde__m64 a, simde__m64 count)
|
|
1888
|
+
{
|
|
1889
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1890
|
+
return _mm_sra_pi16(a, count);
|
|
1891
|
+
#else
|
|
1892
|
+
simde__m64_private r_;
|
|
1893
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1894
|
+
simde__m64_private count_ = simde__m64_to_private(count);
|
|
1895
|
+
const int cnt = HEDLEY_STATIC_CAST(
|
|
1896
|
+
int, (count_.i64[0] > 15 ? 15 : count_.i64[0]));
|
|
1897
|
+
|
|
1898
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1899
|
+
r_.i16 = a_.i16 >> cnt;
|
|
1900
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1901
|
+
r_.neon_i16 =
|
|
1902
|
+
vshl_s16(a_.neon_i16,
|
|
1903
|
+
vmov_n_s16(-HEDLEY_STATIC_CAST(
|
|
1904
|
+
int16_t, vget_lane_u64(count_.neon_u64, 0))));
|
|
1905
|
+
#else
|
|
1906
|
+
SIMDE_VECTORIZE
|
|
1907
|
+
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
1908
|
+
r_.i16[i] = a_.i16[i] >> cnt;
|
|
1909
|
+
}
|
|
1910
|
+
#endif
|
|
1911
|
+
|
|
1912
|
+
return simde__m64_from_private(r_);
|
|
1913
|
+
#endif
|
|
1914
|
+
}
|
|
1915
|
+
#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count)
|
|
1916
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1917
|
+
#define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count)
|
|
1918
|
+
#define _m_psraw(a, count) simde_mm_sra_pi16(a, count)
|
|
1919
|
+
#endif
|
|
1920
|
+
|
|
1921
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1922
|
+
simde__m64 simde_mm_sra_pi32(simde__m64 a, simde__m64 count)
|
|
1923
|
+
{
|
|
1924
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1925
|
+
return _mm_sra_pi32(a, count);
|
|
1926
|
+
#else
|
|
1927
|
+
simde__m64_private r_;
|
|
1928
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1929
|
+
simde__m64_private count_ = simde__m64_to_private(count);
|
|
1930
|
+
const int32_t cnt =
|
|
1931
|
+
(count_.u64[0] > 31)
|
|
1932
|
+
? 31
|
|
1933
|
+
: HEDLEY_STATIC_CAST(int32_t, count_.u64[0]);
|
|
1934
|
+
|
|
1935
|
+
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
|
1936
|
+
r_.i32 = a_.i32 >> cnt;
|
|
1937
|
+
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1938
|
+
r_.neon_i32 =
|
|
1939
|
+
vshl_s32(a_.neon_i32,
|
|
1940
|
+
vmov_n_s32(-HEDLEY_STATIC_CAST(
|
|
1941
|
+
int32_t, vget_lane_u64(count_.neon_u64, 0))));
|
|
1942
|
+
#else
|
|
1943
|
+
SIMDE_VECTORIZE
|
|
1944
|
+
for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
|
|
1945
|
+
r_.i32[i] = a_.i32[i] >> cnt;
|
|
1946
|
+
}
|
|
1947
|
+
#endif
|
|
1948
|
+
|
|
1949
|
+
return simde__m64_from_private(r_);
|
|
1950
|
+
#endif
|
|
1951
|
+
}
|
|
1952
|
+
#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b)
|
|
1953
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1954
|
+
#define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count)
|
|
1955
|
+
#define _m_psrad(a, count) simde_mm_sra_pi32(a, count)
|
|
1956
|
+
#endif
|
|
1957
|
+
|
|
1958
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1959
|
+
simde__m64 simde_mm_sub_pi8(simde__m64 a, simde__m64 b)
|
|
1960
|
+
{
|
|
1961
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1962
|
+
return _mm_sub_pi8(a, b);
|
|
1963
|
+
#else
|
|
1964
|
+
simde__m64_private r_;
|
|
1965
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1966
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
1967
|
+
|
|
1968
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
1969
|
+
r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8);
|
|
1970
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
1971
|
+
r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8);
|
|
1972
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
1973
|
+
r_.i8 = a_.i8 - b_.i8;
|
|
1974
|
+
#else
|
|
1975
|
+
SIMDE_VECTORIZE
|
|
1976
|
+
for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
|
|
1977
|
+
r_.i8[i] = a_.i8[i] - b_.i8[i];
|
|
1978
|
+
}
|
|
1979
|
+
#endif
|
|
1980
|
+
|
|
1981
|
+
return simde__m64_from_private(r_);
|
|
1982
|
+
#endif
|
|
1983
|
+
}
|
|
1984
|
+
#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b)
|
|
1985
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
1986
|
+
#define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b)
|
|
1987
|
+
#define _m_psubb(a, b) simde_mm_sub_pi8(a, b)
|
|
1988
|
+
#endif
|
|
1989
|
+
|
|
1990
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
1991
|
+
simde__m64 simde_mm_sub_pi16(simde__m64 a, simde__m64 b)
|
|
1992
|
+
{
|
|
1993
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
1994
|
+
return _mm_sub_pi16(a, b);
|
|
1995
|
+
#else
|
|
1996
|
+
simde__m64_private r_;
|
|
1997
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
1998
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
1999
|
+
|
|
2000
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
2001
|
+
r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16);
|
|
2002
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
2003
|
+
r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16);
|
|
2004
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
2005
|
+
r_.i16 = a_.i16 - b_.i16;
|
|
2006
|
+
#else
|
|
2007
|
+
SIMDE_VECTORIZE
|
|
2008
|
+
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
2009
|
+
r_.i16[i] = a_.i16[i] - b_.i16[i];
|
|
2010
|
+
}
|
|
2011
|
+
#endif
|
|
2012
|
+
|
|
2013
|
+
return simde__m64_from_private(r_);
|
|
2014
|
+
#endif
|
|
2015
|
+
}
|
|
2016
|
+
#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b)
|
|
2017
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
2018
|
+
#define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b)
|
|
2019
|
+
#define _m_psubw(a, b) simde_mm_sub_pi16(a, b)
|
|
2020
|
+
#endif
|
|
2021
|
+
|
|
2022
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
2023
|
+
simde__m64 simde_mm_sub_pi32(simde__m64 a, simde__m64 b)
|
|
2024
|
+
{
|
|
2025
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
2026
|
+
return _mm_sub_pi32(a, b);
|
|
2027
|
+
#else
|
|
2028
|
+
simde__m64_private r_;
|
|
2029
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
2030
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
2031
|
+
|
|
2032
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
2033
|
+
r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32);
|
|
2034
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
2035
|
+
r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32);
|
|
2036
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
2037
|
+
r_.i32 = a_.i32 - b_.i32;
|
|
2038
|
+
#else
|
|
2039
|
+
SIMDE_VECTORIZE
|
|
2040
|
+
for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
|
|
2041
|
+
r_.i32[i] = a_.i32[i] - b_.i32[i];
|
|
2042
|
+
}
|
|
2043
|
+
#endif
|
|
2044
|
+
|
|
2045
|
+
return simde__m64_from_private(r_);
|
|
2046
|
+
#endif
|
|
2047
|
+
}
|
|
2048
|
+
#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b)
|
|
2049
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
2050
|
+
#define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b)
|
|
2051
|
+
#define _m_psubd(a, b) simde_mm_sub_pi32(a, b)
|
|
2052
|
+
#endif
|
|
2053
|
+
|
|
2054
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
2055
|
+
simde__m64 simde_mm_subs_pi8(simde__m64 a, simde__m64 b)
|
|
2056
|
+
{
|
|
2057
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
2058
|
+
return _mm_subs_pi8(a, b);
|
|
2059
|
+
#else
|
|
2060
|
+
simde__m64_private r_;
|
|
2061
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
2062
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
2063
|
+
|
|
2064
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
2065
|
+
r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8);
|
|
2066
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
2067
|
+
r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8);
|
|
2068
|
+
#else
|
|
2069
|
+
SIMDE_VECTORIZE
|
|
2070
|
+
for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
|
|
2071
|
+
if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) {
|
|
2072
|
+
r_.i8[i] = INT8_MIN;
|
|
2073
|
+
} else if ((b_.i8[i]) < 0 &&
|
|
2074
|
+
(a_.i8[i]) > INT8_MAX + (b_.i8[i])) {
|
|
2075
|
+
r_.i8[i] = INT8_MAX;
|
|
2076
|
+
} else {
|
|
2077
|
+
r_.i8[i] = (a_.i8[i]) - (b_.i8[i]);
|
|
2078
|
+
}
|
|
2079
|
+
}
|
|
2080
|
+
#endif
|
|
2081
|
+
|
|
2082
|
+
return simde__m64_from_private(r_);
|
|
2083
|
+
#endif
|
|
2084
|
+
}
|
|
2085
|
+
#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b)
|
|
2086
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
2087
|
+
#define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b)
|
|
2088
|
+
#define _m_psubsb(a, b) simde_mm_subs_pi8(a, b)
|
|
2089
|
+
#endif
|
|
2090
|
+
|
|
2091
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
2092
|
+
simde__m64 simde_mm_subs_pu8(simde__m64 a, simde__m64 b)
|
|
2093
|
+
{
|
|
2094
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
2095
|
+
return _mm_subs_pu8(a, b);
|
|
2096
|
+
#else
|
|
2097
|
+
simde__m64_private r_;
|
|
2098
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
2099
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
2100
|
+
|
|
2101
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
2102
|
+
r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8);
|
|
2103
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
2104
|
+
r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8);
|
|
2105
|
+
#else
|
|
2106
|
+
SIMDE_VECTORIZE
|
|
2107
|
+
for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
|
|
2108
|
+
const int32_t x = a_.u8[i] - b_.u8[i];
|
|
2109
|
+
if (x < 0) {
|
|
2110
|
+
r_.u8[i] = 0;
|
|
2111
|
+
} else if (x > UINT8_MAX) {
|
|
2112
|
+
r_.u8[i] = UINT8_MAX;
|
|
2113
|
+
} else {
|
|
2114
|
+
r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x);
|
|
2115
|
+
}
|
|
2116
|
+
}
|
|
2117
|
+
#endif
|
|
2118
|
+
|
|
2119
|
+
return simde__m64_from_private(r_);
|
|
2120
|
+
#endif
|
|
2121
|
+
}
|
|
2122
|
+
#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b)
|
|
2123
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
2124
|
+
#define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b)
|
|
2125
|
+
#define _m_psubusb(a, b) simde_mm_subs_pu8(a, b)
|
|
2126
|
+
#endif
|
|
2127
|
+
|
|
2128
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
2129
|
+
simde__m64 simde_mm_subs_pi16(simde__m64 a, simde__m64 b)
|
|
2130
|
+
{
|
|
2131
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
2132
|
+
return _mm_subs_pi16(a, b);
|
|
2133
|
+
#else
|
|
2134
|
+
simde__m64_private r_;
|
|
2135
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
2136
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
2137
|
+
|
|
2138
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
2139
|
+
r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16);
|
|
2140
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
2141
|
+
r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16);
|
|
2142
|
+
#else
|
|
2143
|
+
SIMDE_VECTORIZE
|
|
2144
|
+
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
|
2145
|
+
if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) {
|
|
2146
|
+
r_.i16[i] = SHRT_MIN;
|
|
2147
|
+
} else if ((b_.i16[i]) < 0 &&
|
|
2148
|
+
(a_.i16[i]) > INT16_MAX + (b_.i16[i])) {
|
|
2149
|
+
r_.i16[i] = INT16_MAX;
|
|
2150
|
+
} else {
|
|
2151
|
+
r_.i16[i] = (a_.i16[i]) - (b_.i16[i]);
|
|
2152
|
+
}
|
|
2153
|
+
}
|
|
2154
|
+
#endif
|
|
2155
|
+
|
|
2156
|
+
return simde__m64_from_private(r_);
|
|
2157
|
+
#endif
|
|
2158
|
+
}
|
|
2159
|
+
#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b)
|
|
2160
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
2161
|
+
#define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b)
|
|
2162
|
+
#define _m_psubsw(a, b) simde_mm_subs_pi16(a, b)
|
|
2163
|
+
#endif
|
|
2164
|
+
|
|
2165
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
2166
|
+
simde__m64 simde_mm_subs_pu16(simde__m64 a, simde__m64 b)
|
|
2167
|
+
{
|
|
2168
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
2169
|
+
return _mm_subs_pu16(a, b);
|
|
2170
|
+
#else
|
|
2171
|
+
simde__m64_private r_;
|
|
2172
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
2173
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
2174
|
+
|
|
2175
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
2176
|
+
r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16);
|
|
2177
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
2178
|
+
r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16);
|
|
2179
|
+
#else
|
|
2180
|
+
SIMDE_VECTORIZE
|
|
2181
|
+
for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
|
|
2182
|
+
const int x = a_.u16[i] - b_.u16[i];
|
|
2183
|
+
if (x < 0) {
|
|
2184
|
+
r_.u16[i] = 0;
|
|
2185
|
+
} else if (x > UINT16_MAX) {
|
|
2186
|
+
r_.u16[i] = UINT16_MAX;
|
|
2187
|
+
} else {
|
|
2188
|
+
r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x);
|
|
2189
|
+
}
|
|
2190
|
+
}
|
|
2191
|
+
#endif
|
|
2192
|
+
|
|
2193
|
+
return simde__m64_from_private(r_);
|
|
2194
|
+
#endif
|
|
2195
|
+
}
|
|
2196
|
+
#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b)
|
|
2197
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
2198
|
+
#define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b)
|
|
2199
|
+
#define _m_psubusw(a, b) simde_mm_subs_pu16(a, b)
|
|
2200
|
+
#endif
|
|
2201
|
+
|
|
2202
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
2203
|
+
simde__m64 simde_mm_unpackhi_pi8(simde__m64 a, simde__m64 b)
|
|
2204
|
+
{
|
|
2205
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
2206
|
+
return _mm_unpackhi_pi8(a, b);
|
|
2207
|
+
#else
|
|
2208
|
+
simde__m64_private r_;
|
|
2209
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
2210
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
2211
|
+
|
|
2212
|
+
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
|
2213
|
+
r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8);
|
|
2214
|
+
#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
|
2215
|
+
r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14,
|
|
2216
|
+
7, 15);
|
|
2217
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
2218
|
+
r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8);
|
|
2219
|
+
#else
|
|
2220
|
+
r_.i8[0] = a_.i8[4];
|
|
2221
|
+
r_.i8[1] = b_.i8[4];
|
|
2222
|
+
r_.i8[2] = a_.i8[5];
|
|
2223
|
+
r_.i8[3] = b_.i8[5];
|
|
2224
|
+
r_.i8[4] = a_.i8[6];
|
|
2225
|
+
r_.i8[5] = b_.i8[6];
|
|
2226
|
+
r_.i8[6] = a_.i8[7];
|
|
2227
|
+
r_.i8[7] = b_.i8[7];
|
|
2228
|
+
#endif
|
|
2229
|
+
|
|
2230
|
+
return simde__m64_from_private(r_);
|
|
2231
|
+
#endif
|
|
2232
|
+
}
|
|
2233
|
+
#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b)
|
|
2234
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
2235
|
+
#define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b)
|
|
2236
|
+
#define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b)
|
|
2237
|
+
#endif
|
|
2238
|
+
|
|
2239
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
2240
|
+
simde__m64 simde_mm_unpackhi_pi16(simde__m64 a, simde__m64 b)
|
|
2241
|
+
{
|
|
2242
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
2243
|
+
return _mm_unpackhi_pi16(a, b);
|
|
2244
|
+
#else
|
|
2245
|
+
simde__m64_private r_;
|
|
2246
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
2247
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
2248
|
+
|
|
2249
|
+
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
|
2250
|
+
r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16);
|
|
2251
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
2252
|
+
r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16);
|
|
2253
|
+
#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
|
2254
|
+
r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7);
|
|
2255
|
+
#else
|
|
2256
|
+
r_.i16[0] = a_.i16[2];
|
|
2257
|
+
r_.i16[1] = b_.i16[2];
|
|
2258
|
+
r_.i16[2] = a_.i16[3];
|
|
2259
|
+
r_.i16[3] = b_.i16[3];
|
|
2260
|
+
#endif
|
|
2261
|
+
|
|
2262
|
+
return simde__m64_from_private(r_);
|
|
2263
|
+
#endif
|
|
2264
|
+
}
|
|
2265
|
+
#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b)
|
|
2266
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
2267
|
+
#define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b)
|
|
2268
|
+
#define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b)
|
|
2269
|
+
#endif
|
|
2270
|
+
|
|
2271
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
2272
|
+
simde__m64 simde_mm_unpackhi_pi32(simde__m64 a, simde__m64 b)
|
|
2273
|
+
{
|
|
2274
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
2275
|
+
return _mm_unpackhi_pi32(a, b);
|
|
2276
|
+
#else
|
|
2277
|
+
simde__m64_private r_;
|
|
2278
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
2279
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
2280
|
+
|
|
2281
|
+
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
|
2282
|
+
r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32);
|
|
2283
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
2284
|
+
r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32);
|
|
2285
|
+
#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
|
2286
|
+
r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3);
|
|
2287
|
+
#else
|
|
2288
|
+
r_.i32[0] = a_.i32[1];
|
|
2289
|
+
r_.i32[1] = b_.i32[1];
|
|
2290
|
+
#endif
|
|
2291
|
+
|
|
2292
|
+
return simde__m64_from_private(r_);
|
|
2293
|
+
#endif
|
|
2294
|
+
}
|
|
2295
|
+
#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b)
|
|
2296
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
2297
|
+
#define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b)
|
|
2298
|
+
#define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b)
|
|
2299
|
+
#endif
|
|
2300
|
+
|
|
2301
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
2302
|
+
simde__m64 simde_mm_unpacklo_pi8(simde__m64 a, simde__m64 b)
|
|
2303
|
+
{
|
|
2304
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
2305
|
+
return _mm_unpacklo_pi8(a, b);
|
|
2306
|
+
#else
|
|
2307
|
+
simde__m64_private r_;
|
|
2308
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
2309
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
2310
|
+
|
|
2311
|
+
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
|
2312
|
+
r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8);
|
|
2313
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
2314
|
+
r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8);
|
|
2315
|
+
#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
|
2316
|
+
r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3,
|
|
2317
|
+
11);
|
|
2318
|
+
#else
|
|
2319
|
+
r_.i8[0] = a_.i8[0];
|
|
2320
|
+
r_.i8[1] = b_.i8[0];
|
|
2321
|
+
r_.i8[2] = a_.i8[1];
|
|
2322
|
+
r_.i8[3] = b_.i8[1];
|
|
2323
|
+
r_.i8[4] = a_.i8[2];
|
|
2324
|
+
r_.i8[5] = b_.i8[2];
|
|
2325
|
+
r_.i8[6] = a_.i8[3];
|
|
2326
|
+
r_.i8[7] = b_.i8[3];
|
|
2327
|
+
#endif
|
|
2328
|
+
|
|
2329
|
+
return simde__m64_from_private(r_);
|
|
2330
|
+
#endif
|
|
2331
|
+
}
|
|
2332
|
+
#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b)
|
|
2333
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
2334
|
+
#define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b)
|
|
2335
|
+
#define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b)
|
|
2336
|
+
#endif
|
|
2337
|
+
|
|
2338
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
2339
|
+
simde__m64 simde_mm_unpacklo_pi16(simde__m64 a, simde__m64 b)
|
|
2340
|
+
{
|
|
2341
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
2342
|
+
return _mm_unpacklo_pi16(a, b);
|
|
2343
|
+
#else
|
|
2344
|
+
simde__m64_private r_;
|
|
2345
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
2346
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
2347
|
+
|
|
2348
|
+
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
|
2349
|
+
r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16);
|
|
2350
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
2351
|
+
r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16);
|
|
2352
|
+
#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
|
2353
|
+
r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5);
|
|
2354
|
+
#else
|
|
2355
|
+
r_.i16[0] = a_.i16[0];
|
|
2356
|
+
r_.i16[1] = b_.i16[0];
|
|
2357
|
+
r_.i16[2] = a_.i16[1];
|
|
2358
|
+
r_.i16[3] = b_.i16[1];
|
|
2359
|
+
#endif
|
|
2360
|
+
|
|
2361
|
+
return simde__m64_from_private(r_);
|
|
2362
|
+
#endif
|
|
2363
|
+
}
|
|
2364
|
+
#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b)
|
|
2365
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
2366
|
+
#define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b)
|
|
2367
|
+
#define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b)
|
|
2368
|
+
#endif
|
|
2369
|
+
|
|
2370
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
2371
|
+
simde__m64 simde_mm_unpacklo_pi32(simde__m64 a, simde__m64 b)
|
|
2372
|
+
{
|
|
2373
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
2374
|
+
return _mm_unpacklo_pi32(a, b);
|
|
2375
|
+
#else
|
|
2376
|
+
simde__m64_private r_;
|
|
2377
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
2378
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
2379
|
+
|
|
2380
|
+
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
|
2381
|
+
r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32);
|
|
2382
|
+
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
|
2383
|
+
r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32);
|
|
2384
|
+
#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
|
2385
|
+
r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2);
|
|
2386
|
+
#else
|
|
2387
|
+
r_.i32[0] = a_.i32[0];
|
|
2388
|
+
r_.i32[1] = b_.i32[0];
|
|
2389
|
+
#endif
|
|
2390
|
+
|
|
2391
|
+
return simde__m64_from_private(r_);
|
|
2392
|
+
#endif
|
|
2393
|
+
}
|
|
2394
|
+
#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b)
|
|
2395
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
2396
|
+
#define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b)
|
|
2397
|
+
#define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b)
|
|
2398
|
+
#endif
|
|
2399
|
+
|
|
2400
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
2401
|
+
simde__m64 simde_mm_xor_si64(simde__m64 a, simde__m64 b)
|
|
2402
|
+
{
|
|
2403
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
2404
|
+
return _mm_xor_si64(a, b);
|
|
2405
|
+
#else
|
|
2406
|
+
simde__m64_private r_;
|
|
2407
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
2408
|
+
simde__m64_private b_ = simde__m64_to_private(b);
|
|
2409
|
+
|
|
2410
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
2411
|
+
r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32);
|
|
2412
|
+
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
|
2413
|
+
r_.i32f = a_.i32f ^ b_.i32f;
|
|
2414
|
+
#else
|
|
2415
|
+
r_.u64[0] = a_.u64[0] ^ b_.u64[0];
|
|
2416
|
+
#endif
|
|
2417
|
+
|
|
2418
|
+
return simde__m64_from_private(r_);
|
|
2419
|
+
#endif
|
|
2420
|
+
}
|
|
2421
|
+
#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b)
|
|
2422
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
2423
|
+
#define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b)
|
|
2424
|
+
#define _m_pxor(a, b) simde_mm_xor_si64(a, b)
|
|
2425
|
+
#endif
|
|
2426
|
+
|
|
2427
|
+
SIMDE_FUNCTION_ATTRIBUTES
|
|
2428
|
+
int32_t simde_m_to_int(simde__m64 a)
|
|
2429
|
+
{
|
|
2430
|
+
#if defined(SIMDE_X86_MMX_NATIVE)
|
|
2431
|
+
return _m_to_int(a);
|
|
2432
|
+
#else
|
|
2433
|
+
simde__m64_private a_ = simde__m64_to_private(a);
|
|
2434
|
+
|
|
2435
|
+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
|
2436
|
+
HEDLEY_DIAGNOSTIC_PUSH
|
|
2437
|
+
#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
|
|
2438
|
+
SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
|
|
2439
|
+
#pragma clang diagnostic ignored "-Wvector-conversion"
|
|
2440
|
+
#endif
|
|
2441
|
+
return vget_lane_s32(a_.neon_i32, 0);
|
|
2442
|
+
HEDLEY_DIAGNOSTIC_POP
|
|
2443
|
+
#else
|
|
2444
|
+
return a_.i32[0];
|
|
2445
|
+
#endif
|
|
2446
|
+
#endif
|
|
2447
|
+
}
|
|
2448
|
+
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
|
2449
|
+
#define _m_to_int(a) simde_m_to_int(a)
|
|
2450
|
+
#endif
|
|
2451
|
+
|
|
2452
|
+
SIMDE_END_DECLS_
|
|
2453
|
+
|
|
2454
|
+
HEDLEY_DIAGNOSTIC_POP
|
|
2455
|
+
|
|
2456
|
+
#endif /* !defined(SIMDE_X86_MMX_H) */
|