xcodebuild-helper 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.codeclimate.yml +20 -0
- data/.gitignore +1 -0
- data/.rspec +2 -0
- data/.travis.yml +7 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +110 -0
- data/Guardfile +18 -0
- data/README.md +7 -0
- data/Rakefile +7 -0
- data/TODO.md +3 -0
- data/bin/oclint +5 -0
- data/bin/oclint-0.8 +5 -0
- data/bin/oclint-json-compilation-database +5 -0
- data/bin/oclint-xcodebuild +5 -0
- data/externals/oclint/LICENSE +69 -0
- data/externals/oclint/bin/oclint +0 -0
- data/externals/oclint/bin/oclint-0.10.2 +0 -0
- data/externals/oclint/bin/oclint-json-compilation-database +88 -0
- data/externals/oclint/bin/oclint-xcodebuild +218 -0
- data/externals/oclint/lib/clang/3.7.0/asan_blacklist.txt +13 -0
- data/externals/oclint/lib/clang/3.7.0/include/Intrin.h +958 -0
- data/externals/oclint/lib/clang/3.7.0/include/__stddef_max_align_t.h +43 -0
- data/externals/oclint/lib/clang/3.7.0/include/__wmmintrin_aes.h +72 -0
- data/externals/oclint/lib/clang/3.7.0/include/__wmmintrin_pclmul.h +34 -0
- data/externals/oclint/lib/clang/3.7.0/include/adxintrin.h +88 -0
- data/externals/oclint/lib/clang/3.7.0/include/altivec.h +13528 -0
- data/externals/oclint/lib/clang/3.7.0/include/ammintrin.h +215 -0
- data/externals/oclint/lib/clang/3.7.0/include/arm_acle.h +304 -0
- data/externals/oclint/lib/clang/3.7.0/include/arm_neon.h +68419 -0
- data/externals/oclint/lib/clang/3.7.0/include/avx2intrin.h +1256 -0
- data/externals/oclint/lib/clang/3.7.0/include/avx512bwintrin.h +1250 -0
- data/externals/oclint/lib/clang/3.7.0/include/avx512cdintrin.h +131 -0
- data/externals/oclint/lib/clang/3.7.0/include/avx512dqintrin.h +242 -0
- data/externals/oclint/lib/clang/3.7.0/include/avx512erintrin.h +285 -0
- data/externals/oclint/lib/clang/3.7.0/include/avx512fintrin.h +2457 -0
- data/externals/oclint/lib/clang/3.7.0/include/avx512vlbwintrin.h +1907 -0
- data/externals/oclint/lib/clang/3.7.0/include/avx512vldqintrin.h +353 -0
- data/externals/oclint/lib/clang/3.7.0/include/avx512vlintrin.h +1982 -0
- data/externals/oclint/lib/clang/3.7.0/include/avxintrin.h +1308 -0
- data/externals/oclint/lib/clang/3.7.0/include/bmi2intrin.h +99 -0
- data/externals/oclint/lib/clang/3.7.0/include/bmiintrin.h +153 -0
- data/externals/oclint/lib/clang/3.7.0/include/cpuid.h +209 -0
- data/externals/oclint/lib/clang/3.7.0/include/cuda_builtin_vars.h +110 -0
- data/externals/oclint/lib/clang/3.7.0/include/emmintrin.h +1480 -0
- data/externals/oclint/lib/clang/3.7.0/include/f16cintrin.h +63 -0
- data/externals/oclint/lib/clang/3.7.0/include/float.h +124 -0
- data/externals/oclint/lib/clang/3.7.0/include/fma4intrin.h +236 -0
- data/externals/oclint/lib/clang/3.7.0/include/fmaintrin.h +234 -0
- data/externals/oclint/lib/clang/3.7.0/include/fxsrintrin.h +55 -0
- data/externals/oclint/lib/clang/3.7.0/include/htmintrin.h +226 -0
- data/externals/oclint/lib/clang/3.7.0/include/htmxlintrin.h +363 -0
- data/externals/oclint/lib/clang/3.7.0/include/ia32intrin.h +101 -0
- data/externals/oclint/lib/clang/3.7.0/include/immintrin.h +203 -0
- data/externals/oclint/lib/clang/3.7.0/include/inttypes.h +102 -0
- data/externals/oclint/lib/clang/3.7.0/include/iso646.h +43 -0
- data/externals/oclint/lib/clang/3.7.0/include/limits.h +118 -0
- data/externals/oclint/lib/clang/3.7.0/include/lzcntintrin.h +72 -0
- data/externals/oclint/lib/clang/3.7.0/include/mm3dnow.h +167 -0
- data/externals/oclint/lib/clang/3.7.0/include/mm_malloc.h +75 -0
- data/externals/oclint/lib/clang/3.7.0/include/mmintrin.h +507 -0
- data/externals/oclint/lib/clang/3.7.0/include/module.modulemap +196 -0
- data/externals/oclint/lib/clang/3.7.0/include/nmmintrin.h +35 -0
- data/externals/oclint/lib/clang/3.7.0/include/pmmintrin.h +122 -0
- data/externals/oclint/lib/clang/3.7.0/include/popcntintrin.h +50 -0
- data/externals/oclint/lib/clang/3.7.0/include/prfchwintrin.h +39 -0
- data/externals/oclint/lib/clang/3.7.0/include/rdseedintrin.h +59 -0
- data/externals/oclint/lib/clang/3.7.0/include/rtmintrin.h +59 -0
- data/externals/oclint/lib/clang/3.7.0/include/s390intrin.h +39 -0
- data/externals/oclint/lib/clang/3.7.0/include/sanitizer/allocator_interface.h +66 -0
- data/externals/oclint/lib/clang/3.7.0/include/sanitizer/asan_interface.h +155 -0
- data/externals/oclint/lib/clang/3.7.0/include/sanitizer/common_interface_defs.h +118 -0
- data/externals/oclint/lib/clang/3.7.0/include/sanitizer/coverage_interface.h +63 -0
- data/externals/oclint/lib/clang/3.7.0/include/sanitizer/dfsan_interface.h +114 -0
- data/externals/oclint/lib/clang/3.7.0/include/sanitizer/linux_syscall_hooks.h +3070 -0
- data/externals/oclint/lib/clang/3.7.0/include/sanitizer/lsan_interface.h +84 -0
- data/externals/oclint/lib/clang/3.7.0/include/sanitizer/msan_interface.h +107 -0
- data/externals/oclint/lib/clang/3.7.0/include/sanitizer/tsan_interface_atomic.h +222 -0
- data/externals/oclint/lib/clang/3.7.0/include/shaintrin.h +79 -0
- data/externals/oclint/lib/clang/3.7.0/include/smmintrin.h +487 -0
- data/externals/oclint/lib/clang/3.7.0/include/stdalign.h +35 -0
- data/externals/oclint/lib/clang/3.7.0/include/stdarg.h +52 -0
- data/externals/oclint/lib/clang/3.7.0/include/stdatomic.h +190 -0
- data/externals/oclint/lib/clang/3.7.0/include/stdbool.h +44 -0
- data/externals/oclint/lib/clang/3.7.0/include/stddef.h +137 -0
- data/externals/oclint/lib/clang/3.7.0/include/stdint.h +707 -0
- data/externals/oclint/lib/clang/3.7.0/include/stdnoreturn.h +30 -0
- data/externals/oclint/lib/clang/3.7.0/include/tbmintrin.h +154 -0
- data/externals/oclint/lib/clang/3.7.0/include/tgmath.h +1374 -0
- data/externals/oclint/lib/clang/3.7.0/include/tmmintrin.h +230 -0
- data/externals/oclint/lib/clang/3.7.0/include/unwind.h +282 -0
- data/externals/oclint/lib/clang/3.7.0/include/vadefs.h +65 -0
- data/externals/oclint/lib/clang/3.7.0/include/varargs.h +26 -0
- data/externals/oclint/lib/clang/3.7.0/include/vecintrin.h +8946 -0
- data/externals/oclint/lib/clang/3.7.0/include/wmmintrin.h +42 -0
- data/externals/oclint/lib/clang/3.7.0/include/x86intrin.h +81 -0
- data/externals/oclint/lib/clang/3.7.0/include/xmmintrin.h +1008 -0
- data/externals/oclint/lib/clang/3.7.0/include/xopintrin.h +809 -0
- data/externals/oclint/lib/clang/3.7.0/include/xtestintrin.h +41 -0
- data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.asan_iossim_dynamic.dylib +0 -0
- data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.asan_osx_dynamic.dylib +0 -0
- data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.builtins-i386.a +0 -0
- data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.builtins-x86_64.a +0 -0
- data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.profile_osx.a +0 -0
- data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.safestack_osx.a +0 -0
- data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.ubsan_iossim_dynamic.dylib +0 -0
- data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.ubsan_osx_dynamic.dylib +0 -0
- data/externals/oclint/lib/oclint/reporters/libHTMLReporter.dylib +0 -0
- data/externals/oclint/lib/oclint/reporters/libJSONReporter.dylib +0 -0
- data/externals/oclint/lib/oclint/reporters/libPMDReporter.dylib +0 -0
- data/externals/oclint/lib/oclint/reporters/libTextReporter.dylib +0 -0
- data/externals/oclint/lib/oclint/reporters/libXMLReporter.dylib +0 -0
- data/externals/oclint/lib/oclint/reporters/libXcodeReporter.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libAvoidBranchingStatementAsLastInLoopRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libAvoidDefaultArgumentsOnVirtualMethodsRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libAvoidPrivateStaticMembersRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libBaseClassDestructorShouldBeVirtualOrProtectedRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libBitwiseOperatorInConditionalRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libBrokenNullCheckRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libBrokenOddnessCheckRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libCollapsibleIfStatementsRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libConstantConditionalOperatorRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libConstantIfExpressionRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libCoveredSwitchStatementsDontNeedDefaultRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libCyclomaticComplexityRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libDeadCodeRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libDefaultLabelNotLastInSwitchStatementRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libDestructorOfVirtualClassRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libDoubleNegativeRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libEmptyCatchStatementRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libEmptyDoWhileStatementRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libEmptyElseBlockRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libEmptyFinallyStatementRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libEmptyForStatementRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libEmptyIfStatementRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libEmptySwitchStatementRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libEmptyTryStatementRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libEmptyWhileStatementRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libForLoopShouldBeWhileLoopRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libGotoStatementRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libInvertedLogicRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libJumbledIncrementerRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libLongClassRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libLongLineRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libLongMethodRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libLongVariableNameRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libMisplacedNullCheckRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libMissingBreakInSwitchStatementRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libMultipleUnaryOperatorRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libNPathComplexityRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libNcssMethodCountRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libNestedBlockDepthRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libNonCaseLabelInSwitchStatementRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libObjCAssignIvarOutsideAccessorsRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libObjCBoxedExpressionsRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libObjCContainerLiteralsRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libObjCNSNumberLiteralsRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libObjCObjectSubscriptingRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libObjCVerifyIsEqualHashRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libObjCVerifyMustCallSuperRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libObjCVerifyProhibitedCallRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libObjCVerifyProtectedMethodRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libObjCVerifySubclassMustImplementRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libParameterReassignmentRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libPreferEarlyExitRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libRedundantConditionalOperatorRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libRedundantIfStatementRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libRedundantLocalVariableRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libRedundantNilCheckRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libReturnFromFinallyBlockRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libShortVariableNameRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libSwitchStatementsShouldHaveDefaultRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libThrowExceptionFromFinallyBlockRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libTooFewBranchesInSwitchStatementRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libTooManyFieldsRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libTooManyMethodsRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libTooManyParametersRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libUnnecessaryElseStatementRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libUnnecessaryNullCheckForCXXDeallocRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libUnusedLocalVariableRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libUnusedMethodParameterRule.dylib +0 -0
- data/externals/oclint/lib/oclint/rules/libUselessParenthesesRule.dylib +0 -0
- data/lib/coverage_plan.rb +19 -0
- data/lib/device.rb +27 -0
- data/lib/execute.rb +7 -0
- data/lib/lint_plan.rb +41 -0
- data/lib/rules.rb +23 -0
- data/lib/test_plan.rb +11 -0
- data/lib/version.rb +3 -0
- data/lib/xcode.rb +128 -0
- data/lib/xcodebuild-helper.rb +110 -0
- data/spec/coverage_plan_spec.rb +18 -0
- data/spec/device_spec.rb +24 -0
- data/spec/lint_plan_spec.rb +35 -0
- data/spec/rule_spec.rb +37 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/test_plan_spec.rb +11 -0
- data/spec/xcode_dsl_actions_spec.rb +136 -0
- data/spec/xcode_dsl_spec.rb +176 -0
- data/spec/xcode_spec.rb +79 -0
- data/xcodebuild-helper.gemspec +26 -0
- metadata +327 -0
|
@@ -0,0 +1,2457 @@
|
|
|
1
|
+
/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
|
|
2
|
+
*
|
|
3
|
+
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
4
|
+
* of this software and associated documentation files (the "Software"), to deal
|
|
5
|
+
* in the Software without restriction, including without limitation the rights
|
|
6
|
+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
7
|
+
* copies of the Software, and to permit persons to whom the Software is
|
|
8
|
+
* furnished to do so, subject to the following conditions:
|
|
9
|
+
*
|
|
10
|
+
* The above copyright notice and this permission notice shall be included in
|
|
11
|
+
* all copies or substantial portions of the Software.
|
|
12
|
+
*
|
|
13
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
14
|
+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
15
|
+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
16
|
+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
17
|
+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
18
|
+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
19
|
+
* THE SOFTWARE.
|
|
20
|
+
*
|
|
21
|
+
*===-----------------------------------------------------------------------===
|
|
22
|
+
*/
|
|
23
|
+
#ifndef __IMMINTRIN_H
|
|
24
|
+
#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
|
|
25
|
+
#endif
|
|
26
|
+
|
|
27
|
+
#ifndef __AVX512FINTRIN_H
|
|
28
|
+
#define __AVX512FINTRIN_H
|
|
29
|
+
|
|
30
|
+
typedef double __v8df __attribute__((__vector_size__(64)));
|
|
31
|
+
typedef float __v16sf __attribute__((__vector_size__(64)));
|
|
32
|
+
typedef long long __v8di __attribute__((__vector_size__(64)));
|
|
33
|
+
typedef int __v16si __attribute__((__vector_size__(64)));
|
|
34
|
+
|
|
35
|
+
typedef float __m512 __attribute__((__vector_size__(64)));
|
|
36
|
+
typedef double __m512d __attribute__((__vector_size__(64)));
|
|
37
|
+
typedef long long __m512i __attribute__((__vector_size__(64)));
|
|
38
|
+
|
|
39
|
+
typedef unsigned char __mmask8;
|
|
40
|
+
typedef unsigned short __mmask16;
|
|
41
|
+
|
|
42
|
+
/* Rounding mode macros. */
|
|
43
|
+
#define _MM_FROUND_TO_NEAREST_INT 0x00
|
|
44
|
+
#define _MM_FROUND_TO_NEG_INF 0x01
|
|
45
|
+
#define _MM_FROUND_TO_POS_INF 0x02
|
|
46
|
+
#define _MM_FROUND_TO_ZERO 0x03
|
|
47
|
+
#define _MM_FROUND_CUR_DIRECTION 0x04
|
|
48
|
+
|
|
49
|
+
/* Define the default attributes for the functions in this file. */
|
|
50
|
+
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
|
|
51
|
+
|
|
52
|
+
/* Create vectors with repeated elements */
|
|
53
|
+
|
|
54
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
55
|
+
_mm512_setzero_si512(void)
|
|
56
|
+
{
|
|
57
|
+
return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
61
|
+
_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
|
|
62
|
+
{
|
|
63
|
+
return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
|
|
64
|
+
(__v16si)
|
|
65
|
+
_mm512_setzero_si512 (),
|
|
66
|
+
__M);
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
70
|
+
_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
|
|
71
|
+
{
|
|
72
|
+
#ifdef __x86_64__
|
|
73
|
+
return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
|
|
74
|
+
(__v8di)
|
|
75
|
+
_mm512_setzero_si512 (),
|
|
76
|
+
__M);
|
|
77
|
+
#else
|
|
78
|
+
return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
|
|
79
|
+
(__v8di)
|
|
80
|
+
_mm512_setzero_si512 (),
|
|
81
|
+
__M);
|
|
82
|
+
#endif
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
static __inline __m512 __DEFAULT_FN_ATTRS
|
|
86
|
+
_mm512_setzero_ps(void)
|
|
87
|
+
{
|
|
88
|
+
return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
|
|
89
|
+
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
|
|
90
|
+
}
|
|
91
|
+
static __inline __m512d __DEFAULT_FN_ATTRS
|
|
92
|
+
_mm512_setzero_pd(void)
|
|
93
|
+
{
|
|
94
|
+
return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
static __inline __m512 __DEFAULT_FN_ATTRS
|
|
98
|
+
_mm512_set1_ps(float __w)
|
|
99
|
+
{
|
|
100
|
+
return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
|
|
101
|
+
__w, __w, __w, __w, __w, __w, __w, __w };
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
static __inline __m512d __DEFAULT_FN_ATTRS
|
|
105
|
+
_mm512_set1_pd(double __w)
|
|
106
|
+
{
|
|
107
|
+
return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
111
|
+
_mm512_set1_epi32(int __s)
|
|
112
|
+
{
|
|
113
|
+
return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
|
|
114
|
+
__s, __s, __s, __s, __s, __s, __s, __s };
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
118
|
+
_mm512_set1_epi64(long long __d)
|
|
119
|
+
{
|
|
120
|
+
return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
124
|
+
_mm512_broadcastss_ps(__m128 __X)
|
|
125
|
+
{
|
|
126
|
+
float __f = __X[0];
|
|
127
|
+
return (__v16sf){ __f, __f, __f, __f,
|
|
128
|
+
__f, __f, __f, __f,
|
|
129
|
+
__f, __f, __f, __f,
|
|
130
|
+
__f, __f, __f, __f };
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
134
|
+
_mm512_broadcastsd_pd(__m128d __X)
|
|
135
|
+
{
|
|
136
|
+
double __d = __X[0];
|
|
137
|
+
return (__v8df){ __d, __d, __d, __d,
|
|
138
|
+
__d, __d, __d, __d };
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
/* Cast between vector types */
|
|
142
|
+
|
|
143
|
+
static __inline __m512d __DEFAULT_FN_ATTRS
|
|
144
|
+
_mm512_castpd256_pd512(__m256d __a)
|
|
145
|
+
{
|
|
146
|
+
return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
static __inline __m512 __DEFAULT_FN_ATTRS
|
|
150
|
+
_mm512_castps256_ps512(__m256 __a)
|
|
151
|
+
{
|
|
152
|
+
return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
|
|
153
|
+
-1, -1, -1, -1, -1, -1, -1, -1);
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
static __inline __m128d __DEFAULT_FN_ATTRS
|
|
157
|
+
_mm512_castpd512_pd128(__m512d __a)
|
|
158
|
+
{
|
|
159
|
+
return __builtin_shufflevector(__a, __a, 0, 1);
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
static __inline __m128 __DEFAULT_FN_ATTRS
|
|
163
|
+
_mm512_castps512_ps128(__m512 __a)
|
|
164
|
+
{
|
|
165
|
+
return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
/* Bitwise operators */
|
|
169
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
170
|
+
_mm512_and_epi32(__m512i __a, __m512i __b)
|
|
171
|
+
{
|
|
172
|
+
return __a & __b;
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
176
|
+
_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
|
|
177
|
+
{
|
|
178
|
+
return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
|
|
179
|
+
(__v16si) __b,
|
|
180
|
+
(__v16si) __src,
|
|
181
|
+
(__mmask16) __k);
|
|
182
|
+
}
|
|
183
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
184
|
+
_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
|
|
185
|
+
{
|
|
186
|
+
return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
|
|
187
|
+
(__v16si) __b,
|
|
188
|
+
(__v16si)
|
|
189
|
+
_mm512_setzero_si512 (),
|
|
190
|
+
(__mmask16) __k);
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
194
|
+
_mm512_and_epi64(__m512i __a, __m512i __b)
|
|
195
|
+
{
|
|
196
|
+
return __a & __b;
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
200
|
+
_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
|
|
201
|
+
{
|
|
202
|
+
return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
|
|
203
|
+
(__v8di) __b,
|
|
204
|
+
(__v8di) __src,
|
|
205
|
+
(__mmask8) __k);
|
|
206
|
+
}
|
|
207
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
208
|
+
_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
|
|
209
|
+
{
|
|
210
|
+
return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
|
|
211
|
+
(__v8di) __b,
|
|
212
|
+
(__v8di)
|
|
213
|
+
_mm512_setzero_si512 (),
|
|
214
|
+
(__mmask8) __k);
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
218
|
+
_mm512_andnot_epi32 (__m512i __A, __m512i __B)
|
|
219
|
+
{
|
|
220
|
+
return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
|
|
221
|
+
(__v16si) __B,
|
|
222
|
+
(__v16si)
|
|
223
|
+
_mm512_setzero_si512 (),
|
|
224
|
+
(__mmask16) -1);
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
228
|
+
_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
|
|
229
|
+
{
|
|
230
|
+
return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
|
|
231
|
+
(__v16si) __B,
|
|
232
|
+
(__v16si) __W,
|
|
233
|
+
(__mmask16) __U);
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
237
|
+
_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
|
|
238
|
+
{
|
|
239
|
+
return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
|
|
240
|
+
(__v16si) __B,
|
|
241
|
+
(__v16si)
|
|
242
|
+
_mm512_setzero_si512 (),
|
|
243
|
+
(__mmask16) __U);
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
247
|
+
_mm512_andnot_epi64 (__m512i __A, __m512i __B)
|
|
248
|
+
{
|
|
249
|
+
return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
|
|
250
|
+
(__v8di) __B,
|
|
251
|
+
(__v8di)
|
|
252
|
+
_mm512_setzero_si512 (),
|
|
253
|
+
(__mmask8) -1);
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
257
|
+
_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
|
|
258
|
+
{
|
|
259
|
+
return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
|
|
260
|
+
(__v8di) __B,
|
|
261
|
+
(__v8di) __W, __U);
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
265
|
+
_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
|
|
266
|
+
{
|
|
267
|
+
return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
|
|
268
|
+
(__v8di) __B,
|
|
269
|
+
(__v8di)
|
|
270
|
+
_mm512_setzero_pd (),
|
|
271
|
+
__U);
|
|
272
|
+
}
|
|
273
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
274
|
+
_mm512_or_epi32(__m512i __a, __m512i __b)
|
|
275
|
+
{
|
|
276
|
+
return __a | __b;
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
280
|
+
_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
|
|
281
|
+
{
|
|
282
|
+
return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
|
|
283
|
+
(__v16si) __b,
|
|
284
|
+
(__v16si) __src,
|
|
285
|
+
(__mmask16) __k);
|
|
286
|
+
}
|
|
287
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
288
|
+
_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
|
|
289
|
+
{
|
|
290
|
+
return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
|
|
291
|
+
(__v16si) __b,
|
|
292
|
+
(__v16si)
|
|
293
|
+
_mm512_setzero_si512 (),
|
|
294
|
+
(__mmask16) __k);
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
298
|
+
_mm512_or_epi64(__m512i __a, __m512i __b)
|
|
299
|
+
{
|
|
300
|
+
return __a | __b;
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
304
|
+
_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
|
|
305
|
+
{
|
|
306
|
+
return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
|
|
307
|
+
(__v8di) __b,
|
|
308
|
+
(__v8di) __src,
|
|
309
|
+
(__mmask8) __k);
|
|
310
|
+
}
|
|
311
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
312
|
+
_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
|
|
313
|
+
{
|
|
314
|
+
return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
|
|
315
|
+
(__v8di) __b,
|
|
316
|
+
(__v8di)
|
|
317
|
+
_mm512_setzero_si512 (),
|
|
318
|
+
(__mmask8) __k);
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
322
|
+
_mm512_xor_epi32(__m512i __a, __m512i __b)
|
|
323
|
+
{
|
|
324
|
+
return __a ^ __b;
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
328
|
+
_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
|
|
329
|
+
{
|
|
330
|
+
return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
|
|
331
|
+
(__v16si) __b,
|
|
332
|
+
(__v16si) __src,
|
|
333
|
+
(__mmask16) __k);
|
|
334
|
+
}
|
|
335
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
336
|
+
_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
|
|
337
|
+
{
|
|
338
|
+
return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
|
|
339
|
+
(__v16si) __b,
|
|
340
|
+
(__v16si)
|
|
341
|
+
_mm512_setzero_si512 (),
|
|
342
|
+
(__mmask16) __k);
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
346
|
+
_mm512_xor_epi64(__m512i __a, __m512i __b)
|
|
347
|
+
{
|
|
348
|
+
return __a ^ __b;
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
352
|
+
_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
|
|
353
|
+
{
|
|
354
|
+
return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
|
|
355
|
+
(__v8di) __b,
|
|
356
|
+
(__v8di) __src,
|
|
357
|
+
(__mmask8) __k);
|
|
358
|
+
}
|
|
359
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
360
|
+
_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
|
|
361
|
+
{
|
|
362
|
+
return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
|
|
363
|
+
(__v8di) __b,
|
|
364
|
+
(__v8di)
|
|
365
|
+
_mm512_setzero_si512 (),
|
|
366
|
+
(__mmask8) __k);
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
370
|
+
_mm512_and_si512(__m512i __a, __m512i __b)
|
|
371
|
+
{
|
|
372
|
+
return __a & __b;
|
|
373
|
+
}
|
|
374
|
+
|
|
375
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
376
|
+
_mm512_or_si512(__m512i __a, __m512i __b)
|
|
377
|
+
{
|
|
378
|
+
return __a | __b;
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
382
|
+
_mm512_xor_si512(__m512i __a, __m512i __b)
|
|
383
|
+
{
|
|
384
|
+
return __a ^ __b;
|
|
385
|
+
}
|
|
386
|
+
/* Arithmetic */
|
|
387
|
+
|
|
388
|
+
static __inline __m512d __DEFAULT_FN_ATTRS
|
|
389
|
+
_mm512_add_pd(__m512d __a, __m512d __b)
|
|
390
|
+
{
|
|
391
|
+
return __a + __b;
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
static __inline __m512 __DEFAULT_FN_ATTRS
|
|
395
|
+
_mm512_add_ps(__m512 __a, __m512 __b)
|
|
396
|
+
{
|
|
397
|
+
return __a + __b;
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
static __inline __m512d __DEFAULT_FN_ATTRS
|
|
401
|
+
_mm512_mul_pd(__m512d __a, __m512d __b)
|
|
402
|
+
{
|
|
403
|
+
return __a * __b;
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
static __inline __m512 __DEFAULT_FN_ATTRS
|
|
407
|
+
_mm512_mul_ps(__m512 __a, __m512 __b)
|
|
408
|
+
{
|
|
409
|
+
return __a * __b;
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
static __inline __m512d __DEFAULT_FN_ATTRS
|
|
413
|
+
_mm512_sub_pd(__m512d __a, __m512d __b)
|
|
414
|
+
{
|
|
415
|
+
return __a - __b;
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
static __inline __m512 __DEFAULT_FN_ATTRS
|
|
419
|
+
_mm512_sub_ps(__m512 __a, __m512 __b)
|
|
420
|
+
{
|
|
421
|
+
return __a - __b;
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
425
|
+
_mm512_add_epi64 (__m512i __A, __m512i __B)
|
|
426
|
+
{
|
|
427
|
+
return (__m512i) ((__v8di) __A + (__v8di) __B);
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
431
|
+
_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
|
|
432
|
+
{
|
|
433
|
+
return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
|
|
434
|
+
(__v8di) __B,
|
|
435
|
+
(__v8di) __W,
|
|
436
|
+
(__mmask8) __U);
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
440
|
+
_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
|
|
441
|
+
{
|
|
442
|
+
return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
|
|
443
|
+
(__v8di) __B,
|
|
444
|
+
(__v8di)
|
|
445
|
+
_mm512_setzero_si512 (),
|
|
446
|
+
(__mmask8) __U);
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
450
|
+
_mm512_sub_epi64 (__m512i __A, __m512i __B)
|
|
451
|
+
{
|
|
452
|
+
return (__m512i) ((__v8di) __A - (__v8di) __B);
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
456
|
+
_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
|
|
457
|
+
{
|
|
458
|
+
return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
|
|
459
|
+
(__v8di) __B,
|
|
460
|
+
(__v8di) __W,
|
|
461
|
+
(__mmask8) __U);
|
|
462
|
+
}
|
|
463
|
+
|
|
464
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
465
|
+
_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
|
|
466
|
+
{
|
|
467
|
+
return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
|
|
468
|
+
(__v8di) __B,
|
|
469
|
+
(__v8di)
|
|
470
|
+
_mm512_setzero_si512 (),
|
|
471
|
+
(__mmask8) __U);
|
|
472
|
+
}
|
|
473
|
+
|
|
474
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
475
|
+
_mm512_add_epi32 (__m512i __A, __m512i __B)
|
|
476
|
+
{
|
|
477
|
+
return (__m512i) ((__v16si) __A + (__v16si) __B);
|
|
478
|
+
}
|
|
479
|
+
|
|
480
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
481
|
+
_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
|
|
482
|
+
{
|
|
483
|
+
return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
|
|
484
|
+
(__v16si) __B,
|
|
485
|
+
(__v16si) __W,
|
|
486
|
+
(__mmask16) __U);
|
|
487
|
+
}
|
|
488
|
+
|
|
489
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
490
|
+
_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
|
|
491
|
+
{
|
|
492
|
+
return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
|
|
493
|
+
(__v16si) __B,
|
|
494
|
+
(__v16si)
|
|
495
|
+
_mm512_setzero_si512 (),
|
|
496
|
+
(__mmask16) __U);
|
|
497
|
+
}
|
|
498
|
+
|
|
499
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
500
|
+
_mm512_sub_epi32 (__m512i __A, __m512i __B)
|
|
501
|
+
{
|
|
502
|
+
return (__m512i) ((__v16si) __A - (__v16si) __B);
|
|
503
|
+
}
|
|
504
|
+
|
|
505
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
506
|
+
_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
|
|
507
|
+
{
|
|
508
|
+
return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
|
|
509
|
+
(__v16si) __B,
|
|
510
|
+
(__v16si) __W,
|
|
511
|
+
(__mmask16) __U);
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
|
515
|
+
_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
|
|
516
|
+
{
|
|
517
|
+
return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
|
|
518
|
+
(__v16si) __B,
|
|
519
|
+
(__v16si)
|
|
520
|
+
_mm512_setzero_si512 (),
|
|
521
|
+
(__mmask16) __U);
|
|
522
|
+
}
|
|
523
|
+
|
|
524
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
525
|
+
_mm512_max_pd(__m512d __A, __m512d __B)
|
|
526
|
+
{
|
|
527
|
+
return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
|
|
528
|
+
(__v8df) __B,
|
|
529
|
+
(__v8df)
|
|
530
|
+
_mm512_setzero_pd (),
|
|
531
|
+
(__mmask8) -1,
|
|
532
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
536
|
+
_mm512_max_ps(__m512 __A, __m512 __B)
|
|
537
|
+
{
|
|
538
|
+
return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
|
|
539
|
+
(__v16sf) __B,
|
|
540
|
+
(__v16sf)
|
|
541
|
+
_mm512_setzero_ps (),
|
|
542
|
+
(__mmask16) -1,
|
|
543
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
544
|
+
}
|
|
545
|
+
|
|
546
|
+
static __inline __m512i
|
|
547
|
+
__DEFAULT_FN_ATTRS
|
|
548
|
+
_mm512_max_epi32(__m512i __A, __m512i __B)
|
|
549
|
+
{
|
|
550
|
+
return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
|
|
551
|
+
(__v16si) __B,
|
|
552
|
+
(__v16si)
|
|
553
|
+
_mm512_setzero_si512 (),
|
|
554
|
+
(__mmask16) -1);
|
|
555
|
+
}
|
|
556
|
+
|
|
557
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
558
|
+
_mm512_max_epu32(__m512i __A, __m512i __B)
|
|
559
|
+
{
|
|
560
|
+
return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
|
|
561
|
+
(__v16si) __B,
|
|
562
|
+
(__v16si)
|
|
563
|
+
_mm512_setzero_si512 (),
|
|
564
|
+
(__mmask16) -1);
|
|
565
|
+
}
|
|
566
|
+
|
|
567
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
568
|
+
_mm512_max_epi64(__m512i __A, __m512i __B)
|
|
569
|
+
{
|
|
570
|
+
return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
|
|
571
|
+
(__v8di) __B,
|
|
572
|
+
(__v8di)
|
|
573
|
+
_mm512_setzero_si512 (),
|
|
574
|
+
(__mmask8) -1);
|
|
575
|
+
}
|
|
576
|
+
|
|
577
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
578
|
+
_mm512_max_epu64(__m512i __A, __m512i __B)
|
|
579
|
+
{
|
|
580
|
+
return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
|
|
581
|
+
(__v8di) __B,
|
|
582
|
+
(__v8di)
|
|
583
|
+
_mm512_setzero_si512 (),
|
|
584
|
+
(__mmask8) -1);
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
588
|
+
_mm512_min_pd(__m512d __A, __m512d __B)
|
|
589
|
+
{
|
|
590
|
+
return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
|
|
591
|
+
(__v8df) __B,
|
|
592
|
+
(__v8df)
|
|
593
|
+
_mm512_setzero_pd (),
|
|
594
|
+
(__mmask8) -1,
|
|
595
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
596
|
+
}
|
|
597
|
+
|
|
598
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
599
|
+
_mm512_min_ps(__m512 __A, __m512 __B)
|
|
600
|
+
{
|
|
601
|
+
return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
|
|
602
|
+
(__v16sf) __B,
|
|
603
|
+
(__v16sf)
|
|
604
|
+
_mm512_setzero_ps (),
|
|
605
|
+
(__mmask16) -1,
|
|
606
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
607
|
+
}
|
|
608
|
+
|
|
609
|
+
static __inline __m512i
|
|
610
|
+
__DEFAULT_FN_ATTRS
|
|
611
|
+
_mm512_min_epi32(__m512i __A, __m512i __B)
|
|
612
|
+
{
|
|
613
|
+
return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
|
|
614
|
+
(__v16si) __B,
|
|
615
|
+
(__v16si)
|
|
616
|
+
_mm512_setzero_si512 (),
|
|
617
|
+
(__mmask16) -1);
|
|
618
|
+
}
|
|
619
|
+
|
|
620
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
621
|
+
_mm512_min_epu32(__m512i __A, __m512i __B)
|
|
622
|
+
{
|
|
623
|
+
return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
|
|
624
|
+
(__v16si) __B,
|
|
625
|
+
(__v16si)
|
|
626
|
+
_mm512_setzero_si512 (),
|
|
627
|
+
(__mmask16) -1);
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
631
|
+
_mm512_min_epi64(__m512i __A, __m512i __B)
|
|
632
|
+
{
|
|
633
|
+
return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
|
|
634
|
+
(__v8di) __B,
|
|
635
|
+
(__v8di)
|
|
636
|
+
_mm512_setzero_si512 (),
|
|
637
|
+
(__mmask8) -1);
|
|
638
|
+
}
|
|
639
|
+
|
|
640
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
641
|
+
_mm512_min_epu64(__m512i __A, __m512i __B)
|
|
642
|
+
{
|
|
643
|
+
return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
|
|
644
|
+
(__v8di) __B,
|
|
645
|
+
(__v8di)
|
|
646
|
+
_mm512_setzero_si512 (),
|
|
647
|
+
(__mmask8) -1);
|
|
648
|
+
}
|
|
649
|
+
|
|
650
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
651
|
+
_mm512_mul_epi32(__m512i __X, __m512i __Y)
|
|
652
|
+
{
|
|
653
|
+
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
|
|
654
|
+
(__v16si) __Y,
|
|
655
|
+
(__v8di)
|
|
656
|
+
_mm512_setzero_si512 (),
|
|
657
|
+
(__mmask8) -1);
|
|
658
|
+
}
|
|
659
|
+
|
|
660
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
661
|
+
_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
|
|
662
|
+
{
|
|
663
|
+
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
|
|
664
|
+
(__v16si) __Y,
|
|
665
|
+
(__v8di) __W, __M);
|
|
666
|
+
}
|
|
667
|
+
|
|
668
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
669
|
+
_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
|
|
670
|
+
{
|
|
671
|
+
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
|
|
672
|
+
(__v16si) __Y,
|
|
673
|
+
(__v8di)
|
|
674
|
+
_mm512_setzero_si512 (),
|
|
675
|
+
__M);
|
|
676
|
+
}
|
|
677
|
+
|
|
678
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
679
|
+
_mm512_mul_epu32(__m512i __X, __m512i __Y)
|
|
680
|
+
{
|
|
681
|
+
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
|
|
682
|
+
(__v16si) __Y,
|
|
683
|
+
(__v8di)
|
|
684
|
+
_mm512_setzero_si512 (),
|
|
685
|
+
(__mmask8) -1);
|
|
686
|
+
}
|
|
687
|
+
|
|
688
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
689
|
+
_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
|
|
690
|
+
{
|
|
691
|
+
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
|
|
692
|
+
(__v16si) __Y,
|
|
693
|
+
(__v8di) __W, __M);
|
|
694
|
+
}
|
|
695
|
+
|
|
696
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
697
|
+
_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
|
|
698
|
+
{
|
|
699
|
+
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
|
|
700
|
+
(__v16si) __Y,
|
|
701
|
+
(__v8di)
|
|
702
|
+
_mm512_setzero_si512 (),
|
|
703
|
+
__M);
|
|
704
|
+
}
|
|
705
|
+
|
|
706
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
707
|
+
_mm512_mullo_epi32 (__m512i __A, __m512i __B)
|
|
708
|
+
{
|
|
709
|
+
return (__m512i) ((__v16si) __A * (__v16si) __B);
|
|
710
|
+
}
|
|
711
|
+
|
|
712
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
713
|
+
_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
|
|
714
|
+
{
|
|
715
|
+
return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
|
|
716
|
+
(__v16si) __B,
|
|
717
|
+
(__v16si)
|
|
718
|
+
_mm512_setzero_si512 (),
|
|
719
|
+
__M);
|
|
720
|
+
}
|
|
721
|
+
|
|
722
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
723
|
+
_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
|
|
724
|
+
{
|
|
725
|
+
return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
|
|
726
|
+
(__v16si) __B,
|
|
727
|
+
(__v16si) __W, __M);
|
|
728
|
+
}
|
|
729
|
+
|
|
730
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
731
|
+
_mm512_sqrt_pd(__m512d a)
|
|
732
|
+
{
|
|
733
|
+
return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a,
|
|
734
|
+
(__v8df) _mm512_setzero_pd (),
|
|
735
|
+
(__mmask8) -1,
|
|
736
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
737
|
+
}
|
|
738
|
+
|
|
739
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
740
|
+
_mm512_sqrt_ps(__m512 a)
|
|
741
|
+
{
|
|
742
|
+
return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a,
|
|
743
|
+
(__v16sf) _mm512_setzero_ps (),
|
|
744
|
+
(__mmask16) -1,
|
|
745
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
746
|
+
}
|
|
747
|
+
|
|
748
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
749
|
+
_mm512_rsqrt14_pd(__m512d __A)
|
|
750
|
+
{
|
|
751
|
+
return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
|
|
752
|
+
(__v8df)
|
|
753
|
+
_mm512_setzero_pd (),
|
|
754
|
+
(__mmask8) -1);}
|
|
755
|
+
|
|
756
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
757
|
+
_mm512_rsqrt14_ps(__m512 __A)
|
|
758
|
+
{
|
|
759
|
+
return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
|
|
760
|
+
(__v16sf)
|
|
761
|
+
_mm512_setzero_ps (),
|
|
762
|
+
(__mmask16) -1);
|
|
763
|
+
}
|
|
764
|
+
|
|
765
|
+
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
|
766
|
+
_mm_rsqrt14_ss(__m128 __A, __m128 __B)
|
|
767
|
+
{
|
|
768
|
+
return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
|
|
769
|
+
(__v4sf) __B,
|
|
770
|
+
(__v4sf)
|
|
771
|
+
_mm_setzero_ps (),
|
|
772
|
+
(__mmask8) -1);
|
|
773
|
+
}
|
|
774
|
+
|
|
775
|
+
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
|
776
|
+
_mm_rsqrt14_sd(__m128d __A, __m128d __B)
|
|
777
|
+
{
|
|
778
|
+
return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
|
|
779
|
+
(__v2df) __B,
|
|
780
|
+
(__v2df)
|
|
781
|
+
_mm_setzero_pd (),
|
|
782
|
+
(__mmask8) -1);
|
|
783
|
+
}
|
|
784
|
+
|
|
785
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
786
|
+
_mm512_rcp14_pd(__m512d __A)
|
|
787
|
+
{
|
|
788
|
+
return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
|
|
789
|
+
(__v8df)
|
|
790
|
+
_mm512_setzero_pd (),
|
|
791
|
+
(__mmask8) -1);
|
|
792
|
+
}
|
|
793
|
+
|
|
794
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
795
|
+
_mm512_rcp14_ps(__m512 __A)
|
|
796
|
+
{
|
|
797
|
+
return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
|
|
798
|
+
(__v16sf)
|
|
799
|
+
_mm512_setzero_ps (),
|
|
800
|
+
(__mmask16) -1);
|
|
801
|
+
}
|
|
802
|
+
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
|
803
|
+
_mm_rcp14_ss(__m128 __A, __m128 __B)
|
|
804
|
+
{
|
|
805
|
+
return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
|
|
806
|
+
(__v4sf) __B,
|
|
807
|
+
(__v4sf)
|
|
808
|
+
_mm_setzero_ps (),
|
|
809
|
+
(__mmask8) -1);
|
|
810
|
+
}
|
|
811
|
+
|
|
812
|
+
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
|
813
|
+
_mm_rcp14_sd(__m128d __A, __m128d __B)
|
|
814
|
+
{
|
|
815
|
+
return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
|
|
816
|
+
(__v2df) __B,
|
|
817
|
+
(__v2df)
|
|
818
|
+
_mm_setzero_pd (),
|
|
819
|
+
(__mmask8) -1);
|
|
820
|
+
}
|
|
821
|
+
|
|
822
|
+
static __inline __m512 __DEFAULT_FN_ATTRS
|
|
823
|
+
_mm512_floor_ps(__m512 __A)
|
|
824
|
+
{
|
|
825
|
+
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
|
|
826
|
+
_MM_FROUND_FLOOR,
|
|
827
|
+
(__v16sf) __A, -1,
|
|
828
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
829
|
+
}
|
|
830
|
+
|
|
831
|
+
static __inline __m512d __DEFAULT_FN_ATTRS
|
|
832
|
+
_mm512_floor_pd(__m512d __A)
|
|
833
|
+
{
|
|
834
|
+
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
|
|
835
|
+
_MM_FROUND_FLOOR,
|
|
836
|
+
(__v8df) __A, -1,
|
|
837
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
838
|
+
}
|
|
839
|
+
|
|
840
|
+
static __inline __m512 __DEFAULT_FN_ATTRS
|
|
841
|
+
_mm512_ceil_ps(__m512 __A)
|
|
842
|
+
{
|
|
843
|
+
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
|
|
844
|
+
_MM_FROUND_CEIL,
|
|
845
|
+
(__v16sf) __A, -1,
|
|
846
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
847
|
+
}
|
|
848
|
+
|
|
849
|
+
static __inline __m512d __DEFAULT_FN_ATTRS
|
|
850
|
+
_mm512_ceil_pd(__m512d __A)
|
|
851
|
+
{
|
|
852
|
+
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
|
|
853
|
+
_MM_FROUND_CEIL,
|
|
854
|
+
(__v8df) __A, -1,
|
|
855
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
856
|
+
}
|
|
857
|
+
|
|
858
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
859
|
+
_mm512_abs_epi64(__m512i __A)
|
|
860
|
+
{
|
|
861
|
+
return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
|
|
862
|
+
(__v8di)
|
|
863
|
+
_mm512_setzero_si512 (),
|
|
864
|
+
(__mmask8) -1);
|
|
865
|
+
}
|
|
866
|
+
|
|
867
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
868
|
+
_mm512_abs_epi32(__m512i __A)
|
|
869
|
+
{
|
|
870
|
+
return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
|
|
871
|
+
(__v16si)
|
|
872
|
+
_mm512_setzero_si512 (),
|
|
873
|
+
(__mmask16) -1);
|
|
874
|
+
}
|
|
875
|
+
|
|
876
|
+
/* Expands to __builtin_ia32_rndscaleps_mask with A (as __v16sf) as both the
   first and third operand, B as the immediate, mask -1 and
   _MM_FROUND_CUR_DIRECTION as the last argument.
   A is bound to a local once, so an argument expression with side effects is
   evaluated a single time instead of twice. B stays a direct macro argument
   (presumably the builtin needs a compile-time constant there -- confirm). */
#define _mm512_roundscale_ps(A, B) __extension__ ({ \
  __m512 __roundscale_src = (A); \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)__roundscale_src, (B), \
                                         (__v16sf)__roundscale_src, \
                                         -1, _MM_FROUND_CUR_DIRECTION); })
|
|
879
|
+
|
|
880
|
+
/* Expands to __builtin_ia32_rndscalepd_mask with A (as __v8df) as both the
   first and third operand, B as the immediate, mask -1 and
   _MM_FROUND_CUR_DIRECTION as the last argument.
   A is bound to a local once, so an argument expression with side effects is
   evaluated a single time instead of twice. B stays a direct macro argument
   (presumably the builtin needs a compile-time constant there -- confirm). */
#define _mm512_roundscale_pd(A, B) __extension__ ({ \
  __m512d __roundscale_src = (A); \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)__roundscale_src, (B), \
                                          (__v8df)__roundscale_src, \
                                          -1, _MM_FROUND_CUR_DIRECTION); })
|
|
883
|
+
|
|
884
|
+
/* Expands to __builtin_ia32_vfmaddpd512_mask on A, B, C (as __v8df) with
   mask (__mmask8) -1 and R as the final argument. */
#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ( \
      (__v8df) (A), \
      (__v8df) (B), \
      (__v8df) (C), \
      (__mmask8) -1, \
      (R)); })
|
|
888
|
+
|
|
889
|
+
|
|
890
|
+
/* Expands to __builtin_ia32_vfmaddpd512_mask on A, B, C (as __v8df) with
   U as the mask and R as the final argument. */
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ( \
      (__v8df) (A), \
      (__v8df) (B), \
      (__v8df) (C), \
      (__mmask8) (U), \
      (R)); })
|
|
894
|
+
|
|
895
|
+
|
|
896
|
+
/* Expands to __builtin_ia32_vfmaddpd512_mask3 on A, B, C (as __v8df) with
   U as the mask and R as the final argument. */
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask3 ( \
      (__v8df) (A), \
      (__v8df) (B), \
      (__v8df) (C), \
      (__mmask8) (U), \
      (R)); })
|
|
900
|
+
|
|
901
|
+
|
|
902
|
+
/* Expands to __builtin_ia32_vfmaddpd512_maskz on A, B, C (as __v8df) with
   U as the mask and R as the final argument. */
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz ( \
      (__v8df) (A), \
      (__v8df) (B), \
      (__v8df) (C), \
      (__mmask8) (U), \
      (R)); })
|
|
906
|
+
|
|
907
|
+
|
|
908
|
+
/* Expands to __builtin_ia32_vfmaddpd512_mask on A, B and the negated C
   (as __v8df) with mask (__mmask8) -1 and R as the final argument. */
#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ( \
      (__v8df) (A), \
      (__v8df) (B), \
      -(__v8df) (C), \
      (__mmask8) -1, \
      (R)); })
|
|
912
|
+
|
|
913
|
+
|
|
914
|
+
/* Expands to __builtin_ia32_vfmaddpd512_mask on A, B and the negated C
   (as __v8df) with U as the mask and R as the final argument. */
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ( \
      (__v8df) (A), \
      (__v8df) (B), \
      -(__v8df) (C), \
      (__mmask8) (U), \
      (R)); })
|
|
918
|
+
|
|
919
|
+
|
|
920
|
+
/* Expands to __builtin_ia32_vfmaddpd512_maskz on A, B and the negated C
   (as __v8df) with U as the mask and R as the final argument. */
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz ( \
      (__v8df) (A), \
      (__v8df) (B), \
      -(__v8df) (C), \
      (__mmask8) (U), \
      (R)); })
|
|
924
|
+
|
|
925
|
+
|
|
926
|
+
/* Expands to __builtin_ia32_vfmaddpd512_mask on the negated A, B and C
   (as __v8df) with mask (__mmask8) -1 and R as the final argument. */
#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ( \
      -(__v8df) (A), \
      (__v8df) (B), \
      (__v8df) (C), \
      (__mmask8) -1, \
      (R)); })
|
|
930
|
+
|
|
931
|
+
|
|
932
|
+
/* Expands to __builtin_ia32_vfmaddpd512_mask3 on the negated A, B and C
   (as __v8df) with U as the mask and R as the final argument. */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask3 ( \
      -(__v8df) (A), \
      (__v8df) (B), \
      (__v8df) (C), \
      (__mmask8) (U), \
      (R)); })
|
|
936
|
+
|
|
937
|
+
|
|
938
|
+
/* Expands to __builtin_ia32_vfmaddpd512_maskz on the negated A, B and C
   (as __v8df) with U as the mask and R as the final argument. */
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz ( \
      -(__v8df) (A), \
      (__v8df) (B), \
      (__v8df) (C), \
      (__mmask8) (U), \
      (R)); })
|
|
942
|
+
|
|
943
|
+
|
|
944
|
+
/* Expands to __builtin_ia32_vfmaddpd512_mask on the negated A, B and the
   negated C (as __v8df) with mask (__mmask8) -1 and R as the final
   argument. */
#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ( \
      -(__v8df) (A), \
      (__v8df) (B), \
      -(__v8df) (C), \
      (__mmask8) -1, \
      (R)); })
|
|
948
|
+
|
|
949
|
+
|
|
950
|
+
/* Expands to __builtin_ia32_vfmaddpd512_maskz on the negated A, B and the
   negated C (as __v8df) with U as the mask and R as the final argument. */
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz ( \
      -(__v8df) (A), \
      (__v8df) (B), \
      -(__v8df) (C), \
      (__mmask8) (U), \
      (R)); })
|
|
954
|
+
|
|
955
|
+
|
|
956
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
957
|
+
_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
|
|
958
|
+
{
|
|
959
|
+
return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
|
|
960
|
+
(__v8df) __B,
|
|
961
|
+
(__v8df) __C,
|
|
962
|
+
(__mmask8) -1,
|
|
963
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
964
|
+
}
|
|
965
|
+
|
|
966
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
967
|
+
_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
|
|
968
|
+
{
|
|
969
|
+
return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
|
|
970
|
+
(__v8df) __B,
|
|
971
|
+
(__v8df) __C,
|
|
972
|
+
(__mmask8) __U,
|
|
973
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
974
|
+
}
|
|
975
|
+
|
|
976
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
977
|
+
_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
|
|
978
|
+
{
|
|
979
|
+
return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
|
|
980
|
+
(__v8df) __B,
|
|
981
|
+
(__v8df) __C,
|
|
982
|
+
(__mmask8) __U,
|
|
983
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
984
|
+
}
|
|
985
|
+
|
|
986
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
987
|
+
_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
|
|
988
|
+
{
|
|
989
|
+
return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
|
|
990
|
+
(__v8df) __B,
|
|
991
|
+
(__v8df) __C,
|
|
992
|
+
(__mmask8) __U,
|
|
993
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
994
|
+
}
|
|
995
|
+
|
|
996
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
997
|
+
_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
|
|
998
|
+
{
|
|
999
|
+
return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
|
|
1000
|
+
(__v8df) __B,
|
|
1001
|
+
-(__v8df) __C,
|
|
1002
|
+
(__mmask8) -1,
|
|
1003
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1004
|
+
}
|
|
1005
|
+
|
|
1006
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
1007
|
+
_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
|
|
1008
|
+
{
|
|
1009
|
+
return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
|
|
1010
|
+
(__v8df) __B,
|
|
1011
|
+
-(__v8df) __C,
|
|
1012
|
+
(__mmask8) __U,
|
|
1013
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1014
|
+
}
|
|
1015
|
+
|
|
1016
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
1017
|
+
_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
|
|
1018
|
+
{
|
|
1019
|
+
return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
|
|
1020
|
+
(__v8df) __B,
|
|
1021
|
+
-(__v8df) __C,
|
|
1022
|
+
(__mmask8) __U,
|
|
1023
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1024
|
+
}
|
|
1025
|
+
|
|
1026
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
1027
|
+
_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
|
|
1028
|
+
{
|
|
1029
|
+
return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
|
|
1030
|
+
(__v8df) __B,
|
|
1031
|
+
(__v8df) __C,
|
|
1032
|
+
(__mmask8) -1,
|
|
1033
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1034
|
+
}
|
|
1035
|
+
|
|
1036
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
1037
|
+
_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
|
|
1038
|
+
{
|
|
1039
|
+
return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
|
|
1040
|
+
(__v8df) __B,
|
|
1041
|
+
(__v8df) __C,
|
|
1042
|
+
(__mmask8) __U,
|
|
1043
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1044
|
+
}
|
|
1045
|
+
|
|
1046
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
1047
|
+
_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
|
|
1048
|
+
{
|
|
1049
|
+
return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
|
|
1050
|
+
(__v8df) __B,
|
|
1051
|
+
(__v8df) __C,
|
|
1052
|
+
(__mmask8) __U,
|
|
1053
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1054
|
+
}
|
|
1055
|
+
|
|
1056
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
1057
|
+
_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
|
|
1058
|
+
{
|
|
1059
|
+
return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
|
|
1060
|
+
(__v8df) __B,
|
|
1061
|
+
-(__v8df) __C,
|
|
1062
|
+
(__mmask8) -1,
|
|
1063
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1064
|
+
}
|
|
1065
|
+
|
|
1066
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
1067
|
+
_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
|
|
1068
|
+
{
|
|
1069
|
+
return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
|
|
1070
|
+
(__v8df) __B,
|
|
1071
|
+
-(__v8df) __C,
|
|
1072
|
+
(__mmask8) __U,
|
|
1073
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1074
|
+
}
|
|
1075
|
+
|
|
1076
|
+
/* Expands to __builtin_ia32_vfmaddps512_mask on A, B, C (as __v16sf) with
   mask (__mmask16) -1 and R as the final argument. */
#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask ( \
      (__v16sf) (A), \
      (__v16sf) (B), \
      (__v16sf) (C), \
      (__mmask16) -1, \
      (R)); })
|
|
1080
|
+
|
|
1081
|
+
|
|
1082
|
+
/* Expands to __builtin_ia32_vfmaddps512_mask on A, B, C (as __v16sf) with
   U as the mask and R as the final argument. */
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask ( \
      (__v16sf) (A), \
      (__v16sf) (B), \
      (__v16sf) (C), \
      (__mmask16) (U), \
      (R)); })
|
|
1086
|
+
|
|
1087
|
+
|
|
1088
|
+
/* Expands to __builtin_ia32_vfmaddps512_mask3 on A, B, C (as __v16sf) with
   U as the mask and R as the final argument. */
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask3 ( \
      (__v16sf) (A), \
      (__v16sf) (B), \
      (__v16sf) (C), \
      (__mmask16) (U), \
      (R)); })
|
|
1092
|
+
|
|
1093
|
+
|
|
1094
|
+
/* Expands to __builtin_ia32_vfmaddps512_maskz on A, B, C (as __v16sf) with
   U as the mask and R as the final argument. */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_maskz ( \
      (__v16sf) (A), \
      (__v16sf) (B), \
      (__v16sf) (C), \
      (__mmask16) (U), \
      (R)); })
|
|
1098
|
+
|
|
1099
|
+
|
|
1100
|
+
/* Expands to __builtin_ia32_vfmaddps512_mask on A, B and the negated C
   (as __v16sf) with mask (__mmask16) -1 and R as the final argument. */
#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask ( \
      (__v16sf) (A), \
      (__v16sf) (B), \
      -(__v16sf) (C), \
      (__mmask16) -1, \
      (R)); })
|
|
1104
|
+
|
|
1105
|
+
|
|
1106
|
+
/* Expands to __builtin_ia32_vfmaddps512_mask on A, B and the negated C
   (as __v16sf) with U as the mask and R as the final argument. */
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask ( \
      (__v16sf) (A), \
      (__v16sf) (B), \
      -(__v16sf) (C), \
      (__mmask16) (U), \
      (R)); })
|
|
1110
|
+
|
|
1111
|
+
|
|
1112
|
+
/* Expands to __builtin_ia32_vfmaddps512_maskz on A, B and the negated C
   (as __v16sf) with U as the mask and R as the final argument. */
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_maskz ( \
      (__v16sf) (A), \
      (__v16sf) (B), \
      -(__v16sf) (C), \
      (__mmask16) (U), \
      (R)); })
|
|
1116
|
+
|
|
1117
|
+
|
|
1118
|
+
/* Expands to __builtin_ia32_vfmaddps512_mask on the negated A, B and C
   (as __v16sf) with mask (__mmask16) -1 and R as the final argument. */
#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask ( \
      -(__v16sf) (A), \
      (__v16sf) (B), \
      (__v16sf) (C), \
      (__mmask16) -1, \
      (R)); })
|
|
1122
|
+
|
|
1123
|
+
|
|
1124
|
+
/* Expands to __builtin_ia32_vfmaddps512_mask3 on the negated A, B and C
   (as __v16sf) with U as the mask and R as the final argument. */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask3 ( \
      -(__v16sf) (A), \
      (__v16sf) (B), \
      (__v16sf) (C), \
      (__mmask16) (U), \
      (R)); })
|
|
1128
|
+
|
|
1129
|
+
|
|
1130
|
+
/* Expands to __builtin_ia32_vfmaddps512_maskz on the negated A, B and C
   (as __v16sf) with U as the mask and R as the final argument. */
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_maskz ( \
      -(__v16sf) (A), \
      (__v16sf) (B), \
      (__v16sf) (C), \
      (__mmask16) (U), \
      (R)); })
|
|
1134
|
+
|
|
1135
|
+
|
|
1136
|
+
/* Expands to __builtin_ia32_vfmaddps512_mask on the negated A, B and the
   negated C (as __v16sf) with mask (__mmask16) -1 and R as the final
   argument. */
#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask ( \
      -(__v16sf) (A), \
      (__v16sf) (B), \
      -(__v16sf) (C), \
      (__mmask16) -1, \
      (R)); })
|
|
1140
|
+
|
|
1141
|
+
|
|
1142
|
+
/* Expands to __builtin_ia32_vfmaddps512_maskz on the negated A, B and the
   negated C (as __v16sf) with U as the mask and R as the final argument. */
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_maskz ( \
      -(__v16sf) (A), \
      (__v16sf) (B), \
      -(__v16sf) (C), \
      (__mmask16) (U), \
      (R)); })
|
|
1146
|
+
|
|
1147
|
+
|
|
1148
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1149
|
+
_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
|
|
1150
|
+
{
|
|
1151
|
+
return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
|
|
1152
|
+
(__v16sf) __B,
|
|
1153
|
+
(__v16sf) __C,
|
|
1154
|
+
(__mmask16) -1,
|
|
1155
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1156
|
+
}
|
|
1157
|
+
|
|
1158
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1159
|
+
_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
|
|
1160
|
+
{
|
|
1161
|
+
return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
|
|
1162
|
+
(__v16sf) __B,
|
|
1163
|
+
(__v16sf) __C,
|
|
1164
|
+
(__mmask16) __U,
|
|
1165
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1166
|
+
}
|
|
1167
|
+
|
|
1168
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1169
|
+
_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
|
|
1170
|
+
{
|
|
1171
|
+
return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
|
|
1172
|
+
(__v16sf) __B,
|
|
1173
|
+
(__v16sf) __C,
|
|
1174
|
+
(__mmask16) __U,
|
|
1175
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1176
|
+
}
|
|
1177
|
+
|
|
1178
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1179
|
+
_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
|
|
1180
|
+
{
|
|
1181
|
+
return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
|
|
1182
|
+
(__v16sf) __B,
|
|
1183
|
+
(__v16sf) __C,
|
|
1184
|
+
(__mmask16) __U,
|
|
1185
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1186
|
+
}
|
|
1187
|
+
|
|
1188
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1189
|
+
_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
|
|
1190
|
+
{
|
|
1191
|
+
return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
|
|
1192
|
+
(__v16sf) __B,
|
|
1193
|
+
-(__v16sf) __C,
|
|
1194
|
+
(__mmask16) -1,
|
|
1195
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1196
|
+
}
|
|
1197
|
+
|
|
1198
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1199
|
+
_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
|
|
1200
|
+
{
|
|
1201
|
+
return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
|
|
1202
|
+
(__v16sf) __B,
|
|
1203
|
+
-(__v16sf) __C,
|
|
1204
|
+
(__mmask16) __U,
|
|
1205
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1206
|
+
}
|
|
1207
|
+
|
|
1208
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1209
|
+
_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
|
|
1210
|
+
{
|
|
1211
|
+
return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
|
|
1212
|
+
(__v16sf) __B,
|
|
1213
|
+
-(__v16sf) __C,
|
|
1214
|
+
(__mmask16) __U,
|
|
1215
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1216
|
+
}
|
|
1217
|
+
|
|
1218
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1219
|
+
_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
|
|
1220
|
+
{
|
|
1221
|
+
return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
|
|
1222
|
+
(__v16sf) __B,
|
|
1223
|
+
(__v16sf) __C,
|
|
1224
|
+
(__mmask16) -1,
|
|
1225
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1226
|
+
}
|
|
1227
|
+
|
|
1228
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1229
|
+
_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
|
|
1230
|
+
{
|
|
1231
|
+
return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
|
|
1232
|
+
(__v16sf) __B,
|
|
1233
|
+
(__v16sf) __C,
|
|
1234
|
+
(__mmask16) __U,
|
|
1235
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1236
|
+
}
|
|
1237
|
+
|
|
1238
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1239
|
+
_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
|
|
1240
|
+
{
|
|
1241
|
+
return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
|
|
1242
|
+
(__v16sf) __B,
|
|
1243
|
+
(__v16sf) __C,
|
|
1244
|
+
(__mmask16) __U,
|
|
1245
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1246
|
+
}
|
|
1247
|
+
|
|
1248
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1249
|
+
_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
|
|
1250
|
+
{
|
|
1251
|
+
return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
|
|
1252
|
+
(__v16sf) __B,
|
|
1253
|
+
-(__v16sf) __C,
|
|
1254
|
+
(__mmask16) -1,
|
|
1255
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1256
|
+
}
|
|
1257
|
+
|
|
1258
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1259
|
+
_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
|
|
1260
|
+
{
|
|
1261
|
+
return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
|
|
1262
|
+
(__v16sf) __B,
|
|
1263
|
+
-(__v16sf) __C,
|
|
1264
|
+
(__mmask16) __U,
|
|
1265
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1266
|
+
}
|
|
1267
|
+
|
|
1268
|
+
/* Expands to __builtin_ia32_vfmaddsubpd512_mask on A, B, C (as __v8df) with
   mask (__mmask8) -1 and R as the final argument. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ( \
      (__v8df) (A), \
      (__v8df) (B), \
      (__v8df) (C), \
      (__mmask8) -1, \
      (R)); })
|
|
1272
|
+
|
|
1273
|
+
|
|
1274
|
+
/* Expands to __builtin_ia32_vfmaddsubpd512_mask on A, B, C (as __v8df) with
   U as the mask and R as the final argument. */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ( \
      (__v8df) (A), \
      (__v8df) (B), \
      (__v8df) (C), \
      (__mmask8) (U), \
      (R)); })
|
|
1278
|
+
|
|
1279
|
+
|
|
1280
|
+
/* Expands to __builtin_ia32_vfmaddsubpd512_mask3 on A, B, C (as __v8df) with
   U as the mask and R as the final argument. */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ( \
      (__v8df) (A), \
      (__v8df) (B), \
      (__v8df) (C), \
      (__mmask8) (U), \
      (R)); })
|
|
1284
|
+
|
|
1285
|
+
|
|
1286
|
+
/* Expands to __builtin_ia32_vfmaddsubpd512_maskz on A, B, C (as __v8df) with
   U as the mask and R as the final argument. */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ( \
      (__v8df) (A), \
      (__v8df) (B), \
      (__v8df) (C), \
      (__mmask8) (U), \
      (R)); })
|
|
1290
|
+
|
|
1291
|
+
|
|
1292
|
+
/* Expands to __builtin_ia32_vfmaddsubpd512_mask on A, B and the negated C
   (as __v8df) with mask (__mmask8) -1 and R as the final argument. */
#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ( \
      (__v8df) (A), \
      (__v8df) (B), \
      -(__v8df) (C), \
      (__mmask8) -1, \
      (R)); })
|
|
1296
|
+
|
|
1297
|
+
|
|
1298
|
+
/* Expands to __builtin_ia32_vfmaddsubpd512_mask on A, B and the negated C
   (as __v8df) with U as the mask and R as the final argument. */
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ( \
      (__v8df) (A), \
      (__v8df) (B), \
      -(__v8df) (C), \
      (__mmask8) (U), \
      (R)); })
|
|
1302
|
+
|
|
1303
|
+
|
|
1304
|
+
/* Expands to __builtin_ia32_vfmaddsubpd512_maskz on A, B and the negated C
   (as __v8df) with U as the mask and R as the final argument. */
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ( \
      (__v8df) (A), \
      (__v8df) (B), \
      -(__v8df) (C), \
      (__mmask8) (U), \
      (R)); })
|
|
1308
|
+
|
|
1309
|
+
|
|
1310
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
1311
|
+
_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
|
|
1312
|
+
{
|
|
1313
|
+
return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
|
|
1314
|
+
(__v8df) __B,
|
|
1315
|
+
(__v8df) __C,
|
|
1316
|
+
(__mmask8) -1,
|
|
1317
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1318
|
+
}
|
|
1319
|
+
|
|
1320
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
1321
|
+
_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
|
|
1322
|
+
{
|
|
1323
|
+
return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
|
|
1324
|
+
(__v8df) __B,
|
|
1325
|
+
(__v8df) __C,
|
|
1326
|
+
(__mmask8) __U,
|
|
1327
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1328
|
+
}
|
|
1329
|
+
|
|
1330
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
1331
|
+
_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
|
|
1332
|
+
{
|
|
1333
|
+
return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
|
|
1334
|
+
(__v8df) __B,
|
|
1335
|
+
(__v8df) __C,
|
|
1336
|
+
(__mmask8) __U,
|
|
1337
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1338
|
+
}
|
|
1339
|
+
|
|
1340
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
1341
|
+
_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
|
|
1342
|
+
{
|
|
1343
|
+
return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
|
|
1344
|
+
(__v8df) __B,
|
|
1345
|
+
(__v8df) __C,
|
|
1346
|
+
(__mmask8) __U,
|
|
1347
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1348
|
+
}
|
|
1349
|
+
|
|
1350
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
1351
|
+
_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
|
|
1352
|
+
{
|
|
1353
|
+
return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
|
|
1354
|
+
(__v8df) __B,
|
|
1355
|
+
-(__v8df) __C,
|
|
1356
|
+
(__mmask8) -1,
|
|
1357
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1358
|
+
}
|
|
1359
|
+
|
|
1360
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
1361
|
+
_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
|
|
1362
|
+
{
|
|
1363
|
+
return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
|
|
1364
|
+
(__v8df) __B,
|
|
1365
|
+
-(__v8df) __C,
|
|
1366
|
+
(__mmask8) __U,
|
|
1367
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1368
|
+
}
|
|
1369
|
+
|
|
1370
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
1371
|
+
_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
|
|
1372
|
+
{
|
|
1373
|
+
return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
|
|
1374
|
+
(__v8df) __B,
|
|
1375
|
+
-(__v8df) __C,
|
|
1376
|
+
(__mmask8) __U,
|
|
1377
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1378
|
+
}
|
|
1379
|
+
|
|
1380
|
+
/* Unmasked fmaddsub on packed floats with rounding argument R; the mask
   passed to the _mask builtin is all ones. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
                                               (__v16sf) (B), \
                                               (__v16sf) (C), \
                                               (__mmask16) -1, \
                                               (R)); })
|
|
1384
|
+
|
|
1385
|
+
|
|
1386
|
+
/* Masked fmaddsub on packed floats: _mask builtin with mask U and rounding
   argument R. */
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
                                               (__v16sf) (B), \
                                               (__v16sf) (C), \
                                               (__mmask16) (U), \
                                               (R)); })
|
|
1390
|
+
|
|
1391
|
+
|
|
1392
|
+
/* mask3 fmaddsub on packed floats: _mask3 builtin with mask U and rounding
   argument R. */
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) (A), \
                                                (__v16sf) (B), \
                                                (__v16sf) (C), \
                                                (__mmask16) (U), \
                                                (R)); })
|
|
1396
|
+
|
|
1397
|
+
|
|
1398
|
+
/* maskz fmaddsub on packed floats: _maskz builtin with mask U and rounding
   argument R. */
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) (A), \
                                                (__v16sf) (B), \
                                                (__v16sf) (C), \
                                                (__mmask16) (U), \
                                                (R)); })
|
|
1402
|
+
|
|
1403
|
+
|
|
1404
|
+
/* Unmasked fmsubadd on packed floats: fmaddsub _mask builtin with C negated,
   all-ones mask and rounding argument R. */
#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
                                               (__v16sf) (B), \
                                               -(__v16sf) (C), \
                                               (__mmask16) -1, \
                                               (R)); })
|
|
1408
|
+
|
|
1409
|
+
|
|
1410
|
+
/* Masked fmsubadd on packed floats: fmaddsub _mask builtin with C negated,
   mask U and rounding argument R. */
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
                                               (__v16sf) (B), \
                                               -(__v16sf) (C), \
                                               (__mmask16) (U), \
                                               (R)); })
|
|
1414
|
+
|
|
1415
|
+
|
|
1416
|
+
/* maskz fmsubadd on packed floats: fmaddsub _maskz builtin with C negated,
   mask U and rounding argument R. */
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) (A), \
                                                (__v16sf) (B), \
                                                -(__v16sf) (C), \
                                                (__mmask16) (U), \
                                                (R)); })
|
|
1420
|
+
|
|
1421
|
+
|
|
1422
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1423
|
+
_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
|
|
1424
|
+
{
|
|
1425
|
+
return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
|
|
1426
|
+
(__v16sf) __B,
|
|
1427
|
+
(__v16sf) __C,
|
|
1428
|
+
(__mmask16) -1,
|
|
1429
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1430
|
+
}
|
|
1431
|
+
|
|
1432
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1433
|
+
_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
|
|
1434
|
+
{
|
|
1435
|
+
return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
|
|
1436
|
+
(__v16sf) __B,
|
|
1437
|
+
(__v16sf) __C,
|
|
1438
|
+
(__mmask16) __U,
|
|
1439
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1440
|
+
}
|
|
1441
|
+
|
|
1442
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1443
|
+
_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
|
|
1444
|
+
{
|
|
1445
|
+
return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
|
|
1446
|
+
(__v16sf) __B,
|
|
1447
|
+
(__v16sf) __C,
|
|
1448
|
+
(__mmask16) __U,
|
|
1449
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1450
|
+
}
|
|
1451
|
+
|
|
1452
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1453
|
+
_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
|
|
1454
|
+
{
|
|
1455
|
+
return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
|
|
1456
|
+
(__v16sf) __B,
|
|
1457
|
+
(__v16sf) __C,
|
|
1458
|
+
(__mmask16) __U,
|
|
1459
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1460
|
+
}
|
|
1461
|
+
|
|
1462
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1463
|
+
_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
|
|
1464
|
+
{
|
|
1465
|
+
return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
|
|
1466
|
+
(__v16sf) __B,
|
|
1467
|
+
-(__v16sf) __C,
|
|
1468
|
+
(__mmask16) -1,
|
|
1469
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1470
|
+
}
|
|
1471
|
+
|
|
1472
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1473
|
+
_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
|
|
1474
|
+
{
|
|
1475
|
+
return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
|
|
1476
|
+
(__v16sf) __B,
|
|
1477
|
+
-(__v16sf) __C,
|
|
1478
|
+
(__mmask16) __U,
|
|
1479
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1480
|
+
}
|
|
1481
|
+
|
|
1482
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1483
|
+
_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
|
|
1484
|
+
{
|
|
1485
|
+
return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
|
|
1486
|
+
(__v16sf) __B,
|
|
1487
|
+
-(__v16sf) __C,
|
|
1488
|
+
(__mmask16) __U,
|
|
1489
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1490
|
+
}
|
|
1491
|
+
|
|
1492
|
+
/* mask3 fmsub on packed doubles: expands to the vfmsubpd512_mask3 builtin
   with mask U and rounding argument R. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) (A), \
                                              (__v8df) (B), \
                                              (__v8df) (C), \
                                              (__mmask8) (U), \
                                              (R)); })
|
|
1496
|
+
|
|
1497
|
+
|
|
1498
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
1499
|
+
_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
|
|
1500
|
+
{
|
|
1501
|
+
return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
|
|
1502
|
+
(__v8df) __B,
|
|
1503
|
+
(__v8df) __C,
|
|
1504
|
+
(__mmask8) __U,
|
|
1505
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1506
|
+
}
|
|
1507
|
+
|
|
1508
|
+
/* mask3 fmsub on packed floats: expands to the vfmsubps512_mask3 builtin with
   mask U and rounding argument R. */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) (A), \
                                             (__v16sf) (B), \
                                             (__v16sf) (C), \
                                             (__mmask16) (U), \
                                             (R)); })
|
|
1512
|
+
|
|
1513
|
+
|
|
1514
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1515
|
+
_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
|
|
1516
|
+
{
|
|
1517
|
+
return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
|
|
1518
|
+
(__v16sf) __B,
|
|
1519
|
+
(__v16sf) __C,
|
|
1520
|
+
(__mmask16) __U,
|
|
1521
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1522
|
+
}
|
|
1523
|
+
|
|
1524
|
+
/* mask3 fmsubadd on packed doubles: expands to the dedicated
   vfmsubaddpd512_mask3 builtin with mask U and rounding argument R. */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) (A), \
                                                 (__v8df) (B), \
                                                 (__v8df) (C), \
                                                 (__mmask8) (U), \
                                                 (R)); })
|
|
1528
|
+
|
|
1529
|
+
|
|
1530
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
1531
|
+
_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
|
|
1532
|
+
{
|
|
1533
|
+
return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
|
|
1534
|
+
(__v8df) __B,
|
|
1535
|
+
(__v8df) __C,
|
|
1536
|
+
(__mmask8) __U,
|
|
1537
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1538
|
+
}
|
|
1539
|
+
|
|
1540
|
+
/* mask3 fmsubadd on packed floats: expands to the dedicated
   vfmsubaddps512_mask3 builtin with mask U and rounding argument R. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) (A), \
                                                (__v16sf) (B), \
                                                (__v16sf) (C), \
                                                (__mmask16) (U), \
                                                (R)); })
|
|
1544
|
+
|
|
1545
|
+
|
|
1546
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1547
|
+
_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
|
|
1548
|
+
{
|
|
1549
|
+
return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
|
|
1550
|
+
(__v16sf) __B,
|
|
1551
|
+
(__v16sf) __C,
|
|
1552
|
+
(__mmask16) __U,
|
|
1553
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1554
|
+
}
|
|
1555
|
+
|
|
1556
|
+
/* Masked fnmadd on packed doubles: expands to the vfnmaddpd512_mask builtin
   with mask U and rounding argument R. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) (A), \
                                              (__v8df) (B), \
                                              (__v8df) (C), \
                                              (__mmask8) (U), \
                                              (R)); })
|
|
1560
|
+
|
|
1561
|
+
|
|
1562
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
1563
|
+
_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
|
|
1564
|
+
{
|
|
1565
|
+
return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
|
|
1566
|
+
(__v8df) __B,
|
|
1567
|
+
(__v8df) __C,
|
|
1568
|
+
(__mmask8) __U,
|
|
1569
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1570
|
+
}
|
|
1571
|
+
|
|
1572
|
+
/* Masked fnmadd on packed floats: expands to the vfnmaddps512_mask builtin
   with mask U and rounding argument R. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) (A), \
                                             (__v16sf) (B), \
                                             (__v16sf) (C), \
                                             (__mmask16) (U), \
                                             (R)); })
|
|
1576
|
+
|
|
1577
|
+
|
|
1578
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1579
|
+
_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
|
|
1580
|
+
{
|
|
1581
|
+
return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
|
|
1582
|
+
(__v16sf) __B,
|
|
1583
|
+
(__v16sf) __C,
|
|
1584
|
+
(__mmask16) __U,
|
|
1585
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1586
|
+
}
|
|
1587
|
+
|
|
1588
|
+
/* Masked fnmsub on packed doubles: expands to the vfnmsubpd512_mask builtin
   with mask U and rounding argument R. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) (A), \
                                              (__v8df) (B), \
                                              (__v8df) (C), \
                                              (__mmask8) (U), \
                                              (R)); })
|
|
1592
|
+
|
|
1593
|
+
|
|
1594
|
+
/* mask3 fnmsub on packed doubles: expands to the vfnmsubpd512_mask3 builtin
   with mask U and rounding argument R. */
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) (A), \
                                               (__v8df) (B), \
                                               (__v8df) (C), \
                                               (__mmask8) (U), \
                                               (R)); })
|
|
1598
|
+
|
|
1599
|
+
|
|
1600
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
1601
|
+
_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
|
|
1602
|
+
{
|
|
1603
|
+
return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
|
|
1604
|
+
(__v8df) __B,
|
|
1605
|
+
(__v8df) __C,
|
|
1606
|
+
(__mmask8) __U,
|
|
1607
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1608
|
+
}
|
|
1609
|
+
|
|
1610
|
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
|
1611
|
+
_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
|
|
1612
|
+
{
|
|
1613
|
+
return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
|
|
1614
|
+
(__v8df) __B,
|
|
1615
|
+
(__v8df) __C,
|
|
1616
|
+
(__mmask8) __U,
|
|
1617
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1618
|
+
}
|
|
1619
|
+
|
|
1620
|
+
/* Masked fnmsub on packed floats: expands to the vfnmsubps512_mask builtin
   with mask U and rounding argument R. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) (A), \
                                             (__v16sf) (B), \
                                             (__v16sf) (C), \
                                             (__mmask16) (U), \
                                             (R)); })
|
|
1624
|
+
|
|
1625
|
+
|
|
1626
|
+
/* mask3 fnmsub on packed floats: expands to the vfnmsubps512_mask3 builtin
   with mask U and rounding argument R. */
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) (A), \
                                              (__v16sf) (B), \
                                              (__v16sf) (C), \
                                              (__mmask16) (U), \
                                              (R)); })
|
|
1630
|
+
|
|
1631
|
+
|
|
1632
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1633
|
+
_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
|
|
1634
|
+
{
|
|
1635
|
+
return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
|
|
1636
|
+
(__v16sf) __B,
|
|
1637
|
+
(__v16sf) __C,
|
|
1638
|
+
(__mmask16) __U,
|
|
1639
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1640
|
+
}
|
|
1641
|
+
|
|
1642
|
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
|
1643
|
+
_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
|
|
1644
|
+
{
|
|
1645
|
+
return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
|
|
1646
|
+
(__v16sf) __B,
|
|
1647
|
+
(__v16sf) __C,
|
|
1648
|
+
(__mmask16) __U,
|
|
1649
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1650
|
+
}
|
|
1651
|
+
|
|
1652
|
+
|
|
1653
|
+
|
|
1654
|
+
/* Vector permutations */
|
|
1655
|
+
|
|
1656
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
1657
|
+
_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
|
|
1658
|
+
{
|
|
1659
|
+
return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
|
|
1660
|
+
/* idx */ ,
|
|
1661
|
+
(__v16si) __A,
|
|
1662
|
+
(__v16si) __B,
|
|
1663
|
+
(__mmask16) -1);
|
|
1664
|
+
}
|
|
1665
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
1666
|
+
_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
|
|
1667
|
+
{
|
|
1668
|
+
return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
|
|
1669
|
+
/* idx */ ,
|
|
1670
|
+
(__v8di) __A,
|
|
1671
|
+
(__v8di) __B,
|
|
1672
|
+
(__mmask8) -1);
|
|
1673
|
+
}
|
|
1674
|
+
|
|
1675
|
+
static __inline __m512d __DEFAULT_FN_ATTRS
|
|
1676
|
+
_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
|
|
1677
|
+
{
|
|
1678
|
+
return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
|
|
1679
|
+
/* idx */ ,
|
|
1680
|
+
(__v8df) __A,
|
|
1681
|
+
(__v8df) __B,
|
|
1682
|
+
(__mmask8) -1);
|
|
1683
|
+
}
|
|
1684
|
+
static __inline __m512 __DEFAULT_FN_ATTRS
|
|
1685
|
+
_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
|
|
1686
|
+
{
|
|
1687
|
+
return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
|
|
1688
|
+
/* idx */ ,
|
|
1689
|
+
(__v16sf) __A,
|
|
1690
|
+
(__v16sf) __B,
|
|
1691
|
+
(__mmask16) -1);
|
|
1692
|
+
}
|
|
1693
|
+
|
|
1694
|
+
/* Expands to the alignq512_mask builtin on A and B with count I, a zero
   pass-through vector and an all-ones mask. */
#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
  (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \
                                         (__v8di)(__m512i)(B), \
                                         (I), \
                                         (__v8di)_mm512_setzero_si512(), \
                                         (__mmask8)-1); })
|
|
1699
|
+
|
|
1700
|
+
/* Expands to the alignd512_mask builtin on A and B with count I, a zero
   pass-through vector and an all-ones mask. */
#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
  (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
                                         (__v16si)(__m512i)(B), \
                                         (I), \
                                         (__v16si)_mm512_setzero_si512(), \
                                         (__mmask16)-1); })
|
|
1705
|
+
|
|
1706
|
+
/* Vector Extract */
|
|
1707
|
+
|
|
1708
|
+
/* Extracts a 256-bit group of doubles from A, selected by immediate I, via
   the extractf64x4_mask builtin (zero pass-through, all-ones mask).
   The argument is cast in place rather than copied into a local named __A:
   a local with that name would shadow a caller's own `__A` (the parameter
   name this header uses everywhere) and turn the copy into a
   self-initialisation. */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
  (__m256d) \
    __builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), \
                                     (I), \
                                     (__v4df)_mm256_setzero_si256(), \
                                     (__mmask8) -1); })
|
|
1715
|
+
|
|
1716
|
+
/* Extracts a 128-bit group of floats from A, selected by immediate I, via the
   extractf32x4_mask builtin (zero pass-through, all-ones mask).
   The argument is cast in place rather than copied into a local named __A:
   a local with that name would shadow a caller's own `__A` (the parameter
   name this header uses everywhere) and turn the copy into a
   self-initialisation. */
#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
  (__m128) \
    __builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), \
                                     (I), \
                                     (__v4sf)_mm_setzero_ps(), \
                                     (__mmask8) -1); })
|
|
1723
|
+
|
|
1724
|
+
/* Vector Blend */
|
|
1725
|
+
|
|
1726
|
+
static __inline __m512d __DEFAULT_FN_ATTRS
|
|
1727
|
+
_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
|
|
1728
|
+
{
|
|
1729
|
+
return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
|
|
1730
|
+
(__v8df) __W,
|
|
1731
|
+
(__mmask8) __U);
|
|
1732
|
+
}
|
|
1733
|
+
|
|
1734
|
+
static __inline __m512 __DEFAULT_FN_ATTRS
|
|
1735
|
+
_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
|
|
1736
|
+
{
|
|
1737
|
+
return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
|
|
1738
|
+
(__v16sf) __W,
|
|
1739
|
+
(__mmask16) __U);
|
|
1740
|
+
}
|
|
1741
|
+
|
|
1742
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
1743
|
+
_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
|
|
1744
|
+
{
|
|
1745
|
+
return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
|
|
1746
|
+
(__v8di) __W,
|
|
1747
|
+
(__mmask8) __U);
|
|
1748
|
+
}
|
|
1749
|
+
|
|
1750
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
1751
|
+
_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
|
|
1752
|
+
{
|
|
1753
|
+
return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
|
|
1754
|
+
(__v16si) __W,
|
|
1755
|
+
(__mmask16) __U);
|
|
1756
|
+
}
|
|
1757
|
+
|
|
1758
|
+
/* Compare */
|
|
1759
|
+
|
|
1760
|
+
/* Compares packed floats A and B with predicate P and rounding argument R;
   the input mask is all ones and the result is a 16-bit mask. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (P), \
                                          (__mmask16)-1, \
                                          (R)); })
|
|
1764
|
+
|
|
1765
|
+
/* Compares packed floats A and B with predicate P and rounding argument R,
   passing U as the input mask; the result is a 16-bit mask. */
#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (P), \
                                          (__mmask16)(U), \
                                          (R)); })
|
|
1769
|
+
|
|
1770
|
+
/* Float compare without an explicit rounding argument: delegates to the
   _round_ macro with _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_ps_mask(A, B, P)                    \
  _mm512_cmp_round_ps_mask((A), (B), (P),              \
                           _MM_FROUND_CUR_DIRECTION)
|
|
1772
|
+
|
|
1773
|
+
/* Masked float compare without an explicit rounding argument: delegates to
   the masked _round_ macro with _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_cmp_ps_mask(U, A, B, P)                 \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P),         \
                                _MM_FROUND_CUR_DIRECTION)
|
|
1775
|
+
|
|
1776
|
+
/* Compares packed doubles A and B with predicate P and rounding argument R;
   the input mask is all ones and the result is an 8-bit mask. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (P), \
                                         (__mmask8)-1, \
                                         (R)); })
|
|
1780
|
+
|
|
1781
|
+
/* Compares packed doubles A and B with predicate P and rounding argument R,
   passing U as the input mask; the result is an 8-bit mask. */
#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (P), \
                                         (__mmask8)(U), \
                                         (R)); })
|
|
1785
|
+
|
|
1786
|
+
/* Double compare without an explicit rounding argument: delegates to the
   _round_ macro with _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_pd_mask(A, B, P)                    \
  _mm512_cmp_round_pd_mask((A), (B), (P),              \
                           _MM_FROUND_CUR_DIRECTION)
|
|
1788
|
+
|
|
1789
|
+
/* Masked double compare without an explicit rounding argument: delegates to
   the masked _round_ macro with _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_cmp_pd_mask(U, A, B, P)                 \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P),         \
                                _MM_FROUND_CUR_DIRECTION)
|
|
1791
|
+
|
|
1792
|
+
/* Conversion */
|
|
1793
|
+
|
|
1794
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
1795
|
+
_mm512_cvttps_epu32(__m512 __A)
|
|
1796
|
+
{
|
|
1797
|
+
return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
|
|
1798
|
+
(__v16si)
|
|
1799
|
+
_mm512_setzero_si512 (),
|
|
1800
|
+
(__mmask16) -1,
|
|
1801
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1802
|
+
}
|
|
1803
|
+
|
|
1804
|
+
/* Expands to the cvtdq2ps512_mask builtin on A with rounding argument R, a
   zero pass-through vector and an all-ones mask. */
#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, \
                                          (R)); })
|
|
1808
|
+
|
|
1809
|
+
/* Expands to the cvtudq2ps512_mask builtin on A with rounding argument R, a
   zero pass-through vector and an all-ones mask. */
#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, \
                                           (R)); })
|
|
1813
|
+
|
|
1814
|
+
static __inline __m512d __DEFAULT_FN_ATTRS
|
|
1815
|
+
_mm512_cvtepi32_pd(__m256i __A)
|
|
1816
|
+
{
|
|
1817
|
+
return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
|
|
1818
|
+
(__v8df)
|
|
1819
|
+
_mm512_setzero_pd (),
|
|
1820
|
+
(__mmask8) -1);
|
|
1821
|
+
}
|
|
1822
|
+
|
|
1823
|
+
static __inline __m512d __DEFAULT_FN_ATTRS
|
|
1824
|
+
_mm512_cvtepu32_pd(__m256i __A)
|
|
1825
|
+
{
|
|
1826
|
+
return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
|
|
1827
|
+
(__v8df)
|
|
1828
|
+
_mm512_setzero_pd (),
|
|
1829
|
+
(__mmask8) -1);
|
|
1830
|
+
}
|
|
1831
|
+
|
|
1832
|
+
/* Expands to the cvtpd2ps512_mask builtin on A with rounding argument R, a
   zero 256-bit pass-through vector and an all-ones mask. */
#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1, \
                                          (R)); })
|
|
1836
|
+
|
|
1837
|
+
/* Expands to the vcvtps2ph512_mask builtin on A with immediate I, a zero
   pass-through vector and an all-ones mask. The mask is written with an
   explicit (__mmask16) cast, matching every other unmasked macro in this
   header, instead of relying on implicit conversion of a bare -1. */
#define _mm512_cvtps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(A), (I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)-1); })
|
|
1841
|
+
|
|
1842
|
+
static __inline __m512 __DEFAULT_FN_ATTRS
|
|
1843
|
+
_mm512_cvtph_ps(__m256i __A)
|
|
1844
|
+
{
|
|
1845
|
+
return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
|
|
1846
|
+
(__v16sf)
|
|
1847
|
+
_mm512_setzero_ps (),
|
|
1848
|
+
(__mmask16) -1,
|
|
1849
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1850
|
+
}
|
|
1851
|
+
|
|
1852
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
1853
|
+
_mm512_cvttps_epi32(__m512 a)
|
|
1854
|
+
{
|
|
1855
|
+
return (__m512i)
|
|
1856
|
+
__builtin_ia32_cvttps2dq512_mask((__v16sf) a,
|
|
1857
|
+
(__v16si) _mm512_setzero_si512 (),
|
|
1858
|
+
(__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
|
|
1859
|
+
}
|
|
1860
|
+
|
|
1861
|
+
static __inline __m256i __DEFAULT_FN_ATTRS
|
|
1862
|
+
_mm512_cvttpd_epi32(__m512d a)
|
|
1863
|
+
{
|
|
1864
|
+
return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a,
|
|
1865
|
+
(__v8si)_mm256_setzero_si256(),
|
|
1866
|
+
(__mmask8) -1,
|
|
1867
|
+
_MM_FROUND_CUR_DIRECTION);
|
|
1868
|
+
}
|
|
1869
|
+
|
|
1870
|
+
/* Expands to the cvttpd2dq512_mask builtin on A with argument R, a zero
   256-bit pass-through vector and an all-ones mask. */
#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, \
                                            (R)); })
|
|
1874
|
+
|
|
1875
|
+
/* Expands to the cvttps2dq512_mask builtin on A with argument R, a zero
   pass-through vector and an all-ones mask. */
#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, \
                                            (R)); })
|
|
1879
|
+
|
|
1880
|
+
/* Expands to the cvtps2dq512_mask builtin on A with rounding argument R, a
   zero pass-through vector and an all-ones mask. */
#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(A), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)-1, \
                                           (R)); })
|
|
1884
|
+
|
|
1885
|
+
/* Expands to the cvtpd2dq512_mask builtin on A with rounding argument R, a
   zero 256-bit pass-through vector and an all-ones mask. */
#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(A), \
                                           (__v8si)_mm256_setzero_si256(), \
                                           (__mmask8)-1, \
                                           (R)); })
|
|
1889
|
+
|
|
1890
|
+
/* Expands to the cvtps2udq512_mask builtin on A with rounding argument R, a
   zero pass-through vector and an all-ones mask. */
#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, \
                                            (R)); })
|
|
1894
|
+
|
|
1895
|
+
/* Expands to the cvtpd2udq512_mask builtin on A with rounding argument R, a
   zero 256-bit pass-through vector and an all-ones mask. */
#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8) -1, \
                                            (R)); })
|
|
1899
|
+
|
|
1900
|
+
/* Unpack and Interleave */
|
|
1901
|
+
static __inline __m512d __DEFAULT_FN_ATTRS
|
|
1902
|
+
_mm512_unpackhi_pd(__m512d __a, __m512d __b)
|
|
1903
|
+
{
|
|
1904
|
+
return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
|
|
1905
|
+
}
|
|
1906
|
+
|
|
1907
|
+
static __inline __m512d __DEFAULT_FN_ATTRS
|
|
1908
|
+
_mm512_unpacklo_pd(__m512d __a, __m512d __b)
|
|
1909
|
+
{
|
|
1910
|
+
return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
|
|
1911
|
+
}
|
|
1912
|
+
|
|
1913
|
+
static __inline __m512 __DEFAULT_FN_ATTRS
|
|
1914
|
+
_mm512_unpackhi_ps(__m512 __a, __m512 __b)
|
|
1915
|
+
{
|
|
1916
|
+
return __builtin_shufflevector(__a, __b,
|
|
1917
|
+
2, 18, 3, 19,
|
|
1918
|
+
2+4, 18+4, 3+4, 19+4,
|
|
1919
|
+
2+8, 18+8, 3+8, 19+8,
|
|
1920
|
+
2+12, 18+12, 3+12, 19+12);
|
|
1921
|
+
}
|
|
1922
|
+
|
|
1923
|
+
static __inline __m512 __DEFAULT_FN_ATTRS
|
|
1924
|
+
_mm512_unpacklo_ps(__m512 __a, __m512 __b)
|
|
1925
|
+
{
|
|
1926
|
+
return __builtin_shufflevector(__a, __b,
|
|
1927
|
+
0, 16, 1, 17,
|
|
1928
|
+
0+4, 16+4, 1+4, 17+4,
|
|
1929
|
+
0+8, 16+8, 1+8, 17+8,
|
|
1930
|
+
0+12, 16+12, 1+12, 17+12);
|
|
1931
|
+
}
|
|
1932
|
+
|
|
1933
|
+
/* Bit Test */
|
|
1934
|
+
|
|
1935
|
+
static __inline __mmask16 __DEFAULT_FN_ATTRS
|
|
1936
|
+
_mm512_test_epi32_mask(__m512i __A, __m512i __B)
|
|
1937
|
+
{
|
|
1938
|
+
return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
|
|
1939
|
+
(__v16si) __B,
|
|
1940
|
+
(__mmask16) -1);
|
|
1941
|
+
}
|
|
1942
|
+
|
|
1943
|
+
static __inline __mmask8 __DEFAULT_FN_ATTRS
|
|
1944
|
+
_mm512_test_epi64_mask(__m512i __A, __m512i __B)
|
|
1945
|
+
{
|
|
1946
|
+
return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
|
|
1947
|
+
(__v8di) __B,
|
|
1948
|
+
(__mmask8) -1);
|
|
1949
|
+
}
|
|
1950
|
+
|
|
1951
|
+
/* SIMD load ops */
|
|
1952
|
+
|
|
1953
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
1954
|
+
_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
|
|
1955
|
+
{
|
|
1956
|
+
return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
|
|
1957
|
+
(__v16si)
|
|
1958
|
+
_mm512_setzero_si512 (),
|
|
1959
|
+
(__mmask16) __U);
|
|
1960
|
+
}
|
|
1961
|
+
|
|
1962
|
+
static __inline __m512i __DEFAULT_FN_ATTRS
|
|
1963
|
+
_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
|
|
1964
|
+
{
|
|
1965
|
+
return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
|
|
1966
|
+
(__v8di)
|
|
1967
|
+
_mm512_setzero_si512 (),
|
|
1968
|
+
(__mmask8) __U);
|
|
1969
|
+
}
|
|
1970
|
+
|
|
1971
|
+
static __inline __m512 __DEFAULT_FN_ATTRS
|
|
1972
|
+
_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
|
|
1973
|
+
{
|
|
1974
|
+
return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
|
|
1975
|
+
(__v16sf)
|
|
1976
|
+
_mm512_setzero_ps (),
|
|
1977
|
+
(__mmask16) __U);
|
|
1978
|
+
}
|
|
1979
|
+
|
|
1980
|
+
static __inline __m512d __DEFAULT_FN_ATTRS
|
|
1981
|
+
_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
|
|
1982
|
+
{
|
|
1983
|
+
return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
|
|
1984
|
+
(__v8df)
|
|
1985
|
+
_mm512_setzero_pd (),
|
|
1986
|
+
(__mmask8) __U);
|
|
1987
|
+
}
|
|
1988
|
+
|
|
1989
|
+
static __inline __m512 __DEFAULT_FN_ATTRS
|
|
1990
|
+
_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
|
|
1991
|
+
{
|
|
1992
|
+
return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
|
|
1993
|
+
(__v16sf)
|
|
1994
|
+
_mm512_setzero_ps (),
|
|
1995
|
+
(__mmask16) __U);
|
|
1996
|
+
}
|
|
1997
|
+
|
|
1998
|
+
static __inline __m512d __DEFAULT_FN_ATTRS
|
|
1999
|
+
_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
|
|
2000
|
+
{
|
|
2001
|
+
return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
|
|
2002
|
+
(__v8df)
|
|
2003
|
+
_mm512_setzero_pd (),
|
|
2004
|
+
(__mmask8) __U);
|
|
2005
|
+
}
|
|
2006
|
+
|
|
2007
|
+
static __inline __m512d __DEFAULT_FN_ATTRS
|
|
2008
|
+
_mm512_loadu_pd(double const *__p)
|
|
2009
|
+
{
|
|
2010
|
+
struct __loadu_pd {
|
|
2011
|
+
__m512d __v;
|
|
2012
|
+
} __attribute__((__packed__, __may_alias__));
|
|
2013
|
+
return ((struct __loadu_pd*)__p)->__v;
|
|
2014
|
+
}
|
|
2015
|
+
|
|
2016
|
+
static __inline __m512 __DEFAULT_FN_ATTRS
|
|
2017
|
+
_mm512_loadu_ps(float const *__p)
|
|
2018
|
+
{
|
|
2019
|
+
struct __loadu_ps {
|
|
2020
|
+
__m512 __v;
|
|
2021
|
+
} __attribute__((__packed__, __may_alias__));
|
|
2022
|
+
return ((struct __loadu_ps*)__p)->__v;
|
|
2023
|
+
}
|
|
2024
|
+
|
|
2025
|
+
static __inline __m512 __DEFAULT_FN_ATTRS
|
|
2026
|
+
_mm512_load_ps(double const *__p)
|
|
2027
|
+
{
|
|
2028
|
+
return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
|
|
2029
|
+
(__v16sf)
|
|
2030
|
+
_mm512_setzero_ps (),
|
|
2031
|
+
(__mmask16) -1);
|
|
2032
|
+
}
|
|
2033
|
+
|
|
2034
|
+
static __inline __m512d __DEFAULT_FN_ATTRS
|
|
2035
|
+
_mm512_load_pd(float const *__p)
|
|
2036
|
+
{
|
|
2037
|
+
return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
|
|
2038
|
+
(__v8df)
|
|
2039
|
+
_mm512_setzero_pd (),
|
|
2040
|
+
(__mmask8) -1);
|
|
2041
|
+
}
|
|
2042
|
+
|
|
2043
|
+
/* SIMD store ops */
|
|
2044
|
+
|
|
2045
|
+
static __inline void __DEFAULT_FN_ATTRS
|
|
2046
|
+
_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
|
|
2047
|
+
{
|
|
2048
|
+
__builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
|
|
2049
|
+
(__mmask8) __U);
|
|
2050
|
+
}
|
|
2051
|
+
|
|
2052
|
+
static __inline void __DEFAULT_FN_ATTRS
|
|
2053
|
+
_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
|
|
2054
|
+
{
|
|
2055
|
+
__builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
|
|
2056
|
+
(__mmask16) __U);
|
|
2057
|
+
}
|
|
2058
|
+
|
|
2059
|
+
static __inline void __DEFAULT_FN_ATTRS
|
|
2060
|
+
_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
|
|
2061
|
+
{
|
|
2062
|
+
__builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
|
|
2063
|
+
}
|
|
2064
|
+
|
|
2065
|
+
static __inline void __DEFAULT_FN_ATTRS
|
|
2066
|
+
_mm512_storeu_pd(void *__P, __m512d __A)
|
|
2067
|
+
{
|
|
2068
|
+
__builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1);
|
|
2069
|
+
}
|
|
2070
|
+
|
|
2071
|
+
static __inline void __DEFAULT_FN_ATTRS
|
|
2072
|
+
_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
|
|
2073
|
+
{
|
|
2074
|
+
__builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
|
|
2075
|
+
(__mmask16) __U);
|
|
2076
|
+
}
|
|
2077
|
+
|
|
2078
|
+
static __inline void __DEFAULT_FN_ATTRS
|
|
2079
|
+
_mm512_storeu_ps(void *__P, __m512 __A)
|
|
2080
|
+
{
|
|
2081
|
+
__builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1);
|
|
2082
|
+
}
|
|
2083
|
+
|
|
2084
|
+
static __inline void __DEFAULT_FN_ATTRS
|
|
2085
|
+
_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
|
|
2086
|
+
{
|
|
2087
|
+
__builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
|
|
2088
|
+
}
|
|
2089
|
+
|
|
2090
|
+
static __inline void __DEFAULT_FN_ATTRS
|
|
2091
|
+
_mm512_store_pd(void *__P, __m512d __A)
|
|
2092
|
+
{
|
|
2093
|
+
*(__m512d*)__P = __A;
|
|
2094
|
+
}
|
|
2095
|
+
|
|
2096
|
+
static __inline void __DEFAULT_FN_ATTRS
|
|
2097
|
+
_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
|
|
2098
|
+
{
|
|
2099
|
+
__builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
|
|
2100
|
+
(__mmask16) __U);
|
|
2101
|
+
}
|
|
2102
|
+
|
|
2103
|
+
static __inline void __DEFAULT_FN_ATTRS
|
|
2104
|
+
_mm512_store_ps(void *__P, __m512 __A)
|
|
2105
|
+
{
|
|
2106
|
+
*(__m512*)__P = __A;
|
|
2107
|
+
}
|
|
2108
|
+
|
|
2109
|
+
/* Mask ops */
|
|
2110
|
+
|
|
2111
|
+
static __inline __mmask16 __DEFAULT_FN_ATTRS
|
|
2112
|
+
_mm512_knot(__mmask16 __M)
|
|
2113
|
+
{
|
|
2114
|
+
return __builtin_ia32_knothi(__M);
|
|
2115
|
+
}
|
|
2116
|
+
|
|
2117
|
+
/* Integer compare */
|
|
2118
|
+
|
|
2119
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2120
|
+
_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
|
|
2121
|
+
return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
|
|
2122
|
+
(__mmask16)-1);
|
|
2123
|
+
}
|
|
2124
|
+
|
|
2125
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2126
|
+
_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
|
2127
|
+
return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
|
|
2128
|
+
__u);
|
|
2129
|
+
}
|
|
2130
|
+
|
|
2131
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2132
|
+
_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
|
|
2133
|
+
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
|
|
2134
|
+
(__mmask16)-1);
|
|
2135
|
+
}
|
|
2136
|
+
|
|
2137
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2138
|
+
_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
|
2139
|
+
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
|
|
2140
|
+
__u);
|
|
2141
|
+
}
|
|
2142
|
+
|
|
2143
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2144
|
+
_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
|
2145
|
+
return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
|
|
2146
|
+
__u);
|
|
2147
|
+
}
|
|
2148
|
+
|
|
2149
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2150
|
+
_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
|
|
2151
|
+
return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
|
|
2152
|
+
(__mmask8)-1);
|
|
2153
|
+
}
|
|
2154
|
+
|
|
2155
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2156
|
+
_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
|
|
2157
|
+
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
|
|
2158
|
+
(__mmask8)-1);
|
|
2159
|
+
}
|
|
2160
|
+
|
|
2161
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2162
|
+
_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
|
2163
|
+
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
|
|
2164
|
+
__u);
|
|
2165
|
+
}
|
|
2166
|
+
|
|
2167
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2168
|
+
_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
|
|
2169
|
+
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
|
|
2170
|
+
(__mmask16)-1);
|
|
2171
|
+
}
|
|
2172
|
+
|
|
2173
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2174
|
+
_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
|
2175
|
+
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
|
|
2176
|
+
__u);
|
|
2177
|
+
}
|
|
2178
|
+
|
|
2179
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2180
|
+
_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
|
|
2181
|
+
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
|
|
2182
|
+
(__mmask16)-1);
|
|
2183
|
+
}
|
|
2184
|
+
|
|
2185
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2186
|
+
_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
|
2187
|
+
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
|
|
2188
|
+
__u);
|
|
2189
|
+
}
|
|
2190
|
+
|
|
2191
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2192
|
+
_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
|
|
2193
|
+
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
|
|
2194
|
+
(__mmask8)-1);
|
|
2195
|
+
}
|
|
2196
|
+
|
|
2197
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2198
|
+
_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
|
2199
|
+
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
|
|
2200
|
+
__u);
|
|
2201
|
+
}
|
|
2202
|
+
|
|
2203
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2204
|
+
_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
|
|
2205
|
+
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
|
|
2206
|
+
(__mmask8)-1);
|
|
2207
|
+
}
|
|
2208
|
+
|
|
2209
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2210
|
+
_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
|
2211
|
+
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
|
|
2212
|
+
__u);
|
|
2213
|
+
}
|
|
2214
|
+
|
|
2215
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2216
|
+
_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
|
|
2217
|
+
return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
|
|
2218
|
+
(__mmask16)-1);
|
|
2219
|
+
}
|
|
2220
|
+
|
|
2221
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2222
|
+
_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
|
2223
|
+
return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
|
|
2224
|
+
__u);
|
|
2225
|
+
}
|
|
2226
|
+
|
|
2227
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2228
|
+
_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
|
|
2229
|
+
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
|
|
2230
|
+
(__mmask16)-1);
|
|
2231
|
+
}
|
|
2232
|
+
|
|
2233
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2234
|
+
_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
|
2235
|
+
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
|
|
2236
|
+
__u);
|
|
2237
|
+
}
|
|
2238
|
+
|
|
2239
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2240
|
+
_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
|
2241
|
+
return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
|
|
2242
|
+
__u);
|
|
2243
|
+
}
|
|
2244
|
+
|
|
2245
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2246
|
+
_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
|
|
2247
|
+
return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
|
|
2248
|
+
(__mmask8)-1);
|
|
2249
|
+
}
|
|
2250
|
+
|
|
2251
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2252
|
+
_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
|
|
2253
|
+
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
|
|
2254
|
+
(__mmask8)-1);
|
|
2255
|
+
}
|
|
2256
|
+
|
|
2257
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2258
|
+
_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
|
2259
|
+
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
|
|
2260
|
+
__u);
|
|
2261
|
+
}
|
|
2262
|
+
|
|
2263
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2264
|
+
_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
|
|
2265
|
+
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
|
|
2266
|
+
(__mmask16)-1);
|
|
2267
|
+
}
|
|
2268
|
+
|
|
2269
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2270
|
+
_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
|
2271
|
+
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
|
|
2272
|
+
__u);
|
|
2273
|
+
}
|
|
2274
|
+
|
|
2275
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2276
|
+
_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
|
|
2277
|
+
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
|
|
2278
|
+
(__mmask16)-1);
|
|
2279
|
+
}
|
|
2280
|
+
|
|
2281
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2282
|
+
_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
|
2283
|
+
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
|
|
2284
|
+
__u);
|
|
2285
|
+
}
|
|
2286
|
+
|
|
2287
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2288
|
+
_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
|
|
2289
|
+
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
|
|
2290
|
+
(__mmask8)-1);
|
|
2291
|
+
}
|
|
2292
|
+
|
|
2293
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2294
|
+
_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
|
2295
|
+
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
|
|
2296
|
+
__u);
|
|
2297
|
+
}
|
|
2298
|
+
|
|
2299
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2300
|
+
_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
|
|
2301
|
+
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
|
|
2302
|
+
(__mmask8)-1);
|
|
2303
|
+
}
|
|
2304
|
+
|
|
2305
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2306
|
+
_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
|
2307
|
+
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
|
|
2308
|
+
__u);
|
|
2309
|
+
}
|
|
2310
|
+
|
|
2311
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2312
|
+
_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
|
|
2313
|
+
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
|
|
2314
|
+
(__mmask16)-1);
|
|
2315
|
+
}
|
|
2316
|
+
|
|
2317
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2318
|
+
_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
|
2319
|
+
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
|
|
2320
|
+
__u);
|
|
2321
|
+
}
|
|
2322
|
+
|
|
2323
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2324
|
+
_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
|
|
2325
|
+
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
|
|
2326
|
+
(__mmask16)-1);
|
|
2327
|
+
}
|
|
2328
|
+
|
|
2329
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2330
|
+
_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
|
2331
|
+
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
|
|
2332
|
+
__u);
|
|
2333
|
+
}
|
|
2334
|
+
|
|
2335
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2336
|
+
_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
|
|
2337
|
+
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
|
|
2338
|
+
(__mmask8)-1);
|
|
2339
|
+
}
|
|
2340
|
+
|
|
2341
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2342
|
+
_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
|
2343
|
+
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
|
|
2344
|
+
__u);
|
|
2345
|
+
}
|
|
2346
|
+
|
|
2347
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2348
|
+
_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
|
|
2349
|
+
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
|
|
2350
|
+
(__mmask8)-1);
|
|
2351
|
+
}
|
|
2352
|
+
|
|
2353
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2354
|
+
_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
|
2355
|
+
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
|
|
2356
|
+
__u);
|
|
2357
|
+
}
|
|
2358
|
+
|
|
2359
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2360
|
+
_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
|
|
2361
|
+
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
|
|
2362
|
+
(__mmask16)-1);
|
|
2363
|
+
}
|
|
2364
|
+
|
|
2365
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2366
|
+
_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
|
2367
|
+
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
|
|
2368
|
+
__u);
|
|
2369
|
+
}
|
|
2370
|
+
|
|
2371
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2372
|
+
_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
|
|
2373
|
+
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
|
|
2374
|
+
(__mmask16)-1);
|
|
2375
|
+
}
|
|
2376
|
+
|
|
2377
|
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
|
2378
|
+
_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
|
2379
|
+
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
|
|
2380
|
+
__u);
|
|
2381
|
+
}
|
|
2382
|
+
|
|
2383
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2384
|
+
_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
|
|
2385
|
+
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
|
|
2386
|
+
(__mmask8)-1);
|
|
2387
|
+
}
|
|
2388
|
+
|
|
2389
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2390
|
+
_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
|
2391
|
+
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
|
|
2392
|
+
__u);
|
|
2393
|
+
}
|
|
2394
|
+
|
|
2395
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2396
|
+
_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
|
|
2397
|
+
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
|
|
2398
|
+
(__mmask8)-1);
|
|
2399
|
+
}
|
|
2400
|
+
|
|
2401
|
+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
|
2402
|
+
_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
|
2403
|
+
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
|
|
2404
|
+
__u);
|
|
2405
|
+
}
|
|
2406
|
+
|
|
2407
|
+
/* Generic epi32 compare: expands to __builtin_ia32_cmpd512_mask on (a) and
 * (b) with predicate (p) and (__mmask16)-1 as the mask operand. (p) is
 * forwarded untouched; the named wrappers in this header pass it as an
 * integer literal.
 * The operands are cast in place rather than copied into __a/__b
 * temporaries: with temporaries, an argument expression that itself mentions
 * __a or __b (the parameter names used by this header's inline functions)
 * would be captured by the statement expression and expand to a
 * self-initialisation such as `__m512i __a = (__a);`. Each argument is still
 * evaluated exactly once. */
#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (p), \
                                         (__mmask16)-1); })
|
|
2412
|
+
|
|
2413
|
+
/* Generic epu32 compare: expands to __builtin_ia32_ucmpd512_mask on (a) and
 * (b) with predicate (p) and (__mmask16)-1 as the mask operand. (p) is
 * forwarded untouched; the named wrappers in this header pass it as an
 * integer literal.
 * The operands are cast in place rather than copied into __a/__b
 * temporaries: with temporaries, an argument expression that itself mentions
 * __a or __b (the parameter names used by this header's inline functions)
 * would be captured by the statement expression and expand to a
 * self-initialisation such as `__m512i __a = (__a);`. Each argument is still
 * evaluated exactly once. */
#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (p), \
                                          (__mmask16)-1); })
|
|
2418
|
+
|
|
2419
|
+
/* Generic epi64 compare: expands to __builtin_ia32_cmpq512_mask on (a) and
 * (b) with predicate (p) and (__mmask8)-1 as the mask operand. (p) is
 * forwarded untouched; the named wrappers in this header pass it as an
 * integer literal.
 * The operands are cast in place rather than copied into __a/__b
 * temporaries: with temporaries, an argument expression that itself mentions
 * __a or __b (the parameter names used by this header's inline functions)
 * would be captured by the statement expression and expand to a
 * self-initialisation such as `__m512i __a = (__a);`. Each argument is still
 * evaluated exactly once. */
#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (p), \
                                        (__mmask8)-1); })
|
|
2424
|
+
|
|
2425
|
+
/* Generic epu64 compare: expands to __builtin_ia32_ucmpq512_mask on (a) and
 * (b) with predicate (p) and (__mmask8)-1 as the mask operand. (p) is
 * forwarded untouched; the named wrappers in this header pass it as an
 * integer literal.
 * The operands are cast in place rather than copied into __a/__b
 * temporaries: with temporaries, an argument expression that itself mentions
 * __a or __b (the parameter names used by this header's inline functions)
 * would be captured by the statement expression and expand to a
 * self-initialisation such as `__m512i __a = (__a);`. Each argument is still
 * evaluated exactly once. */
#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (p), \
                                         (__mmask8)-1); })
|
|
2430
|
+
|
|
2431
|
+
/* Masked generic epi32 compare: expands to __builtin_ia32_cmpd512_mask on
 * (a) and (b) with predicate (p) and (m), cast to __mmask16, as the mask
 * operand. (p) is forwarded untouched; the named wrappers in this header
 * pass it as an integer literal.
 * The operands are cast in place rather than copied into __a/__b
 * temporaries: with temporaries, an argument expression (including the mask
 * m) that itself mentions __a or __b would be captured by the statement
 * expression and silently refer to the wrong object. Each argument is still
 * evaluated exactly once. */
#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (p), \
                                         (__mmask16)(m)); })
|
|
2436
|
+
|
|
2437
|
+
/* Masked generic epu32 compare: expands to __builtin_ia32_ucmpd512_mask on
 * (a) and (b) with predicate (p) and (m), cast to __mmask16, as the mask
 * operand. (p) is forwarded untouched; the named wrappers in this header
 * pass it as an integer literal.
 * The operands are cast in place rather than copied into __a/__b
 * temporaries: with temporaries, an argument expression (including the mask
 * m) that itself mentions __a or __b would be captured by the statement
 * expression and silently refer to the wrong object. Each argument is still
 * evaluated exactly once. */
#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (p), \
                                          (__mmask16)(m)); })
|
|
2442
|
+
|
|
2443
|
+
/* Masked generic epi64 compare: expands to __builtin_ia32_cmpq512_mask on
 * (a) and (b) with predicate (p) and (m), cast to __mmask8, as the mask
 * operand. (p) is forwarded untouched; the named wrappers in this header
 * pass it as an integer literal.
 * The operands are cast in place rather than copied into __a/__b
 * temporaries: with temporaries, an argument expression (including the mask
 * m) that itself mentions __a or __b would be captured by the statement
 * expression and silently refer to the wrong object. Each argument is still
 * evaluated exactly once. */
#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (p), \
                                        (__mmask8)(m)); })
|
|
2448
|
+
|
|
2449
|
+
/* Masked generic epu64 compare: expands to __builtin_ia32_ucmpq512_mask on
 * (a) and (b) with predicate (p) and (m), cast to __mmask8, as the mask
 * operand. (p) is forwarded untouched; the named wrappers in this header
 * pass it as an integer literal.
 * The operands are cast in place rather than copied into __a/__b
 * temporaries: with temporaries, an argument expression (including the mask
 * m) that itself mentions __a or __b would be captured by the statement
 * expression and silently refer to the wrong object. Each argument is still
 * evaluated exactly once. */
#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (p), \
                                         (__mmask8)(m)); })
|
|
2454
|
+
|
|
2455
|
+
#undef __DEFAULT_FN_ATTRS
|
|
2456
|
+
|
|
2457
|
+
#endif // __AVX512FINTRIN_H
|