xcodebuild-helper 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (202) hide show
  1. checksums.yaml +7 -0
  2. data/.codeclimate.yml +20 -0
  3. data/.gitignore +1 -0
  4. data/.rspec +2 -0
  5. data/.travis.yml +7 -0
  6. data/Gemfile +6 -0
  7. data/Gemfile.lock +110 -0
  8. data/Guardfile +18 -0
  9. data/README.md +7 -0
  10. data/Rakefile +7 -0
  11. data/TODO.md +3 -0
  12. data/bin/oclint +5 -0
  13. data/bin/oclint-0.8 +5 -0
  14. data/bin/oclint-json-compilation-database +5 -0
  15. data/bin/oclint-xcodebuild +5 -0
  16. data/externals/oclint/LICENSE +69 -0
  17. data/externals/oclint/bin/oclint +0 -0
  18. data/externals/oclint/bin/oclint-0.10.2 +0 -0
  19. data/externals/oclint/bin/oclint-json-compilation-database +88 -0
  20. data/externals/oclint/bin/oclint-xcodebuild +218 -0
  21. data/externals/oclint/lib/clang/3.7.0/asan_blacklist.txt +13 -0
  22. data/externals/oclint/lib/clang/3.7.0/include/Intrin.h +958 -0
  23. data/externals/oclint/lib/clang/3.7.0/include/__stddef_max_align_t.h +43 -0
  24. data/externals/oclint/lib/clang/3.7.0/include/__wmmintrin_aes.h +72 -0
  25. data/externals/oclint/lib/clang/3.7.0/include/__wmmintrin_pclmul.h +34 -0
  26. data/externals/oclint/lib/clang/3.7.0/include/adxintrin.h +88 -0
  27. data/externals/oclint/lib/clang/3.7.0/include/altivec.h +13528 -0
  28. data/externals/oclint/lib/clang/3.7.0/include/ammintrin.h +215 -0
  29. data/externals/oclint/lib/clang/3.7.0/include/arm_acle.h +304 -0
  30. data/externals/oclint/lib/clang/3.7.0/include/arm_neon.h +68419 -0
  31. data/externals/oclint/lib/clang/3.7.0/include/avx2intrin.h +1256 -0
  32. data/externals/oclint/lib/clang/3.7.0/include/avx512bwintrin.h +1250 -0
  33. data/externals/oclint/lib/clang/3.7.0/include/avx512cdintrin.h +131 -0
  34. data/externals/oclint/lib/clang/3.7.0/include/avx512dqintrin.h +242 -0
  35. data/externals/oclint/lib/clang/3.7.0/include/avx512erintrin.h +285 -0
  36. data/externals/oclint/lib/clang/3.7.0/include/avx512fintrin.h +2457 -0
  37. data/externals/oclint/lib/clang/3.7.0/include/avx512vlbwintrin.h +1907 -0
  38. data/externals/oclint/lib/clang/3.7.0/include/avx512vldqintrin.h +353 -0
  39. data/externals/oclint/lib/clang/3.7.0/include/avx512vlintrin.h +1982 -0
  40. data/externals/oclint/lib/clang/3.7.0/include/avxintrin.h +1308 -0
  41. data/externals/oclint/lib/clang/3.7.0/include/bmi2intrin.h +99 -0
  42. data/externals/oclint/lib/clang/3.7.0/include/bmiintrin.h +153 -0
  43. data/externals/oclint/lib/clang/3.7.0/include/cpuid.h +209 -0
  44. data/externals/oclint/lib/clang/3.7.0/include/cuda_builtin_vars.h +110 -0
  45. data/externals/oclint/lib/clang/3.7.0/include/emmintrin.h +1480 -0
  46. data/externals/oclint/lib/clang/3.7.0/include/f16cintrin.h +63 -0
  47. data/externals/oclint/lib/clang/3.7.0/include/float.h +124 -0
  48. data/externals/oclint/lib/clang/3.7.0/include/fma4intrin.h +236 -0
  49. data/externals/oclint/lib/clang/3.7.0/include/fmaintrin.h +234 -0
  50. data/externals/oclint/lib/clang/3.7.0/include/fxsrintrin.h +55 -0
  51. data/externals/oclint/lib/clang/3.7.0/include/htmintrin.h +226 -0
  52. data/externals/oclint/lib/clang/3.7.0/include/htmxlintrin.h +363 -0
  53. data/externals/oclint/lib/clang/3.7.0/include/ia32intrin.h +101 -0
  54. data/externals/oclint/lib/clang/3.7.0/include/immintrin.h +203 -0
  55. data/externals/oclint/lib/clang/3.7.0/include/inttypes.h +102 -0
  56. data/externals/oclint/lib/clang/3.7.0/include/iso646.h +43 -0
  57. data/externals/oclint/lib/clang/3.7.0/include/limits.h +118 -0
  58. data/externals/oclint/lib/clang/3.7.0/include/lzcntintrin.h +72 -0
  59. data/externals/oclint/lib/clang/3.7.0/include/mm3dnow.h +167 -0
  60. data/externals/oclint/lib/clang/3.7.0/include/mm_malloc.h +75 -0
  61. data/externals/oclint/lib/clang/3.7.0/include/mmintrin.h +507 -0
  62. data/externals/oclint/lib/clang/3.7.0/include/module.modulemap +196 -0
  63. data/externals/oclint/lib/clang/3.7.0/include/nmmintrin.h +35 -0
  64. data/externals/oclint/lib/clang/3.7.0/include/pmmintrin.h +122 -0
  65. data/externals/oclint/lib/clang/3.7.0/include/popcntintrin.h +50 -0
  66. data/externals/oclint/lib/clang/3.7.0/include/prfchwintrin.h +39 -0
  67. data/externals/oclint/lib/clang/3.7.0/include/rdseedintrin.h +59 -0
  68. data/externals/oclint/lib/clang/3.7.0/include/rtmintrin.h +59 -0
  69. data/externals/oclint/lib/clang/3.7.0/include/s390intrin.h +39 -0
  70. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/allocator_interface.h +66 -0
  71. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/asan_interface.h +155 -0
  72. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/common_interface_defs.h +118 -0
  73. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/coverage_interface.h +63 -0
  74. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/dfsan_interface.h +114 -0
  75. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/linux_syscall_hooks.h +3070 -0
  76. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/lsan_interface.h +84 -0
  77. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/msan_interface.h +107 -0
  78. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/tsan_interface_atomic.h +222 -0
  79. data/externals/oclint/lib/clang/3.7.0/include/shaintrin.h +79 -0
  80. data/externals/oclint/lib/clang/3.7.0/include/smmintrin.h +487 -0
  81. data/externals/oclint/lib/clang/3.7.0/include/stdalign.h +35 -0
  82. data/externals/oclint/lib/clang/3.7.0/include/stdarg.h +52 -0
  83. data/externals/oclint/lib/clang/3.7.0/include/stdatomic.h +190 -0
  84. data/externals/oclint/lib/clang/3.7.0/include/stdbool.h +44 -0
  85. data/externals/oclint/lib/clang/3.7.0/include/stddef.h +137 -0
  86. data/externals/oclint/lib/clang/3.7.0/include/stdint.h +707 -0
  87. data/externals/oclint/lib/clang/3.7.0/include/stdnoreturn.h +30 -0
  88. data/externals/oclint/lib/clang/3.7.0/include/tbmintrin.h +154 -0
  89. data/externals/oclint/lib/clang/3.7.0/include/tgmath.h +1374 -0
  90. data/externals/oclint/lib/clang/3.7.0/include/tmmintrin.h +230 -0
  91. data/externals/oclint/lib/clang/3.7.0/include/unwind.h +282 -0
  92. data/externals/oclint/lib/clang/3.7.0/include/vadefs.h +65 -0
  93. data/externals/oclint/lib/clang/3.7.0/include/varargs.h +26 -0
  94. data/externals/oclint/lib/clang/3.7.0/include/vecintrin.h +8946 -0
  95. data/externals/oclint/lib/clang/3.7.0/include/wmmintrin.h +42 -0
  96. data/externals/oclint/lib/clang/3.7.0/include/x86intrin.h +81 -0
  97. data/externals/oclint/lib/clang/3.7.0/include/xmmintrin.h +1008 -0
  98. data/externals/oclint/lib/clang/3.7.0/include/xopintrin.h +809 -0
  99. data/externals/oclint/lib/clang/3.7.0/include/xtestintrin.h +41 -0
  100. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.asan_iossim_dynamic.dylib +0 -0
  101. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.asan_osx_dynamic.dylib +0 -0
  102. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.builtins-i386.a +0 -0
  103. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.builtins-x86_64.a +0 -0
  104. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.profile_osx.a +0 -0
  105. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.safestack_osx.a +0 -0
  106. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.ubsan_iossim_dynamic.dylib +0 -0
  107. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.ubsan_osx_dynamic.dylib +0 -0
  108. data/externals/oclint/lib/oclint/reporters/libHTMLReporter.dylib +0 -0
  109. data/externals/oclint/lib/oclint/reporters/libJSONReporter.dylib +0 -0
  110. data/externals/oclint/lib/oclint/reporters/libPMDReporter.dylib +0 -0
  111. data/externals/oclint/lib/oclint/reporters/libTextReporter.dylib +0 -0
  112. data/externals/oclint/lib/oclint/reporters/libXMLReporter.dylib +0 -0
  113. data/externals/oclint/lib/oclint/reporters/libXcodeReporter.dylib +0 -0
  114. data/externals/oclint/lib/oclint/rules/libAvoidBranchingStatementAsLastInLoopRule.dylib +0 -0
  115. data/externals/oclint/lib/oclint/rules/libAvoidDefaultArgumentsOnVirtualMethodsRule.dylib +0 -0
  116. data/externals/oclint/lib/oclint/rules/libAvoidPrivateStaticMembersRule.dylib +0 -0
  117. data/externals/oclint/lib/oclint/rules/libBaseClassDestructorShouldBeVirtualOrProtectedRule.dylib +0 -0
  118. data/externals/oclint/lib/oclint/rules/libBitwiseOperatorInConditionalRule.dylib +0 -0
  119. data/externals/oclint/lib/oclint/rules/libBrokenNullCheckRule.dylib +0 -0
  120. data/externals/oclint/lib/oclint/rules/libBrokenOddnessCheckRule.dylib +0 -0
  121. data/externals/oclint/lib/oclint/rules/libCollapsibleIfStatementsRule.dylib +0 -0
  122. data/externals/oclint/lib/oclint/rules/libConstantConditionalOperatorRule.dylib +0 -0
  123. data/externals/oclint/lib/oclint/rules/libConstantIfExpressionRule.dylib +0 -0
  124. data/externals/oclint/lib/oclint/rules/libCoveredSwitchStatementsDontNeedDefaultRule.dylib +0 -0
  125. data/externals/oclint/lib/oclint/rules/libCyclomaticComplexityRule.dylib +0 -0
  126. data/externals/oclint/lib/oclint/rules/libDeadCodeRule.dylib +0 -0
  127. data/externals/oclint/lib/oclint/rules/libDefaultLabelNotLastInSwitchStatementRule.dylib +0 -0
  128. data/externals/oclint/lib/oclint/rules/libDestructorOfVirtualClassRule.dylib +0 -0
  129. data/externals/oclint/lib/oclint/rules/libDoubleNegativeRule.dylib +0 -0
  130. data/externals/oclint/lib/oclint/rules/libEmptyCatchStatementRule.dylib +0 -0
  131. data/externals/oclint/lib/oclint/rules/libEmptyDoWhileStatementRule.dylib +0 -0
  132. data/externals/oclint/lib/oclint/rules/libEmptyElseBlockRule.dylib +0 -0
  133. data/externals/oclint/lib/oclint/rules/libEmptyFinallyStatementRule.dylib +0 -0
  134. data/externals/oclint/lib/oclint/rules/libEmptyForStatementRule.dylib +0 -0
  135. data/externals/oclint/lib/oclint/rules/libEmptyIfStatementRule.dylib +0 -0
  136. data/externals/oclint/lib/oclint/rules/libEmptySwitchStatementRule.dylib +0 -0
  137. data/externals/oclint/lib/oclint/rules/libEmptyTryStatementRule.dylib +0 -0
  138. data/externals/oclint/lib/oclint/rules/libEmptyWhileStatementRule.dylib +0 -0
  139. data/externals/oclint/lib/oclint/rules/libForLoopShouldBeWhileLoopRule.dylib +0 -0
  140. data/externals/oclint/lib/oclint/rules/libGotoStatementRule.dylib +0 -0
  141. data/externals/oclint/lib/oclint/rules/libInvertedLogicRule.dylib +0 -0
  142. data/externals/oclint/lib/oclint/rules/libJumbledIncrementerRule.dylib +0 -0
  143. data/externals/oclint/lib/oclint/rules/libLongClassRule.dylib +0 -0
  144. data/externals/oclint/lib/oclint/rules/libLongLineRule.dylib +0 -0
  145. data/externals/oclint/lib/oclint/rules/libLongMethodRule.dylib +0 -0
  146. data/externals/oclint/lib/oclint/rules/libLongVariableNameRule.dylib +0 -0
  147. data/externals/oclint/lib/oclint/rules/libMisplacedNullCheckRule.dylib +0 -0
  148. data/externals/oclint/lib/oclint/rules/libMissingBreakInSwitchStatementRule.dylib +0 -0
  149. data/externals/oclint/lib/oclint/rules/libMultipleUnaryOperatorRule.dylib +0 -0
  150. data/externals/oclint/lib/oclint/rules/libNPathComplexityRule.dylib +0 -0
  151. data/externals/oclint/lib/oclint/rules/libNcssMethodCountRule.dylib +0 -0
  152. data/externals/oclint/lib/oclint/rules/libNestedBlockDepthRule.dylib +0 -0
  153. data/externals/oclint/lib/oclint/rules/libNonCaseLabelInSwitchStatementRule.dylib +0 -0
  154. data/externals/oclint/lib/oclint/rules/libObjCAssignIvarOutsideAccessorsRule.dylib +0 -0
  155. data/externals/oclint/lib/oclint/rules/libObjCBoxedExpressionsRule.dylib +0 -0
  156. data/externals/oclint/lib/oclint/rules/libObjCContainerLiteralsRule.dylib +0 -0
  157. data/externals/oclint/lib/oclint/rules/libObjCNSNumberLiteralsRule.dylib +0 -0
  158. data/externals/oclint/lib/oclint/rules/libObjCObjectSubscriptingRule.dylib +0 -0
  159. data/externals/oclint/lib/oclint/rules/libObjCVerifyIsEqualHashRule.dylib +0 -0
  160. data/externals/oclint/lib/oclint/rules/libObjCVerifyMustCallSuperRule.dylib +0 -0
  161. data/externals/oclint/lib/oclint/rules/libObjCVerifyProhibitedCallRule.dylib +0 -0
  162. data/externals/oclint/lib/oclint/rules/libObjCVerifyProtectedMethodRule.dylib +0 -0
  163. data/externals/oclint/lib/oclint/rules/libObjCVerifySubclassMustImplementRule.dylib +0 -0
  164. data/externals/oclint/lib/oclint/rules/libParameterReassignmentRule.dylib +0 -0
  165. data/externals/oclint/lib/oclint/rules/libPreferEarlyExitRule.dylib +0 -0
  166. data/externals/oclint/lib/oclint/rules/libRedundantConditionalOperatorRule.dylib +0 -0
  167. data/externals/oclint/lib/oclint/rules/libRedundantIfStatementRule.dylib +0 -0
  168. data/externals/oclint/lib/oclint/rules/libRedundantLocalVariableRule.dylib +0 -0
  169. data/externals/oclint/lib/oclint/rules/libRedundantNilCheckRule.dylib +0 -0
  170. data/externals/oclint/lib/oclint/rules/libReturnFromFinallyBlockRule.dylib +0 -0
  171. data/externals/oclint/lib/oclint/rules/libShortVariableNameRule.dylib +0 -0
  172. data/externals/oclint/lib/oclint/rules/libSwitchStatementsShouldHaveDefaultRule.dylib +0 -0
  173. data/externals/oclint/lib/oclint/rules/libThrowExceptionFromFinallyBlockRule.dylib +0 -0
  174. data/externals/oclint/lib/oclint/rules/libTooFewBranchesInSwitchStatementRule.dylib +0 -0
  175. data/externals/oclint/lib/oclint/rules/libTooManyFieldsRule.dylib +0 -0
  176. data/externals/oclint/lib/oclint/rules/libTooManyMethodsRule.dylib +0 -0
  177. data/externals/oclint/lib/oclint/rules/libTooManyParametersRule.dylib +0 -0
  178. data/externals/oclint/lib/oclint/rules/libUnnecessaryElseStatementRule.dylib +0 -0
  179. data/externals/oclint/lib/oclint/rules/libUnnecessaryNullCheckForCXXDeallocRule.dylib +0 -0
  180. data/externals/oclint/lib/oclint/rules/libUnusedLocalVariableRule.dylib +0 -0
  181. data/externals/oclint/lib/oclint/rules/libUnusedMethodParameterRule.dylib +0 -0
  182. data/externals/oclint/lib/oclint/rules/libUselessParenthesesRule.dylib +0 -0
  183. data/lib/coverage_plan.rb +19 -0
  184. data/lib/device.rb +27 -0
  185. data/lib/execute.rb +7 -0
  186. data/lib/lint_plan.rb +41 -0
  187. data/lib/rules.rb +23 -0
  188. data/lib/test_plan.rb +11 -0
  189. data/lib/version.rb +3 -0
  190. data/lib/xcode.rb +128 -0
  191. data/lib/xcodebuild-helper.rb +110 -0
  192. data/spec/coverage_plan_spec.rb +18 -0
  193. data/spec/device_spec.rb +24 -0
  194. data/spec/lint_plan_spec.rb +35 -0
  195. data/spec/rule_spec.rb +37 -0
  196. data/spec/spec_helper.rb +17 -0
  197. data/spec/test_plan_spec.rb +11 -0
  198. data/spec/xcode_dsl_actions_spec.rb +136 -0
  199. data/spec/xcode_dsl_spec.rb +176 -0
  200. data/spec/xcode_spec.rb +79 -0
  201. data/xcodebuild-helper.gemspec +26 -0
  202. metadata +327 -0
@@ -0,0 +1,2457 @@
1
+ /*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
2
+ *
3
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ * of this software and associated documentation files (the "Software"), to deal
5
+ * in the Software without restriction, including without limitation the rights
6
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ * copies of the Software, and to permit persons to whom the Software is
8
+ * furnished to do so, subject to the following conditions:
9
+ *
10
+ * The above copyright notice and this permission notice shall be included in
11
+ * all copies or substantial portions of the Software.
12
+ *
13
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ * THE SOFTWARE.
20
+ *
21
+ *===-----------------------------------------------------------------------===
22
+ */
23
+ #ifndef __IMMINTRIN_H
24
+ #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25
+ #endif
26
+
27
+ #ifndef __AVX512FINTRIN_H
28
+ #define __AVX512FINTRIN_H
29
+
30
+ typedef double __v8df __attribute__((__vector_size__(64)));
31
+ typedef float __v16sf __attribute__((__vector_size__(64)));
32
+ typedef long long __v8di __attribute__((__vector_size__(64)));
33
+ typedef int __v16si __attribute__((__vector_size__(64)));
34
+
35
+ typedef float __m512 __attribute__((__vector_size__(64)));
36
+ typedef double __m512d __attribute__((__vector_size__(64)));
37
+ typedef long long __m512i __attribute__((__vector_size__(64)));
38
+
39
+ typedef unsigned char __mmask8;
40
+ typedef unsigned short __mmask16;
41
+
42
+ /* Rounding mode macros. */
43
+ #define _MM_FROUND_TO_NEAREST_INT 0x00
44
+ #define _MM_FROUND_TO_NEG_INF 0x01
45
+ #define _MM_FROUND_TO_POS_INF 0x02
46
+ #define _MM_FROUND_TO_ZERO 0x03
47
+ #define _MM_FROUND_CUR_DIRECTION 0x04
48
+
49
+ /* Define the default attributes for the functions in this file. */
50
+ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
51
+
52
+ /* Create vectors with repeated elements */
53
+
54
+ static __inline __m512i __DEFAULT_FN_ATTRS
55
+ _mm512_setzero_si512(void)
56
+ {
57
+ return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
58
+ }
59
+
60
+ static __inline __m512i __DEFAULT_FN_ATTRS
61
+ _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
62
+ {
63
+ return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
64
+ (__v16si)
65
+ _mm512_setzero_si512 (),
66
+ __M);
67
+ }
68
+
69
+ static __inline __m512i __DEFAULT_FN_ATTRS
70
+ _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
71
+ {
72
+ #ifdef __x86_64__
73
+ return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
74
+ (__v8di)
75
+ _mm512_setzero_si512 (),
76
+ __M);
77
+ #else
78
+ return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
79
+ (__v8di)
80
+ _mm512_setzero_si512 (),
81
+ __M);
82
+ #endif
83
+ }
84
+
85
+ static __inline __m512 __DEFAULT_FN_ATTRS
86
+ _mm512_setzero_ps(void)
87
+ {
88
+ return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
89
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
90
+ }
91
+ static __inline __m512d __DEFAULT_FN_ATTRS
92
+ _mm512_setzero_pd(void)
93
+ {
94
+ return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
95
+ }
96
+
97
+ static __inline __m512 __DEFAULT_FN_ATTRS
98
+ _mm512_set1_ps(float __w)
99
+ {
100
+ return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
101
+ __w, __w, __w, __w, __w, __w, __w, __w };
102
+ }
103
+
104
+ static __inline __m512d __DEFAULT_FN_ATTRS
105
+ _mm512_set1_pd(double __w)
106
+ {
107
+ return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
108
+ }
109
+
110
+ static __inline __m512i __DEFAULT_FN_ATTRS
111
+ _mm512_set1_epi32(int __s)
112
+ {
113
+ return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
114
+ __s, __s, __s, __s, __s, __s, __s, __s };
115
+ }
116
+
117
+ static __inline __m512i __DEFAULT_FN_ATTRS
118
+ _mm512_set1_epi64(long long __d)
119
+ {
120
+ return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
121
+ }
122
+
123
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
124
+ _mm512_broadcastss_ps(__m128 __X)
125
+ {
126
+ float __f = __X[0];
127
+ return (__v16sf){ __f, __f, __f, __f,
128
+ __f, __f, __f, __f,
129
+ __f, __f, __f, __f,
130
+ __f, __f, __f, __f };
131
+ }
132
+
133
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
134
+ _mm512_broadcastsd_pd(__m128d __X)
135
+ {
136
+ double __d = __X[0];
137
+ return (__v8df){ __d, __d, __d, __d,
138
+ __d, __d, __d, __d };
139
+ }
140
+
141
+ /* Cast between vector types */
142
+
143
+ static __inline __m512d __DEFAULT_FN_ATTRS
144
+ _mm512_castpd256_pd512(__m256d __a)
145
+ {
146
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
147
+ }
148
+
149
+ static __inline __m512 __DEFAULT_FN_ATTRS
150
+ _mm512_castps256_ps512(__m256 __a)
151
+ {
152
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
153
+ -1, -1, -1, -1, -1, -1, -1, -1);
154
+ }
155
+
156
+ static __inline __m128d __DEFAULT_FN_ATTRS
157
+ _mm512_castpd512_pd128(__m512d __a)
158
+ {
159
+ return __builtin_shufflevector(__a, __a, 0, 1);
160
+ }
161
+
162
+ static __inline __m128 __DEFAULT_FN_ATTRS
163
+ _mm512_castps512_ps128(__m512 __a)
164
+ {
165
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
166
+ }
167
+
168
+ /* Bitwise operators */
169
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
170
+ _mm512_and_epi32(__m512i __a, __m512i __b)
171
+ {
172
+ return __a & __b;
173
+ }
174
+
175
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
176
+ _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
177
+ {
178
+ return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
179
+ (__v16si) __b,
180
+ (__v16si) __src,
181
+ (__mmask16) __k);
182
+ }
183
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
184
+ _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
185
+ {
186
+ return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
187
+ (__v16si) __b,
188
+ (__v16si)
189
+ _mm512_setzero_si512 (),
190
+ (__mmask16) __k);
191
+ }
192
+
193
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
194
+ _mm512_and_epi64(__m512i __a, __m512i __b)
195
+ {
196
+ return __a & __b;
197
+ }
198
+
199
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
200
+ _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
201
+ {
202
+ return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
203
+ (__v8di) __b,
204
+ (__v8di) __src,
205
+ (__mmask8) __k);
206
+ }
207
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
208
+ _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
209
+ {
210
+ return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
211
+ (__v8di) __b,
212
+ (__v8di)
213
+ _mm512_setzero_si512 (),
214
+ (__mmask8) __k);
215
+ }
216
+
217
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
218
+ _mm512_andnot_epi32 (__m512i __A, __m512i __B)
219
+ {
220
+ return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
221
+ (__v16si) __B,
222
+ (__v16si)
223
+ _mm512_setzero_si512 (),
224
+ (__mmask16) -1);
225
+ }
226
+
227
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
228
+ _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
229
+ {
230
+ return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
231
+ (__v16si) __B,
232
+ (__v16si) __W,
233
+ (__mmask16) __U);
234
+ }
235
+
236
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
237
+ _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
238
+ {
239
+ return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
240
+ (__v16si) __B,
241
+ (__v16si)
242
+ _mm512_setzero_si512 (),
243
+ (__mmask16) __U);
244
+ }
245
+
246
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
247
+ _mm512_andnot_epi64 (__m512i __A, __m512i __B)
248
+ {
249
+ return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
250
+ (__v8di) __B,
251
+ (__v8di)
252
+ _mm512_setzero_si512 (),
253
+ (__mmask8) -1);
254
+ }
255
+
256
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
257
+ _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
258
+ { /* Merge-masked 64-bit and-not: forwards __A, __B, passthrough __W and mask __U to the builtin. */
259
+ return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
260
+ (__v8di) __B,
261
+ (__v8di) __W, (__mmask8) __U); /* explicit mask cast, matching the sibling masked wrappers */
262
+ }
263
+
264
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
265
+ _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
266
+ { /* Same builtin as _mm512_mask_andnot_epi64, with an all-zero integer vector in the slot where that variant passes __W. */
267
+ return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
268
+ (__v8di) __B,
269
+ (__v8di)
270
+ _mm512_setzero_si512 (), /* integer zero vector (was _mm512_setzero_pd), as in the other integer maskz wrappers */
271
+ (__mmask8) __U);
272
+ }
273
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
274
+ _mm512_or_epi32(__m512i __a, __m512i __b)
275
+ {
276
+ return __a | __b;
277
+ }
278
+
279
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
280
+ _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
281
+ {
282
+ return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
283
+ (__v16si) __b,
284
+ (__v16si) __src,
285
+ (__mmask16) __k);
286
+ }
287
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
288
+ _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
289
+ {
290
+ return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
291
+ (__v16si) __b,
292
+ (__v16si)
293
+ _mm512_setzero_si512 (),
294
+ (__mmask16) __k);
295
+ }
296
+
297
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
298
+ _mm512_or_epi64(__m512i __a, __m512i __b)
299
+ {
300
+ return __a | __b;
301
+ }
302
+
303
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
304
+ _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
305
+ {
306
+ return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
307
+ (__v8di) __b,
308
+ (__v8di) __src,
309
+ (__mmask8) __k);
310
+ }
311
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
312
+ _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
313
+ {
314
+ return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
315
+ (__v8di) __b,
316
+ (__v8di)
317
+ _mm512_setzero_si512 (),
318
+ (__mmask8) __k);
319
+ }
320
+
321
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
322
+ _mm512_xor_epi32(__m512i __a, __m512i __b)
323
+ {
324
+ return __a ^ __b;
325
+ }
326
+
327
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
328
+ _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
329
+ {
330
+ return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
331
+ (__v16si) __b,
332
+ (__v16si) __src,
333
+ (__mmask16) __k);
334
+ }
335
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
336
+ _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
337
+ {
338
+ return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
339
+ (__v16si) __b,
340
+ (__v16si)
341
+ _mm512_setzero_si512 (),
342
+ (__mmask16) __k);
343
+ }
344
+
345
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
346
+ _mm512_xor_epi64(__m512i __a, __m512i __b)
347
+ {
348
+ return __a ^ __b;
349
+ }
350
+
351
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
352
+ _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
353
+ {
354
+ return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
355
+ (__v8di) __b,
356
+ (__v8di) __src,
357
+ (__mmask8) __k);
358
+ }
359
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
360
+ _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
361
+ {
362
+ return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
363
+ (__v8di) __b,
364
+ (__v8di)
365
+ _mm512_setzero_si512 (),
366
+ (__mmask8) __k);
367
+ }
368
+
369
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
370
+ _mm512_and_si512(__m512i __a, __m512i __b)
371
+ {
372
+ return __a & __b;
373
+ }
374
+
375
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
376
+ _mm512_or_si512(__m512i __a, __m512i __b)
377
+ {
378
+ return __a | __b;
379
+ }
380
+
381
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
382
+ _mm512_xor_si512(__m512i __a, __m512i __b)
383
+ {
384
+ return __a ^ __b;
385
+ }
386
+ /* Arithmetic */
387
+
388
+ static __inline __m512d __DEFAULT_FN_ATTRS
389
+ _mm512_add_pd(__m512d __a, __m512d __b)
390
+ {
391
+ return __a + __b;
392
+ }
393
+
394
+ static __inline __m512 __DEFAULT_FN_ATTRS
395
+ _mm512_add_ps(__m512 __a, __m512 __b)
396
+ {
397
+ return __a + __b;
398
+ }
399
+
400
+ static __inline __m512d __DEFAULT_FN_ATTRS
401
+ _mm512_mul_pd(__m512d __a, __m512d __b)
402
+ {
403
+ return __a * __b;
404
+ }
405
+
406
+ static __inline __m512 __DEFAULT_FN_ATTRS
407
+ _mm512_mul_ps(__m512 __a, __m512 __b)
408
+ {
409
+ return __a * __b;
410
+ }
411
+
412
+ static __inline __m512d __DEFAULT_FN_ATTRS
413
+ _mm512_sub_pd(__m512d __a, __m512d __b)
414
+ {
415
+ return __a - __b;
416
+ }
417
+
418
+ static __inline __m512 __DEFAULT_FN_ATTRS
419
+ _mm512_sub_ps(__m512 __a, __m512 __b)
420
+ {
421
+ return __a - __b;
422
+ }
423
+
424
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
425
+ _mm512_add_epi64 (__m512i __A, __m512i __B)
426
+ {
427
+ return (__m512i) ((__v8di) __A + (__v8di) __B);
428
+ }
429
+
430
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
431
+ _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
432
+ {
433
+ return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
434
+ (__v8di) __B,
435
+ (__v8di) __W,
436
+ (__mmask8) __U);
437
+ }
438
+
439
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
440
+ _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
441
+ {
442
+ return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
443
+ (__v8di) __B,
444
+ (__v8di)
445
+ _mm512_setzero_si512 (),
446
+ (__mmask8) __U);
447
+ }
448
+
449
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
450
+ _mm512_sub_epi64 (__m512i __A, __m512i __B)
451
+ {
452
+ return (__m512i) ((__v8di) __A - (__v8di) __B);
453
+ }
454
+
455
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
456
+ _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
457
+ {
458
+ return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
459
+ (__v8di) __B,
460
+ (__v8di) __W,
461
+ (__mmask8) __U);
462
+ }
463
+
464
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
465
+ _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
466
+ {
467
+ return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
468
+ (__v8di) __B,
469
+ (__v8di)
470
+ _mm512_setzero_si512 (),
471
+ (__mmask8) __U);
472
+ }
473
+
474
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
475
+ _mm512_add_epi32 (__m512i __A, __m512i __B)
476
+ {
477
+ return (__m512i) ((__v16si) __A + (__v16si) __B);
478
+ }
479
+
480
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
481
+ _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
482
+ {
483
+ return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
484
+ (__v16si) __B,
485
+ (__v16si) __W,
486
+ (__mmask16) __U);
487
+ }
488
+
489
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
490
+ _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
491
+ {
492
+ return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
493
+ (__v16si) __B,
494
+ (__v16si)
495
+ _mm512_setzero_si512 (),
496
+ (__mmask16) __U);
497
+ }
498
+
499
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
500
+ _mm512_sub_epi32 (__m512i __A, __m512i __B)
501
+ {
502
+ return (__m512i) ((__v16si) __A - (__v16si) __B);
503
+ }
504
+
505
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
506
+ _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
507
+ {
508
+ return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
509
+ (__v16si) __B,
510
+ (__v16si) __W,
511
+ (__mmask16) __U);
512
+ }
513
+
514
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
515
+ _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
516
+ {
517
+ return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
518
+ (__v16si) __B,
519
+ (__v16si)
520
+ _mm512_setzero_si512 (),
521
+ (__mmask16) __U);
522
+ }
523
+
524
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
525
+ _mm512_max_pd(__m512d __A, __m512d __B)
526
+ {
527
+ return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
528
+ (__v8df) __B,
529
+ (__v8df)
530
+ _mm512_setzero_pd (),
531
+ (__mmask8) -1,
532
+ _MM_FROUND_CUR_DIRECTION);
533
+ }
534
+
535
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
536
+ _mm512_max_ps(__m512 __A, __m512 __B)
537
+ {
538
+ return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
539
+ (__v16sf) __B,
540
+ (__v16sf)
541
+ _mm512_setzero_ps (),
542
+ (__mmask16) -1,
543
+ _MM_FROUND_CUR_DIRECTION);
544
+ }
545
+
546
+ static __inline __m512i
547
+ __DEFAULT_FN_ATTRS
548
+ _mm512_max_epi32(__m512i __A, __m512i __B)
549
+ {
550
+ return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
551
+ (__v16si) __B,
552
+ (__v16si)
553
+ _mm512_setzero_si512 (),
554
+ (__mmask16) -1);
555
+ }
556
+
557
+ static __inline __m512i __DEFAULT_FN_ATTRS
558
+ _mm512_max_epu32(__m512i __A, __m512i __B)
559
+ {
560
+ return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
561
+ (__v16si) __B,
562
+ (__v16si)
563
+ _mm512_setzero_si512 (),
564
+ (__mmask16) -1);
565
+ }
566
+
567
+ static __inline __m512i __DEFAULT_FN_ATTRS
568
+ _mm512_max_epi64(__m512i __A, __m512i __B)
569
+ {
570
+ return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
571
+ (__v8di) __B,
572
+ (__v8di)
573
+ _mm512_setzero_si512 (),
574
+ (__mmask8) -1);
575
+ }
576
+
577
+ static __inline __m512i __DEFAULT_FN_ATTRS
578
+ _mm512_max_epu64(__m512i __A, __m512i __B)
579
+ {
580
+ return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
581
+ (__v8di) __B,
582
+ (__v8di)
583
+ _mm512_setzero_si512 (),
584
+ (__mmask8) -1);
585
+ }
586
+
587
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
588
+ _mm512_min_pd(__m512d __A, __m512d __B)
589
+ {
590
+ return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
591
+ (__v8df) __B,
592
+ (__v8df)
593
+ _mm512_setzero_pd (),
594
+ (__mmask8) -1,
595
+ _MM_FROUND_CUR_DIRECTION);
596
+ }
597
+
598
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
599
+ _mm512_min_ps(__m512 __A, __m512 __B)
600
+ {
601
+ return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
602
+ (__v16sf) __B,
603
+ (__v16sf)
604
+ _mm512_setzero_ps (),
605
+ (__mmask16) -1,
606
+ _MM_FROUND_CUR_DIRECTION);
607
+ }
608
+
609
+ static __inline __m512i
610
+ __DEFAULT_FN_ATTRS
611
+ _mm512_min_epi32(__m512i __A, __m512i __B)
612
+ {
613
+ return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
614
+ (__v16si) __B,
615
+ (__v16si)
616
+ _mm512_setzero_si512 (),
617
+ (__mmask16) -1);
618
+ }
619
+
620
+ static __inline __m512i __DEFAULT_FN_ATTRS
621
+ _mm512_min_epu32(__m512i __A, __m512i __B)
622
+ {
623
+ return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
624
+ (__v16si) __B,
625
+ (__v16si)
626
+ _mm512_setzero_si512 (),
627
+ (__mmask16) -1);
628
+ }
629
+
630
+ static __inline __m512i __DEFAULT_FN_ATTRS
631
+ _mm512_min_epi64(__m512i __A, __m512i __B)
632
+ {
633
+ return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
634
+ (__v8di) __B,
635
+ (__v8di)
636
+ _mm512_setzero_si512 (),
637
+ (__mmask8) -1);
638
+ }
639
+
640
+ static __inline __m512i __DEFAULT_FN_ATTRS
641
+ _mm512_min_epu64(__m512i __A, __m512i __B)
642
+ {
643
+ return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
644
+ (__v8di) __B,
645
+ (__v8di)
646
+ _mm512_setzero_si512 (),
647
+ (__mmask8) -1);
648
+ }
649
+
650
+ static __inline __m512i __DEFAULT_FN_ATTRS
651
+ _mm512_mul_epi32(__m512i __X, __m512i __Y)
652
+ {
653
+ return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
654
+ (__v16si) __Y,
655
+ (__v8di)
656
+ _mm512_setzero_si512 (),
657
+ (__mmask8) -1);
658
+ }
659
+
660
+ static __inline __m512i __DEFAULT_FN_ATTRS
661
+ _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
662
+ {
663
+ return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
664
+ (__v16si) __Y,
665
+ (__v8di) __W, __M);
666
+ }
667
+
668
+ static __inline __m512i __DEFAULT_FN_ATTRS
669
+ _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
670
+ {
671
+ return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
672
+ (__v16si) __Y,
673
+ (__v8di)
674
+ _mm512_setzero_si512 (),
675
+ __M);
676
+ }
677
+
678
+ static __inline __m512i __DEFAULT_FN_ATTRS
679
+ _mm512_mul_epu32(__m512i __X, __m512i __Y)
680
+ {
681
+ return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
682
+ (__v16si) __Y,
683
+ (__v8di)
684
+ _mm512_setzero_si512 (),
685
+ (__mmask8) -1);
686
+ }
687
+
688
+ static __inline __m512i __DEFAULT_FN_ATTRS
689
+ _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
690
+ {
691
+ return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
692
+ (__v16si) __Y,
693
+ (__v8di) __W, __M);
694
+ }
695
+
696
+ static __inline __m512i __DEFAULT_FN_ATTRS
697
+ _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
698
+ {
699
+ return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
700
+ (__v16si) __Y,
701
+ (__v8di)
702
+ _mm512_setzero_si512 (),
703
+ __M);
704
+ }
705
+
706
+ static __inline __m512i __DEFAULT_FN_ATTRS
707
+ _mm512_mullo_epi32 (__m512i __A, __m512i __B)
708
+ {
709
+ return (__m512i) ((__v16si) __A * (__v16si) __B);
710
+ }
711
+
712
+ static __inline __m512i __DEFAULT_FN_ATTRS
713
+ _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
714
+ {
715
+ return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
716
+ (__v16si) __B,
717
+ (__v16si)
718
+ _mm512_setzero_si512 (),
719
+ __M);
720
+ }
721
+
722
+ static __inline __m512i __DEFAULT_FN_ATTRS
723
+ _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
724
+ {
725
+ return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
726
+ (__v16si) __B,
727
+ (__v16si) __W, __M);
728
+ }
729
+
730
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
731
+ _mm512_sqrt_pd(__m512d a)
732
+ {
733
+ return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a,
734
+ (__v8df) _mm512_setzero_pd (),
735
+ (__mmask8) -1,
736
+ _MM_FROUND_CUR_DIRECTION);
737
+ }
738
+
739
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
740
+ _mm512_sqrt_ps(__m512 a)
741
+ {
742
+ return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a,
743
+ (__v16sf) _mm512_setzero_ps (),
744
+ (__mmask16) -1,
745
+ _MM_FROUND_CUR_DIRECTION);
746
+ }
747
+
748
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
749
+ _mm512_rsqrt14_pd(__m512d __A)
750
+ {
751
+ return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
752
+ (__v8df)
753
+ _mm512_setzero_pd (),
754
+ (__mmask8) -1);}
755
+
756
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
757
+ _mm512_rsqrt14_ps(__m512 __A)
758
+ {
759
+ return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
760
+ (__v16sf)
761
+ _mm512_setzero_ps (),
762
+ (__mmask16) -1);
763
+ }
764
+
765
+ static __inline__ __m128 __DEFAULT_FN_ATTRS
766
+ _mm_rsqrt14_ss(__m128 __A, __m128 __B)
767
+ {
768
+ return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
769
+ (__v4sf) __B,
770
+ (__v4sf)
771
+ _mm_setzero_ps (),
772
+ (__mmask8) -1);
773
+ }
774
+
775
+ static __inline__ __m128d __DEFAULT_FN_ATTRS
776
+ _mm_rsqrt14_sd(__m128d __A, __m128d __B)
777
+ {
778
+ return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
779
+ (__v2df) __B,
780
+ (__v2df)
781
+ _mm_setzero_pd (),
782
+ (__mmask8) -1);
783
+ }
784
+
785
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
786
+ _mm512_rcp14_pd(__m512d __A)
787
+ {
788
+ return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
789
+ (__v8df)
790
+ _mm512_setzero_pd (),
791
+ (__mmask8) -1);
792
+ }
793
+
794
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
795
+ _mm512_rcp14_ps(__m512 __A)
796
+ {
797
+ return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
798
+ (__v16sf)
799
+ _mm512_setzero_ps (),
800
+ (__mmask16) -1);
801
+ }
802
+ static __inline__ __m128 __DEFAULT_FN_ATTRS
803
+ _mm_rcp14_ss(__m128 __A, __m128 __B)
804
+ {
805
+ return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
806
+ (__v4sf) __B,
807
+ (__v4sf)
808
+ _mm_setzero_ps (),
809
+ (__mmask8) -1);
810
+ }
811
+
812
+ static __inline__ __m128d __DEFAULT_FN_ATTRS
813
+ _mm_rcp14_sd(__m128d __A, __m128d __B)
814
+ {
815
+ return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
816
+ (__v2df) __B,
817
+ (__v2df)
818
+ _mm_setzero_pd (),
819
+ (__mmask8) -1);
820
+ }
821
+
822
+ static __inline __m512 __DEFAULT_FN_ATTRS
823
+ _mm512_floor_ps(__m512 __A)
824
+ {
825
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
826
+ _MM_FROUND_FLOOR,
827
+ (__v16sf) __A, -1,
828
+ _MM_FROUND_CUR_DIRECTION);
829
+ }
830
+
831
+ static __inline __m512d __DEFAULT_FN_ATTRS
832
+ _mm512_floor_pd(__m512d __A)
833
+ {
834
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
835
+ _MM_FROUND_FLOOR,
836
+ (__v8df) __A, -1,
837
+ _MM_FROUND_CUR_DIRECTION);
838
+ }
839
+
840
+ static __inline __m512 __DEFAULT_FN_ATTRS
841
+ _mm512_ceil_ps(__m512 __A)
842
+ {
843
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
844
+ _MM_FROUND_CEIL,
845
+ (__v16sf) __A, -1,
846
+ _MM_FROUND_CUR_DIRECTION);
847
+ }
848
+
849
+ static __inline __m512d __DEFAULT_FN_ATTRS
850
+ _mm512_ceil_pd(__m512d __A)
851
+ {
852
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
853
+ _MM_FROUND_CEIL,
854
+ (__v8df) __A, -1,
855
+ _MM_FROUND_CUR_DIRECTION);
856
+ }
857
+
858
+ static __inline __m512i __DEFAULT_FN_ATTRS
859
+ _mm512_abs_epi64(__m512i __A)
860
+ {
861
+ return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
862
+ (__v8di)
863
+ _mm512_setzero_si512 (),
864
+ (__mmask8) -1);
865
+ }
866
+
867
+ static __inline __m512i __DEFAULT_FN_ATTRS
868
+ _mm512_abs_epi32(__m512i __A)
869
+ {
870
+ return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
871
+ (__v16si)
872
+ _mm512_setzero_si512 (),
873
+ (__mmask16) -1);
874
+ }
875
+
876
/* Expands to __builtin_ia32_rndscaleps_mask with B as the second argument;
   A is both the input and the third operand, the mask is -1. */
#define _mm512_roundscale_ps(A, B) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(A), \
                                         (B), \
                                         (__v16sf)(A), \
                                         -1, \
                                         _MM_FROUND_CUR_DIRECTION); })
879
+
880
/* Expands to __builtin_ia32_rndscalepd_mask with B as the second argument;
   A is both the input and the third operand, the mask is -1. */
#define _mm512_roundscale_pd(A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(A), \
                                          (B), \
                                          (__v8df)(A), \
                                          -1, \
                                          _MM_FROUND_CUR_DIRECTION); })
883
+
884
/* Expands to __builtin_ia32_vfmaddpd512_mask (A, B, C) with an all-ones mask
   and R as the final argument. */
#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), (__v8df) (B), \
                                             (__v8df) (C), (__mmask8) -1, \
                                             (R)); })
888
+
889
+
890
/* Expands to __builtin_ia32_vfmaddpd512_mask (A, B, C) with mask U and R as
   the final argument. */
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), (__v8df) (B), \
                                             (__v8df) (C), (__mmask8) (U), \
                                             (R)); })
894
+
895
+
896
/* Expands to __builtin_ia32_vfmaddpd512_mask3 (A, B, C) with mask U and R as
   the final argument. */
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) (A), (__v8df) (B), \
                                              (__v8df) (C), (__mmask8) (U), \
                                              (R)); })
900
+
901
+
902
/* Expands to __builtin_ia32_vfmaddpd512_maskz (A, B, C) with mask U and R as
   the final argument. */
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) (A), (__v8df) (B), \
                                              (__v8df) (C), (__mmask8) (U), \
                                              (R)); })
906
+
907
+
908
/* Expands to __builtin_ia32_vfmaddpd512_mask (A, B, -C) with an all-ones mask
   and R as the final argument. */
#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), (__v8df) (B), \
                                             -(__v8df) (C), (__mmask8) -1, \
                                             (R)); })
912
+
913
+
914
/* Expands to __builtin_ia32_vfmaddpd512_mask (A, B, -C) with mask U and R as
   the final argument. */
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), (__v8df) (B), \
                                             -(__v8df) (C), (__mmask8) (U), \
                                             (R)); })
918
+
919
+
920
/* Expands to __builtin_ia32_vfmaddpd512_maskz (A, B, -C) with mask U and R as
   the final argument. */
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) (A), (__v8df) (B), \
                                              -(__v8df) (C), (__mmask8) (U), \
                                              (R)); })
924
+
925
+
926
/* Expands to __builtin_ia32_vfmaddpd512_mask (-A, B, C) with an all-ones mask
   and R as the final argument. */
#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) (A), (__v8df) (B), \
                                             (__v8df) (C), (__mmask8) -1, \
                                             (R)); })
930
+
931
+
932
/* Expands to __builtin_ia32_vfmaddpd512_mask3 (-A, B, C) with mask U and R as
   the final argument. */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) (A), (__v8df) (B), \
                                              (__v8df) (C), (__mmask8) (U), \
                                              (R)); })
936
+
937
+
938
/* Expands to __builtin_ia32_vfmaddpd512_maskz (-A, B, C) with mask U and R as
   the final argument. */
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) (A), (__v8df) (B), \
                                              (__v8df) (C), (__mmask8) (U), \
                                              (R)); })
942
+
943
+
944
/* Expands to __builtin_ia32_vfmaddpd512_mask (-A, B, -C) with an all-ones
   mask and R as the final argument. */
#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) (A), (__v8df) (B), \
                                             -(__v8df) (C), (__mmask8) -1, \
                                             (R)); })
948
+
949
+
950
/* Expands to __builtin_ia32_vfmaddpd512_maskz (-A, B, -C) with mask U and R
   as the final argument. */
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) (A), (__v8df) (B), \
                                              -(__v8df) (C), (__mmask8) (U), \
                                              (R)); })
954
+
955
+
956
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
957
+ _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
958
+ {
959
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
960
+ (__v8df) __B,
961
+ (__v8df) __C,
962
+ (__mmask8) -1,
963
+ _MM_FROUND_CUR_DIRECTION);
964
+ }
965
+
966
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
967
+ _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
968
+ {
969
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
970
+ (__v8df) __B,
971
+ (__v8df) __C,
972
+ (__mmask8) __U,
973
+ _MM_FROUND_CUR_DIRECTION);
974
+ }
975
+
976
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
977
+ _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
978
+ {
979
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
980
+ (__v8df) __B,
981
+ (__v8df) __C,
982
+ (__mmask8) __U,
983
+ _MM_FROUND_CUR_DIRECTION);
984
+ }
985
+
986
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
987
+ _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
988
+ {
989
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
990
+ (__v8df) __B,
991
+ (__v8df) __C,
992
+ (__mmask8) __U,
993
+ _MM_FROUND_CUR_DIRECTION);
994
+ }
995
+
996
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
997
+ _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
998
+ {
999
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1000
+ (__v8df) __B,
1001
+ -(__v8df) __C,
1002
+ (__mmask8) -1,
1003
+ _MM_FROUND_CUR_DIRECTION);
1004
+ }
1005
+
1006
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1007
+ _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1008
+ {
1009
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1010
+ (__v8df) __B,
1011
+ -(__v8df) __C,
1012
+ (__mmask8) __U,
1013
+ _MM_FROUND_CUR_DIRECTION);
1014
+ }
1015
+
1016
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1017
+ _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1018
+ {
1019
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
1020
+ (__v8df) __B,
1021
+ -(__v8df) __C,
1022
+ (__mmask8) __U,
1023
+ _MM_FROUND_CUR_DIRECTION);
1024
+ }
1025
+
1026
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1027
+ _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
1028
+ {
1029
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
1030
+ (__v8df) __B,
1031
+ (__v8df) __C,
1032
+ (__mmask8) -1,
1033
+ _MM_FROUND_CUR_DIRECTION);
1034
+ }
1035
+
1036
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1037
+ _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
1038
+ {
1039
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
1040
+ (__v8df) __B,
1041
+ (__v8df) __C,
1042
+ (__mmask8) __U,
1043
+ _MM_FROUND_CUR_DIRECTION);
1044
+ }
1045
+
1046
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1047
+ _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1048
+ {
1049
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
1050
+ (__v8df) __B,
1051
+ (__v8df) __C,
1052
+ (__mmask8) __U,
1053
+ _MM_FROUND_CUR_DIRECTION);
1054
+ }
1055
+
1056
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1057
+ _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
1058
+ {
1059
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
1060
+ (__v8df) __B,
1061
+ -(__v8df) __C,
1062
+ (__mmask8) -1,
1063
+ _MM_FROUND_CUR_DIRECTION);
1064
+ }
1065
+
1066
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1067
+ _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1068
+ {
1069
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
1070
+ (__v8df) __B,
1071
+ -(__v8df) __C,
1072
+ (__mmask8) __U,
1073
+ _MM_FROUND_CUR_DIRECTION);
1074
+ }
1075
+
1076
/* Expands to __builtin_ia32_vfmaddps512_mask (A, B, C) with an all-ones mask
   and R as the final argument. */
#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), (__v16sf) (B), \
                                            (__v16sf) (C), (__mmask16) -1, \
                                            (R)); })
1080
+
1081
+
1082
/* Expands to __builtin_ia32_vfmaddps512_mask (A, B, C) with mask U and R as
   the final argument. */
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), (__v16sf) (B), \
                                            (__v16sf) (C), (__mmask16) (U), \
                                            (R)); })
1086
+
1087
+
1088
/* Expands to __builtin_ia32_vfmaddps512_mask3 (A, B, C) with mask U and R as
   the final argument. */
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) (A), (__v16sf) (B), \
                                             (__v16sf) (C), (__mmask16) (U), \
                                             (R)); })
1092
+
1093
+
1094
/* Expands to __builtin_ia32_vfmaddps512_maskz (A, B, C) with mask U and R as
   the final argument. */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) (A), (__v16sf) (B), \
                                             (__v16sf) (C), (__mmask16) (U), \
                                             (R)); })
1098
+
1099
+
1100
/* Expands to __builtin_ia32_vfmaddps512_mask (A, B, -C) with an all-ones mask
   and R as the final argument. */
#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), (__v16sf) (B), \
                                            -(__v16sf) (C), (__mmask16) -1, \
                                            (R)); })
1104
+
1105
+
1106
/* Expands to __builtin_ia32_vfmaddps512_mask (A, B, -C) with mask U and R as
   the final argument. */
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), (__v16sf) (B), \
                                            -(__v16sf) (C), (__mmask16) (U), \
                                            (R)); })
1110
+
1111
+
1112
/* Expands to __builtin_ia32_vfmaddps512_maskz (A, B, -C) with mask U and R as
   the final argument. */
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) (A), (__v16sf) (B), \
                                             -(__v16sf) (C), (__mmask16) (U), \
                                             (R)); })
1116
+
1117
+
1118
/* Expands to __builtin_ia32_vfmaddps512_mask (-A, B, C) with an all-ones mask
   and R as the final argument. */
#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) (A), (__v16sf) (B), \
                                            (__v16sf) (C), (__mmask16) -1, \
                                            (R)); })
1122
+
1123
+
1124
/* Expands to __builtin_ia32_vfmaddps512_mask3 (-A, B, C) with mask U and R as
   the final argument. */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) (A), (__v16sf) (B), \
                                             (__v16sf) (C), (__mmask16) (U), \
                                             (R)); })
1128
+
1129
+
1130
/* Expands to __builtin_ia32_vfmaddps512_maskz (-A, B, C) with mask U and R as
   the final argument. */
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) (A), (__v16sf) (B), \
                                             (__v16sf) (C), (__mmask16) (U), \
                                             (R)); })
1134
+
1135
+
1136
/* Expands to __builtin_ia32_vfmaddps512_mask (-A, B, -C) with an all-ones
   mask and R as the final argument. */
#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) (A), (__v16sf) (B), \
                                            -(__v16sf) (C), (__mmask16) -1, \
                                            (R)); })
1140
+
1141
+
1142
/* Expands to __builtin_ia32_vfmaddps512_maskz (-A, B, -C) with mask U and R
   as the final argument. */
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) (A), (__v16sf) (B), \
                                             -(__v16sf) (C), (__mmask16) (U), \
                                             (R)); })
1146
+
1147
+
1148
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1149
+ _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
1150
+ {
1151
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
1152
+ (__v16sf) __B,
1153
+ (__v16sf) __C,
1154
+ (__mmask16) -1,
1155
+ _MM_FROUND_CUR_DIRECTION);
1156
+ }
1157
+
1158
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1159
+ _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
1160
+ {
1161
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
1162
+ (__v16sf) __B,
1163
+ (__v16sf) __C,
1164
+ (__mmask16) __U,
1165
+ _MM_FROUND_CUR_DIRECTION);
1166
+ }
1167
+
1168
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1169
+ _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
1170
+ {
1171
+ return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
1172
+ (__v16sf) __B,
1173
+ (__v16sf) __C,
1174
+ (__mmask16) __U,
1175
+ _MM_FROUND_CUR_DIRECTION);
1176
+ }
1177
+
1178
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1179
+ _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1180
+ {
1181
+ return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
1182
+ (__v16sf) __B,
1183
+ (__v16sf) __C,
1184
+ (__mmask16) __U,
1185
+ _MM_FROUND_CUR_DIRECTION);
1186
+ }
1187
+
1188
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1189
+ _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
1190
+ {
1191
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
1192
+ (__v16sf) __B,
1193
+ -(__v16sf) __C,
1194
+ (__mmask16) -1,
1195
+ _MM_FROUND_CUR_DIRECTION);
1196
+ }
1197
+
1198
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1199
+ _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
1200
+ {
1201
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
1202
+ (__v16sf) __B,
1203
+ -(__v16sf) __C,
1204
+ (__mmask16) __U,
1205
+ _MM_FROUND_CUR_DIRECTION);
1206
+ }
1207
+
1208
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1209
+ _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1210
+ {
1211
+ return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
1212
+ (__v16sf) __B,
1213
+ -(__v16sf) __C,
1214
+ (__mmask16) __U,
1215
+ _MM_FROUND_CUR_DIRECTION);
1216
+ }
1217
+
1218
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1219
+ _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
1220
+ {
1221
+ return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
1222
+ (__v16sf) __B,
1223
+ (__v16sf) __C,
1224
+ (__mmask16) -1,
1225
+ _MM_FROUND_CUR_DIRECTION);
1226
+ }
1227
+
1228
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1229
+ _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
1230
+ {
1231
+ return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
1232
+ (__v16sf) __B,
1233
+ (__v16sf) __C,
1234
+ (__mmask16) __U,
1235
+ _MM_FROUND_CUR_DIRECTION);
1236
+ }
1237
+
1238
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1239
+ _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1240
+ {
1241
+ return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
1242
+ (__v16sf) __B,
1243
+ (__v16sf) __C,
1244
+ (__mmask16) __U,
1245
+ _MM_FROUND_CUR_DIRECTION);
1246
+ }
1247
+
1248
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1249
+ _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
1250
+ {
1251
+ return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
1252
+ (__v16sf) __B,
1253
+ -(__v16sf) __C,
1254
+ (__mmask16) -1,
1255
+ _MM_FROUND_CUR_DIRECTION);
1256
+ }
1257
+
1258
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1259
+ _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1260
+ {
1261
+ return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
1262
+ (__v16sf) __B,
1263
+ -(__v16sf) __C,
1264
+ (__mmask16) __U,
1265
+ _MM_FROUND_CUR_DIRECTION);
1266
+ }
1267
+
1268
/* Expands to __builtin_ia32_vfmaddsubpd512_mask (A, B, C) with an all-ones
   mask and R as the final argument. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), (__v8df) (B), \
                                                (__v8df) (C), (__mmask8) -1, \
                                                (R)); })
1272
+
1273
+
1274
/* Expands to __builtin_ia32_vfmaddsubpd512_mask (A, B, C) with mask U and R
   as the final argument. */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), (__v8df) (B), \
                                                (__v8df) (C), (__mmask8) (U), \
                                                (R)); })
1278
+
1279
+
1280
/* Expands to __builtin_ia32_vfmaddsubpd512_mask3 (A, B, C) with mask U and R
   as the final argument. */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) (A), (__v8df) (B), \
                                                 (__v8df) (C), (__mmask8) (U), \
                                                 (R)); })
1284
+
1285
+
1286
/* Expands to __builtin_ia32_vfmaddsubpd512_maskz (A, B, C) with mask U and R
   as the final argument. */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) (A), (__v8df) (B), \
                                                 (__v8df) (C), (__mmask8) (U), \
                                                 (R)); })
1290
+
1291
+
1292
/* Expands to __builtin_ia32_vfmaddsubpd512_mask (A, B, -C) with an all-ones
   mask and R as the final argument. */
#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), (__v8df) (B), \
                                                -(__v8df) (C), (__mmask8) -1, \
                                                (R)); })
1296
+
1297
+
1298
/* Expands to __builtin_ia32_vfmaddsubpd512_mask (A, B, -C) with mask U and R
   as the final argument. */
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), (__v8df) (B), \
                                                -(__v8df) (C), (__mmask8) (U), \
                                                (R)); })
1302
+
1303
+
1304
/* Expands to __builtin_ia32_vfmaddsubpd512_maskz (A, B, -C) with mask U and R
   as the final argument. */
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) (A), (__v8df) (B), \
                                                 -(__v8df) (C), (__mmask8) (U), \
                                                 (R)); })
1308
+
1309
+
1310
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1311
+ _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
1312
+ {
1313
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
1314
+ (__v8df) __B,
1315
+ (__v8df) __C,
1316
+ (__mmask8) -1,
1317
+ _MM_FROUND_CUR_DIRECTION);
1318
+ }
1319
+
1320
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1321
+ _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1322
+ {
1323
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
1324
+ (__v8df) __B,
1325
+ (__v8df) __C,
1326
+ (__mmask8) __U,
1327
+ _MM_FROUND_CUR_DIRECTION);
1328
+ }
1329
+
1330
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1331
+ _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
1332
+ {
1333
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
1334
+ (__v8df) __B,
1335
+ (__v8df) __C,
1336
+ (__mmask8) __U,
1337
+ _MM_FROUND_CUR_DIRECTION);
1338
+ }
1339
+
1340
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1341
+ _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1342
+ {
1343
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
1344
+ (__v8df) __B,
1345
+ (__v8df) __C,
1346
+ (__mmask8) __U,
1347
+ _MM_FROUND_CUR_DIRECTION);
1348
+ }
1349
+
1350
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1351
+ _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
1352
+ {
1353
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
1354
+ (__v8df) __B,
1355
+ -(__v8df) __C,
1356
+ (__mmask8) -1,
1357
+ _MM_FROUND_CUR_DIRECTION);
1358
+ }
1359
+
1360
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1361
+ _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1362
+ {
1363
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
1364
+ (__v8df) __B,
1365
+ -(__v8df) __C,
1366
+ (__mmask8) __U,
1367
+ _MM_FROUND_CUR_DIRECTION);
1368
+ }
1369
+
1370
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1371
+ _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1372
+ {
1373
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
1374
+ (__v8df) __B,
1375
+ -(__v8df) __C,
1376
+ (__mmask8) __U,
1377
+ _MM_FROUND_CUR_DIRECTION);
1378
+ }
1379
+
1380
/* Expands to __builtin_ia32_vfmaddsubps512_mask (A, B, C) with an all-ones
   mask and R as the final argument. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), (__v16sf) (B), \
                                               (__v16sf) (C), (__mmask16) -1, \
                                               (R)); })
1384
+
1385
+
1386
/* Expands to __builtin_ia32_vfmaddsubps512_mask (A, B, C) with mask U and R
   as the final argument. */
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), (__v16sf) (B), \
                                               (__v16sf) (C), (__mmask16) (U), \
                                               (R)); })
1390
+
1391
+
1392
/* Expands to __builtin_ia32_vfmaddsubps512_mask3 (A, B, C) with mask U and R
   as the final argument. */
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) (A), (__v16sf) (B), \
                                                (__v16sf) (C), (__mmask16) (U), \
                                                (R)); })
1396
+
1397
+
1398
/* Expands to __builtin_ia32_vfmaddsubps512_maskz (A, B, C) with mask U and R
   as the final argument. */
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) (A), (__v16sf) (B), \
                                                (__v16sf) (C), (__mmask16) (U), \
                                                (R)); })
1402
+
1403
+
1404
/* Expands to __builtin_ia32_vfmaddsubps512_mask (A, B, -C) with an all-ones
   mask and R as the final argument. */
#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), (__v16sf) (B), \
                                               -(__v16sf) (C), (__mmask16) -1, \
                                               (R)); })
1408
+
1409
+
1410
/* Expands to __builtin_ia32_vfmaddsubps512_mask (A, B, -C) with mask U and R
   as the final argument. */
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), (__v16sf) (B), \
                                               -(__v16sf) (C), (__mmask16) (U), \
                                               (R)); })
1414
+
1415
+
1416
/* Expands to __builtin_ia32_vfmaddsubps512_maskz (A, B, -C) with mask U and R
   as the final argument. */
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) (A), (__v16sf) (B), \
                                                -(__v16sf) (C), (__mmask16) (U), \
                                                (R)); })
1420
+
1421
+
1422
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1423
+ _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
1424
+ {
1425
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
1426
+ (__v16sf) __B,
1427
+ (__v16sf) __C,
1428
+ (__mmask16) -1,
1429
+ _MM_FROUND_CUR_DIRECTION);
1430
+ }
1431
+
1432
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1433
+ _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
1434
+ {
1435
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
1436
+ (__v16sf) __B,
1437
+ (__v16sf) __C,
1438
+ (__mmask16) __U,
1439
+ _MM_FROUND_CUR_DIRECTION);
1440
+ }
1441
+
1442
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1443
+ _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
1444
+ {
1445
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
1446
+ (__v16sf) __B,
1447
+ (__v16sf) __C,
1448
+ (__mmask16) __U,
1449
+ _MM_FROUND_CUR_DIRECTION);
1450
+ }
1451
+
1452
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1453
+ _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1454
+ {
1455
+ return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
1456
+ (__v16sf) __B,
1457
+ (__v16sf) __C,
1458
+ (__mmask16) __U,
1459
+ _MM_FROUND_CUR_DIRECTION);
1460
+ }
1461
+
1462
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1463
+ _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
1464
+ {
1465
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
1466
+ (__v16sf) __B,
1467
+ -(__v16sf) __C,
1468
+ (__mmask16) -1,
1469
+ _MM_FROUND_CUR_DIRECTION);
1470
+ }
1471
+
1472
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1473
+ _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
1474
+ {
1475
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
1476
+ (__v16sf) __B,
1477
+ -(__v16sf) __C,
1478
+ (__mmask16) __U,
1479
+ _MM_FROUND_CUR_DIRECTION);
1480
+ }
1481
+
1482
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1483
+ _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1484
+ {
1485
+ return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
1486
+ (__v16sf) __B,
1487
+ -(__v16sf) __C,
1488
+ (__mmask16) __U,
1489
+ _MM_FROUND_CUR_DIRECTION);
1490
+ }
1491
+
1492
/* Expands to __builtin_ia32_vfmsubpd512_mask3 (A, B, C) with mask U and R as
   the final argument. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) (A), (__v8df) (B), \
                                              (__v8df) (C), (__mmask8) (U), \
                                              (R)); })
1496
+
1497
+
1498
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1499
+ _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
1500
+ {
1501
+ return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
1502
+ (__v8df) __B,
1503
+ (__v8df) __C,
1504
+ (__mmask8) __U,
1505
+ _MM_FROUND_CUR_DIRECTION);
1506
+ }
1507
+
1508
/* Expands to __builtin_ia32_vfmsubps512_mask3 (A, B, C) with mask U and R as
   the final argument. */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) (A), (__v16sf) (B), \
                                             (__v16sf) (C), (__mmask16) (U), \
                                             (R)); })
1512
+
1513
+
1514
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1515
+ _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
1516
+ {
1517
+ return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
1518
+ (__v16sf) __B,
1519
+ (__v16sf) __C,
1520
+ (__mmask16) __U,
1521
+ _MM_FROUND_CUR_DIRECTION);
1522
+ }
1523
+
1524
/* Expands to __builtin_ia32_vfmsubaddpd512_mask3 (A, B, C) with mask U and R
   as the final argument. */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) (A), (__v8df) (B), \
                                                 (__v8df) (C), (__mmask8) (U), \
                                                 (R)); })
1528
+
1529
+
1530
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1531
+ _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
1532
+ {
1533
+ return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
1534
+ (__v8df) __B,
1535
+ (__v8df) __C,
1536
+ (__mmask8) __U,
1537
+ _MM_FROUND_CUR_DIRECTION);
1538
+ }
1539
+
1540
/* Expands to __builtin_ia32_vfmsubaddps512_mask3 (A, B, C) with mask U and R
   as the final argument. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) (A), (__v16sf) (B), \
                                                (__v16sf) (C), (__mmask16) (U), \
                                                (R)); })
1544
+
1545
+
1546
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1547
+ _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
1548
+ {
1549
+ return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
1550
+ (__v16sf) __B,
1551
+ (__v16sf) __C,
1552
+ (__mmask16) __U,
1553
+ _MM_FROUND_CUR_DIRECTION);
1554
+ }
1555
+
1556
/* Explicit-rounding form: expands to one call of
 * __builtin_ia32_vfnmaddpd512_mask.  Note the macro takes the mask U second
 * while the builtin receives it after the three vector operands. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(A), \
                                            (__v8df)(B), \
                                            (__v8df)(C), \
                                            (__mmask8)(U), \
                                            (R)); })
1560
+
1561
+
1562
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1563
+ _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1564
+ {
1565
+ return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
1566
+ (__v8df) __B,
1567
+ (__v8df) __C,
1568
+ (__mmask8) __U,
1569
+ _MM_FROUND_CUR_DIRECTION);
1570
+ }
1571
+
1572
/* Explicit-rounding form: expands to one call of
 * __builtin_ia32_vfnmaddps512_mask.  Note the macro takes the mask U second
 * while the builtin receives it after the three vector operands. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(A), \
                                           (__v16sf)(B), \
                                           (__v16sf)(C), \
                                           (__mmask16)(U), \
                                           (R)); })
1576
+
1577
+
1578
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1579
+ _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
1580
+ {
1581
+ return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
1582
+ (__v16sf) __B,
1583
+ (__v16sf) __C,
1584
+ (__mmask16) __U,
1585
+ _MM_FROUND_CUR_DIRECTION);
1586
+ }
1587
+
1588
/* Explicit-rounding form: expands to one call of
 * __builtin_ia32_vfnmsubpd512_mask.  Note the macro takes the mask U second
 * while the builtin receives it after the three vector operands. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(A), \
                                            (__v8df)(B), \
                                            (__v8df)(C), \
                                            (__mmask8)(U), \
                                            (R)); })
1592
+
1593
+
1594
/* Explicit-rounding form: expands to one call of
 * __builtin_ia32_vfnmsubpd512_mask3, forwarding R as the final operand. */
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(A), \
                                             (__v8df)(B), \
                                             (__v8df)(C), \
                                             (__mmask8)(U), \
                                             (R)); })
1598
+
1599
+
1600
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1601
+ _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1602
+ {
1603
+ return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
1604
+ (__v8df) __B,
1605
+ (__v8df) __C,
1606
+ (__mmask8) __U,
1607
+ _MM_FROUND_CUR_DIRECTION);
1608
+ }
1609
+
1610
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1611
+ _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
1612
+ {
1613
+ return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
1614
+ (__v8df) __B,
1615
+ (__v8df) __C,
1616
+ (__mmask8) __U,
1617
+ _MM_FROUND_CUR_DIRECTION);
1618
+ }
1619
+
1620
/* Explicit-rounding form: expands to one call of
 * __builtin_ia32_vfnmsubps512_mask.  Note the macro takes the mask U second
 * while the builtin receives it after the three vector operands. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(A), \
                                           (__v16sf)(B), \
                                           (__v16sf)(C), \
                                           (__mmask16)(U), \
                                           (R)); })
1624
+
1625
+
1626
/* Explicit-rounding form: expands to one call of
 * __builtin_ia32_vfnmsubps512_mask3, forwarding R as the final operand. */
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(A), \
                                            (__v16sf)(B), \
                                            (__v16sf)(C), \
                                            (__mmask16)(U), \
                                            (R)); })
1630
+
1631
+
1632
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1633
+ _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
1634
+ {
1635
+ return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
1636
+ (__v16sf) __B,
1637
+ (__v16sf) __C,
1638
+ (__mmask16) __U,
1639
+ _MM_FROUND_CUR_DIRECTION);
1640
+ }
1641
+
1642
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1643
+ _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
1644
+ {
1645
+ return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
1646
+ (__v16sf) __B,
1647
+ (__v16sf) __C,
1648
+ (__mmask16) __U,
1649
+ _MM_FROUND_CUR_DIRECTION);
1650
+ }
1651
+
1652
+
1653
+
1654
+ /* Vector permutations */
1655
+
1656
+ static __inline __m512i __DEFAULT_FN_ATTRS
1657
+ _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
1658
+ {
1659
+ return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
1660
+ /* idx */ ,
1661
+ (__v16si) __A,
1662
+ (__v16si) __B,
1663
+ (__mmask16) -1);
1664
+ }
1665
+ static __inline __m512i __DEFAULT_FN_ATTRS
1666
+ _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
1667
+ {
1668
+ return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
1669
+ /* idx */ ,
1670
+ (__v8di) __A,
1671
+ (__v8di) __B,
1672
+ (__mmask8) -1);
1673
+ }
1674
+
1675
+ static __inline __m512d __DEFAULT_FN_ATTRS
1676
+ _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
1677
+ {
1678
+ return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
1679
+ /* idx */ ,
1680
+ (__v8df) __A,
1681
+ (__v8df) __B,
1682
+ (__mmask8) -1);
1683
+ }
1684
+ static __inline __m512 __DEFAULT_FN_ATTRS
1685
+ _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
1686
+ {
1687
+ return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
1688
+ /* idx */ ,
1689
+ (__v16sf) __A,
1690
+ (__v16sf) __B,
1691
+ (__mmask16) -1);
1692
+ }
1693
+
1694
/* Expands to __builtin_ia32_alignq512_mask on A and B with immediate I,
 * a zero vector as the fourth operand and an all-ones mask. */
#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
  (__m512i)__builtin_ia32_alignq512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8di)(__m512i)(B), \
      (I), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1); })
1699
+
1700
/* Expands to __builtin_ia32_alignd512_mask on A and B with immediate I,
 * a zero vector as the fourth operand and an all-ones mask. */
#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
  (__m512i)__builtin_ia32_alignd512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16si)(__m512i)(B), \
      (I), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1); })
1705
+
1706
+ /* Vector Extract */
1707
+
1708
/* Expands to __builtin_ia32_extractf64x4_mask on A with immediate I, a zero
 * 256-bit vector as the third operand and an all-ones mask; yields __m256d.
 *
 * The argument is cast in place rather than copied into a macro-local
 * variable: a local named __A would capture any `__A` appearing in the
 * caller's argument expression and initialise itself from its own
 * indeterminate value. */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
  (__m256d) \
  __builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), \
                                   (I), \
                                   (__v4df)_mm256_setzero_si256(), \
                                   (__mmask8) -1); })
1715
+
1716
/* Expands to __builtin_ia32_extractf32x4_mask on A with immediate I, a zero
 * 128-bit vector as the third operand and an all-ones mask; yields __m128.
 *
 * The argument is cast in place rather than copied into a macro-local
 * variable: a local named __A would capture any `__A` appearing in the
 * caller's argument expression and initialise itself from its own
 * indeterminate value. */
#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
  (__m128) \
  __builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), \
                                   (I), \
                                   (__v4sf)_mm_setzero_ps(), \
                                   (__mmask8) -1); })
1723
+
1724
+ /* Vector Blend */
1725
+
1726
+ static __inline __m512d __DEFAULT_FN_ATTRS
1727
+ _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
1728
+ {
1729
+ return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
1730
+ (__v8df) __W,
1731
+ (__mmask8) __U);
1732
+ }
1733
+
1734
+ static __inline __m512 __DEFAULT_FN_ATTRS
1735
+ _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
1736
+ {
1737
+ return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
1738
+ (__v16sf) __W,
1739
+ (__mmask16) __U);
1740
+ }
1741
+
1742
+ static __inline __m512i __DEFAULT_FN_ATTRS
1743
+ _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
1744
+ {
1745
+ return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
1746
+ (__v8di) __W,
1747
+ (__mmask8) __U);
1748
+ }
1749
+
1750
+ static __inline __m512i __DEFAULT_FN_ATTRS
1751
+ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
1752
+ {
1753
+ return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
1754
+ (__v16si) __W,
1755
+ (__mmask16) __U);
1756
+ }
1757
+
1758
+ /* Compare */
1759
+
1760
/* Expands to __builtin_ia32_cmpps512_mask on A and B with predicate P,
 * an all-ones mask, and R forwarded as the final operand. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (P), \
      (__mmask16)-1, \
      (R)); })
1764
+
1765
/* Expands to __builtin_ia32_cmpps512_mask on A and B with predicate P,
 * U as the mask operand, and R forwarded as the final operand. */
#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (P), \
      (__mmask16)(U), \
      (R)); })
1769
+
1770
/* Shorthand for _mm512_cmp_round_ps_mask with _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_ps_mask(A, B, P)                                  \
  _mm512_cmp_round_ps_mask((A), (B), (P),                            \
                           _MM_FROUND_CUR_DIRECTION)
1772
+
1773
/* Shorthand for _mm512_mask_cmp_round_ps_mask with
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_cmp_ps_mask(U, A, B, P)                          \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P),                  \
                                _MM_FROUND_CUR_DIRECTION)
1775
+
1776
/* Expands to __builtin_ia32_cmppd512_mask on A and B with predicate P,
 * an all-ones mask, and R forwarded as the final operand. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (P), \
      (__mmask8)-1, \
      (R)); })
1780
+
1781
/* Expands to __builtin_ia32_cmppd512_mask on A and B with predicate P,
 * U as the mask operand, and R forwarded as the final operand. */
#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (P), \
      (__mmask8)(U), \
      (R)); })
1785
+
1786
/* Shorthand for _mm512_cmp_round_pd_mask with _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_pd_mask(A, B, P)                                  \
  _mm512_cmp_round_pd_mask((A), (B), (P),                            \
                           _MM_FROUND_CUR_DIRECTION)
1788
+
1789
/* Shorthand for _mm512_mask_cmp_round_pd_mask with
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_cmp_pd_mask(U, A, B, P)                          \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P),                  \
                                _MM_FROUND_CUR_DIRECTION)
1791
+
1792
+ /* Conversion */
1793
+
1794
+ static __inline __m512i __DEFAULT_FN_ATTRS
1795
+ _mm512_cvttps_epu32(__m512 __A)
1796
+ {
1797
+ return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
1798
+ (__v16si)
1799
+ _mm512_setzero_si512 (),
1800
+ (__mmask16) -1,
1801
+ _MM_FROUND_CUR_DIRECTION);
1802
+ }
1803
+
1804
/* Expands to __builtin_ia32_cvtdq2ps512_mask on A with a zero vector as the
 * second operand, an all-ones mask, and R forwarded as the final operand. */
#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      (R)); })
1808
+
1809
/* Expands to __builtin_ia32_cvtudq2ps512_mask on A with a zero vector as the
 * second operand, an all-ones mask, and R forwarded as the final operand. */
#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      (R)); })
1813
+
1814
+ static __inline __m512d __DEFAULT_FN_ATTRS
1815
+ _mm512_cvtepi32_pd(__m256i __A)
1816
+ {
1817
+ return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
1818
+ (__v8df)
1819
+ _mm512_setzero_pd (),
1820
+ (__mmask8) -1);
1821
+ }
1822
+
1823
+ static __inline __m512d __DEFAULT_FN_ATTRS
1824
+ _mm512_cvtepu32_pd(__m256i __A)
1825
+ {
1826
+ return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
1827
+ (__v8df)
1828
+ _mm512_setzero_pd (),
1829
+ (__mmask8) -1);
1830
+ }
1831
+
1832
/* Expands to __builtin_ia32_cvtpd2ps512_mask on A with a zero 256-bit vector
 * as the second operand, an all-ones mask, and R forwarded last; the result
 * is a __m256. */
#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(A), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1, \
      (R)); })
1836
+
1837
/* Expands to __builtin_ia32_vcvtps2ph512_mask on A with immediate I, a zero
 * 256-bit vector as the third operand and -1 as the mask; the result is a
 * __m256i. */
#define _mm512_cvtps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask( \
      (__v16sf)(A), \
      (I), \
      (__v16hi)_mm256_setzero_si256(), \
      -1); })
1841
+
1842
+ static __inline __m512 __DEFAULT_FN_ATTRS
1843
+ _mm512_cvtph_ps(__m256i __A)
1844
+ {
1845
+ return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
1846
+ (__v16sf)
1847
+ _mm512_setzero_ps (),
1848
+ (__mmask16) -1,
1849
+ _MM_FROUND_CUR_DIRECTION);
1850
+ }
1851
+
1852
+ static __inline __m512i __DEFAULT_FN_ATTRS
1853
+ _mm512_cvttps_epi32(__m512 a)
1854
+ {
1855
+ return (__m512i)
1856
+ __builtin_ia32_cvttps2dq512_mask((__v16sf) a,
1857
+ (__v16si) _mm512_setzero_si512 (),
1858
+ (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
1859
+ }
1860
+
1861
+ static __inline __m256i __DEFAULT_FN_ATTRS
1862
+ _mm512_cvttpd_epi32(__m512d a)
1863
+ {
1864
+ return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a,
1865
+ (__v8si)_mm256_setzero_si256(),
1866
+ (__mmask8) -1,
1867
+ _MM_FROUND_CUR_DIRECTION);
1868
+ }
1869
+
1870
/* Expands to __builtin_ia32_cvttpd2dq512_mask on A with a zero 256-bit vector
 * as the second operand, an all-ones mask, and R forwarded last. */
#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, \
      (R)); })
1874
+
1875
/* Expands to __builtin_ia32_cvttps2dq512_mask on A with a zero vector as the
 * second operand, an all-ones mask, and R forwarded last. */
#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, \
      (R)); })
1879
+
1880
/* Expands to __builtin_ia32_cvtps2dq512_mask on A with a zero vector as the
 * second operand, an all-ones mask, and R forwarded last. */
#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, \
      (R)); })
1884
+
1885
/* Expands to __builtin_ia32_cvtpd2dq512_mask on A with a zero 256-bit vector
 * as the second operand, an all-ones mask, and R forwarded last. */
#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, \
      (R)); })
1889
+
1890
/* Expands to __builtin_ia32_cvtps2udq512_mask on A with a zero vector as the
 * second operand, an all-ones mask, and R forwarded last. */
#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, \
      (R)); })
1894
+
1895
/* Expands to __builtin_ia32_cvtpd2udq512_mask on A with a zero 256-bit vector
 * as the second operand, an all-ones mask, and R forwarded last. */
#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask( \
      (__v8df)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8) -1, \
      (R)); })
1899
+
1900
+ /* Unpack and Interleave */
1901
+ static __inline __m512d __DEFAULT_FN_ATTRS
1902
+ _mm512_unpackhi_pd(__m512d __a, __m512d __b)
1903
+ {
1904
+ return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
1905
+ }
1906
+
1907
+ static __inline __m512d __DEFAULT_FN_ATTRS
1908
+ _mm512_unpacklo_pd(__m512d __a, __m512d __b)
1909
+ {
1910
+ return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
1911
+ }
1912
+
1913
+ static __inline __m512 __DEFAULT_FN_ATTRS
1914
+ _mm512_unpackhi_ps(__m512 __a, __m512 __b)
1915
+ {
1916
+ return __builtin_shufflevector(__a, __b,
1917
+ 2, 18, 3, 19,
1918
+ 2+4, 18+4, 3+4, 19+4,
1919
+ 2+8, 18+8, 3+8, 19+8,
1920
+ 2+12, 18+12, 3+12, 19+12);
1921
+ }
1922
+
1923
+ static __inline __m512 __DEFAULT_FN_ATTRS
1924
+ _mm512_unpacklo_ps(__m512 __a, __m512 __b)
1925
+ {
1926
+ return __builtin_shufflevector(__a, __b,
1927
+ 0, 16, 1, 17,
1928
+ 0+4, 16+4, 1+4, 17+4,
1929
+ 0+8, 16+8, 1+8, 17+8,
1930
+ 0+12, 16+12, 1+12, 17+12);
1931
+ }
1932
+
1933
+ /* Bit Test */
1934
+
1935
+ static __inline __mmask16 __DEFAULT_FN_ATTRS
1936
+ _mm512_test_epi32_mask(__m512i __A, __m512i __B)
1937
+ {
1938
+ return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
1939
+ (__v16si) __B,
1940
+ (__mmask16) -1);
1941
+ }
1942
+
1943
+ static __inline __mmask8 __DEFAULT_FN_ATTRS
1944
+ _mm512_test_epi64_mask(__m512i __A, __m512i __B)
1945
+ {
1946
+ return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
1947
+ (__v8di) __B,
1948
+ (__mmask8) -1);
1949
+ }
1950
+
1951
+ /* SIMD load ops */
1952
+
1953
+ static __inline __m512i __DEFAULT_FN_ATTRS
1954
+ _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
1955
+ {
1956
+ return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
1957
+ (__v16si)
1958
+ _mm512_setzero_si512 (),
1959
+ (__mmask16) __U);
1960
+ }
1961
+
1962
+ static __inline __m512i __DEFAULT_FN_ATTRS
1963
+ _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
1964
+ {
1965
+ return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
1966
+ (__v8di)
1967
+ _mm512_setzero_si512 (),
1968
+ (__mmask8) __U);
1969
+ }
1970
+
1971
+ static __inline __m512 __DEFAULT_FN_ATTRS
1972
+ _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
1973
+ {
1974
+ return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
1975
+ (__v16sf)
1976
+ _mm512_setzero_ps (),
1977
+ (__mmask16) __U);
1978
+ }
1979
+
1980
+ static __inline __m512d __DEFAULT_FN_ATTRS
1981
+ _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
1982
+ {
1983
+ return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
1984
+ (__v8df)
1985
+ _mm512_setzero_pd (),
1986
+ (__mmask8) __U);
1987
+ }
1988
+
1989
+ static __inline __m512 __DEFAULT_FN_ATTRS
1990
+ _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
1991
+ {
1992
+ return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
1993
+ (__v16sf)
1994
+ _mm512_setzero_ps (),
1995
+ (__mmask16) __U);
1996
+ }
1997
+
1998
+ static __inline __m512d __DEFAULT_FN_ATTRS
1999
+ _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
2000
+ {
2001
+ return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
2002
+ (__v8df)
2003
+ _mm512_setzero_pd (),
2004
+ (__mmask8) __U);
2005
+ }
2006
+
2007
+ static __inline __m512d __DEFAULT_FN_ATTRS
2008
+ _mm512_loadu_pd(double const *__p)
2009
+ {
2010
+ struct __loadu_pd {
2011
+ __m512d __v;
2012
+ } __attribute__((__packed__, __may_alias__));
2013
+ return ((struct __loadu_pd*)__p)->__v;
2014
+ }
2015
+
2016
+ static __inline __m512 __DEFAULT_FN_ATTRS
2017
+ _mm512_loadu_ps(float const *__p)
2018
+ {
2019
+ struct __loadu_ps {
2020
+ __m512 __v;
2021
+ } __attribute__((__packed__, __may_alias__));
2022
+ return ((struct __loadu_ps*)__p)->__v;
2023
+ }
2024
+
2025
+ static __inline __m512 __DEFAULT_FN_ATTRS
2026
+ _mm512_load_ps(double const *__p)
2027
+ {
2028
+ return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
2029
+ (__v16sf)
2030
+ _mm512_setzero_ps (),
2031
+ (__mmask16) -1);
2032
+ }
2033
+
2034
+ static __inline __m512d __DEFAULT_FN_ATTRS
2035
+ _mm512_load_pd(float const *__p)
2036
+ {
2037
+ return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
2038
+ (__v8df)
2039
+ _mm512_setzero_pd (),
2040
+ (__mmask8) -1);
2041
+ }
2042
+
2043
+ /* SIMD store ops */
2044
+
2045
+ static __inline void __DEFAULT_FN_ATTRS
2046
+ _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
2047
+ {
2048
+ __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
2049
+ (__mmask8) __U);
2050
+ }
2051
+
2052
+ static __inline void __DEFAULT_FN_ATTRS
2053
+ _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
2054
+ {
2055
+ __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
2056
+ (__mmask16) __U);
2057
+ }
2058
+
2059
+ static __inline void __DEFAULT_FN_ATTRS
2060
+ _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
2061
+ {
2062
+ __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
2063
+ }
2064
+
2065
+ static __inline void __DEFAULT_FN_ATTRS
2066
+ _mm512_storeu_pd(void *__P, __m512d __A)
2067
+ {
2068
+ __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1);
2069
+ }
2070
+
2071
+ static __inline void __DEFAULT_FN_ATTRS
2072
+ _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
2073
+ {
2074
+ __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
2075
+ (__mmask16) __U);
2076
+ }
2077
+
2078
+ static __inline void __DEFAULT_FN_ATTRS
2079
+ _mm512_storeu_ps(void *__P, __m512 __A)
2080
+ {
2081
+ __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1);
2082
+ }
2083
+
2084
+ static __inline void __DEFAULT_FN_ATTRS
2085
+ _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
2086
+ {
2087
+ __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
2088
+ }
2089
+
2090
+ static __inline void __DEFAULT_FN_ATTRS
2091
+ _mm512_store_pd(void *__P, __m512d __A)
2092
+ {
2093
+ *(__m512d*)__P = __A;
2094
+ }
2095
+
2096
+ static __inline void __DEFAULT_FN_ATTRS
2097
+ _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
2098
+ {
2099
+ __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
2100
+ (__mmask16) __U);
2101
+ }
2102
+
2103
+ static __inline void __DEFAULT_FN_ATTRS
2104
+ _mm512_store_ps(void *__P, __m512 __A)
2105
+ {
2106
+ *(__m512*)__P = __A;
2107
+ }
2108
+
2109
+ /* Mask ops */
2110
+
2111
+ static __inline __mmask16 __DEFAULT_FN_ATTRS
2112
+ _mm512_knot(__mmask16 __M)
2113
+ {
2114
+ return __builtin_ia32_knothi(__M);
2115
+ }
2116
+
2117
+ /* Integer compare */
2118
+
2119
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2120
+ _mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
2121
+ return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
2122
+ (__mmask16)-1);
2123
+ }
2124
+
2125
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2126
+ _mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2127
+ return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
2128
+ __u);
2129
+ }
2130
+
2131
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2132
+ _mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
2133
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
2134
+ (__mmask16)-1);
2135
+ }
2136
+
2137
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2138
+ _mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2139
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
2140
+ __u);
2141
+ }
2142
+
2143
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2144
+ _mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2145
+ return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
2146
+ __u);
2147
+ }
2148
+
2149
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2150
+ _mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
2151
+ return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
2152
+ (__mmask8)-1);
2153
+ }
2154
+
2155
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2156
+ _mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
2157
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
2158
+ (__mmask8)-1);
2159
+ }
2160
+
2161
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2162
+ _mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2163
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
2164
+ __u);
2165
+ }
2166
+
2167
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2168
+ _mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
2169
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2170
+ (__mmask16)-1);
2171
+ }
2172
+
2173
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2174
+ _mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2175
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2176
+ __u);
2177
+ }
2178
+
2179
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2180
+ _mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
2181
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2182
+ (__mmask16)-1);
2183
+ }
2184
+
2185
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2186
+ _mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2187
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2188
+ __u);
2189
+ }
2190
+
2191
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2192
+ _mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
2193
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2194
+ (__mmask8)-1);
2195
+ }
2196
+
2197
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2198
+ _mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2199
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2200
+ __u);
2201
+ }
2202
+
2203
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2204
+ _mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
2205
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2206
+ (__mmask8)-1);
2207
+ }
2208
+
2209
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2210
+ _mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2211
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2212
+ __u);
2213
+ }
2214
+
2215
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2216
+ _mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
2217
+ return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
2218
+ (__mmask16)-1);
2219
+ }
2220
+
2221
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2222
+ _mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2223
+ return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
2224
+ __u);
2225
+ }
2226
+
2227
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2228
+ _mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
2229
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
2230
+ (__mmask16)-1);
2231
+ }
2232
+
2233
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2234
+ _mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2235
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
2236
+ __u);
2237
+ }
2238
+
2239
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2240
+ _mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2241
+ return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
2242
+ __u);
2243
+ }
2244
+
2245
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2246
+ _mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
2247
+ return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
2248
+ (__mmask8)-1);
2249
+ }
2250
+
2251
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2252
+ _mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
2253
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
2254
+ (__mmask8)-1);
2255
+ }
2256
+
2257
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2258
+ _mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2259
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
2260
+ __u);
2261
+ }
2262
+
2263
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2264
+ _mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
2265
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2266
+ (__mmask16)-1);
2267
+ }
2268
+
2269
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2270
+ _mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2271
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2272
+ __u);
2273
+ }
2274
+
2275
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2276
+ _mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
2277
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2278
+ (__mmask16)-1);
2279
+ }
2280
+
2281
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2282
+ _mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2283
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2284
+ __u);
2285
+ }
2286
+
2287
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2288
+ _mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
2289
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2290
+ (__mmask8)-1);
2291
+ }
2292
+
2293
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2294
+ _mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2295
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2296
+ __u);
2297
+ }
2298
+
2299
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2300
+ _mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
2301
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2302
+ (__mmask8)-1);
2303
+ }
2304
+
2305
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2306
+ _mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2307
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2308
+ __u);
2309
+ }
2310
+
2311
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2312
+ _mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
2313
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2314
+ (__mmask16)-1);
2315
+ }
2316
+
2317
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2318
+ _mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2319
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2320
+ __u);
2321
+ }
2322
+
2323
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2324
+ _mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
2325
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2326
+ (__mmask16)-1);
2327
+ }
2328
+
2329
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2330
+ _mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2331
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2332
+ __u);
2333
+ }
2334
+
2335
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2336
+ _mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
2337
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2338
+ (__mmask8)-1);
2339
+ }
2340
+
2341
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2342
+ _mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2343
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2344
+ __u);
2345
+ }
2346
+
2347
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2348
+ _mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
2349
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2350
+ (__mmask8)-1);
2351
+ }
2352
+
2353
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2354
+ _mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2355
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2356
+ __u);
2357
+ }
2358
+
2359
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2360
+ _mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
2361
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
2362
+ (__mmask16)-1);
2363
+ }
2364
+
2365
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2366
+ _mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2367
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
2368
+ __u);
2369
+ }
2370
+
2371
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2372
+ _mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
2373
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
2374
+ (__mmask16)-1);
2375
+ }
2376
+
2377
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2378
+ _mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2379
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
2380
+ __u);
2381
+ }
2382
+
2383
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2384
+ _mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
2385
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
2386
+ (__mmask8)-1);
2387
+ }
2388
+
2389
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2390
+ _mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2391
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
2392
+ __u);
2393
+ }
2394
+
2395
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2396
+ _mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
2397
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
2398
+ (__mmask8)-1);
2399
+ }
2400
+
2401
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2402
+ _mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2403
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
2404
+ __u);
2405
+ }
2406
+
2407
/* Generic compare: expands to __builtin_ia32_cmpd512_mask on a and b
 * (as __v16si) with predicate immediate p and an all-ones mask.
 *
 * Arguments are cast in place instead of being copied into macro-local
 * variables: locals named __a/__b would capture identically named
 * identifiers in the caller's argument expressions and initialise
 * themselves from their own indeterminate values. */
#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), \
                                         (p), \
                                         (__mmask16)-1); })
2412
+
2413
/* Generic compare: expands to __builtin_ia32_ucmpd512_mask on a and b
 * (as __v16si) with predicate immediate p and an all-ones mask.
 *
 * Arguments are cast in place instead of being copied into macro-local
 * variables: locals named __a/__b would capture identically named
 * identifiers in the caller's argument expressions and initialise
 * themselves from their own indeterminate values. */
#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), \
                                          (p), \
                                          (__mmask16)-1); })
2418
+
2419
/* Generic compare: expands to __builtin_ia32_cmpq512_mask on a and b
 * (as __v8di) with predicate immediate p and an all-ones mask.
 *
 * Arguments are cast in place instead of being copied into macro-local
 * variables: locals named __a/__b would capture identically named
 * identifiers in the caller's argument expressions and initialise
 * themselves from their own indeterminate values. */
#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), \
                                        (p), \
                                        (__mmask8)-1); })
2424
+
2425
/* Generic compare: expands to __builtin_ia32_ucmpq512_mask on a and b
 * (as __v8di) with predicate immediate p and an all-ones mask.
 *
 * Arguments are cast in place instead of being copied into macro-local
 * variables: locals named __a/__b would capture identically named
 * identifiers in the caller's argument expressions and initialise
 * themselves from their own indeterminate values. */
#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), \
                                         (p), \
                                         (__mmask8)-1); })
2430
+
2431
/* Generic masked compare: expands to __builtin_ia32_cmpd512_mask on a and b
 * (as __v16si) with predicate immediate p and m as the mask operand.
 *
 * Arguments are cast in place instead of being copied into macro-local
 * variables: locals named __a/__b would capture identically named
 * identifiers in the caller's argument expressions and initialise
 * themselves from their own indeterminate values. */
#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), \
                                         (p), \
                                         (__mmask16)(m)); })
2436
+
2437
/* Generic masked compare: expands to __builtin_ia32_ucmpd512_mask on a and b
 * (as __v16si) with predicate immediate p and m as the mask operand.
 *
 * Arguments are cast in place instead of being copied into macro-local
 * variables: locals named __a/__b would capture identically named
 * identifiers in the caller's argument expressions and initialise
 * themselves from their own indeterminate values. */
#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), \
                                          (p), \
                                          (__mmask16)(m)); })
2442
+
2443
/* Generic masked compare: expands to __builtin_ia32_cmpq512_mask on a and b
 * (as __v8di) with predicate immediate p and m as the mask operand.
 *
 * Arguments are cast in place instead of being copied into macro-local
 * variables: locals named __a/__b would capture identically named
 * identifiers in the caller's argument expressions and initialise
 * themselves from their own indeterminate values. */
#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), \
                                        (p), \
                                        (__mmask8)(m)); })
2448
+
2449
/* Generic masked compare: expands to __builtin_ia32_ucmpq512_mask on a and b
 * (as __v8di) with predicate immediate p and m as the mask operand.
 *
 * Arguments are cast in place instead of being copied into macro-local
 * variables: locals named __a/__b would capture identically named
 * identifiers in the caller's argument expressions and initialise
 * themselves from their own indeterminate values. */
#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), \
                                         (p), \
                                         (__mmask8)(m)); })
2454
+
2455
+ #undef __DEFAULT_FN_ATTRS
2456
+
2457
+ #endif // __AVX512FINTRIN_H