xcodebuild-helper 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. checksums.yaml +7 -0
  2. data/.codeclimate.yml +20 -0
  3. data/.gitignore +1 -0
  4. data/.rspec +2 -0
  5. data/.travis.yml +7 -0
  6. data/Gemfile +6 -0
  7. data/Gemfile.lock +110 -0
  8. data/Guardfile +18 -0
  9. data/README.md +7 -0
  10. data/Rakefile +7 -0
  11. data/TODO.md +3 -0
  12. data/bin/oclint +5 -0
  13. data/bin/oclint-0.8 +5 -0
  14. data/bin/oclint-json-compilation-database +5 -0
  15. data/bin/oclint-xcodebuild +5 -0
  16. data/externals/oclint/LICENSE +69 -0
  17. data/externals/oclint/bin/oclint +0 -0
  18. data/externals/oclint/bin/oclint-0.10.2 +0 -0
  19. data/externals/oclint/bin/oclint-json-compilation-database +88 -0
  20. data/externals/oclint/bin/oclint-xcodebuild +218 -0
  21. data/externals/oclint/lib/clang/3.7.0/asan_blacklist.txt +13 -0
  22. data/externals/oclint/lib/clang/3.7.0/include/Intrin.h +958 -0
  23. data/externals/oclint/lib/clang/3.7.0/include/__stddef_max_align_t.h +43 -0
  24. data/externals/oclint/lib/clang/3.7.0/include/__wmmintrin_aes.h +72 -0
  25. data/externals/oclint/lib/clang/3.7.0/include/__wmmintrin_pclmul.h +34 -0
  26. data/externals/oclint/lib/clang/3.7.0/include/adxintrin.h +88 -0
  27. data/externals/oclint/lib/clang/3.7.0/include/altivec.h +13528 -0
  28. data/externals/oclint/lib/clang/3.7.0/include/ammintrin.h +215 -0
  29. data/externals/oclint/lib/clang/3.7.0/include/arm_acle.h +304 -0
  30. data/externals/oclint/lib/clang/3.7.0/include/arm_neon.h +68419 -0
  31. data/externals/oclint/lib/clang/3.7.0/include/avx2intrin.h +1256 -0
  32. data/externals/oclint/lib/clang/3.7.0/include/avx512bwintrin.h +1250 -0
  33. data/externals/oclint/lib/clang/3.7.0/include/avx512cdintrin.h +131 -0
  34. data/externals/oclint/lib/clang/3.7.0/include/avx512dqintrin.h +242 -0
  35. data/externals/oclint/lib/clang/3.7.0/include/avx512erintrin.h +285 -0
  36. data/externals/oclint/lib/clang/3.7.0/include/avx512fintrin.h +2457 -0
  37. data/externals/oclint/lib/clang/3.7.0/include/avx512vlbwintrin.h +1907 -0
  38. data/externals/oclint/lib/clang/3.7.0/include/avx512vldqintrin.h +353 -0
  39. data/externals/oclint/lib/clang/3.7.0/include/avx512vlintrin.h +1982 -0
  40. data/externals/oclint/lib/clang/3.7.0/include/avxintrin.h +1308 -0
  41. data/externals/oclint/lib/clang/3.7.0/include/bmi2intrin.h +99 -0
  42. data/externals/oclint/lib/clang/3.7.0/include/bmiintrin.h +153 -0
  43. data/externals/oclint/lib/clang/3.7.0/include/cpuid.h +209 -0
  44. data/externals/oclint/lib/clang/3.7.0/include/cuda_builtin_vars.h +110 -0
  45. data/externals/oclint/lib/clang/3.7.0/include/emmintrin.h +1480 -0
  46. data/externals/oclint/lib/clang/3.7.0/include/f16cintrin.h +63 -0
  47. data/externals/oclint/lib/clang/3.7.0/include/float.h +124 -0
  48. data/externals/oclint/lib/clang/3.7.0/include/fma4intrin.h +236 -0
  49. data/externals/oclint/lib/clang/3.7.0/include/fmaintrin.h +234 -0
  50. data/externals/oclint/lib/clang/3.7.0/include/fxsrintrin.h +55 -0
  51. data/externals/oclint/lib/clang/3.7.0/include/htmintrin.h +226 -0
  52. data/externals/oclint/lib/clang/3.7.0/include/htmxlintrin.h +363 -0
  53. data/externals/oclint/lib/clang/3.7.0/include/ia32intrin.h +101 -0
  54. data/externals/oclint/lib/clang/3.7.0/include/immintrin.h +203 -0
  55. data/externals/oclint/lib/clang/3.7.0/include/inttypes.h +102 -0
  56. data/externals/oclint/lib/clang/3.7.0/include/iso646.h +43 -0
  57. data/externals/oclint/lib/clang/3.7.0/include/limits.h +118 -0
  58. data/externals/oclint/lib/clang/3.7.0/include/lzcntintrin.h +72 -0
  59. data/externals/oclint/lib/clang/3.7.0/include/mm3dnow.h +167 -0
  60. data/externals/oclint/lib/clang/3.7.0/include/mm_malloc.h +75 -0
  61. data/externals/oclint/lib/clang/3.7.0/include/mmintrin.h +507 -0
  62. data/externals/oclint/lib/clang/3.7.0/include/module.modulemap +196 -0
  63. data/externals/oclint/lib/clang/3.7.0/include/nmmintrin.h +35 -0
  64. data/externals/oclint/lib/clang/3.7.0/include/pmmintrin.h +122 -0
  65. data/externals/oclint/lib/clang/3.7.0/include/popcntintrin.h +50 -0
  66. data/externals/oclint/lib/clang/3.7.0/include/prfchwintrin.h +39 -0
  67. data/externals/oclint/lib/clang/3.7.0/include/rdseedintrin.h +59 -0
  68. data/externals/oclint/lib/clang/3.7.0/include/rtmintrin.h +59 -0
  69. data/externals/oclint/lib/clang/3.7.0/include/s390intrin.h +39 -0
  70. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/allocator_interface.h +66 -0
  71. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/asan_interface.h +155 -0
  72. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/common_interface_defs.h +118 -0
  73. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/coverage_interface.h +63 -0
  74. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/dfsan_interface.h +114 -0
  75. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/linux_syscall_hooks.h +3070 -0
  76. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/lsan_interface.h +84 -0
  77. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/msan_interface.h +107 -0
  78. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/tsan_interface_atomic.h +222 -0
  79. data/externals/oclint/lib/clang/3.7.0/include/shaintrin.h +79 -0
  80. data/externals/oclint/lib/clang/3.7.0/include/smmintrin.h +487 -0
  81. data/externals/oclint/lib/clang/3.7.0/include/stdalign.h +35 -0
  82. data/externals/oclint/lib/clang/3.7.0/include/stdarg.h +52 -0
  83. data/externals/oclint/lib/clang/3.7.0/include/stdatomic.h +190 -0
  84. data/externals/oclint/lib/clang/3.7.0/include/stdbool.h +44 -0
  85. data/externals/oclint/lib/clang/3.7.0/include/stddef.h +137 -0
  86. data/externals/oclint/lib/clang/3.7.0/include/stdint.h +707 -0
  87. data/externals/oclint/lib/clang/3.7.0/include/stdnoreturn.h +30 -0
  88. data/externals/oclint/lib/clang/3.7.0/include/tbmintrin.h +154 -0
  89. data/externals/oclint/lib/clang/3.7.0/include/tgmath.h +1374 -0
  90. data/externals/oclint/lib/clang/3.7.0/include/tmmintrin.h +230 -0
  91. data/externals/oclint/lib/clang/3.7.0/include/unwind.h +282 -0
  92. data/externals/oclint/lib/clang/3.7.0/include/vadefs.h +65 -0
  93. data/externals/oclint/lib/clang/3.7.0/include/varargs.h +26 -0
  94. data/externals/oclint/lib/clang/3.7.0/include/vecintrin.h +8946 -0
  95. data/externals/oclint/lib/clang/3.7.0/include/wmmintrin.h +42 -0
  96. data/externals/oclint/lib/clang/3.7.0/include/x86intrin.h +81 -0
  97. data/externals/oclint/lib/clang/3.7.0/include/xmmintrin.h +1008 -0
  98. data/externals/oclint/lib/clang/3.7.0/include/xopintrin.h +809 -0
  99. data/externals/oclint/lib/clang/3.7.0/include/xtestintrin.h +41 -0
  100. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.asan_iossim_dynamic.dylib +0 -0
  101. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.asan_osx_dynamic.dylib +0 -0
  102. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.builtins-i386.a +0 -0
  103. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.builtins-x86_64.a +0 -0
  104. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.profile_osx.a +0 -0
  105. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.safestack_osx.a +0 -0
  106. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.ubsan_iossim_dynamic.dylib +0 -0
  107. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.ubsan_osx_dynamic.dylib +0 -0
  108. data/externals/oclint/lib/oclint/reporters/libHTMLReporter.dylib +0 -0
  109. data/externals/oclint/lib/oclint/reporters/libJSONReporter.dylib +0 -0
  110. data/externals/oclint/lib/oclint/reporters/libPMDReporter.dylib +0 -0
  111. data/externals/oclint/lib/oclint/reporters/libTextReporter.dylib +0 -0
  112. data/externals/oclint/lib/oclint/reporters/libXMLReporter.dylib +0 -0
  113. data/externals/oclint/lib/oclint/reporters/libXcodeReporter.dylib +0 -0
  114. data/externals/oclint/lib/oclint/rules/libAvoidBranchingStatementAsLastInLoopRule.dylib +0 -0
  115. data/externals/oclint/lib/oclint/rules/libAvoidDefaultArgumentsOnVirtualMethodsRule.dylib +0 -0
  116. data/externals/oclint/lib/oclint/rules/libAvoidPrivateStaticMembersRule.dylib +0 -0
  117. data/externals/oclint/lib/oclint/rules/libBaseClassDestructorShouldBeVirtualOrProtectedRule.dylib +0 -0
  118. data/externals/oclint/lib/oclint/rules/libBitwiseOperatorInConditionalRule.dylib +0 -0
  119. data/externals/oclint/lib/oclint/rules/libBrokenNullCheckRule.dylib +0 -0
  120. data/externals/oclint/lib/oclint/rules/libBrokenOddnessCheckRule.dylib +0 -0
  121. data/externals/oclint/lib/oclint/rules/libCollapsibleIfStatementsRule.dylib +0 -0
  122. data/externals/oclint/lib/oclint/rules/libConstantConditionalOperatorRule.dylib +0 -0
  123. data/externals/oclint/lib/oclint/rules/libConstantIfExpressionRule.dylib +0 -0
  124. data/externals/oclint/lib/oclint/rules/libCoveredSwitchStatementsDontNeedDefaultRule.dylib +0 -0
  125. data/externals/oclint/lib/oclint/rules/libCyclomaticComplexityRule.dylib +0 -0
  126. data/externals/oclint/lib/oclint/rules/libDeadCodeRule.dylib +0 -0
  127. data/externals/oclint/lib/oclint/rules/libDefaultLabelNotLastInSwitchStatementRule.dylib +0 -0
  128. data/externals/oclint/lib/oclint/rules/libDestructorOfVirtualClassRule.dylib +0 -0
  129. data/externals/oclint/lib/oclint/rules/libDoubleNegativeRule.dylib +0 -0
  130. data/externals/oclint/lib/oclint/rules/libEmptyCatchStatementRule.dylib +0 -0
  131. data/externals/oclint/lib/oclint/rules/libEmptyDoWhileStatementRule.dylib +0 -0
  132. data/externals/oclint/lib/oclint/rules/libEmptyElseBlockRule.dylib +0 -0
  133. data/externals/oclint/lib/oclint/rules/libEmptyFinallyStatementRule.dylib +0 -0
  134. data/externals/oclint/lib/oclint/rules/libEmptyForStatementRule.dylib +0 -0
  135. data/externals/oclint/lib/oclint/rules/libEmptyIfStatementRule.dylib +0 -0
  136. data/externals/oclint/lib/oclint/rules/libEmptySwitchStatementRule.dylib +0 -0
  137. data/externals/oclint/lib/oclint/rules/libEmptyTryStatementRule.dylib +0 -0
  138. data/externals/oclint/lib/oclint/rules/libEmptyWhileStatementRule.dylib +0 -0
  139. data/externals/oclint/lib/oclint/rules/libForLoopShouldBeWhileLoopRule.dylib +0 -0
  140. data/externals/oclint/lib/oclint/rules/libGotoStatementRule.dylib +0 -0
  141. data/externals/oclint/lib/oclint/rules/libInvertedLogicRule.dylib +0 -0
  142. data/externals/oclint/lib/oclint/rules/libJumbledIncrementerRule.dylib +0 -0
  143. data/externals/oclint/lib/oclint/rules/libLongClassRule.dylib +0 -0
  144. data/externals/oclint/lib/oclint/rules/libLongLineRule.dylib +0 -0
  145. data/externals/oclint/lib/oclint/rules/libLongMethodRule.dylib +0 -0
  146. data/externals/oclint/lib/oclint/rules/libLongVariableNameRule.dylib +0 -0
  147. data/externals/oclint/lib/oclint/rules/libMisplacedNullCheckRule.dylib +0 -0
  148. data/externals/oclint/lib/oclint/rules/libMissingBreakInSwitchStatementRule.dylib +0 -0
  149. data/externals/oclint/lib/oclint/rules/libMultipleUnaryOperatorRule.dylib +0 -0
  150. data/externals/oclint/lib/oclint/rules/libNPathComplexityRule.dylib +0 -0
  151. data/externals/oclint/lib/oclint/rules/libNcssMethodCountRule.dylib +0 -0
  152. data/externals/oclint/lib/oclint/rules/libNestedBlockDepthRule.dylib +0 -0
  153. data/externals/oclint/lib/oclint/rules/libNonCaseLabelInSwitchStatementRule.dylib +0 -0
  154. data/externals/oclint/lib/oclint/rules/libObjCAssignIvarOutsideAccessorsRule.dylib +0 -0
  155. data/externals/oclint/lib/oclint/rules/libObjCBoxedExpressionsRule.dylib +0 -0
  156. data/externals/oclint/lib/oclint/rules/libObjCContainerLiteralsRule.dylib +0 -0
  157. data/externals/oclint/lib/oclint/rules/libObjCNSNumberLiteralsRule.dylib +0 -0
  158. data/externals/oclint/lib/oclint/rules/libObjCObjectSubscriptingRule.dylib +0 -0
  159. data/externals/oclint/lib/oclint/rules/libObjCVerifyIsEqualHashRule.dylib +0 -0
  160. data/externals/oclint/lib/oclint/rules/libObjCVerifyMustCallSuperRule.dylib +0 -0
  161. data/externals/oclint/lib/oclint/rules/libObjCVerifyProhibitedCallRule.dylib +0 -0
  162. data/externals/oclint/lib/oclint/rules/libObjCVerifyProtectedMethodRule.dylib +0 -0
  163. data/externals/oclint/lib/oclint/rules/libObjCVerifySubclassMustImplementRule.dylib +0 -0
  164. data/externals/oclint/lib/oclint/rules/libParameterReassignmentRule.dylib +0 -0
  165. data/externals/oclint/lib/oclint/rules/libPreferEarlyExitRule.dylib +0 -0
  166. data/externals/oclint/lib/oclint/rules/libRedundantConditionalOperatorRule.dylib +0 -0
  167. data/externals/oclint/lib/oclint/rules/libRedundantIfStatementRule.dylib +0 -0
  168. data/externals/oclint/lib/oclint/rules/libRedundantLocalVariableRule.dylib +0 -0
  169. data/externals/oclint/lib/oclint/rules/libRedundantNilCheckRule.dylib +0 -0
  170. data/externals/oclint/lib/oclint/rules/libReturnFromFinallyBlockRule.dylib +0 -0
  171. data/externals/oclint/lib/oclint/rules/libShortVariableNameRule.dylib +0 -0
  172. data/externals/oclint/lib/oclint/rules/libSwitchStatementsShouldHaveDefaultRule.dylib +0 -0
  173. data/externals/oclint/lib/oclint/rules/libThrowExceptionFromFinallyBlockRule.dylib +0 -0
  174. data/externals/oclint/lib/oclint/rules/libTooFewBranchesInSwitchStatementRule.dylib +0 -0
  175. data/externals/oclint/lib/oclint/rules/libTooManyFieldsRule.dylib +0 -0
  176. data/externals/oclint/lib/oclint/rules/libTooManyMethodsRule.dylib +0 -0
  177. data/externals/oclint/lib/oclint/rules/libTooManyParametersRule.dylib +0 -0
  178. data/externals/oclint/lib/oclint/rules/libUnnecessaryElseStatementRule.dylib +0 -0
  179. data/externals/oclint/lib/oclint/rules/libUnnecessaryNullCheckForCXXDeallocRule.dylib +0 -0
  180. data/externals/oclint/lib/oclint/rules/libUnusedLocalVariableRule.dylib +0 -0
  181. data/externals/oclint/lib/oclint/rules/libUnusedMethodParameterRule.dylib +0 -0
  182. data/externals/oclint/lib/oclint/rules/libUselessParenthesesRule.dylib +0 -0
  183. data/lib/coverage_plan.rb +19 -0
  184. data/lib/device.rb +27 -0
  185. data/lib/execute.rb +7 -0
  186. data/lib/lint_plan.rb +41 -0
  187. data/lib/rules.rb +23 -0
  188. data/lib/test_plan.rb +11 -0
  189. data/lib/version.rb +3 -0
  190. data/lib/xcode.rb +128 -0
  191. data/lib/xcodebuild-helper.rb +110 -0
  192. data/spec/coverage_plan_spec.rb +18 -0
  193. data/spec/device_spec.rb +24 -0
  194. data/spec/lint_plan_spec.rb +35 -0
  195. data/spec/rule_spec.rb +37 -0
  196. data/spec/spec_helper.rb +17 -0
  197. data/spec/test_plan_spec.rb +11 -0
  198. data/spec/xcode_dsl_actions_spec.rb +136 -0
  199. data/spec/xcode_dsl_spec.rb +176 -0
  200. data/spec/xcode_spec.rb +79 -0
  201. data/xcodebuild-helper.gemspec +26 -0
  202. metadata +327 -0
@@ -0,0 +1,2457 @@
1
+ /*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
2
+ *
3
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ * of this software and associated documentation files (the "Software"), to deal
5
+ * in the Software without restriction, including without limitation the rights
6
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ * copies of the Software, and to permit persons to whom the Software is
8
+ * furnished to do so, subject to the following conditions:
9
+ *
10
+ * The above copyright notice and this permission notice shall be included in
11
+ * all copies or substantial portions of the Software.
12
+ *
13
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ * THE SOFTWARE.
20
+ *
21
+ *===-----------------------------------------------------------------------===
22
+ */
23
+ #ifndef __IMMINTRIN_H
24
+ #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25
+ #endif
26
+
27
+ #ifndef __AVX512FINTRIN_H
28
+ #define __AVX512FINTRIN_H
29
+
30
+ typedef double __v8df __attribute__((__vector_size__(64)));
31
+ typedef float __v16sf __attribute__((__vector_size__(64)));
32
+ typedef long long __v8di __attribute__((__vector_size__(64)));
33
+ typedef int __v16si __attribute__((__vector_size__(64)));
34
+
35
+ typedef float __m512 __attribute__((__vector_size__(64)));
36
+ typedef double __m512d __attribute__((__vector_size__(64)));
37
+ typedef long long __m512i __attribute__((__vector_size__(64)));
38
+
39
+ typedef unsigned char __mmask8;
40
+ typedef unsigned short __mmask16;
41
+
42
+ /* Rounding mode macros. */
43
+ #define _MM_FROUND_TO_NEAREST_INT 0x00
44
+ #define _MM_FROUND_TO_NEG_INF 0x01
45
+ #define _MM_FROUND_TO_POS_INF 0x02
46
+ #define _MM_FROUND_TO_ZERO 0x03
47
+ #define _MM_FROUND_CUR_DIRECTION 0x04
48
+
49
+ /* Define the default attributes for the functions in this file. */
50
+ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
51
+
52
+ /* Create vectors with repeated elements */
53
+
54
+ static __inline __m512i __DEFAULT_FN_ATTRS
55
+ _mm512_setzero_si512(void)
56
+ {
57
+ return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
58
+ }
59
+
60
+ static __inline __m512i __DEFAULT_FN_ATTRS
61
+ _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
62
+ {
63
+ return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
64
+ (__v16si)
65
+ _mm512_setzero_si512 (),
66
+ __M);
67
+ }
68
+
69
+ static __inline __m512i __DEFAULT_FN_ATTRS
70
+ _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
71
+ {
72
+ #ifdef __x86_64__
73
+ return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
74
+ (__v8di)
75
+ _mm512_setzero_si512 (),
76
+ __M);
77
+ #else
78
+ return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
79
+ (__v8di)
80
+ _mm512_setzero_si512 (),
81
+ __M);
82
+ #endif
83
+ }
84
+
85
+ static __inline __m512 __DEFAULT_FN_ATTRS
86
+ _mm512_setzero_ps(void)
87
+ {
88
+ return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
89
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
90
+ }
91
+ static __inline __m512d __DEFAULT_FN_ATTRS
92
+ _mm512_setzero_pd(void)
93
+ {
94
+ return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
95
+ }
96
+
97
+ static __inline __m512 __DEFAULT_FN_ATTRS
98
+ _mm512_set1_ps(float __w)
99
+ {
100
+ return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
101
+ __w, __w, __w, __w, __w, __w, __w, __w };
102
+ }
103
+
104
+ static __inline __m512d __DEFAULT_FN_ATTRS
105
+ _mm512_set1_pd(double __w)
106
+ {
107
+ return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
108
+ }
109
+
110
+ static __inline __m512i __DEFAULT_FN_ATTRS
111
+ _mm512_set1_epi32(int __s)
112
+ {
113
+ return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
114
+ __s, __s, __s, __s, __s, __s, __s, __s };
115
+ }
116
+
117
+ static __inline __m512i __DEFAULT_FN_ATTRS
118
+ _mm512_set1_epi64(long long __d)
119
+ {
120
+ return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
121
+ }
122
+
123
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
124
+ _mm512_broadcastss_ps(__m128 __X)
125
+ {
126
+ float __f = __X[0];
127
+ return (__v16sf){ __f, __f, __f, __f,
128
+ __f, __f, __f, __f,
129
+ __f, __f, __f, __f,
130
+ __f, __f, __f, __f };
131
+ }
132
+
133
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
134
+ _mm512_broadcastsd_pd(__m128d __X)
135
+ {
136
+ double __d = __X[0];
137
+ return (__v8df){ __d, __d, __d, __d,
138
+ __d, __d, __d, __d };
139
+ }
140
+
141
+ /* Cast between vector types */
142
+
143
+ static __inline __m512d __DEFAULT_FN_ATTRS
144
+ _mm512_castpd256_pd512(__m256d __a)
145
+ {
146
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
147
+ }
148
+
149
+ static __inline __m512 __DEFAULT_FN_ATTRS
150
+ _mm512_castps256_ps512(__m256 __a)
151
+ {
152
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
153
+ -1, -1, -1, -1, -1, -1, -1, -1);
154
+ }
155
+
156
+ static __inline __m128d __DEFAULT_FN_ATTRS
157
+ _mm512_castpd512_pd128(__m512d __a)
158
+ {
159
+ return __builtin_shufflevector(__a, __a, 0, 1);
160
+ }
161
+
162
+ static __inline __m128 __DEFAULT_FN_ATTRS
163
+ _mm512_castps512_ps128(__m512 __a)
164
+ {
165
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
166
+ }
167
+
168
+ /* Bitwise operators */
169
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
170
+ _mm512_and_epi32(__m512i __a, __m512i __b)
171
+ {
172
+ return __a & __b;
173
+ }
174
+
175
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
176
+ _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
177
+ {
178
+ return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
179
+ (__v16si) __b,
180
+ (__v16si) __src,
181
+ (__mmask16) __k);
182
+ }
183
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
184
+ _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
185
+ {
186
+ return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
187
+ (__v16si) __b,
188
+ (__v16si)
189
+ _mm512_setzero_si512 (),
190
+ (__mmask16) __k);
191
+ }
192
+
193
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
194
+ _mm512_and_epi64(__m512i __a, __m512i __b)
195
+ {
196
+ return __a & __b;
197
+ }
198
+
199
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
200
+ _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
201
+ {
202
+ return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
203
+ (__v8di) __b,
204
+ (__v8di) __src,
205
+ (__mmask8) __k);
206
+ }
207
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
208
+ _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
209
+ {
210
+ return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
211
+ (__v8di) __b,
212
+ (__v8di)
213
+ _mm512_setzero_si512 (),
214
+ (__mmask8) __k);
215
+ }
216
+
217
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
218
+ _mm512_andnot_epi32 (__m512i __A, __m512i __B)
219
+ {
220
+ return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
221
+ (__v16si) __B,
222
+ (__v16si)
223
+ _mm512_setzero_si512 (),
224
+ (__mmask16) -1);
225
+ }
226
+
227
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
228
+ _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
229
+ {
230
+ return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
231
+ (__v16si) __B,
232
+ (__v16si) __W,
233
+ (__mmask16) __U);
234
+ }
235
+
236
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
237
+ _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
238
+ {
239
+ return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
240
+ (__v16si) __B,
241
+ (__v16si)
242
+ _mm512_setzero_si512 (),
243
+ (__mmask16) __U);
244
+ }
245
+
246
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
247
+ _mm512_andnot_epi64 (__m512i __A, __m512i __B)
248
+ {
249
+ return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
250
+ (__v8di) __B,
251
+ (__v8di)
252
+ _mm512_setzero_si512 (),
253
+ (__mmask8) -1);
254
+ }
255
+ /* mask form of andnot_epi64: hands __A, __B, pass-through vector __W and mask __U to __builtin_ia32_pandnq512_mask. */
256
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
257
+ _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
258
+ {
259
+ return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
260
+ (__v8di) __B,
261
+ (__v8di) __W, (__mmask8) __U); /* explicit mask cast, matching the sibling masked wrappers */
262
+ }
263
+ /* maskz form of andnot_epi64: hands __A, __B, an all-zero integer vector and mask __U to __builtin_ia32_pandnq512_mask. */
264
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
265
+ _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
266
+ {
267
+ return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
268
+ (__v8di) __B,
269
+ (__v8di)
270
+ _mm512_setzero_si512 (), /* integer zero helper (was _mm512_setzero_pd); same all-zero bits */
271
+ (__mmask8) __U);
272
+ }
273
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
274
+ _mm512_or_epi32(__m512i __a, __m512i __b)
275
+ {
276
+ return __a | __b;
277
+ }
278
+
279
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
280
+ _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
281
+ {
282
+ return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
283
+ (__v16si) __b,
284
+ (__v16si) __src,
285
+ (__mmask16) __k);
286
+ }
287
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
288
+ _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
289
+ {
290
+ return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
291
+ (__v16si) __b,
292
+ (__v16si)
293
+ _mm512_setzero_si512 (),
294
+ (__mmask16) __k);
295
+ }
296
+
297
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
298
+ _mm512_or_epi64(__m512i __a, __m512i __b)
299
+ {
300
+ return __a | __b;
301
+ }
302
+
303
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
304
+ _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
305
+ {
306
+ return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
307
+ (__v8di) __b,
308
+ (__v8di) __src,
309
+ (__mmask8) __k);
310
+ }
311
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
312
+ _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
313
+ {
314
+ return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
315
+ (__v8di) __b,
316
+ (__v8di)
317
+ _mm512_setzero_si512 (),
318
+ (__mmask8) __k);
319
+ }
320
+
321
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
322
+ _mm512_xor_epi32(__m512i __a, __m512i __b)
323
+ {
324
+ return __a ^ __b;
325
+ }
326
+
327
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
328
+ _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
329
+ {
330
+ return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
331
+ (__v16si) __b,
332
+ (__v16si) __src,
333
+ (__mmask16) __k);
334
+ }
335
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
336
+ _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
337
+ {
338
+ return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
339
+ (__v16si) __b,
340
+ (__v16si)
341
+ _mm512_setzero_si512 (),
342
+ (__mmask16) __k);
343
+ }
344
+
345
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
346
+ _mm512_xor_epi64(__m512i __a, __m512i __b)
347
+ {
348
+ return __a ^ __b;
349
+ }
350
+
351
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
352
+ _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
353
+ {
354
+ return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
355
+ (__v8di) __b,
356
+ (__v8di) __src,
357
+ (__mmask8) __k);
358
+ }
359
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
360
+ _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
361
+ {
362
+ return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
363
+ (__v8di) __b,
364
+ (__v8di)
365
+ _mm512_setzero_si512 (),
366
+ (__mmask8) __k);
367
+ }
368
+
369
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
370
+ _mm512_and_si512(__m512i __a, __m512i __b)
371
+ {
372
+ return __a & __b;
373
+ }
374
+
375
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
376
+ _mm512_or_si512(__m512i __a, __m512i __b)
377
+ {
378
+ return __a | __b;
379
+ }
380
+
381
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
382
+ _mm512_xor_si512(__m512i __a, __m512i __b)
383
+ {
384
+ return __a ^ __b;
385
+ }
386
+ /* Arithmetic */
387
+
388
+ static __inline __m512d __DEFAULT_FN_ATTRS
389
+ _mm512_add_pd(__m512d __a, __m512d __b)
390
+ {
391
+ return __a + __b;
392
+ }
393
+
394
+ static __inline __m512 __DEFAULT_FN_ATTRS
395
+ _mm512_add_ps(__m512 __a, __m512 __b)
396
+ {
397
+ return __a + __b;
398
+ }
399
+
400
+ static __inline __m512d __DEFAULT_FN_ATTRS
401
+ _mm512_mul_pd(__m512d __a, __m512d __b)
402
+ {
403
+ return __a * __b;
404
+ }
405
+
406
+ static __inline __m512 __DEFAULT_FN_ATTRS
407
+ _mm512_mul_ps(__m512 __a, __m512 __b)
408
+ {
409
+ return __a * __b;
410
+ }
411
+
412
+ static __inline __m512d __DEFAULT_FN_ATTRS
413
+ _mm512_sub_pd(__m512d __a, __m512d __b)
414
+ {
415
+ return __a - __b;
416
+ }
417
+
418
+ static __inline __m512 __DEFAULT_FN_ATTRS
419
+ _mm512_sub_ps(__m512 __a, __m512 __b)
420
+ {
421
+ return __a - __b;
422
+ }
423
+
424
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
425
+ _mm512_add_epi64 (__m512i __A, __m512i __B)
426
+ {
427
+ return (__m512i) ((__v8di) __A + (__v8di) __B);
428
+ }
429
+
430
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
431
+ _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
432
+ {
433
+ return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
434
+ (__v8di) __B,
435
+ (__v8di) __W,
436
+ (__mmask8) __U);
437
+ }
438
+
439
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
440
+ _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
441
+ {
442
+ return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
443
+ (__v8di) __B,
444
+ (__v8di)
445
+ _mm512_setzero_si512 (),
446
+ (__mmask8) __U);
447
+ }
448
+
449
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
450
+ _mm512_sub_epi64 (__m512i __A, __m512i __B)
451
+ {
452
+ return (__m512i) ((__v8di) __A - (__v8di) __B);
453
+ }
454
+
455
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
456
+ _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
457
+ {
458
+ return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
459
+ (__v8di) __B,
460
+ (__v8di) __W,
461
+ (__mmask8) __U);
462
+ }
463
+
464
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
465
+ _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
466
+ {
467
+ return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
468
+ (__v8di) __B,
469
+ (__v8di)
470
+ _mm512_setzero_si512 (),
471
+ (__mmask8) __U);
472
+ }
473
+
474
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
475
+ _mm512_add_epi32 (__m512i __A, __m512i __B)
476
+ {
477
+ return (__m512i) ((__v16si) __A + (__v16si) __B);
478
+ }
479
+
480
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
481
+ _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
482
+ {
483
+ return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
484
+ (__v16si) __B,
485
+ (__v16si) __W,
486
+ (__mmask16) __U);
487
+ }
488
+
489
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
490
+ _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
491
+ {
492
+ return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
493
+ (__v16si) __B,
494
+ (__v16si)
495
+ _mm512_setzero_si512 (),
496
+ (__mmask16) __U);
497
+ }
498
+
499
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
500
+ _mm512_sub_epi32 (__m512i __A, __m512i __B)
501
+ {
502
+ return (__m512i) ((__v16si) __A - (__v16si) __B);
503
+ }
504
+
505
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
506
+ _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
507
+ {
508
+ return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
509
+ (__v16si) __B,
510
+ (__v16si) __W,
511
+ (__mmask16) __U);
512
+ }
513
+
514
+ static __inline__ __m512i __DEFAULT_FN_ATTRS
515
+ _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
516
+ {
517
+ return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
518
+ (__v16si) __B,
519
+ (__v16si)
520
+ _mm512_setzero_si512 (),
521
+ (__mmask16) __U);
522
+ }
523
+
524
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
525
+ _mm512_max_pd(__m512d __A, __m512d __B)
526
+ {
527
+ return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
528
+ (__v8df) __B,
529
+ (__v8df)
530
+ _mm512_setzero_pd (),
531
+ (__mmask8) -1,
532
+ _MM_FROUND_CUR_DIRECTION);
533
+ }
534
+
535
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
536
+ _mm512_max_ps(__m512 __A, __m512 __B)
537
+ {
538
+ return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
539
+ (__v16sf) __B,
540
+ (__v16sf)
541
+ _mm512_setzero_ps (),
542
+ (__mmask16) -1,
543
+ _MM_FROUND_CUR_DIRECTION);
544
+ }
545
+
546
+ static __inline __m512i
547
+ __DEFAULT_FN_ATTRS
548
+ _mm512_max_epi32(__m512i __A, __m512i __B)
549
+ { /* Wraps __builtin_ia32_pmaxsd512_mask: operands __A, __B; third operand _mm512_setzero_si512(); mask -1. */
550
+ return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
551
+ (__v16si) __B,
552
+ (__v16si)
553
+ _mm512_setzero_si512 (),
554
+ (__mmask16) -1);
555
+ }
556
+
557
+ static __inline __m512i __DEFAULT_FN_ATTRS
558
+ _mm512_max_epu32(__m512i __A, __m512i __B)
559
+ { /* Wraps __builtin_ia32_pmaxud512_mask: operands __A, __B (passed as __v16si); third operand _mm512_setzero_si512(); mask -1. */
560
+ return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
561
+ (__v16si) __B,
562
+ (__v16si)
563
+ _mm512_setzero_si512 (),
564
+ (__mmask16) -1);
565
+ }
566
+
567
+ static __inline __m512i __DEFAULT_FN_ATTRS
568
+ _mm512_max_epi64(__m512i __A, __m512i __B)
569
+ { /* Wraps __builtin_ia32_pmaxsq512_mask: operands __A, __B; third operand _mm512_setzero_si512(); mask -1. */
570
+ return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
571
+ (__v8di) __B,
572
+ (__v8di)
573
+ _mm512_setzero_si512 (),
574
+ (__mmask8) -1);
575
+ }
576
+
577
+ static __inline __m512i __DEFAULT_FN_ATTRS
578
+ _mm512_max_epu64(__m512i __A, __m512i __B)
579
+ { /* Wraps __builtin_ia32_pmaxuq512_mask: operands __A, __B (passed as __v8di); third operand _mm512_setzero_si512(); mask -1. */
580
+ return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
581
+ (__v8di) __B,
582
+ (__v8di)
583
+ _mm512_setzero_si512 (),
584
+ (__mmask8) -1);
585
+ }
586
+
587
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
588
+ _mm512_min_pd(__m512d __A, __m512d __B)
589
+ { /* Wraps __builtin_ia32_minpd512_mask: operands __A, __B; third operand _mm512_setzero_pd(); mask -1; _MM_FROUND_CUR_DIRECTION last. */
590
+ return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
591
+ (__v8df) __B,
592
+ (__v8df)
593
+ _mm512_setzero_pd (),
594
+ (__mmask8) -1,
595
+ _MM_FROUND_CUR_DIRECTION);
596
+ }
597
+
598
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
599
+ _mm512_min_ps(__m512 __A, __m512 __B)
600
+ { /* Wraps __builtin_ia32_minps512_mask: operands __A, __B; third operand _mm512_setzero_ps(); mask -1; _MM_FROUND_CUR_DIRECTION last. */
601
+ return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
602
+ (__v16sf) __B,
603
+ (__v16sf)
604
+ _mm512_setzero_ps (),
605
+ (__mmask16) -1,
606
+ _MM_FROUND_CUR_DIRECTION);
607
+ }
608
+
609
+ static __inline __m512i
610
+ __DEFAULT_FN_ATTRS
611
+ _mm512_min_epi32(__m512i __A, __m512i __B)
612
+ { /* Wraps __builtin_ia32_pminsd512_mask: operands __A, __B; third operand _mm512_setzero_si512(); mask -1. */
613
+ return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
614
+ (__v16si) __B,
615
+ (__v16si)
616
+ _mm512_setzero_si512 (),
617
+ (__mmask16) -1);
618
+ }
619
+
620
+ static __inline __m512i __DEFAULT_FN_ATTRS
621
+ _mm512_min_epu32(__m512i __A, __m512i __B)
622
+ { /* Wraps __builtin_ia32_pminud512_mask: operands __A, __B (passed as __v16si); third operand _mm512_setzero_si512(); mask -1. */
623
+ return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
624
+ (__v16si) __B,
625
+ (__v16si)
626
+ _mm512_setzero_si512 (),
627
+ (__mmask16) -1);
628
+ }
629
+
630
+ static __inline __m512i __DEFAULT_FN_ATTRS
631
+ _mm512_min_epi64(__m512i __A, __m512i __B)
632
+ { /* Wraps __builtin_ia32_pminsq512_mask: operands __A, __B; third operand _mm512_setzero_si512(); mask -1. */
633
+ return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
634
+ (__v8di) __B,
635
+ (__v8di)
636
+ _mm512_setzero_si512 (),
637
+ (__mmask8) -1);
638
+ }
639
+
640
+ static __inline __m512i __DEFAULT_FN_ATTRS
641
+ _mm512_min_epu64(__m512i __A, __m512i __B)
642
+ { /* Wraps __builtin_ia32_pminuq512_mask: operands __A, __B (passed as __v8di); third operand _mm512_setzero_si512(); mask -1. */
643
+ return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
644
+ (__v8di) __B,
645
+ (__v8di)
646
+ _mm512_setzero_si512 (),
647
+ (__mmask8) -1);
648
+ }
649
+
650
+ static __inline __m512i __DEFAULT_FN_ATTRS
651
+ _mm512_mul_epi32(__m512i __X, __m512i __Y)
652
+ { /* Wraps __builtin_ia32_pmuldq512_mask: __X, __Y passed as __v16si; third operand _mm512_setzero_si512() as __v8di; mask (__mmask8) -1. */
653
+ return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
654
+ (__v16si) __Y,
655
+ (__v8di)
656
+ _mm512_setzero_si512 (),
657
+ (__mmask8) -1);
658
+ }
659
+
660
+ static __inline __m512i __DEFAULT_FN_ATTRS
661
+ _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
662
+ { /* Wraps __builtin_ia32_pmuldq512_mask: __X, __Y passed as __v16si; third operand __W as __v8di; mask __M. */
663
+ return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
664
+ (__v16si) __Y,
665
+ (__v8di) __W, __M);
666
+ }
667
+
668
+ static __inline __m512i __DEFAULT_FN_ATTRS
669
+ _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
670
+ { /* Wraps __builtin_ia32_pmuldq512_mask: __X, __Y passed as __v16si; third operand _mm512_setzero_si512() as __v8di; mask __M. */
671
+ return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
672
+ (__v16si) __Y,
673
+ (__v8di)
674
+ _mm512_setzero_si512 (),
675
+ __M);
676
+ }
677
+
678
+ static __inline __m512i __DEFAULT_FN_ATTRS
679
+ _mm512_mul_epu32(__m512i __X, __m512i __Y)
680
+ { /* Wraps __builtin_ia32_pmuludq512_mask: __X, __Y passed as __v16si; third operand _mm512_setzero_si512() as __v8di; mask (__mmask8) -1. */
681
+ return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
682
+ (__v16si) __Y,
683
+ (__v8di)
684
+ _mm512_setzero_si512 (),
685
+ (__mmask8) -1);
686
+ }
687
+
688
+ static __inline __m512i __DEFAULT_FN_ATTRS
689
+ _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
690
+ { /* Wraps __builtin_ia32_pmuludq512_mask: __X, __Y passed as __v16si; third operand __W as __v8di; mask __M. */
691
+ return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
692
+ (__v16si) __Y,
693
+ (__v8di) __W, __M);
694
+ }
695
+
696
+ static __inline __m512i __DEFAULT_FN_ATTRS
697
+ _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
698
+ { /* Wraps __builtin_ia32_pmuludq512_mask: __X, __Y passed as __v16si; third operand _mm512_setzero_si512() as __v8di; mask __M. */
699
+ return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
700
+ (__v16si) __Y,
701
+ (__v8di)
702
+ _mm512_setzero_si512 (),
703
+ __M);
704
+ }
705
+
706
+ static __inline __m512i __DEFAULT_FN_ATTRS
707
+ _mm512_mullo_epi32 (__m512i __A, __m512i __B)
708
+ { /* Vector '*' on __A and __B, both cast to __v16si; the product is cast back to __m512i. */
709
+ return (__m512i) ((__v16si) __A * (__v16si) __B);
710
+ }
711
+
712
+ static __inline __m512i __DEFAULT_FN_ATTRS
713
+ _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
714
+ { /* Wraps __builtin_ia32_pmulld512_mask: operands __A, __B; third operand _mm512_setzero_si512(); mask __M. */
715
+ return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
716
+ (__v16si) __B,
717
+ (__v16si)
718
+ _mm512_setzero_si512 (),
719
+ __M);
720
+ }
721
+
722
+ static __inline __m512i __DEFAULT_FN_ATTRS
723
+ _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
724
+ { /* Wraps __builtin_ia32_pmulld512_mask: operands __A, __B; third operand __W; mask __M. */
725
+ return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
726
+ (__v16si) __B,
727
+ (__v16si) __W, __M);
728
+ }
729
+
730
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
731
+ _mm512_sqrt_pd(__m512d a)
732
+ { /* Wraps __builtin_ia32_sqrtpd512_mask: operand a; second operand _mm512_setzero_pd(); mask -1; _MM_FROUND_CUR_DIRECTION last. */
733
+ return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a,
734
+ (__v8df) _mm512_setzero_pd (),
735
+ (__mmask8) -1,
736
+ _MM_FROUND_CUR_DIRECTION);
737
+ }
738
+
739
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
740
+ _mm512_sqrt_ps(__m512 a)
741
+ { /* Wraps __builtin_ia32_sqrtps512_mask: operand a; second operand _mm512_setzero_ps(); mask -1; _MM_FROUND_CUR_DIRECTION last. */
742
+ return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a,
743
+ (__v16sf) _mm512_setzero_ps (),
744
+ (__mmask16) -1,
745
+ _MM_FROUND_CUR_DIRECTION);
746
+ }
747
+
748
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
749
+ _mm512_rsqrt14_pd(__m512d __A)
750
+ { /* Wraps __builtin_ia32_rsqrt14pd512_mask: operand __A; second operand _mm512_setzero_pd(); mask -1. */
751
+ return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
752
+ (__v8df)
753
+ _mm512_setzero_pd (),
754
+ (__mmask8) -1);}
755
+
756
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
757
+ _mm512_rsqrt14_ps(__m512 __A)
758
+ { /* Wraps __builtin_ia32_rsqrt14ps512_mask: operand __A; second operand _mm512_setzero_ps(); mask -1. */
759
+ return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
760
+ (__v16sf)
761
+ _mm512_setzero_ps (),
762
+ (__mmask16) -1);
763
+ }
764
+
765
+ static __inline__ __m128 __DEFAULT_FN_ATTRS
766
+ _mm_rsqrt14_ss(__m128 __A, __m128 __B)
767
+ { /* Wraps __builtin_ia32_rsqrt14ss_mask: operands __A, __B; third operand _mm_setzero_ps(); mask (__mmask8) -1. */
768
+ return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
769
+ (__v4sf) __B,
770
+ (__v4sf)
771
+ _mm_setzero_ps (),
772
+ (__mmask8) -1);
773
+ }
774
+
775
+ static __inline__ __m128d __DEFAULT_FN_ATTRS
776
+ _mm_rsqrt14_sd(__m128d __A, __m128d __B)
777
+ { /* Wraps __builtin_ia32_rsqrt14sd_mask: operands __A, __B; third operand _mm_setzero_pd(); mask (__mmask8) -1. */
778
+ return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
779
+ (__v2df) __B,
780
+ (__v2df)
781
+ _mm_setzero_pd (),
782
+ (__mmask8) -1);
783
+ }
784
+
785
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
786
+ _mm512_rcp14_pd(__m512d __A)
787
+ { /* Wraps __builtin_ia32_rcp14pd512_mask: operand __A; second operand _mm512_setzero_pd(); mask -1. */
788
+ return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
789
+ (__v8df)
790
+ _mm512_setzero_pd (),
791
+ (__mmask8) -1);
792
+ }
793
+
794
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
795
+ _mm512_rcp14_ps(__m512 __A)
796
+ { /* Wraps __builtin_ia32_rcp14ps512_mask: operand __A; second operand _mm512_setzero_ps(); mask -1. */
797
+ return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
798
+ (__v16sf)
799
+ _mm512_setzero_ps (),
800
+ (__mmask16) -1);
801
+ }
802
+ static __inline__ __m128 __DEFAULT_FN_ATTRS
803
+ _mm_rcp14_ss(__m128 __A, __m128 __B)
804
+ { /* Wraps __builtin_ia32_rcp14ss_mask: operands __A, __B; third operand _mm_setzero_ps(); mask (__mmask8) -1. */
805
+ return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
806
+ (__v4sf) __B,
807
+ (__v4sf)
808
+ _mm_setzero_ps (),
809
+ (__mmask8) -1);
810
+ }
811
+
812
+ static __inline__ __m128d __DEFAULT_FN_ATTRS
813
+ _mm_rcp14_sd(__m128d __A, __m128d __B)
814
+ { /* Wraps __builtin_ia32_rcp14sd_mask: operands __A, __B; third operand _mm_setzero_pd(); mask (__mmask8) -1. */
815
+ return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
816
+ (__v2df) __B,
817
+ (__v2df)
818
+ _mm_setzero_pd (),
819
+ (__mmask8) -1);
820
+ }
821
+
822
+ static __inline __m512 __DEFAULT_FN_ATTRS
823
+ _mm512_floor_ps(__m512 __A)
824
+ { /* Wraps __builtin_ia32_rndscaleps_mask: source __A; second argument _MM_FROUND_FLOOR; __A again as third operand; mask -1; _MM_FROUND_CUR_DIRECTION last. */
825
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
826
+ _MM_FROUND_FLOOR,
827
+ (__v16sf) __A, -1,
828
+ _MM_FROUND_CUR_DIRECTION);
829
+ }
830
+
831
+ static __inline __m512d __DEFAULT_FN_ATTRS
832
+ _mm512_floor_pd(__m512d __A)
833
+ { /* Wraps __builtin_ia32_rndscalepd_mask: source __A; second argument _MM_FROUND_FLOOR; __A again as third operand; mask -1; _MM_FROUND_CUR_DIRECTION last. */
834
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
835
+ _MM_FROUND_FLOOR,
836
+ (__v8df) __A, -1,
837
+ _MM_FROUND_CUR_DIRECTION);
838
+ }
839
+
840
+ static __inline __m512 __DEFAULT_FN_ATTRS
841
+ _mm512_ceil_ps(__m512 __A)
842
+ { /* Wraps __builtin_ia32_rndscaleps_mask: source __A; second argument _MM_FROUND_CEIL; __A again as third operand; mask -1; _MM_FROUND_CUR_DIRECTION last. */
843
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
844
+ _MM_FROUND_CEIL,
845
+ (__v16sf) __A, -1,
846
+ _MM_FROUND_CUR_DIRECTION);
847
+ }
848
+
849
+ static __inline __m512d __DEFAULT_FN_ATTRS
850
+ _mm512_ceil_pd(__m512d __A)
851
+ { /* Wraps __builtin_ia32_rndscalepd_mask: source __A; second argument _MM_FROUND_CEIL; __A again as third operand; mask -1; _MM_FROUND_CUR_DIRECTION last. */
852
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
853
+ _MM_FROUND_CEIL,
854
+ (__v8df) __A, -1,
855
+ _MM_FROUND_CUR_DIRECTION);
856
+ }
857
+
858
+ static __inline __m512i __DEFAULT_FN_ATTRS
859
+ _mm512_abs_epi64(__m512i __A)
860
+ { /* Wraps __builtin_ia32_pabsq512_mask: operand __A; second operand _mm512_setzero_si512(); mask -1. */
861
+ return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
862
+ (__v8di)
863
+ _mm512_setzero_si512 (),
864
+ (__mmask8) -1);
865
+ }
866
+
867
+ static __inline __m512i __DEFAULT_FN_ATTRS
868
+ _mm512_abs_epi32(__m512i __A)
869
+ { /* Wraps __builtin_ia32_pabsd512_mask: operand __A; second operand _mm512_setzero_si512(); mask -1. */
870
+ return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
871
+ (__v16si)
872
+ _mm512_setzero_si512 (),
873
+ (__mmask16) -1);
874
+ }
875
+
876
+ #define _mm512_roundscale_ps(A, B) __extension__ ({ /* Expands to __builtin_ia32_rndscaleps_mask on A with B as the second argument, mask -1 and _MM_FROUND_CUR_DIRECTION last. A is expanded exactly once: the third operand is _mm512_setzero_ps() rather than a second copy of A, so an argument with side effects is not evaluated twice. NOTE(review): with a -1 mask the third operand should not reach the result - confirm against the builtin's contract. */ \
877
+ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(A), (B), (__v16sf)_mm512_setzero_ps(), \
878
+ -1, _MM_FROUND_CUR_DIRECTION); })
879
+
880
+ #define _mm512_roundscale_pd(A, B) __extension__ ({ /* Expands to __builtin_ia32_rndscalepd_mask on A with B as the second argument, mask -1 and _MM_FROUND_CUR_DIRECTION last. A is expanded exactly once: the third operand is _mm512_setzero_pd() rather than a second copy of A, so an argument with side effects is not evaluated twice. NOTE(review): with a -1 mask the third operand should not reach the result - confirm against the builtin's contract. */ \
881
+ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(A), (B), (__v8df)_mm512_setzero_pd(), \
882
+ -1, _MM_FROUND_CUR_DIRECTION); })
883
+
884
+ #define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddpd512_mask on A, B, C; mask -1; R forwarded as the last argument. */ \
885
+ (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
886
+ (__v8df) (B), (__v8df) (C), \
887
+ (__mmask8) -1, (R)); })
888
+
889
+
890
+ #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddpd512_mask on A, B, C; mask U; R forwarded as the last argument. */ \
891
+ (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
892
+ (__v8df) (B), (__v8df) (C), \
893
+ (__mmask8) (U), (R)); })
894
+
895
+
896
+ #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddpd512_mask3 on A, B, C; mask U; R forwarded as the last argument. */ \
897
+ (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) (A), \
898
+ (__v8df) (B), (__v8df) (C), \
899
+ (__mmask8) (U), (R)); })
900
+
901
+
902
+ #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddpd512_maskz on A, B, C; mask U; R forwarded as the last argument. */ \
903
+ (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) (A), \
904
+ (__v8df) (B), (__v8df) (C), \
905
+ (__mmask8) (U), (R)); })
906
+
907
+
908
+ #define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddpd512_mask on A, B and negated C; mask -1; R forwarded as the last argument. */ \
909
+ (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
910
+ (__v8df) (B), -(__v8df) (C), \
911
+ (__mmask8) -1, (R)); })
912
+
913
+
914
+ #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddpd512_mask on A, B and negated C; mask U; R forwarded as the last argument. */ \
915
+ (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
916
+ (__v8df) (B), -(__v8df) (C), \
917
+ (__mmask8) (U), (R)); })
918
+
919
+
920
+ #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddpd512_maskz on A, B and negated C; mask U; R forwarded as the last argument. */ \
921
+ (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) (A), \
922
+ (__v8df) (B), -(__v8df) (C), \
923
+ (__mmask8) (U), (R)); })
924
+
925
+
926
+ #define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddpd512_mask on negated A, B, C; mask -1; R forwarded as the last argument. */ \
927
+ (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) (A), \
928
+ (__v8df) (B), (__v8df) (C), \
929
+ (__mmask8) -1, (R)); })
930
+
931
+
932
+ #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddpd512_mask3 on negated A, B, C; mask U; R forwarded as the last argument. */ \
933
+ (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) (A), \
934
+ (__v8df) (B), (__v8df) (C), \
935
+ (__mmask8) (U), (R)); })
936
+
937
+
938
+ #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddpd512_maskz on negated A, B, C; mask U; R forwarded as the last argument. */ \
939
+ (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) (A), \
940
+ (__v8df) (B), (__v8df) (C), \
941
+ (__mmask8) (U), (R)); })
942
+
943
+
944
+ #define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddpd512_mask on negated A, B and negated C; mask -1; R forwarded as the last argument. */ \
945
+ (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) (A), \
946
+ (__v8df) (B), -(__v8df) (C), \
947
+ (__mmask8) -1, (R)); })
948
+
949
+
950
+ #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddpd512_maskz on negated A, B and negated C; mask U; R forwarded as the last argument. */ \
951
+ (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) (A), \
952
+ (__v8df) (B), -(__v8df) (C), \
953
+ (__mmask8) (U), (R)); })
954
+
955
+
956
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
957
+ _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
958
+ { /* Wraps __builtin_ia32_vfmaddpd512_mask: operands __A, __B, __C; mask -1; _MM_FROUND_CUR_DIRECTION last. */
959
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
960
+ (__v8df) __B,
961
+ (__v8df) __C,
962
+ (__mmask8) -1,
963
+ _MM_FROUND_CUR_DIRECTION);
964
+ }
965
+
966
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
967
+ _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
968
+ { /* Wraps __builtin_ia32_vfmaddpd512_mask: operands __A, __B, __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
969
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
970
+ (__v8df) __B,
971
+ (__v8df) __C,
972
+ (__mmask8) __U,
973
+ _MM_FROUND_CUR_DIRECTION);
974
+ }
975
+
976
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
977
+ _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
978
+ { /* Wraps __builtin_ia32_vfmaddpd512_mask3: operands __A, __B, __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
979
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
980
+ (__v8df) __B,
981
+ (__v8df) __C,
982
+ (__mmask8) __U,
983
+ _MM_FROUND_CUR_DIRECTION);
984
+ }
985
+
986
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
987
+ _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
988
+ { /* Wraps __builtin_ia32_vfmaddpd512_maskz: operands __A, __B, __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
989
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
990
+ (__v8df) __B,
991
+ (__v8df) __C,
992
+ (__mmask8) __U,
993
+ _MM_FROUND_CUR_DIRECTION);
994
+ }
995
+
996
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
997
+ _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
998
+ { /* Wraps __builtin_ia32_vfmaddpd512_mask: operands __A, __B and negated __C; mask -1; _MM_FROUND_CUR_DIRECTION last. */
999
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1000
+ (__v8df) __B,
1001
+ -(__v8df) __C,
1002
+ (__mmask8) -1,
1003
+ _MM_FROUND_CUR_DIRECTION);
1004
+ }
1005
+
1006
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1007
+ _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1008
+ { /* Wraps __builtin_ia32_vfmaddpd512_mask: operands __A, __B and negated __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
1009
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1010
+ (__v8df) __B,
1011
+ -(__v8df) __C,
1012
+ (__mmask8) __U,
1013
+ _MM_FROUND_CUR_DIRECTION);
1014
+ }
1015
+
1016
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1017
+ _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1018
+ { /* Wraps __builtin_ia32_vfmaddpd512_maskz: operands __A, __B and negated __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
1019
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
1020
+ (__v8df) __B,
1021
+ -(__v8df) __C,
1022
+ (__mmask8) __U,
1023
+ _MM_FROUND_CUR_DIRECTION);
1024
+ }
1025
+
1026
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1027
+ _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
1028
+ { /* Wraps __builtin_ia32_vfmaddpd512_mask: operands negated __A, __B, __C; mask -1; _MM_FROUND_CUR_DIRECTION last. */
1029
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
1030
+ (__v8df) __B,
1031
+ (__v8df) __C,
1032
+ (__mmask8) -1,
1033
+ _MM_FROUND_CUR_DIRECTION);
1034
+ }
1035
+
1036
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1037
+ _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
1038
+ { /* Wraps __builtin_ia32_vfmaddpd512_mask3: operands negated __A, __B, __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
1039
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
1040
+ (__v8df) __B,
1041
+ (__v8df) __C,
1042
+ (__mmask8) __U,
1043
+ _MM_FROUND_CUR_DIRECTION);
1044
+ }
1045
+
1046
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1047
+ _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1048
+ { /* Wraps __builtin_ia32_vfmaddpd512_maskz: operands negated __A, __B, __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
1049
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
1050
+ (__v8df) __B,
1051
+ (__v8df) __C,
1052
+ (__mmask8) __U,
1053
+ _MM_FROUND_CUR_DIRECTION);
1054
+ }
1055
+
1056
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1057
+ _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
1058
+ { /* Wraps __builtin_ia32_vfmaddpd512_mask: operands negated __A, __B and negated __C; mask -1; _MM_FROUND_CUR_DIRECTION last. */
1059
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
1060
+ (__v8df) __B,
1061
+ -(__v8df) __C,
1062
+ (__mmask8) -1,
1063
+ _MM_FROUND_CUR_DIRECTION);
1064
+ }
1065
+
1066
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1067
+ _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1068
+ { /* Wraps __builtin_ia32_vfmaddpd512_maskz: operands negated __A, __B and negated __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
1069
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
1070
+ (__v8df) __B,
1071
+ -(__v8df) __C,
1072
+ (__mmask8) __U,
1073
+ _MM_FROUND_CUR_DIRECTION);
1074
+ }
1075
+
1076
+ #define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddps512_mask on A, B, C; mask -1; R forwarded as the last argument. */ \
1077
+ (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
1078
+ (__v16sf) (B), (__v16sf) (C), \
1079
+ (__mmask16) -1, (R)); })
1080
+
1081
+
1082
+ #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddps512_mask on A, B, C; mask U; R forwarded as the last argument. */ \
1083
+ (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
1084
+ (__v16sf) (B), (__v16sf) (C), \
1085
+ (__mmask16) (U), (R)); })
1086
+
1087
+
1088
+ #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddps512_mask3 on A, B, C; mask U; R forwarded as the last argument. */ \
1089
+ (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) (A), \
1090
+ (__v16sf) (B), (__v16sf) (C), \
1091
+ (__mmask16) (U), (R)); })
1092
+
1093
+
1094
+ #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddps512_maskz on A, B, C; mask U; R forwarded as the last argument. */ \
1095
+ (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) (A), \
1096
+ (__v16sf) (B), (__v16sf) (C), \
1097
+ (__mmask16) (U), (R)); })
1098
+
1099
+
1100
+ #define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddps512_mask on A, B and negated C; mask -1; R forwarded as the last argument. */ \
1101
+ (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
1102
+ (__v16sf) (B), -(__v16sf) (C), \
1103
+ (__mmask16) -1, (R)); })
1104
+
1105
+
1106
+ #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddps512_mask on A, B and negated C; mask U; R forwarded as the last argument. */ \
1107
+ (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
1108
+ (__v16sf) (B), -(__v16sf) (C), \
1109
+ (__mmask16) (U), (R)); })
1110
+
1111
+
1112
+ #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddps512_maskz on A, B and negated C; mask U; R forwarded as the last argument. */ \
1113
+ (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) (A), \
1114
+ (__v16sf) (B), -(__v16sf) (C), \
1115
+ (__mmask16) (U), (R)); })
1116
+
1117
+
1118
+ #define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddps512_mask on negated A, B, C; mask -1; R forwarded as the last argument. */ \
1119
+ (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) (A), \
1120
+ (__v16sf) (B), (__v16sf) (C), \
1121
+ (__mmask16) -1, (R)); })
1122
+
1123
+
1124
+ #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddps512_mask3 on negated A, B, C; mask U; R forwarded as the last argument. */ \
1125
+ (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) (A), \
1126
+ (__v16sf) (B), (__v16sf) (C), \
1127
+ (__mmask16) (U), (R)); })
1128
+
1129
+
1130
+ #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddps512_maskz on negated A, B, C; mask U; R forwarded as the last argument. */ \
1131
+ (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) (A), \
1132
+ (__v16sf) (B), (__v16sf) (C), \
1133
+ (__mmask16) (U), (R)); })
1134
+
1135
+
1136
+ #define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddps512_mask on negated A, B and negated C; mask -1; R forwarded as the last argument. */ \
1137
+ (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) (A), \
1138
+ (__v16sf) (B), -(__v16sf) (C), \
1139
+ (__mmask16) -1, (R)); })
1140
+
1141
+
1142
+ #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddps512_maskz on negated A, B and negated C; mask U; R forwarded as the last argument. */ \
1143
+ (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) (A), \
1144
+ (__v16sf) (B), -(__v16sf) (C), \
1145
+ (__mmask16) (U), (R)); })
1146
+
1147
+
1148
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1149
+ _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
1150
+ { /* Wraps __builtin_ia32_vfmaddps512_mask: operands __A, __B, __C; mask -1; _MM_FROUND_CUR_DIRECTION last. */
1151
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
1152
+ (__v16sf) __B,
1153
+ (__v16sf) __C,
1154
+ (__mmask16) -1,
1155
+ _MM_FROUND_CUR_DIRECTION);
1156
+ }
1157
+
1158
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1159
+ _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
1160
+ { /* Wraps __builtin_ia32_vfmaddps512_mask: operands __A, __B, __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
1161
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
1162
+ (__v16sf) __B,
1163
+ (__v16sf) __C,
1164
+ (__mmask16) __U,
1165
+ _MM_FROUND_CUR_DIRECTION);
1166
+ }
1167
+
1168
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1169
+ _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
1170
+ { /* Wraps __builtin_ia32_vfmaddps512_mask3: operands __A, __B, __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
1171
+ return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
1172
+ (__v16sf) __B,
1173
+ (__v16sf) __C,
1174
+ (__mmask16) __U,
1175
+ _MM_FROUND_CUR_DIRECTION);
1176
+ }
1177
+
1178
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1179
+ _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1180
+ { /* Wraps __builtin_ia32_vfmaddps512_maskz: operands __A, __B, __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
1181
+ return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
1182
+ (__v16sf) __B,
1183
+ (__v16sf) __C,
1184
+ (__mmask16) __U,
1185
+ _MM_FROUND_CUR_DIRECTION);
1186
+ }
1187
+
1188
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1189
+ _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
1190
+ { /* Wraps __builtin_ia32_vfmaddps512_mask: operands __A, __B and negated __C; mask -1; _MM_FROUND_CUR_DIRECTION last. */
1191
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
1192
+ (__v16sf) __B,
1193
+ -(__v16sf) __C,
1194
+ (__mmask16) -1,
1195
+ _MM_FROUND_CUR_DIRECTION);
1196
+ }
1197
+
1198
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1199
+ _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
1200
+ { /* Wraps __builtin_ia32_vfmaddps512_mask: operands __A, __B and negated __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
1201
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
1202
+ (__v16sf) __B,
1203
+ -(__v16sf) __C,
1204
+ (__mmask16) __U,
1205
+ _MM_FROUND_CUR_DIRECTION);
1206
+ }
1207
+
1208
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1209
+ _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1210
+ { /* Wraps __builtin_ia32_vfmaddps512_maskz: operands __A, __B and negated __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
1211
+ return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
1212
+ (__v16sf) __B,
1213
+ -(__v16sf) __C,
1214
+ (__mmask16) __U,
1215
+ _MM_FROUND_CUR_DIRECTION);
1216
+ }
1217
+
1218
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1219
+ _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
1220
+ { /* Wraps __builtin_ia32_vfmaddps512_mask: operands negated __A, __B, __C; mask -1; _MM_FROUND_CUR_DIRECTION last. */
1221
+ return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
1222
+ (__v16sf) __B,
1223
+ (__v16sf) __C,
1224
+ (__mmask16) -1,
1225
+ _MM_FROUND_CUR_DIRECTION);
1226
+ }
1227
+
1228
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1229
+ _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
1230
+ { /* Wraps __builtin_ia32_vfmaddps512_mask3: operands negated __A, __B, __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
1231
+ return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
1232
+ (__v16sf) __B,
1233
+ (__v16sf) __C,
1234
+ (__mmask16) __U,
1235
+ _MM_FROUND_CUR_DIRECTION);
1236
+ }
1237
+
1238
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1239
+ _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1240
+ { /* Wraps __builtin_ia32_vfmaddps512_maskz: operands negated __A, __B, __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
1241
+ return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
1242
+ (__v16sf) __B,
1243
+ (__v16sf) __C,
1244
+ (__mmask16) __U,
1245
+ _MM_FROUND_CUR_DIRECTION);
1246
+ }
1247
+
1248
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1249
+ _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
1250
+ { /* Wraps __builtin_ia32_vfmaddps512_mask: operands negated __A, __B and negated __C; mask -1; _MM_FROUND_CUR_DIRECTION last. */
1251
+ return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
1252
+ (__v16sf) __B,
1253
+ -(__v16sf) __C,
1254
+ (__mmask16) -1,
1255
+ _MM_FROUND_CUR_DIRECTION);
1256
+ }
1257
+
1258
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1259
+ _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1260
+ { /* Wraps __builtin_ia32_vfmaddps512_maskz: operands negated __A, __B and negated __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
1261
+ return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
1262
+ (__v16sf) __B,
1263
+ -(__v16sf) __C,
1264
+ (__mmask16) __U,
1265
+ _MM_FROUND_CUR_DIRECTION);
1266
+ }
1267
+
1268
+ #define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddsubpd512_mask on A, B, C; mask -1; R forwarded as the last argument. */ \
1269
+ (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
1270
+ (__v8df) (B), (__v8df) (C), \
1271
+ (__mmask8) -1, (R)); })
1272
+
1273
+
1274
+ #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddsubpd512_mask on A, B, C; mask U; R forwarded as the last argument. */ \
1275
+ (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
1276
+ (__v8df) (B), (__v8df) (C), \
1277
+ (__mmask8) (U), (R)); })
1278
+
1279
+
1280
+ #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddsubpd512_mask3 on A, B, C; mask U; R forwarded as the last argument. */ \
1281
+ (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) (A), \
1282
+ (__v8df) (B), (__v8df) (C), \
1283
+ (__mmask8) (U), (R)); })
1284
+
1285
+
1286
+ #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddsubpd512_maskz on A, B, C; mask U; R forwarded as the last argument. */ \
1287
+ (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) (A), \
1288
+ (__v8df) (B), (__v8df) (C), \
1289
+ (__mmask8) (U), (R)); })
1290
+
1291
+
1292
+ #define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddsubpd512_mask on A, B and negated C; mask -1; R forwarded as the last argument. */ \
1293
+ (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
1294
+ (__v8df) (B), -(__v8df) (C), \
1295
+ (__mmask8) -1, (R)); })
1296
+
1297
+
1298
+ #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddsubpd512_mask on A, B and negated C; mask U; R forwarded as the last argument. */ \
1299
+ (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
1300
+ (__v8df) (B), -(__v8df) (C), \
1301
+ (__mmask8) (U), (R)); })
1302
+
1303
+
1304
+ #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddsubpd512_maskz on A, B and negated C; mask U; R forwarded as the last argument. */ \
1305
+ (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) (A), \
1306
+ (__v8df) (B), -(__v8df) (C), \
1307
+ (__mmask8) (U), (R)); })
1308
+
1309
+
1310
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1311
+ _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
1312
+ { /* Wraps __builtin_ia32_vfmaddsubpd512_mask: operands __A, __B, __C; mask -1; _MM_FROUND_CUR_DIRECTION last. */
1313
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
1314
+ (__v8df) __B,
1315
+ (__v8df) __C,
1316
+ (__mmask8) -1,
1317
+ _MM_FROUND_CUR_DIRECTION);
1318
+ }
1319
+
1320
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1321
+ _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1322
+ { /* Wraps __builtin_ia32_vfmaddsubpd512_mask: operands __A, __B, __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
1323
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
1324
+ (__v8df) __B,
1325
+ (__v8df) __C,
1326
+ (__mmask8) __U,
1327
+ _MM_FROUND_CUR_DIRECTION);
1328
+ }
1329
+
1330
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1331
+ _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
1332
+ { /* Wraps __builtin_ia32_vfmaddsubpd512_mask3: operands __A, __B, __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
1333
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
1334
+ (__v8df) __B,
1335
+ (__v8df) __C,
1336
+ (__mmask8) __U,
1337
+ _MM_FROUND_CUR_DIRECTION);
1338
+ }
1339
+
1340
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1341
+ _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1342
+ { /* Wraps __builtin_ia32_vfmaddsubpd512_maskz: operands __A, __B, __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
1343
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
1344
+ (__v8df) __B,
1345
+ (__v8df) __C,
1346
+ (__mmask8) __U,
1347
+ _MM_FROUND_CUR_DIRECTION);
1348
+ }
1349
+
1350
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1351
+ _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
1352
+ { /* Wraps __builtin_ia32_vfmaddsubpd512_mask: operands __A, __B and negated __C; mask -1; _MM_FROUND_CUR_DIRECTION last. */
1353
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
1354
+ (__v8df) __B,
1355
+ -(__v8df) __C,
1356
+ (__mmask8) -1,
1357
+ _MM_FROUND_CUR_DIRECTION);
1358
+ }
1359
+
1360
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1361
+ _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1362
+ { /* Wraps __builtin_ia32_vfmaddsubpd512_mask: operands __A, __B and negated __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
1363
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
1364
+ (__v8df) __B,
1365
+ -(__v8df) __C,
1366
+ (__mmask8) __U,
1367
+ _MM_FROUND_CUR_DIRECTION);
1368
+ }
1369
+
1370
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1371
+ _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1372
+ { /* Wraps __builtin_ia32_vfmaddsubpd512_maskz: operands __A, __B and negated __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
1373
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
1374
+ (__v8df) __B,
1375
+ -(__v8df) __C,
1376
+ (__mmask8) __U,
1377
+ _MM_FROUND_CUR_DIRECTION);
1378
+ }
1379
+
1380
+ #define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddsubps512_mask on A, B, C; mask -1; R forwarded as the last argument. */ \
1381
+ (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
1382
+ (__v16sf) (B), (__v16sf) (C), \
1383
+ (__mmask16) -1, (R)); })
1384
+
1385
+
1386
+ #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddsubps512_mask on A, B, C; mask U; R forwarded as the last argument. */ \
1387
+ (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
1388
+ (__v16sf) (B), (__v16sf) (C), \
1389
+ (__mmask16) (U), (R)); })
1390
+
1391
+
1392
+ #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddsubps512_mask3 on A, B, C; mask U; R forwarded as the last argument. */ \
1393
+ (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) (A), \
1394
+ (__v16sf) (B), (__v16sf) (C), \
1395
+ (__mmask16) (U), (R)); })
1396
+
1397
+
1398
+ #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddsubps512_maskz on A, B, C; mask U; R forwarded as the last argument. */ \
1399
+ (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) (A), \
1400
+ (__v16sf) (B), (__v16sf) (C), \
1401
+ (__mmask16) (U), (R)); })
1402
+
1403
+
1404
+ #define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddsubps512_mask on A, B and negated C; mask -1; R forwarded as the last argument. */ \
1405
+ (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
1406
+ (__v16sf) (B), -(__v16sf) (C), \
1407
+ (__mmask16) -1, (R)); })
1408
+
1409
+
1410
+ #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddsubps512_mask on A, B and negated C; mask U; R forwarded as the last argument. */ \
1411
+ (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
1412
+ (__v16sf) (B), -(__v16sf) (C), \
1413
+ (__mmask16) (U), (R)); })
1414
+
1415
+
1416
+ #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ /* Expands to __builtin_ia32_vfmaddsubps512_maskz on A, B and negated C; mask U; R forwarded as the last argument. */ \
1417
+ (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) (A), \
1418
+ (__v16sf) (B), -(__v16sf) (C), \
1419
+ (__mmask16) (U), (R)); })
1420
+
1421
+
1422
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1423
+ _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
1424
+ { /* Wraps __builtin_ia32_vfmaddsubps512_mask: operands __A, __B, __C; mask -1; _MM_FROUND_CUR_DIRECTION last. */
1425
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
1426
+ (__v16sf) __B,
1427
+ (__v16sf) __C,
1428
+ (__mmask16) -1,
1429
+ _MM_FROUND_CUR_DIRECTION);
1430
+ }
1431
+
1432
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1433
+ _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
1434
+ { /* Wraps __builtin_ia32_vfmaddsubps512_mask: operands __A, __B, __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
1435
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
1436
+ (__v16sf) __B,
1437
+ (__v16sf) __C,
1438
+ (__mmask16) __U,
1439
+ _MM_FROUND_CUR_DIRECTION);
1440
+ }
1441
+
1442
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1443
+ _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
1444
+ { /* Wraps __builtin_ia32_vfmaddsubps512_mask3: operands __A, __B, __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
1445
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
1446
+ (__v16sf) __B,
1447
+ (__v16sf) __C,
1448
+ (__mmask16) __U,
1449
+ _MM_FROUND_CUR_DIRECTION);
1450
+ }
1451
+
1452
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1453
+ _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1454
+ { /* Wraps __builtin_ia32_vfmaddsubps512_maskz: operands __A, __B, __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
1455
+ return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
1456
+ (__v16sf) __B,
1457
+ (__v16sf) __C,
1458
+ (__mmask16) __U,
1459
+ _MM_FROUND_CUR_DIRECTION);
1460
+ }
1461
+
1462
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1463
+ _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
1464
+ { /* Wraps __builtin_ia32_vfmaddsubps512_mask: operands __A, __B and negated __C; mask -1; _MM_FROUND_CUR_DIRECTION last. */
1465
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
1466
+ (__v16sf) __B,
1467
+ -(__v16sf) __C,
1468
+ (__mmask16) -1,
1469
+ _MM_FROUND_CUR_DIRECTION);
1470
+ }
1471
+
1472
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1473
+ _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
1474
+ { /* Wraps __builtin_ia32_vfmaddsubps512_mask: operands __A, __B and negated __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
1475
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
1476
+ (__v16sf) __B,
1477
+ -(__v16sf) __C,
1478
+ (__mmask16) __U,
1479
+ _MM_FROUND_CUR_DIRECTION);
1480
+ }
1481
+
1482
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1483
+ _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1484
+ { /* Wraps __builtin_ia32_vfmaddsubps512_maskz: operands __A, __B and negated __C; mask __U; _MM_FROUND_CUR_DIRECTION last. */
1485
+ return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
1486
+ (__v16sf) __B,
1487
+ -(__v16sf) __C,
1488
+ (__mmask16) __U,
1489
+ _MM_FROUND_CUR_DIRECTION);
1490
+ }
1491
+
1492
+ #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ /* Expands to __builtin_ia32_vfmsubpd512_mask3 on A, B, C (no negation in the wrapper); mask U; R forwarded as the last argument. */ \
1493
+ (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) (A), \
1494
+ (__v8df) (B), (__v8df) (C), \
1495
+ (__mmask8) (U), (R)); })
1496
+
1497
+
1498
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1499
+ _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
1500
+ { /* Wraps __builtin_ia32_vfmsubpd512_mask3: operands __A, __B, __C (no negation in the wrapper); mask __U; _MM_FROUND_CUR_DIRECTION last. */
1501
+ return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
1502
+ (__v8df) __B,
1503
+ (__v8df) __C,
1504
+ (__mmask8) __U,
1505
+ _MM_FROUND_CUR_DIRECTION);
1506
+ }
1507
+
1508
+ #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ /* Expands to __builtin_ia32_vfmsubps512_mask3 on A, B, C (no negation in the wrapper); mask U; R forwarded as the last argument. */ \
1509
+ (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) (A), \
1510
+ (__v16sf) (B), (__v16sf) (C), \
1511
+ (__mmask16) (U), (R)); })
1512
+
1513
+
1514
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1515
+ _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
1516
+ { /* Wraps __builtin_ia32_vfmsubps512_mask3: operands __A, __B, __C (no negation in the wrapper); mask __U; _MM_FROUND_CUR_DIRECTION last. */
1517
+ return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
1518
+ (__v16sf) __B,
1519
+ (__v16sf) __C,
1520
+ (__mmask16) __U,
1521
+ _MM_FROUND_CUR_DIRECTION);
1522
+ }
1523
+
1524
+ #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ /* Expands to __builtin_ia32_vfmsubaddpd512_mask3 on A, B, C (no negation in the wrapper); mask U; R forwarded as the last argument. */ \
1525
+ (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) (A), \
1526
+ (__v8df) (B), (__v8df) (C), \
1527
+ (__mmask8) (U), (R)); })
1528
+
1529
+
1530
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1531
+ _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
1532
+ { /* Wraps __builtin_ia32_vfmsubaddpd512_mask3: operands __A, __B, __C (no negation in the wrapper); mask __U; _MM_FROUND_CUR_DIRECTION last. */
1533
+ return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
1534
+ (__v8df) __B,
1535
+ (__v8df) __C,
1536
+ (__mmask8) __U,
1537
+ _MM_FROUND_CUR_DIRECTION);
1538
+ }
1539
+
1540
/* Explicit-rounding form: R is passed through unchanged as the last operand
   of __builtin_ia32_vfmsubaddps512_mask3. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(A), \
                                              (__v16sf)(B), \
                                              (__v16sf)(C), \
                                              (__mmask16)(U), \
                                              (R)); })
1544
+
1545
+
1546
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1547
+ _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
1548
+ {
1549
+ return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
1550
+ (__v16sf) __B,
1551
+ (__v16sf) __C,
1552
+ (__mmask16) __U,
1553
+ _MM_FROUND_CUR_DIRECTION);
1554
+ }
1555
+
1556
/* Explicit-rounding form: note the macro takes (A, U, B, C, R) but the builtin
   receives the mask after the three vector operands. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(A), \
                                            (__v8df)(B), \
                                            (__v8df)(C), \
                                            (__mmask8)(U), \
                                            (R)); })
1560
+
1561
+
1562
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1563
+ _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1564
+ {
1565
+ return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
1566
+ (__v8df) __B,
1567
+ (__v8df) __C,
1568
+ (__mmask8) __U,
1569
+ _MM_FROUND_CUR_DIRECTION);
1570
+ }
1571
+
1572
/* Explicit-rounding form: note the macro takes (A, U, B, C, R) but the builtin
   receives the mask after the three vector operands. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(A), \
                                           (__v16sf)(B), \
                                           (__v16sf)(C), \
                                           (__mmask16)(U), \
                                           (R)); })
1576
+
1577
+
1578
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1579
+ _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
1580
+ {
1581
+ return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
1582
+ (__v16sf) __B,
1583
+ (__v16sf) __C,
1584
+ (__mmask16) __U,
1585
+ _MM_FROUND_CUR_DIRECTION);
1586
+ }
1587
+
1588
/* Explicit-rounding form: note the macro takes (A, U, B, C, R) but the builtin
   receives the mask after the three vector operands. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(A), \
                                            (__v8df)(B), \
                                            (__v8df)(C), \
                                            (__mmask8)(U), \
                                            (R)); })
1592
+
1593
+
1594
/* Explicit-rounding form: R is passed through unchanged as the last operand
   of __builtin_ia32_vfnmsubpd512_mask3. */
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(A), \
                                             (__v8df)(B), \
                                             (__v8df)(C), \
                                             (__mmask8)(U), \
                                             (R)); })
1598
+
1599
+
1600
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1601
+ _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1602
+ {
1603
+ return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
1604
+ (__v8df) __B,
1605
+ (__v8df) __C,
1606
+ (__mmask8) __U,
1607
+ _MM_FROUND_CUR_DIRECTION);
1608
+ }
1609
+
1610
+ static __inline__ __m512d __DEFAULT_FN_ATTRS
1611
+ _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
1612
+ {
1613
+ return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
1614
+ (__v8df) __B,
1615
+ (__v8df) __C,
1616
+ (__mmask8) __U,
1617
+ _MM_FROUND_CUR_DIRECTION);
1618
+ }
1619
+
1620
/* Explicit-rounding form: note the macro takes (A, U, B, C, R) but the builtin
   receives the mask after the three vector operands. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(A), \
                                           (__v16sf)(B), \
                                           (__v16sf)(C), \
                                           (__mmask16)(U), \
                                           (R)); })
1624
+
1625
+
1626
/* Explicit-rounding form: R is passed through unchanged as the last operand
   of __builtin_ia32_vfnmsubps512_mask3. */
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(A), \
                                            (__v16sf)(B), \
                                            (__v16sf)(C), \
                                            (__mmask16)(U), \
                                            (R)); })
1630
+
1631
+
1632
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1633
+ _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
1634
+ {
1635
+ return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
1636
+ (__v16sf) __B,
1637
+ (__v16sf) __C,
1638
+ (__mmask16) __U,
1639
+ _MM_FROUND_CUR_DIRECTION);
1640
+ }
1641
+
1642
+ static __inline__ __m512 __DEFAULT_FN_ATTRS
1643
+ _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
1644
+ {
1645
+ return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
1646
+ (__v16sf) __B,
1647
+ (__v16sf) __C,
1648
+ (__mmask16) __U,
1649
+ _MM_FROUND_CUR_DIRECTION);
1650
+ }
1651
+
1652
+
1653
+
1654
+ /* Vector permutations */
1655
+
1656
+ static __inline __m512i __DEFAULT_FN_ATTRS
1657
+ _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
1658
+ {
1659
+ return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
1660
+ /* idx */ ,
1661
+ (__v16si) __A,
1662
+ (__v16si) __B,
1663
+ (__mmask16) -1);
1664
+ }
1665
+ static __inline __m512i __DEFAULT_FN_ATTRS
1666
+ _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
1667
+ {
1668
+ return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
1669
+ /* idx */ ,
1670
+ (__v8di) __A,
1671
+ (__v8di) __B,
1672
+ (__mmask8) -1);
1673
+ }
1674
+
1675
+ static __inline __m512d __DEFAULT_FN_ATTRS
1676
+ _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
1677
+ {
1678
+ return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
1679
+ /* idx */ ,
1680
+ (__v8df) __A,
1681
+ (__v8df) __B,
1682
+ (__mmask8) -1);
1683
+ }
1684
+ static __inline __m512 __DEFAULT_FN_ATTRS
1685
+ _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
1686
+ {
1687
+ return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
1688
+ /* idx */ ,
1689
+ (__v16sf) __A,
1690
+ (__v16sf) __B,
1691
+ (__mmask16) -1);
1692
+ }
1693
+
1694
/* Unmasked use of __builtin_ia32_alignq512_mask: zero pass-through vector and
   an all-ones mask; I is forwarded as the immediate. */
#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
  (__m512i)__builtin_ia32_alignq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (I), \
      (__v8di)_mm512_setzero_si512(), (__mmask8)-1); })
1699
+
1700
/* Unmasked use of __builtin_ia32_alignd512_mask: zero pass-through vector and
   an all-ones mask; I is forwarded as the immediate. */
#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
  (__m512i)__builtin_ia32_alignd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (I), \
      (__v16si)_mm512_setzero_si512(), (__mmask16)-1); })
1705
+
1706
+ /* Vector Extract */
1707
+
1708
/* Extracts a 256-bit group of doubles from A via
   __builtin_ia32_extractf64x4_mask; I is forwarded as the immediate selector.
   The call is unmasked (all-ones mask), with a zero vector as pass-through.

   A is cast in place rather than copied into a macro-local: a local named
   __A would shadow a caller argument that is itself spelled __A and turn the
   copy into a self-initialization of an indeterminate value.  The zero
   pass-through is built with _mm256_setzero_pd() so it already has the
   floating-point element type the builtin expects. */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), \
                                            (I), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)-1); })
1715
+
1716
/* Extracts a 128-bit group of floats from A via
   __builtin_ia32_extractf32x4_mask; I is forwarded as the immediate selector.
   The call is unmasked (all-ones mask), with a zero vector as pass-through.

   A is cast in place rather than copied into a macro-local: a local named
   __A would shadow a caller argument that is itself spelled __A and turn the
   copy into a self-initialization of an indeterminate value. */
#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), \
                                           (I), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1); })
1723
+
1724
+ /* Vector Blend */
1725
+
1726
+ static __inline __m512d __DEFAULT_FN_ATTRS
1727
+ _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
1728
+ {
1729
+ return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
1730
+ (__v8df) __W,
1731
+ (__mmask8) __U);
1732
+ }
1733
+
1734
+ static __inline __m512 __DEFAULT_FN_ATTRS
1735
+ _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
1736
+ {
1737
+ return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
1738
+ (__v16sf) __W,
1739
+ (__mmask16) __U);
1740
+ }
1741
+
1742
+ static __inline __m512i __DEFAULT_FN_ATTRS
1743
+ _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
1744
+ {
1745
+ return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
1746
+ (__v8di) __W,
1747
+ (__mmask8) __U);
1748
+ }
1749
+
1750
+ static __inline __m512i __DEFAULT_FN_ATTRS
1751
+ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
1752
+ {
1753
+ return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
1754
+ (__v16si) __W,
1755
+ (__mmask16) __U);
1756
+ }
1757
+
1758
+ /* Compare */
1759
+
1760
/* Unmasked float compare: predicate P and rounding operand R are forwarded to
   __builtin_ia32_cmpps512_mask together with an all-ones mask. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (P), (__mmask16)-1, (R)); })
1764
+
1765
/* Masked float compare: U becomes the mask operand of
   __builtin_ia32_cmpps512_mask; P and R are forwarded unchanged. */
#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (P), (__mmask16)(U), (R)); })
1769
+
1770
/* Shorthand for _mm512_cmp_round_ps_mask with _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), \
                           _MM_FROUND_CUR_DIRECTION)
1772
+
1773
/* Shorthand for _mm512_mask_cmp_round_ps_mask with _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), \
                                _MM_FROUND_CUR_DIRECTION)
1775
+
1776
/* Unmasked double compare: predicate P and rounding operand R are forwarded to
   __builtin_ia32_cmppd512_mask together with an all-ones mask. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (P), (__mmask8)-1, (R)); })
1780
+
1781
/* Masked double compare: U becomes the mask operand of
   __builtin_ia32_cmppd512_mask; P and R are forwarded unchanged. */
#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (P), (__mmask8)(U), (R)); })
1785
+
1786
/* Shorthand for _mm512_cmp_round_pd_mask with _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), \
                           _MM_FROUND_CUR_DIRECTION)
1788
+
1789
/* Shorthand for _mm512_mask_cmp_round_pd_mask with _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), \
                                _MM_FROUND_CUR_DIRECTION)
1791
+
1792
+ /* Conversion */
1793
+
1794
+ static __inline __m512i __DEFAULT_FN_ATTRS
1795
+ _mm512_cvttps_epu32(__m512 __A)
1796
+ {
1797
+ return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
1798
+ (__v16si)
1799
+ _mm512_setzero_si512 (),
1800
+ (__mmask16) -1,
1801
+ _MM_FROUND_CUR_DIRECTION);
1802
+ }
1803
+
1804
/* Unmasked __builtin_ia32_cvtdq2ps512_mask with caller-supplied rounding R. */
#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(A), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, (R)); })
1808
+
1809
/* Unmasked __builtin_ia32_cvtudq2ps512_mask with caller-supplied rounding R. */
#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(A), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, (R)); })
1813
+
1814
+ static __inline __m512d __DEFAULT_FN_ATTRS
1815
+ _mm512_cvtepi32_pd(__m256i __A)
1816
+ {
1817
+ return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
1818
+ (__v8df)
1819
+ _mm512_setzero_pd (),
1820
+ (__mmask8) -1);
1821
+ }
1822
+
1823
+ static __inline __m512d __DEFAULT_FN_ATTRS
1824
+ _mm512_cvtepu32_pd(__m256i __A)
1825
+ {
1826
+ return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
1827
+ (__v8df)
1828
+ _mm512_setzero_pd (),
1829
+ (__mmask8) -1);
1830
+ }
1831
+
1832
/* Unmasked __builtin_ia32_cvtpd2ps512_mask with caller-supplied rounding R;
   the result is a 256-bit float vector. */
#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(A), (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1, (R)); })
1836
+
1837
/* Unmasked __builtin_ia32_vcvtps2ph512_mask: I is forwarded as the immediate,
   the pass-through is a zero vector and every mask bit is set. */
#define _mm512_cvtps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask( \
      (__v16sf)(A), (I), \
      (__v16hi)_mm256_setzero_si256(), \
      (__mmask16)-1); })
1841
+
1842
+ static __inline __m512 __DEFAULT_FN_ATTRS
1843
+ _mm512_cvtph_ps(__m256i __A)
1844
+ {
1845
+ return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
1846
+ (__v16sf)
1847
+ _mm512_setzero_ps (),
1848
+ (__mmask16) -1,
1849
+ _MM_FROUND_CUR_DIRECTION);
1850
+ }
1851
+
1852
+ static __inline __m512i __DEFAULT_FN_ATTRS
1853
+ _mm512_cvttps_epi32(__m512 a)
1854
+ {
1855
+ return (__m512i)
1856
+ __builtin_ia32_cvttps2dq512_mask((__v16sf) a,
1857
+ (__v16si) _mm512_setzero_si512 (),
1858
+ (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
1859
+ }
1860
+
1861
+ static __inline __m256i __DEFAULT_FN_ATTRS
1862
+ _mm512_cvttpd_epi32(__m512d a)
1863
+ {
1864
+ return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a,
1865
+ (__v8si)_mm256_setzero_si256(),
1866
+ (__mmask8) -1,
1867
+ _MM_FROUND_CUR_DIRECTION);
1868
+ }
1869
+
1870
/* Unmasked __builtin_ia32_cvttpd2dq512_mask with caller-supplied operand R. */
#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, (R)); })
1874
+
1875
/* Unmasked __builtin_ia32_cvttps2dq512_mask with caller-supplied operand R. */
#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, (R)); })
1879
+
1880
/* Unmasked __builtin_ia32_cvtps2dq512_mask with caller-supplied rounding R. */
#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, (R)); })
1884
+
1885
/* Unmasked __builtin_ia32_cvtpd2dq512_mask with caller-supplied rounding R. */
#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, (R)); })
1889
+
1890
/* Unmasked __builtin_ia32_cvtps2udq512_mask with caller-supplied rounding R. */
#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, (R)); })
1894
+
1895
/* Unmasked __builtin_ia32_cvtpd2udq512_mask with caller-supplied rounding R. */
#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask( \
      (__v8df)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, (R)); })
1899
+
1900
+ /* Unpack and Interleave */
1901
+ static __inline __m512d __DEFAULT_FN_ATTRS
1902
+ _mm512_unpackhi_pd(__m512d __a, __m512d __b)
1903
+ {
1904
+ return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
1905
+ }
1906
+
1907
+ static __inline __m512d __DEFAULT_FN_ATTRS
1908
+ _mm512_unpacklo_pd(__m512d __a, __m512d __b)
1909
+ {
1910
+ return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
1911
+ }
1912
+
1913
+ static __inline __m512 __DEFAULT_FN_ATTRS
1914
+ _mm512_unpackhi_ps(__m512 __a, __m512 __b)
1915
+ {
1916
+ return __builtin_shufflevector(__a, __b,
1917
+ 2, 18, 3, 19,
1918
+ 2+4, 18+4, 3+4, 19+4,
1919
+ 2+8, 18+8, 3+8, 19+8,
1920
+ 2+12, 18+12, 3+12, 19+12);
1921
+ }
1922
+
1923
+ static __inline __m512 __DEFAULT_FN_ATTRS
1924
+ _mm512_unpacklo_ps(__m512 __a, __m512 __b)
1925
+ {
1926
+ return __builtin_shufflevector(__a, __b,
1927
+ 0, 16, 1, 17,
1928
+ 0+4, 16+4, 1+4, 17+4,
1929
+ 0+8, 16+8, 1+8, 17+8,
1930
+ 0+12, 16+12, 1+12, 17+12);
1931
+ }
1932
+
1933
+ /* Bit Test */
1934
+
1935
+ static __inline __mmask16 __DEFAULT_FN_ATTRS
1936
+ _mm512_test_epi32_mask(__m512i __A, __m512i __B)
1937
+ {
1938
+ return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
1939
+ (__v16si) __B,
1940
+ (__mmask16) -1);
1941
+ }
1942
+
1943
+ static __inline __mmask8 __DEFAULT_FN_ATTRS
1944
+ _mm512_test_epi64_mask(__m512i __A, __m512i __B)
1945
+ {
1946
+ return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
1947
+ (__v8di) __B,
1948
+ (__mmask8) -1);
1949
+ }
1950
+
1951
+ /* SIMD load ops */
1952
+
1953
+ static __inline __m512i __DEFAULT_FN_ATTRS
1954
+ _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
1955
+ {
1956
+ return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
1957
+ (__v16si)
1958
+ _mm512_setzero_si512 (),
1959
+ (__mmask16) __U);
1960
+ }
1961
+
1962
+ static __inline __m512i __DEFAULT_FN_ATTRS
1963
+ _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
1964
+ {
1965
+ return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
1966
+ (__v8di)
1967
+ _mm512_setzero_si512 (),
1968
+ (__mmask8) __U);
1969
+ }
1970
+
1971
+ static __inline __m512 __DEFAULT_FN_ATTRS
1972
+ _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
1973
+ {
1974
+ return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
1975
+ (__v16sf)
1976
+ _mm512_setzero_ps (),
1977
+ (__mmask16) __U);
1978
+ }
1979
+
1980
+ static __inline __m512d __DEFAULT_FN_ATTRS
1981
+ _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
1982
+ {
1983
+ return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
1984
+ (__v8df)
1985
+ _mm512_setzero_pd (),
1986
+ (__mmask8) __U);
1987
+ }
1988
+
1989
+ static __inline __m512 __DEFAULT_FN_ATTRS
1990
+ _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
1991
+ {
1992
+ return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
1993
+ (__v16sf)
1994
+ _mm512_setzero_ps (),
1995
+ (__mmask16) __U);
1996
+ }
1997
+
1998
+ static __inline __m512d __DEFAULT_FN_ATTRS
1999
+ _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
2000
+ {
2001
+ return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
2002
+ (__v8df)
2003
+ _mm512_setzero_pd (),
2004
+ (__mmask8) __U);
2005
+ }
2006
+
2007
+ static __inline __m512d __DEFAULT_FN_ATTRS
2008
+ _mm512_loadu_pd(double const *__p)
2009
+ {
2010
+ struct __loadu_pd {
2011
+ __m512d __v;
2012
+ } __attribute__((__packed__, __may_alias__));
2013
+ return ((struct __loadu_pd*)__p)->__v;
2014
+ }
2015
+
2016
+ static __inline __m512 __DEFAULT_FN_ATTRS
2017
+ _mm512_loadu_ps(float const *__p)
2018
+ {
2019
+ struct __loadu_ps {
2020
+ __m512 __v;
2021
+ } __attribute__((__packed__, __may_alias__));
2022
+ return ((struct __loadu_ps*)__p)->__v;
2023
+ }
2024
+
2025
+ static __inline __m512 __DEFAULT_FN_ATTRS
2026
+ _mm512_load_ps(double const *__p)
2027
+ {
2028
+ return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
2029
+ (__v16sf)
2030
+ _mm512_setzero_ps (),
2031
+ (__mmask16) -1);
2032
+ }
2033
+
2034
+ static __inline __m512d __DEFAULT_FN_ATTRS
2035
+ _mm512_load_pd(float const *__p)
2036
+ {
2037
+ return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
2038
+ (__v8df)
2039
+ _mm512_setzero_pd (),
2040
+ (__mmask8) -1);
2041
+ }
2042
+
2043
+ /* SIMD store ops */
2044
+
2045
+ static __inline void __DEFAULT_FN_ATTRS
2046
+ _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
2047
+ {
2048
+ __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
2049
+ (__mmask8) __U);
2050
+ }
2051
+
2052
+ static __inline void __DEFAULT_FN_ATTRS
2053
+ _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
2054
+ {
2055
+ __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
2056
+ (__mmask16) __U);
2057
+ }
2058
+
2059
+ static __inline void __DEFAULT_FN_ATTRS
2060
+ _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
2061
+ {
2062
+ __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
2063
+ }
2064
+
2065
+ static __inline void __DEFAULT_FN_ATTRS
2066
+ _mm512_storeu_pd(void *__P, __m512d __A)
2067
+ {
2068
+ __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1);
2069
+ }
2070
+
2071
+ static __inline void __DEFAULT_FN_ATTRS
2072
+ _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
2073
+ {
2074
+ __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
2075
+ (__mmask16) __U);
2076
+ }
2077
+
2078
+ static __inline void __DEFAULT_FN_ATTRS
2079
+ _mm512_storeu_ps(void *__P, __m512 __A)
2080
+ {
2081
+ __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1);
2082
+ }
2083
+
2084
+ static __inline void __DEFAULT_FN_ATTRS
2085
+ _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
2086
+ {
2087
+ __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
2088
+ }
2089
+
2090
+ static __inline void __DEFAULT_FN_ATTRS
2091
+ _mm512_store_pd(void *__P, __m512d __A)
2092
+ {
2093
+ *(__m512d*)__P = __A;
2094
+ }
2095
+
2096
+ static __inline void __DEFAULT_FN_ATTRS
2097
+ _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
2098
+ {
2099
+ __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
2100
+ (__mmask16) __U);
2101
+ }
2102
+
2103
+ static __inline void __DEFAULT_FN_ATTRS
2104
+ _mm512_store_ps(void *__P, __m512 __A)
2105
+ {
2106
+ *(__m512*)__P = __A;
2107
+ }
2108
+
2109
+ /* Mask ops */
2110
+
2111
+ static __inline __mmask16 __DEFAULT_FN_ATTRS
2112
+ _mm512_knot(__mmask16 __M)
2113
+ {
2114
+ return __builtin_ia32_knothi(__M);
2115
+ }
2116
+
2117
+ /* Integer compare */
2118
+
2119
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2120
+ _mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
2121
+ return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
2122
+ (__mmask16)-1);
2123
+ }
2124
+
2125
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2126
+ _mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2127
+ return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
2128
+ __u);
2129
+ }
2130
+
2131
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2132
+ _mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
2133
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
2134
+ (__mmask16)-1);
2135
+ }
2136
+
2137
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2138
+ _mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2139
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
2140
+ __u);
2141
+ }
2142
+
2143
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2144
+ _mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2145
+ return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
2146
+ __u);
2147
+ }
2148
+
2149
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2150
+ _mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
2151
+ return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
2152
+ (__mmask8)-1);
2153
+ }
2154
+
2155
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2156
+ _mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
2157
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
2158
+ (__mmask8)-1);
2159
+ }
2160
+
2161
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2162
+ _mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2163
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
2164
+ __u);
2165
+ }
2166
+
2167
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2168
+ _mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
2169
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2170
+ (__mmask16)-1);
2171
+ }
2172
+
2173
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2174
+ _mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2175
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2176
+ __u);
2177
+ }
2178
+
2179
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2180
+ _mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
2181
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2182
+ (__mmask16)-1);
2183
+ }
2184
+
2185
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2186
+ _mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2187
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2188
+ __u);
2189
+ }
2190
+
2191
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2192
+ _mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
2193
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2194
+ (__mmask8)-1);
2195
+ }
2196
+
2197
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2198
+ _mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2199
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2200
+ __u);
2201
+ }
2202
+
2203
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2204
+ _mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
2205
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2206
+ (__mmask8)-1);
2207
+ }
2208
+
2209
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2210
+ _mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2211
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2212
+ __u);
2213
+ }
2214
+
2215
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2216
+ _mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
2217
+ return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
2218
+ (__mmask16)-1);
2219
+ }
2220
+
2221
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2222
+ _mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2223
+ return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
2224
+ __u);
2225
+ }
2226
+
2227
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2228
+ _mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
2229
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
2230
+ (__mmask16)-1);
2231
+ }
2232
+
2233
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2234
+ _mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2235
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
2236
+ __u);
2237
+ }
2238
+
2239
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2240
+ _mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2241
+ return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
2242
+ __u);
2243
+ }
2244
+
2245
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2246
+ _mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
2247
+ return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
2248
+ (__mmask8)-1);
2249
+ }
2250
+
2251
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2252
+ _mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
2253
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
2254
+ (__mmask8)-1);
2255
+ }
2256
+
2257
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2258
+ _mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2259
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
2260
+ __u);
2261
+ }
2262
+
2263
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2264
+ _mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
2265
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2266
+ (__mmask16)-1);
2267
+ }
2268
+
2269
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2270
+ _mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2271
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2272
+ __u);
2273
+ }
2274
+
2275
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2276
+ _mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
2277
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2278
+ (__mmask16)-1);
2279
+ }
2280
+
2281
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2282
+ _mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2283
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2284
+ __u);
2285
+ }
2286
+
2287
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2288
+ _mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
2289
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2290
+ (__mmask8)-1);
2291
+ }
2292
+
2293
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2294
+ _mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2295
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2296
+ __u);
2297
+ }
2298
+
2299
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2300
+ _mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
2301
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2302
+ (__mmask8)-1);
2303
+ }
2304
+
2305
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2306
+ _mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2307
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2308
+ __u);
2309
+ }
2310
+
2311
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2312
+ _mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
2313
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2314
+ (__mmask16)-1);
2315
+ }
2316
+
2317
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2318
+ _mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2319
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2320
+ __u);
2321
+ }
2322
+
2323
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2324
+ _mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
2325
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2326
+ (__mmask16)-1);
2327
+ }
2328
+
2329
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2330
+ _mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2331
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2332
+ __u);
2333
+ }
2334
+
2335
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2336
+ _mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
2337
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2338
+ (__mmask8)-1);
2339
+ }
2340
+
2341
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2342
+ _mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2343
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2344
+ __u);
2345
+ }
2346
+
2347
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2348
+ _mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
2349
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2350
+ (__mmask8)-1);
2351
+ }
2352
+
2353
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2354
+ _mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2355
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2356
+ __u);
2357
+ }
2358
+
2359
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2360
+ _mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
2361
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
2362
+ (__mmask16)-1);
2363
+ }
2364
+
2365
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2366
+ _mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2367
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
2368
+ __u);
2369
+ }
2370
+
2371
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2372
+ _mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
2373
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
2374
+ (__mmask16)-1);
2375
+ }
2376
+
2377
+ static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2378
+ _mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2379
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
2380
+ __u);
2381
+ }
2382
+
2383
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2384
+ _mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
2385
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
2386
+ (__mmask8)-1);
2387
+ }
2388
+
2389
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2390
+ _mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2391
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
2392
+ __u);
2393
+ }
2394
+
2395
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2396
+ _mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
2397
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
2398
+ (__mmask8)-1);
2399
+ }
2400
+
2401
+ static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2402
+ _mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2403
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
2404
+ __u);
2405
+ }
2406
+
2407
/* Generic compare through __builtin_ia32_cmpd512_mask: p is forwarded as the
   predicate immediate and every mask bit is set.

   The operands are cast in place instead of being copied into macro-locals
   named __a/__b.  With locals, an argument that is itself spelled __a or __b
   (as the parameters of the inline wrappers in this header are) expands to
   `__m512i __a = (__a);`, which reads the new, uninitialized variable. */
#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (p), \
                                         (__mmask16)-1); })
2412
+
2413
/* Generic compare through __builtin_ia32_ucmpd512_mask: p is forwarded as the
   predicate immediate and every mask bit is set.

   The operands are cast in place instead of being copied into macro-locals
   named __a/__b.  With locals, an argument that is itself spelled __a or __b
   (as the parameters of the inline wrappers in this header are) expands to
   `__m512i __a = (__a);`, which reads the new, uninitialized variable. */
#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (p), \
                                          (__mmask16)-1); })
2418
+
2419
/* Generic compare through __builtin_ia32_cmpq512_mask: p is forwarded as the
   predicate immediate and every mask bit is set.

   The operands are cast in place instead of being copied into macro-locals
   named __a/__b.  With locals, an argument that is itself spelled __a or __b
   (as the parameters of the inline wrappers in this header are) expands to
   `__m512i __a = (__a);`, which reads the new, uninitialized variable. */
#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (p), \
                                        (__mmask8)-1); })
2424
+
2425
/* Generic compare through __builtin_ia32_ucmpq512_mask: p is forwarded as the
   predicate immediate and every mask bit is set.

   The operands are cast in place instead of being copied into macro-locals
   named __a/__b.  With locals, an argument that is itself spelled __a or __b
   (as the parameters of the inline wrappers in this header are) expands to
   `__m512i __a = (__a);`, which reads the new, uninitialized variable. */
#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (p), \
                                         (__mmask8)-1); })
2430
+
2431
/* Masked generic compare through __builtin_ia32_cmpd512_mask: p is forwarded
   as the predicate immediate and m as the mask operand.

   The operands are cast in place instead of being copied into macro-locals
   named __a/__b.  With locals, an argument that is itself spelled __a or __b
   (as the parameters of the inline wrappers in this header are) expands to
   `__m512i __a = (__a);`, which reads the new, uninitialized variable. */
#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (p), \
                                         (__mmask16)(m)); })
2436
+
2437
/* Masked generic compare through __builtin_ia32_ucmpd512_mask: p is forwarded
   as the predicate immediate and m as the mask operand.

   The operands are cast in place instead of being copied into macro-locals
   named __a/__b.  With locals, an argument that is itself spelled __a or __b
   (as the parameters of the inline wrappers in this header are) expands to
   `__m512i __a = (__a);`, which reads the new, uninitialized variable. */
#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (p), \
                                          (__mmask16)(m)); })
2442
+
2443
/* Masked generic compare through __builtin_ia32_cmpq512_mask: p is forwarded
   as the predicate immediate and m as the mask operand.

   The operands are cast in place instead of being copied into macro-locals
   named __a/__b.  With locals, an argument that is itself spelled __a or __b
   (as the parameters of the inline wrappers in this header are) expands to
   `__m512i __a = (__a);`, which reads the new, uninitialized variable. */
#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (p), \
                                        (__mmask8)(m)); })
2448
+
2449
/* Masked generic compare through __builtin_ia32_ucmpq512_mask: p is forwarded
   as the predicate immediate and m as the mask operand.

   The operands are cast in place instead of being copied into macro-locals
   named __a/__b.  With locals, an argument that is itself spelled __a or __b
   (as the parameters of the inline wrappers in this header are) expands to
   `__m512i __a = (__a);`, which reads the new, uninitialized variable. */
#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (p), \
                                         (__mmask8)(m)); })
2454
+
2455
+ #undef __DEFAULT_FN_ATTRS
2456
+
2457
+ #endif // __AVX512FINTRIN_H