xcodebuild-helper 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (202) hide show
  1. checksums.yaml +7 -0
  2. data/.codeclimate.yml +20 -0
  3. data/.gitignore +1 -0
  4. data/.rspec +2 -0
  5. data/.travis.yml +7 -0
  6. data/Gemfile +6 -0
  7. data/Gemfile.lock +110 -0
  8. data/Guardfile +18 -0
  9. data/README.md +7 -0
  10. data/Rakefile +7 -0
  11. data/TODO.md +3 -0
  12. data/bin/oclint +5 -0
  13. data/bin/oclint-0.8 +5 -0
  14. data/bin/oclint-json-compilation-database +5 -0
  15. data/bin/oclint-xcodebuild +5 -0
  16. data/externals/oclint/LICENSE +69 -0
  17. data/externals/oclint/bin/oclint +0 -0
  18. data/externals/oclint/bin/oclint-0.10.2 +0 -0
  19. data/externals/oclint/bin/oclint-json-compilation-database +88 -0
  20. data/externals/oclint/bin/oclint-xcodebuild +218 -0
  21. data/externals/oclint/lib/clang/3.7.0/asan_blacklist.txt +13 -0
  22. data/externals/oclint/lib/clang/3.7.0/include/Intrin.h +958 -0
  23. data/externals/oclint/lib/clang/3.7.0/include/__stddef_max_align_t.h +43 -0
  24. data/externals/oclint/lib/clang/3.7.0/include/__wmmintrin_aes.h +72 -0
  25. data/externals/oclint/lib/clang/3.7.0/include/__wmmintrin_pclmul.h +34 -0
  26. data/externals/oclint/lib/clang/3.7.0/include/adxintrin.h +88 -0
  27. data/externals/oclint/lib/clang/3.7.0/include/altivec.h +13528 -0
  28. data/externals/oclint/lib/clang/3.7.0/include/ammintrin.h +215 -0
  29. data/externals/oclint/lib/clang/3.7.0/include/arm_acle.h +304 -0
  30. data/externals/oclint/lib/clang/3.7.0/include/arm_neon.h +68419 -0
  31. data/externals/oclint/lib/clang/3.7.0/include/avx2intrin.h +1256 -0
  32. data/externals/oclint/lib/clang/3.7.0/include/avx512bwintrin.h +1250 -0
  33. data/externals/oclint/lib/clang/3.7.0/include/avx512cdintrin.h +131 -0
  34. data/externals/oclint/lib/clang/3.7.0/include/avx512dqintrin.h +242 -0
  35. data/externals/oclint/lib/clang/3.7.0/include/avx512erintrin.h +285 -0
  36. data/externals/oclint/lib/clang/3.7.0/include/avx512fintrin.h +2457 -0
  37. data/externals/oclint/lib/clang/3.7.0/include/avx512vlbwintrin.h +1907 -0
  38. data/externals/oclint/lib/clang/3.7.0/include/avx512vldqintrin.h +353 -0
  39. data/externals/oclint/lib/clang/3.7.0/include/avx512vlintrin.h +1982 -0
  40. data/externals/oclint/lib/clang/3.7.0/include/avxintrin.h +1308 -0
  41. data/externals/oclint/lib/clang/3.7.0/include/bmi2intrin.h +99 -0
  42. data/externals/oclint/lib/clang/3.7.0/include/bmiintrin.h +153 -0
  43. data/externals/oclint/lib/clang/3.7.0/include/cpuid.h +209 -0
  44. data/externals/oclint/lib/clang/3.7.0/include/cuda_builtin_vars.h +110 -0
  45. data/externals/oclint/lib/clang/3.7.0/include/emmintrin.h +1480 -0
  46. data/externals/oclint/lib/clang/3.7.0/include/f16cintrin.h +63 -0
  47. data/externals/oclint/lib/clang/3.7.0/include/float.h +124 -0
  48. data/externals/oclint/lib/clang/3.7.0/include/fma4intrin.h +236 -0
  49. data/externals/oclint/lib/clang/3.7.0/include/fmaintrin.h +234 -0
  50. data/externals/oclint/lib/clang/3.7.0/include/fxsrintrin.h +55 -0
  51. data/externals/oclint/lib/clang/3.7.0/include/htmintrin.h +226 -0
  52. data/externals/oclint/lib/clang/3.7.0/include/htmxlintrin.h +363 -0
  53. data/externals/oclint/lib/clang/3.7.0/include/ia32intrin.h +101 -0
  54. data/externals/oclint/lib/clang/3.7.0/include/immintrin.h +203 -0
  55. data/externals/oclint/lib/clang/3.7.0/include/inttypes.h +102 -0
  56. data/externals/oclint/lib/clang/3.7.0/include/iso646.h +43 -0
  57. data/externals/oclint/lib/clang/3.7.0/include/limits.h +118 -0
  58. data/externals/oclint/lib/clang/3.7.0/include/lzcntintrin.h +72 -0
  59. data/externals/oclint/lib/clang/3.7.0/include/mm3dnow.h +167 -0
  60. data/externals/oclint/lib/clang/3.7.0/include/mm_malloc.h +75 -0
  61. data/externals/oclint/lib/clang/3.7.0/include/mmintrin.h +507 -0
  62. data/externals/oclint/lib/clang/3.7.0/include/module.modulemap +196 -0
  63. data/externals/oclint/lib/clang/3.7.0/include/nmmintrin.h +35 -0
  64. data/externals/oclint/lib/clang/3.7.0/include/pmmintrin.h +122 -0
  65. data/externals/oclint/lib/clang/3.7.0/include/popcntintrin.h +50 -0
  66. data/externals/oclint/lib/clang/3.7.0/include/prfchwintrin.h +39 -0
  67. data/externals/oclint/lib/clang/3.7.0/include/rdseedintrin.h +59 -0
  68. data/externals/oclint/lib/clang/3.7.0/include/rtmintrin.h +59 -0
  69. data/externals/oclint/lib/clang/3.7.0/include/s390intrin.h +39 -0
  70. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/allocator_interface.h +66 -0
  71. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/asan_interface.h +155 -0
  72. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/common_interface_defs.h +118 -0
  73. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/coverage_interface.h +63 -0
  74. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/dfsan_interface.h +114 -0
  75. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/linux_syscall_hooks.h +3070 -0
  76. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/lsan_interface.h +84 -0
  77. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/msan_interface.h +107 -0
  78. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/tsan_interface_atomic.h +222 -0
  79. data/externals/oclint/lib/clang/3.7.0/include/shaintrin.h +79 -0
  80. data/externals/oclint/lib/clang/3.7.0/include/smmintrin.h +487 -0
  81. data/externals/oclint/lib/clang/3.7.0/include/stdalign.h +35 -0
  82. data/externals/oclint/lib/clang/3.7.0/include/stdarg.h +52 -0
  83. data/externals/oclint/lib/clang/3.7.0/include/stdatomic.h +190 -0
  84. data/externals/oclint/lib/clang/3.7.0/include/stdbool.h +44 -0
  85. data/externals/oclint/lib/clang/3.7.0/include/stddef.h +137 -0
  86. data/externals/oclint/lib/clang/3.7.0/include/stdint.h +707 -0
  87. data/externals/oclint/lib/clang/3.7.0/include/stdnoreturn.h +30 -0
  88. data/externals/oclint/lib/clang/3.7.0/include/tbmintrin.h +154 -0
  89. data/externals/oclint/lib/clang/3.7.0/include/tgmath.h +1374 -0
  90. data/externals/oclint/lib/clang/3.7.0/include/tmmintrin.h +230 -0
  91. data/externals/oclint/lib/clang/3.7.0/include/unwind.h +282 -0
  92. data/externals/oclint/lib/clang/3.7.0/include/vadefs.h +65 -0
  93. data/externals/oclint/lib/clang/3.7.0/include/varargs.h +26 -0
  94. data/externals/oclint/lib/clang/3.7.0/include/vecintrin.h +8946 -0
  95. data/externals/oclint/lib/clang/3.7.0/include/wmmintrin.h +42 -0
  96. data/externals/oclint/lib/clang/3.7.0/include/x86intrin.h +81 -0
  97. data/externals/oclint/lib/clang/3.7.0/include/xmmintrin.h +1008 -0
  98. data/externals/oclint/lib/clang/3.7.0/include/xopintrin.h +809 -0
  99. data/externals/oclint/lib/clang/3.7.0/include/xtestintrin.h +41 -0
  100. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.asan_iossim_dynamic.dylib +0 -0
  101. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.asan_osx_dynamic.dylib +0 -0
  102. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.builtins-i386.a +0 -0
  103. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.builtins-x86_64.a +0 -0
  104. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.profile_osx.a +0 -0
  105. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.safestack_osx.a +0 -0
  106. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.ubsan_iossim_dynamic.dylib +0 -0
  107. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.ubsan_osx_dynamic.dylib +0 -0
  108. data/externals/oclint/lib/oclint/reporters/libHTMLReporter.dylib +0 -0
  109. data/externals/oclint/lib/oclint/reporters/libJSONReporter.dylib +0 -0
  110. data/externals/oclint/lib/oclint/reporters/libPMDReporter.dylib +0 -0
  111. data/externals/oclint/lib/oclint/reporters/libTextReporter.dylib +0 -0
  112. data/externals/oclint/lib/oclint/reporters/libXMLReporter.dylib +0 -0
  113. data/externals/oclint/lib/oclint/reporters/libXcodeReporter.dylib +0 -0
  114. data/externals/oclint/lib/oclint/rules/libAvoidBranchingStatementAsLastInLoopRule.dylib +0 -0
  115. data/externals/oclint/lib/oclint/rules/libAvoidDefaultArgumentsOnVirtualMethodsRule.dylib +0 -0
  116. data/externals/oclint/lib/oclint/rules/libAvoidPrivateStaticMembersRule.dylib +0 -0
  117. data/externals/oclint/lib/oclint/rules/libBaseClassDestructorShouldBeVirtualOrProtectedRule.dylib +0 -0
  118. data/externals/oclint/lib/oclint/rules/libBitwiseOperatorInConditionalRule.dylib +0 -0
  119. data/externals/oclint/lib/oclint/rules/libBrokenNullCheckRule.dylib +0 -0
  120. data/externals/oclint/lib/oclint/rules/libBrokenOddnessCheckRule.dylib +0 -0
  121. data/externals/oclint/lib/oclint/rules/libCollapsibleIfStatementsRule.dylib +0 -0
  122. data/externals/oclint/lib/oclint/rules/libConstantConditionalOperatorRule.dylib +0 -0
  123. data/externals/oclint/lib/oclint/rules/libConstantIfExpressionRule.dylib +0 -0
  124. data/externals/oclint/lib/oclint/rules/libCoveredSwitchStatementsDontNeedDefaultRule.dylib +0 -0
  125. data/externals/oclint/lib/oclint/rules/libCyclomaticComplexityRule.dylib +0 -0
  126. data/externals/oclint/lib/oclint/rules/libDeadCodeRule.dylib +0 -0
  127. data/externals/oclint/lib/oclint/rules/libDefaultLabelNotLastInSwitchStatementRule.dylib +0 -0
  128. data/externals/oclint/lib/oclint/rules/libDestructorOfVirtualClassRule.dylib +0 -0
  129. data/externals/oclint/lib/oclint/rules/libDoubleNegativeRule.dylib +0 -0
  130. data/externals/oclint/lib/oclint/rules/libEmptyCatchStatementRule.dylib +0 -0
  131. data/externals/oclint/lib/oclint/rules/libEmptyDoWhileStatementRule.dylib +0 -0
  132. data/externals/oclint/lib/oclint/rules/libEmptyElseBlockRule.dylib +0 -0
  133. data/externals/oclint/lib/oclint/rules/libEmptyFinallyStatementRule.dylib +0 -0
  134. data/externals/oclint/lib/oclint/rules/libEmptyForStatementRule.dylib +0 -0
  135. data/externals/oclint/lib/oclint/rules/libEmptyIfStatementRule.dylib +0 -0
  136. data/externals/oclint/lib/oclint/rules/libEmptySwitchStatementRule.dylib +0 -0
  137. data/externals/oclint/lib/oclint/rules/libEmptyTryStatementRule.dylib +0 -0
  138. data/externals/oclint/lib/oclint/rules/libEmptyWhileStatementRule.dylib +0 -0
  139. data/externals/oclint/lib/oclint/rules/libForLoopShouldBeWhileLoopRule.dylib +0 -0
  140. data/externals/oclint/lib/oclint/rules/libGotoStatementRule.dylib +0 -0
  141. data/externals/oclint/lib/oclint/rules/libInvertedLogicRule.dylib +0 -0
  142. data/externals/oclint/lib/oclint/rules/libJumbledIncrementerRule.dylib +0 -0
  143. data/externals/oclint/lib/oclint/rules/libLongClassRule.dylib +0 -0
  144. data/externals/oclint/lib/oclint/rules/libLongLineRule.dylib +0 -0
  145. data/externals/oclint/lib/oclint/rules/libLongMethodRule.dylib +0 -0
  146. data/externals/oclint/lib/oclint/rules/libLongVariableNameRule.dylib +0 -0
  147. data/externals/oclint/lib/oclint/rules/libMisplacedNullCheckRule.dylib +0 -0
  148. data/externals/oclint/lib/oclint/rules/libMissingBreakInSwitchStatementRule.dylib +0 -0
  149. data/externals/oclint/lib/oclint/rules/libMultipleUnaryOperatorRule.dylib +0 -0
  150. data/externals/oclint/lib/oclint/rules/libNPathComplexityRule.dylib +0 -0
  151. data/externals/oclint/lib/oclint/rules/libNcssMethodCountRule.dylib +0 -0
  152. data/externals/oclint/lib/oclint/rules/libNestedBlockDepthRule.dylib +0 -0
  153. data/externals/oclint/lib/oclint/rules/libNonCaseLabelInSwitchStatementRule.dylib +0 -0
  154. data/externals/oclint/lib/oclint/rules/libObjCAssignIvarOutsideAccessorsRule.dylib +0 -0
  155. data/externals/oclint/lib/oclint/rules/libObjCBoxedExpressionsRule.dylib +0 -0
  156. data/externals/oclint/lib/oclint/rules/libObjCContainerLiteralsRule.dylib +0 -0
  157. data/externals/oclint/lib/oclint/rules/libObjCNSNumberLiteralsRule.dylib +0 -0
  158. data/externals/oclint/lib/oclint/rules/libObjCObjectSubscriptingRule.dylib +0 -0
  159. data/externals/oclint/lib/oclint/rules/libObjCVerifyIsEqualHashRule.dylib +0 -0
  160. data/externals/oclint/lib/oclint/rules/libObjCVerifyMustCallSuperRule.dylib +0 -0
  161. data/externals/oclint/lib/oclint/rules/libObjCVerifyProhibitedCallRule.dylib +0 -0
  162. data/externals/oclint/lib/oclint/rules/libObjCVerifyProtectedMethodRule.dylib +0 -0
  163. data/externals/oclint/lib/oclint/rules/libObjCVerifySubclassMustImplementRule.dylib +0 -0
  164. data/externals/oclint/lib/oclint/rules/libParameterReassignmentRule.dylib +0 -0
  165. data/externals/oclint/lib/oclint/rules/libPreferEarlyExitRule.dylib +0 -0
  166. data/externals/oclint/lib/oclint/rules/libRedundantConditionalOperatorRule.dylib +0 -0
  167. data/externals/oclint/lib/oclint/rules/libRedundantIfStatementRule.dylib +0 -0
  168. data/externals/oclint/lib/oclint/rules/libRedundantLocalVariableRule.dylib +0 -0
  169. data/externals/oclint/lib/oclint/rules/libRedundantNilCheckRule.dylib +0 -0
  170. data/externals/oclint/lib/oclint/rules/libReturnFromFinallyBlockRule.dylib +0 -0
  171. data/externals/oclint/lib/oclint/rules/libShortVariableNameRule.dylib +0 -0
  172. data/externals/oclint/lib/oclint/rules/libSwitchStatementsShouldHaveDefaultRule.dylib +0 -0
  173. data/externals/oclint/lib/oclint/rules/libThrowExceptionFromFinallyBlockRule.dylib +0 -0
  174. data/externals/oclint/lib/oclint/rules/libTooFewBranchesInSwitchStatementRule.dylib +0 -0
  175. data/externals/oclint/lib/oclint/rules/libTooManyFieldsRule.dylib +0 -0
  176. data/externals/oclint/lib/oclint/rules/libTooManyMethodsRule.dylib +0 -0
  177. data/externals/oclint/lib/oclint/rules/libTooManyParametersRule.dylib +0 -0
  178. data/externals/oclint/lib/oclint/rules/libUnnecessaryElseStatementRule.dylib +0 -0
  179. data/externals/oclint/lib/oclint/rules/libUnnecessaryNullCheckForCXXDeallocRule.dylib +0 -0
  180. data/externals/oclint/lib/oclint/rules/libUnusedLocalVariableRule.dylib +0 -0
  181. data/externals/oclint/lib/oclint/rules/libUnusedMethodParameterRule.dylib +0 -0
  182. data/externals/oclint/lib/oclint/rules/libUselessParenthesesRule.dylib +0 -0
  183. data/lib/coverage_plan.rb +19 -0
  184. data/lib/device.rb +27 -0
  185. data/lib/execute.rb +7 -0
  186. data/lib/lint_plan.rb +41 -0
  187. data/lib/rules.rb +23 -0
  188. data/lib/test_plan.rb +11 -0
  189. data/lib/version.rb +3 -0
  190. data/lib/xcode.rb +128 -0
  191. data/lib/xcodebuild-helper.rb +110 -0
  192. data/spec/coverage_plan_spec.rb +18 -0
  193. data/spec/device_spec.rb +24 -0
  194. data/spec/lint_plan_spec.rb +35 -0
  195. data/spec/rule_spec.rb +37 -0
  196. data/spec/spec_helper.rb +17 -0
  197. data/spec/test_plan_spec.rb +11 -0
  198. data/spec/xcode_dsl_actions_spec.rb +136 -0
  199. data/spec/xcode_dsl_spec.rb +176 -0
  200. data/spec/xcode_spec.rb +79 -0
  201. data/xcodebuild-helper.gemspec +26 -0
  202. metadata +327 -0
@@ -0,0 +1,1308 @@
1
+ /*===---- avxintrin.h - AVX intrinsics -------------------------------------===
2
+ *
3
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ * of this software and associated documentation files (the "Software"), to deal
5
+ * in the Software without restriction, including without limitation the rights
6
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ * copies of the Software, and to permit persons to whom the Software is
8
+ * furnished to do so, subject to the following conditions:
9
+ *
10
+ * The above copyright notice and this permission notice shall be included in
11
+ * all copies or substantial portions of the Software.
12
+ *
13
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ * THE SOFTWARE.
20
+ *
21
+ *===-----------------------------------------------------------------------===
22
+ */
23
+
24
+ #ifndef __IMMINTRIN_H
25
+ #error "Never use <avxintrin.h> directly; include <immintrin.h> instead."
26
+ #endif
27
+
28
+ #ifndef __AVXINTRIN_H
29
+ #define __AVXINTRIN_H
30
+
31
+ typedef double __v4df __attribute__ ((__vector_size__ (32)));
32
+ typedef float __v8sf __attribute__ ((__vector_size__ (32)));
33
+ typedef long long __v4di __attribute__ ((__vector_size__ (32)));
34
+ typedef int __v8si __attribute__ ((__vector_size__ (32)));
35
+ typedef short __v16hi __attribute__ ((__vector_size__ (32)));
36
+ typedef char __v32qi __attribute__ ((__vector_size__ (32)));
37
+
38
+ typedef float __m256 __attribute__ ((__vector_size__ (32)));
39
+ typedef double __m256d __attribute__((__vector_size__(32)));
40
+ typedef long long __m256i __attribute__((__vector_size__(32)));
41
+
42
+ /* Define the default attributes for the functions in this file. */
43
+ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
44
+
45
+ /* Arithmetic */
46
+ static __inline __m256d __DEFAULT_FN_ATTRS
47
+ _mm256_add_pd(__m256d __a, __m256d __b)
48
+ {
49
+ return __a+__b;
50
+ }
51
+
52
+ static __inline __m256 __DEFAULT_FN_ATTRS
53
+ _mm256_add_ps(__m256 __a, __m256 __b)
54
+ {
55
+ return __a+__b;
56
+ }
57
+
58
+ static __inline __m256d __DEFAULT_FN_ATTRS
59
+ _mm256_sub_pd(__m256d __a, __m256d __b)
60
+ {
61
+ return __a-__b;
62
+ }
63
+
64
+ static __inline __m256 __DEFAULT_FN_ATTRS
65
+ _mm256_sub_ps(__m256 __a, __m256 __b)
66
+ {
67
+ return __a-__b;
68
+ }
69
+
70
+ static __inline __m256d __DEFAULT_FN_ATTRS
71
+ _mm256_addsub_pd(__m256d __a, __m256d __b)
72
+ {
73
+ return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b);
74
+ }
75
+
76
+ static __inline __m256 __DEFAULT_FN_ATTRS
77
+ _mm256_addsub_ps(__m256 __a, __m256 __b)
78
+ {
79
+ return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b);
80
+ }
81
+
82
+ static __inline __m256d __DEFAULT_FN_ATTRS
83
+ _mm256_div_pd(__m256d __a, __m256d __b)
84
+ {
85
+ return __a / __b;
86
+ }
87
+
88
+ static __inline __m256 __DEFAULT_FN_ATTRS
89
+ _mm256_div_ps(__m256 __a, __m256 __b)
90
+ {
91
+ return __a / __b;
92
+ }
93
+
94
+ static __inline __m256d __DEFAULT_FN_ATTRS
95
+ _mm256_max_pd(__m256d __a, __m256d __b)
96
+ {
97
+ return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b);
98
+ }
99
+
100
+ static __inline __m256 __DEFAULT_FN_ATTRS
101
+ _mm256_max_ps(__m256 __a, __m256 __b)
102
+ {
103
+ return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b);
104
+ }
105
+
106
+ static __inline __m256d __DEFAULT_FN_ATTRS
107
+ _mm256_min_pd(__m256d __a, __m256d __b)
108
+ {
109
+ return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b);
110
+ }
111
+
112
+ static __inline __m256 __DEFAULT_FN_ATTRS
113
+ _mm256_min_ps(__m256 __a, __m256 __b)
114
+ {
115
+ return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b);
116
+ }
117
+
118
+ static __inline __m256d __DEFAULT_FN_ATTRS
119
+ _mm256_mul_pd(__m256d __a, __m256d __b)
120
+ {
121
+ return __a * __b;
122
+ }
123
+
124
+ static __inline __m256 __DEFAULT_FN_ATTRS
125
+ _mm256_mul_ps(__m256 __a, __m256 __b)
126
+ {
127
+ return __a * __b;
128
+ }
129
+
130
+ static __inline __m256d __DEFAULT_FN_ATTRS
131
+ _mm256_sqrt_pd(__m256d __a)
132
+ {
133
+ return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a);
134
+ }
135
+
136
+ static __inline __m256 __DEFAULT_FN_ATTRS
137
+ _mm256_sqrt_ps(__m256 __a)
138
+ {
139
+ return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a);
140
+ }
141
+
142
+ static __inline __m256 __DEFAULT_FN_ATTRS
143
+ _mm256_rsqrt_ps(__m256 __a)
144
+ {
145
+ return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a);
146
+ }
147
+
148
+ static __inline __m256 __DEFAULT_FN_ATTRS
149
+ _mm256_rcp_ps(__m256 __a)
150
+ {
151
+ return (__m256)__builtin_ia32_rcpps256((__v8sf)__a);
152
+ }
153
+
154
+ #define _mm256_round_pd(V, M) __extension__ ({ \
155
+ __m256d __V = (V); \
156
+ (__m256d)__builtin_ia32_roundpd256((__v4df)__V, (M)); })
157
+
158
+ #define _mm256_round_ps(V, M) __extension__ ({ \
159
+ __m256 __V = (V); \
160
+ (__m256)__builtin_ia32_roundps256((__v8sf)__V, (M)); })
161
+
162
+ #define _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL)
163
+ #define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR)
164
+ #define _mm256_ceil_ps(V) _mm256_round_ps((V), _MM_FROUND_CEIL)
165
+ #define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR)
166
+
167
+ /* Logical */
168
+ static __inline __m256d __DEFAULT_FN_ATTRS
169
+ _mm256_and_pd(__m256d __a, __m256d __b)
170
+ {
171
+ return (__m256d)((__v4di)__a & (__v4di)__b);
172
+ }
173
+
174
+ static __inline __m256 __DEFAULT_FN_ATTRS
175
+ _mm256_and_ps(__m256 __a, __m256 __b)
176
+ {
177
+ return (__m256)((__v8si)__a & (__v8si)__b);
178
+ }
179
+
180
+ static __inline __m256d __DEFAULT_FN_ATTRS
181
+ _mm256_andnot_pd(__m256d __a, __m256d __b)
182
+ {
183
+ return (__m256d)(~(__v4di)__a & (__v4di)__b);
184
+ }
185
+
186
+ static __inline __m256 __DEFAULT_FN_ATTRS
187
+ _mm256_andnot_ps(__m256 __a, __m256 __b)
188
+ {
189
+ return (__m256)(~(__v8si)__a & (__v8si)__b);
190
+ }
191
+
192
+ static __inline __m256d __DEFAULT_FN_ATTRS
193
+ _mm256_or_pd(__m256d __a, __m256d __b)
194
+ {
195
+ return (__m256d)((__v4di)__a | (__v4di)__b);
196
+ }
197
+
198
+ static __inline __m256 __DEFAULT_FN_ATTRS
199
+ _mm256_or_ps(__m256 __a, __m256 __b)
200
+ {
201
+ return (__m256)((__v8si)__a | (__v8si)__b);
202
+ }
203
+
204
+ static __inline __m256d __DEFAULT_FN_ATTRS
205
+ _mm256_xor_pd(__m256d __a, __m256d __b)
206
+ {
207
+ return (__m256d)((__v4di)__a ^ (__v4di)__b);
208
+ }
209
+
210
+ static __inline __m256 __DEFAULT_FN_ATTRS
211
+ _mm256_xor_ps(__m256 __a, __m256 __b)
212
+ {
213
+ return (__m256)((__v8si)__a ^ (__v8si)__b);
214
+ }
215
+
216
+ /* Horizontal arithmetic */
217
+ static __inline __m256d __DEFAULT_FN_ATTRS
218
+ _mm256_hadd_pd(__m256d __a, __m256d __b)
219
+ {
220
+ return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);
221
+ }
222
+
223
+ static __inline __m256 __DEFAULT_FN_ATTRS
224
+ _mm256_hadd_ps(__m256 __a, __m256 __b)
225
+ {
226
+ return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);
227
+ }
228
+
229
+ static __inline __m256d __DEFAULT_FN_ATTRS
230
+ _mm256_hsub_pd(__m256d __a, __m256d __b)
231
+ {
232
+ return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);
233
+ }
234
+
235
+ static __inline __m256 __DEFAULT_FN_ATTRS
236
+ _mm256_hsub_ps(__m256 __a, __m256 __b)
237
+ {
238
+ return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b);
239
+ }
240
+
241
+ /* Vector permutations */
242
+ static __inline __m128d __DEFAULT_FN_ATTRS
243
+ _mm_permutevar_pd(__m128d __a, __m128i __c)
244
+ {
245
+ return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);
246
+ }
247
+
248
+ static __inline __m256d __DEFAULT_FN_ATTRS
249
+ _mm256_permutevar_pd(__m256d __a, __m256i __c)
250
+ {
251
+ return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);
252
+ }
253
+
254
+ static __inline __m128 __DEFAULT_FN_ATTRS
255
+ _mm_permutevar_ps(__m128 __a, __m128i __c)
256
+ {
257
+ return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);
258
+ }
259
+
260
+ static __inline __m256 __DEFAULT_FN_ATTRS
261
+ _mm256_permutevar_ps(__m256 __a, __m256i __c)
262
+ {
263
+ return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);
264
+ }
265
+
266
+ #define _mm_permute_pd(A, C) __extension__ ({ \
267
+ __m128d __A = (A); \
268
+ (__m128d)__builtin_shufflevector((__v2df)__A, (__v2df) _mm_setzero_pd(), \
269
+ (C) & 0x1, ((C) & 0x2) >> 1); })
270
+
271
+ #define _mm256_permute_pd(A, C) __extension__ ({ \
272
+ __m256d __A = (A); \
273
+ (__m256d)__builtin_shufflevector((__v4df)__A, (__v4df) _mm256_setzero_pd(), \
274
+ (C) & 0x1, ((C) & 0x2) >> 1, \
275
+ 2 + (((C) & 0x4) >> 2), \
276
+ 2 + (((C) & 0x8) >> 3)); })
277
+
278
+ #define _mm_permute_ps(A, C) __extension__ ({ \
279
+ __m128 __A = (A); \
280
+ (__m128)__builtin_shufflevector((__v4sf)__A, (__v4sf) _mm_setzero_ps(), \
281
+ (C) & 0x3, ((C) & 0xc) >> 2, \
282
+ ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6); })
283
+
284
+ #define _mm256_permute_ps(A, C) __extension__ ({ \
285
+ __m256 __A = (A); \
286
+ (__m256)__builtin_shufflevector((__v8sf)__A, (__v8sf) _mm256_setzero_ps(), \
287
+ (C) & 0x3, ((C) & 0xc) >> 2, \
288
+ ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6, \
289
+ 4 + (((C) & 0x03) >> 0), \
290
+ 4 + (((C) & 0x0c) >> 2), \
291
+ 4 + (((C) & 0x30) >> 4), \
292
+ 4 + (((C) & 0xc0) >> 6)); })
293
+
294
+ #define _mm256_permute2f128_pd(V1, V2, M) __extension__ ({ \
295
+ __m256d __V1 = (V1); \
296
+ __m256d __V2 = (V2); \
297
+ (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)__V1, (__v4df)__V2, (M)); })
298
+
299
+ #define _mm256_permute2f128_ps(V1, V2, M) __extension__ ({ \
300
+ __m256 __V1 = (V1); \
301
+ __m256 __V2 = (V2); \
302
+ (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)__V1, (__v8sf)__V2, (M)); })
303
+
304
+ #define _mm256_permute2f128_si256(V1, V2, M) __extension__ ({ \
305
+ __m256i __V1 = (V1); \
306
+ __m256i __V2 = (V2); \
307
+ (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)__V1, (__v8si)__V2, (M)); })
308
+
309
+ /* Vector Blend */
310
+ #define _mm256_blend_pd(V1, V2, M) __extension__ ({ \
311
+ __m256d __V1 = (V1); \
312
+ __m256d __V2 = (V2); \
313
+ (__m256d)__builtin_shufflevector((__v4df)__V1, (__v4df)__V2, \
314
+ (((M) & 0x01) ? 4 : 0), \
315
+ (((M) & 0x02) ? 5 : 1), \
316
+ (((M) & 0x04) ? 6 : 2), \
317
+ (((M) & 0x08) ? 7 : 3)); })
318
+
319
+ #define _mm256_blend_ps(V1, V2, M) __extension__ ({ \
320
+ __m256 __V1 = (V1); \
321
+ __m256 __V2 = (V2); \
322
+ (__m256)__builtin_shufflevector((__v8sf)__V1, (__v8sf)__V2, \
323
+ (((M) & 0x01) ? 8 : 0), \
324
+ (((M) & 0x02) ? 9 : 1), \
325
+ (((M) & 0x04) ? 10 : 2), \
326
+ (((M) & 0x08) ? 11 : 3), \
327
+ (((M) & 0x10) ? 12 : 4), \
328
+ (((M) & 0x20) ? 13 : 5), \
329
+ (((M) & 0x40) ? 14 : 6), \
330
+ (((M) & 0x80) ? 15 : 7)); })
331
+
332
+ static __inline __m256d __DEFAULT_FN_ATTRS
333
+ _mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)
334
+ {
335
+ return (__m256d)__builtin_ia32_blendvpd256(
336
+ (__v4df)__a, (__v4df)__b, (__v4df)__c);
337
+ }
338
+
339
+ static __inline __m256 __DEFAULT_FN_ATTRS
340
+ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
341
+ {
342
+ return (__m256)__builtin_ia32_blendvps256(
343
+ (__v8sf)__a, (__v8sf)__b, (__v8sf)__c);
344
+ }
345
+
346
+ /* Vector Dot Product */
347
+ #define _mm256_dp_ps(V1, V2, M) __extension__ ({ \
348
+ __m256 __V1 = (V1); \
349
+ __m256 __V2 = (V2); \
350
+ (__m256)__builtin_ia32_dpps256((__v8sf)__V1, (__v8sf)__V2, (M)); })
351
+
352
+ /* Vector shuffle */
353
+ #define _mm256_shuffle_ps(a, b, mask) __extension__ ({ \
354
+ __m256 __a = (a); \
355
+ __m256 __b = (b); \
356
+ (__m256)__builtin_shufflevector((__v8sf)__a, (__v8sf)__b, \
357
+ (mask) & 0x3, ((mask) & 0xc) >> 2, \
358
+ (((mask) & 0x30) >> 4) + 8, (((mask) & 0xc0) >> 6) + 8, \
359
+ ((mask) & 0x3) + 4, (((mask) & 0xc) >> 2) + 4, \
360
+ (((mask) & 0x30) >> 4) + 12, (((mask) & 0xc0) >> 6) + 12); })
361
+
362
+ #define _mm256_shuffle_pd(a, b, mask) __extension__ ({ \
363
+ __m256d __a = (a); \
364
+ __m256d __b = (b); \
365
+ (__m256d)__builtin_shufflevector((__v4df)__a, (__v4df)__b, \
366
+ (mask) & 0x1, \
367
+ (((mask) & 0x2) >> 1) + 4, \
368
+ (((mask) & 0x4) >> 2) + 2, \
369
+ (((mask) & 0x8) >> 3) + 6); })
370
+
371
+ /* Compare */
372
+ #define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
373
+ #define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */
374
+ #define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */
375
+ #define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */
376
+ #define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */
377
+ #define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */
378
+ #define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */
379
+ #define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */
380
+ #define _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */
381
+ #define _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unord, signaling) */
382
+ #define _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */
383
+ #define _CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */
384
+ #define _CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */
385
+ #define _CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered, signaling) */
386
+ #define _CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */
387
+ #define _CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */
388
+ #define _CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */
389
+ #define _CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */
390
+ #define _CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered, non-signaling) */
391
+ #define _CMP_UNORD_S 0x13 /* Unordered (signaling) */
392
+ #define _CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */
393
+ #define _CMP_NLT_UQ 0x15 /* Not-less-than (unordered, non-signaling) */
394
+ #define _CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unord, non-signaling) */
395
+ #define _CMP_ORD_S 0x17 /* Ordered (signaling) */
396
+ #define _CMP_EQ_US 0x18 /* Equal (unordered, signaling) */
397
+ #define _CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unord, non-signaling) */
398
+ #define _CMP_NGT_UQ 0x1a /* Not-greater-than (unordered, non-signaling) */
399
+ #define _CMP_FALSE_OS 0x1b /* False (ordered, signaling) */
400
+ #define _CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */
401
+ #define _CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered, non-signaling) */
402
+ #define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */
403
+ #define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */
404
+
405
+ #define _mm_cmp_pd(a, b, c) __extension__ ({ \
406
+ __m128d __a = (a); \
407
+ __m128d __b = (b); \
408
+ (__m128d)__builtin_ia32_cmppd((__v2df)__a, (__v2df)__b, (c)); })
409
+
410
+ #define _mm_cmp_ps(a, b, c) __extension__ ({ \
411
+ __m128 __a = (a); \
412
+ __m128 __b = (b); \
413
+ (__m128)__builtin_ia32_cmpps((__v4sf)__a, (__v4sf)__b, (c)); })
414
+
415
+ #define _mm256_cmp_pd(a, b, c) __extension__ ({ \
416
+ __m256d __a = (a); \
417
+ __m256d __b = (b); \
418
+ (__m256d)__builtin_ia32_cmppd256((__v4df)__a, (__v4df)__b, (c)); })
419
+
420
+ #define _mm256_cmp_ps(a, b, c) __extension__ ({ \
421
+ __m256 __a = (a); \
422
+ __m256 __b = (b); \
423
+ (__m256)__builtin_ia32_cmpps256((__v8sf)__a, (__v8sf)__b, (c)); })
424
+
425
+ #define _mm_cmp_sd(a, b, c) __extension__ ({ \
426
+ __m128d __a = (a); \
427
+ __m128d __b = (b); \
428
+ (__m128d)__builtin_ia32_cmpsd((__v2df)__a, (__v2df)__b, (c)); })
429
+
430
+ #define _mm_cmp_ss(a, b, c) __extension__ ({ \
431
+ __m128 __a = (a); \
432
+ __m128 __b = (b); \
433
+ (__m128)__builtin_ia32_cmpss((__v4sf)__a, (__v4sf)__b, (c)); })
434
+
435
+ static __inline int __DEFAULT_FN_ATTRS
436
+ _mm256_extract_epi32(__m256i __a, const int __imm)
437
+ {
438
+ __v8si __b = (__v8si)__a;
439
+ return __b[__imm & 7];
440
+ }
441
+
442
+ static __inline int __DEFAULT_FN_ATTRS
443
+ _mm256_extract_epi16(__m256i __a, const int __imm)
444
+ {
445
+ __v16hi __b = (__v16hi)__a;
446
+ return __b[__imm & 15];
447
+ }
448
+
449
+ static __inline int __DEFAULT_FN_ATTRS
450
+ _mm256_extract_epi8(__m256i __a, const int __imm)
451
+ {
452
+ __v32qi __b = (__v32qi)__a;
453
+ return __b[__imm & 31];
454
+ }
455
+
456
+ #ifdef __x86_64__
457
+ static __inline long long __DEFAULT_FN_ATTRS
458
+ _mm256_extract_epi64(__m256i __a, const int __imm)
459
+ {
460
+ __v4di __b = (__v4di)__a;
461
+ return __b[__imm & 3];
462
+ }
463
+ #endif
464
+
465
+ static __inline __m256i __DEFAULT_FN_ATTRS
466
+ _mm256_insert_epi32(__m256i __a, int __b, int const __imm)
467
+ {
468
+ __v8si __c = (__v8si)__a;
469
+ __c[__imm & 7] = __b;
470
+ return (__m256i)__c;
471
+ }
472
+
473
+ static __inline __m256i __DEFAULT_FN_ATTRS
474
+ _mm256_insert_epi16(__m256i __a, int __b, int const __imm)
475
+ {
476
+ __v16hi __c = (__v16hi)__a;
477
+ __c[__imm & 15] = __b;
478
+ return (__m256i)__c;
479
+ }
480
+
481
+ static __inline __m256i __DEFAULT_FN_ATTRS
482
+ _mm256_insert_epi8(__m256i __a, int __b, int const __imm)
483
+ {
484
+ __v32qi __c = (__v32qi)__a;
485
+ __c[__imm & 31] = __b;
486
+ return (__m256i)__c;
487
+ }
488
+
489
+ #ifdef __x86_64__
490
+ static __inline __m256i __DEFAULT_FN_ATTRS
491
+ _mm256_insert_epi64(__m256i __a, long long __b, int const __imm)
492
+ {
493
+ __v4di __c = (__v4di)__a;
494
+ __c[__imm & 3] = __b;
495
+ return (__m256i)__c;
496
+ }
497
+ #endif
498
+
499
+ /* Conversion */
500
+ static __inline __m256d __DEFAULT_FN_ATTRS
501
+ _mm256_cvtepi32_pd(__m128i __a)
502
+ {
503
+ return (__m256d)__builtin_ia32_cvtdq2pd256((__v4si) __a);
504
+ }
505
+
506
+ static __inline __m256 __DEFAULT_FN_ATTRS
507
+ _mm256_cvtepi32_ps(__m256i __a)
508
+ {
509
+ return (__m256)__builtin_ia32_cvtdq2ps256((__v8si) __a);
510
+ }
511
+
512
+ static __inline __m128 __DEFAULT_FN_ATTRS
513
+ _mm256_cvtpd_ps(__m256d __a)
514
+ {
515
+ return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);
516
+ }
517
+
518
+ static __inline __m256i __DEFAULT_FN_ATTRS
519
+ _mm256_cvtps_epi32(__m256 __a)
520
+ {
521
+ return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a);
522
+ }
523
+
524
+ static __inline __m256d __DEFAULT_FN_ATTRS
525
+ _mm256_cvtps_pd(__m128 __a)
526
+ {
527
+ return (__m256d)__builtin_ia32_cvtps2pd256((__v4sf) __a);
528
+ }
529
+
530
+ static __inline __m128i __DEFAULT_FN_ATTRS
531
+ _mm256_cvttpd_epi32(__m256d __a)
532
+ {
533
+ return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);
534
+ }
535
+
536
+ static __inline __m128i __DEFAULT_FN_ATTRS
537
+ _mm256_cvtpd_epi32(__m256d __a)
538
+ {
539
+ return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a);
540
+ }
541
+
542
+ static __inline __m256i __DEFAULT_FN_ATTRS
543
+ _mm256_cvttps_epi32(__m256 __a)
544
+ {
545
+ return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);
546
+ }
547
+
548
+ /* Vector replicate */
549
+ static __inline __m256 __DEFAULT_FN_ATTRS
550
+ _mm256_movehdup_ps(__m256 __a)
551
+ {
552
+ return __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7);
553
+ }
554
+
555
+ static __inline __m256 __DEFAULT_FN_ATTRS
556
+ _mm256_moveldup_ps(__m256 __a)
557
+ {
558
+ return __builtin_shufflevector(__a, __a, 0, 0, 2, 2, 4, 4, 6, 6);
559
+ }
560
+
561
+ static __inline __m256d __DEFAULT_FN_ATTRS
562
+ _mm256_movedup_pd(__m256d __a)
563
+ {
564
+ return __builtin_shufflevector(__a, __a, 0, 0, 2, 2);
565
+ }
566
+
567
+ /* Unpack and Interleave */
568
+ static __inline __m256d __DEFAULT_FN_ATTRS
569
+ _mm256_unpackhi_pd(__m256d __a, __m256d __b)
570
+ {
571
+ return __builtin_shufflevector(__a, __b, 1, 5, 1+2, 5+2);
572
+ }
573
+
574
+ static __inline __m256d __DEFAULT_FN_ATTRS
575
+ _mm256_unpacklo_pd(__m256d __a, __m256d __b)
576
+ {
577
+ return __builtin_shufflevector(__a, __b, 0, 4, 0+2, 4+2);
578
+ }
579
+
580
+ static __inline __m256 __DEFAULT_FN_ATTRS
581
+ _mm256_unpackhi_ps(__m256 __a, __m256 __b)
582
+ {
583
+ return __builtin_shufflevector(__a, __b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1);
584
+ }
585
+
586
+ static __inline __m256 __DEFAULT_FN_ATTRS
587
+ _mm256_unpacklo_ps(__m256 __a, __m256 __b)
588
+ {
589
+ return __builtin_shufflevector(__a, __b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1);
590
+ }
591
+
592
+ /* Bit Test */
593
+ static __inline int __DEFAULT_FN_ATTRS
594
+ _mm_testz_pd(__m128d __a, __m128d __b)
595
+ {
596
+ return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);
597
+ }
598
+
599
+ static __inline int __DEFAULT_FN_ATTRS
600
+ _mm_testc_pd(__m128d __a, __m128d __b)
601
+ {
602
+ return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);
603
+ }
604
+
605
+ static __inline int __DEFAULT_FN_ATTRS
606
+ _mm_testnzc_pd(__m128d __a, __m128d __b)
607
+ {
608
+ return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);
609
+ }
610
+
611
+ static __inline int __DEFAULT_FN_ATTRS
612
+ _mm_testz_ps(__m128 __a, __m128 __b)
613
+ {
614
+ return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);
615
+ }
616
+
617
+ static __inline int __DEFAULT_FN_ATTRS
618
+ _mm_testc_ps(__m128 __a, __m128 __b)
619
+ {
620
+ return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);
621
+ }
622
+
623
+ static __inline int __DEFAULT_FN_ATTRS
624
+ _mm_testnzc_ps(__m128 __a, __m128 __b)
625
+ {
626
+ return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);
627
+ }
628
+
629
+ static __inline int __DEFAULT_FN_ATTRS
630
+ _mm256_testz_pd(__m256d __a, __m256d __b)
631
+ {
632
+ return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);
633
+ }
634
+
635
+ static __inline int __DEFAULT_FN_ATTRS
636
+ _mm256_testc_pd(__m256d __a, __m256d __b)
637
+ {
638
+ return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);
639
+ }
640
+
641
+ static __inline int __DEFAULT_FN_ATTRS
642
+ _mm256_testnzc_pd(__m256d __a, __m256d __b)
643
+ {
644
+ return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);
645
+ }
646
+
647
+ static __inline int __DEFAULT_FN_ATTRS
648
+ _mm256_testz_ps(__m256 __a, __m256 __b)
649
+ {
650
+ return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);
651
+ }
652
+
653
+ static __inline int __DEFAULT_FN_ATTRS
654
+ _mm256_testc_ps(__m256 __a, __m256 __b)
655
+ {
656
+ return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);
657
+ }
658
+
659
+ static __inline int __DEFAULT_FN_ATTRS
660
+ _mm256_testnzc_ps(__m256 __a, __m256 __b)
661
+ {
662
+ return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);
663
+ }
664
+
665
+ static __inline int __DEFAULT_FN_ATTRS
666
+ _mm256_testz_si256(__m256i __a, __m256i __b)
667
+ {
668
+ return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);
669
+ }
670
+
671
+ static __inline int __DEFAULT_FN_ATTRS
672
+ _mm256_testc_si256(__m256i __a, __m256i __b)
673
+ {
674
+ return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);
675
+ }
676
+
677
+ static __inline int __DEFAULT_FN_ATTRS
678
+ _mm256_testnzc_si256(__m256i __a, __m256i __b)
679
+ {
680
+ return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);
681
+ }
682
+
683
+ /* Vector extract sign mask */
684
+ static __inline int __DEFAULT_FN_ATTRS
685
+ _mm256_movemask_pd(__m256d __a)
686
+ {
687
+ return __builtin_ia32_movmskpd256((__v4df)__a);
688
+ }
689
+
690
+ static __inline int __DEFAULT_FN_ATTRS
691
+ _mm256_movemask_ps(__m256 __a)
692
+ {
693
+ return __builtin_ia32_movmskps256((__v8sf)__a);
694
+ }
695
+
696
+ /* Vector zero */
697
+ static __inline void __DEFAULT_FN_ATTRS
698
+ _mm256_zeroall(void)
699
+ {
700
+ __builtin_ia32_vzeroall();
701
+ }
702
+
703
+ static __inline void __DEFAULT_FN_ATTRS
704
+ _mm256_zeroupper(void)
705
+ {
706
+ __builtin_ia32_vzeroupper();
707
+ }
708
+
709
+ /* Vector load with broadcast */
710
+ static __inline __m128 __DEFAULT_FN_ATTRS
711
+ _mm_broadcast_ss(float const *__a)
712
+ {
713
+ float __f = *__a;
714
+ return (__m128)(__v4sf){ __f, __f, __f, __f };
715
+ }
716
+
717
+ static __inline __m256d __DEFAULT_FN_ATTRS
718
+ _mm256_broadcast_sd(double const *__a)
719
+ {
720
+ double __d = *__a;
721
+ return (__m256d)(__v4df){ __d, __d, __d, __d };
722
+ }
723
+
724
+ static __inline __m256 __DEFAULT_FN_ATTRS
725
+ _mm256_broadcast_ss(float const *__a)
726
+ {
727
+ float __f = *__a;
728
+ return (__m256)(__v8sf){ __f, __f, __f, __f, __f, __f, __f, __f };
729
+ }
730
+
731
+ static __inline __m256d __DEFAULT_FN_ATTRS
732
+ _mm256_broadcast_pd(__m128d const *__a)
733
+ {
734
+ return (__m256d)__builtin_ia32_vbroadcastf128_pd256(__a);
735
+ }
736
+
737
+ static __inline __m256 __DEFAULT_FN_ATTRS
738
+ _mm256_broadcast_ps(__m128 const *__a)
739
+ {
740
+ return (__m256)__builtin_ia32_vbroadcastf128_ps256(__a);
741
+ }
742
+
743
+ /* SIMD load ops */
744
+ static __inline __m256d __DEFAULT_FN_ATTRS
745
+ _mm256_load_pd(double const *__p)
746
+ {
747
+ return *(__m256d *)__p;
748
+ }
749
+
750
+ static __inline __m256 __DEFAULT_FN_ATTRS
751
+ _mm256_load_ps(float const *__p)
752
+ {
753
+ return *(__m256 *)__p;
754
+ }
755
+
756
+ static __inline __m256d __DEFAULT_FN_ATTRS
757
+ _mm256_loadu_pd(double const *__p)
758
+ {
759
+ struct __loadu_pd {
760
+ __m256d __v;
761
+ } __attribute__((__packed__, __may_alias__));
762
+ return ((struct __loadu_pd*)__p)->__v;
763
+ }
764
+
765
+ static __inline __m256 __DEFAULT_FN_ATTRS
766
+ _mm256_loadu_ps(float const *__p)
767
+ {
768
+ struct __loadu_ps {
769
+ __m256 __v;
770
+ } __attribute__((__packed__, __may_alias__));
771
+ return ((struct __loadu_ps*)__p)->__v;
772
+ }
773
+
774
+ static __inline __m256i __DEFAULT_FN_ATTRS
775
+ _mm256_load_si256(__m256i const *__p)
776
+ {
777
+ return *__p;
778
+ }
779
+
780
+ static __inline __m256i __DEFAULT_FN_ATTRS
781
+ _mm256_loadu_si256(__m256i const *__p)
782
+ {
783
+ struct __loadu_si256 {
784
+ __m256i __v;
785
+ } __attribute__((__packed__, __may_alias__));
786
+ return ((struct __loadu_si256*)__p)->__v;
787
+ }
788
+
789
+ static __inline __m256i __DEFAULT_FN_ATTRS
790
+ _mm256_lddqu_si256(__m256i const *__p)
791
+ {
792
+ return (__m256i)__builtin_ia32_lddqu256((char const *)__p);
793
+ }
794
+
795
+ /* SIMD store ops */
796
+ static __inline void __DEFAULT_FN_ATTRS
797
+ _mm256_store_pd(double *__p, __m256d __a)
798
+ {
799
+ *(__m256d *)__p = __a;
800
+ }
801
+
802
+ static __inline void __DEFAULT_FN_ATTRS
803
+ _mm256_store_ps(float *__p, __m256 __a)
804
+ {
805
+ *(__m256 *)__p = __a;
806
+ }
807
+
808
+ static __inline void __DEFAULT_FN_ATTRS
809
+ _mm256_storeu_pd(double *__p, __m256d __a)
810
+ {
811
+ __builtin_ia32_storeupd256(__p, (__v4df)__a);
812
+ }
813
+
814
+ static __inline void __DEFAULT_FN_ATTRS
815
+ _mm256_storeu_ps(float *__p, __m256 __a)
816
+ {
817
+ __builtin_ia32_storeups256(__p, (__v8sf)__a);
818
+ }
819
+
820
+ static __inline void __DEFAULT_FN_ATTRS
821
+ _mm256_store_si256(__m256i *__p, __m256i __a)
822
+ {
823
+ *__p = __a;
824
+ }
825
+
826
+ static __inline void __DEFAULT_FN_ATTRS
827
+ _mm256_storeu_si256(__m256i *__p, __m256i __a)
828
+ {
829
+ __builtin_ia32_storedqu256((char *)__p, (__v32qi)__a);
830
+ }
831
+
832
+ /* Conditional load ops */
833
+ static __inline __m128d __DEFAULT_FN_ATTRS
834
+ _mm_maskload_pd(double const *__p, __m128d __m)
835
+ {
836
+ return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2df)__m);
837
+ }
838
+
839
+ static __inline __m256d __DEFAULT_FN_ATTRS
840
+ _mm256_maskload_pd(double const *__p, __m256d __m)
841
+ {
842
+ return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)__p,
843
+ (__v4df)__m);
844
+ }
845
+
846
+ static __inline __m128 __DEFAULT_FN_ATTRS
847
+ _mm_maskload_ps(float const *__p, __m128 __m)
848
+ {
849
+ return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4sf)__m);
850
+ }
851
+
852
+ static __inline __m256 __DEFAULT_FN_ATTRS
853
+ _mm256_maskload_ps(float const *__p, __m256 __m)
854
+ {
855
+ return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8sf)__m);
856
+ }
857
+
858
+ /* Conditional store ops */
859
+ static __inline void __DEFAULT_FN_ATTRS
860
+ _mm256_maskstore_ps(float *__p, __m256 __m, __m256 __a)
861
+ {
862
+ __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8sf)__m, (__v8sf)__a);
863
+ }
864
+
865
+ static __inline void __DEFAULT_FN_ATTRS
866
+ _mm_maskstore_pd(double *__p, __m128d __m, __m128d __a)
867
+ {
868
+ __builtin_ia32_maskstorepd((__v2df *)__p, (__v2df)__m, (__v2df)__a);
869
+ }
870
+
871
+ static __inline void __DEFAULT_FN_ATTRS
872
+ _mm256_maskstore_pd(double *__p, __m256d __m, __m256d __a)
873
+ {
874
+ __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4df)__m, (__v4df)__a);
875
+ }
876
+
877
+ static __inline void __DEFAULT_FN_ATTRS
878
+ _mm_maskstore_ps(float *__p, __m128 __m, __m128 __a)
879
+ {
880
+ __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4sf)__m, (__v4sf)__a);
881
+ }
882
+
883
+ /* Cacheability support ops */
884
+ static __inline void __DEFAULT_FN_ATTRS
885
+ _mm256_stream_si256(__m256i *__a, __m256i __b)
886
+ {
887
+ __builtin_ia32_movntdq256((__v4di *)__a, (__v4di)__b);
888
+ }
889
+
890
+ static __inline void __DEFAULT_FN_ATTRS
891
+ _mm256_stream_pd(double *__a, __m256d __b)
892
+ {
893
+ __builtin_ia32_movntpd256(__a, (__v4df)__b);
894
+ }
895
+
896
+ static __inline void __DEFAULT_FN_ATTRS
897
+ _mm256_stream_ps(float *__p, __m256 __a)
898
+ {
899
+ __builtin_ia32_movntps256(__p, (__v8sf)__a);
900
+ }
901
+
902
+ /* Create vectors */
903
+ static __inline __m256d __DEFAULT_FN_ATTRS
904
+ _mm256_set_pd(double __a, double __b, double __c, double __d)
905
+ {
906
+ return (__m256d){ __d, __c, __b, __a };
907
+ }
908
+
909
+ static __inline __m256 __DEFAULT_FN_ATTRS
910
+ _mm256_set_ps(float __a, float __b, float __c, float __d,
911
+ float __e, float __f, float __g, float __h)
912
+ {
913
+ return (__m256){ __h, __g, __f, __e, __d, __c, __b, __a };
914
+ }
915
+
916
+ static __inline __m256i __DEFAULT_FN_ATTRS
917
+ _mm256_set_epi32(int __i0, int __i1, int __i2, int __i3,
918
+ int __i4, int __i5, int __i6, int __i7)
919
+ {
920
+ return (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 };
921
+ }
922
+
923
+ static __inline __m256i __DEFAULT_FN_ATTRS
924
+ _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,
925
+ short __w11, short __w10, short __w09, short __w08,
926
+ short __w07, short __w06, short __w05, short __w04,
927
+ short __w03, short __w02, short __w01, short __w00)
928
+ {
929
+ return (__m256i)(__v16hi){ __w00, __w01, __w02, __w03, __w04, __w05, __w06,
930
+ __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 };
931
+ }
932
+
933
+ static __inline __m256i __DEFAULT_FN_ATTRS
934
+ _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28,
935
+ char __b27, char __b26, char __b25, char __b24,
936
+ char __b23, char __b22, char __b21, char __b20,
937
+ char __b19, char __b18, char __b17, char __b16,
938
+ char __b15, char __b14, char __b13, char __b12,
939
+ char __b11, char __b10, char __b09, char __b08,
940
+ char __b07, char __b06, char __b05, char __b04,
941
+ char __b03, char __b02, char __b01, char __b00)
942
+ {
943
+ return (__m256i)(__v32qi){
944
+ __b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07,
945
+ __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15,
946
+ __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23,
947
+ __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31
948
+ };
949
+ }
950
+
951
+ static __inline __m256i __DEFAULT_FN_ATTRS
952
+ _mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d)
953
+ {
954
+ return (__m256i)(__v4di){ __d, __c, __b, __a };
955
+ }
956
+
957
+ /* Create vectors with elements in reverse order */
958
+ static __inline __m256d __DEFAULT_FN_ATTRS
959
+ _mm256_setr_pd(double __a, double __b, double __c, double __d)
960
+ {
961
+ return (__m256d){ __a, __b, __c, __d };
962
+ }
963
+
964
+ static __inline __m256 __DEFAULT_FN_ATTRS
965
+ _mm256_setr_ps(float __a, float __b, float __c, float __d,
966
+ float __e, float __f, float __g, float __h)
967
+ {
968
+ return (__m256){ __a, __b, __c, __d, __e, __f, __g, __h };
969
+ }
970
+
971
+ static __inline __m256i __DEFAULT_FN_ATTRS
972
+ _mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3,
973
+ int __i4, int __i5, int __i6, int __i7)
974
+ {
975
+ return (__m256i)(__v8si){ __i0, __i1, __i2, __i3, __i4, __i5, __i6, __i7 };
976
+ }
977
+
978
+ static __inline __m256i __DEFAULT_FN_ATTRS
979
+ _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,
980
+ short __w11, short __w10, short __w09, short __w08,
981
+ short __w07, short __w06, short __w05, short __w04,
982
+ short __w03, short __w02, short __w01, short __w00)
983
+ {
984
+ return (__m256i)(__v16hi){ __w15, __w14, __w13, __w12, __w11, __w10, __w09,
985
+ __w08, __w07, __w06, __w05, __w04, __w03, __w02, __w01, __w00 };
986
+ }
987
+
988
+ static __inline __m256i __DEFAULT_FN_ATTRS
989
+ _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28,
990
+ char __b27, char __b26, char __b25, char __b24,
991
+ char __b23, char __b22, char __b21, char __b20,
992
+ char __b19, char __b18, char __b17, char __b16,
993
+ char __b15, char __b14, char __b13, char __b12,
994
+ char __b11, char __b10, char __b09, char __b08,
995
+ char __b07, char __b06, char __b05, char __b04,
996
+ char __b03, char __b02, char __b01, char __b00)
997
+ {
998
+ return (__m256i)(__v32qi){
999
+ __b31, __b30, __b29, __b28, __b27, __b26, __b25, __b24,
1000
+ __b23, __b22, __b21, __b20, __b19, __b18, __b17, __b16,
1001
+ __b15, __b14, __b13, __b12, __b11, __b10, __b09, __b08,
1002
+ __b07, __b06, __b05, __b04, __b03, __b02, __b01, __b00 };
1003
+ }
1004
+
1005
+ static __inline __m256i __DEFAULT_FN_ATTRS
1006
+ _mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d)
1007
+ {
1008
+ return (__m256i)(__v4di){ __a, __b, __c, __d };
1009
+ }
1010
+
1011
+ /* Create vectors with repeated elements */
1012
+ static __inline __m256d __DEFAULT_FN_ATTRS
1013
+ _mm256_set1_pd(double __w)
1014
+ {
1015
+ return (__m256d){ __w, __w, __w, __w };
1016
+ }
1017
+
1018
+ static __inline __m256 __DEFAULT_FN_ATTRS
1019
+ _mm256_set1_ps(float __w)
1020
+ {
1021
+ return (__m256){ __w, __w, __w, __w, __w, __w, __w, __w };
1022
+ }
1023
+
1024
+ static __inline __m256i __DEFAULT_FN_ATTRS
1025
+ _mm256_set1_epi32(int __i)
1026
+ {
1027
+ return (__m256i)(__v8si){ __i, __i, __i, __i, __i, __i, __i, __i };
1028
+ }
1029
+
1030
+ static __inline __m256i __DEFAULT_FN_ATTRS
1031
+ _mm256_set1_epi16(short __w)
1032
+ {
1033
+ return (__m256i)(__v16hi){ __w, __w, __w, __w, __w, __w, __w, __w, __w, __w,
1034
+ __w, __w, __w, __w, __w, __w };
1035
+ }
1036
+
1037
+ static __inline __m256i __DEFAULT_FN_ATTRS
1038
+ _mm256_set1_epi8(char __b)
1039
+ {
1040
+ return (__m256i)(__v32qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b,
1041
+ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b,
1042
+ __b, __b, __b, __b, __b, __b, __b };
1043
+ }
1044
+
1045
+ static __inline __m256i __DEFAULT_FN_ATTRS
1046
+ _mm256_set1_epi64x(long long __q)
1047
+ {
1048
+ return (__m256i)(__v4di){ __q, __q, __q, __q };
1049
+ }
1050
+
1051
+ /* Create zeroed vectors */
1052
+ static __inline __m256d __DEFAULT_FN_ATTRS
1053
+ _mm256_setzero_pd(void)
1054
+ {
1055
+ return (__m256d){ 0, 0, 0, 0 };
1056
+ }
1057
+
1058
+ static __inline __m256 __DEFAULT_FN_ATTRS
1059
+ _mm256_setzero_ps(void)
1060
+ {
1061
+ return (__m256){ 0, 0, 0, 0, 0, 0, 0, 0 };
1062
+ }
1063
+
1064
+ static __inline __m256i __DEFAULT_FN_ATTRS
1065
+ _mm256_setzero_si256(void)
1066
+ {
1067
+ return (__m256i){ 0LL, 0LL, 0LL, 0LL };
1068
+ }
1069
+
1070
+ /* Cast between vector types */
1071
+ static __inline __m256 __DEFAULT_FN_ATTRS
1072
+ _mm256_castpd_ps(__m256d __a)
1073
+ {
1074
+ return (__m256)__a;
1075
+ }
1076
+
1077
+ static __inline __m256i __DEFAULT_FN_ATTRS
1078
+ _mm256_castpd_si256(__m256d __a)
1079
+ {
1080
+ return (__m256i)__a;
1081
+ }
1082
+
1083
+ static __inline __m256d __DEFAULT_FN_ATTRS
1084
+ _mm256_castps_pd(__m256 __a)
1085
+ {
1086
+ return (__m256d)__a;
1087
+ }
1088
+
1089
+ static __inline __m256i __DEFAULT_FN_ATTRS
1090
+ _mm256_castps_si256(__m256 __a)
1091
+ {
1092
+ return (__m256i)__a;
1093
+ }
1094
+
1095
+ static __inline __m256 __DEFAULT_FN_ATTRS
1096
+ _mm256_castsi256_ps(__m256i __a)
1097
+ {
1098
+ return (__m256)__a;
1099
+ }
1100
+
1101
+ static __inline __m256d __DEFAULT_FN_ATTRS
1102
+ _mm256_castsi256_pd(__m256i __a)
1103
+ {
1104
+ return (__m256d)__a;
1105
+ }
1106
+
1107
+ static __inline __m128d __DEFAULT_FN_ATTRS
1108
+ _mm256_castpd256_pd128(__m256d __a)
1109
+ {
1110
+ return __builtin_shufflevector(__a, __a, 0, 1);
1111
+ }
1112
+
1113
+ static __inline __m128 __DEFAULT_FN_ATTRS
1114
+ _mm256_castps256_ps128(__m256 __a)
1115
+ {
1116
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
1117
+ }
1118
+
1119
+ static __inline __m128i __DEFAULT_FN_ATTRS
1120
+ _mm256_castsi256_si128(__m256i __a)
1121
+ {
1122
+ return __builtin_shufflevector(__a, __a, 0, 1);
1123
+ }
1124
+
1125
+ static __inline __m256d __DEFAULT_FN_ATTRS
1126
+ _mm256_castpd128_pd256(__m128d __a)
1127
+ {
1128
+ return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
1129
+ }
1130
+
1131
+ static __inline __m256 __DEFAULT_FN_ATTRS
1132
+ _mm256_castps128_ps256(__m128 __a)
1133
+ {
1134
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
1135
+ }
1136
+
1137
+ static __inline __m256i __DEFAULT_FN_ATTRS
1138
+ _mm256_castsi128_si256(__m128i __a)
1139
+ {
1140
+ return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
1141
+ }
1142
+
1143
+ /*
1144
+ Vector insert.
1145
+ We use macros rather than inlines because we only want to accept
1146
+ invocations where the immediate M is a constant expression.
1147
+ */
1148
+ #define _mm256_insertf128_ps(V1, V2, M) __extension__ ({ \
1149
+ (__m256)__builtin_shufflevector( \
1150
+ (__v8sf)(V1), \
1151
+ (__v8sf)_mm256_castps128_ps256((__m128)(V2)), \
1152
+ (((M) & 1) ? 0 : 8), \
1153
+ (((M) & 1) ? 1 : 9), \
1154
+ (((M) & 1) ? 2 : 10), \
1155
+ (((M) & 1) ? 3 : 11), \
1156
+ (((M) & 1) ? 8 : 4), \
1157
+ (((M) & 1) ? 9 : 5), \
1158
+ (((M) & 1) ? 10 : 6), \
1159
+ (((M) & 1) ? 11 : 7) );})
1160
+
1161
+ #define _mm256_insertf128_pd(V1, V2, M) __extension__ ({ \
1162
+ (__m256d)__builtin_shufflevector( \
1163
+ (__v4df)(V1), \
1164
+ (__v4df)_mm256_castpd128_pd256((__m128d)(V2)), \
1165
+ (((M) & 1) ? 0 : 4), \
1166
+ (((M) & 1) ? 1 : 5), \
1167
+ (((M) & 1) ? 4 : 2), \
1168
+ (((M) & 1) ? 5 : 3) );})
1169
+
1170
+ #define _mm256_insertf128_si256(V1, V2, M) __extension__ ({ \
1171
+ (__m256i)__builtin_shufflevector( \
1172
+ (__v4di)(V1), \
1173
+ (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \
1174
+ (((M) & 1) ? 0 : 4), \
1175
+ (((M) & 1) ? 1 : 5), \
1176
+ (((M) & 1) ? 4 : 2), \
1177
+ (((M) & 1) ? 5 : 3) );})
1178
+
1179
+ /*
1180
+ Vector extract.
1181
+ We use macros rather than inlines because we only want to accept
1182
+ invocations where the immediate M is a constant expression.
1183
+ */
1184
+ #define _mm256_extractf128_ps(V, M) __extension__ ({ \
1185
+ (__m128)__builtin_shufflevector( \
1186
+ (__v8sf)(V), \
1187
+ (__v8sf)(_mm256_setzero_ps()), \
1188
+ (((M) & 1) ? 4 : 0), \
1189
+ (((M) & 1) ? 5 : 1), \
1190
+ (((M) & 1) ? 6 : 2), \
1191
+ (((M) & 1) ? 7 : 3) );})
1192
+
1193
+ #define _mm256_extractf128_pd(V, M) __extension__ ({ \
1194
+ (__m128d)__builtin_shufflevector( \
1195
+ (__v4df)(V), \
1196
+ (__v4df)(_mm256_setzero_pd()), \
1197
+ (((M) & 1) ? 2 : 0), \
1198
+ (((M) & 1) ? 3 : 1) );})
1199
+
1200
+ #define _mm256_extractf128_si256(V, M) __extension__ ({ \
1201
+ (__m128i)__builtin_shufflevector( \
1202
+ (__v4di)(V), \
1203
+ (__v4di)(_mm256_setzero_si256()), \
1204
+ (((M) & 1) ? 2 : 0), \
1205
+ (((M) & 1) ? 3 : 1) );})
1206
+
1207
+ /* SIMD load ops (unaligned) */
1208
+ static __inline __m256 __DEFAULT_FN_ATTRS
1209
+ _mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo)
1210
+ {
1211
+ struct __loadu_ps {
1212
+ __m128 __v;
1213
+ } __attribute__((__packed__, __may_alias__));
1214
+
1215
+ __m256 __v256 = _mm256_castps128_ps256(((struct __loadu_ps*)__addr_lo)->__v);
1216
+ return _mm256_insertf128_ps(__v256, ((struct __loadu_ps*)__addr_hi)->__v, 1);
1217
+ }
1218
+
1219
+ static __inline __m256d __DEFAULT_FN_ATTRS
1220
+ _mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo)
1221
+ {
1222
+ struct __loadu_pd {
1223
+ __m128d __v;
1224
+ } __attribute__((__packed__, __may_alias__));
1225
+
1226
+ __m256d __v256 = _mm256_castpd128_pd256(((struct __loadu_pd*)__addr_lo)->__v);
1227
+ return _mm256_insertf128_pd(__v256, ((struct __loadu_pd*)__addr_hi)->__v, 1);
1228
+ }
1229
+
1230
+ static __inline __m256i __DEFAULT_FN_ATTRS
1231
+ _mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo)
1232
+ {
1233
+ struct __loadu_si128 {
1234
+ __m128i __v;
1235
+ } __attribute__((__packed__, __may_alias__));
1236
+ __m256i __v256 = _mm256_castsi128_si256(
1237
+ ((struct __loadu_si128*)__addr_lo)->__v);
1238
+ return _mm256_insertf128_si256(__v256,
1239
+ ((struct __loadu_si128*)__addr_hi)->__v, 1);
1240
+ }
1241
+
1242
+ /* SIMD store ops (unaligned) */
1243
+ static __inline void __DEFAULT_FN_ATTRS
1244
+ _mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a)
1245
+ {
1246
+ __m128 __v128;
1247
+
1248
+ __v128 = _mm256_castps256_ps128(__a);
1249
+ __builtin_ia32_storeups(__addr_lo, __v128);
1250
+ __v128 = _mm256_extractf128_ps(__a, 1);
1251
+ __builtin_ia32_storeups(__addr_hi, __v128);
1252
+ }
1253
+
1254
+ static __inline void __DEFAULT_FN_ATTRS
1255
+ _mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a)
1256
+ {
1257
+ __m128d __v128;
1258
+
1259
+ __v128 = _mm256_castpd256_pd128(__a);
1260
+ __builtin_ia32_storeupd(__addr_lo, __v128);
1261
+ __v128 = _mm256_extractf128_pd(__a, 1);
1262
+ __builtin_ia32_storeupd(__addr_hi, __v128);
1263
+ }
1264
+
1265
+ static __inline void __DEFAULT_FN_ATTRS
1266
+ _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a)
1267
+ {
1268
+ __m128i __v128;
1269
+
1270
+ __v128 = _mm256_castsi256_si128(__a);
1271
+ __builtin_ia32_storedqu((char *)__addr_lo, (__v16qi)__v128);
1272
+ __v128 = _mm256_extractf128_si256(__a, 1);
1273
+ __builtin_ia32_storedqu((char *)__addr_hi, (__v16qi)__v128);
1274
+ }
1275
+
1276
+ static __inline __m256 __DEFAULT_FN_ATTRS
1277
+ _mm256_set_m128 (__m128 __hi, __m128 __lo) {
1278
+ return (__m256) __builtin_shufflevector(__lo, __hi, 0, 1, 2, 3, 4, 5, 6, 7);
1279
+ }
1280
+
1281
+ static __inline __m256d __DEFAULT_FN_ATTRS
1282
+ _mm256_set_m128d (__m128d __hi, __m128d __lo) {
1283
+ return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo);
1284
+ }
1285
+
1286
+ static __inline __m256i __DEFAULT_FN_ATTRS
1287
+ _mm256_set_m128i (__m128i __hi, __m128i __lo) {
1288
+ return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo);
1289
+ }
1290
+
1291
+ static __inline __m256 __DEFAULT_FN_ATTRS
1292
+ _mm256_setr_m128 (__m128 __lo, __m128 __hi) {
1293
+ return _mm256_set_m128(__hi, __lo);
1294
+ }
1295
+
1296
+ static __inline __m256d __DEFAULT_FN_ATTRS
1297
+ _mm256_setr_m128d (__m128d __lo, __m128d __hi) {
1298
+ return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo);
1299
+ }
1300
+
1301
+ static __inline __m256i __DEFAULT_FN_ATTRS
1302
+ _mm256_setr_m128i (__m128i __lo, __m128i __hi) {
1303
+ return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo);
1304
+ }
1305
+
1306
+ #undef __DEFAULT_FN_ATTRS
1307
+
1308
+ #endif /* __AVXINTRIN_H */