xcodebuild-helper 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. checksums.yaml +7 -0
  2. data/.codeclimate.yml +20 -0
  3. data/.gitignore +1 -0
  4. data/.rspec +2 -0
  5. data/.travis.yml +7 -0
  6. data/Gemfile +6 -0
  7. data/Gemfile.lock +110 -0
  8. data/Guardfile +18 -0
  9. data/README.md +7 -0
  10. data/Rakefile +7 -0
  11. data/TODO.md +3 -0
  12. data/bin/oclint +5 -0
  13. data/bin/oclint-0.8 +5 -0
  14. data/bin/oclint-json-compilation-database +5 -0
  15. data/bin/oclint-xcodebuild +5 -0
  16. data/externals/oclint/LICENSE +69 -0
  17. data/externals/oclint/bin/oclint +0 -0
  18. data/externals/oclint/bin/oclint-0.10.2 +0 -0
  19. data/externals/oclint/bin/oclint-json-compilation-database +88 -0
  20. data/externals/oclint/bin/oclint-xcodebuild +218 -0
  21. data/externals/oclint/lib/clang/3.7.0/asan_blacklist.txt +13 -0
  22. data/externals/oclint/lib/clang/3.7.0/include/Intrin.h +958 -0
  23. data/externals/oclint/lib/clang/3.7.0/include/__stddef_max_align_t.h +43 -0
  24. data/externals/oclint/lib/clang/3.7.0/include/__wmmintrin_aes.h +72 -0
  25. data/externals/oclint/lib/clang/3.7.0/include/__wmmintrin_pclmul.h +34 -0
  26. data/externals/oclint/lib/clang/3.7.0/include/adxintrin.h +88 -0
  27. data/externals/oclint/lib/clang/3.7.0/include/altivec.h +13528 -0
  28. data/externals/oclint/lib/clang/3.7.0/include/ammintrin.h +215 -0
  29. data/externals/oclint/lib/clang/3.7.0/include/arm_acle.h +304 -0
  30. data/externals/oclint/lib/clang/3.7.0/include/arm_neon.h +68419 -0
  31. data/externals/oclint/lib/clang/3.7.0/include/avx2intrin.h +1256 -0
  32. data/externals/oclint/lib/clang/3.7.0/include/avx512bwintrin.h +1250 -0
  33. data/externals/oclint/lib/clang/3.7.0/include/avx512cdintrin.h +131 -0
  34. data/externals/oclint/lib/clang/3.7.0/include/avx512dqintrin.h +242 -0
  35. data/externals/oclint/lib/clang/3.7.0/include/avx512erintrin.h +285 -0
  36. data/externals/oclint/lib/clang/3.7.0/include/avx512fintrin.h +2457 -0
  37. data/externals/oclint/lib/clang/3.7.0/include/avx512vlbwintrin.h +1907 -0
  38. data/externals/oclint/lib/clang/3.7.0/include/avx512vldqintrin.h +353 -0
  39. data/externals/oclint/lib/clang/3.7.0/include/avx512vlintrin.h +1982 -0
  40. data/externals/oclint/lib/clang/3.7.0/include/avxintrin.h +1308 -0
  41. data/externals/oclint/lib/clang/3.7.0/include/bmi2intrin.h +99 -0
  42. data/externals/oclint/lib/clang/3.7.0/include/bmiintrin.h +153 -0
  43. data/externals/oclint/lib/clang/3.7.0/include/cpuid.h +209 -0
  44. data/externals/oclint/lib/clang/3.7.0/include/cuda_builtin_vars.h +110 -0
  45. data/externals/oclint/lib/clang/3.7.0/include/emmintrin.h +1480 -0
  46. data/externals/oclint/lib/clang/3.7.0/include/f16cintrin.h +63 -0
  47. data/externals/oclint/lib/clang/3.7.0/include/float.h +124 -0
  48. data/externals/oclint/lib/clang/3.7.0/include/fma4intrin.h +236 -0
  49. data/externals/oclint/lib/clang/3.7.0/include/fmaintrin.h +234 -0
  50. data/externals/oclint/lib/clang/3.7.0/include/fxsrintrin.h +55 -0
  51. data/externals/oclint/lib/clang/3.7.0/include/htmintrin.h +226 -0
  52. data/externals/oclint/lib/clang/3.7.0/include/htmxlintrin.h +363 -0
  53. data/externals/oclint/lib/clang/3.7.0/include/ia32intrin.h +101 -0
  54. data/externals/oclint/lib/clang/3.7.0/include/immintrin.h +203 -0
  55. data/externals/oclint/lib/clang/3.7.0/include/inttypes.h +102 -0
  56. data/externals/oclint/lib/clang/3.7.0/include/iso646.h +43 -0
  57. data/externals/oclint/lib/clang/3.7.0/include/limits.h +118 -0
  58. data/externals/oclint/lib/clang/3.7.0/include/lzcntintrin.h +72 -0
  59. data/externals/oclint/lib/clang/3.7.0/include/mm3dnow.h +167 -0
  60. data/externals/oclint/lib/clang/3.7.0/include/mm_malloc.h +75 -0
  61. data/externals/oclint/lib/clang/3.7.0/include/mmintrin.h +507 -0
  62. data/externals/oclint/lib/clang/3.7.0/include/module.modulemap +196 -0
  63. data/externals/oclint/lib/clang/3.7.0/include/nmmintrin.h +35 -0
  64. data/externals/oclint/lib/clang/3.7.0/include/pmmintrin.h +122 -0
  65. data/externals/oclint/lib/clang/3.7.0/include/popcntintrin.h +50 -0
  66. data/externals/oclint/lib/clang/3.7.0/include/prfchwintrin.h +39 -0
  67. data/externals/oclint/lib/clang/3.7.0/include/rdseedintrin.h +59 -0
  68. data/externals/oclint/lib/clang/3.7.0/include/rtmintrin.h +59 -0
  69. data/externals/oclint/lib/clang/3.7.0/include/s390intrin.h +39 -0
  70. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/allocator_interface.h +66 -0
  71. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/asan_interface.h +155 -0
  72. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/common_interface_defs.h +118 -0
  73. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/coverage_interface.h +63 -0
  74. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/dfsan_interface.h +114 -0
  75. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/linux_syscall_hooks.h +3070 -0
  76. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/lsan_interface.h +84 -0
  77. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/msan_interface.h +107 -0
  78. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/tsan_interface_atomic.h +222 -0
  79. data/externals/oclint/lib/clang/3.7.0/include/shaintrin.h +79 -0
  80. data/externals/oclint/lib/clang/3.7.0/include/smmintrin.h +487 -0
  81. data/externals/oclint/lib/clang/3.7.0/include/stdalign.h +35 -0
  82. data/externals/oclint/lib/clang/3.7.0/include/stdarg.h +52 -0
  83. data/externals/oclint/lib/clang/3.7.0/include/stdatomic.h +190 -0
  84. data/externals/oclint/lib/clang/3.7.0/include/stdbool.h +44 -0
  85. data/externals/oclint/lib/clang/3.7.0/include/stddef.h +137 -0
  86. data/externals/oclint/lib/clang/3.7.0/include/stdint.h +707 -0
  87. data/externals/oclint/lib/clang/3.7.0/include/stdnoreturn.h +30 -0
  88. data/externals/oclint/lib/clang/3.7.0/include/tbmintrin.h +154 -0
  89. data/externals/oclint/lib/clang/3.7.0/include/tgmath.h +1374 -0
  90. data/externals/oclint/lib/clang/3.7.0/include/tmmintrin.h +230 -0
  91. data/externals/oclint/lib/clang/3.7.0/include/unwind.h +282 -0
  92. data/externals/oclint/lib/clang/3.7.0/include/vadefs.h +65 -0
  93. data/externals/oclint/lib/clang/3.7.0/include/varargs.h +26 -0
  94. data/externals/oclint/lib/clang/3.7.0/include/vecintrin.h +8946 -0
  95. data/externals/oclint/lib/clang/3.7.0/include/wmmintrin.h +42 -0
  96. data/externals/oclint/lib/clang/3.7.0/include/x86intrin.h +81 -0
  97. data/externals/oclint/lib/clang/3.7.0/include/xmmintrin.h +1008 -0
  98. data/externals/oclint/lib/clang/3.7.0/include/xopintrin.h +809 -0
  99. data/externals/oclint/lib/clang/3.7.0/include/xtestintrin.h +41 -0
  100. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.asan_iossim_dynamic.dylib +0 -0
  101. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.asan_osx_dynamic.dylib +0 -0
  102. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.builtins-i386.a +0 -0
  103. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.builtins-x86_64.a +0 -0
  104. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.profile_osx.a +0 -0
  105. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.safestack_osx.a +0 -0
  106. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.ubsan_iossim_dynamic.dylib +0 -0
  107. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.ubsan_osx_dynamic.dylib +0 -0
  108. data/externals/oclint/lib/oclint/reporters/libHTMLReporter.dylib +0 -0
  109. data/externals/oclint/lib/oclint/reporters/libJSONReporter.dylib +0 -0
  110. data/externals/oclint/lib/oclint/reporters/libPMDReporter.dylib +0 -0
  111. data/externals/oclint/lib/oclint/reporters/libTextReporter.dylib +0 -0
  112. data/externals/oclint/lib/oclint/reporters/libXMLReporter.dylib +0 -0
  113. data/externals/oclint/lib/oclint/reporters/libXcodeReporter.dylib +0 -0
  114. data/externals/oclint/lib/oclint/rules/libAvoidBranchingStatementAsLastInLoopRule.dylib +0 -0
  115. data/externals/oclint/lib/oclint/rules/libAvoidDefaultArgumentsOnVirtualMethodsRule.dylib +0 -0
  116. data/externals/oclint/lib/oclint/rules/libAvoidPrivateStaticMembersRule.dylib +0 -0
  117. data/externals/oclint/lib/oclint/rules/libBaseClassDestructorShouldBeVirtualOrProtectedRule.dylib +0 -0
  118. data/externals/oclint/lib/oclint/rules/libBitwiseOperatorInConditionalRule.dylib +0 -0
  119. data/externals/oclint/lib/oclint/rules/libBrokenNullCheckRule.dylib +0 -0
  120. data/externals/oclint/lib/oclint/rules/libBrokenOddnessCheckRule.dylib +0 -0
  121. data/externals/oclint/lib/oclint/rules/libCollapsibleIfStatementsRule.dylib +0 -0
  122. data/externals/oclint/lib/oclint/rules/libConstantConditionalOperatorRule.dylib +0 -0
  123. data/externals/oclint/lib/oclint/rules/libConstantIfExpressionRule.dylib +0 -0
  124. data/externals/oclint/lib/oclint/rules/libCoveredSwitchStatementsDontNeedDefaultRule.dylib +0 -0
  125. data/externals/oclint/lib/oclint/rules/libCyclomaticComplexityRule.dylib +0 -0
  126. data/externals/oclint/lib/oclint/rules/libDeadCodeRule.dylib +0 -0
  127. data/externals/oclint/lib/oclint/rules/libDefaultLabelNotLastInSwitchStatementRule.dylib +0 -0
  128. data/externals/oclint/lib/oclint/rules/libDestructorOfVirtualClassRule.dylib +0 -0
  129. data/externals/oclint/lib/oclint/rules/libDoubleNegativeRule.dylib +0 -0
  130. data/externals/oclint/lib/oclint/rules/libEmptyCatchStatementRule.dylib +0 -0
  131. data/externals/oclint/lib/oclint/rules/libEmptyDoWhileStatementRule.dylib +0 -0
  132. data/externals/oclint/lib/oclint/rules/libEmptyElseBlockRule.dylib +0 -0
  133. data/externals/oclint/lib/oclint/rules/libEmptyFinallyStatementRule.dylib +0 -0
  134. data/externals/oclint/lib/oclint/rules/libEmptyForStatementRule.dylib +0 -0
  135. data/externals/oclint/lib/oclint/rules/libEmptyIfStatementRule.dylib +0 -0
  136. data/externals/oclint/lib/oclint/rules/libEmptySwitchStatementRule.dylib +0 -0
  137. data/externals/oclint/lib/oclint/rules/libEmptyTryStatementRule.dylib +0 -0
  138. data/externals/oclint/lib/oclint/rules/libEmptyWhileStatementRule.dylib +0 -0
  139. data/externals/oclint/lib/oclint/rules/libForLoopShouldBeWhileLoopRule.dylib +0 -0
  140. data/externals/oclint/lib/oclint/rules/libGotoStatementRule.dylib +0 -0
  141. data/externals/oclint/lib/oclint/rules/libInvertedLogicRule.dylib +0 -0
  142. data/externals/oclint/lib/oclint/rules/libJumbledIncrementerRule.dylib +0 -0
  143. data/externals/oclint/lib/oclint/rules/libLongClassRule.dylib +0 -0
  144. data/externals/oclint/lib/oclint/rules/libLongLineRule.dylib +0 -0
  145. data/externals/oclint/lib/oclint/rules/libLongMethodRule.dylib +0 -0
  146. data/externals/oclint/lib/oclint/rules/libLongVariableNameRule.dylib +0 -0
  147. data/externals/oclint/lib/oclint/rules/libMisplacedNullCheckRule.dylib +0 -0
  148. data/externals/oclint/lib/oclint/rules/libMissingBreakInSwitchStatementRule.dylib +0 -0
  149. data/externals/oclint/lib/oclint/rules/libMultipleUnaryOperatorRule.dylib +0 -0
  150. data/externals/oclint/lib/oclint/rules/libNPathComplexityRule.dylib +0 -0
  151. data/externals/oclint/lib/oclint/rules/libNcssMethodCountRule.dylib +0 -0
  152. data/externals/oclint/lib/oclint/rules/libNestedBlockDepthRule.dylib +0 -0
  153. data/externals/oclint/lib/oclint/rules/libNonCaseLabelInSwitchStatementRule.dylib +0 -0
  154. data/externals/oclint/lib/oclint/rules/libObjCAssignIvarOutsideAccessorsRule.dylib +0 -0
  155. data/externals/oclint/lib/oclint/rules/libObjCBoxedExpressionsRule.dylib +0 -0
  156. data/externals/oclint/lib/oclint/rules/libObjCContainerLiteralsRule.dylib +0 -0
  157. data/externals/oclint/lib/oclint/rules/libObjCNSNumberLiteralsRule.dylib +0 -0
  158. data/externals/oclint/lib/oclint/rules/libObjCObjectSubscriptingRule.dylib +0 -0
  159. data/externals/oclint/lib/oclint/rules/libObjCVerifyIsEqualHashRule.dylib +0 -0
  160. data/externals/oclint/lib/oclint/rules/libObjCVerifyMustCallSuperRule.dylib +0 -0
  161. data/externals/oclint/lib/oclint/rules/libObjCVerifyProhibitedCallRule.dylib +0 -0
  162. data/externals/oclint/lib/oclint/rules/libObjCVerifyProtectedMethodRule.dylib +0 -0
  163. data/externals/oclint/lib/oclint/rules/libObjCVerifySubclassMustImplementRule.dylib +0 -0
  164. data/externals/oclint/lib/oclint/rules/libParameterReassignmentRule.dylib +0 -0
  165. data/externals/oclint/lib/oclint/rules/libPreferEarlyExitRule.dylib +0 -0
  166. data/externals/oclint/lib/oclint/rules/libRedundantConditionalOperatorRule.dylib +0 -0
  167. data/externals/oclint/lib/oclint/rules/libRedundantIfStatementRule.dylib +0 -0
  168. data/externals/oclint/lib/oclint/rules/libRedundantLocalVariableRule.dylib +0 -0
  169. data/externals/oclint/lib/oclint/rules/libRedundantNilCheckRule.dylib +0 -0
  170. data/externals/oclint/lib/oclint/rules/libReturnFromFinallyBlockRule.dylib +0 -0
  171. data/externals/oclint/lib/oclint/rules/libShortVariableNameRule.dylib +0 -0
  172. data/externals/oclint/lib/oclint/rules/libSwitchStatementsShouldHaveDefaultRule.dylib +0 -0
  173. data/externals/oclint/lib/oclint/rules/libThrowExceptionFromFinallyBlockRule.dylib +0 -0
  174. data/externals/oclint/lib/oclint/rules/libTooFewBranchesInSwitchStatementRule.dylib +0 -0
  175. data/externals/oclint/lib/oclint/rules/libTooManyFieldsRule.dylib +0 -0
  176. data/externals/oclint/lib/oclint/rules/libTooManyMethodsRule.dylib +0 -0
  177. data/externals/oclint/lib/oclint/rules/libTooManyParametersRule.dylib +0 -0
  178. data/externals/oclint/lib/oclint/rules/libUnnecessaryElseStatementRule.dylib +0 -0
  179. data/externals/oclint/lib/oclint/rules/libUnnecessaryNullCheckForCXXDeallocRule.dylib +0 -0
  180. data/externals/oclint/lib/oclint/rules/libUnusedLocalVariableRule.dylib +0 -0
  181. data/externals/oclint/lib/oclint/rules/libUnusedMethodParameterRule.dylib +0 -0
  182. data/externals/oclint/lib/oclint/rules/libUselessParenthesesRule.dylib +0 -0
  183. data/lib/coverage_plan.rb +19 -0
  184. data/lib/device.rb +27 -0
  185. data/lib/execute.rb +7 -0
  186. data/lib/lint_plan.rb +41 -0
  187. data/lib/rules.rb +23 -0
  188. data/lib/test_plan.rb +11 -0
  189. data/lib/version.rb +3 -0
  190. data/lib/xcode.rb +128 -0
  191. data/lib/xcodebuild-helper.rb +110 -0
  192. data/spec/coverage_plan_spec.rb +18 -0
  193. data/spec/device_spec.rb +24 -0
  194. data/spec/lint_plan_spec.rb +35 -0
  195. data/spec/rule_spec.rb +37 -0
  196. data/spec/spec_helper.rb +17 -0
  197. data/spec/test_plan_spec.rb +11 -0
  198. data/spec/xcode_dsl_actions_spec.rb +136 -0
  199. data/spec/xcode_dsl_spec.rb +176 -0
  200. data/spec/xcode_spec.rb +79 -0
  201. data/xcodebuild-helper.gemspec +26 -0
  202. metadata +327 -0
@@ -0,0 +1,1308 @@
1
+ /*===---- avxintrin.h - AVX intrinsics -------------------------------------===
2
+ *
3
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ * of this software and associated documentation files (the "Software"), to deal
5
+ * in the Software without restriction, including without limitation the rights
6
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ * copies of the Software, and to permit persons to whom the Software is
8
+ * furnished to do so, subject to the following conditions:
9
+ *
10
+ * The above copyright notice and this permission notice shall be included in
11
+ * all copies or substantial portions of the Software.
12
+ *
13
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ * THE SOFTWARE.
20
+ *
21
+ *===-----------------------------------------------------------------------===
22
+ */
23
+
24
+ #ifndef __IMMINTRIN_H
25
+ #error "Never use <avxintrin.h> directly; include <immintrin.h> instead."
26
+ #endif
27
+
28
+ #ifndef __AVXINTRIN_H
29
+ #define __AVXINTRIN_H
30
+
31
/* Internal 32-byte vector views, one per element type. */
typedef double    __v4df  __attribute__((__vector_size__(32)));
typedef float     __v8sf  __attribute__((__vector_size__(32)));
typedef long long __v4di  __attribute__((__vector_size__(32)));
typedef int       __v8si  __attribute__((__vector_size__(32)));
typedef short     __v16hi __attribute__((__vector_size__(32)));
typedef char      __v32qi __attribute__((__vector_size__(32)));

/* 32-byte vector types that appear in the function signatures below. */
typedef float     __m256  __attribute__((__vector_size__(32)));
typedef double    __m256d __attribute__((__vector_size__(32)));
typedef long long __m256i __attribute__((__vector_size__(32)));

/* Attribute set attached to every inline function defined in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
44
+
45
+ /* Arithmetic */
46
+ static __inline __m256d __DEFAULT_FN_ATTRS
47
+ _mm256_add_pd(__m256d __a, __m256d __b)
48
+ {
49
+ return __a+__b;
50
+ }
51
+
52
+ static __inline __m256 __DEFAULT_FN_ATTRS
53
+ _mm256_add_ps(__m256 __a, __m256 __b)
54
+ {
55
+ return __a+__b;
56
+ }
57
+
58
+ static __inline __m256d __DEFAULT_FN_ATTRS
59
+ _mm256_sub_pd(__m256d __a, __m256d __b)
60
+ {
61
+ return __a-__b;
62
+ }
63
+
64
+ static __inline __m256 __DEFAULT_FN_ATTRS
65
+ _mm256_sub_ps(__m256 __a, __m256 __b)
66
+ {
67
+ return __a-__b;
68
+ }
69
+
70
+ static __inline __m256d __DEFAULT_FN_ATTRS
71
+ _mm256_addsub_pd(__m256d __a, __m256d __b)
72
+ {
73
+ return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b);
74
+ }
75
+
76
+ static __inline __m256 __DEFAULT_FN_ATTRS
77
+ _mm256_addsub_ps(__m256 __a, __m256 __b)
78
+ {
79
+ return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b);
80
+ }
81
+
82
+ static __inline __m256d __DEFAULT_FN_ATTRS
83
+ _mm256_div_pd(__m256d __a, __m256d __b)
84
+ {
85
+ return __a / __b;
86
+ }
87
+
88
+ static __inline __m256 __DEFAULT_FN_ATTRS
89
+ _mm256_div_ps(__m256 __a, __m256 __b)
90
+ {
91
+ return __a / __b;
92
+ }
93
+
94
+ static __inline __m256d __DEFAULT_FN_ATTRS
95
+ _mm256_max_pd(__m256d __a, __m256d __b)
96
+ {
97
+ return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b);
98
+ }
99
+
100
+ static __inline __m256 __DEFAULT_FN_ATTRS
101
+ _mm256_max_ps(__m256 __a, __m256 __b)
102
+ {
103
+ return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b);
104
+ }
105
+
106
+ static __inline __m256d __DEFAULT_FN_ATTRS
107
+ _mm256_min_pd(__m256d __a, __m256d __b)
108
+ {
109
+ return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b);
110
+ }
111
+
112
+ static __inline __m256 __DEFAULT_FN_ATTRS
113
+ _mm256_min_ps(__m256 __a, __m256 __b)
114
+ {
115
+ return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b);
116
+ }
117
+
118
+ static __inline __m256d __DEFAULT_FN_ATTRS
119
+ _mm256_mul_pd(__m256d __a, __m256d __b)
120
+ {
121
+ return __a * __b;
122
+ }
123
+
124
+ static __inline __m256 __DEFAULT_FN_ATTRS
125
+ _mm256_mul_ps(__m256 __a, __m256 __b)
126
+ {
127
+ return __a * __b;
128
+ }
129
+
130
+ static __inline __m256d __DEFAULT_FN_ATTRS
131
+ _mm256_sqrt_pd(__m256d __a)
132
+ {
133
+ return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a);
134
+ }
135
+
136
+ static __inline __m256 __DEFAULT_FN_ATTRS
137
+ _mm256_sqrt_ps(__m256 __a)
138
+ {
139
+ return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a);
140
+ }
141
+
142
+ static __inline __m256 __DEFAULT_FN_ATTRS
143
+ _mm256_rsqrt_ps(__m256 __a)
144
+ {
145
+ return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a);
146
+ }
147
+
148
+ static __inline __m256 __DEFAULT_FN_ATTRS
149
+ _mm256_rcp_ps(__m256 __a)
150
+ {
151
+ return (__m256)__builtin_ia32_rcpps256((__v8sf)__a);
152
+ }
153
+
154
/*
 * Passes V (viewed as __v4df) and the mode argument M to
 * __builtin_ia32_roundpd256 and yields the result as __m256d.
 *
 * Expands to a single expression with no temporaries: the earlier form
 * declared a local named __V, so an argument spelled __V became the
 * self-initialisation `__m256d __V = (__V);`.
 */
#define _mm256_round_pd(V, M) \
  ((__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M)))
157
+
158
/*
 * Passes V (viewed as __v8sf) and the mode argument M to
 * __builtin_ia32_roundps256 and yields the result as __m256.
 *
 * Expands to a single expression with no temporaries: the earlier form
 * declared a local named __V, so an argument spelled __V became the
 * self-initialisation `__m256 __V = (__V);`.
 */
#define _mm256_round_ps(V, M) \
  ((__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M)))
161
+
162
/*
 * Rounding shorthands: each forwards V to the matching _mm256_round_* macro
 * with a fixed _MM_FROUND_* mode constant.
 */
#define _mm256_ceil_pd(V)   _mm256_round_pd((V), _MM_FROUND_CEIL)
#define _mm256_floor_pd(V)  _mm256_round_pd((V), _MM_FROUND_FLOOR)
#define _mm256_ceil_ps(V)   _mm256_round_ps((V), _MM_FROUND_CEIL)
#define _mm256_floor_ps(V)  _mm256_round_ps((V), _MM_FROUND_FLOOR)
166
+
167
+ /* Logical */
168
+ static __inline __m256d __DEFAULT_FN_ATTRS
169
+ _mm256_and_pd(__m256d __a, __m256d __b)
170
+ {
171
+ return (__m256d)((__v4di)__a & (__v4di)__b);
172
+ }
173
+
174
+ static __inline __m256 __DEFAULT_FN_ATTRS
175
+ _mm256_and_ps(__m256 __a, __m256 __b)
176
+ {
177
+ return (__m256)((__v8si)__a & (__v8si)__b);
178
+ }
179
+
180
+ static __inline __m256d __DEFAULT_FN_ATTRS
181
+ _mm256_andnot_pd(__m256d __a, __m256d __b)
182
+ {
183
+ return (__m256d)(~(__v4di)__a & (__v4di)__b);
184
+ }
185
+
186
+ static __inline __m256 __DEFAULT_FN_ATTRS
187
+ _mm256_andnot_ps(__m256 __a, __m256 __b)
188
+ {
189
+ return (__m256)(~(__v8si)__a & (__v8si)__b);
190
+ }
191
+
192
+ static __inline __m256d __DEFAULT_FN_ATTRS
193
+ _mm256_or_pd(__m256d __a, __m256d __b)
194
+ {
195
+ return (__m256d)((__v4di)__a | (__v4di)__b);
196
+ }
197
+
198
+ static __inline __m256 __DEFAULT_FN_ATTRS
199
+ _mm256_or_ps(__m256 __a, __m256 __b)
200
+ {
201
+ return (__m256)((__v8si)__a | (__v8si)__b);
202
+ }
203
+
204
+ static __inline __m256d __DEFAULT_FN_ATTRS
205
+ _mm256_xor_pd(__m256d __a, __m256d __b)
206
+ {
207
+ return (__m256d)((__v4di)__a ^ (__v4di)__b);
208
+ }
209
+
210
+ static __inline __m256 __DEFAULT_FN_ATTRS
211
+ _mm256_xor_ps(__m256 __a, __m256 __b)
212
+ {
213
+ return (__m256)((__v8si)__a ^ (__v8si)__b);
214
+ }
215
+
216
+ /* Horizontal arithmetic */
217
+ static __inline __m256d __DEFAULT_FN_ATTRS
218
+ _mm256_hadd_pd(__m256d __a, __m256d __b)
219
+ {
220
+ return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);
221
+ }
222
+
223
+ static __inline __m256 __DEFAULT_FN_ATTRS
224
+ _mm256_hadd_ps(__m256 __a, __m256 __b)
225
+ {
226
+ return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);
227
+ }
228
+
229
+ static __inline __m256d __DEFAULT_FN_ATTRS
230
+ _mm256_hsub_pd(__m256d __a, __m256d __b)
231
+ {
232
+ return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);
233
+ }
234
+
235
+ static __inline __m256 __DEFAULT_FN_ATTRS
236
+ _mm256_hsub_ps(__m256 __a, __m256 __b)
237
+ {
238
+ return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b);
239
+ }
240
+
241
+ /* Vector permutations */
242
+ static __inline __m128d __DEFAULT_FN_ATTRS
243
+ _mm_permutevar_pd(__m128d __a, __m128i __c)
244
+ {
245
+ return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);
246
+ }
247
+
248
+ static __inline __m256d __DEFAULT_FN_ATTRS
249
+ _mm256_permutevar_pd(__m256d __a, __m256i __c)
250
+ {
251
+ return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);
252
+ }
253
+
254
+ static __inline __m128 __DEFAULT_FN_ATTRS
255
+ _mm_permutevar_ps(__m128 __a, __m128i __c)
256
+ {
257
+ return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);
258
+ }
259
+
260
+ static __inline __m256 __DEFAULT_FN_ATTRS
261
+ _mm256_permutevar_ps(__m256 __a, __m256i __c)
262
+ {
263
+ return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);
264
+ }
265
+
266
/*
 * Shuffles the two elements of A: result element 0 is A[C bit 0] and
 * result element 1 is A[C bit 1]. The second shuffle operand is a zero
 * vector that the indices (always 0 or 1) never select from.
 *
 * Expands to a single expression with no temporaries, so an argument
 * spelled __A is no longer captured by a macro-local of the same name.
 */
#define _mm_permute_pd(A, C) \
  ((__m128d)__builtin_shufflevector((__v2df)(__m128d)(A), \
                                    (__v2df)_mm_setzero_pd(), \
                                    (C) & 0x1, ((C) & 0x2) >> 1))
270
+
271
/*
 * Shuffles A within each pair of elements: bits 0 and 1 of C pick from
 * elements 0..1 for results 0 and 1, bits 2 and 3 pick from elements 2..3
 * for results 2 and 3. The second shuffle operand is a zero vector that the
 * indices (always 0..3) never select from.
 *
 * Expands to a single expression with no temporaries, so an argument
 * spelled __A is no longer captured by a macro-local of the same name.
 */
#define _mm256_permute_pd(A, C) \
  ((__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \
                                    (__v4df)_mm256_setzero_pd(), \
                                    (C) & 0x1, ((C) & 0x2) >> 1, \
                                    2 + (((C) & 0x4) >> 2), \
                                    2 + (((C) & 0x8) >> 3)))
277
+
278
/*
 * Shuffles the four elements of A: each 2-bit field of C (bits 1:0, 3:2,
 * 5:4, 7:6) is the source index for result elements 0..3 respectively.
 * The second shuffle operand is a zero vector that the indices (always
 * 0..3) never select from.
 *
 * Expands to a single expression with no temporaries, so an argument
 * spelled __A is no longer captured by a macro-local of the same name.
 */
#define _mm_permute_ps(A, C) \
  ((__m128)__builtin_shufflevector((__v4sf)(__m128)(A), \
                                   (__v4sf)_mm_setzero_ps(), \
                                   (C) & 0x3, ((C) & 0xc) >> 2, \
                                   ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6))
283
+
284
/*
 * Shuffles A within each group of four elements: the four 2-bit fields of C
 * select from elements 0..3 for results 0..3, and the same fields (offset
 * by 4) select from elements 4..7 for results 4..7. The second shuffle
 * operand is a zero vector that the indices (always 0..7) never select from.
 *
 * Expands to a single expression with no temporaries, so an argument
 * spelled __A is no longer captured by a macro-local of the same name.
 */
#define _mm256_permute_ps(A, C) \
  ((__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \
                                   (__v8sf)_mm256_setzero_ps(), \
                                   (C) & 0x3, ((C) & 0xc) >> 2, \
                                   ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6, \
                                   4 + (((C) & 0x03) >> 0), \
                                   4 + (((C) & 0x0c) >> 2), \
                                   4 + (((C) & 0x30) >> 4), \
                                   4 + (((C) & 0xc0) >> 6)))
293
+
294
/*
 * Passes V1 and V2 (viewed as __v4df) and the immediate M to
 * __builtin_ia32_vperm2f128_pd256 and yields the result as __m256d.
 *
 * Expands to a single expression with no temporaries: the earlier form
 * declared locals __V1 and __V2, so a call such as (__V2, __V1, M) read the
 * freshly declared __V1 when initialising __V2.
 */
#define _mm256_permute2f128_pd(V1, V2, M) \
  ((__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \
                                            (__v4df)(__m256d)(V2), (M)))
298
+
299
/*
 * Passes V1 and V2 (viewed as __v8sf) and the immediate M to
 * __builtin_ia32_vperm2f128_ps256 and yields the result as __m256.
 *
 * Expands to a single expression with no temporaries: the earlier form
 * declared locals __V1 and __V2, so a call such as (__V2, __V1, M) read the
 * freshly declared __V1 when initialising __V2.
 */
#define _mm256_permute2f128_ps(V1, V2, M) \
  ((__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \
                                           (__v8sf)(__m256)(V2), (M)))
303
+
304
/*
 * Passes V1 and V2 (viewed as __v8si) and the immediate M to
 * __builtin_ia32_vperm2f128_si256 and yields the result as __m256i.
 *
 * Expands to a single expression with no temporaries: the earlier form
 * declared locals __V1 and __V2, so a call such as (__V2, __V1, M) read the
 * freshly declared __V1 when initialising __V2.
 */
#define _mm256_permute2f128_si256(V1, V2, M) \
  ((__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \
                                            (__v8si)(__m256i)(V2), (M)))
308
+
309
+ /* Vector Blend */
310
/*
 * Per-element select between V1 and V2: for i in 0..3, result element i is
 * V2[i] when bit i of M is set and V1[i] otherwise (shuffle indices 4..7
 * address the second operand).
 *
 * Expands to a single expression with no temporaries, so arguments spelled
 * __V1 / __V2 are no longer captured by macro-locals of the same name.
 */
#define _mm256_blend_pd(V1, V2, M) \
  ((__m256d)__builtin_shufflevector((__v4df)(__m256d)(V1), \
                                    (__v4df)(__m256d)(V2), \
                                    (((M) & 0x01) ? 4 : 0), \
                                    (((M) & 0x02) ? 5 : 1), \
                                    (((M) & 0x04) ? 6 : 2), \
                                    (((M) & 0x08) ? 7 : 3)))
318
+
319
/*
 * Per-element select between V1 and V2: for i in 0..7, result element i is
 * V2[i] when bit i of M is set and V1[i] otherwise (shuffle indices 8..15
 * address the second operand).
 *
 * Expands to a single expression with no temporaries, so arguments spelled
 * __V1 / __V2 are no longer captured by macro-locals of the same name.
 */
#define _mm256_blend_ps(V1, V2, M) \
  ((__m256)__builtin_shufflevector((__v8sf)(__m256)(V1), \
                                   (__v8sf)(__m256)(V2), \
                                   (((M) & 0x01) ?  8 : 0), \
                                   (((M) & 0x02) ?  9 : 1), \
                                   (((M) & 0x04) ? 10 : 2), \
                                   (((M) & 0x08) ? 11 : 3), \
                                   (((M) & 0x10) ? 12 : 4), \
                                   (((M) & 0x20) ? 13 : 5), \
                                   (((M) & 0x40) ? 14 : 6), \
                                   (((M) & 0x80) ? 15 : 7)))
331
+
332
+ static __inline __m256d __DEFAULT_FN_ATTRS
333
+ _mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)
334
+ {
335
+ return (__m256d)__builtin_ia32_blendvpd256(
336
+ (__v4df)__a, (__v4df)__b, (__v4df)__c);
337
+ }
338
+
339
+ static __inline __m256 __DEFAULT_FN_ATTRS
340
+ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
341
+ {
342
+ return (__m256)__builtin_ia32_blendvps256(
343
+ (__v8sf)__a, (__v8sf)__b, (__v8sf)__c);
344
+ }
345
+
346
+ /* Vector Dot Product */
347
/*
 * Passes V1 and V2 (viewed as __v8sf) and the immediate M to
 * __builtin_ia32_dpps256 and yields the result as __m256.
 *
 * Expands to a single expression with no temporaries, so arguments spelled
 * __V1 / __V2 are no longer captured by macro-locals of the same name.
 */
#define _mm256_dp_ps(V1, V2, M) \
  ((__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \
                                  (__v8sf)(__m256)(V2), (M)))
351
+
352
+ /* Vector shuffle */
353
/*
 * Builds each group of four result elements from two elements of `a`
 * followed by two elements of `b`, using the four 2-bit fields of `mask` as
 * indices. Results 0..3 draw from a[0..3] / b[0..3]; results 4..7 reuse the
 * same fields against a[4..7] / b[4..7] (shuffle indices 8..15 address `b`).
 *
 * Expands to a single expression with no temporaries: the earlier form
 * declared locals __a and __b, which are also the parameter names used by
 * the inline functions in this header, so a call such as (__b, __a, m)
 * initialised __b from the macro's own __a.
 */
#define _mm256_shuffle_ps(a, b, mask) \
  ((__m256)__builtin_shufflevector((__v8sf)(__m256)(a), \
                                   (__v8sf)(__m256)(b), \
                                   (mask) & 0x3, \
                                   ((mask) & 0xc) >> 2, \
                                   (((mask) & 0x30) >> 4) + 8, \
                                   (((mask) & 0xc0) >> 6) + 8, \
                                   ((mask) & 0x3) + 4, \
                                   (((mask) & 0xc) >> 2) + 4, \
                                   (((mask) & 0x30) >> 4) + 12, \
                                   (((mask) & 0xc0) >> 6) + 12))
361
+
362
/*
 * Interleaves selected elements of `a` and `b`: bit 0 of `mask` picks a[0]
 * or a[1], bit 1 picks b[0] or b[1], bit 2 picks a[2] or a[3], bit 3 picks
 * b[2] or b[3] (shuffle indices 4..7 address `b`).
 *
 * Expands to a single expression with no temporaries: the earlier form
 * declared locals __a and __b, which are also the parameter names used by
 * the inline functions in this header, so a call such as (__b, __a, m)
 * initialised __b from the macro's own __a.
 */
#define _mm256_shuffle_pd(a, b, mask) \
  ((__m256d)__builtin_shufflevector((__v4df)(__m256d)(a), \
                                    (__v4df)(__m256d)(b), \
                                    (mask) & 0x1, \
                                    (((mask) & 0x2) >> 1) + 4, \
                                    (((mask) & 0x4) >> 2) + 2, \
                                    (((mask) & 0x8) >> 3) + 6))
370
+
371
+ /* Compare */
372
+ #define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
373
+ #define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */
374
+ #define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */
375
+ #define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */
376
+ #define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */
377
+ #define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */
378
+ #define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */
379
+ #define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */
380
+ #define _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */
381
+ #define _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unordered, signaling) */
382
+ #define _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */
383
+ #define _CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */
384
+ #define _CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */
385
+ #define _CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered, signaling) */
386
+ #define _CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */
387
+ #define _CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */
388
+ #define _CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */
389
+ #define _CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */
390
+ #define _CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered, non-signaling) */
391
+ #define _CMP_UNORD_S 0x13 /* Unordered (signaling) */
392
+ #define _CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */
393
+ #define _CMP_NLT_UQ 0x15 /* Not-less-than (unordered, non-signaling) */
394
+ #define _CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unordered, non-signaling) */
395
+ #define _CMP_ORD_S 0x17 /* Ordered (signaling) */
396
+ #define _CMP_EQ_US 0x18 /* Equal (unordered, signaling) */
397
+ #define _CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unordered, non-signaling) */
398
+ #define _CMP_NGT_UQ 0x1a /* Not-greater-than (unordered, non-signaling) */
399
+ #define _CMP_FALSE_OS 0x1b /* False (ordered, signaling) */
400
+ #define _CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */
401
+ #define _CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered, non-signaling) */
402
+ #define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */
403
+ #define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */
404
+
405
/*
 * Passes a and b (viewed as __v2df) and the immediate c to
 * __builtin_ia32_cmppd and yields the result as __m128d.
 *
 * Expands to a single expression with no temporaries: the earlier form
 * declared locals __a and __b, so a call such as (__b, __a, c) initialised
 * __b from the macro's own __a.
 */
#define _mm_cmp_pd(a, b, c) \
  ((__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
                                 (__v2df)(__m128d)(b), (c)))
409
+
410
/*
 * Passes a and b (viewed as __v4sf) and the immediate c to
 * __builtin_ia32_cmpps and yields the result as __m128.
 *
 * Expands to a single expression with no temporaries: the earlier form
 * declared locals __a and __b, so a call such as (__b, __a, c) initialised
 * __b from the macro's own __a.
 */
#define _mm_cmp_ps(a, b, c) \
  ((__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
                                (__v4sf)(__m128)(b), (c)))
414
+
415
/*
 * Passes a and b (viewed as __v4df) and the immediate c to
 * __builtin_ia32_cmppd256 and yields the result as __m256d.
 *
 * Expands to a single expression with no temporaries: the earlier form
 * declared locals __a and __b, so a call such as (__b, __a, c) initialised
 * __b from the macro's own __a.
 */
#define _mm256_cmp_pd(a, b, c) \
  ((__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \
                                    (__v4df)(__m256d)(b), (c)))
419
+
420
/*
 * Passes a and b (viewed as __v8sf) and the immediate c to
 * __builtin_ia32_cmpps256 and yields the result as __m256.
 *
 * Expands to a single expression with no temporaries: the earlier form
 * declared locals __a and __b, so a call such as (__b, __a, c) initialised
 * __b from the macro's own __a.
 */
#define _mm256_cmp_ps(a, b, c) \
  ((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
                                   (__v8sf)(__m256)(b), (c)))
424
+
425
/*
 * Passes a and b (viewed as __v2df) and the immediate c to
 * __builtin_ia32_cmpsd and yields the result as __m128d.
 *
 * Expands to a single expression with no temporaries: the earlier form
 * declared locals __a and __b, so a call such as (__b, __a, c) initialised
 * __b from the macro's own __a.
 */
#define _mm_cmp_sd(a, b, c) \
  ((__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
                                 (__v2df)(__m128d)(b), (c)))
429
+
430
/*
 * Passes a and b (viewed as __v4sf) and the immediate c to
 * __builtin_ia32_cmpss and yields the result as __m128.
 *
 * Expands to a single expression with no temporaries: the earlier form
 * declared locals __a and __b, so a call such as (__b, __a, c) initialised
 * __b from the macro's own __a.
 */
#define _mm_cmp_ss(a, b, c) \
  ((__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \
                                (__v4sf)(__m128)(b), (c)))
434
+
435
+ static __inline int __DEFAULT_FN_ATTRS
436
+ _mm256_extract_epi32(__m256i __a, const int __imm)
437
+ {
438
+ __v8si __b = (__v8si)__a;
439
+ return __b[__imm & 7];
440
+ }
441
+
442
+ static __inline int __DEFAULT_FN_ATTRS
443
+ _mm256_extract_epi16(__m256i __a, const int __imm)
444
+ {
445
+ __v16hi __b = (__v16hi)__a;
446
+ return __b[__imm & 15];
447
+ }
448
+
449
+ static __inline int __DEFAULT_FN_ATTRS
450
+ _mm256_extract_epi8(__m256i __a, const int __imm)
451
+ {
452
+ __v32qi __b = (__v32qi)__a;
453
+ return __b[__imm & 31];
454
+ }
455
+
456
+ #ifdef __x86_64__
457
+ static __inline long long __DEFAULT_FN_ATTRS
458
+ _mm256_extract_epi64(__m256i __a, const int __imm)
459
+ {
460
+ __v4di __b = (__v4di)__a;
461
+ return __b[__imm & 3];
462
+ }
463
+ #endif
464
+
465
+ static __inline __m256i __DEFAULT_FN_ATTRS
466
+ _mm256_insert_epi32(__m256i __a, int __b, int const __imm)
467
+ {
468
+ __v8si __c = (__v8si)__a;
469
+ __c[__imm & 7] = __b;
470
+ return (__m256i)__c;
471
+ }
472
+
473
+ static __inline __m256i __DEFAULT_FN_ATTRS
474
+ _mm256_insert_epi16(__m256i __a, int __b, int const __imm)
475
+ {
476
+ __v16hi __c = (__v16hi)__a;
477
+ __c[__imm & 15] = __b;
478
+ return (__m256i)__c;
479
+ }
480
+
481
+ static __inline __m256i __DEFAULT_FN_ATTRS
482
+ _mm256_insert_epi8(__m256i __a, int __b, int const __imm)
483
+ {
484
+ __v32qi __c = (__v32qi)__a;
485
+ __c[__imm & 31] = __b;
486
+ return (__m256i)__c;
487
+ }
488
+
489
+ #ifdef __x86_64__
490
+ static __inline __m256i __DEFAULT_FN_ATTRS
491
+ _mm256_insert_epi64(__m256i __a, long long __b, int const __imm)
492
+ {
493
+ __v4di __c = (__v4di)__a;
494
+ __c[__imm & 3] = __b;
495
+ return (__m256i)__c;
496
+ }
497
+ #endif
498
+
499
+ /* Conversion */
500
+ static __inline __m256d __DEFAULT_FN_ATTRS
501
+ _mm256_cvtepi32_pd(__m128i __a)
502
+ {
503
+ return (__m256d)__builtin_ia32_cvtdq2pd256((__v4si) __a);
504
+ }
505
+
506
+ static __inline __m256 __DEFAULT_FN_ATTRS
507
+ _mm256_cvtepi32_ps(__m256i __a)
508
+ {
509
+ return (__m256)__builtin_ia32_cvtdq2ps256((__v8si) __a);
510
+ }
511
+
512
+ static __inline __m128 __DEFAULT_FN_ATTRS
513
+ _mm256_cvtpd_ps(__m256d __a)
514
+ {
515
+ return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);
516
+ }
517
+
518
+ static __inline __m256i __DEFAULT_FN_ATTRS
519
+ _mm256_cvtps_epi32(__m256 __a)
520
+ {
521
+ return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a);
522
+ }
523
+
524
+ static __inline __m256d __DEFAULT_FN_ATTRS
525
+ _mm256_cvtps_pd(__m128 __a)
526
+ {
527
+ return (__m256d)__builtin_ia32_cvtps2pd256((__v4sf) __a);
528
+ }
529
+
530
+ static __inline __m128i __DEFAULT_FN_ATTRS
531
+ _mm256_cvttpd_epi32(__m256d __a)
532
+ {
533
+ return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);
534
+ }
535
+
536
+ static __inline __m128i __DEFAULT_FN_ATTRS
537
+ _mm256_cvtpd_epi32(__m256d __a)
538
+ {
539
+ return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a);
540
+ }
541
+
542
+ static __inline __m256i __DEFAULT_FN_ATTRS
543
+ _mm256_cvttps_epi32(__m256 __a)
544
+ {
545
+ return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);
546
+ }
547
+
548
+ /* Vector replicate */
549
+ static __inline __m256 __DEFAULT_FN_ATTRS
550
+ _mm256_movehdup_ps(__m256 __a)
551
+ {
552
+ return __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7);
553
+ }
554
+
555
+ static __inline __m256 __DEFAULT_FN_ATTRS
556
+ _mm256_moveldup_ps(__m256 __a)
557
+ {
558
+ return __builtin_shufflevector(__a, __a, 0, 0, 2, 2, 4, 4, 6, 6);
559
+ }
560
+
561
+ static __inline __m256d __DEFAULT_FN_ATTRS
562
+ _mm256_movedup_pd(__m256d __a)
563
+ {
564
+ return __builtin_shufflevector(__a, __a, 0, 0, 2, 2);
565
+ }
566
+
567
+ /* Unpack and Interleave */
568
+ static __inline __m256d __DEFAULT_FN_ATTRS
569
+ _mm256_unpackhi_pd(__m256d __a, __m256d __b)
570
+ {
571
+ return __builtin_shufflevector(__a, __b, 1, 5, 1+2, 5+2);
572
+ }
573
+
574
+ static __inline __m256d __DEFAULT_FN_ATTRS
575
+ _mm256_unpacklo_pd(__m256d __a, __m256d __b)
576
+ {
577
+ return __builtin_shufflevector(__a, __b, 0, 4, 0+2, 4+2);
578
+ }
579
+
580
+ static __inline __m256 __DEFAULT_FN_ATTRS
581
+ _mm256_unpackhi_ps(__m256 __a, __m256 __b)
582
+ {
583
+ return __builtin_shufflevector(__a, __b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1);
584
+ }
585
+
586
+ static __inline __m256 __DEFAULT_FN_ATTRS
587
+ _mm256_unpacklo_ps(__m256 __a, __m256 __b)
588
+ {
589
+ return __builtin_shufflevector(__a, __b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1);
590
+ }
591
+
592
+ /* Bit Test */
593
+ static __inline int __DEFAULT_FN_ATTRS
594
+ _mm_testz_pd(__m128d __a, __m128d __b)
595
+ {
596
+ return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);
597
+ }
598
+
599
+ static __inline int __DEFAULT_FN_ATTRS
600
+ _mm_testc_pd(__m128d __a, __m128d __b)
601
+ {
602
+ return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);
603
+ }
604
+
605
+ static __inline int __DEFAULT_FN_ATTRS
606
+ _mm_testnzc_pd(__m128d __a, __m128d __b)
607
+ {
608
+ return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);
609
+ }
610
+
611
+ static __inline int __DEFAULT_FN_ATTRS
612
+ _mm_testz_ps(__m128 __a, __m128 __b)
613
+ {
614
+ return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);
615
+ }
616
+
617
+ static __inline int __DEFAULT_FN_ATTRS
618
+ _mm_testc_ps(__m128 __a, __m128 __b)
619
+ {
620
+ return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);
621
+ }
622
+
623
+ static __inline int __DEFAULT_FN_ATTRS
624
+ _mm_testnzc_ps(__m128 __a, __m128 __b)
625
+ {
626
+ return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);
627
+ }
628
+
629
+ static __inline int __DEFAULT_FN_ATTRS
630
+ _mm256_testz_pd(__m256d __a, __m256d __b)
631
+ {
632
+ return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);
633
+ }
634
+
635
+ static __inline int __DEFAULT_FN_ATTRS
636
+ _mm256_testc_pd(__m256d __a, __m256d __b)
637
+ {
638
+ return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);
639
+ }
640
+
641
+ static __inline int __DEFAULT_FN_ATTRS
642
+ _mm256_testnzc_pd(__m256d __a, __m256d __b)
643
+ {
644
+ return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);
645
+ }
646
+
647
+ static __inline int __DEFAULT_FN_ATTRS
648
+ _mm256_testz_ps(__m256 __a, __m256 __b)
649
+ {
650
+ return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);
651
+ }
652
+
653
+ static __inline int __DEFAULT_FN_ATTRS
654
+ _mm256_testc_ps(__m256 __a, __m256 __b)
655
+ {
656
+ return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);
657
+ }
658
+
659
+ static __inline int __DEFAULT_FN_ATTRS
660
+ _mm256_testnzc_ps(__m256 __a, __m256 __b)
661
+ {
662
+ return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);
663
+ }
664
+
665
+ static __inline int __DEFAULT_FN_ATTRS
666
+ _mm256_testz_si256(__m256i __a, __m256i __b)
667
+ {
668
+ return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);
669
+ }
670
+
671
+ static __inline int __DEFAULT_FN_ATTRS
672
+ _mm256_testc_si256(__m256i __a, __m256i __b)
673
+ {
674
+ return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);
675
+ }
676
+
677
+ static __inline int __DEFAULT_FN_ATTRS
678
+ _mm256_testnzc_si256(__m256i __a, __m256i __b)
679
+ {
680
+ return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);
681
+ }
682
+
683
+ /* Vector extract sign mask */
684
+ static __inline int __DEFAULT_FN_ATTRS
685
+ _mm256_movemask_pd(__m256d __a)
686
+ {
687
+ return __builtin_ia32_movmskpd256((__v4df)__a);
688
+ }
689
+
690
+ static __inline int __DEFAULT_FN_ATTRS
691
+ _mm256_movemask_ps(__m256 __a)
692
+ {
693
+ return __builtin_ia32_movmskps256((__v8sf)__a);
694
+ }
695
+
696
+ /* Vector zero */
697
+ static __inline void __DEFAULT_FN_ATTRS
698
+ _mm256_zeroall(void)
699
+ {
700
+ __builtin_ia32_vzeroall();
701
+ }
702
+
703
+ static __inline void __DEFAULT_FN_ATTRS
704
+ _mm256_zeroupper(void)
705
+ {
706
+ __builtin_ia32_vzeroupper();
707
+ }
708
+
709
+ /* Vector load with broadcast */
710
+ static __inline __m128 __DEFAULT_FN_ATTRS
711
+ _mm_broadcast_ss(float const *__a)
712
+ {
713
+ float __f = *__a;
714
+ return (__m128)(__v4sf){ __f, __f, __f, __f };
715
+ }
716
+
717
+ static __inline __m256d __DEFAULT_FN_ATTRS
718
+ _mm256_broadcast_sd(double const *__a)
719
+ {
720
+ double __d = *__a;
721
+ return (__m256d)(__v4df){ __d, __d, __d, __d };
722
+ }
723
+
724
+ static __inline __m256 __DEFAULT_FN_ATTRS
725
+ _mm256_broadcast_ss(float const *__a)
726
+ {
727
+ float __f = *__a;
728
+ return (__m256)(__v8sf){ __f, __f, __f, __f, __f, __f, __f, __f };
729
+ }
730
+
731
+ static __inline __m256d __DEFAULT_FN_ATTRS
732
+ _mm256_broadcast_pd(__m128d const *__a)
733
+ {
734
+ return (__m256d)__builtin_ia32_vbroadcastf128_pd256(__a);
735
+ }
736
+
737
+ static __inline __m256 __DEFAULT_FN_ATTRS
738
+ _mm256_broadcast_ps(__m128 const *__a)
739
+ {
740
+ return (__m256)__builtin_ia32_vbroadcastf128_ps256(__a);
741
+ }
742
+
743
+ /* SIMD load ops */
744
+ static __inline __m256d __DEFAULT_FN_ATTRS
745
+ _mm256_load_pd(double const *__p)
746
+ {
747
+ return *(__m256d *)__p;
748
+ }
749
+
750
+ static __inline __m256 __DEFAULT_FN_ATTRS
751
+ _mm256_load_ps(float const *__p)
752
+ {
753
+ return *(__m256 *)__p;
754
+ }
755
+
756
+ static __inline __m256d __DEFAULT_FN_ATTRS
757
+ _mm256_loadu_pd(double const *__p)
758
+ {
759
+ struct __loadu_pd {
760
+ __m256d __v;
761
+ } __attribute__((__packed__, __may_alias__));
762
+ return ((struct __loadu_pd*)__p)->__v;
763
+ }
764
+
765
+ static __inline __m256 __DEFAULT_FN_ATTRS
766
+ _mm256_loadu_ps(float const *__p)
767
+ {
768
+ struct __loadu_ps {
769
+ __m256 __v;
770
+ } __attribute__((__packed__, __may_alias__));
771
+ return ((struct __loadu_ps*)__p)->__v;
772
+ }
773
+
774
+ static __inline __m256i __DEFAULT_FN_ATTRS
775
+ _mm256_load_si256(__m256i const *__p)
776
+ {
777
+ return *__p;
778
+ }
779
+
780
+ static __inline __m256i __DEFAULT_FN_ATTRS
781
+ _mm256_loadu_si256(__m256i const *__p)
782
+ {
783
+ struct __loadu_si256 {
784
+ __m256i __v;
785
+ } __attribute__((__packed__, __may_alias__));
786
+ return ((struct __loadu_si256*)__p)->__v;
787
+ }
788
+
789
+ static __inline __m256i __DEFAULT_FN_ATTRS
790
+ _mm256_lddqu_si256(__m256i const *__p)
791
+ {
792
+ return (__m256i)__builtin_ia32_lddqu256((char const *)__p);
793
+ }
794
+
795
+ /* SIMD store ops */
796
+ static __inline void __DEFAULT_FN_ATTRS
797
+ _mm256_store_pd(double *__p, __m256d __a)
798
+ {
799
+ *(__m256d *)__p = __a;
800
+ }
801
+
802
+ static __inline void __DEFAULT_FN_ATTRS
803
+ _mm256_store_ps(float *__p, __m256 __a)
804
+ {
805
+ *(__m256 *)__p = __a;
806
+ }
807
+
808
+ static __inline void __DEFAULT_FN_ATTRS
809
+ _mm256_storeu_pd(double *__p, __m256d __a)
810
+ {
811
+ __builtin_ia32_storeupd256(__p, (__v4df)__a);
812
+ }
813
+
814
+ static __inline void __DEFAULT_FN_ATTRS
815
+ _mm256_storeu_ps(float *__p, __m256 __a)
816
+ {
817
+ __builtin_ia32_storeups256(__p, (__v8sf)__a);
818
+ }
819
+
820
+ static __inline void __DEFAULT_FN_ATTRS
821
+ _mm256_store_si256(__m256i *__p, __m256i __a)
822
+ {
823
+ *__p = __a;
824
+ }
825
+
826
+ static __inline void __DEFAULT_FN_ATTRS
827
+ _mm256_storeu_si256(__m256i *__p, __m256i __a)
828
+ {
829
+ __builtin_ia32_storedqu256((char *)__p, (__v32qi)__a);
830
+ }
831
+
832
+ /* Conditional load ops */
833
+ static __inline __m128d __DEFAULT_FN_ATTRS
834
+ _mm_maskload_pd(double const *__p, __m128d __m)
835
+ {
836
+ return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2df)__m);
837
+ }
838
+
839
+ static __inline __m256d __DEFAULT_FN_ATTRS
840
+ _mm256_maskload_pd(double const *__p, __m256d __m)
841
+ {
842
+ return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)__p,
843
+ (__v4df)__m);
844
+ }
845
+
846
+ static __inline __m128 __DEFAULT_FN_ATTRS
847
+ _mm_maskload_ps(float const *__p, __m128 __m)
848
+ {
849
+ return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4sf)__m);
850
+ }
851
+
852
+ static __inline __m256 __DEFAULT_FN_ATTRS
853
+ _mm256_maskload_ps(float const *__p, __m256 __m)
854
+ {
855
+ return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8sf)__m);
856
+ }
857
+
858
+ /* Conditional store ops */
859
+ static __inline void __DEFAULT_FN_ATTRS
860
+ _mm256_maskstore_ps(float *__p, __m256 __m, __m256 __a)
861
+ {
862
+ __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8sf)__m, (__v8sf)__a);
863
+ }
864
+
865
+ static __inline void __DEFAULT_FN_ATTRS
866
+ _mm_maskstore_pd(double *__p, __m128d __m, __m128d __a)
867
+ {
868
+ __builtin_ia32_maskstorepd((__v2df *)__p, (__v2df)__m, (__v2df)__a);
869
+ }
870
+
871
+ static __inline void __DEFAULT_FN_ATTRS
872
+ _mm256_maskstore_pd(double *__p, __m256d __m, __m256d __a)
873
+ {
874
+ __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4df)__m, (__v4df)__a);
875
+ }
876
+
877
+ static __inline void __DEFAULT_FN_ATTRS
878
+ _mm_maskstore_ps(float *__p, __m128 __m, __m128 __a)
879
+ {
880
+ __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4sf)__m, (__v4sf)__a);
881
+ }
882
+
883
+ /* Cacheability support ops */
884
+ static __inline void __DEFAULT_FN_ATTRS
885
+ _mm256_stream_si256(__m256i *__a, __m256i __b)
886
+ {
887
+ __builtin_ia32_movntdq256((__v4di *)__a, (__v4di)__b);
888
+ }
889
+
890
+ static __inline void __DEFAULT_FN_ATTRS
891
+ _mm256_stream_pd(double *__a, __m256d __b)
892
+ {
893
+ __builtin_ia32_movntpd256(__a, (__v4df)__b);
894
+ }
895
+
896
+ static __inline void __DEFAULT_FN_ATTRS
897
+ _mm256_stream_ps(float *__p, __m256 __a)
898
+ {
899
+ __builtin_ia32_movntps256(__p, (__v8sf)__a);
900
+ }
901
+
902
+ /* Create vectors */
903
+ static __inline __m256d __DEFAULT_FN_ATTRS
904
+ _mm256_set_pd(double __a, double __b, double __c, double __d)
905
+ {
906
+ return (__m256d){ __d, __c, __b, __a };
907
+ }
908
+
909
+ static __inline __m256 __DEFAULT_FN_ATTRS
910
+ _mm256_set_ps(float __a, float __b, float __c, float __d,
911
+ float __e, float __f, float __g, float __h)
912
+ {
913
+ return (__m256){ __h, __g, __f, __e, __d, __c, __b, __a };
914
+ }
915
+
916
+ static __inline __m256i __DEFAULT_FN_ATTRS
917
+ _mm256_set_epi32(int __i0, int __i1, int __i2, int __i3,
918
+ int __i4, int __i5, int __i6, int __i7)
919
+ {
920
+ return (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 };
921
+ }
922
+
923
+ static __inline __m256i __DEFAULT_FN_ATTRS
924
+ _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,
925
+ short __w11, short __w10, short __w09, short __w08,
926
+ short __w07, short __w06, short __w05, short __w04,
927
+ short __w03, short __w02, short __w01, short __w00)
928
+ {
929
+ return (__m256i)(__v16hi){ __w00, __w01, __w02, __w03, __w04, __w05, __w06,
930
+ __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 };
931
+ }
932
+
933
+ static __inline __m256i __DEFAULT_FN_ATTRS
934
+ _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28,
935
+ char __b27, char __b26, char __b25, char __b24,
936
+ char __b23, char __b22, char __b21, char __b20,
937
+ char __b19, char __b18, char __b17, char __b16,
938
+ char __b15, char __b14, char __b13, char __b12,
939
+ char __b11, char __b10, char __b09, char __b08,
940
+ char __b07, char __b06, char __b05, char __b04,
941
+ char __b03, char __b02, char __b01, char __b00)
942
+ {
943
+ return (__m256i)(__v32qi){
944
+ __b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07,
945
+ __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15,
946
+ __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23,
947
+ __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31
948
+ };
949
+ }
950
+
951
+ static __inline __m256i __DEFAULT_FN_ATTRS
952
+ _mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d)
953
+ {
954
+ return (__m256i)(__v4di){ __d, __c, __b, __a };
955
+ }
956
+
957
+ /* Create vectors with elements in reverse order */
958
+ static __inline __m256d __DEFAULT_FN_ATTRS
959
+ _mm256_setr_pd(double __a, double __b, double __c, double __d)
960
+ {
961
+ return (__m256d){ __a, __b, __c, __d };
962
+ }
963
+
964
+ static __inline __m256 __DEFAULT_FN_ATTRS
965
+ _mm256_setr_ps(float __a, float __b, float __c, float __d,
966
+ float __e, float __f, float __g, float __h)
967
+ {
968
+ return (__m256){ __a, __b, __c, __d, __e, __f, __g, __h };
969
+ }
970
+
971
+ static __inline __m256i __DEFAULT_FN_ATTRS
972
+ _mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3,
973
+ int __i4, int __i5, int __i6, int __i7)
974
+ {
975
+ return (__m256i)(__v8si){ __i0, __i1, __i2, __i3, __i4, __i5, __i6, __i7 };
976
+ }
977
+
978
+ static __inline __m256i __DEFAULT_FN_ATTRS
979
+ _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,
980
+ short __w11, short __w10, short __w09, short __w08,
981
+ short __w07, short __w06, short __w05, short __w04,
982
+ short __w03, short __w02, short __w01, short __w00)
983
+ {
984
+ return (__m256i)(__v16hi){ __w15, __w14, __w13, __w12, __w11, __w10, __w09,
985
+ __w08, __w07, __w06, __w05, __w04, __w03, __w02, __w01, __w00 };
986
+ }
987
+
988
+ static __inline __m256i __DEFAULT_FN_ATTRS
989
+ _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28,
990
+ char __b27, char __b26, char __b25, char __b24,
991
+ char __b23, char __b22, char __b21, char __b20,
992
+ char __b19, char __b18, char __b17, char __b16,
993
+ char __b15, char __b14, char __b13, char __b12,
994
+ char __b11, char __b10, char __b09, char __b08,
995
+ char __b07, char __b06, char __b05, char __b04,
996
+ char __b03, char __b02, char __b01, char __b00)
997
+ {
998
+ return (__m256i)(__v32qi){
999
+ __b31, __b30, __b29, __b28, __b27, __b26, __b25, __b24,
1000
+ __b23, __b22, __b21, __b20, __b19, __b18, __b17, __b16,
1001
+ __b15, __b14, __b13, __b12, __b11, __b10, __b09, __b08,
1002
+ __b07, __b06, __b05, __b04, __b03, __b02, __b01, __b00 };
1003
+ }
1004
+
1005
+ static __inline __m256i __DEFAULT_FN_ATTRS
1006
+ _mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d)
1007
+ {
1008
+ return (__m256i)(__v4di){ __a, __b, __c, __d };
1009
+ }
1010
+
1011
+ /* Create vectors with repeated elements */
1012
+ static __inline __m256d __DEFAULT_FN_ATTRS
1013
+ _mm256_set1_pd(double __w)
1014
+ {
1015
+ return (__m256d){ __w, __w, __w, __w };
1016
+ }
1017
+
1018
+ static __inline __m256 __DEFAULT_FN_ATTRS
1019
+ _mm256_set1_ps(float __w)
1020
+ {
1021
+ return (__m256){ __w, __w, __w, __w, __w, __w, __w, __w };
1022
+ }
1023
+
1024
+ static __inline __m256i __DEFAULT_FN_ATTRS
1025
+ _mm256_set1_epi32(int __i)
1026
+ {
1027
+ return (__m256i)(__v8si){ __i, __i, __i, __i, __i, __i, __i, __i };
1028
+ }
1029
+
1030
+ static __inline __m256i __DEFAULT_FN_ATTRS
1031
+ _mm256_set1_epi16(short __w)
1032
+ {
1033
+ return (__m256i)(__v16hi){ __w, __w, __w, __w, __w, __w, __w, __w, __w, __w,
1034
+ __w, __w, __w, __w, __w, __w };
1035
+ }
1036
+
1037
+ static __inline __m256i __DEFAULT_FN_ATTRS
1038
+ _mm256_set1_epi8(char __b)
1039
+ {
1040
+ return (__m256i)(__v32qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b,
1041
+ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b,
1042
+ __b, __b, __b, __b, __b, __b, __b };
1043
+ }
1044
+
1045
+ static __inline __m256i __DEFAULT_FN_ATTRS
1046
+ _mm256_set1_epi64x(long long __q)
1047
+ {
1048
+ return (__m256i)(__v4di){ __q, __q, __q, __q };
1049
+ }
1050
+
1051
+ /* Create zeroed vectors */
1052
+ static __inline __m256d __DEFAULT_FN_ATTRS
1053
+ _mm256_setzero_pd(void)
1054
+ {
1055
+ return (__m256d){ 0, 0, 0, 0 };
1056
+ }
1057
+
1058
+ static __inline __m256 __DEFAULT_FN_ATTRS
1059
+ _mm256_setzero_ps(void)
1060
+ {
1061
+ return (__m256){ 0, 0, 0, 0, 0, 0, 0, 0 };
1062
+ }
1063
+
1064
+ static __inline __m256i __DEFAULT_FN_ATTRS
1065
+ _mm256_setzero_si256(void)
1066
+ {
1067
+ return (__m256i){ 0LL, 0LL, 0LL, 0LL };
1068
+ }
1069
+
1070
+ /* Cast between vector types */
1071
+ static __inline __m256 __DEFAULT_FN_ATTRS
1072
+ _mm256_castpd_ps(__m256d __a)
1073
+ {
1074
+ return (__m256)__a;
1075
+ }
1076
+
1077
+ static __inline __m256i __DEFAULT_FN_ATTRS
1078
+ _mm256_castpd_si256(__m256d __a)
1079
+ {
1080
+ return (__m256i)__a;
1081
+ }
1082
+
1083
+ static __inline __m256d __DEFAULT_FN_ATTRS
1084
+ _mm256_castps_pd(__m256 __a)
1085
+ {
1086
+ return (__m256d)__a;
1087
+ }
1088
+
1089
+ static __inline __m256i __DEFAULT_FN_ATTRS
1090
+ _mm256_castps_si256(__m256 __a)
1091
+ {
1092
+ return (__m256i)__a;
1093
+ }
1094
+
1095
+ static __inline __m256 __DEFAULT_FN_ATTRS
1096
+ _mm256_castsi256_ps(__m256i __a)
1097
+ {
1098
+ return (__m256)__a;
1099
+ }
1100
+
1101
+ static __inline __m256d __DEFAULT_FN_ATTRS
1102
+ _mm256_castsi256_pd(__m256i __a)
1103
+ {
1104
+ return (__m256d)__a;
1105
+ }
1106
+
1107
+ static __inline __m128d __DEFAULT_FN_ATTRS
1108
+ _mm256_castpd256_pd128(__m256d __a)
1109
+ {
1110
+ return __builtin_shufflevector(__a, __a, 0, 1);
1111
+ }
1112
+
1113
+ static __inline __m128 __DEFAULT_FN_ATTRS
1114
+ _mm256_castps256_ps128(__m256 __a)
1115
+ {
1116
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
1117
+ }
1118
+
1119
+ static __inline __m128i __DEFAULT_FN_ATTRS
1120
+ _mm256_castsi256_si128(__m256i __a)
1121
+ {
1122
+ return __builtin_shufflevector(__a, __a, 0, 1);
1123
+ }
1124
+
1125
+ static __inline __m256d __DEFAULT_FN_ATTRS
1126
+ _mm256_castpd128_pd256(__m128d __a)
1127
+ {
1128
+ return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
1129
+ }
1130
+
1131
+ static __inline __m256 __DEFAULT_FN_ATTRS
1132
+ _mm256_castps128_ps256(__m128 __a)
1133
+ {
1134
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
1135
+ }
1136
+
1137
+ static __inline __m256i __DEFAULT_FN_ATTRS
1138
+ _mm256_castsi128_si256(__m128i __a)
1139
+ {
1140
+ return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
1141
+ }
1142
+
1143
+ /*
1144
+ Vector insert.
1145
+ We use macros rather than inlines because we only want to accept
1146
+ invocations where the immediate M is a constant expression.
1147
+ */
1148
/*
 * Replace one 128-bit half of V1 with V2.  V2 is first widened to 256 bits
 * so both shuffle operands have the same type; indices below N pick from
 * V1, indices N and above pick from the widened V2 (N = elements in V1).
 * Bit 0 of M chooses the half: 0 replaces the low half, 1 the high half.
 */
#define _mm256_insertf128_ps(V1, V2, M) __extension__ ({ \
  (__m256)__builtin_shufflevector( \
    (__v8sf)(V1), \
    (__v8sf)_mm256_castps128_ps256((__m128)(V2)), \
    (((M) & 1) ?  0 :  8), (((M) & 1) ?  1 :  9), \
    (((M) & 1) ?  2 : 10), (((M) & 1) ?  3 : 11), \
    (((M) & 1) ?  8 :  4), (((M) & 1) ?  9 :  5), \
    (((M) & 1) ? 10 :  6), (((M) & 1) ? 11 :  7) );})

#define _mm256_insertf128_pd(V1, V2, M) __extension__ ({ \
  (__m256d)__builtin_shufflevector( \
    (__v4df)(V1), \
    (__v4df)_mm256_castpd128_pd256((__m128d)(V2)), \
    (((M) & 1) ? 0 : 4), (((M) & 1) ? 1 : 5), \
    (((M) & 1) ? 4 : 2), (((M) & 1) ? 5 : 3) );})

#define _mm256_insertf128_si256(V1, V2, M) __extension__ ({ \
  (__m256i)__builtin_shufflevector( \
    (__v4di)(V1), \
    (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \
    (((M) & 1) ? 0 : 4), (((M) & 1) ? 1 : 5), \
    (((M) & 1) ? 4 : 2), (((M) & 1) ? 5 : 3) );})
1178
+
1179
+ /*
1180
+ Vector extract.
1181
+ We use macros rather than inlines because we only want to accept
1182
+ invocations where the immediate M is a constant expression.
1183
+ */
1184
/*
 * Return one 128-bit half of V.  Bit 0 of M chooses the half: 0 yields the
 * low elements, 1 the high elements.  The second shuffle operand is an
 * all-zero vector; none of the indices used here select from it.
 */
#define _mm256_extractf128_ps(V, M) __extension__ ({ \
  (__m128)__builtin_shufflevector( \
    (__v8sf)(V), \
    (__v8sf)(_mm256_setzero_ps()), \
    (((M) & 1) ? 4 : 0), (((M) & 1) ? 5 : 1), \
    (((M) & 1) ? 6 : 2), (((M) & 1) ? 7 : 3) );})

#define _mm256_extractf128_pd(V, M) __extension__ ({ \
  (__m128d)__builtin_shufflevector( \
    (__v4df)(V), \
    (__v4df)(_mm256_setzero_pd()), \
    (((M) & 1) ? 2 : 0), (((M) & 1) ? 3 : 1) );})

#define _mm256_extractf128_si256(V, M) __extension__ ({ \
  (__m128i)__builtin_shufflevector( \
    (__v4di)(V), \
    (__v4di)(_mm256_setzero_si256()), \
    (((M) & 1) ? 2 : 0), (((M) & 1) ? 3 : 1) );})
1206
+
1207
+ /* SIMD load ops (unaligned) */
1208
+ static __inline __m256 __DEFAULT_FN_ATTRS
1209
+ _mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo)
1210
+ {
1211
+ struct __loadu_ps {
1212
+ __m128 __v;
1213
+ } __attribute__((__packed__, __may_alias__));
1214
+
1215
+ __m256 __v256 = _mm256_castps128_ps256(((struct __loadu_ps*)__addr_lo)->__v);
1216
+ return _mm256_insertf128_ps(__v256, ((struct __loadu_ps*)__addr_hi)->__v, 1);
1217
+ }
1218
+
1219
+ static __inline __m256d __DEFAULT_FN_ATTRS
1220
+ _mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo)
1221
+ {
1222
+ struct __loadu_pd {
1223
+ __m128d __v;
1224
+ } __attribute__((__packed__, __may_alias__));
1225
+
1226
+ __m256d __v256 = _mm256_castpd128_pd256(((struct __loadu_pd*)__addr_lo)->__v);
1227
+ return _mm256_insertf128_pd(__v256, ((struct __loadu_pd*)__addr_hi)->__v, 1);
1228
+ }
1229
+
1230
+ static __inline __m256i __DEFAULT_FN_ATTRS
1231
+ _mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo)
1232
+ {
1233
+ struct __loadu_si128 {
1234
+ __m128i __v;
1235
+ } __attribute__((__packed__, __may_alias__));
1236
+ __m256i __v256 = _mm256_castsi128_si256(
1237
+ ((struct __loadu_si128*)__addr_lo)->__v);
1238
+ return _mm256_insertf128_si256(__v256,
1239
+ ((struct __loadu_si128*)__addr_hi)->__v, 1);
1240
+ }
1241
+
1242
+ /* SIMD store ops (unaligned) */
1243
+ static __inline void __DEFAULT_FN_ATTRS
1244
+ _mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a)
1245
+ {
1246
+ __m128 __v128;
1247
+
1248
+ __v128 = _mm256_castps256_ps128(__a);
1249
+ __builtin_ia32_storeups(__addr_lo, __v128);
1250
+ __v128 = _mm256_extractf128_ps(__a, 1);
1251
+ __builtin_ia32_storeups(__addr_hi, __v128);
1252
+ }
1253
+
1254
+ static __inline void __DEFAULT_FN_ATTRS
1255
+ _mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a)
1256
+ {
1257
+ __m128d __v128;
1258
+
1259
+ __v128 = _mm256_castpd256_pd128(__a);
1260
+ __builtin_ia32_storeupd(__addr_lo, __v128);
1261
+ __v128 = _mm256_extractf128_pd(__a, 1);
1262
+ __builtin_ia32_storeupd(__addr_hi, __v128);
1263
+ }
1264
+
1265
+ static __inline void __DEFAULT_FN_ATTRS
1266
+ _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a)
1267
+ {
1268
+ __m128i __v128;
1269
+
1270
+ __v128 = _mm256_castsi256_si128(__a);
1271
+ __builtin_ia32_storedqu((char *)__addr_lo, (__v16qi)__v128);
1272
+ __v128 = _mm256_extractf128_si256(__a, 1);
1273
+ __builtin_ia32_storedqu((char *)__addr_hi, (__v16qi)__v128);
1274
+ }
1275
+
1276
+ static __inline __m256 __DEFAULT_FN_ATTRS
1277
+ _mm256_set_m128 (__m128 __hi, __m128 __lo) {
1278
+ return (__m256) __builtin_shufflevector(__lo, __hi, 0, 1, 2, 3, 4, 5, 6, 7);
1279
+ }
1280
+
1281
+ static __inline __m256d __DEFAULT_FN_ATTRS
1282
+ _mm256_set_m128d (__m128d __hi, __m128d __lo) {
1283
+ return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo);
1284
+ }
1285
+
1286
+ static __inline __m256i __DEFAULT_FN_ATTRS
1287
+ _mm256_set_m128i (__m128i __hi, __m128i __lo) {
1288
+ return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo);
1289
+ }
1290
+
1291
+ static __inline __m256 __DEFAULT_FN_ATTRS
1292
+ _mm256_setr_m128 (__m128 __lo, __m128 __hi) {
1293
+ return _mm256_set_m128(__hi, __lo);
1294
+ }
1295
+
1296
+ static __inline __m256d __DEFAULT_FN_ATTRS
1297
+ _mm256_setr_m128d (__m128d __lo, __m128d __hi) {
1298
+ return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo);
1299
+ }
1300
+
1301
+ static __inline __m256i __DEFAULT_FN_ATTRS
1302
+ _mm256_setr_m128i (__m128i __lo, __m128i __hi) {
1303
+ return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo);
1304
+ }
1305
+
1306
+ #undef __DEFAULT_FN_ATTRS
1307
+
1308
+ #endif /* __AVXINTRIN_H */