xcodebuild-helper 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (204) hide show
  1. checksums.yaml +4 -4
  2. metadata +4 -220
  3. data/.codeclimate.yml +0 -20
  4. data/.gitignore +0 -1
  5. data/.rspec +0 -2
  6. data/.travis.yml +0 -7
  7. data/Gemfile +0 -6
  8. data/Gemfile.lock +0 -114
  9. data/Guardfile +0 -18
  10. data/README.md +0 -7
  11. data/Rakefile +0 -7
  12. data/assets/style.css +0 -110
  13. data/bin/oclint +0 -5
  14. data/bin/oclint-0.8 +0 -5
  15. data/bin/oclint-json-compilation-database +0 -5
  16. data/bin/oclint-xcodebuild +0 -5
  17. data/externals/oclint/LICENSE +0 -69
  18. data/externals/oclint/bin/oclint +0 -0
  19. data/externals/oclint/bin/oclint-0.10.2 +0 -0
  20. data/externals/oclint/bin/oclint-json-compilation-database +0 -88
  21. data/externals/oclint/bin/oclint-xcodebuild +0 -218
  22. data/externals/oclint/lib/clang/3.7.0/asan_blacklist.txt +0 -13
  23. data/externals/oclint/lib/clang/3.7.0/include/Intrin.h +0 -958
  24. data/externals/oclint/lib/clang/3.7.0/include/__stddef_max_align_t.h +0 -43
  25. data/externals/oclint/lib/clang/3.7.0/include/__wmmintrin_aes.h +0 -72
  26. data/externals/oclint/lib/clang/3.7.0/include/__wmmintrin_pclmul.h +0 -34
  27. data/externals/oclint/lib/clang/3.7.0/include/adxintrin.h +0 -88
  28. data/externals/oclint/lib/clang/3.7.0/include/altivec.h +0 -13528
  29. data/externals/oclint/lib/clang/3.7.0/include/ammintrin.h +0 -215
  30. data/externals/oclint/lib/clang/3.7.0/include/arm_acle.h +0 -304
  31. data/externals/oclint/lib/clang/3.7.0/include/arm_neon.h +0 -68419
  32. data/externals/oclint/lib/clang/3.7.0/include/avx2intrin.h +0 -1256
  33. data/externals/oclint/lib/clang/3.7.0/include/avx512bwintrin.h +0 -1250
  34. data/externals/oclint/lib/clang/3.7.0/include/avx512cdintrin.h +0 -131
  35. data/externals/oclint/lib/clang/3.7.0/include/avx512dqintrin.h +0 -242
  36. data/externals/oclint/lib/clang/3.7.0/include/avx512erintrin.h +0 -285
  37. data/externals/oclint/lib/clang/3.7.0/include/avx512fintrin.h +0 -2457
  38. data/externals/oclint/lib/clang/3.7.0/include/avx512vlbwintrin.h +0 -1907
  39. data/externals/oclint/lib/clang/3.7.0/include/avx512vldqintrin.h +0 -353
  40. data/externals/oclint/lib/clang/3.7.0/include/avx512vlintrin.h +0 -1982
  41. data/externals/oclint/lib/clang/3.7.0/include/avxintrin.h +0 -1308
  42. data/externals/oclint/lib/clang/3.7.0/include/bmi2intrin.h +0 -99
  43. data/externals/oclint/lib/clang/3.7.0/include/bmiintrin.h +0 -153
  44. data/externals/oclint/lib/clang/3.7.0/include/cpuid.h +0 -209
  45. data/externals/oclint/lib/clang/3.7.0/include/cuda_builtin_vars.h +0 -110
  46. data/externals/oclint/lib/clang/3.7.0/include/emmintrin.h +0 -1480
  47. data/externals/oclint/lib/clang/3.7.0/include/f16cintrin.h +0 -63
  48. data/externals/oclint/lib/clang/3.7.0/include/float.h +0 -124
  49. data/externals/oclint/lib/clang/3.7.0/include/fma4intrin.h +0 -236
  50. data/externals/oclint/lib/clang/3.7.0/include/fmaintrin.h +0 -234
  51. data/externals/oclint/lib/clang/3.7.0/include/fxsrintrin.h +0 -55
  52. data/externals/oclint/lib/clang/3.7.0/include/htmintrin.h +0 -226
  53. data/externals/oclint/lib/clang/3.7.0/include/htmxlintrin.h +0 -363
  54. data/externals/oclint/lib/clang/3.7.0/include/ia32intrin.h +0 -101
  55. data/externals/oclint/lib/clang/3.7.0/include/immintrin.h +0 -203
  56. data/externals/oclint/lib/clang/3.7.0/include/inttypes.h +0 -102
  57. data/externals/oclint/lib/clang/3.7.0/include/iso646.h +0 -43
  58. data/externals/oclint/lib/clang/3.7.0/include/limits.h +0 -118
  59. data/externals/oclint/lib/clang/3.7.0/include/lzcntintrin.h +0 -72
  60. data/externals/oclint/lib/clang/3.7.0/include/mm3dnow.h +0 -167
  61. data/externals/oclint/lib/clang/3.7.0/include/mm_malloc.h +0 -75
  62. data/externals/oclint/lib/clang/3.7.0/include/mmintrin.h +0 -507
  63. data/externals/oclint/lib/clang/3.7.0/include/module.modulemap +0 -196
  64. data/externals/oclint/lib/clang/3.7.0/include/nmmintrin.h +0 -35
  65. data/externals/oclint/lib/clang/3.7.0/include/pmmintrin.h +0 -122
  66. data/externals/oclint/lib/clang/3.7.0/include/popcntintrin.h +0 -50
  67. data/externals/oclint/lib/clang/3.7.0/include/prfchwintrin.h +0 -39
  68. data/externals/oclint/lib/clang/3.7.0/include/rdseedintrin.h +0 -59
  69. data/externals/oclint/lib/clang/3.7.0/include/rtmintrin.h +0 -59
  70. data/externals/oclint/lib/clang/3.7.0/include/s390intrin.h +0 -39
  71. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/allocator_interface.h +0 -66
  72. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/asan_interface.h +0 -155
  73. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/common_interface_defs.h +0 -118
  74. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/coverage_interface.h +0 -63
  75. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/dfsan_interface.h +0 -114
  76. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/linux_syscall_hooks.h +0 -3070
  77. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/lsan_interface.h +0 -84
  78. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/msan_interface.h +0 -107
  79. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/tsan_interface_atomic.h +0 -222
  80. data/externals/oclint/lib/clang/3.7.0/include/shaintrin.h +0 -79
  81. data/externals/oclint/lib/clang/3.7.0/include/smmintrin.h +0 -487
  82. data/externals/oclint/lib/clang/3.7.0/include/stdalign.h +0 -35
  83. data/externals/oclint/lib/clang/3.7.0/include/stdarg.h +0 -52
  84. data/externals/oclint/lib/clang/3.7.0/include/stdatomic.h +0 -190
  85. data/externals/oclint/lib/clang/3.7.0/include/stdbool.h +0 -44
  86. data/externals/oclint/lib/clang/3.7.0/include/stddef.h +0 -137
  87. data/externals/oclint/lib/clang/3.7.0/include/stdint.h +0 -707
  88. data/externals/oclint/lib/clang/3.7.0/include/stdnoreturn.h +0 -30
  89. data/externals/oclint/lib/clang/3.7.0/include/tbmintrin.h +0 -154
  90. data/externals/oclint/lib/clang/3.7.0/include/tgmath.h +0 -1374
  91. data/externals/oclint/lib/clang/3.7.0/include/tmmintrin.h +0 -230
  92. data/externals/oclint/lib/clang/3.7.0/include/unwind.h +0 -282
  93. data/externals/oclint/lib/clang/3.7.0/include/vadefs.h +0 -65
  94. data/externals/oclint/lib/clang/3.7.0/include/varargs.h +0 -26
  95. data/externals/oclint/lib/clang/3.7.0/include/vecintrin.h +0 -8946
  96. data/externals/oclint/lib/clang/3.7.0/include/wmmintrin.h +0 -42
  97. data/externals/oclint/lib/clang/3.7.0/include/x86intrin.h +0 -81
  98. data/externals/oclint/lib/clang/3.7.0/include/xmmintrin.h +0 -1008
  99. data/externals/oclint/lib/clang/3.7.0/include/xopintrin.h +0 -809
  100. data/externals/oclint/lib/clang/3.7.0/include/xtestintrin.h +0 -41
  101. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.asan_iossim_dynamic.dylib +0 -0
  102. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.asan_osx_dynamic.dylib +0 -0
  103. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.builtins-i386.a +0 -0
  104. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.builtins-x86_64.a +0 -0
  105. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.profile_osx.a +0 -0
  106. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.safestack_osx.a +0 -0
  107. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.ubsan_iossim_dynamic.dylib +0 -0
  108. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.ubsan_osx_dynamic.dylib +0 -0
  109. data/externals/oclint/lib/oclint/reporters/libHTMLReporter.dylib +0 -0
  110. data/externals/oclint/lib/oclint/reporters/libJSONReporter.dylib +0 -0
  111. data/externals/oclint/lib/oclint/reporters/libPMDReporter.dylib +0 -0
  112. data/externals/oclint/lib/oclint/reporters/libTextReporter.dylib +0 -0
  113. data/externals/oclint/lib/oclint/reporters/libXMLReporter.dylib +0 -0
  114. data/externals/oclint/lib/oclint/reporters/libXcodeReporter.dylib +0 -0
  115. data/externals/oclint/lib/oclint/rules/libAvoidBranchingStatementAsLastInLoopRule.dylib +0 -0
  116. data/externals/oclint/lib/oclint/rules/libAvoidDefaultArgumentsOnVirtualMethodsRule.dylib +0 -0
  117. data/externals/oclint/lib/oclint/rules/libAvoidPrivateStaticMembersRule.dylib +0 -0
  118. data/externals/oclint/lib/oclint/rules/libBaseClassDestructorShouldBeVirtualOrProtectedRule.dylib +0 -0
  119. data/externals/oclint/lib/oclint/rules/libBitwiseOperatorInConditionalRule.dylib +0 -0
  120. data/externals/oclint/lib/oclint/rules/libBrokenNullCheckRule.dylib +0 -0
  121. data/externals/oclint/lib/oclint/rules/libBrokenOddnessCheckRule.dylib +0 -0
  122. data/externals/oclint/lib/oclint/rules/libCollapsibleIfStatementsRule.dylib +0 -0
  123. data/externals/oclint/lib/oclint/rules/libConstantConditionalOperatorRule.dylib +0 -0
  124. data/externals/oclint/lib/oclint/rules/libConstantIfExpressionRule.dylib +0 -0
  125. data/externals/oclint/lib/oclint/rules/libCoveredSwitchStatementsDontNeedDefaultRule.dylib +0 -0
  126. data/externals/oclint/lib/oclint/rules/libCyclomaticComplexityRule.dylib +0 -0
  127. data/externals/oclint/lib/oclint/rules/libDeadCodeRule.dylib +0 -0
  128. data/externals/oclint/lib/oclint/rules/libDefaultLabelNotLastInSwitchStatementRule.dylib +0 -0
  129. data/externals/oclint/lib/oclint/rules/libDestructorOfVirtualClassRule.dylib +0 -0
  130. data/externals/oclint/lib/oclint/rules/libDoubleNegativeRule.dylib +0 -0
  131. data/externals/oclint/lib/oclint/rules/libEmptyCatchStatementRule.dylib +0 -0
  132. data/externals/oclint/lib/oclint/rules/libEmptyDoWhileStatementRule.dylib +0 -0
  133. data/externals/oclint/lib/oclint/rules/libEmptyElseBlockRule.dylib +0 -0
  134. data/externals/oclint/lib/oclint/rules/libEmptyFinallyStatementRule.dylib +0 -0
  135. data/externals/oclint/lib/oclint/rules/libEmptyForStatementRule.dylib +0 -0
  136. data/externals/oclint/lib/oclint/rules/libEmptyIfStatementRule.dylib +0 -0
  137. data/externals/oclint/lib/oclint/rules/libEmptySwitchStatementRule.dylib +0 -0
  138. data/externals/oclint/lib/oclint/rules/libEmptyTryStatementRule.dylib +0 -0
  139. data/externals/oclint/lib/oclint/rules/libEmptyWhileStatementRule.dylib +0 -0
  140. data/externals/oclint/lib/oclint/rules/libForLoopShouldBeWhileLoopRule.dylib +0 -0
  141. data/externals/oclint/lib/oclint/rules/libGotoStatementRule.dylib +0 -0
  142. data/externals/oclint/lib/oclint/rules/libInvertedLogicRule.dylib +0 -0
  143. data/externals/oclint/lib/oclint/rules/libJumbledIncrementerRule.dylib +0 -0
  144. data/externals/oclint/lib/oclint/rules/libLongClassRule.dylib +0 -0
  145. data/externals/oclint/lib/oclint/rules/libLongLineRule.dylib +0 -0
  146. data/externals/oclint/lib/oclint/rules/libLongMethodRule.dylib +0 -0
  147. data/externals/oclint/lib/oclint/rules/libLongVariableNameRule.dylib +0 -0
  148. data/externals/oclint/lib/oclint/rules/libMisplacedNullCheckRule.dylib +0 -0
  149. data/externals/oclint/lib/oclint/rules/libMissingBreakInSwitchStatementRule.dylib +0 -0
  150. data/externals/oclint/lib/oclint/rules/libMultipleUnaryOperatorRule.dylib +0 -0
  151. data/externals/oclint/lib/oclint/rules/libNPathComplexityRule.dylib +0 -0
  152. data/externals/oclint/lib/oclint/rules/libNcssMethodCountRule.dylib +0 -0
  153. data/externals/oclint/lib/oclint/rules/libNestedBlockDepthRule.dylib +0 -0
  154. data/externals/oclint/lib/oclint/rules/libNonCaseLabelInSwitchStatementRule.dylib +0 -0
  155. data/externals/oclint/lib/oclint/rules/libObjCAssignIvarOutsideAccessorsRule.dylib +0 -0
  156. data/externals/oclint/lib/oclint/rules/libObjCBoxedExpressionsRule.dylib +0 -0
  157. data/externals/oclint/lib/oclint/rules/libObjCContainerLiteralsRule.dylib +0 -0
  158. data/externals/oclint/lib/oclint/rules/libObjCNSNumberLiteralsRule.dylib +0 -0
  159. data/externals/oclint/lib/oclint/rules/libObjCObjectSubscriptingRule.dylib +0 -0
  160. data/externals/oclint/lib/oclint/rules/libObjCVerifyIsEqualHashRule.dylib +0 -0
  161. data/externals/oclint/lib/oclint/rules/libObjCVerifyMustCallSuperRule.dylib +0 -0
  162. data/externals/oclint/lib/oclint/rules/libObjCVerifyProhibitedCallRule.dylib +0 -0
  163. data/externals/oclint/lib/oclint/rules/libObjCVerifyProtectedMethodRule.dylib +0 -0
  164. data/externals/oclint/lib/oclint/rules/libObjCVerifySubclassMustImplementRule.dylib +0 -0
  165. data/externals/oclint/lib/oclint/rules/libParameterReassignmentRule.dylib +0 -0
  166. data/externals/oclint/lib/oclint/rules/libPreferEarlyExitRule.dylib +0 -0
  167. data/externals/oclint/lib/oclint/rules/libRedundantConditionalOperatorRule.dylib +0 -0
  168. data/externals/oclint/lib/oclint/rules/libRedundantIfStatementRule.dylib +0 -0
  169. data/externals/oclint/lib/oclint/rules/libRedundantLocalVariableRule.dylib +0 -0
  170. data/externals/oclint/lib/oclint/rules/libRedundantNilCheckRule.dylib +0 -0
  171. data/externals/oclint/lib/oclint/rules/libReturnFromFinallyBlockRule.dylib +0 -0
  172. data/externals/oclint/lib/oclint/rules/libShortVariableNameRule.dylib +0 -0
  173. data/externals/oclint/lib/oclint/rules/libSwitchStatementsShouldHaveDefaultRule.dylib +0 -0
  174. data/externals/oclint/lib/oclint/rules/libThrowExceptionFromFinallyBlockRule.dylib +0 -0
  175. data/externals/oclint/lib/oclint/rules/libTooFewBranchesInSwitchStatementRule.dylib +0 -0
  176. data/externals/oclint/lib/oclint/rules/libTooManyFieldsRule.dylib +0 -0
  177. data/externals/oclint/lib/oclint/rules/libTooManyMethodsRule.dylib +0 -0
  178. data/externals/oclint/lib/oclint/rules/libTooManyParametersRule.dylib +0 -0
  179. data/externals/oclint/lib/oclint/rules/libUnnecessaryElseStatementRule.dylib +0 -0
  180. data/externals/oclint/lib/oclint/rules/libUnnecessaryNullCheckForCXXDeallocRule.dylib +0 -0
  181. data/externals/oclint/lib/oclint/rules/libUnusedLocalVariableRule.dylib +0 -0
  182. data/externals/oclint/lib/oclint/rules/libUnusedMethodParameterRule.dylib +0 -0
  183. data/externals/oclint/lib/oclint/rules/libUselessParenthesesRule.dylib +0 -0
  184. data/lib/coverage_html_converter.rb +0 -141
  185. data/lib/coverage_plan.rb +0 -27
  186. data/lib/device.rb +0 -27
  187. data/lib/execute.rb +0 -7
  188. data/lib/lint_plan.rb +0 -41
  189. data/lib/rules.rb +0 -23
  190. data/lib/test_plan.rb +0 -11
  191. data/lib/version.rb +0 -3
  192. data/lib/xcode.rb +0 -128
  193. data/lib/xcodebuild-helper.rb +0 -135
  194. data/spec/coverage_html_coverter_spec.rb +0 -55
  195. data/spec/coverage_plan_spec.rb +0 -23
  196. data/spec/device_spec.rb +0 -24
  197. data/spec/lint_plan_spec.rb +0 -35
  198. data/spec/rule_spec.rb +0 -37
  199. data/spec/spec_helper.rb +0 -17
  200. data/spec/test_plan_spec.rb +0 -11
  201. data/spec/xcode_dsl_actions_spec.rb +0 -227
  202. data/spec/xcode_dsl_spec.rb +0 -176
  203. data/spec/xcode_spec.rb +0 -85
  204. data/xcodebuild-helper.gemspec +0 -27
@@ -1,1308 +0,0 @@
1
- /*===---- avxintrin.h - AVX intrinsics -------------------------------------===
2
- *
3
- * Permission is hereby granted, free of charge, to any person obtaining a copy
4
- * of this software and associated documentation files (the "Software"), to deal
5
- * in the Software without restriction, including without limitation the rights
6
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
- * copies of the Software, and to permit persons to whom the Software is
8
- * furnished to do so, subject to the following conditions:
9
- *
10
- * The above copyright notice and this permission notice shall be included in
11
- * all copies or substantial portions of the Software.
12
- *
13
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
- * THE SOFTWARE.
20
- *
21
- *===-----------------------------------------------------------------------===
22
- */
23
-
24
- #ifndef __IMMINTRIN_H
25
- #error "Never use <avxintrin.h> directly; include <immintrin.h> instead."
26
- #endif
27
-
28
- #ifndef __AVXINTRIN_H
29
- #define __AVXINTRIN_H
30
-
31
- typedef double __v4df __attribute__ ((__vector_size__ (32)));
32
- typedef float __v8sf __attribute__ ((__vector_size__ (32)));
33
- typedef long long __v4di __attribute__ ((__vector_size__ (32)));
34
- typedef int __v8si __attribute__ ((__vector_size__ (32)));
35
- typedef short __v16hi __attribute__ ((__vector_size__ (32)));
36
- typedef char __v32qi __attribute__ ((__vector_size__ (32)));
37
-
38
- typedef float __m256 __attribute__ ((__vector_size__ (32)));
39
- typedef double __m256d __attribute__((__vector_size__(32)));
40
- typedef long long __m256i __attribute__((__vector_size__(32)));
41
-
42
- /* Define the default attributes for the functions in this file. */
43
- #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
44
-
45
- /* Arithmetic */
46
- static __inline __m256d __DEFAULT_FN_ATTRS
47
- _mm256_add_pd(__m256d __a, __m256d __b)
48
- {
49
- return __a+__b;
50
- }
51
-
52
- static __inline __m256 __DEFAULT_FN_ATTRS
53
- _mm256_add_ps(__m256 __a, __m256 __b)
54
- {
55
- return __a+__b;
56
- }
57
-
58
- static __inline __m256d __DEFAULT_FN_ATTRS
59
- _mm256_sub_pd(__m256d __a, __m256d __b)
60
- {
61
- return __a-__b;
62
- }
63
-
64
- static __inline __m256 __DEFAULT_FN_ATTRS
65
- _mm256_sub_ps(__m256 __a, __m256 __b)
66
- {
67
- return __a-__b;
68
- }
69
-
70
- static __inline __m256d __DEFAULT_FN_ATTRS
71
- _mm256_addsub_pd(__m256d __a, __m256d __b)
72
- {
73
- return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b);
74
- }
75
-
76
- static __inline __m256 __DEFAULT_FN_ATTRS
77
- _mm256_addsub_ps(__m256 __a, __m256 __b)
78
- {
79
- return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b);
80
- }
81
-
82
- static __inline __m256d __DEFAULT_FN_ATTRS
83
- _mm256_div_pd(__m256d __a, __m256d __b)
84
- {
85
- return __a / __b;
86
- }
87
-
88
- static __inline __m256 __DEFAULT_FN_ATTRS
89
- _mm256_div_ps(__m256 __a, __m256 __b)
90
- {
91
- return __a / __b;
92
- }
93
-
94
- static __inline __m256d __DEFAULT_FN_ATTRS
95
- _mm256_max_pd(__m256d __a, __m256d __b)
96
- {
97
- return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b);
98
- }
99
-
100
- static __inline __m256 __DEFAULT_FN_ATTRS
101
- _mm256_max_ps(__m256 __a, __m256 __b)
102
- {
103
- return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b);
104
- }
105
-
106
- static __inline __m256d __DEFAULT_FN_ATTRS
107
- _mm256_min_pd(__m256d __a, __m256d __b)
108
- {
109
- return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b);
110
- }
111
-
112
- static __inline __m256 __DEFAULT_FN_ATTRS
113
- _mm256_min_ps(__m256 __a, __m256 __b)
114
- {
115
- return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b);
116
- }
117
-
118
- static __inline __m256d __DEFAULT_FN_ATTRS
119
- _mm256_mul_pd(__m256d __a, __m256d __b)
120
- {
121
- return __a * __b;
122
- }
123
-
124
- static __inline __m256 __DEFAULT_FN_ATTRS
125
- _mm256_mul_ps(__m256 __a, __m256 __b)
126
- {
127
- return __a * __b;
128
- }
129
-
130
- static __inline __m256d __DEFAULT_FN_ATTRS
131
- _mm256_sqrt_pd(__m256d __a)
132
- {
133
- return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a);
134
- }
135
-
136
- static __inline __m256 __DEFAULT_FN_ATTRS
137
- _mm256_sqrt_ps(__m256 __a)
138
- {
139
- return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a);
140
- }
141
-
142
- static __inline __m256 __DEFAULT_FN_ATTRS
143
- _mm256_rsqrt_ps(__m256 __a)
144
- {
145
- return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a);
146
- }
147
-
148
- static __inline __m256 __DEFAULT_FN_ATTRS
149
- _mm256_rcp_ps(__m256 __a)
150
- {
151
- return (__m256)__builtin_ia32_rcpps256((__v8sf)__a);
152
- }
153
-
154
- #define _mm256_round_pd(V, M) __extension__ ({ \
155
- __m256d __V = (V); \
156
- (__m256d)__builtin_ia32_roundpd256((__v4df)__V, (M)); })
157
-
158
- #define _mm256_round_ps(V, M) __extension__ ({ \
159
- __m256 __V = (V); \
160
- (__m256)__builtin_ia32_roundps256((__v8sf)__V, (M)); })
161
-
162
- #define _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL)
163
- #define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR)
164
- #define _mm256_ceil_ps(V) _mm256_round_ps((V), _MM_FROUND_CEIL)
165
- #define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR)
166
-
167
- /* Logical */
168
- static __inline __m256d __DEFAULT_FN_ATTRS
169
- _mm256_and_pd(__m256d __a, __m256d __b)
170
- {
171
- return (__m256d)((__v4di)__a & (__v4di)__b);
172
- }
173
-
174
- static __inline __m256 __DEFAULT_FN_ATTRS
175
- _mm256_and_ps(__m256 __a, __m256 __b)
176
- {
177
- return (__m256)((__v8si)__a & (__v8si)__b);
178
- }
179
-
180
- static __inline __m256d __DEFAULT_FN_ATTRS
181
- _mm256_andnot_pd(__m256d __a, __m256d __b)
182
- {
183
- return (__m256d)(~(__v4di)__a & (__v4di)__b);
184
- }
185
-
186
- static __inline __m256 __DEFAULT_FN_ATTRS
187
- _mm256_andnot_ps(__m256 __a, __m256 __b)
188
- {
189
- return (__m256)(~(__v8si)__a & (__v8si)__b);
190
- }
191
-
192
- static __inline __m256d __DEFAULT_FN_ATTRS
193
- _mm256_or_pd(__m256d __a, __m256d __b)
194
- {
195
- return (__m256d)((__v4di)__a | (__v4di)__b);
196
- }
197
-
198
- static __inline __m256 __DEFAULT_FN_ATTRS
199
- _mm256_or_ps(__m256 __a, __m256 __b)
200
- {
201
- return (__m256)((__v8si)__a | (__v8si)__b);
202
- }
203
-
204
- static __inline __m256d __DEFAULT_FN_ATTRS
205
- _mm256_xor_pd(__m256d __a, __m256d __b)
206
- {
207
- return (__m256d)((__v4di)__a ^ (__v4di)__b);
208
- }
209
-
210
- static __inline __m256 __DEFAULT_FN_ATTRS
211
- _mm256_xor_ps(__m256 __a, __m256 __b)
212
- {
213
- return (__m256)((__v8si)__a ^ (__v8si)__b);
214
- }
215
-
216
- /* Horizontal arithmetic */
217
- static __inline __m256d __DEFAULT_FN_ATTRS
218
- _mm256_hadd_pd(__m256d __a, __m256d __b)
219
- {
220
- return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);
221
- }
222
-
223
- static __inline __m256 __DEFAULT_FN_ATTRS
224
- _mm256_hadd_ps(__m256 __a, __m256 __b)
225
- {
226
- return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);
227
- }
228
-
229
- static __inline __m256d __DEFAULT_FN_ATTRS
230
- _mm256_hsub_pd(__m256d __a, __m256d __b)
231
- {
232
- return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);
233
- }
234
-
235
- static __inline __m256 __DEFAULT_FN_ATTRS
236
- _mm256_hsub_ps(__m256 __a, __m256 __b)
237
- {
238
- return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b);
239
- }
240
-
241
- /* Vector permutations */
242
- static __inline __m128d __DEFAULT_FN_ATTRS
243
- _mm_permutevar_pd(__m128d __a, __m128i __c)
244
- {
245
- return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);
246
- }
247
-
248
- static __inline __m256d __DEFAULT_FN_ATTRS
249
- _mm256_permutevar_pd(__m256d __a, __m256i __c)
250
- {
251
- return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);
252
- }
253
-
254
- static __inline __m128 __DEFAULT_FN_ATTRS
255
- _mm_permutevar_ps(__m128 __a, __m128i __c)
256
- {
257
- return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);
258
- }
259
-
260
- static __inline __m256 __DEFAULT_FN_ATTRS
261
- _mm256_permutevar_ps(__m256 __a, __m256i __c)
262
- {
263
- return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);
264
- }
265
-
266
- #define _mm_permute_pd(A, C) __extension__ ({ \
267
- __m128d __A = (A); \
268
- (__m128d)__builtin_shufflevector((__v2df)__A, (__v2df) _mm_setzero_pd(), \
269
- (C) & 0x1, ((C) & 0x2) >> 1); })
270
-
271
- #define _mm256_permute_pd(A, C) __extension__ ({ \
272
- __m256d __A = (A); \
273
- (__m256d)__builtin_shufflevector((__v4df)__A, (__v4df) _mm256_setzero_pd(), \
274
- (C) & 0x1, ((C) & 0x2) >> 1, \
275
- 2 + (((C) & 0x4) >> 2), \
276
- 2 + (((C) & 0x8) >> 3)); })
277
-
278
- #define _mm_permute_ps(A, C) __extension__ ({ \
279
- __m128 __A = (A); \
280
- (__m128)__builtin_shufflevector((__v4sf)__A, (__v4sf) _mm_setzero_ps(), \
281
- (C) & 0x3, ((C) & 0xc) >> 2, \
282
- ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6); })
283
-
284
- #define _mm256_permute_ps(A, C) __extension__ ({ \
285
- __m256 __A = (A); \
286
- (__m256)__builtin_shufflevector((__v8sf)__A, (__v8sf) _mm256_setzero_ps(), \
287
- (C) & 0x3, ((C) & 0xc) >> 2, \
288
- ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6, \
289
- 4 + (((C) & 0x03) >> 0), \
290
- 4 + (((C) & 0x0c) >> 2), \
291
- 4 + (((C) & 0x30) >> 4), \
292
- 4 + (((C) & 0xc0) >> 6)); })
293
-
294
- #define _mm256_permute2f128_pd(V1, V2, M) __extension__ ({ \
295
- __m256d __V1 = (V1); \
296
- __m256d __V2 = (V2); \
297
- (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)__V1, (__v4df)__V2, (M)); })
298
-
299
- #define _mm256_permute2f128_ps(V1, V2, M) __extension__ ({ \
300
- __m256 __V1 = (V1); \
301
- __m256 __V2 = (V2); \
302
- (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)__V1, (__v8sf)__V2, (M)); })
303
-
304
- #define _mm256_permute2f128_si256(V1, V2, M) __extension__ ({ \
305
- __m256i __V1 = (V1); \
306
- __m256i __V2 = (V2); \
307
- (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)__V1, (__v8si)__V2, (M)); })
308
-
309
- /* Vector Blend */
310
- #define _mm256_blend_pd(V1, V2, M) __extension__ ({ \
311
- __m256d __V1 = (V1); \
312
- __m256d __V2 = (V2); \
313
- (__m256d)__builtin_shufflevector((__v4df)__V1, (__v4df)__V2, \
314
- (((M) & 0x01) ? 4 : 0), \
315
- (((M) & 0x02) ? 5 : 1), \
316
- (((M) & 0x04) ? 6 : 2), \
317
- (((M) & 0x08) ? 7 : 3)); })
318
-
319
- #define _mm256_blend_ps(V1, V2, M) __extension__ ({ \
320
- __m256 __V1 = (V1); \
321
- __m256 __V2 = (V2); \
322
- (__m256)__builtin_shufflevector((__v8sf)__V1, (__v8sf)__V2, \
323
- (((M) & 0x01) ? 8 : 0), \
324
- (((M) & 0x02) ? 9 : 1), \
325
- (((M) & 0x04) ? 10 : 2), \
326
- (((M) & 0x08) ? 11 : 3), \
327
- (((M) & 0x10) ? 12 : 4), \
328
- (((M) & 0x20) ? 13 : 5), \
329
- (((M) & 0x40) ? 14 : 6), \
330
- (((M) & 0x80) ? 15 : 7)); })
331
-
332
- static __inline __m256d __DEFAULT_FN_ATTRS
333
- _mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)
334
- {
335
- return (__m256d)__builtin_ia32_blendvpd256(
336
- (__v4df)__a, (__v4df)__b, (__v4df)__c);
337
- }
338
-
339
- static __inline __m256 __DEFAULT_FN_ATTRS
340
- _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
341
- {
342
- return (__m256)__builtin_ia32_blendvps256(
343
- (__v8sf)__a, (__v8sf)__b, (__v8sf)__c);
344
- }
345
-
346
- /* Vector Dot Product */
347
- #define _mm256_dp_ps(V1, V2, M) __extension__ ({ \
348
- __m256 __V1 = (V1); \
349
- __m256 __V2 = (V2); \
350
- (__m256)__builtin_ia32_dpps256((__v8sf)__V1, (__v8sf)__V2, (M)); })
351
-
352
- /* Vector shuffle */
353
- #define _mm256_shuffle_ps(a, b, mask) __extension__ ({ \
354
- __m256 __a = (a); \
355
- __m256 __b = (b); \
356
- (__m256)__builtin_shufflevector((__v8sf)__a, (__v8sf)__b, \
357
- (mask) & 0x3, ((mask) & 0xc) >> 2, \
358
- (((mask) & 0x30) >> 4) + 8, (((mask) & 0xc0) >> 6) + 8, \
359
- ((mask) & 0x3) + 4, (((mask) & 0xc) >> 2) + 4, \
360
- (((mask) & 0x30) >> 4) + 12, (((mask) & 0xc0) >> 6) + 12); })
361
-
362
- #define _mm256_shuffle_pd(a, b, mask) __extension__ ({ \
363
- __m256d __a = (a); \
364
- __m256d __b = (b); \
365
- (__m256d)__builtin_shufflevector((__v4df)__a, (__v4df)__b, \
366
- (mask) & 0x1, \
367
- (((mask) & 0x2) >> 1) + 4, \
368
- (((mask) & 0x4) >> 2) + 2, \
369
- (((mask) & 0x8) >> 3) + 6); })
370
-
371
- /* Compare */
372
- #define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
373
- #define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */
374
- #define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */
375
- #define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */
376
- #define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */
377
- #define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */
378
- #define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */
379
- #define _CMP_ORD_Q 0x07 /* Ordered (nonsignaling) */
380
- #define _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */
381
- #define _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unord, signaling) */
382
- #define _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */
383
- #define _CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */
384
- #define _CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */
385
- #define _CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered, signaling) */
386
- #define _CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */
387
- #define _CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */
388
- #define _CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */
389
- #define _CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */
390
- #define _CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered, non-signaling) */
391
- #define _CMP_UNORD_S 0x13 /* Unordered (signaling) */
392
- #define _CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */
393
- #define _CMP_NLT_UQ 0x15 /* Not-less-than (unordered, non-signaling) */
394
- #define _CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unord, non-signaling) */
395
- #define _CMP_ORD_S 0x17 /* Ordered (signaling) */
396
- #define _CMP_EQ_US 0x18 /* Equal (unordered, signaling) */
397
- #define _CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unord, non-sign) */
398
- #define _CMP_NGT_UQ 0x1a /* Not-greater-than (unordered, non-signaling) */
399
- #define _CMP_FALSE_OS 0x1b /* False (ordered, signaling) */
400
- #define _CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */
401
- #define _CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered, non-signaling) */
402
- #define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */
403
- #define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */
404
-
405
- #define _mm_cmp_pd(a, b, c) __extension__ ({ \
406
- __m128d __a = (a); \
407
- __m128d __b = (b); \
408
- (__m128d)__builtin_ia32_cmppd((__v2df)__a, (__v2df)__b, (c)); })
409
-
410
- #define _mm_cmp_ps(a, b, c) __extension__ ({ \
411
- __m128 __a = (a); \
412
- __m128 __b = (b); \
413
- (__m128)__builtin_ia32_cmpps((__v4sf)__a, (__v4sf)__b, (c)); })
414
-
415
- #define _mm256_cmp_pd(a, b, c) __extension__ ({ \
416
- __m256d __a = (a); \
417
- __m256d __b = (b); \
418
- (__m256d)__builtin_ia32_cmppd256((__v4df)__a, (__v4df)__b, (c)); })
419
-
420
- #define _mm256_cmp_ps(a, b, c) __extension__ ({ \
421
- __m256 __a = (a); \
422
- __m256 __b = (b); \
423
- (__m256)__builtin_ia32_cmpps256((__v8sf)__a, (__v8sf)__b, (c)); })
424
-
425
- #define _mm_cmp_sd(a, b, c) __extension__ ({ \
426
- __m128d __a = (a); \
427
- __m128d __b = (b); \
428
- (__m128d)__builtin_ia32_cmpsd((__v2df)__a, (__v2df)__b, (c)); })
429
-
430
- #define _mm_cmp_ss(a, b, c) __extension__ ({ \
431
- __m128 __a = (a); \
432
- __m128 __b = (b); \
433
- (__m128)__builtin_ia32_cmpss((__v4sf)__a, (__v4sf)__b, (c)); })
434
-
435
- static __inline int __DEFAULT_FN_ATTRS
436
- _mm256_extract_epi32(__m256i __a, const int __imm)
437
- {
438
- __v8si __b = (__v8si)__a;
439
- return __b[__imm & 7];
440
- }
441
-
442
- static __inline int __DEFAULT_FN_ATTRS
443
- _mm256_extract_epi16(__m256i __a, const int __imm)
444
- {
445
- __v16hi __b = (__v16hi)__a;
446
- return __b[__imm & 15];
447
- }
448
-
449
- static __inline int __DEFAULT_FN_ATTRS
450
- _mm256_extract_epi8(__m256i __a, const int __imm)
451
- {
452
- __v32qi __b = (__v32qi)__a;
453
- return __b[__imm & 31];
454
- }
455
-
456
- #ifdef __x86_64__
457
- static __inline long long __DEFAULT_FN_ATTRS
458
- _mm256_extract_epi64(__m256i __a, const int __imm)
459
- {
460
- __v4di __b = (__v4di)__a;
461
- return __b[__imm & 3];
462
- }
463
- #endif
464
-
465
- static __inline __m256i __DEFAULT_FN_ATTRS
466
- _mm256_insert_epi32(__m256i __a, int __b, int const __imm)
467
- {
468
- __v8si __c = (__v8si)__a;
469
- __c[__imm & 7] = __b;
470
- return (__m256i)__c;
471
- }
472
-
473
- static __inline __m256i __DEFAULT_FN_ATTRS
474
- _mm256_insert_epi16(__m256i __a, int __b, int const __imm)
475
- {
476
- __v16hi __c = (__v16hi)__a;
477
- __c[__imm & 15] = __b;
478
- return (__m256i)__c;
479
- }
480
-
481
- static __inline __m256i __DEFAULT_FN_ATTRS
482
- _mm256_insert_epi8(__m256i __a, int __b, int const __imm)
483
- {
484
- __v32qi __c = (__v32qi)__a;
485
- __c[__imm & 31] = __b;
486
- return (__m256i)__c;
487
- }
488
-
489
- #ifdef __x86_64__
490
- static __inline __m256i __DEFAULT_FN_ATTRS
491
- _mm256_insert_epi64(__m256i __a, long long __b, int const __imm)
492
- {
493
- __v4di __c = (__v4di)__a;
494
- __c[__imm & 3] = __b;
495
- return (__m256i)__c;
496
- }
497
- #endif
498
-
499
- /* Conversion */
500
- static __inline __m256d __DEFAULT_FN_ATTRS
501
- _mm256_cvtepi32_pd(__m128i __a)
502
- {
503
- return (__m256d)__builtin_ia32_cvtdq2pd256((__v4si) __a);
504
- }
505
-
506
- static __inline __m256 __DEFAULT_FN_ATTRS
507
- _mm256_cvtepi32_ps(__m256i __a)
508
- {
509
- return (__m256)__builtin_ia32_cvtdq2ps256((__v8si) __a);
510
- }
511
-
512
- static __inline __m128 __DEFAULT_FN_ATTRS
513
- _mm256_cvtpd_ps(__m256d __a)
514
- {
515
- return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);
516
- }
517
-
518
- static __inline __m256i __DEFAULT_FN_ATTRS
519
- _mm256_cvtps_epi32(__m256 __a)
520
- {
521
- return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a);
522
- }
523
-
524
- static __inline __m256d __DEFAULT_FN_ATTRS
525
- _mm256_cvtps_pd(__m128 __a)
526
- {
527
- return (__m256d)__builtin_ia32_cvtps2pd256((__v4sf) __a);
528
- }
529
-
530
- static __inline __m128i __DEFAULT_FN_ATTRS
531
- _mm256_cvttpd_epi32(__m256d __a)
532
- {
533
- return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);
534
- }
535
-
536
- static __inline __m128i __DEFAULT_FN_ATTRS
537
- _mm256_cvtpd_epi32(__m256d __a)
538
- {
539
- return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a);
540
- }
541
-
542
- static __inline __m256i __DEFAULT_FN_ATTRS
543
- _mm256_cvttps_epi32(__m256 __a)
544
- {
545
- return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);
546
- }
547
-
548
- /* Vector replicate */
549
- static __inline __m256 __DEFAULT_FN_ATTRS
550
- _mm256_movehdup_ps(__m256 __a)
551
- {
552
- return __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7);
553
- }
554
-
555
- static __inline __m256 __DEFAULT_FN_ATTRS
556
- _mm256_moveldup_ps(__m256 __a)
557
- {
558
- return __builtin_shufflevector(__a, __a, 0, 0, 2, 2, 4, 4, 6, 6);
559
- }
560
-
561
- static __inline __m256d __DEFAULT_FN_ATTRS
562
- _mm256_movedup_pd(__m256d __a)
563
- {
564
- return __builtin_shufflevector(__a, __a, 0, 0, 2, 2);
565
- }
566
-
567
- /* Unpack and Interleave */
568
- static __inline __m256d __DEFAULT_FN_ATTRS
569
- _mm256_unpackhi_pd(__m256d __a, __m256d __b)
570
- {
571
- return __builtin_shufflevector(__a, __b, 1, 5, 1+2, 5+2);
572
- }
573
-
574
- static __inline __m256d __DEFAULT_FN_ATTRS
575
- _mm256_unpacklo_pd(__m256d __a, __m256d __b)
576
- {
577
- return __builtin_shufflevector(__a, __b, 0, 4, 0+2, 4+2);
578
- }
579
-
580
- static __inline __m256 __DEFAULT_FN_ATTRS
581
- _mm256_unpackhi_ps(__m256 __a, __m256 __b)
582
- {
583
- return __builtin_shufflevector(__a, __b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1);
584
- }
585
-
586
- static __inline __m256 __DEFAULT_FN_ATTRS
587
- _mm256_unpacklo_ps(__m256 __a, __m256 __b)
588
- {
589
- return __builtin_shufflevector(__a, __b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1);
590
- }
591
-
592
- /* Bit Test */
593
- static __inline int __DEFAULT_FN_ATTRS
594
- _mm_testz_pd(__m128d __a, __m128d __b)
595
- {
596
- return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);
597
- }
598
-
599
- static __inline int __DEFAULT_FN_ATTRS
600
- _mm_testc_pd(__m128d __a, __m128d __b)
601
- {
602
- return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);
603
- }
604
-
605
- static __inline int __DEFAULT_FN_ATTRS
606
- _mm_testnzc_pd(__m128d __a, __m128d __b)
607
- {
608
- return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);
609
- }
610
-
611
- static __inline int __DEFAULT_FN_ATTRS
612
- _mm_testz_ps(__m128 __a, __m128 __b)
613
- {
614
- return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);
615
- }
616
-
617
- static __inline int __DEFAULT_FN_ATTRS
618
- _mm_testc_ps(__m128 __a, __m128 __b)
619
- {
620
- return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);
621
- }
622
-
623
- static __inline int __DEFAULT_FN_ATTRS
624
- _mm_testnzc_ps(__m128 __a, __m128 __b)
625
- {
626
- return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);
627
- }
628
-
629
- static __inline int __DEFAULT_FN_ATTRS
630
- _mm256_testz_pd(__m256d __a, __m256d __b)
631
- {
632
- return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);
633
- }
634
-
635
- static __inline int __DEFAULT_FN_ATTRS
636
- _mm256_testc_pd(__m256d __a, __m256d __b)
637
- {
638
- return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);
639
- }
640
-
641
- static __inline int __DEFAULT_FN_ATTRS
642
- _mm256_testnzc_pd(__m256d __a, __m256d __b)
643
- {
644
- return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);
645
- }
646
-
647
- static __inline int __DEFAULT_FN_ATTRS
648
- _mm256_testz_ps(__m256 __a, __m256 __b)
649
- {
650
- return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);
651
- }
652
-
653
- static __inline int __DEFAULT_FN_ATTRS
654
- _mm256_testc_ps(__m256 __a, __m256 __b)
655
- {
656
- return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);
657
- }
658
-
659
- static __inline int __DEFAULT_FN_ATTRS
660
- _mm256_testnzc_ps(__m256 __a, __m256 __b)
661
- {
662
- return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);
663
- }
664
-
665
- static __inline int __DEFAULT_FN_ATTRS
666
- _mm256_testz_si256(__m256i __a, __m256i __b)
667
- {
668
- return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);
669
- }
670
-
671
- static __inline int __DEFAULT_FN_ATTRS
672
- _mm256_testc_si256(__m256i __a, __m256i __b)
673
- {
674
- return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);
675
- }
676
-
677
- static __inline int __DEFAULT_FN_ATTRS
678
- _mm256_testnzc_si256(__m256i __a, __m256i __b)
679
- {
680
- return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);
681
- }
682
-
683
- /* Vector extract sign mask */
684
- static __inline int __DEFAULT_FN_ATTRS
685
- _mm256_movemask_pd(__m256d __a)
686
- {
687
- return __builtin_ia32_movmskpd256((__v4df)__a);
688
- }
689
-
690
- static __inline int __DEFAULT_FN_ATTRS
691
- _mm256_movemask_ps(__m256 __a)
692
- {
693
- return __builtin_ia32_movmskps256((__v8sf)__a);
694
- }
695
-
696
- /* Vector __zero */
697
- static __inline void __DEFAULT_FN_ATTRS
698
- _mm256_zeroall(void)
699
- {
700
- __builtin_ia32_vzeroall();
701
- }
702
-
703
- static __inline void __DEFAULT_FN_ATTRS
704
- _mm256_zeroupper(void)
705
- {
706
- __builtin_ia32_vzeroupper();
707
- }
708
-
709
- /* Vector load with broadcast */
710
- static __inline __m128 __DEFAULT_FN_ATTRS
711
- _mm_broadcast_ss(float const *__a)
712
- {
713
- float __f = *__a;
714
- return (__m128)(__v4sf){ __f, __f, __f, __f };
715
- }
716
-
717
- static __inline __m256d __DEFAULT_FN_ATTRS
718
- _mm256_broadcast_sd(double const *__a)
719
- {
720
- double __d = *__a;
721
- return (__m256d)(__v4df){ __d, __d, __d, __d };
722
- }
723
-
724
- static __inline __m256 __DEFAULT_FN_ATTRS
725
- _mm256_broadcast_ss(float const *__a)
726
- {
727
- float __f = *__a;
728
- return (__m256)(__v8sf){ __f, __f, __f, __f, __f, __f, __f, __f };
729
- }
730
-
731
- static __inline __m256d __DEFAULT_FN_ATTRS
732
- _mm256_broadcast_pd(__m128d const *__a)
733
- {
734
- return (__m256d)__builtin_ia32_vbroadcastf128_pd256(__a);
735
- }
736
-
737
- static __inline __m256 __DEFAULT_FN_ATTRS
738
- _mm256_broadcast_ps(__m128 const *__a)
739
- {
740
- return (__m256)__builtin_ia32_vbroadcastf128_ps256(__a);
741
- }
742
-
743
- /* SIMD load ops */
744
- static __inline __m256d __DEFAULT_FN_ATTRS
745
- _mm256_load_pd(double const *__p)
746
- {
747
- return *(__m256d *)__p;
748
- }
749
-
750
- static __inline __m256 __DEFAULT_FN_ATTRS
751
- _mm256_load_ps(float const *__p)
752
- {
753
- return *(__m256 *)__p;
754
- }
755
-
756
- static __inline __m256d __DEFAULT_FN_ATTRS
757
- _mm256_loadu_pd(double const *__p)
758
- {
759
- struct __loadu_pd {
760
- __m256d __v;
761
- } __attribute__((__packed__, __may_alias__));
762
- return ((struct __loadu_pd*)__p)->__v;
763
- }
764
-
765
- static __inline __m256 __DEFAULT_FN_ATTRS
766
- _mm256_loadu_ps(float const *__p)
767
- {
768
- struct __loadu_ps {
769
- __m256 __v;
770
- } __attribute__((__packed__, __may_alias__));
771
- return ((struct __loadu_ps*)__p)->__v;
772
- }
773
-
774
- static __inline __m256i __DEFAULT_FN_ATTRS
775
- _mm256_load_si256(__m256i const *__p)
776
- {
777
- return *__p;
778
- }
779
-
780
- static __inline __m256i __DEFAULT_FN_ATTRS
781
- _mm256_loadu_si256(__m256i const *__p)
782
- {
783
- struct __loadu_si256 {
784
- __m256i __v;
785
- } __attribute__((__packed__, __may_alias__));
786
- return ((struct __loadu_si256*)__p)->__v;
787
- }
788
-
789
- static __inline __m256i __DEFAULT_FN_ATTRS
790
- _mm256_lddqu_si256(__m256i const *__p)
791
- {
792
- return (__m256i)__builtin_ia32_lddqu256((char const *)__p);
793
- }
794
-
795
- /* SIMD store ops */
796
- static __inline void __DEFAULT_FN_ATTRS
797
- _mm256_store_pd(double *__p, __m256d __a)
798
- {
799
- *(__m256d *)__p = __a;
800
- }
801
-
802
- static __inline void __DEFAULT_FN_ATTRS
803
- _mm256_store_ps(float *__p, __m256 __a)
804
- {
805
- *(__m256 *)__p = __a;
806
- }
807
-
808
- static __inline void __DEFAULT_FN_ATTRS
809
- _mm256_storeu_pd(double *__p, __m256d __a)
810
- {
811
- __builtin_ia32_storeupd256(__p, (__v4df)__a);
812
- }
813
-
814
- static __inline void __DEFAULT_FN_ATTRS
815
- _mm256_storeu_ps(float *__p, __m256 __a)
816
- {
817
- __builtin_ia32_storeups256(__p, (__v8sf)__a);
818
- }
819
-
820
- static __inline void __DEFAULT_FN_ATTRS
821
- _mm256_store_si256(__m256i *__p, __m256i __a)
822
- {
823
- *__p = __a;
824
- }
825
-
826
- static __inline void __DEFAULT_FN_ATTRS
827
- _mm256_storeu_si256(__m256i *__p, __m256i __a)
828
- {
829
- __builtin_ia32_storedqu256((char *)__p, (__v32qi)__a);
830
- }
831
-
832
- /* Conditional load ops */
833
- static __inline __m128d __DEFAULT_FN_ATTRS
834
- _mm_maskload_pd(double const *__p, __m128d __m)
835
- {
836
- return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2df)__m);
837
- }
838
-
839
- static __inline __m256d __DEFAULT_FN_ATTRS
840
- _mm256_maskload_pd(double const *__p, __m256d __m)
841
- {
842
- return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)__p,
843
- (__v4df)__m);
844
- }
845
-
846
- static __inline __m128 __DEFAULT_FN_ATTRS
847
- _mm_maskload_ps(float const *__p, __m128 __m)
848
- {
849
- return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4sf)__m);
850
- }
851
-
852
- static __inline __m256 __DEFAULT_FN_ATTRS
853
- _mm256_maskload_ps(float const *__p, __m256 __m)
854
- {
855
- return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8sf)__m);
856
- }
857
-
858
- /* Conditional store ops */
859
- static __inline void __DEFAULT_FN_ATTRS
860
- _mm256_maskstore_ps(float *__p, __m256 __m, __m256 __a)
861
- {
862
- __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8sf)__m, (__v8sf)__a);
863
- }
864
-
865
- static __inline void __DEFAULT_FN_ATTRS
866
- _mm_maskstore_pd(double *__p, __m128d __m, __m128d __a)
867
- {
868
- __builtin_ia32_maskstorepd((__v2df *)__p, (__v2df)__m, (__v2df)__a);
869
- }
870
-
871
- static __inline void __DEFAULT_FN_ATTRS
872
- _mm256_maskstore_pd(double *__p, __m256d __m, __m256d __a)
873
- {
874
- __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4df)__m, (__v4df)__a);
875
- }
876
-
877
- static __inline void __DEFAULT_FN_ATTRS
878
- _mm_maskstore_ps(float *__p, __m128 __m, __m128 __a)
879
- {
880
- __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4sf)__m, (__v4sf)__a);
881
- }
882
-
883
- /* Cacheability support ops */
884
- static __inline void __DEFAULT_FN_ATTRS
885
- _mm256_stream_si256(__m256i *__a, __m256i __b)
886
- {
887
- __builtin_ia32_movntdq256((__v4di *)__a, (__v4di)__b);
888
- }
889
-
890
- static __inline void __DEFAULT_FN_ATTRS
891
- _mm256_stream_pd(double *__a, __m256d __b)
892
- {
893
- __builtin_ia32_movntpd256(__a, (__v4df)__b);
894
- }
895
-
896
- static __inline void __DEFAULT_FN_ATTRS
897
- _mm256_stream_ps(float *__p, __m256 __a)
898
- {
899
- __builtin_ia32_movntps256(__p, (__v8sf)__a);
900
- }
901
-
902
- /* Create vectors */
903
- static __inline __m256d __DEFAULT_FN_ATTRS
904
- _mm256_set_pd(double __a, double __b, double __c, double __d)
905
- {
906
- return (__m256d){ __d, __c, __b, __a };
907
- }
908
-
909
- static __inline __m256 __DEFAULT_FN_ATTRS
910
- _mm256_set_ps(float __a, float __b, float __c, float __d,
911
- float __e, float __f, float __g, float __h)
912
- {
913
- return (__m256){ __h, __g, __f, __e, __d, __c, __b, __a };
914
- }
915
-
916
- static __inline __m256i __DEFAULT_FN_ATTRS
917
- _mm256_set_epi32(int __i0, int __i1, int __i2, int __i3,
918
- int __i4, int __i5, int __i6, int __i7)
919
- {
920
- return (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 };
921
- }
922
-
923
- static __inline __m256i __DEFAULT_FN_ATTRS
924
- _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,
925
- short __w11, short __w10, short __w09, short __w08,
926
- short __w07, short __w06, short __w05, short __w04,
927
- short __w03, short __w02, short __w01, short __w00)
928
- {
929
- return (__m256i)(__v16hi){ __w00, __w01, __w02, __w03, __w04, __w05, __w06,
930
- __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 };
931
- }
932
-
933
- static __inline __m256i __DEFAULT_FN_ATTRS
934
- _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28,
935
- char __b27, char __b26, char __b25, char __b24,
936
- char __b23, char __b22, char __b21, char __b20,
937
- char __b19, char __b18, char __b17, char __b16,
938
- char __b15, char __b14, char __b13, char __b12,
939
- char __b11, char __b10, char __b09, char __b08,
940
- char __b07, char __b06, char __b05, char __b04,
941
- char __b03, char __b02, char __b01, char __b00)
942
- {
943
- return (__m256i)(__v32qi){
944
- __b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07,
945
- __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15,
946
- __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23,
947
- __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31
948
- };
949
- }
950
-
951
- static __inline __m256i __DEFAULT_FN_ATTRS
952
- _mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d)
953
- {
954
- return (__m256i)(__v4di){ __d, __c, __b, __a };
955
- }
956
-
957
- /* Create vectors with elements in reverse order */
958
- static __inline __m256d __DEFAULT_FN_ATTRS
959
- _mm256_setr_pd(double __a, double __b, double __c, double __d)
960
- {
961
- return (__m256d){ __a, __b, __c, __d };
962
- }
963
-
964
- static __inline __m256 __DEFAULT_FN_ATTRS
965
- _mm256_setr_ps(float __a, float __b, float __c, float __d,
966
- float __e, float __f, float __g, float __h)
967
- {
968
- return (__m256){ __a, __b, __c, __d, __e, __f, __g, __h };
969
- }
970
-
971
- static __inline __m256i __DEFAULT_FN_ATTRS
972
- _mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3,
973
- int __i4, int __i5, int __i6, int __i7)
974
- {
975
- return (__m256i)(__v8si){ __i0, __i1, __i2, __i3, __i4, __i5, __i6, __i7 };
976
- }
977
-
978
- static __inline __m256i __DEFAULT_FN_ATTRS
979
- _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,
980
- short __w11, short __w10, short __w09, short __w08,
981
- short __w07, short __w06, short __w05, short __w04,
982
- short __w03, short __w02, short __w01, short __w00)
983
- {
984
- return (__m256i)(__v16hi){ __w15, __w14, __w13, __w12, __w11, __w10, __w09,
985
- __w08, __w07, __w06, __w05, __w04, __w03, __w02, __w01, __w00 };
986
- }
987
-
988
- static __inline __m256i __DEFAULT_FN_ATTRS
989
- _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28,
990
- char __b27, char __b26, char __b25, char __b24,
991
- char __b23, char __b22, char __b21, char __b20,
992
- char __b19, char __b18, char __b17, char __b16,
993
- char __b15, char __b14, char __b13, char __b12,
994
- char __b11, char __b10, char __b09, char __b08,
995
- char __b07, char __b06, char __b05, char __b04,
996
- char __b03, char __b02, char __b01, char __b00)
997
- {
998
- return (__m256i)(__v32qi){
999
- __b31, __b30, __b29, __b28, __b27, __b26, __b25, __b24,
1000
- __b23, __b22, __b21, __b20, __b19, __b18, __b17, __b16,
1001
- __b15, __b14, __b13, __b12, __b11, __b10, __b09, __b08,
1002
- __b07, __b06, __b05, __b04, __b03, __b02, __b01, __b00 };
1003
- }
1004
-
1005
- static __inline __m256i __DEFAULT_FN_ATTRS
1006
- _mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d)
1007
- {
1008
- return (__m256i)(__v4di){ __a, __b, __c, __d };
1009
- }
1010
-
1011
- /* Create vectors with repeated elements */
1012
- static __inline __m256d __DEFAULT_FN_ATTRS
1013
- _mm256_set1_pd(double __w)
1014
- {
1015
- return (__m256d){ __w, __w, __w, __w };
1016
- }
1017
-
1018
- static __inline __m256 __DEFAULT_FN_ATTRS
1019
- _mm256_set1_ps(float __w)
1020
- {
1021
- return (__m256){ __w, __w, __w, __w, __w, __w, __w, __w };
1022
- }
1023
-
1024
- static __inline __m256i __DEFAULT_FN_ATTRS
1025
- _mm256_set1_epi32(int __i)
1026
- {
1027
- return (__m256i)(__v8si){ __i, __i, __i, __i, __i, __i, __i, __i };
1028
- }
1029
-
1030
- static __inline __m256i __DEFAULT_FN_ATTRS
1031
- _mm256_set1_epi16(short __w)
1032
- {
1033
- return (__m256i)(__v16hi){ __w, __w, __w, __w, __w, __w, __w, __w, __w, __w,
1034
- __w, __w, __w, __w, __w, __w };
1035
- }
1036
-
1037
- static __inline __m256i __DEFAULT_FN_ATTRS
1038
- _mm256_set1_epi8(char __b)
1039
- {
1040
- return (__m256i)(__v32qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b,
1041
- __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b,
1042
- __b, __b, __b, __b, __b, __b, __b };
1043
- }
1044
-
1045
- static __inline __m256i __DEFAULT_FN_ATTRS
1046
- _mm256_set1_epi64x(long long __q)
1047
- {
1048
- return (__m256i)(__v4di){ __q, __q, __q, __q };
1049
- }
1050
-
1051
- /* Create __zeroed vectors */
1052
- static __inline __m256d __DEFAULT_FN_ATTRS
1053
- _mm256_setzero_pd(void)
1054
- {
1055
- return (__m256d){ 0, 0, 0, 0 };
1056
- }
1057
-
1058
- static __inline __m256 __DEFAULT_FN_ATTRS
1059
- _mm256_setzero_ps(void)
1060
- {
1061
- return (__m256){ 0, 0, 0, 0, 0, 0, 0, 0 };
1062
- }
1063
-
1064
- static __inline __m256i __DEFAULT_FN_ATTRS
1065
- _mm256_setzero_si256(void)
1066
- {
1067
- return (__m256i){ 0LL, 0LL, 0LL, 0LL };
1068
- }
1069
-
1070
- /* Cast between vector types */
1071
- static __inline __m256 __DEFAULT_FN_ATTRS
1072
- _mm256_castpd_ps(__m256d __a)
1073
- {
1074
- return (__m256)__a;
1075
- }
1076
-
1077
- static __inline __m256i __DEFAULT_FN_ATTRS
1078
- _mm256_castpd_si256(__m256d __a)
1079
- {
1080
- return (__m256i)__a;
1081
- }
1082
-
1083
- static __inline __m256d __DEFAULT_FN_ATTRS
1084
- _mm256_castps_pd(__m256 __a)
1085
- {
1086
- return (__m256d)__a;
1087
- }
1088
-
1089
- static __inline __m256i __DEFAULT_FN_ATTRS
1090
- _mm256_castps_si256(__m256 __a)
1091
- {
1092
- return (__m256i)__a;
1093
- }
1094
-
1095
- static __inline __m256 __DEFAULT_FN_ATTRS
1096
- _mm256_castsi256_ps(__m256i __a)
1097
- {
1098
- return (__m256)__a;
1099
- }
1100
-
1101
- static __inline __m256d __DEFAULT_FN_ATTRS
1102
- _mm256_castsi256_pd(__m256i __a)
1103
- {
1104
- return (__m256d)__a;
1105
- }
1106
-
1107
- static __inline __m128d __DEFAULT_FN_ATTRS
1108
- _mm256_castpd256_pd128(__m256d __a)
1109
- {
1110
- return __builtin_shufflevector(__a, __a, 0, 1);
1111
- }
1112
-
1113
- static __inline __m128 __DEFAULT_FN_ATTRS
1114
- _mm256_castps256_ps128(__m256 __a)
1115
- {
1116
- return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
1117
- }
1118
-
1119
- static __inline __m128i __DEFAULT_FN_ATTRS
1120
- _mm256_castsi256_si128(__m256i __a)
1121
- {
1122
- return __builtin_shufflevector(__a, __a, 0, 1);
1123
- }
1124
-
1125
- static __inline __m256d __DEFAULT_FN_ATTRS
1126
- _mm256_castpd128_pd256(__m128d __a)
1127
- {
1128
- return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
1129
- }
1130
-
1131
- static __inline __m256 __DEFAULT_FN_ATTRS
1132
- _mm256_castps128_ps256(__m128 __a)
1133
- {
1134
- return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
1135
- }
1136
-
1137
- static __inline __m256i __DEFAULT_FN_ATTRS
1138
- _mm256_castsi128_si256(__m128i __a)
1139
- {
1140
- return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
1141
- }
1142
-
1143
- /*
1144
- Vector insert.
1145
- We use macros rather than inlines because we only want to accept
1146
- invocations where the immediate M is a constant expression.
1147
- */
1148
- #define _mm256_insertf128_ps(V1, V2, M) __extension__ ({ \
1149
- (__m256)__builtin_shufflevector( \
1150
- (__v8sf)(V1), \
1151
- (__v8sf)_mm256_castps128_ps256((__m128)(V2)), \
1152
- (((M) & 1) ? 0 : 8), \
1153
- (((M) & 1) ? 1 : 9), \
1154
- (((M) & 1) ? 2 : 10), \
1155
- (((M) & 1) ? 3 : 11), \
1156
- (((M) & 1) ? 8 : 4), \
1157
- (((M) & 1) ? 9 : 5), \
1158
- (((M) & 1) ? 10 : 6), \
1159
- (((M) & 1) ? 11 : 7) );})
1160
-
1161
- #define _mm256_insertf128_pd(V1, V2, M) __extension__ ({ \
1162
- (__m256d)__builtin_shufflevector( \
1163
- (__v4df)(V1), \
1164
- (__v4df)_mm256_castpd128_pd256((__m128d)(V2)), \
1165
- (((M) & 1) ? 0 : 4), \
1166
- (((M) & 1) ? 1 : 5), \
1167
- (((M) & 1) ? 4 : 2), \
1168
- (((M) & 1) ? 5 : 3) );})
1169
-
1170
- #define _mm256_insertf128_si256(V1, V2, M) __extension__ ({ \
1171
- (__m256i)__builtin_shufflevector( \
1172
- (__v4di)(V1), \
1173
- (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \
1174
- (((M) & 1) ? 0 : 4), \
1175
- (((M) & 1) ? 1 : 5), \
1176
- (((M) & 1) ? 4 : 2), \
1177
- (((M) & 1) ? 5 : 3) );})
1178
-
1179
- /*
1180
- Vector extract.
1181
- We use macros rather than inlines because we only want to accept
1182
- invocations where the immediate M is a constant expression.
1183
- */
1184
- #define _mm256_extractf128_ps(V, M) __extension__ ({ \
1185
- (__m128)__builtin_shufflevector( \
1186
- (__v8sf)(V), \
1187
- (__v8sf)(_mm256_setzero_ps()), \
1188
- (((M) & 1) ? 4 : 0), \
1189
- (((M) & 1) ? 5 : 1), \
1190
- (((M) & 1) ? 6 : 2), \
1191
- (((M) & 1) ? 7 : 3) );})
1192
-
1193
- #define _mm256_extractf128_pd(V, M) __extension__ ({ \
1194
- (__m128d)__builtin_shufflevector( \
1195
- (__v4df)(V), \
1196
- (__v4df)(_mm256_setzero_pd()), \
1197
- (((M) & 1) ? 2 : 0), \
1198
- (((M) & 1) ? 3 : 1) );})
1199
-
1200
- #define _mm256_extractf128_si256(V, M) __extension__ ({ \
1201
- (__m128i)__builtin_shufflevector( \
1202
- (__v4di)(V), \
1203
- (__v4di)(_mm256_setzero_si256()), \
1204
- (((M) & 1) ? 2 : 0), \
1205
- (((M) & 1) ? 3 : 1) );})
1206
-
1207
- /* SIMD load ops (unaligned) */
1208
- static __inline __m256 __DEFAULT_FN_ATTRS
1209
- _mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo)
1210
- {
1211
- struct __loadu_ps {
1212
- __m128 __v;
1213
- } __attribute__((__packed__, __may_alias__));
1214
-
1215
- __m256 __v256 = _mm256_castps128_ps256(((struct __loadu_ps*)__addr_lo)->__v);
1216
- return _mm256_insertf128_ps(__v256, ((struct __loadu_ps*)__addr_hi)->__v, 1);
1217
- }
1218
-
1219
- static __inline __m256d __DEFAULT_FN_ATTRS
1220
- _mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo)
1221
- {
1222
- struct __loadu_pd {
1223
- __m128d __v;
1224
- } __attribute__((__packed__, __may_alias__));
1225
-
1226
- __m256d __v256 = _mm256_castpd128_pd256(((struct __loadu_pd*)__addr_lo)->__v);
1227
- return _mm256_insertf128_pd(__v256, ((struct __loadu_pd*)__addr_hi)->__v, 1);
1228
- }
1229
-
1230
- static __inline __m256i __DEFAULT_FN_ATTRS
1231
- _mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo)
1232
- {
1233
- struct __loadu_si128 {
1234
- __m128i __v;
1235
- } __attribute__((__packed__, __may_alias__));
1236
- __m256i __v256 = _mm256_castsi128_si256(
1237
- ((struct __loadu_si128*)__addr_lo)->__v);
1238
- return _mm256_insertf128_si256(__v256,
1239
- ((struct __loadu_si128*)__addr_hi)->__v, 1);
1240
- }
1241
-
1242
- /* SIMD store ops (unaligned) */
1243
- static __inline void __DEFAULT_FN_ATTRS
1244
- _mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a)
1245
- {
1246
- __m128 __v128;
1247
-
1248
- __v128 = _mm256_castps256_ps128(__a);
1249
- __builtin_ia32_storeups(__addr_lo, __v128);
1250
- __v128 = _mm256_extractf128_ps(__a, 1);
1251
- __builtin_ia32_storeups(__addr_hi, __v128);
1252
- }
1253
-
1254
- static __inline void __DEFAULT_FN_ATTRS
1255
- _mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a)
1256
- {
1257
- __m128d __v128;
1258
-
1259
- __v128 = _mm256_castpd256_pd128(__a);
1260
- __builtin_ia32_storeupd(__addr_lo, __v128);
1261
- __v128 = _mm256_extractf128_pd(__a, 1);
1262
- __builtin_ia32_storeupd(__addr_hi, __v128);
1263
- }
1264
-
1265
- static __inline void __DEFAULT_FN_ATTRS
1266
- _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a)
1267
- {
1268
- __m128i __v128;
1269
-
1270
- __v128 = _mm256_castsi256_si128(__a);
1271
- __builtin_ia32_storedqu((char *)__addr_lo, (__v16qi)__v128);
1272
- __v128 = _mm256_extractf128_si256(__a, 1);
1273
- __builtin_ia32_storedqu((char *)__addr_hi, (__v16qi)__v128);
1274
- }
1275
-
1276
- static __inline __m256 __DEFAULT_FN_ATTRS
1277
- _mm256_set_m128 (__m128 __hi, __m128 __lo) {
1278
- return (__m256) __builtin_shufflevector(__lo, __hi, 0, 1, 2, 3, 4, 5, 6, 7);
1279
- }
1280
-
1281
- static __inline __m256d __DEFAULT_FN_ATTRS
1282
- _mm256_set_m128d (__m128d __hi, __m128d __lo) {
1283
- return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo);
1284
- }
1285
-
1286
- static __inline __m256i __DEFAULT_FN_ATTRS
1287
- _mm256_set_m128i (__m128i __hi, __m128i __lo) {
1288
- return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo);
1289
- }
1290
-
1291
- static __inline __m256 __DEFAULT_FN_ATTRS
1292
- _mm256_setr_m128 (__m128 __lo, __m128 __hi) {
1293
- return _mm256_set_m128(__hi, __lo);
1294
- }
1295
-
1296
- static __inline __m256d __DEFAULT_FN_ATTRS
1297
- _mm256_setr_m128d (__m128d __lo, __m128d __hi) {
1298
- return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo);
1299
- }
1300
-
1301
- static __inline __m256i __DEFAULT_FN_ATTRS
1302
- _mm256_setr_m128i (__m128i __lo, __m128i __hi) {
1303
- return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo);
1304
- }
1305
-
1306
- #undef __DEFAULT_FN_ATTRS
1307
-
1308
- #endif /* __AVXINTRIN_H */