xcodebuild-helper 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (202) hide show
  1. checksums.yaml +7 -0
  2. data/.codeclimate.yml +20 -0
  3. data/.gitignore +1 -0
  4. data/.rspec +2 -0
  5. data/.travis.yml +7 -0
  6. data/Gemfile +6 -0
  7. data/Gemfile.lock +110 -0
  8. data/Guardfile +18 -0
  9. data/README.md +7 -0
  10. data/Rakefile +7 -0
  11. data/TODO.md +3 -0
  12. data/bin/oclint +5 -0
  13. data/bin/oclint-0.8 +5 -0
  14. data/bin/oclint-json-compilation-database +5 -0
  15. data/bin/oclint-xcodebuild +5 -0
  16. data/externals/oclint/LICENSE +69 -0
  17. data/externals/oclint/bin/oclint +0 -0
  18. data/externals/oclint/bin/oclint-0.10.2 +0 -0
  19. data/externals/oclint/bin/oclint-json-compilation-database +88 -0
  20. data/externals/oclint/bin/oclint-xcodebuild +218 -0
  21. data/externals/oclint/lib/clang/3.7.0/asan_blacklist.txt +13 -0
  22. data/externals/oclint/lib/clang/3.7.0/include/Intrin.h +958 -0
  23. data/externals/oclint/lib/clang/3.7.0/include/__stddef_max_align_t.h +43 -0
  24. data/externals/oclint/lib/clang/3.7.0/include/__wmmintrin_aes.h +72 -0
  25. data/externals/oclint/lib/clang/3.7.0/include/__wmmintrin_pclmul.h +34 -0
  26. data/externals/oclint/lib/clang/3.7.0/include/adxintrin.h +88 -0
  27. data/externals/oclint/lib/clang/3.7.0/include/altivec.h +13528 -0
  28. data/externals/oclint/lib/clang/3.7.0/include/ammintrin.h +215 -0
  29. data/externals/oclint/lib/clang/3.7.0/include/arm_acle.h +304 -0
  30. data/externals/oclint/lib/clang/3.7.0/include/arm_neon.h +68419 -0
  31. data/externals/oclint/lib/clang/3.7.0/include/avx2intrin.h +1256 -0
  32. data/externals/oclint/lib/clang/3.7.0/include/avx512bwintrin.h +1250 -0
  33. data/externals/oclint/lib/clang/3.7.0/include/avx512cdintrin.h +131 -0
  34. data/externals/oclint/lib/clang/3.7.0/include/avx512dqintrin.h +242 -0
  35. data/externals/oclint/lib/clang/3.7.0/include/avx512erintrin.h +285 -0
  36. data/externals/oclint/lib/clang/3.7.0/include/avx512fintrin.h +2457 -0
  37. data/externals/oclint/lib/clang/3.7.0/include/avx512vlbwintrin.h +1907 -0
  38. data/externals/oclint/lib/clang/3.7.0/include/avx512vldqintrin.h +353 -0
  39. data/externals/oclint/lib/clang/3.7.0/include/avx512vlintrin.h +1982 -0
  40. data/externals/oclint/lib/clang/3.7.0/include/avxintrin.h +1308 -0
  41. data/externals/oclint/lib/clang/3.7.0/include/bmi2intrin.h +99 -0
  42. data/externals/oclint/lib/clang/3.7.0/include/bmiintrin.h +153 -0
  43. data/externals/oclint/lib/clang/3.7.0/include/cpuid.h +209 -0
  44. data/externals/oclint/lib/clang/3.7.0/include/cuda_builtin_vars.h +110 -0
  45. data/externals/oclint/lib/clang/3.7.0/include/emmintrin.h +1480 -0
  46. data/externals/oclint/lib/clang/3.7.0/include/f16cintrin.h +63 -0
  47. data/externals/oclint/lib/clang/3.7.0/include/float.h +124 -0
  48. data/externals/oclint/lib/clang/3.7.0/include/fma4intrin.h +236 -0
  49. data/externals/oclint/lib/clang/3.7.0/include/fmaintrin.h +234 -0
  50. data/externals/oclint/lib/clang/3.7.0/include/fxsrintrin.h +55 -0
  51. data/externals/oclint/lib/clang/3.7.0/include/htmintrin.h +226 -0
  52. data/externals/oclint/lib/clang/3.7.0/include/htmxlintrin.h +363 -0
  53. data/externals/oclint/lib/clang/3.7.0/include/ia32intrin.h +101 -0
  54. data/externals/oclint/lib/clang/3.7.0/include/immintrin.h +203 -0
  55. data/externals/oclint/lib/clang/3.7.0/include/inttypes.h +102 -0
  56. data/externals/oclint/lib/clang/3.7.0/include/iso646.h +43 -0
  57. data/externals/oclint/lib/clang/3.7.0/include/limits.h +118 -0
  58. data/externals/oclint/lib/clang/3.7.0/include/lzcntintrin.h +72 -0
  59. data/externals/oclint/lib/clang/3.7.0/include/mm3dnow.h +167 -0
  60. data/externals/oclint/lib/clang/3.7.0/include/mm_malloc.h +75 -0
  61. data/externals/oclint/lib/clang/3.7.0/include/mmintrin.h +507 -0
  62. data/externals/oclint/lib/clang/3.7.0/include/module.modulemap +196 -0
  63. data/externals/oclint/lib/clang/3.7.0/include/nmmintrin.h +35 -0
  64. data/externals/oclint/lib/clang/3.7.0/include/pmmintrin.h +122 -0
  65. data/externals/oclint/lib/clang/3.7.0/include/popcntintrin.h +50 -0
  66. data/externals/oclint/lib/clang/3.7.0/include/prfchwintrin.h +39 -0
  67. data/externals/oclint/lib/clang/3.7.0/include/rdseedintrin.h +59 -0
  68. data/externals/oclint/lib/clang/3.7.0/include/rtmintrin.h +59 -0
  69. data/externals/oclint/lib/clang/3.7.0/include/s390intrin.h +39 -0
  70. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/allocator_interface.h +66 -0
  71. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/asan_interface.h +155 -0
  72. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/common_interface_defs.h +118 -0
  73. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/coverage_interface.h +63 -0
  74. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/dfsan_interface.h +114 -0
  75. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/linux_syscall_hooks.h +3070 -0
  76. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/lsan_interface.h +84 -0
  77. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/msan_interface.h +107 -0
  78. data/externals/oclint/lib/clang/3.7.0/include/sanitizer/tsan_interface_atomic.h +222 -0
  79. data/externals/oclint/lib/clang/3.7.0/include/shaintrin.h +79 -0
  80. data/externals/oclint/lib/clang/3.7.0/include/smmintrin.h +487 -0
  81. data/externals/oclint/lib/clang/3.7.0/include/stdalign.h +35 -0
  82. data/externals/oclint/lib/clang/3.7.0/include/stdarg.h +52 -0
  83. data/externals/oclint/lib/clang/3.7.0/include/stdatomic.h +190 -0
  84. data/externals/oclint/lib/clang/3.7.0/include/stdbool.h +44 -0
  85. data/externals/oclint/lib/clang/3.7.0/include/stddef.h +137 -0
  86. data/externals/oclint/lib/clang/3.7.0/include/stdint.h +707 -0
  87. data/externals/oclint/lib/clang/3.7.0/include/stdnoreturn.h +30 -0
  88. data/externals/oclint/lib/clang/3.7.0/include/tbmintrin.h +154 -0
  89. data/externals/oclint/lib/clang/3.7.0/include/tgmath.h +1374 -0
  90. data/externals/oclint/lib/clang/3.7.0/include/tmmintrin.h +230 -0
  91. data/externals/oclint/lib/clang/3.7.0/include/unwind.h +282 -0
  92. data/externals/oclint/lib/clang/3.7.0/include/vadefs.h +65 -0
  93. data/externals/oclint/lib/clang/3.7.0/include/varargs.h +26 -0
  94. data/externals/oclint/lib/clang/3.7.0/include/vecintrin.h +8946 -0
  95. data/externals/oclint/lib/clang/3.7.0/include/wmmintrin.h +42 -0
  96. data/externals/oclint/lib/clang/3.7.0/include/x86intrin.h +81 -0
  97. data/externals/oclint/lib/clang/3.7.0/include/xmmintrin.h +1008 -0
  98. data/externals/oclint/lib/clang/3.7.0/include/xopintrin.h +809 -0
  99. data/externals/oclint/lib/clang/3.7.0/include/xtestintrin.h +41 -0
  100. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.asan_iossim_dynamic.dylib +0 -0
  101. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.asan_osx_dynamic.dylib +0 -0
  102. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.builtins-i386.a +0 -0
  103. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.builtins-x86_64.a +0 -0
  104. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.profile_osx.a +0 -0
  105. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.safestack_osx.a +0 -0
  106. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.ubsan_iossim_dynamic.dylib +0 -0
  107. data/externals/oclint/lib/clang/3.7.0/lib/darwin/libclang_rt.ubsan_osx_dynamic.dylib +0 -0
  108. data/externals/oclint/lib/oclint/reporters/libHTMLReporter.dylib +0 -0
  109. data/externals/oclint/lib/oclint/reporters/libJSONReporter.dylib +0 -0
  110. data/externals/oclint/lib/oclint/reporters/libPMDReporter.dylib +0 -0
  111. data/externals/oclint/lib/oclint/reporters/libTextReporter.dylib +0 -0
  112. data/externals/oclint/lib/oclint/reporters/libXMLReporter.dylib +0 -0
  113. data/externals/oclint/lib/oclint/reporters/libXcodeReporter.dylib +0 -0
  114. data/externals/oclint/lib/oclint/rules/libAvoidBranchingStatementAsLastInLoopRule.dylib +0 -0
  115. data/externals/oclint/lib/oclint/rules/libAvoidDefaultArgumentsOnVirtualMethodsRule.dylib +0 -0
  116. data/externals/oclint/lib/oclint/rules/libAvoidPrivateStaticMembersRule.dylib +0 -0
  117. data/externals/oclint/lib/oclint/rules/libBaseClassDestructorShouldBeVirtualOrProtectedRule.dylib +0 -0
  118. data/externals/oclint/lib/oclint/rules/libBitwiseOperatorInConditionalRule.dylib +0 -0
  119. data/externals/oclint/lib/oclint/rules/libBrokenNullCheckRule.dylib +0 -0
  120. data/externals/oclint/lib/oclint/rules/libBrokenOddnessCheckRule.dylib +0 -0
  121. data/externals/oclint/lib/oclint/rules/libCollapsibleIfStatementsRule.dylib +0 -0
  122. data/externals/oclint/lib/oclint/rules/libConstantConditionalOperatorRule.dylib +0 -0
  123. data/externals/oclint/lib/oclint/rules/libConstantIfExpressionRule.dylib +0 -0
  124. data/externals/oclint/lib/oclint/rules/libCoveredSwitchStatementsDontNeedDefaultRule.dylib +0 -0
  125. data/externals/oclint/lib/oclint/rules/libCyclomaticComplexityRule.dylib +0 -0
  126. data/externals/oclint/lib/oclint/rules/libDeadCodeRule.dylib +0 -0
  127. data/externals/oclint/lib/oclint/rules/libDefaultLabelNotLastInSwitchStatementRule.dylib +0 -0
  128. data/externals/oclint/lib/oclint/rules/libDestructorOfVirtualClassRule.dylib +0 -0
  129. data/externals/oclint/lib/oclint/rules/libDoubleNegativeRule.dylib +0 -0
  130. data/externals/oclint/lib/oclint/rules/libEmptyCatchStatementRule.dylib +0 -0
  131. data/externals/oclint/lib/oclint/rules/libEmptyDoWhileStatementRule.dylib +0 -0
  132. data/externals/oclint/lib/oclint/rules/libEmptyElseBlockRule.dylib +0 -0
  133. data/externals/oclint/lib/oclint/rules/libEmptyFinallyStatementRule.dylib +0 -0
  134. data/externals/oclint/lib/oclint/rules/libEmptyForStatementRule.dylib +0 -0
  135. data/externals/oclint/lib/oclint/rules/libEmptyIfStatementRule.dylib +0 -0
  136. data/externals/oclint/lib/oclint/rules/libEmptySwitchStatementRule.dylib +0 -0
  137. data/externals/oclint/lib/oclint/rules/libEmptyTryStatementRule.dylib +0 -0
  138. data/externals/oclint/lib/oclint/rules/libEmptyWhileStatementRule.dylib +0 -0
  139. data/externals/oclint/lib/oclint/rules/libForLoopShouldBeWhileLoopRule.dylib +0 -0
  140. data/externals/oclint/lib/oclint/rules/libGotoStatementRule.dylib +0 -0
  141. data/externals/oclint/lib/oclint/rules/libInvertedLogicRule.dylib +0 -0
  142. data/externals/oclint/lib/oclint/rules/libJumbledIncrementerRule.dylib +0 -0
  143. data/externals/oclint/lib/oclint/rules/libLongClassRule.dylib +0 -0
  144. data/externals/oclint/lib/oclint/rules/libLongLineRule.dylib +0 -0
  145. data/externals/oclint/lib/oclint/rules/libLongMethodRule.dylib +0 -0
  146. data/externals/oclint/lib/oclint/rules/libLongVariableNameRule.dylib +0 -0
  147. data/externals/oclint/lib/oclint/rules/libMisplacedNullCheckRule.dylib +0 -0
  148. data/externals/oclint/lib/oclint/rules/libMissingBreakInSwitchStatementRule.dylib +0 -0
  149. data/externals/oclint/lib/oclint/rules/libMultipleUnaryOperatorRule.dylib +0 -0
  150. data/externals/oclint/lib/oclint/rules/libNPathComplexityRule.dylib +0 -0
  151. data/externals/oclint/lib/oclint/rules/libNcssMethodCountRule.dylib +0 -0
  152. data/externals/oclint/lib/oclint/rules/libNestedBlockDepthRule.dylib +0 -0
  153. data/externals/oclint/lib/oclint/rules/libNonCaseLabelInSwitchStatementRule.dylib +0 -0
  154. data/externals/oclint/lib/oclint/rules/libObjCAssignIvarOutsideAccessorsRule.dylib +0 -0
  155. data/externals/oclint/lib/oclint/rules/libObjCBoxedExpressionsRule.dylib +0 -0
  156. data/externals/oclint/lib/oclint/rules/libObjCContainerLiteralsRule.dylib +0 -0
  157. data/externals/oclint/lib/oclint/rules/libObjCNSNumberLiteralsRule.dylib +0 -0
  158. data/externals/oclint/lib/oclint/rules/libObjCObjectSubscriptingRule.dylib +0 -0
  159. data/externals/oclint/lib/oclint/rules/libObjCVerifyIsEqualHashRule.dylib +0 -0
  160. data/externals/oclint/lib/oclint/rules/libObjCVerifyMustCallSuperRule.dylib +0 -0
  161. data/externals/oclint/lib/oclint/rules/libObjCVerifyProhibitedCallRule.dylib +0 -0
  162. data/externals/oclint/lib/oclint/rules/libObjCVerifyProtectedMethodRule.dylib +0 -0
  163. data/externals/oclint/lib/oclint/rules/libObjCVerifySubclassMustImplementRule.dylib +0 -0
  164. data/externals/oclint/lib/oclint/rules/libParameterReassignmentRule.dylib +0 -0
  165. data/externals/oclint/lib/oclint/rules/libPreferEarlyExitRule.dylib +0 -0
  166. data/externals/oclint/lib/oclint/rules/libRedundantConditionalOperatorRule.dylib +0 -0
  167. data/externals/oclint/lib/oclint/rules/libRedundantIfStatementRule.dylib +0 -0
  168. data/externals/oclint/lib/oclint/rules/libRedundantLocalVariableRule.dylib +0 -0
  169. data/externals/oclint/lib/oclint/rules/libRedundantNilCheckRule.dylib +0 -0
  170. data/externals/oclint/lib/oclint/rules/libReturnFromFinallyBlockRule.dylib +0 -0
  171. data/externals/oclint/lib/oclint/rules/libShortVariableNameRule.dylib +0 -0
  172. data/externals/oclint/lib/oclint/rules/libSwitchStatementsShouldHaveDefaultRule.dylib +0 -0
  173. data/externals/oclint/lib/oclint/rules/libThrowExceptionFromFinallyBlockRule.dylib +0 -0
  174. data/externals/oclint/lib/oclint/rules/libTooFewBranchesInSwitchStatementRule.dylib +0 -0
  175. data/externals/oclint/lib/oclint/rules/libTooManyFieldsRule.dylib +0 -0
  176. data/externals/oclint/lib/oclint/rules/libTooManyMethodsRule.dylib +0 -0
  177. data/externals/oclint/lib/oclint/rules/libTooManyParametersRule.dylib +0 -0
  178. data/externals/oclint/lib/oclint/rules/libUnnecessaryElseStatementRule.dylib +0 -0
  179. data/externals/oclint/lib/oclint/rules/libUnnecessaryNullCheckForCXXDeallocRule.dylib +0 -0
  180. data/externals/oclint/lib/oclint/rules/libUnusedLocalVariableRule.dylib +0 -0
  181. data/externals/oclint/lib/oclint/rules/libUnusedMethodParameterRule.dylib +0 -0
  182. data/externals/oclint/lib/oclint/rules/libUselessParenthesesRule.dylib +0 -0
  183. data/lib/coverage_plan.rb +19 -0
  184. data/lib/device.rb +27 -0
  185. data/lib/execute.rb +7 -0
  186. data/lib/lint_plan.rb +41 -0
  187. data/lib/rules.rb +23 -0
  188. data/lib/test_plan.rb +11 -0
  189. data/lib/version.rb +3 -0
  190. data/lib/xcode.rb +128 -0
  191. data/lib/xcodebuild-helper.rb +110 -0
  192. data/spec/coverage_plan_spec.rb +18 -0
  193. data/spec/device_spec.rb +24 -0
  194. data/spec/lint_plan_spec.rb +35 -0
  195. data/spec/rule_spec.rb +37 -0
  196. data/spec/spec_helper.rb +17 -0
  197. data/spec/test_plan_spec.rb +11 -0
  198. data/spec/xcode_dsl_actions_spec.rb +136 -0
  199. data/spec/xcode_dsl_spec.rb +176 -0
  200. data/spec/xcode_spec.rb +79 -0
  201. data/xcodebuild-helper.gemspec +26 -0
  202. metadata +327 -0
@@ -0,0 +1,1256 @@
1
+ /*===---- avx2intrin.h - AVX2 intrinsics -----------------------------------===
2
+ *
3
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ * of this software and associated documentation files (the "Software"), to deal
5
+ * in the Software without restriction, including without limitation the rights
6
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ * copies of the Software, and to permit persons to whom the Software is
8
+ * furnished to do so, subject to the following conditions:
9
+ *
10
+ * The above copyright notice and this permission notice shall be included in
11
+ * all copies or substantial portions of the Software.
12
+ *
13
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ * THE SOFTWARE.
20
+ *
21
+ *===-----------------------------------------------------------------------===
22
+ */
23
+
24
+ #ifndef __IMMINTRIN_H
25
+ #error "Never use <avx2intrin.h> directly; include <immintrin.h> instead."
26
+ #endif
27
+
28
+ #ifndef __AVX2INTRIN_H
29
+ #define __AVX2INTRIN_H
30
+
31
+ /* Define the default attributes for the functions in this file. */
32
+ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
33
+
34
+ /* SSE4 Multiple Packed Sums of Absolute Difference. */
35
+ #define _mm256_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw256((X), (Y), (M))
36
+
37
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
38
+ _mm256_abs_epi8(__m256i __a)
39
+ {
40
+ return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a);
41
+ }
42
+
43
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
44
+ _mm256_abs_epi16(__m256i __a)
45
+ {
46
+ return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a);
47
+ }
48
+
49
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
50
+ _mm256_abs_epi32(__m256i __a)
51
+ {
52
+ return (__m256i)__builtin_ia32_pabsd256((__v8si)__a);
53
+ }
54
+
55
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
56
+ _mm256_packs_epi16(__m256i __a, __m256i __b)
57
+ {
58
+ return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b);
59
+ }
60
+
61
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
62
+ _mm256_packs_epi32(__m256i __a, __m256i __b)
63
+ {
64
+ return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b);
65
+ }
66
+
67
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
68
+ _mm256_packus_epi16(__m256i __a, __m256i __b)
69
+ {
70
+ return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b);
71
+ }
72
+
73
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
74
+ _mm256_packus_epi32(__m256i __V1, __m256i __V2)
75
+ {
76
+ return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);
77
+ }
78
+
79
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
80
+ _mm256_add_epi8(__m256i __a, __m256i __b)
81
+ {
82
+ return (__m256i)((__v32qi)__a + (__v32qi)__b);
83
+ }
84
+
85
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
86
+ _mm256_add_epi16(__m256i __a, __m256i __b)
87
+ {
88
+ return (__m256i)((__v16hi)__a + (__v16hi)__b);
89
+ }
90
+
91
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
92
+ _mm256_add_epi32(__m256i __a, __m256i __b)
93
+ {
94
+ return (__m256i)((__v8si)__a + (__v8si)__b);
95
+ }
96
+
97
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
98
+ _mm256_add_epi64(__m256i __a, __m256i __b)
99
+ {
100
+ return __a + __b;
101
+ }
102
+
103
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
104
+ _mm256_adds_epi8(__m256i __a, __m256i __b)
105
+ {
106
+ return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b);
107
+ }
108
+
109
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
110
+ _mm256_adds_epi16(__m256i __a, __m256i __b)
111
+ {
112
+ return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b);
113
+ }
114
+
115
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
116
+ _mm256_adds_epu8(__m256i __a, __m256i __b)
117
+ {
118
+ return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b);
119
+ }
120
+
121
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
122
+ _mm256_adds_epu16(__m256i __a, __m256i __b)
123
+ {
124
+ return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b);
125
+ }
126
+
127
+ #define _mm256_alignr_epi8(a, b, n) __extension__ ({ \
128
+ __m256i __a = (a); \
129
+ __m256i __b = (b); \
130
+ (__m256i)__builtin_ia32_palignr256((__v32qi)__a, (__v32qi)__b, (n)); })
131
+
132
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
133
+ _mm256_and_si256(__m256i __a, __m256i __b)
134
+ {
135
+ return __a & __b;
136
+ }
137
+
138
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
139
+ _mm256_andnot_si256(__m256i __a, __m256i __b)
140
+ {
141
+ return ~__a & __b;
142
+ }
143
+
144
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
145
+ _mm256_avg_epu8(__m256i __a, __m256i __b)
146
+ {
147
+ return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b);
148
+ }
149
+
150
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
151
+ _mm256_avg_epu16(__m256i __a, __m256i __b)
152
+ {
153
+ return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b);
154
+ }
155
+
156
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
157
+ _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
158
+ {
159
+ return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2,
160
+ (__v32qi)__M);
161
+ }
162
+
163
+ #define _mm256_blend_epi16(V1, V2, M) __extension__ ({ \
164
+ __m256i __V1 = (V1); \
165
+ __m256i __V2 = (V2); \
166
+ (__m256i)__builtin_shufflevector((__v16hi)__V1, (__v16hi)__V2, \
167
+ (((M) & 0x01) ? 16 : 0), \
168
+ (((M) & 0x02) ? 17 : 1), \
169
+ (((M) & 0x04) ? 18 : 2), \
170
+ (((M) & 0x08) ? 19 : 3), \
171
+ (((M) & 0x10) ? 20 : 4), \
172
+ (((M) & 0x20) ? 21 : 5), \
173
+ (((M) & 0x40) ? 22 : 6), \
174
+ (((M) & 0x80) ? 23 : 7), \
175
+ (((M) & 0x01) ? 24 : 8), \
176
+ (((M) & 0x02) ? 25 : 9), \
177
+ (((M) & 0x04) ? 26 : 10), \
178
+ (((M) & 0x08) ? 27 : 11), \
179
+ (((M) & 0x10) ? 28 : 12), \
180
+ (((M) & 0x20) ? 29 : 13), \
181
+ (((M) & 0x40) ? 30 : 14), \
182
+ (((M) & 0x80) ? 31 : 15)); })
183
+
184
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
185
+ _mm256_cmpeq_epi8(__m256i __a, __m256i __b)
186
+ {
187
+ return (__m256i)((__v32qi)__a == (__v32qi)__b);
188
+ }
189
+
190
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
191
+ _mm256_cmpeq_epi16(__m256i __a, __m256i __b)
192
+ {
193
+ return (__m256i)((__v16hi)__a == (__v16hi)__b);
194
+ }
195
+
196
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
197
+ _mm256_cmpeq_epi32(__m256i __a, __m256i __b)
198
+ {
199
+ return (__m256i)((__v8si)__a == (__v8si)__b);
200
+ }
201
+
202
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
203
+ _mm256_cmpeq_epi64(__m256i __a, __m256i __b)
204
+ {
205
+ return (__m256i)(__a == __b);
206
+ }
207
+
208
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
209
+ _mm256_cmpgt_epi8(__m256i __a, __m256i __b)
210
+ {
211
+ return (__m256i)((__v32qi)__a > (__v32qi)__b);
212
+ }
213
+
214
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
215
+ _mm256_cmpgt_epi16(__m256i __a, __m256i __b)
216
+ {
217
+ return (__m256i)((__v16hi)__a > (__v16hi)__b);
218
+ }
219
+
220
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
221
+ _mm256_cmpgt_epi32(__m256i __a, __m256i __b)
222
+ {
223
+ return (__m256i)((__v8si)__a > (__v8si)__b);
224
+ }
225
+
226
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
227
+ _mm256_cmpgt_epi64(__m256i __a, __m256i __b)
228
+ {
229
+ return (__m256i)(__a > __b);
230
+ }
231
+
232
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
233
+ _mm256_hadd_epi16(__m256i __a, __m256i __b)
234
+ {
235
+ return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);
236
+ }
237
+
238
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
239
+ _mm256_hadd_epi32(__m256i __a, __m256i __b)
240
+ {
241
+ return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);
242
+ }
243
+
244
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
245
+ _mm256_hadds_epi16(__m256i __a, __m256i __b)
246
+ {
247
+ return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);
248
+ }
249
+
250
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
251
+ _mm256_hsub_epi16(__m256i __a, __m256i __b)
252
+ {
253
+ return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);
254
+ }
255
+
256
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
257
+ _mm256_hsub_epi32(__m256i __a, __m256i __b)
258
+ {
259
+ return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);
260
+ }
261
+
262
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
263
+ _mm256_hsubs_epi16(__m256i __a, __m256i __b)
264
+ {
265
+ return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);
266
+ }
267
+
268
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
269
+ _mm256_maddubs_epi16(__m256i __a, __m256i __b)
270
+ {
271
+ return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b);
272
+ }
273
+
274
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
275
+ _mm256_madd_epi16(__m256i __a, __m256i __b)
276
+ {
277
+ return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b);
278
+ }
279
+
280
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
281
+ _mm256_max_epi8(__m256i __a, __m256i __b)
282
+ {
283
+ return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b);
284
+ }
285
+
286
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
287
+ _mm256_max_epi16(__m256i __a, __m256i __b)
288
+ {
289
+ return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b);
290
+ }
291
+
292
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
293
+ _mm256_max_epi32(__m256i __a, __m256i __b)
294
+ {
295
+ return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b);
296
+ }
297
+
298
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
299
+ _mm256_max_epu8(__m256i __a, __m256i __b)
300
+ {
301
+ return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b);
302
+ }
303
+
304
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
305
+ _mm256_max_epu16(__m256i __a, __m256i __b)
306
+ {
307
+ return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b);
308
+ }
309
+
310
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
311
+ _mm256_max_epu32(__m256i __a, __m256i __b)
312
+ {
313
+ return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b);
314
+ }
315
+
316
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
317
+ _mm256_min_epi8(__m256i __a, __m256i __b)
318
+ {
319
+ return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b);
320
+ }
321
+
322
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
323
+ _mm256_min_epi16(__m256i __a, __m256i __b)
324
+ {
325
+ return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b);
326
+ }
327
+
328
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
329
+ _mm256_min_epi32(__m256i __a, __m256i __b)
330
+ {
331
+ return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b);
332
+ }
333
+
334
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
335
+ _mm256_min_epu8(__m256i __a, __m256i __b)
336
+ {
337
+ return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b);
338
+ }
339
+
340
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
341
+ _mm256_min_epu16(__m256i __a, __m256i __b)
342
+ {
343
+ return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b);
344
+ }
345
+
346
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
347
+ _mm256_min_epu32(__m256i __a, __m256i __b)
348
+ {
349
+ return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b);
350
+ }
351
+
352
+ static __inline__ int __DEFAULT_FN_ATTRS
353
+ _mm256_movemask_epi8(__m256i __a)
354
+ {
355
+ return __builtin_ia32_pmovmskb256((__v32qi)__a);
356
+ }
357
+
358
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
359
+ _mm256_cvtepi8_epi16(__m128i __V)
360
+ {
361
+ return (__m256i)__builtin_ia32_pmovsxbw256((__v16qi)__V);
362
+ }
363
+
364
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
365
+ _mm256_cvtepi8_epi32(__m128i __V)
366
+ {
367
+ return (__m256i)__builtin_ia32_pmovsxbd256((__v16qi)__V);
368
+ }
369
+
370
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
371
+ _mm256_cvtepi8_epi64(__m128i __V)
372
+ {
373
+ return (__m256i)__builtin_ia32_pmovsxbq256((__v16qi)__V);
374
+ }
375
+
376
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
377
+ _mm256_cvtepi16_epi32(__m128i __V)
378
+ {
379
+ return (__m256i)__builtin_ia32_pmovsxwd256((__v8hi)__V);
380
+ }
381
+
382
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
383
+ _mm256_cvtepi16_epi64(__m128i __V)
384
+ {
385
+ return (__m256i)__builtin_ia32_pmovsxwq256((__v8hi)__V);
386
+ }
387
+
388
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
389
+ _mm256_cvtepi32_epi64(__m128i __V)
390
+ {
391
+ return (__m256i)__builtin_ia32_pmovsxdq256((__v4si)__V);
392
+ }
393
+
394
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
395
+ _mm256_cvtepu8_epi16(__m128i __V)
396
+ {
397
+ return (__m256i)__builtin_ia32_pmovzxbw256((__v16qi)__V);
398
+ }
399
+
400
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
401
+ _mm256_cvtepu8_epi32(__m128i __V)
402
+ {
403
+ return (__m256i)__builtin_ia32_pmovzxbd256((__v16qi)__V);
404
+ }
405
+
406
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
407
+ _mm256_cvtepu8_epi64(__m128i __V)
408
+ {
409
+ return (__m256i)__builtin_ia32_pmovzxbq256((__v16qi)__V);
410
+ }
411
+
412
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
413
+ _mm256_cvtepu16_epi32(__m128i __V)
414
+ {
415
+ return (__m256i)__builtin_ia32_pmovzxwd256((__v8hi)__V);
416
+ }
417
+
418
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
419
+ _mm256_cvtepu16_epi64(__m128i __V)
420
+ {
421
+ return (__m256i)__builtin_ia32_pmovzxwq256((__v8hi)__V);
422
+ }
423
+
424
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
425
+ _mm256_cvtepu32_epi64(__m128i __V)
426
+ {
427
+ return (__m256i)__builtin_ia32_pmovzxdq256((__v4si)__V);
428
+ }
429
+
430
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
431
+ _mm256_mul_epi32(__m256i __a, __m256i __b)
432
+ {
433
+ return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);
434
+ }
435
+
436
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
437
+ _mm256_mulhrs_epi16(__m256i __a, __m256i __b)
438
+ {
439
+ return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b);
440
+ }
441
+
442
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
443
+ _mm256_mulhi_epu16(__m256i __a, __m256i __b)
444
+ {
445
+ return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b);
446
+ }
447
+
448
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
449
+ _mm256_mulhi_epi16(__m256i __a, __m256i __b)
450
+ {
451
+ return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b);
452
+ }
453
+
454
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
455
+ _mm256_mullo_epi16(__m256i __a, __m256i __b)
456
+ {
457
+ return (__m256i)((__v16hi)__a * (__v16hi)__b);
458
+ }
459
+
460
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
461
+ _mm256_mullo_epi32 (__m256i __a, __m256i __b)
462
+ {
463
+ return (__m256i)((__v8si)__a * (__v8si)__b);
464
+ }
465
+
466
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
467
+ _mm256_mul_epu32(__m256i __a, __m256i __b)
468
+ {
469
+ return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b);
470
+ }
471
+
472
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
473
+ _mm256_or_si256(__m256i __a, __m256i __b)
474
+ {
475
+ return __a | __b;
476
+ }
477
+
478
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
479
+ _mm256_sad_epu8(__m256i __a, __m256i __b)
480
+ {
481
+ return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b);
482
+ }
483
+
484
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
485
+ _mm256_shuffle_epi8(__m256i __a, __m256i __b)
486
+ {
487
+ return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b);
488
+ }
489
+
490
+ #define _mm256_shuffle_epi32(a, imm) __extension__ ({ \
491
+ __m256i __a = (a); \
492
+ (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)_mm256_set1_epi32(0), \
493
+ (imm) & 0x3, ((imm) & 0xc) >> 2, \
494
+ ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \
495
+ 4 + (((imm) & 0x03) >> 0), \
496
+ 4 + (((imm) & 0x0c) >> 2), \
497
+ 4 + (((imm) & 0x30) >> 4), \
498
+ 4 + (((imm) & 0xc0) >> 6)); })
499
+
500
+ #define _mm256_shufflehi_epi16(a, imm) __extension__ ({ \
501
+ __m256i __a = (a); \
502
+ (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)_mm256_set1_epi16(0), \
503
+ 0, 1, 2, 3, \
504
+ 4 + (((imm) & 0x03) >> 0), \
505
+ 4 + (((imm) & 0x0c) >> 2), \
506
+ 4 + (((imm) & 0x30) >> 4), \
507
+ 4 + (((imm) & 0xc0) >> 6), \
508
+ 8, 9, 10, 11, \
509
+ 12 + (((imm) & 0x03) >> 0), \
510
+ 12 + (((imm) & 0x0c) >> 2), \
511
+ 12 + (((imm) & 0x30) >> 4), \
512
+ 12 + (((imm) & 0xc0) >> 6)); })
513
+
514
/* Shuffles the __v16hi view of `a` with __builtin_shufflevector. The four
 * 2-bit fields of `imm` give result indices 0-3, and the same fields plus 8
 * give indices 8-11; indices 4-7 and 12-15 are passed through unchanged.
 * `imm` is expanded several times and must be a constant.
 * `a` is cast in place rather than copied into a block-local temporary, so an
 * argument expression can never be captured by a macro-local name (and nested
 * uses do not shadow); `a` is still evaluated exactly once. */
#define _mm256_shufflelo_epi16(a, imm) __extension__ ({ \
  (__m256i)__builtin_shufflevector((__v16hi)(__m256i)(a), \
                                   (__v16hi)_mm256_set1_epi16(0), \
                                   (imm) & 0x3,((imm) & 0xc) >> 2, \
                                   ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \
                                   4, 5, 6, 7, \
                                   8 + (((imm) & 0x03) >> 0), \
                                   8 + (((imm) & 0x0c) >> 2), \
                                   8 + (((imm) & 0x30) >> 4), \
                                   8 + (((imm) & 0xc0) >> 6), \
                                   12, 13, 14, 15); })
525
+
526
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
527
+ _mm256_sign_epi8(__m256i __a, __m256i __b)
528
+ {
529
+ return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);
530
+ }
531
+
532
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
533
+ _mm256_sign_epi16(__m256i __a, __m256i __b)
534
+ {
535
+ return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);
536
+ }
537
+
538
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
539
+ _mm256_sign_epi32(__m256i __a, __m256i __b)
540
+ {
541
+ return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);
542
+ }
543
+
544
/* Forwards `a` to __builtin_ia32_pslldqi256 with `count` multiplied by 8.
 * `a` is cast in place rather than copied into a block-local temporary, so an
 * argument expression can never be captured by a macro-local name (and nested
 * uses do not shadow); each argument is still evaluated exactly once. */
#define _mm256_slli_si256(a, count) __extension__ ({ \
  (__m256i)__builtin_ia32_pslldqi256((__m256i)(a), (count)*8); })
547
+
548
/* Alias: passes both arguments through unchanged to _mm256_slli_si256. */
#define _mm256_bslli_epi128(a, count) \
  _mm256_slli_si256((a), (count))
549
+
550
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
551
+ _mm256_slli_epi16(__m256i __a, int __count)
552
+ {
553
+ return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count);
554
+ }
555
+
556
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
557
+ _mm256_sll_epi16(__m256i __a, __m128i __count)
558
+ {
559
+ return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count);
560
+ }
561
+
562
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
563
+ _mm256_slli_epi32(__m256i __a, int __count)
564
+ {
565
+ return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count);
566
+ }
567
+
568
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
569
+ _mm256_sll_epi32(__m256i __a, __m128i __count)
570
+ {
571
+ return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count);
572
+ }
573
+
574
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
575
+ _mm256_slli_epi64(__m256i __a, int __count)
576
+ {
577
+ return __builtin_ia32_psllqi256(__a, __count);
578
+ }
579
+
580
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
581
+ _mm256_sll_epi64(__m256i __a, __m128i __count)
582
+ {
583
+ return __builtin_ia32_psllq256(__a, __count);
584
+ }
585
+
586
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
587
+ _mm256_srai_epi16(__m256i __a, int __count)
588
+ {
589
+ return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count);
590
+ }
591
+
592
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
593
+ _mm256_sra_epi16(__m256i __a, __m128i __count)
594
+ {
595
+ return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count);
596
+ }
597
+
598
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
599
+ _mm256_srai_epi32(__m256i __a, int __count)
600
+ {
601
+ return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count);
602
+ }
603
+
604
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
605
+ _mm256_sra_epi32(__m256i __a, __m128i __count)
606
+ {
607
+ return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count);
608
+ }
609
+
610
/* Forwards `a` to __builtin_ia32_psrldqi256 with `count` multiplied by 8.
 * `a` is cast in place rather than copied into a block-local temporary, so an
 * argument expression can never be captured by a macro-local name (and nested
 * uses do not shadow); each argument is still evaluated exactly once. */
#define _mm256_srli_si256(a, count) __extension__ ({ \
  (__m256i)__builtin_ia32_psrldqi256((__m256i)(a), (count)*8); })
613
+
614
/* Alias: passes both arguments through unchanged to _mm256_srli_si256. */
#define _mm256_bsrli_epi128(a, count) \
  _mm256_srli_si256((a), (count))
615
+
616
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
617
+ _mm256_srli_epi16(__m256i __a, int __count)
618
+ {
619
+ return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count);
620
+ }
621
+
622
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
623
+ _mm256_srl_epi16(__m256i __a, __m128i __count)
624
+ {
625
+ return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count);
626
+ }
627
+
628
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
629
+ _mm256_srli_epi32(__m256i __a, int __count)
630
+ {
631
+ return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count);
632
+ }
633
+
634
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
635
+ _mm256_srl_epi32(__m256i __a, __m128i __count)
636
+ {
637
+ return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count);
638
+ }
639
+
640
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
641
+ _mm256_srli_epi64(__m256i __a, int __count)
642
+ {
643
+ return __builtin_ia32_psrlqi256(__a, __count);
644
+ }
645
+
646
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
647
+ _mm256_srl_epi64(__m256i __a, __m128i __count)
648
+ {
649
+ return __builtin_ia32_psrlq256(__a, __count);
650
+ }
651
+
652
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
653
+ _mm256_sub_epi8(__m256i __a, __m256i __b)
654
+ {
655
+ return (__m256i)((__v32qi)__a - (__v32qi)__b);
656
+ }
657
+
658
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
659
+ _mm256_sub_epi16(__m256i __a, __m256i __b)
660
+ {
661
+ return (__m256i)((__v16hi)__a - (__v16hi)__b);
662
+ }
663
+
664
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
665
+ _mm256_sub_epi32(__m256i __a, __m256i __b)
666
+ {
667
+ return (__m256i)((__v8si)__a - (__v8si)__b);
668
+ }
669
+
670
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
671
+ _mm256_sub_epi64(__m256i __a, __m256i __b)
672
+ {
673
+ return __a - __b;
674
+ }
675
+
676
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
677
+ _mm256_subs_epi8(__m256i __a, __m256i __b)
678
+ {
679
+ return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b);
680
+ }
681
+
682
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
683
+ _mm256_subs_epi16(__m256i __a, __m256i __b)
684
+ {
685
+ return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b);
686
+ }
687
+
688
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
689
+ _mm256_subs_epu8(__m256i __a, __m256i __b)
690
+ {
691
+ return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b);
692
+ }
693
+
694
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
695
+ _mm256_subs_epu16(__m256i __a, __m256i __b)
696
+ {
697
+ return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b);
698
+ }
699
+
700
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
701
+ _mm256_unpackhi_epi8(__m256i __a, __m256i __b)
702
+ {
703
+ return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31);
704
+ }
705
+
706
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
707
+ _mm256_unpackhi_epi16(__m256i __a, __m256i __b)
708
+ {
709
+ return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
710
+ }
711
+
712
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
713
+ _mm256_unpackhi_epi32(__m256i __a, __m256i __b)
714
+ {
715
+ return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7);
716
+ }
717
+
718
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
719
+ _mm256_unpackhi_epi64(__m256i __a, __m256i __b)
720
+ {
721
+ return (__m256i)__builtin_shufflevector(__a, __b, 1, 4+1, 3, 4+3);
722
+ }
723
+
724
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
725
+ _mm256_unpacklo_epi8(__m256i __a, __m256i __b)
726
+ {
727
+ return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23);
728
+ }
729
+
730
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
731
+ _mm256_unpacklo_epi16(__m256i __a, __m256i __b)
732
+ {
733
+ return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11);
734
+ }
735
+
736
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
737
+ _mm256_unpacklo_epi32(__m256i __a, __m256i __b)
738
+ {
739
+ return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5);
740
+ }
741
+
742
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
743
+ _mm256_unpacklo_epi64(__m256i __a, __m256i __b)
744
+ {
745
+ return (__m256i)__builtin_shufflevector(__a, __b, 0, 4+0, 2, 4+2);
746
+ }
747
+
748
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
749
+ _mm256_xor_si256(__m256i __a, __m256i __b)
750
+ {
751
+ return __a ^ __b;
752
+ }
753
+
754
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
755
+ _mm256_stream_load_si256(__m256i *__V)
756
+ {
757
+ return (__m256i)__builtin_ia32_movntdqa256((__v4di *)__V);
758
+ }
759
+
760
+ static __inline__ __m128 __DEFAULT_FN_ATTRS
761
+ _mm_broadcastss_ps(__m128 __X)
762
+ {
763
+ return (__m128)__builtin_ia32_vbroadcastss_ps((__v4sf)__X);
764
+ }
765
+
766
+ static __inline__ __m128d __DEFAULT_FN_ATTRS
767
+ _mm_broadcastsd_pd(__m128d __a)
768
+ {
769
+ return __builtin_shufflevector(__a, __a, 0, 0);
770
+ }
771
+
772
+ static __inline__ __m256 __DEFAULT_FN_ATTRS
773
+ _mm256_broadcastss_ps(__m128 __X)
774
+ {
775
+ return (__m256)__builtin_ia32_vbroadcastss_ps256((__v4sf)__X);
776
+ }
777
+
778
+ static __inline__ __m256d __DEFAULT_FN_ATTRS
779
+ _mm256_broadcastsd_pd(__m128d __X)
780
+ {
781
+ return (__m256d)__builtin_ia32_vbroadcastsd_pd256((__v2df)__X);
782
+ }
783
+
784
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
785
+ _mm256_broadcastsi128_si256(__m128i __X)
786
+ {
787
+ return (__m256i)__builtin_shufflevector(__X, __X, 0, 1, 0, 1);
788
+ }
789
+
790
/* Blends the __v4si views of V1 and V2 with __builtin_shufflevector: for
 * result index k (0-3), bit k of M chooses index 4+k when set and index k
 * when clear. M is expanded several times and must be a constant.
 * V1 and V2 are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name (and nested uses do not shadow); each is still evaluated
 * exactly once. */
#define _mm_blend_epi32(V1, V2, M) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v4si)(__m128i)(V1), \
                                   (__v4si)(__m128i)(V2), \
                                   (((M) & 0x01) ? 4 : 0), \
                                   (((M) & 0x02) ? 5 : 1), \
                                   (((M) & 0x04) ? 6 : 2), \
                                   (((M) & 0x08) ? 7 : 3)); })
798
+
799
/* Blends the __v8si views of V1 and V2 with __builtin_shufflevector: for
 * result index k (0-7), bit k of M chooses index 8+k when set and index k
 * when clear. M is expanded several times and must be a constant.
 * V1 and V2 are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name (and nested uses do not shadow); each is still evaluated
 * exactly once. */
#define _mm256_blend_epi32(V1, V2, M) __extension__ ({ \
  (__m256i)__builtin_shufflevector((__v8si)(__m256i)(V1), \
                                   (__v8si)(__m256i)(V2), \
                                   (((M) & 0x01) ?  8 : 0), \
                                   (((M) & 0x02) ?  9 : 1), \
                                   (((M) & 0x04) ? 10 : 2), \
                                   (((M) & 0x08) ? 11 : 3), \
                                   (((M) & 0x10) ? 12 : 4), \
                                   (((M) & 0x20) ? 13 : 5), \
                                   (((M) & 0x40) ? 14 : 6), \
                                   (((M) & 0x80) ? 15 : 7)); })
811
+
812
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
813
+ _mm256_broadcastb_epi8(__m128i __X)
814
+ {
815
+ return (__m256i)__builtin_ia32_pbroadcastb256((__v16qi)__X);
816
+ }
817
+
818
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
819
+ _mm256_broadcastw_epi16(__m128i __X)
820
+ {
821
+ return (__m256i)__builtin_ia32_pbroadcastw256((__v8hi)__X);
822
+ }
823
+
824
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
825
+ _mm256_broadcastd_epi32(__m128i __X)
826
+ {
827
+ return (__m256i)__builtin_ia32_pbroadcastd256((__v4si)__X);
828
+ }
829
+
830
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
831
+ _mm256_broadcastq_epi64(__m128i __X)
832
+ {
833
+ return (__m256i)__builtin_ia32_pbroadcastq256(__X);
834
+ }
835
+
836
+ static __inline__ __m128i __DEFAULT_FN_ATTRS
837
+ _mm_broadcastb_epi8(__m128i __X)
838
+ {
839
+ return (__m128i)__builtin_ia32_pbroadcastb128((__v16qi)__X);
840
+ }
841
+
842
+ static __inline__ __m128i __DEFAULT_FN_ATTRS
843
+ _mm_broadcastw_epi16(__m128i __X)
844
+ {
845
+ return (__m128i)__builtin_ia32_pbroadcastw128((__v8hi)__X);
846
+ }
847
+
848
+
849
+ static __inline__ __m128i __DEFAULT_FN_ATTRS
850
+ _mm_broadcastd_epi32(__m128i __X)
851
+ {
852
+ return (__m128i)__builtin_ia32_pbroadcastd128((__v4si)__X);
853
+ }
854
+
855
+ static __inline__ __m128i __DEFAULT_FN_ATTRS
856
+ _mm_broadcastq_epi64(__m128i __X)
857
+ {
858
+ return (__m128i)__builtin_ia32_pbroadcastq128(__X);
859
+ }
860
+
861
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
862
+ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
863
+ {
864
+ return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b);
865
+ }
866
+
867
/* Permutes the __v4df view of V with __builtin_shufflevector; the four 2-bit
 * fields of M give result indices 0-3. M is expanded several times and must
 * be a constant.
 * V is cast in place rather than copied into a block-local temporary, so an
 * argument expression can never be captured by a macro-local name (and nested
 * uses do not shadow); V is still evaluated exactly once. */
#define _mm256_permute4x64_pd(V, M) __extension__ ({ \
  (__m256d)__builtin_shufflevector((__v4df)(__m256d)(V), \
                                   (__v4df) _mm256_setzero_pd(), \
                                   (M) & 0x3, ((M) & 0xc) >> 2, \
                                   ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); })
872
+
873
+ static __inline__ __m256 __DEFAULT_FN_ATTRS
874
+ _mm256_permutevar8x32_ps(__m256 __a, __m256 __b)
875
+ {
876
+ return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8sf)__b);
877
+ }
878
+
879
/* Permutes the __v4di view of V with __builtin_shufflevector; the four 2-bit
 * fields of M give result indices 0-3. M is expanded several times and must
 * be a constant.
 * V is cast in place rather than copied into a block-local temporary, so an
 * argument expression can never be captured by a macro-local name (and nested
 * uses do not shadow); V is still evaluated exactly once. */
#define _mm256_permute4x64_epi64(V, M) __extension__ ({ \
  (__m256i)__builtin_shufflevector((__v4di)(__m256i)(V), \
                                   (__v4di) _mm256_setzero_si256(), \
                                   (M) & 0x3, ((M) & 0xc) >> 2, \
                                   ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); })
884
+
885
/* Forwards V1, V2 and M to __builtin_ia32_permti256.
 * V1 and V2 are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name (and nested uses do not shadow); each argument is still
 * evaluated exactly once. */
#define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \
  (__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (M)); })
889
+
890
/* Extracts two elements of the __v4di view of V with __builtin_shufflevector:
 * indices 2 and 3 when bit 0 of M is set, indices 0 and 1 otherwise. M is
 * expanded twice and must be a constant. */
#define _mm256_extracti128_si256(V, M) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v4di)(V), \
                                   (__v4di)(_mm256_setzero_si256()), \
                                   (((M) & 1) ? 2 : 0), \
                                   (((M) & 1) ? 3 : 1) );})
896
+
897
/* Combines the __v4di view of V1 with V2 (widened through
 * _mm256_castsi128_si256) using __builtin_shufflevector. When bit 0 of M is
 * set the index list is 0, 1, 4, 5; otherwise it is 4, 5, 2, 3. M is expanded
 * four times and must be a constant. */
#define _mm256_inserti128_si256(V1, V2, M) __extension__ ({ \
  (__m256i)__builtin_shufflevector((__v4di)(V1), \
                                   (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \
                                   (((M) & 1) ? 0 : 4), \
                                   (((M) & 1) ? 1 : 5), \
                                   (((M) & 1) ? 4 : 2), \
                                   (((M) & 1) ? 5 : 3) );})
905
+
906
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
907
+ _mm256_maskload_epi32(int const *__X, __m256i __M)
908
+ {
909
+ return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M);
910
+ }
911
+
912
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
913
+ _mm256_maskload_epi64(long long const *__X, __m256i __M)
914
+ {
915
+ return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, __M);
916
+ }
917
+
918
+ static __inline__ __m128i __DEFAULT_FN_ATTRS
919
+ _mm_maskload_epi32(int const *__X, __m128i __M)
920
+ {
921
+ return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M);
922
+ }
923
+
924
+ static __inline__ __m128i __DEFAULT_FN_ATTRS
925
+ _mm_maskload_epi64(long long const *__X, __m128i __M)
926
+ {
927
+ return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M);
928
+ }
929
+
930
+ static __inline__ void __DEFAULT_FN_ATTRS
931
+ _mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y)
932
+ {
933
+ __builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y);
934
+ }
935
+
936
+ static __inline__ void __DEFAULT_FN_ATTRS
937
+ _mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y)
938
+ {
939
+ __builtin_ia32_maskstoreq256((__v4di *)__X, __M, __Y);
940
+ }
941
+
942
+ static __inline__ void __DEFAULT_FN_ATTRS
943
+ _mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y)
944
+ {
945
+ __builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y);
946
+ }
947
+
948
+ static __inline__ void __DEFAULT_FN_ATTRS
949
+ _mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y)
950
+ {
951
+ __builtin_ia32_maskstoreq(( __v2di *)__X, __M, __Y);
952
+ }
953
+
954
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
955
+ _mm256_sllv_epi32(__m256i __X, __m256i __Y)
956
+ {
957
+ return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y);
958
+ }
959
+
960
+ static __inline__ __m128i __DEFAULT_FN_ATTRS
961
+ _mm_sllv_epi32(__m128i __X, __m128i __Y)
962
+ {
963
+ return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y);
964
+ }
965
+
966
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
967
+ _mm256_sllv_epi64(__m256i __X, __m256i __Y)
968
+ {
969
+ return (__m256i)__builtin_ia32_psllv4di(__X, __Y);
970
+ }
971
+
972
+ static __inline__ __m128i __DEFAULT_FN_ATTRS
973
+ _mm_sllv_epi64(__m128i __X, __m128i __Y)
974
+ {
975
+ return (__m128i)__builtin_ia32_psllv2di(__X, __Y);
976
+ }
977
+
978
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
979
+ _mm256_srav_epi32(__m256i __X, __m256i __Y)
980
+ {
981
+ return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y);
982
+ }
983
+
984
+ static __inline__ __m128i __DEFAULT_FN_ATTRS
985
+ _mm_srav_epi32(__m128i __X, __m128i __Y)
986
+ {
987
+ return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y);
988
+ }
989
+
990
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
991
+ _mm256_srlv_epi32(__m256i __X, __m256i __Y)
992
+ {
993
+ return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y);
994
+ }
995
+
996
+ static __inline__ __m128i __DEFAULT_FN_ATTRS
997
+ _mm_srlv_epi32(__m128i __X, __m128i __Y)
998
+ {
999
+ return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y);
1000
+ }
1001
+
1002
+ static __inline__ __m256i __DEFAULT_FN_ATTRS
1003
+ _mm256_srlv_epi64(__m256i __X, __m256i __Y)
1004
+ {
1005
+ return (__m256i)__builtin_ia32_psrlv4di(__X, __Y);
1006
+ }
1007
+
1008
+ static __inline__ __m128i __DEFAULT_FN_ATTRS
1009
+ _mm_srlv_epi64(__m128i __X, __m128i __Y)
1010
+ {
1011
+ return (__m128i)__builtin_ia32_psrlv2di(__X, __Y);
1012
+ }
1013
+
1014
/* Forwards to __builtin_ia32_gatherd_pd with `a` and `mask` viewed as
 * __v2df, `m` (a double const *) as a const __v2df pointer, `i` as __v4si,
 * and `s` passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \
  (__m128d)__builtin_ia32_gatherd_pd((__v2df)(__m128d)(a), \
                                     (const __v2df *)(double const *)(m), \
                                     (__v4si)(__m128i)(i), \
                                     (__v2df)(__m128d)(mask), (s)); })
1021
+
1022
/* Forwards to __builtin_ia32_gatherd_pd256 with `a` and `mask` viewed as
 * __v4df, `m` (a double const *) as a const __v4df pointer, the 128-bit `i`
 * as __v4si, and `s` passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm256_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \
  (__m256d)__builtin_ia32_gatherd_pd256((__v4df)(__m256d)(a), \
                                        (const __v4df *)(double const *)(m), \
                                        (__v4si)(__m128i)(i), \
                                        (__v4df)(__m256d)(mask), (s)); })
1029
+
1030
/* Forwards to __builtin_ia32_gatherq_pd with `a` and `mask` viewed as
 * __v2df, `m` (a double const *) as a const __v2df pointer, `i` as __v2di,
 * and `s` passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \
  (__m128d)__builtin_ia32_gatherq_pd((__v2df)(__m128d)(a), \
                                     (const __v2df *)(double const *)(m), \
                                     (__v2di)(__m128i)(i), \
                                     (__v2df)(__m128d)(mask), (s)); })
1037
+
1038
/* Forwards to __builtin_ia32_gatherq_pd256 with `a` and `mask` viewed as
 * __v4df, `m` (a double const *) as a const __v4df pointer, `i` as __v4di,
 * and `s` passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm256_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \
  (__m256d)__builtin_ia32_gatherq_pd256((__v4df)(__m256d)(a), \
                                        (const __v4df *)(double const *)(m), \
                                        (__v4di)(__m256i)(i), \
                                        (__v4df)(__m256d)(mask), (s)); })
1045
+
1046
/* Forwards to __builtin_ia32_gatherd_ps with `a` and `mask` viewed as
 * __v4sf, `m` (a float const *) as a const __v4sf pointer, `i` as __v4si,
 * and `s` passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \
  (__m128)__builtin_ia32_gatherd_ps((__v4sf)(__m128)(a), \
                                    (const __v4sf *)(float const *)(m), \
                                    (__v4si)(__m128i)(i), \
                                    (__v4sf)(__m128)(mask), (s)); })
1053
+
1054
/* Forwards to __builtin_ia32_gatherd_ps256 with `a` and `mask` viewed as
 * __v8sf, `m` (a float const *) as a const __v8sf pointer, `i` as __v8si,
 * and `s` passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm256_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \
  (__m256)__builtin_ia32_gatherd_ps256((__v8sf)(__m256)(a), \
                                       (const __v8sf *)(float const *)(m), \
                                       (__v8si)(__m256i)(i), \
                                       (__v8sf)(__m256)(mask), (s)); })
1061
+
1062
/* Forwards to __builtin_ia32_gatherq_ps with `a` and `mask` viewed as
 * __v4sf, `m` (a float const *) as a const __v4sf pointer, `i` as __v2di,
 * and `s` passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \
  (__m128)__builtin_ia32_gatherq_ps((__v4sf)(__m128)(a), \
                                    (const __v4sf *)(float const *)(m), \
                                    (__v2di)(__m128i)(i), \
                                    (__v4sf)(__m128)(mask), (s)); })
1069
+
1070
/* Forwards to __builtin_ia32_gatherq_ps256. Unlike the index `i`, which is a
 * 256-bit vector viewed as __v4di, `a`, `mask` and the result are 128-bit
 * (__m128, viewed as __v4sf); `m` (a float const *) is viewed as a const
 * __v4sf pointer and `s` is passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm256_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \
  (__m128)__builtin_ia32_gatherq_ps256((__v4sf)(__m128)(a), \
                                       (const __v4sf *)(float const *)(m), \
                                       (__v4di)(__m256i)(i), \
                                       (__v4sf)(__m128)(mask), (s)); })
1077
+
1078
/* Forwards to __builtin_ia32_gatherd_d with `a`, `i` and `mask` viewed as
 * __v4si, `m` (an int const *) as a const __v4si pointer, and `s` passed
 * through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \
  (__m128i)__builtin_ia32_gatherd_d((__v4si)(__m128i)(a), \
                                    (const __v4si *)(int const *)(m), \
                                    (__v4si)(__m128i)(i), \
                                    (__v4si)(__m128i)(mask), (s)); })
1085
+
1086
/* Forwards to __builtin_ia32_gatherd_d256 with `a`, `i` and `mask` viewed as
 * __v8si, `m` (an int const *) as a const __v8si pointer, and `s` passed
 * through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \
  (__m256i)__builtin_ia32_gatherd_d256((__v8si)(__m256i)(a), \
                                       (const __v8si *)(int const *)(m), \
                                       (__v8si)(__m256i)(i), \
                                       (__v8si)(__m256i)(mask), (s)); })
1093
+
1094
/* Forwards to __builtin_ia32_gatherq_d with `a` and `mask` viewed as __v4si,
 * `m` (an int const *) as a const __v4si pointer, `i` as __v2di, and `s`
 * passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \
  (__m128i)__builtin_ia32_gatherq_d((__v4si)(__m128i)(a), \
                                    (const __v4si *)(int const *)(m), \
                                    (__v2di)(__m128i)(i), \
                                    (__v4si)(__m128i)(mask), (s)); })
1101
+
1102
/* Forwards to __builtin_ia32_gatherq_d256. Unlike the index `i`, which is a
 * 256-bit vector viewed as __v4di, `a`, `mask` and the result are 128-bit
 * (__m128i, viewed as __v4si); `m` (an int const *) is viewed as a const
 * __v4si pointer and `s` is passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \
  (__m128i)__builtin_ia32_gatherq_d256((__v4si)(__m128i)(a), \
                                       (const __v4si *)(int const *)(m), \
                                       (__v4di)(__m256i)(i), \
                                       (__v4si)(__m128i)(mask), (s)); })
1109
+
1110
/* Forwards to __builtin_ia32_gatherd_q with `a` and `mask` viewed as __v2di,
 * `m` (a long long const *) as a const __v2di pointer, `i` as __v4si, and
 * `s` passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \
  (__m128i)__builtin_ia32_gatherd_q((__v2di)(__m128i)(a), \
                                    (const __v2di *)(long long const *)(m), \
                                    (__v4si)(__m128i)(i), \
                                    (__v2di)(__m128i)(mask), (s)); })
1117
+
1118
/* Forwards to __builtin_ia32_gatherd_q256 with `a` and `mask` viewed as
 * __v4di, `m` (a long long const *) as a const __v4di pointer, the 128-bit
 * `i` as __v4si, and `s` passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \
  (__m256i)__builtin_ia32_gatherd_q256((__v4di)(__m256i)(a), \
                                       (const __v4di *)(long long const *)(m), \
                                       (__v4si)(__m128i)(i), \
                                       (__v4di)(__m256i)(mask), (s)); })
1125
+
1126
/* Forwards to __builtin_ia32_gatherq_q with `a`, `i` and `mask` viewed as
 * __v2di, `m` (a long long const *) as a const __v2di pointer, and `s`
 * passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \
  (__m128i)__builtin_ia32_gatherq_q((__v2di)(__m128i)(a), \
                                    (const __v2di *)(long long const *)(m), \
                                    (__v2di)(__m128i)(i), \
                                    (__v2di)(__m128i)(mask), (s)); })
1133
+
1134
/* Forwards to __builtin_ia32_gatherq_q256 with `a`, `i` and `mask` viewed as
 * __v4di, `m` (a long long const *) as a const __v4di pointer, and `s`
 * passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \
  (__m256i)__builtin_ia32_gatherq_q256((__v4di)(__m256i)(a), \
                                       (const __v4di *)(long long const *)(m), \
                                       (__v4di)(__m256i)(i), \
                                       (__v4di)(__m256i)(mask), (s)); })
1141
+
1142
/* Forwards to __builtin_ia32_gatherd_pd with _mm_setzero_pd() as the first
 * operand, `m` (a double const *) viewed as a const __v2df pointer, `i` as
 * __v4si, a mask operand built by _mm_set1_pd from -1 converted to double,
 * and `s` passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm_i32gather_pd(m, i, s) __extension__ ({ \
  (__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_setzero_pd(), \
                                     (const __v2df *)(double const *)(m), \
                                     (__v4si)(__m128i)(i), \
                                     (__v2df)_mm_set1_pd((double)(long long int)-1), \
                                     (s)); })
1148
+
1149
/* Forwards to __builtin_ia32_gatherd_pd256 with _mm256_setzero_pd() as the
 * first operand, `m` (a double const *) viewed as a const __v4df pointer, the
 * 128-bit `i` as __v4si, a mask operand built by _mm256_set1_pd from -1
 * converted to double, and `s` passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm256_i32gather_pd(m, i, s) __extension__ ({ \
  (__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_setzero_pd(), \
                                        (const __v4df *)(double const *)(m), \
                                        (__v4si)(__m128i)(i), \
                                        (__v4df)_mm256_set1_pd((double)(long long int)-1), \
                                        (s)); })
1155
+
1156
/* Forwards to __builtin_ia32_gatherq_pd with _mm_setzero_pd() as the first
 * operand, `m` (a double const *) viewed as a const __v2df pointer, `i` as
 * __v2di, a mask operand built by _mm_set1_pd from -1 converted to double,
 * and `s` passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm_i64gather_pd(m, i, s) __extension__ ({ \
  (__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_setzero_pd(), \
                                     (const __v2df *)(double const *)(m), \
                                     (__v2di)(__m128i)(i), \
                                     (__v2df)_mm_set1_pd((double)(long long int)-1), \
                                     (s)); })
1162
+
1163
/* Forwards to __builtin_ia32_gatherq_pd256 with _mm256_setzero_pd() as the
 * first operand, `m` (a double const *) viewed as a const __v4df pointer, `i`
 * as __v4di, a mask operand built by _mm256_set1_pd from -1 converted to
 * double, and `s` passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm256_i64gather_pd(m, i, s) __extension__ ({ \
  (__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_setzero_pd(), \
                                        (const __v4df *)(double const *)(m), \
                                        (__v4di)(__m256i)(i), \
                                        (__v4df)_mm256_set1_pd((double)(long long int)-1), \
                                        (s)); })
1169
+
1170
/* Forwards to __builtin_ia32_gatherd_ps with _mm_setzero_ps() as the first
 * operand, `m` (a float const *) viewed as a const __v4sf pointer, `i` as
 * __v4si, a mask operand built by _mm_set1_ps from -1 converted to float,
 * and `s` passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm_i32gather_ps(m, i, s) __extension__ ({ \
  (__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_setzero_ps(), \
                                    (const __v4sf *)(float const *)(m), \
                                    (__v4si)(__m128i)(i), \
                                    (__v4sf)_mm_set1_ps((float)(int)-1), \
                                    (s)); })
1176
+
1177
/* Forwards to __builtin_ia32_gatherd_ps256 with _mm256_setzero_ps() as the
 * first operand, `m` (a float const *) viewed as a const __v8sf pointer, `i`
 * as __v8si, a mask operand built by _mm256_set1_ps from -1 converted to
 * float, and `s` passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm256_i32gather_ps(m, i, s) __extension__ ({ \
  (__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_setzero_ps(), \
                                       (const __v8sf *)(float const *)(m), \
                                       (__v8si)(__m256i)(i), \
                                       (__v8sf)_mm256_set1_ps((float)(int)-1), \
                                       (s)); })
1183
+
1184
/* Forwards to __builtin_ia32_gatherq_ps with _mm_setzero_ps() as the first
 * operand, `m` (a float const *) viewed as a const __v4sf pointer, `i` as
 * __v2di, a mask operand built by _mm_set1_ps from -1 converted to float,
 * and `s` passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm_i64gather_ps(m, i, s) __extension__ ({ \
  (__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_setzero_ps(), \
                                    (const __v4sf *)(float const *)(m), \
                                    (__v2di)(__m128i)(i), \
                                    (__v4sf)_mm_set1_ps((float)(int)-1), \
                                    (s)); })
1190
+
1191
/* Forwards to __builtin_ia32_gatherq_ps256. The index `i` is a 256-bit
 * vector viewed as __v4di, while the first operand (_mm_setzero_ps()), the
 * mask operand (_mm_set1_ps of -1 converted to float) and the result are
 * 128-bit; `m` (a float const *) is viewed as a const __v4sf pointer and `s`
 * is passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm256_i64gather_ps(m, i, s) __extension__ ({ \
  (__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_setzero_ps(), \
                                       (const __v4sf *)(float const *)(m), \
                                       (__v4di)(__m256i)(i), \
                                       (__v4sf)_mm_set1_ps((float)(int)-1), \
                                       (s)); })
1197
+
1198
/* Forwards to __builtin_ia32_gatherd_d with _mm_setzero_si128() as the first
 * operand, `m` (an int const *) viewed as a const __v4si pointer, `i` as
 * __v4si, _mm_set1_epi32(-1) as the mask operand, and `s` passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm_i32gather_epi32(m, i, s) __extension__ ({ \
  (__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_setzero_si128(), \
                                    (const __v4si *)(int const *)(m), \
                                    (__v4si)(__m128i)(i), \
                                    (__v4si)_mm_set1_epi32(-1), (s)); })
1204
+
1205
/* Forwards to __builtin_ia32_gatherd_d256 with _mm256_setzero_si256() as the
 * first operand, `m` (an int const *) viewed as a const __v8si pointer, `i`
 * as __v8si, _mm256_set1_epi32(-1) as the mask operand, and `s` passed
 * through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm256_i32gather_epi32(m, i, s) __extension__ ({ \
  (__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_setzero_si256(), \
                                       (const __v8si *)(int const *)(m), \
                                       (__v8si)(__m256i)(i), \
                                       (__v8si)_mm256_set1_epi32(-1), (s)); })
1211
+
1212
/* Forwards to __builtin_ia32_gatherq_d with _mm_setzero_si128() as the first
 * operand, `m` (an int const *) viewed as a const __v4si pointer, `i` as
 * __v2di, _mm_set1_epi32(-1) as the mask operand, and `s` passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm_i64gather_epi32(m, i, s) __extension__ ({ \
  (__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_setzero_si128(), \
                                    (const __v4si *)(int const *)(m), \
                                    (__v2di)(__m128i)(i), \
                                    (__v4si)_mm_set1_epi32(-1), (s)); })
1218
+
1219
/* Forwards to __builtin_ia32_gatherq_d256. The index `i` is a 256-bit vector
 * viewed as __v4di, while the first operand (_mm_setzero_si128()), the mask
 * operand (_mm_set1_epi32(-1)) and the result are 128-bit; `m` (an int
 * const *) is viewed as a const __v4si pointer and `s` is passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm256_i64gather_epi32(m, i, s) __extension__ ({ \
  (__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_setzero_si128(), \
                                       (const __v4si *)(int const *)(m), \
                                       (__v4di)(__m256i)(i), \
                                       (__v4si)_mm_set1_epi32(-1), (s)); })
1225
+
1226
/* Forwards to __builtin_ia32_gatherd_q with _mm_setzero_si128() as the first
 * operand, `m` (a long long const *) viewed as a const __v2di pointer, `i` as
 * __v4si, _mm_set1_epi64x(-1) as the mask operand, and `s` passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm_i32gather_epi64(m, i, s) __extension__ ({ \
  (__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_setzero_si128(), \
                                    (const __v2di *)(long long const *)(m), \
                                    (__v4si)(__m128i)(i), \
                                    (__v2di)_mm_set1_epi64x(-1), (s)); })
1232
+
1233
/* Forwards to __builtin_ia32_gatherd_q256 with _mm256_setzero_si256() as the
 * first operand, `m` (a long long const *) viewed as a const __v4di pointer,
 * the 128-bit `i` as __v4si, _mm256_set1_epi64x(-1) as the mask operand, and
 * `s` passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm256_i32gather_epi64(m, i, s) __extension__ ({ \
  (__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_setzero_si256(), \
                                       (const __v4di *)(long long const *)(m), \
                                       (__v4si)(__m128i)(i), \
                                       (__v4di)_mm256_set1_epi64x(-1), (s)); })
1239
+
1240
/* Forwards to __builtin_ia32_gatherq_q with _mm_setzero_si128() as the first
 * operand, `m` (a long long const *) viewed as a const __v2di pointer, `i` as
 * __v2di, _mm_set1_epi64x(-1) as the mask operand, and `s` passed through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm_i64gather_epi64(m, i, s) __extension__ ({ \
  (__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_setzero_si128(), \
                                    (const __v2di *)(long long const *)(m), \
                                    (__v2di)(__m128i)(i), \
                                    (__v2di)_mm_set1_epi64x(-1), (s)); })
1246
+
1247
/* Forwards to __builtin_ia32_gatherq_q256 with _mm256_setzero_si256() as the
 * first operand, `m` (a long long const *) viewed as a const __v4di pointer,
 * `i` as __v4di, _mm256_set1_epi64x(-1) as the mask operand, and `s` passed
 * through.
 * Arguments are cast in place rather than copied into block-local
 * temporaries, so an argument expression can never be captured by a
 * macro-local name; each argument is still evaluated exactly once. */
#define _mm256_i64gather_epi64(m, i, s) __extension__ ({ \
  (__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_setzero_si256(), \
                                       (const __v4di *)(long long const *)(m), \
                                       (__v4di)(__m256i)(i), \
                                       (__v4di)_mm256_set1_epi64x(-1), (s)); })
1253
+
1254
+ #undef __DEFAULT_FN_ATTRS
1255
+
1256
+ #endif /* __AVX2INTRIN_H */