objective-ci 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (136) hide show
  1. checksums.yaml +15 -0
  2. data/README.md +1 -3
  3. data/Rakefile +1 -1
  4. data/bin/oclint +4 -0
  5. data/bin/oclint-0.8 +1 -1
  6. data/bin/oclint-json-compilation-database +1 -1
  7. data/bin/oclint-xcodebuild +1 -1
  8. data/externals/oclint/LICENSE +69 -0
  9. data/externals/oclint/bin/oclint +0 -0
  10. data/externals/oclint/{oclint-0.8 → bin/oclint-0.8} +0 -0
  11. data/externals/oclint/{oclint-json-compilation-database → bin/oclint-json-compilation-database} +0 -0
  12. data/externals/oclint/{oclint-xcodebuild → bin/oclint-xcodebuild} +0 -0
  13. data/externals/oclint/lib/clang/3.4/asan_blacklist.txt +10 -0
  14. data/externals/oclint/lib/clang/3.4/include/Intrin.h +784 -0
  15. data/externals/oclint/lib/clang/3.4/include/__wmmintrin_aes.h +67 -0
  16. data/externals/oclint/lib/clang/3.4/include/__wmmintrin_pclmul.h +34 -0
  17. data/externals/oclint/lib/clang/3.4/include/altivec.h +11856 -0
  18. data/externals/oclint/lib/clang/3.4/include/ammintrin.h +68 -0
  19. data/externals/oclint/lib/clang/3.4/include/arm_neon.h +6802 -0
  20. data/externals/oclint/lib/clang/3.4/include/avx2intrin.h +1206 -0
  21. data/externals/oclint/lib/clang/3.4/include/avxintrin.h +1224 -0
  22. data/externals/oclint/lib/clang/3.4/include/bmi2intrin.h +94 -0
  23. data/externals/oclint/lib/clang/3.4/include/bmiintrin.h +115 -0
  24. data/externals/oclint/lib/clang/3.4/include/cpuid.h +156 -0
  25. data/externals/oclint/lib/clang/3.4/include/emmintrin.h +1451 -0
  26. data/externals/oclint/lib/clang/3.4/include/f16cintrin.h +58 -0
  27. data/externals/oclint/lib/clang/3.4/include/float.h +124 -0
  28. data/externals/oclint/lib/clang/3.4/include/fma4intrin.h +231 -0
  29. data/externals/oclint/lib/clang/3.4/include/fmaintrin.h +229 -0
  30. data/externals/oclint/lib/clang/3.4/include/immintrin.h +118 -0
  31. data/externals/oclint/lib/clang/3.4/include/iso646.h +43 -0
  32. data/externals/oclint/lib/clang/3.4/include/limits.h +119 -0
  33. data/externals/oclint/lib/clang/3.4/include/lzcntintrin.h +55 -0
  34. data/externals/oclint/lib/clang/3.4/include/mm3dnow.h +162 -0
  35. data/externals/oclint/lib/clang/3.4/include/mm_malloc.h +75 -0
  36. data/externals/oclint/lib/clang/3.4/include/mmintrin.h +503 -0
  37. data/externals/oclint/lib/clang/3.4/include/module.map +156 -0
  38. data/externals/oclint/lib/clang/3.4/include/nmmintrin.h +35 -0
  39. data/externals/oclint/lib/clang/3.4/include/pmmintrin.h +117 -0
  40. data/externals/oclint/lib/clang/3.4/include/popcntintrin.h +45 -0
  41. data/externals/oclint/lib/clang/3.4/include/prfchwintrin.h +39 -0
  42. data/externals/oclint/lib/clang/3.4/include/rdseedintrin.h +52 -0
  43. data/externals/oclint/lib/clang/3.4/include/rtmintrin.h +54 -0
  44. data/externals/oclint/lib/clang/3.4/include/sanitizer/asan_interface.h +137 -0
  45. data/externals/oclint/lib/clang/3.4/include/sanitizer/common_interface_defs.h +54 -0
  46. data/externals/oclint/lib/clang/3.4/include/sanitizer/dfsan_interface.h +87 -0
  47. data/externals/oclint/lib/clang/3.4/include/sanitizer/linux_syscall_hooks.h +3070 -0
  48. data/externals/oclint/lib/clang/3.4/include/sanitizer/lsan_interface.h +52 -0
  49. data/externals/oclint/lib/clang/3.4/include/sanitizer/msan_interface.h +162 -0
  50. data/externals/oclint/lib/clang/3.4/include/shaintrin.h +74 -0
  51. data/externals/oclint/lib/clang/3.4/include/smmintrin.h +468 -0
  52. data/externals/oclint/lib/clang/3.4/include/stdalign.h +35 -0
  53. data/externals/oclint/lib/clang/3.4/include/stdarg.h +50 -0
  54. data/externals/oclint/lib/clang/3.4/include/stdbool.h +44 -0
  55. data/externals/oclint/lib/clang/3.4/include/stddef.h +102 -0
  56. data/externals/oclint/lib/clang/3.4/include/stdint.h +708 -0
  57. data/externals/oclint/lib/clang/3.4/include/stdnoreturn.h +30 -0
  58. data/externals/oclint/lib/clang/3.4/include/tbmintrin.h +158 -0
  59. data/externals/oclint/lib/clang/3.4/include/tgmath.h +1374 -0
  60. data/externals/oclint/lib/clang/3.4/include/tmmintrin.h +225 -0
  61. data/externals/oclint/lib/clang/3.4/include/unwind.h +280 -0
  62. data/externals/oclint/lib/clang/3.4/include/varargs.h +26 -0
  63. data/externals/oclint/lib/clang/3.4/include/wmmintrin.h +42 -0
  64. data/externals/oclint/lib/clang/3.4/include/x86intrin.h +79 -0
  65. data/externals/oclint/lib/clang/3.4/include/xmmintrin.h +1001 -0
  66. data/externals/oclint/lib/clang/3.4/include/xopintrin.h +804 -0
  67. data/externals/oclint/lib/clang/3.4/lib/darwin/libclang_rt.asan_osx_dynamic.dylib +0 -0
  68. data/externals/oclint/lib/clang/3.4/lib/darwin/libclang_rt.i386.a +0 -0
  69. data/externals/oclint/lib/clang/3.4/lib/darwin/libclang_rt.profile_osx.a +0 -0
  70. data/externals/oclint/lib/clang/3.4/lib/darwin/libclang_rt.ubsan_osx.a +0 -0
  71. data/externals/oclint/lib/clang/3.4/lib/darwin/libclang_rt.x86_64.a +0 -0
  72. data/externals/oclint/lib/oclint/reporters/libHTMLReporter.dylib +0 -0
  73. data/externals/oclint/lib/oclint/reporters/libJSONReporter.dylib +0 -0
  74. data/externals/oclint/lib/oclint/reporters/libPMDReporter.dylib +0 -0
  75. data/externals/oclint/lib/oclint/reporters/libTextReporter.dylib +0 -0
  76. data/externals/oclint/lib/oclint/reporters/libXMLReporter.dylib +0 -0
  77. data/externals/oclint/lib/oclint/rules/libAvoidBranchingStatementAsLastInLoopRule.dylib +0 -0
  78. data/externals/oclint/lib/oclint/rules/libBitwiseOperatorInConditionalRule.dylib +0 -0
  79. data/externals/oclint/lib/oclint/rules/libBrokenNullCheckRule.dylib +0 -0
  80. data/externals/oclint/lib/oclint/rules/libBrokenOddnessCheckRule.dylib +0 -0
  81. data/externals/oclint/lib/oclint/rules/libCollapsibleIfStatementsRule.dylib +0 -0
  82. data/externals/oclint/lib/oclint/rules/libConstantConditionalOperatorRule.dylib +0 -0
  83. data/externals/oclint/lib/oclint/rules/libConstantIfExpressionRule.dylib +0 -0
  84. data/externals/oclint/lib/oclint/rules/libCyclomaticComplexityRule.dylib +0 -0
  85. data/externals/oclint/lib/oclint/rules/libDeadCodeRule.dylib +0 -0
  86. data/externals/oclint/lib/oclint/rules/libDefaultLabelNotLastInSwitchStatementRule.dylib +0 -0
  87. data/externals/oclint/lib/oclint/rules/libDoubleNegativeRule.dylib +0 -0
  88. data/externals/oclint/lib/oclint/rules/libEmptyCatchStatementRule.dylib +0 -0
  89. data/externals/oclint/lib/oclint/rules/libEmptyDoWhileStatementRule.dylib +0 -0
  90. data/externals/oclint/lib/oclint/rules/libEmptyElseBlockRule.dylib +0 -0
  91. data/externals/oclint/lib/oclint/rules/libEmptyFinallyStatementRule.dylib +0 -0
  92. data/externals/oclint/lib/oclint/rules/libEmptyForStatementRule.dylib +0 -0
  93. data/externals/oclint/lib/oclint/rules/libEmptyIfStatementRule.dylib +0 -0
  94. data/externals/oclint/lib/oclint/rules/libEmptySwitchStatementRule.dylib +0 -0
  95. data/externals/oclint/lib/oclint/rules/libEmptyTryStatementRule.dylib +0 -0
  96. data/externals/oclint/lib/oclint/rules/libEmptyWhileStatementRule.dylib +0 -0
  97. data/externals/oclint/lib/oclint/rules/libForLoopShouldBeWhileLoopRule.dylib +0 -0
  98. data/externals/oclint/lib/oclint/rules/libGotoStatementRule.dylib +0 -0
  99. data/externals/oclint/lib/oclint/rules/libInvertedLogicRule.dylib +0 -0
  100. data/externals/oclint/lib/oclint/rules/libJumbledIncrementerRule.dylib +0 -0
  101. data/externals/oclint/lib/oclint/rules/libLongClassRule.dylib +0 -0
  102. data/externals/oclint/lib/oclint/rules/libLongLineRule.dylib +0 -0
  103. data/externals/oclint/lib/oclint/rules/libLongMethodRule.dylib +0 -0
  104. data/externals/oclint/lib/oclint/rules/libLongVariableNameRule.dylib +0 -0
  105. data/externals/oclint/lib/oclint/rules/libMisplacedNullCheckRule.dylib +0 -0
  106. data/externals/oclint/lib/oclint/rules/libMissingBreakInSwitchStatementRule.dylib +0 -0
  107. data/externals/oclint/lib/oclint/rules/libMultipleUnaryOperatorRule.dylib +0 -0
  108. data/externals/oclint/lib/oclint/rules/libNPathComplexityRule.dylib +0 -0
  109. data/externals/oclint/lib/oclint/rules/libNcssMethodCountRule.dylib +0 -0
  110. data/externals/oclint/lib/oclint/rules/libNestedBlockDepthRule.dylib +0 -0
  111. data/externals/oclint/lib/oclint/rules/libNonCaseLabelInSwitchStatementRule.dylib +0 -0
  112. data/externals/oclint/lib/oclint/rules/libObjCBoxedExpressionsRule.dylib +0 -0
  113. data/externals/oclint/lib/oclint/rules/libObjCContainerLiteralsRule.dylib +0 -0
  114. data/externals/oclint/lib/oclint/rules/libObjCNSNumberLiteralsRule.dylib +0 -0
  115. data/externals/oclint/lib/oclint/rules/libObjCObjectSubscriptingRule.dylib +0 -0
  116. data/externals/oclint/lib/oclint/rules/libParameterReassignmentRule.dylib +0 -0
  117. data/externals/oclint/lib/oclint/rules/libRedundantConditionalOperatorRule.dylib +0 -0
  118. data/externals/oclint/lib/oclint/rules/libRedundantIfStatementRule.dylib +0 -0
  119. data/externals/oclint/lib/oclint/rules/libRedundantLocalVariableRule.dylib +0 -0
  120. data/externals/oclint/lib/oclint/rules/libRedundantNilCheckRule.dylib +0 -0
  121. data/externals/oclint/lib/oclint/rules/libReturnFromFinallyBlockRule.dylib +0 -0
  122. data/externals/oclint/lib/oclint/rules/libShortVariableNameRule.dylib +0 -0
  123. data/externals/oclint/lib/oclint/rules/libSwitchStatementsShouldHaveDefaultRule.dylib +0 -0
  124. data/externals/oclint/lib/oclint/rules/libThrowExceptionFromFinallyBlockRule.dylib +0 -0
  125. data/externals/oclint/lib/oclint/rules/libTooFewBranchesInSwitchStatementRule.dylib +0 -0
  126. data/externals/oclint/lib/oclint/rules/libTooManyFieldsRule.dylib +0 -0
  127. data/externals/oclint/lib/oclint/rules/libTooManyMethodsRule.dylib +0 -0
  128. data/externals/oclint/lib/oclint/rules/libTooManyParametersRule.dylib +0 -0
  129. data/externals/oclint/lib/oclint/rules/libUnnecessaryElseStatementRule.dylib +0 -0
  130. data/externals/oclint/lib/oclint/rules/libUnnecessaryNullCheckForCXXDeallocRule.dylib +0 -0
  131. data/externals/oclint/lib/oclint/rules/libUnusedLocalVariableRule.dylib +0 -0
  132. data/externals/oclint/lib/oclint/rules/libUnusedMethodParameterRule.dylib +0 -0
  133. data/externals/oclint/lib/oclint/rules/libUselessParenthesesRule.dylib +0 -0
  134. data/lib/objective_ci/ci_tasks.rb +1 -1
  135. data/lib/objective_ci/version.rb +1 -1
  136. metadata +200 -84
@@ -0,0 +1,1206 @@
1
+ /*===---- avx2intrin.h - AVX2 intrinsics -----------------------------------===
2
+ *
3
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ * of this software and associated documentation files (the "Software"), to deal
5
+ * in the Software without restriction, including without limitation the rights
6
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ * copies of the Software, and to permit persons to whom the Software is
8
+ * furnished to do so, subject to the following conditions:
9
+ *
10
+ * The above copyright notice and this permission notice shall be included in
11
+ * all copies or substantial portions of the Software.
12
+ *
13
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ * THE SOFTWARE.
20
+ *
21
+ *===-----------------------------------------------------------------------===
22
+ */
23
+
24
+ #ifndef __IMMINTRIN_H
25
+ #error "Never use <avx2intrin.h> directly; include <immintrin.h> instead."
26
+ #endif
27
+
28
+ #ifndef __AVX2INTRIN_H
29
+ #define __AVX2INTRIN_H
30
+
31
/* SSE4 Multiple Packed Sums of Absolute Difference.
 * Expands to a direct call of __builtin_ia32_mpsadbw256; every macro
 * argument is parenthesized before being forwarded. */
#define _mm256_mpsadbw_epu8(V1, V2, IMM) \
  __builtin_ia32_mpsadbw256((V1), (V2), (IMM))
33
+
34
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
35
+ _mm256_abs_epi8(__m256i __a)
36
+ {
37
+ return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a);
38
+ }
39
+
40
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
41
+ _mm256_abs_epi16(__m256i __a)
42
+ {
43
+ return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a);
44
+ }
45
+
46
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
47
+ _mm256_abs_epi32(__m256i __a)
48
+ {
49
+ return (__m256i)__builtin_ia32_pabsd256((__v8si)__a);
50
+ }
51
+
52
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
53
+ _mm256_packs_epi16(__m256i __a, __m256i __b)
54
+ {
55
+ return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b);
56
+ }
57
+
58
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
59
+ _mm256_packs_epi32(__m256i __a, __m256i __b)
60
+ {
61
+ return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b);
62
+ }
63
+
64
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
65
+ _mm256_packus_epi16(__m256i __a, __m256i __b)
66
+ {
67
+ return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b);
68
+ }
69
+
70
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
71
+ _mm256_packus_epi32(__m256i __V1, __m256i __V2)
72
+ {
73
+ return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);
74
+ }
75
+
76
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
77
+ _mm256_add_epi8(__m256i __a, __m256i __b)
78
+ {
79
+ return (__m256i)((__v32qi)__a + (__v32qi)__b);
80
+ }
81
+
82
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
83
+ _mm256_add_epi16(__m256i __a, __m256i __b)
84
+ {
85
+ return (__m256i)((__v16hi)__a + (__v16hi)__b);
86
+ }
87
+
88
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
89
+ _mm256_add_epi32(__m256i __a, __m256i __b)
90
+ {
91
+ return (__m256i)((__v8si)__a + (__v8si)__b);
92
+ }
93
+
94
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
95
+ _mm256_add_epi64(__m256i __a, __m256i __b)
96
+ {
97
+ return __a + __b;
98
+ }
99
+
100
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
101
+ _mm256_adds_epi8(__m256i __a, __m256i __b)
102
+ {
103
+ return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b);
104
+ }
105
+
106
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
107
+ _mm256_adds_epi16(__m256i __a, __m256i __b)
108
+ {
109
+ return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b);
110
+ }
111
+
112
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
113
+ _mm256_adds_epu8(__m256i __a, __m256i __b)
114
+ {
115
+ return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b);
116
+ }
117
+
118
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
119
+ _mm256_adds_epu16(__m256i __a, __m256i __b)
120
+ {
121
+ return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b);
122
+ }
123
+
124
/* Statement-expression macro: evaluates `a` and `b` once each into __m256i
 * temporaries, then calls __builtin_ia32_palignr256 on them (viewed as
 * __v32qi) with `n` forwarded unchanged.  Kept as a macro so `n` reaches the
 * builtin as written at the call site. */
#define _mm256_alignr_epi8(a, b, n) __extension__ ({ \
  __m256i __lhs = (a); \
  __m256i __rhs = (b); \
  (__m256i)__builtin_ia32_palignr256((__v32qi)__lhs, \
                                     (__v32qi)__rhs, \
                                     (n)); })
128
+
129
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
130
+ _mm256_and_si256(__m256i __a, __m256i __b)
131
+ {
132
+ return __a & __b;
133
+ }
134
+
135
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
136
+ _mm256_andnot_si256(__m256i __a, __m256i __b)
137
+ {
138
+ return ~__a & __b;
139
+ }
140
+
141
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
142
+ _mm256_avg_epu8(__m256i __a, __m256i __b)
143
+ {
144
+ return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b);
145
+ }
146
+
147
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
148
+ _mm256_avg_epu16(__m256i __a, __m256i __b)
149
+ {
150
+ return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b);
151
+ }
152
+
153
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
154
+ _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
155
+ {
156
+ return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2,
157
+ (__v32qi)__M);
158
+ }
159
+
160
/* Statement-expression macro: evaluates V1 and V2 once each into __m256i
 * temporaries, then calls __builtin_ia32_pblendw256 on them (viewed as
 * __v16hi) with M forwarded unchanged. */
#define _mm256_blend_epi16(V1, V2, M) __extension__ ({ \
  __m256i __first = (V1); \
  __m256i __second = (V2); \
  (__m256i)__builtin_ia32_pblendw256((__v16hi)__first, \
                                     (__v16hi)__second, \
                                     (M)); })
164
+
165
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
166
+ _mm256_cmpeq_epi8(__m256i __a, __m256i __b)
167
+ {
168
+ return (__m256i)((__v32qi)__a == (__v32qi)__b);
169
+ }
170
+
171
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
172
+ _mm256_cmpeq_epi16(__m256i __a, __m256i __b)
173
+ {
174
+ return (__m256i)((__v16hi)__a == (__v16hi)__b);
175
+ }
176
+
177
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
178
+ _mm256_cmpeq_epi32(__m256i __a, __m256i __b)
179
+ {
180
+ return (__m256i)((__v8si)__a == (__v8si)__b);
181
+ }
182
+
183
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
184
+ _mm256_cmpeq_epi64(__m256i __a, __m256i __b)
185
+ {
186
+ return (__m256i)(__a == __b);
187
+ }
188
+
189
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
190
+ _mm256_cmpgt_epi8(__m256i __a, __m256i __b)
191
+ {
192
+ return (__m256i)((__v32qi)__a > (__v32qi)__b);
193
+ }
194
+
195
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
196
+ _mm256_cmpgt_epi16(__m256i __a, __m256i __b)
197
+ {
198
+ return (__m256i)((__v16hi)__a > (__v16hi)__b);
199
+ }
200
+
201
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
202
+ _mm256_cmpgt_epi32(__m256i __a, __m256i __b)
203
+ {
204
+ return (__m256i)((__v8si)__a > (__v8si)__b);
205
+ }
206
+
207
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
208
+ _mm256_cmpgt_epi64(__m256i __a, __m256i __b)
209
+ {
210
+ return (__m256i)(__a > __b);
211
+ }
212
+
213
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
214
+ _mm256_hadd_epi16(__m256i __a, __m256i __b)
215
+ {
216
+ return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);
217
+ }
218
+
219
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
220
+ _mm256_hadd_epi32(__m256i __a, __m256i __b)
221
+ {
222
+ return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);
223
+ }
224
+
225
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
226
+ _mm256_hadds_epi16(__m256i __a, __m256i __b)
227
+ {
228
+ return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);
229
+ }
230
+
231
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
232
+ _mm256_hsub_epi16(__m256i __a, __m256i __b)
233
+ {
234
+ return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);
235
+ }
236
+
237
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
238
+ _mm256_hsub_epi32(__m256i __a, __m256i __b)
239
+ {
240
+ return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);
241
+ }
242
+
243
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
244
+ _mm256_hsubs_epi16(__m256i __a, __m256i __b)
245
+ {
246
+ return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);
247
+ }
248
+
249
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
250
+ _mm256_maddubs_epi16(__m256i __a, __m256i __b)
251
+ {
252
+ return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b);
253
+ }
254
+
255
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
256
+ _mm256_madd_epi16(__m256i __a, __m256i __b)
257
+ {
258
+ return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b);
259
+ }
260
+
261
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
262
+ _mm256_max_epi8(__m256i __a, __m256i __b)
263
+ {
264
+ return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b);
265
+ }
266
+
267
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
268
+ _mm256_max_epi16(__m256i __a, __m256i __b)
269
+ {
270
+ return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b);
271
+ }
272
+
273
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
274
+ _mm256_max_epi32(__m256i __a, __m256i __b)
275
+ {
276
+ return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b);
277
+ }
278
+
279
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
280
+ _mm256_max_epu8(__m256i __a, __m256i __b)
281
+ {
282
+ return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b);
283
+ }
284
+
285
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
286
+ _mm256_max_epu16(__m256i __a, __m256i __b)
287
+ {
288
+ return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b);
289
+ }
290
+
291
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
292
+ _mm256_max_epu32(__m256i __a, __m256i __b)
293
+ {
294
+ return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b);
295
+ }
296
+
297
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
298
+ _mm256_min_epi8(__m256i __a, __m256i __b)
299
+ {
300
+ return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b);
301
+ }
302
+
303
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
304
+ _mm256_min_epi16(__m256i __a, __m256i __b)
305
+ {
306
+ return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b);
307
+ }
308
+
309
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
310
+ _mm256_min_epi32(__m256i __a, __m256i __b)
311
+ {
312
+ return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b);
313
+ }
314
+
315
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
316
+ _mm256_min_epu8(__m256i __a, __m256i __b)
317
+ {
318
+ return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b);
319
+ }
320
+
321
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
322
+ _mm256_min_epu16(__m256i __a, __m256i __b)
323
+ {
324
+ return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b);
325
+ }
326
+
327
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
328
+ _mm256_min_epu32(__m256i __a, __m256i __b)
329
+ {
330
+ return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b);
331
+ }
332
+
333
+ static __inline__ int __attribute__((__always_inline__, __nodebug__))
334
+ _mm256_movemask_epi8(__m256i __a)
335
+ {
336
+ return __builtin_ia32_pmovmskb256((__v32qi)__a);
337
+ }
338
+
339
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
340
+ _mm256_cvtepi8_epi16(__m128i __V)
341
+ {
342
+ return (__m256i)__builtin_ia32_pmovsxbw256((__v16qi)__V);
343
+ }
344
+
345
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
346
+ _mm256_cvtepi8_epi32(__m128i __V)
347
+ {
348
+ return (__m256i)__builtin_ia32_pmovsxbd256((__v16qi)__V);
349
+ }
350
+
351
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
352
+ _mm256_cvtepi8_epi64(__m128i __V)
353
+ {
354
+ return (__m256i)__builtin_ia32_pmovsxbq256((__v16qi)__V);
355
+ }
356
+
357
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
358
+ _mm256_cvtepi16_epi32(__m128i __V)
359
+ {
360
+ return (__m256i)__builtin_ia32_pmovsxwd256((__v8hi)__V);
361
+ }
362
+
363
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
364
+ _mm256_cvtepi16_epi64(__m128i __V)
365
+ {
366
+ return (__m256i)__builtin_ia32_pmovsxwq256((__v8hi)__V);
367
+ }
368
+
369
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
370
+ _mm256_cvtepi32_epi64(__m128i __V)
371
+ {
372
+ return (__m256i)__builtin_ia32_pmovsxdq256((__v4si)__V);
373
+ }
374
+
375
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
376
+ _mm256_cvtepu8_epi16(__m128i __V)
377
+ {
378
+ return (__m256i)__builtin_ia32_pmovzxbw256((__v16qi)__V);
379
+ }
380
+
381
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
382
+ _mm256_cvtepu8_epi32(__m128i __V)
383
+ {
384
+ return (__m256i)__builtin_ia32_pmovzxbd256((__v16qi)__V);
385
+ }
386
+
387
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
388
+ _mm256_cvtepu8_epi64(__m128i __V)
389
+ {
390
+ return (__m256i)__builtin_ia32_pmovzxbq256((__v16qi)__V);
391
+ }
392
+
393
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
394
+ _mm256_cvtepu16_epi32(__m128i __V)
395
+ {
396
+ return (__m256i)__builtin_ia32_pmovzxwd256((__v8hi)__V);
397
+ }
398
+
399
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
400
+ _mm256_cvtepu16_epi64(__m128i __V)
401
+ {
402
+ return (__m256i)__builtin_ia32_pmovzxwq256((__v8hi)__V);
403
+ }
404
+
405
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
406
+ _mm256_cvtepu32_epi64(__m128i __V)
407
+ {
408
+ return (__m256i)__builtin_ia32_pmovzxdq256((__v4si)__V);
409
+ }
410
+
411
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
412
+ _mm256_mul_epi32(__m256i __a, __m256i __b)
413
+ {
414
+ return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);
415
+ }
416
+
417
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
418
+ _mm256_mulhrs_epi16(__m256i __a, __m256i __b)
419
+ {
420
+ return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b);
421
+ }
422
+
423
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
424
+ _mm256_mulhi_epu16(__m256i __a, __m256i __b)
425
+ {
426
+ return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b);
427
+ }
428
+
429
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
430
+ _mm256_mulhi_epi16(__m256i __a, __m256i __b)
431
+ {
432
+ return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b);
433
+ }
434
+
435
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
436
+ _mm256_mullo_epi16(__m256i __a, __m256i __b)
437
+ {
438
+ return (__m256i)((__v16hi)__a * (__v16hi)__b);
439
+ }
440
+
441
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
442
+ _mm256_mullo_epi32 (__m256i __a, __m256i __b)
443
+ {
444
+ return (__m256i)((__v8si)__a * (__v8si)__b);
445
+ }
446
+
447
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
448
+ _mm256_mul_epu32(__m256i __a, __m256i __b)
449
+ {
450
+ return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b);
451
+ }
452
+
453
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
454
+ _mm256_or_si256(__m256i __a, __m256i __b)
455
+ {
456
+ return __a | __b;
457
+ }
458
+
459
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
460
+ _mm256_sad_epu8(__m256i __a, __m256i __b)
461
+ {
462
+ return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b);
463
+ }
464
+
465
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
466
+ _mm256_shuffle_epi8(__m256i __a, __m256i __b)
467
+ {
468
+ return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b);
469
+ }
470
+
471
/* Statement-expression macro built on __builtin_shufflevector.
 * `a` is evaluated once into a temporary and viewed as __v8si.  The four
 * 2-bit fields of `imm` (masks 0x03, 0x0c, 0x30, 0xc0) give the indices for
 * result elements 0..3; the same four fields, each offset by 4, give the
 * indices for result elements 4..7.  The second shuffle operand is an
 * all-zero vector from _mm256_set1_epi32(0); every index produced here is
 * at most 7. */
#define _mm256_shuffle_epi32(a, imm) __extension__ ({ \
  __m256i __src = (a); \
  (__m256i)__builtin_shufflevector( \
      (__v8si)__src, (__v8si)_mm256_set1_epi32(0), \
      (imm) & 0x3, \
      ((imm) & 0xc) >> 2, \
      ((imm) & 0x30) >> 4, \
      ((imm) & 0xc0) >> 6, \
      4 + (((imm) & 0x03) >> 0), \
      4 + (((imm) & 0x0c) >> 2), \
      4 + (((imm) & 0x30) >> 4), \
      4 + (((imm) & 0xc0) >> 6)); })
480
+
481
/* Statement-expression macro built on __builtin_shufflevector.
 * `a` is evaluated once into a temporary and viewed as __v16hi.  Result
 * elements 0..3 and 8..11 use the fixed indices 0..3 and 8..11.  Result
 * elements 4..7 use 4 plus each 2-bit field of `imm` (masks 0x03, 0x0c,
 * 0x30, 0xc0), and elements 12..15 use 12 plus the same four fields.  The
 * second shuffle operand is an all-zero vector from _mm256_set1_epi16(0);
 * every index produced here is at most 15. */
#define _mm256_shufflehi_epi16(a, imm) __extension__ ({ \
  __m256i __src = (a); \
  (__m256i)__builtin_shufflevector( \
      (__v16hi)__src, (__v16hi)_mm256_set1_epi16(0), \
      0, 1, 2, 3, \
      4 + (((imm) & 0x03) >> 0), \
      4 + (((imm) & 0x0c) >> 2), \
      4 + (((imm) & 0x30) >> 4), \
      4 + (((imm) & 0xc0) >> 6), \
      8, 9, 10, 11, \
      12 + (((imm) & 0x03) >> 0), \
      12 + (((imm) & 0x0c) >> 2), \
      12 + (((imm) & 0x30) >> 4), \
      12 + (((imm) & 0xc0) >> 6)); })
494
+
495
/* Statement-expression macro built on __builtin_shufflevector.
 * `a` is evaluated once into a temporary and viewed as __v16hi.  Result
 * elements 0..3 use the four 2-bit fields of `imm` (masks 0x3, 0xc, 0x30,
 * 0xc0) directly as indices, and elements 8..11 use 8 plus the same four
 * fields.  Result elements 4..7 and 12..15 use the fixed indices 4..7 and
 * 12..15.  The second shuffle operand is an all-zero vector from
 * _mm256_set1_epi16(0); every index produced here is at most 15. */
#define _mm256_shufflelo_epi16(a, imm) __extension__ ({ \
  __m256i __src = (a); \
  (__m256i)__builtin_shufflevector( \
      (__v16hi)__src, (__v16hi)_mm256_set1_epi16(0), \
      (imm) & 0x3, \
      ((imm) & 0xc) >> 2, \
      ((imm) & 0x30) >> 4, \
      ((imm) & 0xc0) >> 6, \
      4, 5, 6, 7, \
      8 + (((imm) & 0x03) >> 0), \
      8 + (((imm) & 0x0c) >> 2), \
      8 + (((imm) & 0x30) >> 4), \
      8 + (((imm) & 0xc0) >> 6), \
      12, 13, 14, 15); })
506
+
507
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
508
+ _mm256_sign_epi8(__m256i __a, __m256i __b)
509
+ {
510
+ return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);
511
+ }
512
+
513
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
514
+ _mm256_sign_epi16(__m256i __a, __m256i __b)
515
+ {
516
+ return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);
517
+ }
518
+
519
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
520
+ _mm256_sign_epi32(__m256i __a, __m256i __b)
521
+ {
522
+ return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);
523
+ }
524
+
525
/* Statement-expression macro: evaluates `a` once into a temporary and calls
 * __builtin_ia32_pslldqi256 with it and with `count` multiplied by 8. */
#define _mm256_slli_si256(a, count) __extension__ ({ \
  __m256i __src = (a); \
  (__m256i)__builtin_ia32_pslldqi256(__src, (count)*8); })
528
+
529
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
530
+ _mm256_slli_epi16(__m256i __a, int __count)
531
+ {
532
+ return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count);
533
+ }
534
+
535
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
536
+ _mm256_sll_epi16(__m256i __a, __m128i __count)
537
+ {
538
+ return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count);
539
+ }
540
+
541
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
542
+ _mm256_slli_epi32(__m256i __a, int __count)
543
+ {
544
+ return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count);
545
+ }
546
+
547
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
548
+ _mm256_sll_epi32(__m256i __a, __m128i __count)
549
+ {
550
+ return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count);
551
+ }
552
+
553
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
554
+ _mm256_slli_epi64(__m256i __a, int __count)
555
+ {
556
+ return __builtin_ia32_psllqi256(__a, __count);
557
+ }
558
+
559
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
560
+ _mm256_sll_epi64(__m256i __a, __m128i __count)
561
+ {
562
+ return __builtin_ia32_psllq256(__a, __count);
563
+ }
564
+
565
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
566
+ _mm256_srai_epi16(__m256i __a, int __count)
567
+ {
568
+ return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count);
569
+ }
570
+
571
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
572
+ _mm256_sra_epi16(__m256i __a, __m128i __count)
573
+ {
574
+ return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count);
575
+ }
576
+
577
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
578
+ _mm256_srai_epi32(__m256i __a, int __count)
579
+ {
580
+ return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count);
581
+ }
582
+
583
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
584
+ _mm256_sra_epi32(__m256i __a, __m128i __count)
585
+ {
586
+ return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count);
587
+ }
588
+
/* Statement-expression macro: evaluates `a` once into a __m256i temporary and
   passes it to __builtin_ia32_psrldqi256 together with (count)*8. */
#define _mm256_srli_si256(a, count) __extension__ ({ \
  __m256i __src = (a); \
  (__m256i)__builtin_ia32_psrldqi256(__src, (count)*8); })
592
+
593
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
594
+ _mm256_srli_epi16(__m256i __a, int __count)
595
+ {
596
+ return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count);
597
+ }
598
+
599
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
600
+ _mm256_srl_epi16(__m256i __a, __m128i __count)
601
+ {
602
+ return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count);
603
+ }
604
+
605
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
606
+ _mm256_srli_epi32(__m256i __a, int __count)
607
+ {
608
+ return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count);
609
+ }
610
+
611
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
612
+ _mm256_srl_epi32(__m256i __a, __m128i __count)
613
+ {
614
+ return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count);
615
+ }
616
+
617
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
618
+ _mm256_srli_epi64(__m256i __a, int __count)
619
+ {
620
+ return __builtin_ia32_psrlqi256(__a, __count);
621
+ }
622
+
623
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
624
+ _mm256_srl_epi64(__m256i __a, __m128i __count)
625
+ {
626
+ return __builtin_ia32_psrlq256(__a, __count);
627
+ }
628
+
629
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
630
+ _mm256_sub_epi8(__m256i __a, __m256i __b)
631
+ {
632
+ return (__m256i)((__v32qi)__a - (__v32qi)__b);
633
+ }
634
+
635
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
636
+ _mm256_sub_epi16(__m256i __a, __m256i __b)
637
+ {
638
+ return (__m256i)((__v16hi)__a - (__v16hi)__b);
639
+ }
640
+
641
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
642
+ _mm256_sub_epi32(__m256i __a, __m256i __b)
643
+ {
644
+ return (__m256i)((__v8si)__a - (__v8si)__b);
645
+ }
646
+
647
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
648
+ _mm256_sub_epi64(__m256i __a, __m256i __b)
649
+ {
650
+ return __a - __b;
651
+ }
652
+
653
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
654
+ _mm256_subs_epi8(__m256i __a, __m256i __b)
655
+ {
656
+ return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b);
657
+ }
658
+
659
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
660
+ _mm256_subs_epi16(__m256i __a, __m256i __b)
661
+ {
662
+ return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b);
663
+ }
664
+
665
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
666
+ _mm256_subs_epu8(__m256i __a, __m256i __b)
667
+ {
668
+ return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b);
669
+ }
670
+
671
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
672
+ _mm256_subs_epu16(__m256i __a, __m256i __b)
673
+ {
674
+ return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b);
675
+ }
676
+
677
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
678
+ _mm256_unpackhi_epi8(__m256i __a, __m256i __b)
679
+ {
680
+ return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31);
681
+ }
682
+
683
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
684
+ _mm256_unpackhi_epi16(__m256i __a, __m256i __b)
685
+ {
686
+ return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
687
+ }
688
+
689
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
690
+ _mm256_unpackhi_epi32(__m256i __a, __m256i __b)
691
+ {
692
+ return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7);
693
+ }
694
+
695
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
696
+ _mm256_unpackhi_epi64(__m256i __a, __m256i __b)
697
+ {
698
+ return (__m256i)__builtin_shufflevector(__a, __b, 1, 4+1, 3, 4+3);
699
+ }
700
+
701
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
702
+ _mm256_unpacklo_epi8(__m256i __a, __m256i __b)
703
+ {
704
+ return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23);
705
+ }
706
+
707
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
708
+ _mm256_unpacklo_epi16(__m256i __a, __m256i __b)
709
+ {
710
+ return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11);
711
+ }
712
+
713
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
714
+ _mm256_unpacklo_epi32(__m256i __a, __m256i __b)
715
+ {
716
+ return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5);
717
+ }
718
+
719
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
720
+ _mm256_unpacklo_epi64(__m256i __a, __m256i __b)
721
+ {
722
+ return (__m256i)__builtin_shufflevector(__a, __b, 0, 4+0, 2, 4+2);
723
+ }
724
+
725
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
726
+ _mm256_xor_si256(__m256i __a, __m256i __b)
727
+ {
728
+ return __a ^ __b;
729
+ }
730
+
731
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
732
+ _mm256_stream_load_si256(__m256i *__V)
733
+ {
734
+ return (__m256i)__builtin_ia32_movntdqa256((__v4di *)__V);
735
+ }
736
+
737
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
738
+ _mm_broadcastss_ps(__m128 __X)
739
+ {
740
+ return (__m128)__builtin_ia32_vbroadcastss_ps((__v4sf)__X);
741
+ }
742
+
743
+ static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
744
+ _mm256_broadcastss_ps(__m128 __X)
745
+ {
746
+ return (__m256)__builtin_ia32_vbroadcastss_ps256((__v4sf)__X);
747
+ }
748
+
749
+ static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
750
+ _mm256_broadcastsd_pd(__m128d __X)
751
+ {
752
+ return (__m256d)__builtin_ia32_vbroadcastsd_pd256((__v2df)__X);
753
+ }
754
+
755
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
756
+ _mm256_broadcastsi128_si256(__m128i __X)
757
+ {
758
+ return (__m256i)__builtin_ia32_vbroadcastsi256(__X);
759
+ }
760
+
/* Statement-expression macro: evaluates V1 and V2 once each into __m128i
   temporaries, then calls __builtin_ia32_pblendd128 on them (viewed as
   __v4si) with the selector M passed through unchanged. */
#define _mm_blend_epi32(V1, V2, M) __extension__ ({ \
  __m128i __lhs = (V1); \
  __m128i __rhs = (V2); \
  (__m128i)__builtin_ia32_pblendd128((__v4si)__lhs, \
                                     (__v4si)__rhs, \
                                     (M)); })
765
+
/* Statement-expression macro: evaluates V1 and V2 once each into __m256i
   temporaries, then calls __builtin_ia32_pblendd256 on them (viewed as
   __v8si) with the selector M passed through unchanged. */
#define _mm256_blend_epi32(V1, V2, M) __extension__ ({ \
  __m256i __lhs = (V1); \
  __m256i __rhs = (V2); \
  (__m256i)__builtin_ia32_pblendd256((__v8si)__lhs, \
                                     (__v8si)__rhs, \
                                     (M)); })
770
+
771
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
772
+ _mm256_broadcastb_epi8(__m128i __X)
773
+ {
774
+ return (__m256i)__builtin_ia32_pbroadcastb256((__v16qi)__X);
775
+ }
776
+
777
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
778
+ _mm256_broadcastw_epi16(__m128i __X)
779
+ {
780
+ return (__m256i)__builtin_ia32_pbroadcastw256((__v8hi)__X);
781
+ }
782
+
783
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
784
+ _mm256_broadcastd_epi32(__m128i __X)
785
+ {
786
+ return (__m256i)__builtin_ia32_pbroadcastd256((__v4si)__X);
787
+ }
788
+
789
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
790
+ _mm256_broadcastq_epi64(__m128i __X)
791
+ {
792
+ return (__m256i)__builtin_ia32_pbroadcastq256(__X);
793
+ }
794
+
795
+ static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
796
+ _mm_broadcastb_epi8(__m128i __X)
797
+ {
798
+ return (__m128i)__builtin_ia32_pbroadcastb128((__v16qi)__X);
799
+ }
800
+
801
+ static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
802
+ _mm_broadcastw_epi16(__m128i __X)
803
+ {
804
+ return (__m128i)__builtin_ia32_pbroadcastw128((__v8hi)__X);
805
+ }
806
+
807
+
808
+ static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
809
+ _mm_broadcastd_epi32(__m128i __X)
810
+ {
811
+ return (__m128i)__builtin_ia32_pbroadcastd128((__v4si)__X);
812
+ }
813
+
814
+ static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
815
+ _mm_broadcastq_epi64(__m128i __X)
816
+ {
817
+ return (__m128i)__builtin_ia32_pbroadcastq128(__X);
818
+ }
819
+
820
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
821
+ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
822
+ {
823
+ return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b);
824
+ }
825
+
/* Statement-expression macro: evaluates V once into a __m256d temporary and
   shuffles it with __builtin_shufflevector.  Each of the four result indices
   is one 2-bit field of M (bits 0-1, 2-3, 4-5, 6-7), so every index lies in
   0..3 and only elements of V are selected; the zero vector merely fills the
   second operand slot. */
#define _mm256_permute4x64_pd(V, M) __extension__ ({ \
  __m256d __src = (V); \
  (__m256d)__builtin_shufflevector( \
      (__v4df)__src, (__v4df) _mm256_setzero_pd(), \
      (M) & 0x3, \
      ((M) & 0xc) >> 2, \
      ((M) & 0x30) >> 4, \
      ((M) & 0xc0) >> 6); })
831
+
832
+ static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
833
+ _mm256_permutevar8x32_ps(__m256 __a, __m256 __b)
834
+ {
835
+ return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8sf)__b);
836
+ }
837
+
/* Statement-expression macro: evaluates V once into a __m256i temporary and
   shuffles it with __builtin_shufflevector.  Each of the four result indices
   is one 2-bit field of M (bits 0-1, 2-3, 4-5, 6-7), so every index lies in
   0..3 and only elements of V are selected; the zero vector merely fills the
   second operand slot. */
#define _mm256_permute4x64_epi64(V, M) __extension__ ({ \
  __m256i __src = (V); \
  (__m256i)__builtin_shufflevector( \
      (__v4di)__src, (__v4di) _mm256_setzero_si256(), \
      (M) & 0x3, \
      ((M) & 0xc) >> 2, \
      ((M) & 0x30) >> 4, \
      ((M) & 0xc0) >> 6); })
843
+
/* Statement-expression macro: evaluates V1 and V2 once each into __m256i
   temporaries and passes them, with the selector M unchanged, to
   __builtin_ia32_permti256. */
#define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \
  __m256i __lhs = (V1); \
  __m256i __rhs = (V2); \
  (__m256i)__builtin_ia32_permti256(__lhs, __rhs, (M)); })
848
+
/* Statement-expression macro: evaluates A once into a __m256i temporary and
   passes it, with the selector O unchanged, to
   __builtin_ia32_extract128i256; the result is cast to __m128i. */
#define _mm256_extracti128_si256(A, O) __extension__ ({ \
  __m256i __src = (A); \
  (__m128i)__builtin_ia32_extract128i256(__src, (O)); })
852
+
/* Statement-expression macro: evaluates V1 once into a __m256i temporary and
   V2 once into a __m128i temporary, then passes both, with the selector O
   unchanged, to __builtin_ia32_insert128i256. */
#define _mm256_inserti128_si256(V1, V2, O) __extension__ ({ \
  __m256i __wide = (V1); \
  __m128i __part = (V2); \
  (__m256i)__builtin_ia32_insert128i256(__wide, __part, (O)); })
857
+
858
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
859
+ _mm256_maskload_epi32(int const *__X, __m256i __M)
860
+ {
861
+ return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M);
862
+ }
863
+
864
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
865
+ _mm256_maskload_epi64(long long const *__X, __m256i __M)
866
+ {
867
+ return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, __M);
868
+ }
869
+
870
+ static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
871
+ _mm_maskload_epi32(int const *__X, __m128i __M)
872
+ {
873
+ return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M);
874
+ }
875
+
876
+ static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
877
+ _mm_maskload_epi64(long long const *__X, __m128i __M)
878
+ {
879
+ return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M);
880
+ }
881
+
882
+ static __inline__ void __attribute__((__always_inline__, __nodebug__))
883
+ _mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y)
884
+ {
885
+ __builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y);
886
+ }
887
+
888
+ static __inline__ void __attribute__((__always_inline__, __nodebug__))
889
+ _mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y)
890
+ {
891
+ __builtin_ia32_maskstoreq256((__v4di *)__X, __M, __Y);
892
+ }
893
+
894
+ static __inline__ void __attribute__((__always_inline__, __nodebug__))
895
+ _mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y)
896
+ {
897
+ __builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y);
898
+ }
899
+
900
+ static __inline__ void __attribute__((__always_inline__, __nodebug__))
901
+ _mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y)
902
+ {
903
+ __builtin_ia32_maskstoreq(( __v2di *)__X, __M, __Y);
904
+ }
905
+
906
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
907
+ _mm256_sllv_epi32(__m256i __X, __m256i __Y)
908
+ {
909
+ return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y);
910
+ }
911
+
912
+ static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
913
+ _mm_sllv_epi32(__m128i __X, __m128i __Y)
914
+ {
915
+ return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y);
916
+ }
917
+
918
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
919
+ _mm256_sllv_epi64(__m256i __X, __m256i __Y)
920
+ {
921
+ return (__m256i)__builtin_ia32_psllv4di(__X, __Y);
922
+ }
923
+
924
+ static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
925
+ _mm_sllv_epi64(__m128i __X, __m128i __Y)
926
+ {
927
+ return (__m128i)__builtin_ia32_psllv2di(__X, __Y);
928
+ }
929
+
930
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
931
+ _mm256_srav_epi32(__m256i __X, __m256i __Y)
932
+ {
933
+ return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y);
934
+ }
935
+
936
+ static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
937
+ _mm_srav_epi32(__m128i __X, __m128i __Y)
938
+ {
939
+ return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y);
940
+ }
941
+
942
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
943
+ _mm256_srlv_epi32(__m256i __X, __m256i __Y)
944
+ {
945
+ return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y);
946
+ }
947
+
948
+ static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
949
+ _mm_srlv_epi32(__m128i __X, __m128i __Y)
950
+ {
951
+ return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y);
952
+ }
953
+
954
+ static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
955
+ _mm256_srlv_epi64(__m256i __X, __m256i __Y)
956
+ {
957
+ return (__m256i)__builtin_ia32_psrlv4di(__X, __Y);
958
+ }
959
+
960
+ static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
961
+ _mm_srlv_epi64(__m128i __X, __m128i __Y)
962
+ {
963
+ return (__m128i)__builtin_ia32_psrlv2di(__X, __Y);
964
+ }
965
+
/* Statement-expression macro: evaluates a, m, i and mask once each, in that
   order, into typed temporaries, then calls __builtin_ia32_gatherd_pd with
   them (cast as shown) and with s passed through unchanged. */
#define _mm_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \
  __m128d __src = (a); \
  double const *__base = (m); \
  __m128i __idx = (i); \
  __m128d __msk = (mask); \
  (__m128d)__builtin_ia32_gatherd_pd( \
      (__v2df)__src, (const __v2df *)__base, \
      (__v4si)__idx, (__v2df)__msk, (s)); })
973
+
/* Statement-expression macro: evaluates a, m, i and mask once each, in that
   order, into typed temporaries (note the 128-bit index vector), then calls
   __builtin_ia32_gatherd_pd256 with them and with s passed unchanged. */
#define _mm256_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \
  __m256d __src = (a); \
  double const *__base = (m); \
  __m128i __idx = (i); \
  __m256d __msk = (mask); \
  (__m256d)__builtin_ia32_gatherd_pd256( \
      (__v4df)__src, (const __v4df *)__base, \
      (__v4si)__idx, (__v4df)__msk, (s)); })
981
+
/* Statement-expression macro: evaluates a, m, i and mask once each, in that
   order, into typed temporaries, then calls __builtin_ia32_gatherq_pd with
   them (cast as shown) and with s passed through unchanged. */
#define _mm_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \
  __m128d __src = (a); \
  double const *__base = (m); \
  __m128i __idx = (i); \
  __m128d __msk = (mask); \
  (__m128d)__builtin_ia32_gatherq_pd( \
      (__v2df)__src, (const __v2df *)__base, \
      (__v2di)__idx, (__v2df)__msk, (s)); })
989
+
/* Statement-expression macro: evaluates a, m, i and mask once each, in that
   order, into typed temporaries, then calls __builtin_ia32_gatherq_pd256
   with them (cast as shown) and with s passed through unchanged. */
#define _mm256_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \
  __m256d __src = (a); \
  double const *__base = (m); \
  __m256i __idx = (i); \
  __m256d __msk = (mask); \
  (__m256d)__builtin_ia32_gatherq_pd256( \
      (__v4df)__src, (const __v4df *)__base, \
      (__v4di)__idx, (__v4df)__msk, (s)); })
997
+
/* Statement-expression macro: evaluates a, m, i and mask once each, in that
   order, into typed temporaries, then calls __builtin_ia32_gatherd_ps with
   them (cast as shown) and with s passed through unchanged. */
#define _mm_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \
  __m128 __src = (a); \
  float const *__base = (m); \
  __m128i __idx = (i); \
  __m128 __msk = (mask); \
  (__m128)__builtin_ia32_gatherd_ps( \
      (__v4sf)__src, (const __v4sf *)__base, \
      (__v4si)__idx, (__v4sf)__msk, (s)); })
1005
+
/* Statement-expression macro: evaluates a, m, i and mask once each, in that
   order, into typed temporaries, then calls __builtin_ia32_gatherd_ps256
   with them (cast as shown) and with s passed through unchanged. */
#define _mm256_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \
  __m256 __src = (a); \
  float const *__base = (m); \
  __m256i __idx = (i); \
  __m256 __msk = (mask); \
  (__m256)__builtin_ia32_gatherd_ps256( \
      (__v8sf)__src, (const __v8sf *)__base, \
      (__v8si)__idx, (__v8sf)__msk, (s)); })
1013
+
/* Statement-expression macro: evaluates a, m, i and mask once each, in that
   order, into typed temporaries, then calls __builtin_ia32_gatherq_ps with
   them (cast as shown) and with s passed through unchanged. */
#define _mm_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \
  __m128 __src = (a); \
  float const *__base = (m); \
  __m128i __idx = (i); \
  __m128 __msk = (mask); \
  (__m128)__builtin_ia32_gatherq_ps( \
      (__v4sf)__src, (const __v4sf *)__base, \
      (__v2di)__idx, (__v4sf)__msk, (s)); })
1021
+
/* Statement-expression macro: evaluates a, m, i and mask once each, in that
   order, into typed temporaries, then calls __builtin_ia32_gatherq_ps256.
   Only the index vector is 256 bits wide; the source, mask and result are
   all __m128. */
#define _mm256_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \
  __m128 __src = (a); \
  float const *__base = (m); \
  __m256i __idx = (i); \
  __m128 __msk = (mask); \
  (__m128)__builtin_ia32_gatherq_ps256( \
      (__v4sf)__src, (const __v4sf *)__base, \
      (__v4di)__idx, (__v4sf)__msk, (s)); })
1029
+
/* Statement-expression macro: evaluates a, m, i and mask once each, in that
   order, into typed temporaries, then calls __builtin_ia32_gatherd_d with
   them (cast as shown) and with s passed through unchanged. */
#define _mm_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \
  __m128i __src = (a); \
  int const *__base = (m); \
  __m128i __idx = (i); \
  __m128i __msk = (mask); \
  (__m128i)__builtin_ia32_gatherd_d( \
      (__v4si)__src, (const __v4si *)__base, \
      (__v4si)__idx, (__v4si)__msk, (s)); })
1037
+
/* Statement-expression macro: evaluates a, m, i and mask once each, in that
   order, into typed temporaries, then calls __builtin_ia32_gatherd_d256
   with them (cast as shown) and with s passed through unchanged. */
#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \
  __m256i __src = (a); \
  int const *__base = (m); \
  __m256i __idx = (i); \
  __m256i __msk = (mask); \
  (__m256i)__builtin_ia32_gatherd_d256( \
      (__v8si)__src, (const __v8si *)__base, \
      (__v8si)__idx, (__v8si)__msk, (s)); })
1045
+
/* Statement-expression macro: evaluates a, m, i and mask once each, in that
   order, into typed temporaries, then calls __builtin_ia32_gatherq_d with
   them (cast as shown) and with s passed through unchanged. */
#define _mm_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \
  __m128i __src = (a); \
  int const *__base = (m); \
  __m128i __idx = (i); \
  __m128i __msk = (mask); \
  (__m128i)__builtin_ia32_gatherq_d( \
      (__v4si)__src, (const __v4si *)__base, \
      (__v2di)__idx, (__v4si)__msk, (s)); })
1053
+
/* Statement-expression macro: evaluates a, m, i and mask once each, in that
   order, into typed temporaries, then calls __builtin_ia32_gatherq_d256.
   Only the index vector is 256 bits wide; the source, mask and result are
   all __m128i. */
#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \
  __m128i __src = (a); \
  int const *__base = (m); \
  __m256i __idx = (i); \
  __m128i __msk = (mask); \
  (__m128i)__builtin_ia32_gatherq_d256( \
      (__v4si)__src, (const __v4si *)__base, \
      (__v4di)__idx, (__v4si)__msk, (s)); })
1061
+
/* Statement-expression macro: evaluates a, m, i and mask once each, in that
   order, into typed temporaries, then calls __builtin_ia32_gatherd_q with
   them (cast as shown) and with s passed through unchanged. */
#define _mm_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \
  __m128i __src = (a); \
  long long const *__base = (m); \
  __m128i __idx = (i); \
  __m128i __msk = (mask); \
  (__m128i)__builtin_ia32_gatherd_q( \
      (__v2di)__src, (const __v2di *)__base, \
      (__v4si)__idx, (__v2di)__msk, (s)); })
1069
+
/* Statement-expression macro: evaluates a, m, i and mask once each, in that
   order, into typed temporaries (note the 128-bit index vector), then calls
   __builtin_ia32_gatherd_q256 with them and with s passed unchanged. */
#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \
  __m256i __src = (a); \
  long long const *__base = (m); \
  __m128i __idx = (i); \
  __m256i __msk = (mask); \
  (__m256i)__builtin_ia32_gatherd_q256( \
      (__v4di)__src, (const __v4di *)__base, \
      (__v4si)__idx, (__v4di)__msk, (s)); })
1077
+
/* Statement-expression macro: evaluates a, m, i and mask once each, in that
   order, into typed temporaries, then calls __builtin_ia32_gatherq_q with
   them (cast as shown) and with s passed through unchanged. */
#define _mm_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \
  __m128i __src = (a); \
  long long const *__base = (m); \
  __m128i __idx = (i); \
  __m128i __msk = (mask); \
  (__m128i)__builtin_ia32_gatherq_q( \
      (__v2di)__src, (const __v2di *)__base, \
      (__v2di)__idx, (__v2di)__msk, (s)); })
1085
+
/* Statement-expression macro: evaluates a, m, i and mask once each, in that
   order, into typed temporaries, then calls __builtin_ia32_gatherq_q256
   with them (cast as shown) and with s passed through unchanged. */
#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \
  __m256i __src = (a); \
  long long const *__base = (m); \
  __m256i __idx = (i); \
  __m256i __msk = (mask); \
  (__m256i)__builtin_ia32_gatherq_q256( \
      (__v4di)__src, (const __v4di *)__base, \
      (__v4di)__idx, (__v4di)__msk, (s)); })
1093
+
/* Unmasked form: evaluates m and i once each into typed temporaries, then
   calls __builtin_ia32_gatherd_pd with _mm_setzero_pd() as the first operand
   and _mm_set1_pd((double)(long long int)-1), i.e. -1.0, as the mask. */
#define _mm_i32gather_pd(m, i, s) __extension__ ({ \
  double const *__base = (m); \
  __m128i __idx = (i); \
  (__m128d)__builtin_ia32_gatherd_pd( \
      (__v2df)_mm_setzero_pd(), \
      (const __v2df *)__base, \
      (__v4si)__idx, \
      (__v2df)_mm_set1_pd((double)(long long int)-1), \
      (s)); })
1100
+
/* Unmasked form: evaluates m and i once each into typed temporaries, then
   calls __builtin_ia32_gatherd_pd256 with _mm256_setzero_pd() as the first
   operand and _mm256_set1_pd((double)(long long int)-1) as the mask. */
#define _mm256_i32gather_pd(m, i, s) __extension__ ({ \
  double const *__base = (m); \
  __m128i __idx = (i); \
  (__m256d)__builtin_ia32_gatherd_pd256( \
      (__v4df)_mm256_setzero_pd(), \
      (const __v4df *)__base, \
      (__v4si)__idx, \
      (__v4df)_mm256_set1_pd((double)(long long int)-1), \
      (s)); })
1107
+
/* Unmasked form: evaluates m and i once each into typed temporaries, then
   calls __builtin_ia32_gatherq_pd with _mm_setzero_pd() as the first operand
   and _mm_set1_pd((double)(long long int)-1) as the mask. */
#define _mm_i64gather_pd(m, i, s) __extension__ ({ \
  double const *__base = (m); \
  __m128i __idx = (i); \
  (__m128d)__builtin_ia32_gatherq_pd( \
      (__v2df)_mm_setzero_pd(), \
      (const __v2df *)__base, \
      (__v2di)__idx, \
      (__v2df)_mm_set1_pd((double)(long long int)-1), \
      (s)); })
1114
+
/* Unmasked form: evaluates m and i once each into typed temporaries, then
   calls __builtin_ia32_gatherq_pd256 with _mm256_setzero_pd() as the first
   operand and _mm256_set1_pd((double)(long long int)-1) as the mask. */
#define _mm256_i64gather_pd(m, i, s) __extension__ ({ \
  double const *__base = (m); \
  __m256i __idx = (i); \
  (__m256d)__builtin_ia32_gatherq_pd256( \
      (__v4df)_mm256_setzero_pd(), \
      (const __v4df *)__base, \
      (__v4di)__idx, \
      (__v4df)_mm256_set1_pd((double)(long long int)-1), \
      (s)); })
1121
+
/* Unmasked form: evaluates m and i once each into typed temporaries, then
   calls __builtin_ia32_gatherd_ps with _mm_setzero_ps() as the first operand
   and _mm_set1_ps((float)(int)-1), i.e. -1.0f, as the mask. */
#define _mm_i32gather_ps(m, i, s) __extension__ ({ \
  float const *__base = (m); \
  __m128i __idx = (i); \
  (__m128)__builtin_ia32_gatherd_ps( \
      (__v4sf)_mm_setzero_ps(), \
      (const __v4sf *)__base, \
      (__v4si)__idx, \
      (__v4sf)_mm_set1_ps((float)(int)-1), \
      (s)); })
1128
+
/* Unmasked form: evaluates m and i once each into typed temporaries, then
   calls __builtin_ia32_gatherd_ps256 with _mm256_setzero_ps() as the first
   operand and _mm256_set1_ps((float)(int)-1) as the mask. */
#define _mm256_i32gather_ps(m, i, s) __extension__ ({ \
  float const *__base = (m); \
  __m256i __idx = (i); \
  (__m256)__builtin_ia32_gatherd_ps256( \
      (__v8sf)_mm256_setzero_ps(), \
      (const __v8sf *)__base, \
      (__v8si)__idx, \
      (__v8sf)_mm256_set1_ps((float)(int)-1), \
      (s)); })
1135
+
/* Unmasked form: evaluates m and i once each into typed temporaries, then
   calls __builtin_ia32_gatherq_ps with _mm_setzero_ps() as the first operand
   and _mm_set1_ps((float)(int)-1) as the mask. */
#define _mm_i64gather_ps(m, i, s) __extension__ ({ \
  float const *__base = (m); \
  __m128i __idx = (i); \
  (__m128)__builtin_ia32_gatherq_ps( \
      (__v4sf)_mm_setzero_ps(), \
      (const __v4sf *)__base, \
      (__v2di)__idx, \
      (__v4sf)_mm_set1_ps((float)(int)-1), \
      (s)); })
1142
+
/* Unmasked form: evaluates m and i once each into typed temporaries, then
   calls __builtin_ia32_gatherq_ps256.  Only the index vector is 256 bits
   wide; the zero source, the -1.0f mask and the result are all 128-bit. */
#define _mm256_i64gather_ps(m, i, s) __extension__ ({ \
  float const *__base = (m); \
  __m256i __idx = (i); \
  (__m128)__builtin_ia32_gatherq_ps256( \
      (__v4sf)_mm_setzero_ps(), \
      (const __v4sf *)__base, \
      (__v4di)__idx, \
      (__v4sf)_mm_set1_ps((float)(int)-1), \
      (s)); })
1149
+
/* Unmasked form: evaluates m and i once each into typed temporaries, then
   calls __builtin_ia32_gatherd_d with _mm_setzero_si128() as the first
   operand and _mm_set1_epi32(-1) as the mask. */
#define _mm_i32gather_epi32(m, i, s) __extension__ ({ \
  int const *__base = (m); \
  __m128i __idx = (i); \
  (__m128i)__builtin_ia32_gatherd_d( \
      (__v4si)_mm_setzero_si128(), \
      (const __v4si *)__base, \
      (__v4si)__idx, \
      (__v4si)_mm_set1_epi32(-1), \
      (s)); })
1156
+
/* Unmasked form: evaluates m and i once each into typed temporaries, then
   calls __builtin_ia32_gatherd_d256 with _mm256_setzero_si256() as the
   first operand and _mm256_set1_epi32(-1) as the mask. */
#define _mm256_i32gather_epi32(m, i, s) __extension__ ({ \
  int const *__base = (m); \
  __m256i __idx = (i); \
  (__m256i)__builtin_ia32_gatherd_d256( \
      (__v8si)_mm256_setzero_si256(), \
      (const __v8si *)__base, \
      (__v8si)__idx, \
      (__v8si)_mm256_set1_epi32(-1), \
      (s)); })
1163
+
/* Unmasked form: evaluates m and i once each into typed temporaries, then
   calls __builtin_ia32_gatherq_d with _mm_setzero_si128() as the first
   operand and _mm_set1_epi32(-1) as the mask. */
#define _mm_i64gather_epi32(m, i, s) __extension__ ({ \
  int const *__base = (m); \
  __m128i __idx = (i); \
  (__m128i)__builtin_ia32_gatherq_d( \
      (__v4si)_mm_setzero_si128(), \
      (const __v4si *)__base, \
      (__v2di)__idx, \
      (__v4si)_mm_set1_epi32(-1), \
      (s)); })
1170
+
/* Unmasked form: evaluates m and i once each into typed temporaries, then
   calls __builtin_ia32_gatherq_d256.  Only the index vector is 256 bits
   wide; the zero source, the all -1 mask and the result are all 128-bit. */
#define _mm256_i64gather_epi32(m, i, s) __extension__ ({ \
  int const *__base = (m); \
  __m256i __idx = (i); \
  (__m128i)__builtin_ia32_gatherq_d256( \
      (__v4si)_mm_setzero_si128(), \
      (const __v4si *)__base, \
      (__v4di)__idx, \
      (__v4si)_mm_set1_epi32(-1), \
      (s)); })
1177
+
/* Unmasked form: evaluates m and i once each into typed temporaries, then
   calls __builtin_ia32_gatherd_q with _mm_setzero_si128() as the first
   operand and _mm_set1_epi64x(-1) as the mask. */
#define _mm_i32gather_epi64(m, i, s) __extension__ ({ \
  long long const *__base = (m); \
  __m128i __idx = (i); \
  (__m128i)__builtin_ia32_gatherd_q( \
      (__v2di)_mm_setzero_si128(), \
      (const __v2di *)__base, \
      (__v4si)__idx, \
      (__v2di)_mm_set1_epi64x(-1), \
      (s)); })
1184
+
/* Unmasked form: evaluates m and i once each into typed temporaries (note
   the 128-bit index vector), then calls __builtin_ia32_gatherd_q256 with
   _mm256_setzero_si256() as the first operand and _mm256_set1_epi64x(-1) as
   the mask. */
#define _mm256_i32gather_epi64(m, i, s) __extension__ ({ \
  long long const *__base = (m); \
  __m128i __idx = (i); \
  (__m256i)__builtin_ia32_gatherd_q256( \
      (__v4di)_mm256_setzero_si256(), \
      (const __v4di *)__base, \
      (__v4si)__idx, \
      (__v4di)_mm256_set1_epi64x(-1), \
      (s)); })
1191
+
/* Unmasked form: evaluates m and i once each into typed temporaries, then
   calls __builtin_ia32_gatherq_q with _mm_setzero_si128() as the first
   operand and _mm_set1_epi64x(-1) as the mask. */
#define _mm_i64gather_epi64(m, i, s) __extension__ ({ \
  long long const *__base = (m); \
  __m128i __idx = (i); \
  (__m128i)__builtin_ia32_gatherq_q( \
      (__v2di)_mm_setzero_si128(), \
      (const __v2di *)__base, \
      (__v2di)__idx, \
      (__v2di)_mm_set1_epi64x(-1), \
      (s)); })
1198
+
/* Unmasked form: evaluates m and i once each into typed temporaries, then
   calls __builtin_ia32_gatherq_q256 with _mm256_setzero_si256() as the
   first operand and _mm256_set1_epi64x(-1) as the mask. */
#define _mm256_i64gather_epi64(m, i, s) __extension__ ({ \
  long long const *__base = (m); \
  __m256i __idx = (i); \
  (__m256i)__builtin_ia32_gatherq_q256( \
      (__v4di)_mm256_setzero_si256(), \
      (const __v4di *)__base, \
      (__v4di)__idx, \
      (__v4di)_mm256_set1_epi64x(-1), \
      (s)); })
1205
+
1206
+ #endif /* __AVX2INTRIN_H */