objective-ci 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. checksums.yaml +15 -0
  2. data/README.md +1 -3
  3. data/Rakefile +1 -1
  4. data/bin/oclint +4 -0
  5. data/bin/oclint-0.8 +1 -1
  6. data/bin/oclint-json-compilation-database +1 -1
  7. data/bin/oclint-xcodebuild +1 -1
  8. data/externals/oclint/LICENSE +69 -0
  9. data/externals/oclint/bin/oclint +0 -0
  10. data/externals/oclint/{oclint-0.8 → bin/oclint-0.8} +0 -0
  11. data/externals/oclint/{oclint-json-compilation-database → bin/oclint-json-compilation-database} +0 -0
  12. data/externals/oclint/{oclint-xcodebuild → bin/oclint-xcodebuild} +0 -0
  13. data/externals/oclint/lib/clang/3.4/asan_blacklist.txt +10 -0
  14. data/externals/oclint/lib/clang/3.4/include/Intrin.h +784 -0
  15. data/externals/oclint/lib/clang/3.4/include/__wmmintrin_aes.h +67 -0
  16. data/externals/oclint/lib/clang/3.4/include/__wmmintrin_pclmul.h +34 -0
  17. data/externals/oclint/lib/clang/3.4/include/altivec.h +11856 -0
  18. data/externals/oclint/lib/clang/3.4/include/ammintrin.h +68 -0
  19. data/externals/oclint/lib/clang/3.4/include/arm_neon.h +6802 -0
  20. data/externals/oclint/lib/clang/3.4/include/avx2intrin.h +1206 -0
  21. data/externals/oclint/lib/clang/3.4/include/avxintrin.h +1224 -0
  22. data/externals/oclint/lib/clang/3.4/include/bmi2intrin.h +94 -0
  23. data/externals/oclint/lib/clang/3.4/include/bmiintrin.h +115 -0
  24. data/externals/oclint/lib/clang/3.4/include/cpuid.h +156 -0
  25. data/externals/oclint/lib/clang/3.4/include/emmintrin.h +1451 -0
  26. data/externals/oclint/lib/clang/3.4/include/f16cintrin.h +58 -0
  27. data/externals/oclint/lib/clang/3.4/include/float.h +124 -0
  28. data/externals/oclint/lib/clang/3.4/include/fma4intrin.h +231 -0
  29. data/externals/oclint/lib/clang/3.4/include/fmaintrin.h +229 -0
  30. data/externals/oclint/lib/clang/3.4/include/immintrin.h +118 -0
  31. data/externals/oclint/lib/clang/3.4/include/iso646.h +43 -0
  32. data/externals/oclint/lib/clang/3.4/include/limits.h +119 -0
  33. data/externals/oclint/lib/clang/3.4/include/lzcntintrin.h +55 -0
  34. data/externals/oclint/lib/clang/3.4/include/mm3dnow.h +162 -0
  35. data/externals/oclint/lib/clang/3.4/include/mm_malloc.h +75 -0
  36. data/externals/oclint/lib/clang/3.4/include/mmintrin.h +503 -0
  37. data/externals/oclint/lib/clang/3.4/include/module.map +156 -0
  38. data/externals/oclint/lib/clang/3.4/include/nmmintrin.h +35 -0
  39. data/externals/oclint/lib/clang/3.4/include/pmmintrin.h +117 -0
  40. data/externals/oclint/lib/clang/3.4/include/popcntintrin.h +45 -0
  41. data/externals/oclint/lib/clang/3.4/include/prfchwintrin.h +39 -0
  42. data/externals/oclint/lib/clang/3.4/include/rdseedintrin.h +52 -0
  43. data/externals/oclint/lib/clang/3.4/include/rtmintrin.h +54 -0
  44. data/externals/oclint/lib/clang/3.4/include/sanitizer/asan_interface.h +137 -0
  45. data/externals/oclint/lib/clang/3.4/include/sanitizer/common_interface_defs.h +54 -0
  46. data/externals/oclint/lib/clang/3.4/include/sanitizer/dfsan_interface.h +87 -0
  47. data/externals/oclint/lib/clang/3.4/include/sanitizer/linux_syscall_hooks.h +3070 -0
  48. data/externals/oclint/lib/clang/3.4/include/sanitizer/lsan_interface.h +52 -0
  49. data/externals/oclint/lib/clang/3.4/include/sanitizer/msan_interface.h +162 -0
  50. data/externals/oclint/lib/clang/3.4/include/shaintrin.h +74 -0
  51. data/externals/oclint/lib/clang/3.4/include/smmintrin.h +468 -0
  52. data/externals/oclint/lib/clang/3.4/include/stdalign.h +35 -0
  53. data/externals/oclint/lib/clang/3.4/include/stdarg.h +50 -0
  54. data/externals/oclint/lib/clang/3.4/include/stdbool.h +44 -0
  55. data/externals/oclint/lib/clang/3.4/include/stddef.h +102 -0
  56. data/externals/oclint/lib/clang/3.4/include/stdint.h +708 -0
  57. data/externals/oclint/lib/clang/3.4/include/stdnoreturn.h +30 -0
  58. data/externals/oclint/lib/clang/3.4/include/tbmintrin.h +158 -0
  59. data/externals/oclint/lib/clang/3.4/include/tgmath.h +1374 -0
  60. data/externals/oclint/lib/clang/3.4/include/tmmintrin.h +225 -0
  61. data/externals/oclint/lib/clang/3.4/include/unwind.h +280 -0
  62. data/externals/oclint/lib/clang/3.4/include/varargs.h +26 -0
  63. data/externals/oclint/lib/clang/3.4/include/wmmintrin.h +42 -0
  64. data/externals/oclint/lib/clang/3.4/include/x86intrin.h +79 -0
  65. data/externals/oclint/lib/clang/3.4/include/xmmintrin.h +1001 -0
  66. data/externals/oclint/lib/clang/3.4/include/xopintrin.h +804 -0
  67. data/externals/oclint/lib/clang/3.4/lib/darwin/libclang_rt.asan_osx_dynamic.dylib +0 -0
  68. data/externals/oclint/lib/clang/3.4/lib/darwin/libclang_rt.i386.a +0 -0
  69. data/externals/oclint/lib/clang/3.4/lib/darwin/libclang_rt.profile_osx.a +0 -0
  70. data/externals/oclint/lib/clang/3.4/lib/darwin/libclang_rt.ubsan_osx.a +0 -0
  71. data/externals/oclint/lib/clang/3.4/lib/darwin/libclang_rt.x86_64.a +0 -0
  72. data/externals/oclint/lib/oclint/reporters/libHTMLReporter.dylib +0 -0
  73. data/externals/oclint/lib/oclint/reporters/libJSONReporter.dylib +0 -0
  74. data/externals/oclint/lib/oclint/reporters/libPMDReporter.dylib +0 -0
  75. data/externals/oclint/lib/oclint/reporters/libTextReporter.dylib +0 -0
  76. data/externals/oclint/lib/oclint/reporters/libXMLReporter.dylib +0 -0
  77. data/externals/oclint/lib/oclint/rules/libAvoidBranchingStatementAsLastInLoopRule.dylib +0 -0
  78. data/externals/oclint/lib/oclint/rules/libBitwiseOperatorInConditionalRule.dylib +0 -0
  79. data/externals/oclint/lib/oclint/rules/libBrokenNullCheckRule.dylib +0 -0
  80. data/externals/oclint/lib/oclint/rules/libBrokenOddnessCheckRule.dylib +0 -0
  81. data/externals/oclint/lib/oclint/rules/libCollapsibleIfStatementsRule.dylib +0 -0
  82. data/externals/oclint/lib/oclint/rules/libConstantConditionalOperatorRule.dylib +0 -0
  83. data/externals/oclint/lib/oclint/rules/libConstantIfExpressionRule.dylib +0 -0
  84. data/externals/oclint/lib/oclint/rules/libCyclomaticComplexityRule.dylib +0 -0
  85. data/externals/oclint/lib/oclint/rules/libDeadCodeRule.dylib +0 -0
  86. data/externals/oclint/lib/oclint/rules/libDefaultLabelNotLastInSwitchStatementRule.dylib +0 -0
  87. data/externals/oclint/lib/oclint/rules/libDoubleNegativeRule.dylib +0 -0
  88. data/externals/oclint/lib/oclint/rules/libEmptyCatchStatementRule.dylib +0 -0
  89. data/externals/oclint/lib/oclint/rules/libEmptyDoWhileStatementRule.dylib +0 -0
  90. data/externals/oclint/lib/oclint/rules/libEmptyElseBlockRule.dylib +0 -0
  91. data/externals/oclint/lib/oclint/rules/libEmptyFinallyStatementRule.dylib +0 -0
  92. data/externals/oclint/lib/oclint/rules/libEmptyForStatementRule.dylib +0 -0
  93. data/externals/oclint/lib/oclint/rules/libEmptyIfStatementRule.dylib +0 -0
  94. data/externals/oclint/lib/oclint/rules/libEmptySwitchStatementRule.dylib +0 -0
  95. data/externals/oclint/lib/oclint/rules/libEmptyTryStatementRule.dylib +0 -0
  96. data/externals/oclint/lib/oclint/rules/libEmptyWhileStatementRule.dylib +0 -0
  97. data/externals/oclint/lib/oclint/rules/libForLoopShouldBeWhileLoopRule.dylib +0 -0
  98. data/externals/oclint/lib/oclint/rules/libGotoStatementRule.dylib +0 -0
  99. data/externals/oclint/lib/oclint/rules/libInvertedLogicRule.dylib +0 -0
  100. data/externals/oclint/lib/oclint/rules/libJumbledIncrementerRule.dylib +0 -0
  101. data/externals/oclint/lib/oclint/rules/libLongClassRule.dylib +0 -0
  102. data/externals/oclint/lib/oclint/rules/libLongLineRule.dylib +0 -0
  103. data/externals/oclint/lib/oclint/rules/libLongMethodRule.dylib +0 -0
  104. data/externals/oclint/lib/oclint/rules/libLongVariableNameRule.dylib +0 -0
  105. data/externals/oclint/lib/oclint/rules/libMisplacedNullCheckRule.dylib +0 -0
  106. data/externals/oclint/lib/oclint/rules/libMissingBreakInSwitchStatementRule.dylib +0 -0
  107. data/externals/oclint/lib/oclint/rules/libMultipleUnaryOperatorRule.dylib +0 -0
  108. data/externals/oclint/lib/oclint/rules/libNPathComplexityRule.dylib +0 -0
  109. data/externals/oclint/lib/oclint/rules/libNcssMethodCountRule.dylib +0 -0
  110. data/externals/oclint/lib/oclint/rules/libNestedBlockDepthRule.dylib +0 -0
  111. data/externals/oclint/lib/oclint/rules/libNonCaseLabelInSwitchStatementRule.dylib +0 -0
  112. data/externals/oclint/lib/oclint/rules/libObjCBoxedExpressionsRule.dylib +0 -0
  113. data/externals/oclint/lib/oclint/rules/libObjCContainerLiteralsRule.dylib +0 -0
  114. data/externals/oclint/lib/oclint/rules/libObjCNSNumberLiteralsRule.dylib +0 -0
  115. data/externals/oclint/lib/oclint/rules/libObjCObjectSubscriptingRule.dylib +0 -0
  116. data/externals/oclint/lib/oclint/rules/libParameterReassignmentRule.dylib +0 -0
  117. data/externals/oclint/lib/oclint/rules/libRedundantConditionalOperatorRule.dylib +0 -0
  118. data/externals/oclint/lib/oclint/rules/libRedundantIfStatementRule.dylib +0 -0
  119. data/externals/oclint/lib/oclint/rules/libRedundantLocalVariableRule.dylib +0 -0
  120. data/externals/oclint/lib/oclint/rules/libRedundantNilCheckRule.dylib +0 -0
  121. data/externals/oclint/lib/oclint/rules/libReturnFromFinallyBlockRule.dylib +0 -0
  122. data/externals/oclint/lib/oclint/rules/libShortVariableNameRule.dylib +0 -0
  123. data/externals/oclint/lib/oclint/rules/libSwitchStatementsShouldHaveDefaultRule.dylib +0 -0
  124. data/externals/oclint/lib/oclint/rules/libThrowExceptionFromFinallyBlockRule.dylib +0 -0
  125. data/externals/oclint/lib/oclint/rules/libTooFewBranchesInSwitchStatementRule.dylib +0 -0
  126. data/externals/oclint/lib/oclint/rules/libTooManyFieldsRule.dylib +0 -0
  127. data/externals/oclint/lib/oclint/rules/libTooManyMethodsRule.dylib +0 -0
  128. data/externals/oclint/lib/oclint/rules/libTooManyParametersRule.dylib +0 -0
  129. data/externals/oclint/lib/oclint/rules/libUnnecessaryElseStatementRule.dylib +0 -0
  130. data/externals/oclint/lib/oclint/rules/libUnnecessaryNullCheckForCXXDeallocRule.dylib +0 -0
  131. data/externals/oclint/lib/oclint/rules/libUnusedLocalVariableRule.dylib +0 -0
  132. data/externals/oclint/lib/oclint/rules/libUnusedMethodParameterRule.dylib +0 -0
  133. data/externals/oclint/lib/oclint/rules/libUselessParenthesesRule.dylib +0 -0
  134. data/lib/objective_ci/ci_tasks.rb +1 -1
  135. data/lib/objective_ci/version.rb +1 -1
  136. metadata +200 -84
@@ -0,0 +1,26 @@
1
+ /*===---- varargs.h - Variable argument handling -------------------------------------===
2
+ *
3
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ * of this software and associated documentation files (the "Software"), to deal
5
+ * in the Software without restriction, including without limitation the rights
6
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ * copies of the Software, and to permit persons to whom the Software is
8
+ * furnished to do so, subject to the following conditions:
9
+ *
10
+ * The above copyright notice and this permission notice shall be included in
11
+ * all copies or substantial portions of the Software.
12
+ *
13
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ * THE SOFTWARE.
20
+ *
21
+ *===-----------------------------------------------------------------------===
22
+ */
23
+ #ifndef __VARARGS_H
24
+ #define __VARARGS_H
25
+ #error "Please use <stdarg.h> instead of <varargs.h>"
26
+ #endif
@@ -0,0 +1,42 @@
1
+ /*===---- wmmintrin.h - AES intrinsics ------------------------------------===
2
+ *
3
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ * of this software and associated documentation files (the "Software"), to deal
5
+ * in the Software without restriction, including without limitation the rights
6
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ * copies of the Software, and to permit persons to whom the Software is
8
+ * furnished to do so, subject to the following conditions:
9
+ *
10
+ * The above copyright notice and this permission notice shall be included in
11
+ * all copies or substantial portions of the Software.
12
+ *
13
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ * THE SOFTWARE.
20
+ *
21
+ *===-----------------------------------------------------------------------===
22
+ */
23
+
24
+ #ifndef _WMMINTRIN_H
25
+ #define _WMMINTRIN_H
26
+
27
+ #include <emmintrin.h>
28
+
29
+ #if !defined (__AES__) && !defined (__PCLMUL__)
30
+ # error "AES/PCLMUL instructions not enabled"
31
+ #else
32
+
33
+ #ifdef __AES__
34
+ #include <__wmmintrin_aes.h>
35
+ #endif /* __AES__ */
36
+
37
+ #ifdef __PCLMUL__
38
+ #include <__wmmintrin_pclmul.h>
39
+ #endif /* __PCLMUL__ */
40
+
41
+ #endif /* __AES__ || __PCLMUL__ */
42
+ #endif /* _WMMINTRIN_H */
@@ -0,0 +1,79 @@
1
+ /*===---- x86intrin.h - X86 intrinsics -------------------------------------===
2
+ *
3
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ * of this software and associated documentation files (the "Software"), to deal
5
+ * in the Software without restriction, including without limitation the rights
6
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ * copies of the Software, and to permit persons to whom the Software is
8
+ * furnished to do so, subject to the following conditions:
9
+ *
10
+ * The above copyright notice and this permission notice shall be included in
11
+ * all copies or substantial portions of the Software.
12
+ *
13
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ * THE SOFTWARE.
20
+ *
21
+ *===-----------------------------------------------------------------------===
22
+ */
23
+
24
+ #ifndef __X86INTRIN_H
25
+ #define __X86INTRIN_H
26
+
27
+ #include <immintrin.h>
28
+
29
+ #ifdef __3dNOW__
30
+ #include <mm3dnow.h>
31
+ #endif
32
+
33
+ #ifdef __BMI__
34
+ #include <bmiintrin.h>
35
+ #endif
36
+
37
+ #ifdef __BMI2__
38
+ #include <bmi2intrin.h>
39
+ #endif
40
+
41
+ #ifdef __LZCNT__
42
+ #include <lzcntintrin.h>
43
+ #endif
44
+
45
+ #ifdef __POPCNT__
46
+ #include <popcntintrin.h>
47
+ #endif
48
+
49
+ #ifdef __RDSEED__
50
+ #include <rdseedintrin.h>
51
+ #endif
52
+
53
+ #ifdef __PRFCHW__
54
+ #include <prfchwintrin.h>
55
+ #endif
56
+
57
+ #ifdef __SSE4A__
58
+ #include <ammintrin.h>
59
+ #endif
60
+
61
+ #ifdef __FMA4__
62
+ #include <fma4intrin.h>
63
+ #endif
64
+
65
+ #ifdef __XOP__
66
+ #include <xopintrin.h>
67
+ #endif
68
+
69
+ #ifdef __TBM__
70
+ #include <tbmintrin.h>
71
+ #endif
72
+
73
+ #ifdef __F16C__
74
+ #include <f16cintrin.h>
75
+ #endif
76
+
77
+ // FIXME: LWP
78
+
79
+ #endif /* __X86INTRIN_H */
@@ -0,0 +1,1001 @@
1
+ /*===---- xmmintrin.h - SSE intrinsics -------------------------------------===
2
+ *
3
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ * of this software and associated documentation files (the "Software"), to deal
5
+ * in the Software without restriction, including without limitation the rights
6
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ * copies of the Software, and to permit persons to whom the Software is
8
+ * furnished to do so, subject to the following conditions:
9
+ *
10
+ * The above copyright notice and this permission notice shall be included in
11
+ * all copies or substantial portions of the Software.
12
+ *
13
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ * THE SOFTWARE.
20
+ *
21
+ *===-----------------------------------------------------------------------===
22
+ */
23
+
24
+ #ifndef __XMMINTRIN_H
25
+ #define __XMMINTRIN_H
26
+
27
+ #ifndef __SSE__
28
+ #error "SSE instruction set not enabled"
29
+ #else
30
+
31
+ #include <mmintrin.h>
32
+
33
+ typedef int __v4si __attribute__((__vector_size__(16)));
34
+ typedef float __v4sf __attribute__((__vector_size__(16)));
35
+ typedef float __m128 __attribute__((__vector_size__(16)));
36
+
37
+ // <mm_malloc.h> should only be included in a hosted environment as it depends on
38
+ // a standard library to provide allocation routines.
39
+ #if __STDC_HOSTED__
40
+ #include <mm_malloc.h>
41
+ #endif
42
+
43
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
44
+ _mm_add_ss(__m128 __a, __m128 __b)
45
+ {
46
+ __a[0] += __b[0];
47
+ return __a;
48
+ }
49
+
50
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
51
+ _mm_add_ps(__m128 __a, __m128 __b)
52
+ {
53
+ return __a + __b;
54
+ }
55
+
56
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
57
+ _mm_sub_ss(__m128 __a, __m128 __b)
58
+ {
59
+ __a[0] -= __b[0];
60
+ return __a;
61
+ }
62
+
63
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
64
+ _mm_sub_ps(__m128 __a, __m128 __b)
65
+ {
66
+ return __a - __b;
67
+ }
68
+
69
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
70
+ _mm_mul_ss(__m128 __a, __m128 __b)
71
+ {
72
+ __a[0] *= __b[0];
73
+ return __a;
74
+ }
75
+
76
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
77
+ _mm_mul_ps(__m128 __a, __m128 __b)
78
+ {
79
+ return __a * __b;
80
+ }
81
+
82
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
83
+ _mm_div_ss(__m128 __a, __m128 __b)
84
+ {
85
+ __a[0] /= __b[0];
86
+ return __a;
87
+ }
88
+
89
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
90
+ _mm_div_ps(__m128 __a, __m128 __b)
91
+ {
92
+ return __a / __b;
93
+ }
94
+
95
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
96
+ _mm_sqrt_ss(__m128 __a)
97
+ { /* Scalar form: only lane 0 of the result is taken from the sqrtss builtin. */
98
+ __m128 __c = __builtin_ia32_sqrtss(__a);
99
+ return (__m128) { __c[0], __a[1], __a[2], __a[3] }; /* lanes 1-3 are copied unchanged from __a */
100
+ }
101
+
102
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
103
+ _mm_sqrt_ps(__m128 __a)
104
+ {
105
+ return __builtin_ia32_sqrtps(__a);
106
+ }
107
+
108
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
109
+ _mm_rcp_ss(__m128 __a)
110
+ { /* Scalar form: only lane 0 of the result is taken from the rcpss builtin. */
111
+ __m128 __c = __builtin_ia32_rcpss(__a);
112
+ return (__m128) { __c[0], __a[1], __a[2], __a[3] }; /* lanes 1-3 are copied unchanged from __a */
113
+ }
114
+
115
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
116
+ _mm_rcp_ps(__m128 __a)
117
+ {
118
+ return __builtin_ia32_rcpps(__a);
119
+ }
120
+
121
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
122
+ _mm_rsqrt_ss(__m128 __a)
123
+ { /* Scalar form: only lane 0 of the result is taken from the rsqrtss builtin. */
124
+ __m128 __c = __builtin_ia32_rsqrtss(__a);
125
+ return (__m128) { __c[0], __a[1], __a[2], __a[3] }; /* lanes 1-3 are copied unchanged from __a */
126
+ }
127
+
128
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
129
+ _mm_rsqrt_ps(__m128 __a)
130
+ {
131
+ return __builtin_ia32_rsqrtps(__a);
132
+ }
133
+
134
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
135
+ _mm_min_ss(__m128 __a, __m128 __b)
136
+ {
137
+ return __builtin_ia32_minss(__a, __b);
138
+ }
139
+
140
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
141
+ _mm_min_ps(__m128 __a, __m128 __b)
142
+ {
143
+ return __builtin_ia32_minps(__a, __b);
144
+ }
145
+
146
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
147
+ _mm_max_ss(__m128 __a, __m128 __b)
148
+ {
149
+ return __builtin_ia32_maxss(__a, __b);
150
+ }
151
+
152
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
153
+ _mm_max_ps(__m128 __a, __m128 __b)
154
+ {
155
+ return __builtin_ia32_maxps(__a, __b);
156
+ }
157
+
158
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
159
+ _mm_and_ps(__m128 __a, __m128 __b)
160
+ {
161
+ return (__m128)((__v4si)__a & (__v4si)__b);
162
+ }
163
+
164
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
165
+ _mm_andnot_ps(__m128 __a, __m128 __b)
166
+ {
167
+ return (__m128)(~(__v4si)__a & (__v4si)__b);
168
+ }
169
+
170
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
171
+ _mm_or_ps(__m128 __a, __m128 __b)
172
+ {
173
+ return (__m128)((__v4si)__a | (__v4si)__b);
174
+ }
175
+
176
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
177
+ _mm_xor_ps(__m128 __a, __m128 __b)
178
+ {
179
+ return (__m128)((__v4si)__a ^ (__v4si)__b);
180
+ }
181
+
182
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
183
+ _mm_cmpeq_ss(__m128 __a, __m128 __b)
184
+ {
185
+ return (__m128)__builtin_ia32_cmpss(__a, __b, 0);
186
+ }
187
+
188
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
189
+ _mm_cmpeq_ps(__m128 __a, __m128 __b)
190
+ {
191
+ return (__m128)__builtin_ia32_cmpps(__a, __b, 0);
192
+ }
193
+
194
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
195
+ _mm_cmplt_ss(__m128 __a, __m128 __b)
196
+ {
197
+ return (__m128)__builtin_ia32_cmpss(__a, __b, 1);
198
+ }
199
+
200
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
201
+ _mm_cmplt_ps(__m128 __a, __m128 __b)
202
+ {
203
+ return (__m128)__builtin_ia32_cmpps(__a, __b, 1);
204
+ }
205
+
206
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
207
+ _mm_cmple_ss(__m128 __a, __m128 __b)
208
+ {
209
+ return (__m128)__builtin_ia32_cmpss(__a, __b, 2);
210
+ }
211
+
212
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
213
+ _mm_cmple_ps(__m128 __a, __m128 __b)
214
+ {
215
+ return (__m128)__builtin_ia32_cmpps(__a, __b, 2);
216
+ }
217
+
218
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
219
+ _mm_cmpgt_ss(__m128 __a, __m128 __b)
220
+ { /* a > b is evaluated as b < a: same predicate (1) as _mm_cmplt_ss, operands swapped. */
221
+ return (__m128)__builtin_shufflevector(__a,
222
+ __builtin_ia32_cmpss(__b, __a, 1), /* second shuffle operand: the swapped-operand compare */
223
+ 4, 1, 2, 3); /* index 4 = lane 0 of the compare result; 1,2,3 = lanes 1-3 of __a */
224
+ }
225
+
226
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
227
+ _mm_cmpgt_ps(__m128 __a, __m128 __b)
228
+ {
229
+ return (__m128)__builtin_ia32_cmpps(__b, __a, 1);
230
+ }
231
+
232
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
233
+ _mm_cmpge_ss(__m128 __a, __m128 __b)
234
+ { /* a >= b is evaluated as b <= a: same predicate (2) as _mm_cmple_ss, operands swapped. */
235
+ return (__m128)__builtin_shufflevector(__a,
236
+ __builtin_ia32_cmpss(__b, __a, 2), /* second shuffle operand: the swapped-operand compare */
237
+ 4, 1, 2, 3); /* index 4 = lane 0 of the compare result; 1,2,3 = lanes 1-3 of __a */
238
+ }
239
+
240
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
241
+ _mm_cmpge_ps(__m128 __a, __m128 __b)
242
+ {
243
+ return (__m128)__builtin_ia32_cmpps(__b, __a, 2);
244
+ }
245
+
246
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
247
+ _mm_cmpneq_ss(__m128 __a, __m128 __b)
248
+ {
249
+ return (__m128)__builtin_ia32_cmpss(__a, __b, 4);
250
+ }
251
+
252
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
253
+ _mm_cmpneq_ps(__m128 __a, __m128 __b)
254
+ {
255
+ return (__m128)__builtin_ia32_cmpps(__a, __b, 4);
256
+ }
257
+
258
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
259
+ _mm_cmpnlt_ss(__m128 __a, __m128 __b)
260
+ {
261
+ return (__m128)__builtin_ia32_cmpss(__a, __b, 5);
262
+ }
263
+
264
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
265
+ _mm_cmpnlt_ps(__m128 __a, __m128 __b)
266
+ {
267
+ return (__m128)__builtin_ia32_cmpps(__a, __b, 5);
268
+ }
269
+
270
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
271
+ _mm_cmpnle_ss(__m128 __a, __m128 __b)
272
+ {
273
+ return (__m128)__builtin_ia32_cmpss(__a, __b, 6);
274
+ }
275
+
276
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
277
+ _mm_cmpnle_ps(__m128 __a, __m128 __b)
278
+ {
279
+ return (__m128)__builtin_ia32_cmpps(__a, __b, 6);
280
+ }
281
+
282
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
283
+ _mm_cmpngt_ss(__m128 __a, __m128 __b)
284
+ { /* !(a > b) is evaluated as !(b < a): same predicate (5) as _mm_cmpnlt_ss, operands swapped. */
285
+ return (__m128)__builtin_shufflevector(__a,
286
+ __builtin_ia32_cmpss(__b, __a, 5), /* second shuffle operand: the swapped-operand compare */
287
+ 4, 1, 2, 3); /* index 4 = lane 0 of the compare result; 1,2,3 = lanes 1-3 of __a */
288
+ }
289
+
290
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
291
+ _mm_cmpngt_ps(__m128 __a, __m128 __b)
292
+ {
293
+ return (__m128)__builtin_ia32_cmpps(__b, __a, 5);
294
+ }
295
+
296
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
297
+ _mm_cmpnge_ss(__m128 __a, __m128 __b)
298
+ { /* !(a >= b) is evaluated as !(b <= a): same predicate (6) as _mm_cmpnle_ss, operands swapped. */
299
+ return (__m128)__builtin_shufflevector(__a,
300
+ __builtin_ia32_cmpss(__b, __a, 6), /* second shuffle operand: the swapped-operand compare */
301
+ 4, 1, 2, 3); /* index 4 = lane 0 of the compare result; 1,2,3 = lanes 1-3 of __a */
302
+ }
303
+
304
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
305
+ _mm_cmpnge_ps(__m128 __a, __m128 __b)
306
+ {
307
+ return (__m128)__builtin_ia32_cmpps(__b, __a, 6);
308
+ }
309
+
310
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
311
+ _mm_cmpord_ss(__m128 __a, __m128 __b)
312
+ {
313
+ return (__m128)__builtin_ia32_cmpss(__a, __b, 7);
314
+ }
315
+
316
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
317
+ _mm_cmpord_ps(__m128 __a, __m128 __b)
318
+ {
319
+ return (__m128)__builtin_ia32_cmpps(__a, __b, 7);
320
+ }
321
+
322
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
323
+ _mm_cmpunord_ss(__m128 __a, __m128 __b)
324
+ {
325
+ return (__m128)__builtin_ia32_cmpss(__a, __b, 3);
326
+ }
327
+
328
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
329
+ _mm_cmpunord_ps(__m128 __a, __m128 __b)
330
+ {
331
+ return (__m128)__builtin_ia32_cmpps(__a, __b, 3);
332
+ }
333
+
334
+ static __inline__ int __attribute__((__always_inline__, __nodebug__))
335
+ _mm_comieq_ss(__m128 __a, __m128 __b)
336
+ {
337
+ return __builtin_ia32_comieq(__a, __b);
338
+ }
339
+
340
+ static __inline__ int __attribute__((__always_inline__, __nodebug__))
341
+ _mm_comilt_ss(__m128 __a, __m128 __b)
342
+ {
343
+ return __builtin_ia32_comilt(__a, __b);
344
+ }
345
+
346
+ static __inline__ int __attribute__((__always_inline__, __nodebug__))
347
+ _mm_comile_ss(__m128 __a, __m128 __b)
348
+ {
349
+ return __builtin_ia32_comile(__a, __b);
350
+ }
351
+
352
+ static __inline__ int __attribute__((__always_inline__, __nodebug__))
353
+ _mm_comigt_ss(__m128 __a, __m128 __b)
354
+ {
355
+ return __builtin_ia32_comigt(__a, __b);
356
+ }
357
+
358
+ static __inline__ int __attribute__((__always_inline__, __nodebug__))
359
+ _mm_comige_ss(__m128 __a, __m128 __b)
360
+ {
361
+ return __builtin_ia32_comige(__a, __b);
362
+ }
363
+
364
+ static __inline__ int __attribute__((__always_inline__, __nodebug__))
365
+ _mm_comineq_ss(__m128 __a, __m128 __b)
366
+ {
367
+ return __builtin_ia32_comineq(__a, __b);
368
+ }
369
+
370
+ static __inline__ int __attribute__((__always_inline__, __nodebug__))
371
+ _mm_ucomieq_ss(__m128 __a, __m128 __b)
372
+ {
373
+ return __builtin_ia32_ucomieq(__a, __b);
374
+ }
375
+
376
+ static __inline__ int __attribute__((__always_inline__, __nodebug__))
377
+ _mm_ucomilt_ss(__m128 __a, __m128 __b)
378
+ {
379
+ return __builtin_ia32_ucomilt(__a, __b);
380
+ }
381
+
382
+ static __inline__ int __attribute__((__always_inline__, __nodebug__))
383
+ _mm_ucomile_ss(__m128 __a, __m128 __b)
384
+ {
385
+ return __builtin_ia32_ucomile(__a, __b);
386
+ }
387
+
388
+ static __inline__ int __attribute__((__always_inline__, __nodebug__))
389
+ _mm_ucomigt_ss(__m128 __a, __m128 __b)
390
+ {
391
+ return __builtin_ia32_ucomigt(__a, __b);
392
+ }
393
+
394
+ static __inline__ int __attribute__((__always_inline__, __nodebug__))
395
+ _mm_ucomige_ss(__m128 __a, __m128 __b)
396
+ {
397
+ return __builtin_ia32_ucomige(__a, __b);
398
+ }
399
+
400
+ static __inline__ int __attribute__((__always_inline__, __nodebug__))
401
+ _mm_ucomineq_ss(__m128 __a, __m128 __b)
402
+ {
403
+ return __builtin_ia32_ucomineq(__a, __b);
404
+ }
405
+
406
+ static __inline__ int __attribute__((__always_inline__, __nodebug__))
407
+ _mm_cvtss_si32(__m128 __a)
408
+ {
409
+ return __builtin_ia32_cvtss2si(__a);
410
+ }
411
+
412
+ static __inline__ int __attribute__((__always_inline__, __nodebug__))
413
+ _mm_cvt_ss2si(__m128 __a)
414
+ {
415
+ return _mm_cvtss_si32(__a);
416
+ }
417
+
418
+ #ifdef __x86_64__
419
+
420
+ static __inline__ long long __attribute__((__always_inline__, __nodebug__))
421
+ _mm_cvtss_si64(__m128 __a)
422
+ {
423
+ return __builtin_ia32_cvtss2si64(__a);
424
+ }
425
+
426
+ #endif
427
+
428
+ static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
429
+ _mm_cvtps_pi32(__m128 __a)
430
+ {
431
+ return (__m64)__builtin_ia32_cvtps2pi(__a);
432
+ }
433
+
434
+ static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
435
+ _mm_cvt_ps2pi(__m128 __a)
436
+ {
437
+ return _mm_cvtps_pi32(__a);
438
+ }
439
+
440
+ static __inline__ int __attribute__((__always_inline__, __nodebug__))
441
+ _mm_cvttss_si32(__m128 __a)
442
+ { /* Returns lane 0 of __a converted to int by an ordinary C float-to-int conversion (truncates toward zero). */
443
+ return __a[0]; /* NOTE(review): C leaves out-of-range/NaN conversions undefined - confirm callers do not rely on a specific result */
444
+ }
445
+
446
+ static __inline__ int __attribute__((__always_inline__, __nodebug__))
447
+ _mm_cvtt_ss2si(__m128 __a)
448
+ {
449
+ return _mm_cvttss_si32(__a);
450
+ }
451
+
452
+ static __inline__ long long __attribute__((__always_inline__, __nodebug__))
453
+ _mm_cvttss_si64(__m128 __a)
454
+ { /* NOTE(review): unlike _mm_cvtss_si64 and _mm_cvtsi64_ss, this 64-bit variant is not wrapped in #ifdef __x86_64__ - confirm intentional. */
455
+ return __a[0]; /* lane 0 converted to long long by an ordinary C float-to-integer conversion (truncates toward zero) */
456
+ }
457
+
458
+ static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
459
+ _mm_cvttps_pi32(__m128 __a)
460
+ {
461
+ return (__m64)__builtin_ia32_cvttps2pi(__a);
462
+ }
463
+
464
+ static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
465
+ _mm_cvtt_ps2pi(__m128 __a)
466
+ {
467
+ return _mm_cvttps_pi32(__a);
468
+ }
469
+
470
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
471
+ _mm_cvtsi32_ss(__m128 __a, int __b)
472
+ {
473
+ __a[0] = __b;
474
+ return __a;
475
+ }
476
+
477
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
478
+ _mm_cvt_si2ss(__m128 __a, int __b)
479
+ {
480
+ return _mm_cvtsi32_ss(__a, __b);
481
+ }
482
+
483
+ #ifdef __x86_64__
484
+
485
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
486
+ _mm_cvtsi64_ss(__m128 __a, long long __b)
487
+ {
488
+ __a[0] = __b;
489
+ return __a;
490
+ }
491
+
492
+ #endif
493
+
494
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
495
+ _mm_cvtpi32_ps(__m128 __a, __m64 __b)
496
+ {
497
+ return __builtin_ia32_cvtpi2ps(__a, (__v2si)__b);
498
+ }
499
+
500
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
501
+ _mm_cvt_pi2ps(__m128 __a, __m64 __b)
502
+ {
503
+ return _mm_cvtpi32_ps(__a, __b);
504
+ }
505
+
506
+ static __inline__ float __attribute__((__always_inline__, __nodebug__))
507
+ _mm_cvtss_f32(__m128 __a)
508
+ {
509
+ return __a[0];
510
+ }
511
+
512
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
513
+ _mm_loadh_pi(__m128 __a, const __m64 *__p)
514
+ {
515
+ typedef float __mm_loadh_pi_v2f32 __attribute__((__vector_size__(8)));
516
+ struct __mm_loadh_pi_struct {
517
+ __mm_loadh_pi_v2f32 __u;
518
+ } __attribute__((__packed__, __may_alias__));
519
+ __mm_loadh_pi_v2f32 __b = ((struct __mm_loadh_pi_struct*)__p)->__u;
520
+ __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);
521
+ return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5);
522
+ }
523
+
524
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
525
+ _mm_loadl_pi(__m128 __a, const __m64 *__p)
526
+ {
527
+ typedef float __mm_loadl_pi_v2f32 __attribute__((__vector_size__(8)));
528
+ struct __mm_loadl_pi_struct {
529
+ __mm_loadl_pi_v2f32 __u;
530
+ } __attribute__((__packed__, __may_alias__));
531
+ __mm_loadl_pi_v2f32 __b = ((struct __mm_loadl_pi_struct*)__p)->__u;
532
+ __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);
533
+ return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3);
534
+ }
535
+
536
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
537
+ _mm_load_ss(const float *__p)
538
+ {
539
+ struct __mm_load_ss_struct {
540
+ float __u;
541
+ } __attribute__((__packed__, __may_alias__));
542
+ float __u = ((struct __mm_load_ss_struct*)__p)->__u;
543
+ return (__m128){ __u, 0, 0, 0 };
544
+ }
545
+
546
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
547
+ _mm_load1_ps(const float *__p)
548
+ {
549
+ struct __mm_load1_ps_struct {
550
+ float __u;
551
+ } __attribute__((__packed__, __may_alias__));
552
+ float __u = ((struct __mm_load1_ps_struct*)__p)->__u;
553
+ return (__m128){ __u, __u, __u, __u };
554
+ }
555
+
556
/* Alternate spelling: _mm_load_ps1(p) expands to _mm_load1_ps(p). */
+ #define _mm_load_ps1(p) _mm_load1_ps(p)
557
+
558
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
559
+ _mm_load_ps(const float *__p)
560
+ {
561
+ return *(__m128*)__p;
562
+ }
563
+
564
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
565
+ _mm_loadu_ps(const float *__p)
566
+ {
567
+ struct __loadu_ps {
568
+ __m128 __v;
569
+ } __attribute__((__packed__, __may_alias__));
570
+ return ((struct __loadu_ps*)__p)->__v;
571
+ }
572
+
573
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
574
+ _mm_loadr_ps(const float *__p)
575
+ {
576
+ __m128 __a = _mm_load_ps(__p);
577
+ return __builtin_shufflevector(__a, __a, 3, 2, 1, 0);
578
+ }
579
+
580
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
581
+ _mm_set_ss(float __w)
582
+ {
583
+ return (__m128){ __w, 0, 0, 0 };
584
+ }
585
+
586
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
587
+ _mm_set1_ps(float __w)
588
+ {
589
+ return (__m128){ __w, __w, __w, __w };
590
+ }
591
+
592
+ // Microsoft specific.
593
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
594
+ _mm_set_ps1(float __w)
595
+ {
596
+ return _mm_set1_ps(__w);
597
+ }
598
+
599
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
600
+ _mm_set_ps(float __z, float __y, float __x, float __w)
601
+ {
602
+ return (__m128){ __w, __x, __y, __z };
603
+ }
604
+
605
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
606
+ _mm_setr_ps(float __z, float __y, float __x, float __w)
607
+ {
608
+ return (__m128){ __z, __y, __x, __w };
609
+ }
610
+
611
+ static __inline__ __m128 __attribute__((__always_inline__))
612
+ _mm_setzero_ps(void)
613
+ {
614
+ return (__m128){ 0, 0, 0, 0 };
615
+ }
616
+
617
+ static __inline__ void __attribute__((__always_inline__))
618
+ _mm_storeh_pi(__m64 *__p, __m128 __a)
619
+ {
620
+ __builtin_ia32_storehps((__v2si *)__p, __a);
621
+ }
622
+
623
+ static __inline__ void __attribute__((__always_inline__))
624
+ _mm_storel_pi(__m64 *__p, __m128 __a)
625
+ {
626
+ __builtin_ia32_storelps((__v2si *)__p, __a);
627
+ }
628
+
629
+ static __inline__ void __attribute__((__always_inline__))
630
+ _mm_store_ss(float *__p, __m128 __a)
631
+ {
632
+ struct __mm_store_ss_struct {
633
+ float __u;
634
+ } __attribute__((__packed__, __may_alias__));
635
+ ((struct __mm_store_ss_struct*)__p)->__u = __a[0];
636
+ }
637
+
638
+ static __inline__ void __attribute__((__always_inline__, __nodebug__))
639
+ _mm_storeu_ps(float *__p, __m128 __a)
640
+ {
641
+ __builtin_ia32_storeups(__p, __a);
642
+ }
643
+
644
+ static __inline__ void __attribute__((__always_inline__, __nodebug__))
645
+ _mm_store1_ps(float *__p, __m128 __a)
646
+ {
647
+ __a = __builtin_shufflevector(__a, __a, 0, 0, 0, 0);
648
+ _mm_storeu_ps(__p, __a);
649
+ }
650
+
651
+ static __inline__ void __attribute__((__always_inline__, __nodebug__))
652
+ _mm_store_ps1(float *__p, __m128 __a)
653
+ {
654
+ return _mm_store1_ps(__p, __a);
655
+ }
656
+
657
+ static __inline__ void __attribute__((__always_inline__, __nodebug__))
658
+ _mm_store_ps(float *__p, __m128 __a)
659
+ {
660
+ *(__m128 *)__p = __a;
661
+ }
662
+
663
+ static __inline__ void __attribute__((__always_inline__, __nodebug__))
664
+ _mm_storer_ps(float *__p, __m128 __a)
665
+ {
666
+ __a = __builtin_shufflevector(__a, __a, 3, 2, 1, 0);
667
+ _mm_store_ps(__p, __a);
668
+ }
669
+
670
/* Hint selectors, presumably meant for the `sel` argument of _mm_prefetch,
   which forwards it as the third argument of __builtin_prefetch. */
+ #define _MM_HINT_T0 3
671
+ #define _MM_HINT_T1 2
672
+ #define _MM_HINT_T2 1
673
+ #define _MM_HINT_NTA 0
674
+
675
/* FIXME: We have to #define this because "sel" must be a constant integer, and
   Sema doesn't do any form of constant propagation yet. */

/* Prefetch-for-read of address `a`; `sel` is passed through as the third
   argument of __builtin_prefetch.  The address is cast to `const void *` so
   that passing a pointer to const data does not discard the qualifier. */
#define _mm_prefetch(a, sel) (__builtin_prefetch((const void *)(a), 0, (sel)))
679
+
680
+ static __inline__ void __attribute__((__always_inline__, __nodebug__))
681
+ _mm_stream_pi(__m64 *__p, __m64 __a)
682
+ {
683
+ __builtin_ia32_movntq(__p, __a);
684
+ }
685
+
686
+ static __inline__ void __attribute__((__always_inline__, __nodebug__))
687
+ _mm_stream_ps(float *__p, __m128 __a)
688
+ {
689
+ __builtin_ia32_movntps(__p, __a);
690
+ }
691
+
692
/* Issues the sfence builtin; takes no arguments and returns nothing. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_sfence(void)
{
  __builtin_ia32_sfence();
  return;
}
697
+
698
+ static __inline__ int __attribute__((__always_inline__, __nodebug__))
699
+ _mm_extract_pi16(__m64 __a, int __n)
700
+ {
701
+ __v4hi __b = (__v4hi)__a;
702
+ return (unsigned short)__b[__n & 3];
703
+ }
704
+
705
+ static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
706
+ _mm_insert_pi16(__m64 __a, int __d, int __n)
707
+ {
708
+ __v4hi __b = (__v4hi)__a;
709
+ __b[__n & 3] = __d;
710
+ return (__m64)__b;
711
+ }
712
+
713
+ static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
714
+ _mm_max_pi16(__m64 __a, __m64 __b)
715
+ {
716
+ return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b);
717
+ }
718
+
719
+ static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
720
+ _mm_max_pu8(__m64 __a, __m64 __b)
721
+ {
722
+ return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b);
723
+ }
724
+
725
+ static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
726
+ _mm_min_pi16(__m64 __a, __m64 __b)
727
+ {
728
+ return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b);
729
+ }
730
+
731
+ static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
732
+ _mm_min_pu8(__m64 __a, __m64 __b)
733
+ {
734
+ return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b);
735
+ }
736
+
737
+ static __inline__ int __attribute__((__always_inline__, __nodebug__))
738
+ _mm_movemask_pi8(__m64 __a)
739
+ {
740
+ return __builtin_ia32_pmovmskb((__v8qi)__a);
741
+ }
742
+
743
+ static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
744
+ _mm_mulhi_pu16(__m64 __a, __m64 __b)
745
+ {
746
+ return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b);
747
+ }
748
+
749
/* Applies the pshufw builtin to `a` (viewed as four 16-bit lanes) with the
   selector `n`.  Implemented as a macro, presumably so that `n` reaches the
   builtin as a constant expression.

   The argument is converted in place rather than copied into a macro-local
   `__a`: a local with that name would shadow a caller variable called `__a`
   (the conventional parameter name in this header) appearing in `a`. */
#define _mm_shuffle_pi16(a, n) __extension__ ({ \
  (__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)); })
752
+
753
+ static __inline__ void __attribute__((__always_inline__, __nodebug__))
754
+ _mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)
755
+ {
756
+ __builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p);
757
+ }
758
+
759
+ static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
760
+ _mm_avg_pu8(__m64 __a, __m64 __b)
761
+ {
762
+ return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b);
763
+ }
764
+
765
+ static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
766
+ _mm_avg_pu16(__m64 __a, __m64 __b)
767
+ {
768
+ return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b);
769
+ }
770
+
771
+ static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
772
+ _mm_sad_pu8(__m64 __a, __m64 __b)
773
+ {
774
+ return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b);
775
+ }
776
+
777
/* Returns the value produced by the stmxcsr builtin. */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
_mm_getcsr(void)
{
  unsigned int __csr = __builtin_ia32_stmxcsr();
  return __csr;
}
782
+
783
/* Passes __i to the ldmxcsr builtin. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_setcsr(unsigned int __i)
{
  unsigned int __csr = __i;

  __builtin_ia32_ldmxcsr(__csr);
}
788
+
789
/* Builds { a[m0], a[m1], b[m2], b[m3] } where m0..m3 are the four 2-bit
   fields of `mask`, lowest bits first.  `mask` is expanded four times and
   feeds __builtin_shufflevector indices, so it must be a constant
   expression.

   The operands are converted in place rather than copied into macro-locals
   named `__a`/`__b`: with such locals a call like _mm_shuffle_ps(__b, __a, m)
   from code whose own variables use those names would initialise the second
   local from the first local instead of from the caller's variable. */
#define _mm_shuffle_ps(a, b, mask) __extension__ ({ \
  (__m128)__builtin_shufflevector((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
                                  (mask) & 0x3, ((mask) & 0xc) >> 2, \
                                  (((mask) & 0x30) >> 4) + 4, \
                                  (((mask) & 0xc0) >> 6) + 4); })
796
+
797
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
798
+ _mm_unpackhi_ps(__m128 __a, __m128 __b)
799
+ {
800
+ return __builtin_shufflevector(__a, __b, 2, 6, 3, 7);
801
+ }
802
+
803
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
804
+ _mm_unpacklo_ps(__m128 __a, __m128 __b)
805
+ {
806
+ return __builtin_shufflevector(__a, __b, 0, 4, 1, 5);
807
+ }
808
+
809
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
810
+ _mm_move_ss(__m128 __a, __m128 __b)
811
+ {
812
+ return __builtin_shufflevector(__a, __b, 4, 1, 2, 3);
813
+ }
814
+
815
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
816
+ _mm_movehl_ps(__m128 __a, __m128 __b)
817
+ {
818
+ return __builtin_shufflevector(__a, __b, 6, 7, 2, 3);
819
+ }
820
+
821
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
822
+ _mm_movelh_ps(__m128 __a, __m128 __b)
823
+ {
824
+ return __builtin_shufflevector(__a, __b, 0, 1, 4, 5);
825
+ }
826
+
827
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
828
+ _mm_cvtpi16_ps(__m64 __a)
829
+ {
830
+ __m64 __b, __c;
831
+ __m128 __r;
832
+
833
+ __b = _mm_setzero_si64();
834
+ __b = _mm_cmpgt_pi16(__b, __a);
835
+ __c = _mm_unpackhi_pi16(__a, __b);
836
+ __r = _mm_setzero_ps();
837
+ __r = _mm_cvtpi32_ps(__r, __c);
838
+ __r = _mm_movelh_ps(__r, __r);
839
+ __c = _mm_unpacklo_pi16(__a, __b);
840
+ __r = _mm_cvtpi32_ps(__r, __c);
841
+
842
+ return __r;
843
+ }
844
+
845
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
846
+ _mm_cvtpu16_ps(__m64 __a)
847
+ {
848
+ __m64 __b, __c;
849
+ __m128 __r;
850
+
851
+ __b = _mm_setzero_si64();
852
+ __c = _mm_unpackhi_pi16(__a, __b);
853
+ __r = _mm_setzero_ps();
854
+ __r = _mm_cvtpi32_ps(__r, __c);
855
+ __r = _mm_movelh_ps(__r, __r);
856
+ __c = _mm_unpacklo_pi16(__a, __b);
857
+ __r = _mm_cvtpi32_ps(__r, __c);
858
+
859
+ return __r;
860
+ }
861
+
862
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
863
+ _mm_cvtpi8_ps(__m64 __a)
864
+ {
865
+ __m64 __b;
866
+
867
+ __b = _mm_setzero_si64();
868
+ __b = _mm_cmpgt_pi8(__b, __a);
869
+ __b = _mm_unpacklo_pi8(__a, __b);
870
+
871
+ return _mm_cvtpi16_ps(__b);
872
+ }
873
+
874
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
875
+ _mm_cvtpu8_ps(__m64 __a)
876
+ {
877
+ __m64 __b;
878
+
879
+ __b = _mm_setzero_si64();
880
+ __b = _mm_unpacklo_pi8(__a, __b);
881
+
882
+ return _mm_cvtpi16_ps(__b);
883
+ }
884
+
885
+ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
886
+ _mm_cvtpi32x2_ps(__m64 __a, __m64 __b)
887
+ {
888
+ __m128 __c;
889
+
890
+ __c = _mm_setzero_ps();
891
+ __c = _mm_cvtpi32_ps(__c, __b);
892
+ __c = _mm_movelh_ps(__c, __c);
893
+
894
+ return _mm_cvtpi32_ps(__c, __a);
895
+ }
896
+
897
+ static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
898
+ _mm_cvtps_pi16(__m128 __a)
899
+ {
900
+ __m64 __b, __c;
901
+
902
+ __b = _mm_cvtps_pi32(__a);
903
+ __a = _mm_movehl_ps(__a, __a);
904
+ __c = _mm_cvtps_pi32(__a);
905
+
906
+ return _mm_packs_pi16(__b, __c);
907
+ }
908
+
909
+ static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
910
+ _mm_cvtps_pi8(__m128 __a)
911
+ {
912
+ __m64 __b, __c;
913
+
914
+ __b = _mm_cvtps_pi16(__a);
915
+ __c = _mm_setzero_si64();
916
+
917
+ return _mm_packs_pi16(__b, __c);
918
+ }
919
+
920
+ static __inline__ int __attribute__((__always_inline__, __nodebug__))
921
+ _mm_movemask_ps(__m128 __a)
922
+ {
923
+ return __builtin_ia32_movmskps(__a);
924
+ }
925
+
926
/* Packs four 2-bit selectors into one immediate: w in bits 0-1, x in bits
   2-3, y in bits 4-5 and z in bits 6-7. */
#define _MM_SHUFFLE(z, y, x, w) \
  ((w) | ((x) << 2) | ((y) << 4) | ((z) << 6))
927
+
928
/* Exception-state bits of the value returned by _mm_getcsr.
   _MM_EXCEPT_MASK is the OR of the six individual bits and is what
   _MM_GET_EXCEPTION_STATE / _MM_SET_EXCEPTION_STATE select. */
+ #define _MM_EXCEPT_INVALID (0x0001)
929
+ #define _MM_EXCEPT_DENORM (0x0002)
930
+ #define _MM_EXCEPT_DIV_ZERO (0x0004)
931
+ #define _MM_EXCEPT_OVERFLOW (0x0008)
932
+ #define _MM_EXCEPT_UNDERFLOW (0x0010)
933
+ #define _MM_EXCEPT_INEXACT (0x0020)
934
+ #define _MM_EXCEPT_MASK (0x003f)
935
+
936
/* Exception-mask bits of the value returned by _mm_getcsr.
   _MM_MASK_MASK is the OR of the six individual bits and is what
   _MM_GET_EXCEPTION_MASK / _MM_SET_EXCEPTION_MASK select. */
+ #define _MM_MASK_INVALID (0x0080)
937
+ #define _MM_MASK_DENORM (0x0100)
938
+ #define _MM_MASK_DIV_ZERO (0x0200)
939
+ #define _MM_MASK_OVERFLOW (0x0400)
940
+ #define _MM_MASK_UNDERFLOW (0x0800)
941
+ #define _MM_MASK_INEXACT (0x1000)
942
+ #define _MM_MASK_MASK (0x1f80)
943
+
944
/* Rounding-mode values for the two-bit field selected by _MM_ROUND_MASK,
   as read by _MM_GET_ROUNDING_MODE and written by _MM_SET_ROUNDING_MODE. */
+ #define _MM_ROUND_NEAREST (0x0000)
945
+ #define _MM_ROUND_DOWN (0x2000)
946
+ #define _MM_ROUND_UP (0x4000)
947
+ #define _MM_ROUND_TOWARD_ZERO (0x6000)
948
+ #define _MM_ROUND_MASK (0x6000)
949
+
950
/* Values for the single bit selected by _MM_FLUSH_ZERO_MASK, as read by
   _MM_GET_FLUSH_ZERO_MODE and written by _MM_SET_FLUSH_ZERO_MODE. */
+ #define _MM_FLUSH_ZERO_MASK (0x8000)
951
+ #define _MM_FLUSH_ZERO_ON (0x8000)
952
+ #define _MM_FLUSH_ZERO_OFF (0x0000)
953
+
954
/* Field readers: each calls _mm_getcsr() and keeps only the bits of the
   corresponding *_MASK constant. */
+ #define _MM_GET_EXCEPTION_MASK() (_mm_getcsr() & _MM_MASK_MASK)
955
+ #define _MM_GET_EXCEPTION_STATE() (_mm_getcsr() & _MM_EXCEPT_MASK)
956
+ #define _MM_GET_FLUSH_ZERO_MODE() (_mm_getcsr() & _MM_FLUSH_ZERO_MASK)
957
+ #define _MM_GET_ROUNDING_MODE() (_mm_getcsr() & _MM_ROUND_MASK)
958
+
959
/* Field writers: each reads the current value with _mm_getcsr(), clears the
   bits of the corresponding *_MASK constant, ORs in x and writes the result
   back with _mm_setcsr().  x is not masked, so bits of x outside the field
   are ORed in as well. */
+ #define _MM_SET_EXCEPTION_MASK(x) (_mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (x)))
960
+ #define _MM_SET_EXCEPTION_STATE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (x)))
961
+ #define _MM_SET_FLUSH_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (x)))
962
+ #define _MM_SET_ROUNDING_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (x)))
963
+
964
/* Transposes, in place, the 4x4 float matrix whose rows are the four __m128
   lvalues row0..row3.  Each row argument is expanded twice (once read, once
   assigned), so it must be a side-effect-free lvalue.

   The temporaries use reserved (double-underscore) names: with plain names
   such as tmp0..tmp3, a caller whose own variables carry those names, e.g.
   _MM_TRANSPOSE4_PS(tmp0, tmp1, tmp2, tmp3), would have them shadowed by the
   macro's locals and the transpose would silently operate on the wrong
   objects. */
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
do { \
  __m128 __mm_tr_t3, __mm_tr_t2, __mm_tr_t1, __mm_tr_t0; \
  __mm_tr_t0 = _mm_unpacklo_ps((row0), (row1)); \
  __mm_tr_t2 = _mm_unpacklo_ps((row2), (row3)); \
  __mm_tr_t1 = _mm_unpackhi_ps((row0), (row1)); \
  __mm_tr_t3 = _mm_unpackhi_ps((row2), (row3)); \
  (row0) = _mm_movelh_ps(__mm_tr_t0, __mm_tr_t2); \
  (row1) = _mm_movehl_ps(__mm_tr_t2, __mm_tr_t0); \
  (row2) = _mm_movelh_ps(__mm_tr_t1, __mm_tr_t3); \
  (row3) = _mm_movehl_ps(__mm_tr_t3, __mm_tr_t1); \
} while (0)
976
+
977
/* Aliases for compatibility. */
/* Each _m_* name expands to the _mm_* intrinsic listed beside it.
   The `_m_` -> `_mm_` definition is kept once; the second, identical
   definition was redundant. */
#define _m_pextrw _mm_extract_pi16
#define _m_pinsrw _mm_insert_pi16
#define _m_pmaxsw _mm_max_pi16
#define _m_pmaxub _mm_max_pu8
#define _m_pminsw _mm_min_pi16
#define _m_pminub _mm_min_pu8
#define _m_pmovmskb _mm_movemask_pi8
#define _m_pmulhuw _mm_mulhi_pu16
#define _m_pshufw _mm_shuffle_pi16
#define _m_maskmovq _mm_maskmove_si64
#define _m_pavgb _mm_avg_pu8
#define _m_pavgw _mm_avg_pu16
#define _m_psadbw _mm_sad_pu8
#define _m_ _mm_
993
+
994
+ /* Ugly hack for backwards-compatibility (compatible with gcc) */
995
+ #ifdef __SSE2__
996
+ #include <emmintrin.h>
997
+ #endif
998
+
999
+ #endif /* __SSE__ */
1000
+
1001
+ #endif /* __XMMINTRIN_H */