gccxml_gem 0.9.3-x86-darwin-10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (176) hide show
  1. data/Rakefile +84 -0
  2. data/bin/gccxml +0 -0
  3. data/bin/gccxml_cc1plus +0 -0
  4. data/gccxml.rb +57 -0
  5. data/share/doc/gccxml-0.9/Copyright.txt +55 -0
  6. data/share/doc/gccxml-0.9/gccxml.html +168 -0
  7. data/share/doc/gccxml-0.9/gccxml.txt +293 -0
  8. data/share/gccxml-0.9/GCC/2.95/algorithm +76 -0
  9. data/share/gccxml-0.9/GCC/2.95/bitset +17 -0
  10. data/share/gccxml-0.9/GCC/2.95/cctype +24 -0
  11. data/share/gccxml-0.9/GCC/2.95/clocale +14 -0
  12. data/share/gccxml-0.9/GCC/2.95/cmath +33 -0
  13. data/share/gccxml-0.9/GCC/2.95/complex +38 -0
  14. data/share/gccxml-0.9/GCC/2.95/csetjmp +13 -0
  15. data/share/gccxml-0.9/GCC/2.95/csignal +14 -0
  16. data/share/gccxml-0.9/GCC/2.95/cstdarg +12 -0
  17. data/share/gccxml-0.9/GCC/2.95/cstddef +13 -0
  18. data/share/gccxml-0.9/GCC/2.95/cstdio +55 -0
  19. data/share/gccxml-0.9/GCC/2.95/cstdlib +66 -0
  20. data/share/gccxml-0.9/GCC/2.95/cstring +34 -0
  21. data/share/gccxml-0.9/GCC/2.95/ctime +24 -0
  22. data/share/gccxml-0.9/GCC/2.95/cwchar +65 -0
  23. data/share/gccxml-0.9/GCC/2.95/cwctype +31 -0
  24. data/share/gccxml-0.9/GCC/2.95/deque +19 -0
  25. data/share/gccxml-0.9/GCC/2.95/exception +20 -0
  26. data/share/gccxml-0.9/GCC/2.95/fstream +23 -0
  27. data/share/gccxml-0.9/GCC/2.95/functional +64 -0
  28. data/share/gccxml-0.9/GCC/2.95/gccxml_bitset +1066 -0
  29. data/share/gccxml-0.9/GCC/2.95/iomanip +20 -0
  30. data/share/gccxml-0.9/GCC/2.95/iosfwd +20 -0
  31. data/share/gccxml-0.9/GCC/2.95/iostream +27 -0
  32. data/share/gccxml-0.9/GCC/2.95/iterator +39 -0
  33. data/share/gccxml-0.9/GCC/2.95/list +19 -0
  34. data/share/gccxml-0.9/GCC/2.95/map +20 -0
  35. data/share/gccxml-0.9/GCC/2.95/memory +21 -0
  36. data/share/gccxml-0.9/GCC/2.95/new +13 -0
  37. data/share/gccxml-0.9/GCC/2.95/numeric +15 -0
  38. data/share/gccxml-0.9/GCC/2.95/pthread.h +16 -0
  39. data/share/gccxml-0.9/GCC/2.95/queue +20 -0
  40. data/share/gccxml-0.9/GCC/2.95/set +20 -0
  41. data/share/gccxml-0.9/GCC/2.95/sstream +24 -0
  42. data/share/gccxml-0.9/GCC/2.95/stack +19 -0
  43. data/share/gccxml-0.9/GCC/2.95/std/bastring.cc +524 -0
  44. data/share/gccxml-0.9/GCC/2.95/std/complext.h +397 -0
  45. data/share/gccxml-0.9/GCC/2.95/std/dcomplex.h +92 -0
  46. data/share/gccxml-0.9/GCC/2.95/std/fcomplex.h +88 -0
  47. data/share/gccxml-0.9/GCC/2.95/std/gslice_array.h +170 -0
  48. data/share/gccxml-0.9/GCC/2.95/std/indirect_array.h +157 -0
  49. data/share/gccxml-0.9/GCC/2.95/std/ldcomplex.h +96 -0
  50. data/share/gccxml-0.9/GCC/2.95/std/mask_array.h +154 -0
  51. data/share/gccxml-0.9/GCC/2.95/std/slice_array.h +156 -0
  52. data/share/gccxml-0.9/GCC/2.95/std/std_valarray.h +728 -0
  53. data/share/gccxml-0.9/GCC/2.95/std/straits.h +162 -0
  54. data/share/gccxml-0.9/GCC/2.95/std/valarray_meta.h +1035 -0
  55. data/share/gccxml-0.9/GCC/2.95/stdexcept +17 -0
  56. data/share/gccxml-0.9/GCC/2.95/stl_alloc.h +1057 -0
  57. data/share/gccxml-0.9/GCC/2.95/stl_bvector.h +836 -0
  58. data/share/gccxml-0.9/GCC/2.95/stl_deque.h +1699 -0
  59. data/share/gccxml-0.9/GCC/2.95/stl_list.h +843 -0
  60. data/share/gccxml-0.9/GCC/2.95/stl_tree.h +1331 -0
  61. data/share/gccxml-0.9/GCC/2.95/stl_vector.h +828 -0
  62. data/share/gccxml-0.9/GCC/2.95/string +26 -0
  63. data/share/gccxml-0.9/GCC/2.95/strstream +23 -0
  64. data/share/gccxml-0.9/GCC/2.95/typeinfo +11 -0
  65. data/share/gccxml-0.9/GCC/2.95/utility +25 -0
  66. data/share/gccxml-0.9/GCC/2.95/valarray +52 -0
  67. data/share/gccxml-0.9/GCC/2.95/vector +19 -0
  68. data/share/gccxml-0.9/GCC/2.96/sstream +305 -0
  69. data/share/gccxml-0.9/GCC/3.0/pthread.h +16 -0
  70. data/share/gccxml-0.9/GCC/3.1/pthread.h +16 -0
  71. data/share/gccxml-0.9/GCC/3.2/bits/fstream.tcc +500 -0
  72. data/share/gccxml-0.9/GCC/3.2/bits/gthr-default.h +585 -0
  73. data/share/gccxml-0.9/GCC/3.2/bits/istream.tcc +1207 -0
  74. data/share/gccxml-0.9/GCC/3.2/bits/locale_facets.h +1810 -0
  75. data/share/gccxml-0.9/GCC/3.2/bits/locale_facets.tcc +2397 -0
  76. data/share/gccxml-0.9/GCC/3.2/bits/messages_members.h +108 -0
  77. data/share/gccxml-0.9/GCC/3.2/bits/ostream.tcc +713 -0
  78. data/share/gccxml-0.9/GCC/3.2/bits/sstream.tcc +241 -0
  79. data/share/gccxml-0.9/GCC/3.2/bits/stl_deque.h +1682 -0
  80. data/share/gccxml-0.9/GCC/3.2/bits/stl_list.h +989 -0
  81. data/share/gccxml-0.9/GCC/3.2/bits/stl_tree.h +1462 -0
  82. data/share/gccxml-0.9/GCC/3.2/bits/stl_vector.h +1085 -0
  83. data/share/gccxml-0.9/GCC/3.2/bits/valarray_meta.h +1063 -0
  84. data/share/gccxml-0.9/GCC/3.2/fstream +579 -0
  85. data/share/gccxml-0.9/GCC/3.2/pthread.h +16 -0
  86. data/share/gccxml-0.9/GCC/3.2/sstream +384 -0
  87. data/share/gccxml-0.9/GCC/3.3/bits/fstream.tcc +530 -0
  88. data/share/gccxml-0.9/GCC/3.3/bits/list.tcc +378 -0
  89. data/share/gccxml-0.9/GCC/3.3/bits/locale_facets.h +2050 -0
  90. data/share/gccxml-0.9/GCC/3.3/bits/messages_members.h +108 -0
  91. data/share/gccxml-0.9/GCC/3.3/bits/sstream.tcc +243 -0
  92. data/share/gccxml-0.9/GCC/3.3/bits/stl_deque.h +1603 -0
  93. data/share/gccxml-0.9/GCC/3.3/bits/stl_list.h +1167 -0
  94. data/share/gccxml-0.9/GCC/3.3/bits/stl_tree.h +1462 -0
  95. data/share/gccxml-0.9/GCC/3.3/bits/stl_vector.h +992 -0
  96. data/share/gccxml-0.9/GCC/3.3/bits/valarray_meta.h +1135 -0
  97. data/share/gccxml-0.9/GCC/3.3/fstream +842 -0
  98. data/share/gccxml-0.9/GCC/3.3/gccxml_builtins.h +22 -0
  99. data/share/gccxml-0.9/GCC/3.3/sstream +638 -0
  100. data/share/gccxml-0.9/GCC/3.4/bits/gthr-default.h +669 -0
  101. data/share/gccxml-0.9/GCC/3.4/gccxml_builtins.h +91 -0
  102. data/share/gccxml-0.9/GCC/4.0/emmintrin.h +5 -0
  103. data/share/gccxml-0.9/GCC/4.0/gccxml_apple_emmintrin.h +1037 -0
  104. data/share/gccxml-0.9/GCC/4.0/gccxml_apple_mmintrin.h +669 -0
  105. data/share/gccxml-0.9/GCC/4.0/gccxml_apple_xmmintrin.h +870 -0
  106. data/share/gccxml-0.9/GCC/4.0/gccxml_builtins.h +128 -0
  107. data/share/gccxml-0.9/GCC/4.0/gccxml_gnu_emmintrin.h +977 -0
  108. data/share/gccxml-0.9/GCC/4.0/gccxml_gnu_mmintrin.h +636 -0
  109. data/share/gccxml-0.9/GCC/4.0/gccxml_gnu_xmmintrin.h +833 -0
  110. data/share/gccxml-0.9/GCC/4.0/mmintrin.h +5 -0
  111. data/share/gccxml-0.9/GCC/4.0/xmmintrin.h +5 -0
  112. data/share/gccxml-0.9/GCC/4.1/bits/gthr-default.h +622 -0
  113. data/share/gccxml-0.9/GCC/4.1/emmintrin.h +5 -0
  114. data/share/gccxml-0.9/GCC/4.1/gccxml_apple_emmintrin.h +1509 -0
  115. data/share/gccxml-0.9/GCC/4.1/gccxml_apple_mmintrin.h +942 -0
  116. data/share/gccxml-0.9/GCC/4.1/gccxml_apple_xmmintrin.h +1192 -0
  117. data/share/gccxml-0.9/GCC/4.1/gccxml_builtins.h +131 -0
  118. data/share/gccxml-0.9/GCC/4.1/gccxml_gnu_emmintrin.h +1004 -0
  119. data/share/gccxml-0.9/GCC/4.1/gccxml_gnu_mmintrin.h +637 -0
  120. data/share/gccxml-0.9/GCC/4.1/gccxml_gnu_xmmintrin.h +834 -0
  121. data/share/gccxml-0.9/GCC/4.1/mmintrin.h +5 -0
  122. data/share/gccxml-0.9/GCC/4.1/xmmintrin.h +5 -0
  123. data/share/gccxml-0.9/GCC/4.2/emmintrin.h +5 -0
  124. data/share/gccxml-0.9/GCC/4.2/gccxml_apple_emmintrin.h +1509 -0
  125. data/share/gccxml-0.9/GCC/4.2/gccxml_apple_mmintrin.h +942 -0
  126. data/share/gccxml-0.9/GCC/4.2/gccxml_apple_xmmintrin.h +1192 -0
  127. data/share/gccxml-0.9/GCC/4.2/gccxml_builtins.h +131 -0
  128. data/share/gccxml-0.9/GCC/4.2/gccxml_gnu_emmintrin.h +1013 -0
  129. data/share/gccxml-0.9/GCC/4.2/gccxml_gnu_mmintrin.h +663 -0
  130. data/share/gccxml-0.9/GCC/4.2/gccxml_gnu_xmmintrin.h +860 -0
  131. data/share/gccxml-0.9/GCC/4.2/mmintrin.h +5 -0
  132. data/share/gccxml-0.9/GCC/4.2/xmmintrin.h +5 -0
  133. data/share/gccxml-0.9/GCC/4.3/emmintrin.h +1043 -0
  134. data/share/gccxml-0.9/GCC/4.3/gccxml_builtins.h +183 -0
  135. data/share/gccxml-0.9/GCC/4.3/mmintrin.h +663 -0
  136. data/share/gccxml-0.9/GCC/4.3/xmmintrin.h +867 -0
  137. data/share/gccxml-0.9/GCC/4.4/bits/c++config.h +1431 -0
  138. data/share/gccxml-0.9/GCC/4.4/emmintrin.h +1041 -0
  139. data/share/gccxml-0.9/GCC/4.4/gccxml_builtins.h +153 -0
  140. data/share/gccxml-0.9/GCC/4.4/mmintrin.h +662 -0
  141. data/share/gccxml-0.9/GCC/4.4/xmmintrin.h +864 -0
  142. data/share/gccxml-0.9/GCC/4.5/gccxml_builtins.h +154 -0
  143. data/share/gccxml-0.9/GCC/4.5/iomanip +349 -0
  144. data/share/gccxml-0.9/GCC/COPYING.RUNTIME +73 -0
  145. data/share/gccxml-0.9/GCC/COPYING3 +674 -0
  146. data/share/gccxml-0.9/IBM/8.0/adapt_headers.sh +34 -0
  147. data/share/gccxml-0.9/IBM/8.0/stdlib.h.patch +27 -0
  148. data/share/gccxml-0.9/IBM/8.0/xstring.patch +19 -0
  149. data/share/gccxml-0.9/IBM/README +8 -0
  150. data/share/gccxml-0.9/IBM/find_flags +51 -0
  151. data/share/gccxml-0.9/IBM/find_flags_common +48 -0
  152. data/share/gccxml-0.9/Intel/find_flags +56 -0
  153. data/share/gccxml-0.9/Intel/pthread.h +16 -0
  154. data/share/gccxml-0.9/MIPSpro/7.3/exception +9 -0
  155. data/share/gccxml-0.9/MIPSpro/7.3/gccxml_mpro_internals.h +21 -0
  156. data/share/gccxml-0.9/MIPSpro/7.3/iomanip +161 -0
  157. data/share/gccxml-0.9/MIPSpro/7.3/ostream +9 -0
  158. data/share/gccxml-0.9/MIPSpro/7.3/stddef.h +9 -0
  159. data/share/gccxml-0.9/MIPSpro/7.3/stl_config.h +9 -0
  160. data/share/gccxml-0.9/MIPSpro/7.3/stl_locale.h +17 -0
  161. data/share/gccxml-0.9/MIPSpro/7.3/stl_monetary.h +14 -0
  162. data/share/gccxml-0.9/MIPSpro/7.3/stl_numeric_facets.h +13 -0
  163. data/share/gccxml-0.9/MIPSpro/7.3/stl_threads.h +11 -0
  164. data/share/gccxml-0.9/MIPSpro/7.3/string +18 -0
  165. data/share/gccxml-0.9/MIPSpro/find_flags +70 -0
  166. data/share/gccxml-0.9/MIPSpro/mipspro_defs.cxx +63 -0
  167. data/share/gccxml-0.9/Sun/5.8/Cstd.patch +156 -0
  168. data/share/gccxml-0.9/Sun/5.8/adapt_headers.sh +32 -0
  169. data/share/gccxml-0.9/Sun/5.8/std-5.10.patch +22 -0
  170. data/share/gccxml-0.9/Sun/README +8 -0
  171. data/share/gccxml-0.9/Sun/find_flags +51 -0
  172. data/share/gccxml-0.9/Sun/find_flags_common +42 -0
  173. data/share/gccxml-0.9/gccxml_config +2 -0
  174. data/share/gccxml-0.9/gccxml_identify_compiler.cc +13 -0
  175. data/share/man/man1/gccxml.1 +246 -0
  176. metadata +245 -0
@@ -0,0 +1,867 @@
1
+ /* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008
2
+ Free Software Foundation, Inc.
3
+
4
+ This file is part of GCC.
5
+
6
+ GCC is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2, or (at your option)
9
+ any later version.
10
+
11
+ GCC is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with GCC; see the file COPYING. If not, write to
18
+ the Free Software Foundation, 51 Franklin Street, Fifth Floor,
19
+ Boston, MA 02110-1301, USA. */
20
+
21
+ /* As a special exception, if you include this header file into source
22
+ files compiled by GCC, this header file does not by itself cause
23
+ the resulting executable to be covered by the GNU General Public
24
+ License. This exception does not however invalidate any other
25
+ reasons why the executable file might be covered by the GNU General
26
+ Public License. */
27
+
28
+ /* Implemented from the specification included in the Intel C++ Compiler
29
+ User Guide and Reference, version 9.0. */
30
+
31
+ #ifndef _XMMINTRIN_H_INCLUDED
32
+ #define _XMMINTRIN_H_INCLUDED
33
+
34
+ #ifndef __SSE__
35
+ # error "SSE instruction set not enabled"
36
+ #else
37
+
38
+ /* We need type definitions from the MMX header file. */
39
+ #include <mmintrin.h>
40
+
41
+ /* Get _mm_malloc () and _mm_free (). */
42
+ #include <mm_malloc.h>
43
+
44
+ /* The Intel API is flexible enough that we must allow aliasing with other
45
+ vector types, and their scalar components. */
46
+ typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
47
+
48
+ /* Internal data types for implementing the intrinsics. */
49
+ typedef float __v4sf __attribute__ ((__vector_size__ (16)));
50
+
51
+ /* Create a selector for use with the SHUFPS instruction. */
52
+ #define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
53
+ (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
54
+
55
+ /* Constants for use with _mm_prefetch. */
56
+ enum _mm_hint
57
+ {
58
+ _MM_HINT_T0 = 3,
59
+ _MM_HINT_T1 = 2,
60
+ _MM_HINT_T2 = 1,
61
+ _MM_HINT_NTA = 0
62
+ };
63
+
64
+ /* Bits in the MXCSR. */
65
+ #define _MM_EXCEPT_MASK 0x003f
66
+ #define _MM_EXCEPT_INVALID 0x0001
67
+ #define _MM_EXCEPT_DENORM 0x0002
68
+ #define _MM_EXCEPT_DIV_ZERO 0x0004
69
+ #define _MM_EXCEPT_OVERFLOW 0x0008
70
+ #define _MM_EXCEPT_UNDERFLOW 0x0010
71
+ #define _MM_EXCEPT_INEXACT 0x0020
72
+
73
+ #define _MM_MASK_MASK 0x1f80
74
+ #define _MM_MASK_INVALID 0x0080
75
+ #define _MM_MASK_DENORM 0x0100
76
+ #define _MM_MASK_DIV_ZERO 0x0200
77
+ #define _MM_MASK_OVERFLOW 0x0400
78
+ #define _MM_MASK_UNDERFLOW 0x0800
79
+ #define _MM_MASK_INEXACT 0x1000
80
+
81
+ #define _MM_ROUND_MASK 0x6000
82
+ #define _MM_ROUND_NEAREST 0x0000
83
+ #define _MM_ROUND_DOWN 0x2000
84
+ #define _MM_ROUND_UP 0x4000
85
+ #define _MM_ROUND_TOWARD_ZERO 0x6000
86
+
87
+ #define _MM_FLUSH_ZERO_MASK 0x8000
88
+ #define _MM_FLUSH_ZERO_ON 0x8000
89
+ #define _MM_FLUSH_ZERO_OFF 0x0000
90
+
91
+ /* Create a vector of zeros. */
92
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
93
+ _mm_setzero_ps (void)
94
+ ;
95
+
96
+ /* Perform the respective operation on the lower SPFP (single-precision
97
+ floating-point) values of A and B; the upper three SPFP values are
98
+ passed through from A. */
99
+
100
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
101
+ _mm_add_ss (__m128 __A, __m128 __B)
102
+ ;
103
+
104
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
105
+ _mm_sub_ss (__m128 __A, __m128 __B)
106
+ ;
107
+
108
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
109
+ _mm_mul_ss (__m128 __A, __m128 __B)
110
+ ;
111
+
112
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
113
+ _mm_div_ss (__m128 __A, __m128 __B)
114
+ ;
115
+
116
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
117
+ _mm_sqrt_ss (__m128 __A)
118
+ ;
119
+
120
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
121
+ _mm_rcp_ss (__m128 __A)
122
+ ;
123
+
124
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
125
+ _mm_rsqrt_ss (__m128 __A)
126
+ ;
127
+
128
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
129
+ _mm_min_ss (__m128 __A, __m128 __B)
130
+ ;
131
+
132
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
133
+ _mm_max_ss (__m128 __A, __m128 __B)
134
+ ;
135
+
136
+ /* Perform the respective operation on the four SPFP values in A and B. */
137
+
138
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
139
+ _mm_add_ps (__m128 __A, __m128 __B)
140
+ ;
141
+
142
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
143
+ _mm_sub_ps (__m128 __A, __m128 __B)
144
+ ;
145
+
146
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
147
+ _mm_mul_ps (__m128 __A, __m128 __B)
148
+ ;
149
+
150
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
151
+ _mm_div_ps (__m128 __A, __m128 __B)
152
+ ;
153
+
154
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
155
+ _mm_sqrt_ps (__m128 __A)
156
+ ;
157
+
158
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
159
+ _mm_rcp_ps (__m128 __A)
160
+ ;
161
+
162
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
163
+ _mm_rsqrt_ps (__m128 __A)
164
+ ;
165
+
166
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
167
+ _mm_min_ps (__m128 __A, __m128 __B)
168
+ ;
169
+
170
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
171
+ _mm_max_ps (__m128 __A, __m128 __B)
172
+ ;
173
+
174
+ /* Perform logical bit-wise operations on 128-bit values. */
175
+
176
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
177
+ _mm_and_ps (__m128 __A, __m128 __B)
178
+ ;
179
+
180
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
181
+ _mm_andnot_ps (__m128 __A, __m128 __B)
182
+ ;
183
+
184
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
185
+ _mm_or_ps (__m128 __A, __m128 __B)
186
+ ;
187
+
188
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
189
+ _mm_xor_ps (__m128 __A, __m128 __B)
190
+ ;
191
+
192
+ /* Perform a comparison on the lower SPFP values of A and B. If the
193
+ comparison is true, place a mask of all ones in the result, otherwise a
194
+ mask of zeros. The upper three SPFP values are passed through from A. */
195
+
196
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
197
+ _mm_cmpeq_ss (__m128 __A, __m128 __B)
198
+ ;
199
+
200
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
201
+ _mm_cmplt_ss (__m128 __A, __m128 __B)
202
+ ;
203
+
204
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
205
+ _mm_cmple_ss (__m128 __A, __m128 __B)
206
+ ;
207
+
208
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
209
+ _mm_cmpgt_ss (__m128 __A, __m128 __B)
210
+ ;
211
+
212
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
213
+ _mm_cmpge_ss (__m128 __A, __m128 __B)
214
+ ;
215
+
216
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
217
+ _mm_cmpneq_ss (__m128 __A, __m128 __B)
218
+ ;
219
+
220
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
221
+ _mm_cmpnlt_ss (__m128 __A, __m128 __B)
222
+ ;
223
+
224
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
225
+ _mm_cmpnle_ss (__m128 __A, __m128 __B)
226
+ ;
227
+
228
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
229
+ _mm_cmpngt_ss (__m128 __A, __m128 __B)
230
+ ;
231
+
232
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
233
+ _mm_cmpnge_ss (__m128 __A, __m128 __B)
234
+ ;
235
+
236
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
237
+ _mm_cmpord_ss (__m128 __A, __m128 __B)
238
+ ;
239
+
240
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
241
+ _mm_cmpunord_ss (__m128 __A, __m128 __B)
242
+ ;
243
+
244
+ /* Perform a comparison on the four SPFP values of A and B. For each
245
+ element, if the comparison is true, place a mask of all ones in the
246
+ result, otherwise a mask of zeros. */
247
+
248
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
249
+ _mm_cmpeq_ps (__m128 __A, __m128 __B)
250
+ ;
251
+
252
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
253
+ _mm_cmplt_ps (__m128 __A, __m128 __B)
254
+ ;
255
+
256
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
257
+ _mm_cmple_ps (__m128 __A, __m128 __B)
258
+ ;
259
+
260
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
261
+ _mm_cmpgt_ps (__m128 __A, __m128 __B)
262
+ ;
263
+
264
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
265
+ _mm_cmpge_ps (__m128 __A, __m128 __B)
266
+ ;
267
+
268
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
269
+ _mm_cmpneq_ps (__m128 __A, __m128 __B)
270
+ ;
271
+
272
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
273
+ _mm_cmpnlt_ps (__m128 __A, __m128 __B)
274
+ ;
275
+
276
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
277
+ _mm_cmpnle_ps (__m128 __A, __m128 __B)
278
+ ;
279
+
280
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
281
+ _mm_cmpngt_ps (__m128 __A, __m128 __B)
282
+ ;
283
+
284
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
285
+ _mm_cmpnge_ps (__m128 __A, __m128 __B)
286
+ ;
287
+
288
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
289
+ _mm_cmpord_ps (__m128 __A, __m128 __B)
290
+ ;
291
+
292
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
293
+ _mm_cmpunord_ps (__m128 __A, __m128 __B)
294
+ ;
295
+
296
+ /* Compare the lower SPFP values of A and B and return 1 if true
297
+ and 0 if false. */
298
+
299
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
300
+ _mm_comieq_ss (__m128 __A, __m128 __B)
301
+ ;
302
+
303
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
304
+ _mm_comilt_ss (__m128 __A, __m128 __B)
305
+ ;
306
+
307
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
308
+ _mm_comile_ss (__m128 __A, __m128 __B)
309
+ ;
310
+
311
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
312
+ _mm_comigt_ss (__m128 __A, __m128 __B)
313
+ ;
314
+
315
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
316
+ _mm_comige_ss (__m128 __A, __m128 __B)
317
+ ;
318
+
319
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
320
+ _mm_comineq_ss (__m128 __A, __m128 __B)
321
+ ;
322
+
323
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
324
+ _mm_ucomieq_ss (__m128 __A, __m128 __B)
325
+ ;
326
+
327
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
328
+ _mm_ucomilt_ss (__m128 __A, __m128 __B)
329
+ ;
330
+
331
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
332
+ _mm_ucomile_ss (__m128 __A, __m128 __B)
333
+ ;
334
+
335
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
336
+ _mm_ucomigt_ss (__m128 __A, __m128 __B)
337
+ ;
338
+
339
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
340
+ _mm_ucomige_ss (__m128 __A, __m128 __B)
341
+ ;
342
+
343
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
344
+ _mm_ucomineq_ss (__m128 __A, __m128 __B)
345
+ ;
346
+
347
+ /* Convert the lower SPFP value to a 32-bit integer according to the current
348
+ rounding mode. */
349
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
350
+ _mm_cvtss_si32 (__m128 __A)
351
+ ;
352
+
353
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
354
+ _mm_cvt_ss2si (__m128 __A)
355
+ ;
356
+
357
+ #ifdef __x86_64__
358
+ /* Convert the lower SPFP value to a 64-bit integer according to the
359
+ current rounding mode. */
360
+
361
+ /* Intel intrinsic. */
362
+ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
363
+ _mm_cvtss_si64 (__m128 __A)
364
+ ;
365
+
366
+ /* Microsoft intrinsic. */
367
+ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
368
+ _mm_cvtss_si64x (__m128 __A)
369
+ ;
370
+ #endif
371
+
372
+ /* Convert the two lower SPFP values to 32-bit integers according to the
373
+ current rounding mode. Return the integers in packed form. */
374
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
375
+ _mm_cvtps_pi32 (__m128 __A)
376
+ ;
377
+
378
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
379
+ _mm_cvt_ps2pi (__m128 __A)
380
+ ;
381
+
382
+ /* Truncate the lower SPFP value to a 32-bit integer. */
383
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
384
+ _mm_cvttss_si32 (__m128 __A)
385
+ ;
386
+
387
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
388
+ _mm_cvtt_ss2si (__m128 __A)
389
+ ;
390
+
391
+ #ifdef __x86_64__
392
+ /* Truncate the lower SPFP value to a 64-bit integer. */
393
+
394
+ /* Intel intrinsic. */
395
+ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
396
+ _mm_cvttss_si64 (__m128 __A)
397
+ ;
398
+
399
+ /* Microsoft intrinsic. */
400
+ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
401
+ _mm_cvttss_si64x (__m128 __A)
402
+ ;
403
+ #endif
404
+
405
+ /* Truncate the two lower SPFP values to 32-bit integers. Return the
406
+ integers in packed form. */
407
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
408
+ _mm_cvttps_pi32 (__m128 __A)
409
+ ;
410
+
411
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
412
+ _mm_cvtt_ps2pi (__m128 __A)
413
+ ;
414
+
415
+ /* Convert B to a SPFP value and insert it as element zero in A. */
416
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
417
+ _mm_cvtsi32_ss (__m128 __A, int __B)
418
+ ;
419
+
420
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
421
+ _mm_cvt_si2ss (__m128 __A, int __B)
422
+ ;
423
+
424
+ #ifdef __x86_64__
425
+ /* Convert B to a SPFP value and insert it as element zero in A. */
426
+
427
+ /* Intel intrinsic. */
428
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
429
+ _mm_cvtsi64_ss (__m128 __A, long long __B)
430
+ ;
431
+
432
+ /* Microsoft intrinsic. */
433
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
434
+ _mm_cvtsi64x_ss (__m128 __A, long long __B)
435
+ ;
436
+ #endif
437
+
438
+ /* Convert the two 32-bit values in B to SPFP form and insert them
439
+ as the two lower elements in A. */
440
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
441
+ _mm_cvtpi32_ps (__m128 __A, __m64 __B)
442
+ ;
443
+
444
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
445
+ _mm_cvt_pi2ps (__m128 __A, __m64 __B)
446
+ ;
447
+
448
+ /* Convert the four signed 16-bit values in A to SPFP form. */
449
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
450
+ _mm_cvtpi16_ps (__m64 __A)
451
+ ;
452
+
453
+ /* Convert the four unsigned 16-bit values in A to SPFP form. */
454
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
455
+ _mm_cvtpu16_ps (__m64 __A)
456
+ ;
457
+
458
+ /* Convert the low four signed 8-bit values in A to SPFP form. */
459
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
460
+ _mm_cvtpi8_ps (__m64 __A)
461
+ ;
462
+
463
+ /* Convert the low four unsigned 8-bit values in A to SPFP form. */
464
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
465
+ _mm_cvtpu8_ps(__m64 __A)
466
+ ;
467
+
468
+ /* Convert the four signed 32-bit values in A and B to SPFP form. */
469
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
470
+ _mm_cvtpi32x2_ps(__m64 __A, __m64 __B)
471
+ ;
472
+
473
+ /* Convert the four SPFP values in A to four signed 16-bit integers. */
474
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
475
+ _mm_cvtps_pi16(__m128 __A)
476
+ ;
477
+
478
+ /* Convert the four SPFP values in A to four signed 8-bit integers. */
479
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
480
+ _mm_cvtps_pi8(__m128 __A)
481
+ ;
482
+
483
+ /* Selects four specific SPFP values from A and B based on MASK. */
484
+ #ifdef __OPTIMIZE__
485
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
486
+ _mm_shuffle_ps (__m128 __A, __m128 __B, int const __mask)
487
+ ;
488
+ #else
489
+ #define _mm_shuffle_ps(A, B, MASK) \
490
+ ((__m128) __builtin_ia32_shufps ((__v4sf)(__m128)(A), \
491
+ (__v4sf)(__m128)(B), (int)(MASK)))
492
+ #endif
493
+
494
+ /* Selects and interleaves the upper two SPFP values from A and B. */
495
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
496
+ _mm_unpackhi_ps (__m128 __A, __m128 __B)
497
+ ;
498
+
499
+ /* Selects and interleaves the lower two SPFP values from A and B. */
500
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
501
+ _mm_unpacklo_ps (__m128 __A, __m128 __B)
502
+ ;
503
+
504
+ /* Sets the upper two SPFP values with 64 bits of data loaded from P;
505
+ the lower two values are passed through from A. */
506
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
507
+ _mm_loadh_pi (__m128 __A, __m64 const *__P)
508
+ ;
509
+
510
+ /* Stores the upper two SPFP values of A into P. */
511
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
512
+ _mm_storeh_pi (__m64 *__P, __m128 __A)
513
+ ;
514
+
515
+ /* Moves the upper two values of B into the lower two values of A. */
516
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
517
+ _mm_movehl_ps (__m128 __A, __m128 __B)
518
+ ;
519
+
520
+ /* Moves the lower two values of B into the upper two values of A. */
521
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
522
+ _mm_movelh_ps (__m128 __A, __m128 __B)
523
+ ;
524
+
525
+ /* Sets the lower two SPFP values with 64 bits of data loaded from P;
526
+ the upper two values are passed through from A. */
527
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
528
+ _mm_loadl_pi (__m128 __A, __m64 const *__P)
529
+ ;
530
+
531
+ /* Stores the lower two SPFP values of A into P. */
532
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
533
+ _mm_storel_pi (__m64 *__P, __m128 __A)
534
+ ;
535
+
536
+ /* Creates a 4-bit mask from the most significant bits of the SPFP values. */
537
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
538
+ _mm_movemask_ps (__m128 __A)
539
+ ;
540
+
541
+ /* Return the contents of the control register. */
542
+ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
543
+ _mm_getcsr (void)
544
+ ;
545
+
546
+ /* Read exception bits from the control register. */
547
+ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
548
+ _MM_GET_EXCEPTION_STATE (void)
549
+ ;
550
+ /* NOTE(review): by name, presumably reads the exception-mask bits; body not shown here -- confirm. */
551
+ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
552
+ _MM_GET_EXCEPTION_MASK (void)
553
+ ;
554
+ /* NOTE(review): by name, presumably reads the rounding-mode bits; body not shown here -- confirm. */
555
+ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
556
+ _MM_GET_ROUNDING_MODE (void)
557
+ ;
558
+ /* NOTE(review): by name, presumably reads the flush-zero mode bits; body not shown here -- confirm. */
559
+ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
560
+ _MM_GET_FLUSH_ZERO_MODE (void)
561
+ ;
562
+
563
+ /* Set the control register to I. */
564
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
565
+ _mm_setcsr (unsigned int __I)
566
+ ;
567
+
568
+ /* Set exception bits in the control register. */
569
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
570
+ _MM_SET_EXCEPTION_STATE(unsigned int __mask)
571
+ ;
572
+ /* NOTE(review): by name, presumably sets the exception-mask bits from __mask; body not shown here -- confirm. */
573
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
574
+ _MM_SET_EXCEPTION_MASK (unsigned int __mask)
575
+ ;
576
+ /* NOTE(review): by name, presumably sets the rounding-mode bits from __mode; body not shown here -- confirm. */
577
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
578
+ _MM_SET_ROUNDING_MODE (unsigned int __mode)
579
+ ;
580
+ /* NOTE(review): by name, presumably sets the flush-zero mode bits from __mode; body not shown here -- confirm. */
581
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
582
+ _MM_SET_FLUSH_ZERO_MODE (unsigned int __mode)
583
+ ;
584
+
585
+ /* Create a vector with element 0 as F and the rest zero. */
586
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
587
+ _mm_set_ss (float __F)
588
+ ;
589
+
590
+ /* Create a vector with all four elements equal to F. _mm_set_ps1 is declared with the same signature; NOTE(review): presumably an alternate name -- body not shown, confirm. */
591
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
592
+ _mm_set1_ps (float __F)
593
+ ;
594
+
595
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
596
+ _mm_set_ps1 (float __F)
597
+ ;
598
+
599
+ /* Create a vector with element 0 as *P and the rest zero. */
600
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
601
+ _mm_load_ss (float const *__P)
602
+ ;
603
+
604
+ /* Create a vector with all four elements equal to *P. _mm_load_ps1 is declared with the same signature; NOTE(review): presumably an alternate name -- body not shown, confirm. */
605
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
606
+ _mm_load1_ps (float const *__P)
607
+ ;
608
+
609
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
610
+ _mm_load_ps1 (float const *__P)
611
+ ;
612
+
613
+ /* Load four SPFP values from P. The address must be 16-byte aligned. */
614
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
615
+ _mm_load_ps (float const *__P)
616
+ ;
617
+
618
+ /* Load four SPFP values from P. The address need not be 16-byte aligned. */
619
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
620
+ _mm_loadu_ps (float const *__P)
621
+ ;
622
+
623
+ /* Load four SPFP values in reverse order. The address must be aligned. */
624
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
625
+ _mm_loadr_ps (float const *__P)
626
+ ;
627
+
628
+ /* Create the vector [Z Y X W]. */
629
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
630
+ _mm_set_ps (const float __Z, const float __Y, const float __X, const float __W)
631
+ ;
632
+
633
+ /* Create the vector [W X Y Z]. Note that the parameters are still declared in the order __Z, __Y, __X, __W, exactly as for _mm_set_ps. */
634
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
635
+ _mm_setr_ps (float __Z, float __Y, float __X, float __W)
636
+ ;
637
+
638
+ /* Stores the lower SPFP value. */
639
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
640
+ _mm_store_ss (float *__P, __m128 __A)
641
+ ;
642
+ /* Takes A by value and returns a float. NOTE(review): presumably the lower SPFP value of A; body not shown here -- confirm. */
643
+ extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
644
+ _mm_cvtss_f32 (__m128 __A)
645
+ ;
646
+
647
+ /* Store four SPFP values. The address must be 16-byte aligned. */
648
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
649
+ _mm_store_ps (float *__P, __m128 __A)
650
+ ;
651
+
652
+ /* Store four SPFP values. The address need not be 16-byte aligned. */
653
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
654
+ _mm_storeu_ps (float *__P, __m128 __A)
655
+ ;
656
+
657
+ /* Store the lower SPFP value across four words. _mm_store_ps1 is declared with the same signature; NOTE(review): presumably an alternate name -- body not shown, confirm. */
658
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
659
+ _mm_store1_ps (float *__P, __m128 __A)
660
+ ;
661
+
662
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
663
+ _mm_store_ps1 (float *__P, __m128 __A)
664
+ ;
665
+
666
+ /* Store four SPFP values in reverse order. The address must be aligned. */
667
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
668
+ _mm_storer_ps (float *__P, __m128 __A)
669
+ ;
670
+
671
+ /* Sets the low SPFP value of A from the low value of B. */
672
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
673
+ _mm_move_ss (__m128 __A, __m128 __B)
674
+ ;
675
+
676
+ /* Extracts one of the four words of A. The selector N must be immediate. Declared as functions when __OPTIMIZE__ is defined, provided as macros otherwise. */
677
+ #ifdef __OPTIMIZE__
678
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
679
+ _mm_extract_pi16 (__m64 const __A, int const __N)
680
+ ;
681
+
682
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
683
+ _m_pextrw (__m64 const __A, int const __N)
684
+ ;
685
+ #else /* !__OPTIMIZE__: macro form built on __builtin_ia32_vec_ext_v4hi */
686
+ #define _mm_extract_pi16(A, N) \
687
+ ((int) __builtin_ia32_vec_ext_v4hi ((__v4hi)(__m64)(A), (int)(N)))
688
+ /* _m_pextrw is an alternate name: it expands to _mm_extract_pi16 with the same arguments. */
689
+ #define _m_pextrw(A, N) _mm_extract_pi16(A, N)
690
+ #endif /* __OPTIMIZE__ */
691
+
692
+ /* Inserts word D into one of four words of A. The selector N must be
693
+ immediate. Declared as functions when __OPTIMIZE__ is defined, provided as macros otherwise. */
694
+ #ifdef __OPTIMIZE__
695
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
696
+ _mm_insert_pi16 (__m64 const __A, int const __D, int const __N)
697
+ ;
698
+
699
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
700
+ _m_pinsrw (__m64 const __A, int const __D, int const __N)
701
+ ;
702
+ #else /* !__OPTIMIZE__: macro form built on __builtin_ia32_vec_set_v4hi */
703
+ #define _mm_insert_pi16(A, D, N) \
704
+ ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(__m64)(A), \
705
+ (int)(D), (int)(N)))
706
+ /* _m_pinsrw is an alternate name: it expands to _mm_insert_pi16 with the same arguments. */
707
+ #define _m_pinsrw(A, D, N) _mm_insert_pi16(A, D, N)
708
+ #endif /* __OPTIMIZE__ */
709
+
710
+ /* Compute the element-wise maximum of signed 16-bit values. _m_pmaxsw is declared with the same signature; NOTE(review): presumably an alias -- body not shown, confirm. */
711
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
712
+ _mm_max_pi16 (__m64 __A, __m64 __B)
713
+ ;
714
+
715
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
716
+ _m_pmaxsw (__m64 __A, __m64 __B)
717
+ ;
718
+
719
+ /* Compute the element-wise maximum of unsigned 8-bit values. _m_pmaxub is declared with the same signature; NOTE(review): presumably an alias -- body not shown, confirm. */
720
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
721
+ _mm_max_pu8 (__m64 __A, __m64 __B)
722
+ ;
723
+
724
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
725
+ _m_pmaxub (__m64 __A, __m64 __B)
726
+ ;
727
+
728
+ /* Compute the element-wise minimum of signed 16-bit values. _m_pminsw is declared with the same signature; NOTE(review): presumably an alias -- body not shown, confirm. */
729
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
730
+ _mm_min_pi16 (__m64 __A, __m64 __B)
731
+ ;
732
+
733
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
734
+ _m_pminsw (__m64 __A, __m64 __B)
735
+ ;
736
+
737
+ /* Compute the element-wise minimum of unsigned 8-bit values. _m_pminub is declared with the same signature; NOTE(review): presumably an alias -- body not shown, confirm. */
738
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
739
+ _mm_min_pu8 (__m64 __A, __m64 __B)
740
+ ;
741
+
742
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
743
+ _m_pminub (__m64 __A, __m64 __B)
744
+ ;
745
+
746
+ /* Create an 8-bit mask of the signs of 8-bit values. _m_pmovmskb is declared with the same signature; NOTE(review): presumably an alias -- body not shown, confirm. */
747
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
748
+ _mm_movemask_pi8 (__m64 __A)
749
+ ;
750
+
751
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
752
+ _m_pmovmskb (__m64 __A)
753
+ ;
754
+
755
+ /* Multiply four unsigned 16-bit values in A by four unsigned 16-bit values
756
+ in B and produce the high 16 bits of the 32-bit results. _m_pmulhuw is declared with the same signature; NOTE(review): presumably an alias -- body not shown, confirm. */
757
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
758
+ _mm_mulhi_pu16 (__m64 __A, __m64 __B)
759
+ ;
760
+
761
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
762
+ _m_pmulhuw (__m64 __A, __m64 __B)
763
+ ;
764
+
765
+ /* Return a combination of the four 16-bit values in A. The selector
766
+ must be an immediate. Declared as functions when __OPTIMIZE__ is defined, provided as macros otherwise. */
767
+ #ifdef __OPTIMIZE__
768
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
769
+ _mm_shuffle_pi16 (__m64 __A, int const __N)
770
+ ;
771
+
772
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
773
+ _m_pshufw (__m64 __A, int const __N)
774
+ ;
775
+ #else /* !__OPTIMIZE__: macro form built on __builtin_ia32_pshufw */
776
+ #define _mm_shuffle_pi16(A, N) \
777
+ ((__m64) __builtin_ia32_pshufw ((__v4hi)(__m64)(A), (int)(N)))
778
+ /* _m_pshufw is an alternate name: it expands to _mm_shuffle_pi16 with the same arguments. */
779
+ #define _m_pshufw(A, N) _mm_shuffle_pi16 (A, N)
780
+ #endif /* __OPTIMIZE__ */
781
+
782
+ /* Conditionally store byte elements of A into P. The high bit of each
783
+ byte in the selector N determines whether the corresponding byte from
784
+ A is stored. _m_maskmovq is declared with the same signature; NOTE(review): presumably an alias -- body not shown, confirm. */
785
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
786
+ _mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
787
+ ;
788
+
789
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
790
+ _m_maskmovq (__m64 __A, __m64 __N, char *__P)
791
+ ;
792
+
793
+ /* Compute the rounded averages of the unsigned 8-bit values in A and B. _m_pavgb is declared with the same signature; NOTE(review): presumably an alias -- body not shown, confirm. */
794
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
795
+ _mm_avg_pu8 (__m64 __A, __m64 __B)
796
+ ;
797
+
798
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
799
+ _m_pavgb (__m64 __A, __m64 __B)
800
+ ;
801
+
802
+ /* Compute the rounded averages of the unsigned 16-bit values in A and B. _m_pavgw is declared with the same signature; NOTE(review): presumably an alias -- body not shown, confirm. */
803
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
804
+ _mm_avg_pu16 (__m64 __A, __m64 __B)
805
+ ;
806
+
807
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
808
+ _m_pavgw (__m64 __A, __m64 __B)
809
+ ;
810
+
811
+ /* Compute the sum of the absolute differences of the unsigned 8-bit
812
+ values in A and B. Return the value in the lower 16-bit word; the
813
+ upper words are cleared. _m_psadbw is declared with the same signature; NOTE(review): presumably an alias -- body not shown, confirm. */
814
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
815
+ _mm_sad_pu8 (__m64 __A, __m64 __B)
816
+ ;
817
+
818
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
819
+ _m_psadbw (__m64 __A, __m64 __B)
820
+ ;
821
+
822
+ /* Loads one cache line from address P to a location "closer" to the
823
+ processor. The selector I specifies the type of prefetch operation. Declared as a function when __OPTIMIZE__ is defined; otherwise a macro that forwards P and I to __builtin_prefetch with 0 as the middle argument. */
824
+ #ifdef __OPTIMIZE__
825
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
826
+ _mm_prefetch (const void *__P, enum _mm_hint __I)
827
+ ;
828
+ #else /* !__OPTIMIZE__: macro form */
829
+ #define _mm_prefetch(P, I) \
830
+ __builtin_prefetch ((P), 0, (I))
831
+ #endif /* __OPTIMIZE__ */
832
+
833
+ /* Stores the data in A to the address P without polluting the caches. */
834
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
835
+ _mm_stream_pi (__m64 *__P, __m64 __A)
836
+ ;
837
+
838
+ /* Stores the data in A to the address P without polluting the caches. The address must be 16-byte aligned. */
839
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
840
+ _mm_stream_ps (float *__P, __m128 __A)
841
+ ;
842
+
843
+ /* Guarantees that every preceding store is globally visible before
844
+ any subsequent store. */
845
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
846
+ _mm_sfence (void)
847
+ ;
848
+
849
+ /* The execution of the next instruction is delayed by an implementation
850
+ specific amount of time. The instruction does not modify the
851
+ architectural state. */
852
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
853
+ _mm_pause (void)
854
+ ;
855
+
856
+ /* Transpose the 4x4 matrix composed of row[0-3]. NOTE: in this copy the do/while body is empty, so the macro expands to a no-op and never touches its arguments. */
857
+ #define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
858
+ do { \
859
+ } while (0)
860
+
861
+ /* For backward source compatibility. */
862
+ #ifdef __SSE2__
863
+ # include <emmintrin.h>
864
+ #endif
865
+
866
+ #endif /* __SSE__ */
867
+ #endif /* _XMMINTRIN_H_INCLUDED */