gccxml_gem 0.9.3-x86-darwin-11

Sign up to get free protection for your applications and to get access to all the features.
Files changed (176) hide show
  1. data/Rakefile +84 -0
  2. data/bin/gccxml +0 -0
  3. data/bin/gccxml_cc1plus +0 -0
  4. data/gccxml.rb +57 -0
  5. data/share/doc/gccxml-0.9/Copyright.txt +55 -0
  6. data/share/doc/gccxml-0.9/gccxml.html +168 -0
  7. data/share/doc/gccxml-0.9/gccxml.txt +293 -0
  8. data/share/gccxml-0.9/GCC/2.95/algorithm +76 -0
  9. data/share/gccxml-0.9/GCC/2.95/bitset +17 -0
  10. data/share/gccxml-0.9/GCC/2.95/cctype +24 -0
  11. data/share/gccxml-0.9/GCC/2.95/clocale +14 -0
  12. data/share/gccxml-0.9/GCC/2.95/cmath +33 -0
  13. data/share/gccxml-0.9/GCC/2.95/complex +38 -0
  14. data/share/gccxml-0.9/GCC/2.95/csetjmp +13 -0
  15. data/share/gccxml-0.9/GCC/2.95/csignal +14 -0
  16. data/share/gccxml-0.9/GCC/2.95/cstdarg +12 -0
  17. data/share/gccxml-0.9/GCC/2.95/cstddef +13 -0
  18. data/share/gccxml-0.9/GCC/2.95/cstdio +55 -0
  19. data/share/gccxml-0.9/GCC/2.95/cstdlib +66 -0
  20. data/share/gccxml-0.9/GCC/2.95/cstring +34 -0
  21. data/share/gccxml-0.9/GCC/2.95/ctime +24 -0
  22. data/share/gccxml-0.9/GCC/2.95/cwchar +65 -0
  23. data/share/gccxml-0.9/GCC/2.95/cwctype +31 -0
  24. data/share/gccxml-0.9/GCC/2.95/deque +19 -0
  25. data/share/gccxml-0.9/GCC/2.95/exception +20 -0
  26. data/share/gccxml-0.9/GCC/2.95/fstream +23 -0
  27. data/share/gccxml-0.9/GCC/2.95/functional +64 -0
  28. data/share/gccxml-0.9/GCC/2.95/gccxml_bitset +1066 -0
  29. data/share/gccxml-0.9/GCC/2.95/iomanip +20 -0
  30. data/share/gccxml-0.9/GCC/2.95/iosfwd +20 -0
  31. data/share/gccxml-0.9/GCC/2.95/iostream +27 -0
  32. data/share/gccxml-0.9/GCC/2.95/iterator +39 -0
  33. data/share/gccxml-0.9/GCC/2.95/list +19 -0
  34. data/share/gccxml-0.9/GCC/2.95/map +20 -0
  35. data/share/gccxml-0.9/GCC/2.95/memory +21 -0
  36. data/share/gccxml-0.9/GCC/2.95/new +13 -0
  37. data/share/gccxml-0.9/GCC/2.95/numeric +15 -0
  38. data/share/gccxml-0.9/GCC/2.95/pthread.h +16 -0
  39. data/share/gccxml-0.9/GCC/2.95/queue +20 -0
  40. data/share/gccxml-0.9/GCC/2.95/set +20 -0
  41. data/share/gccxml-0.9/GCC/2.95/sstream +24 -0
  42. data/share/gccxml-0.9/GCC/2.95/stack +19 -0
  43. data/share/gccxml-0.9/GCC/2.95/std/bastring.cc +524 -0
  44. data/share/gccxml-0.9/GCC/2.95/std/complext.h +397 -0
  45. data/share/gccxml-0.9/GCC/2.95/std/dcomplex.h +92 -0
  46. data/share/gccxml-0.9/GCC/2.95/std/fcomplex.h +88 -0
  47. data/share/gccxml-0.9/GCC/2.95/std/gslice_array.h +170 -0
  48. data/share/gccxml-0.9/GCC/2.95/std/indirect_array.h +157 -0
  49. data/share/gccxml-0.9/GCC/2.95/std/ldcomplex.h +96 -0
  50. data/share/gccxml-0.9/GCC/2.95/std/mask_array.h +154 -0
  51. data/share/gccxml-0.9/GCC/2.95/std/slice_array.h +156 -0
  52. data/share/gccxml-0.9/GCC/2.95/std/std_valarray.h +728 -0
  53. data/share/gccxml-0.9/GCC/2.95/std/straits.h +162 -0
  54. data/share/gccxml-0.9/GCC/2.95/std/valarray_meta.h +1035 -0
  55. data/share/gccxml-0.9/GCC/2.95/stdexcept +17 -0
  56. data/share/gccxml-0.9/GCC/2.95/stl_alloc.h +1057 -0
  57. data/share/gccxml-0.9/GCC/2.95/stl_bvector.h +836 -0
  58. data/share/gccxml-0.9/GCC/2.95/stl_deque.h +1699 -0
  59. data/share/gccxml-0.9/GCC/2.95/stl_list.h +843 -0
  60. data/share/gccxml-0.9/GCC/2.95/stl_tree.h +1331 -0
  61. data/share/gccxml-0.9/GCC/2.95/stl_vector.h +828 -0
  62. data/share/gccxml-0.9/GCC/2.95/string +26 -0
  63. data/share/gccxml-0.9/GCC/2.95/strstream +23 -0
  64. data/share/gccxml-0.9/GCC/2.95/typeinfo +11 -0
  65. data/share/gccxml-0.9/GCC/2.95/utility +25 -0
  66. data/share/gccxml-0.9/GCC/2.95/valarray +52 -0
  67. data/share/gccxml-0.9/GCC/2.95/vector +19 -0
  68. data/share/gccxml-0.9/GCC/2.96/sstream +305 -0
  69. data/share/gccxml-0.9/GCC/3.0/pthread.h +16 -0
  70. data/share/gccxml-0.9/GCC/3.1/pthread.h +16 -0
  71. data/share/gccxml-0.9/GCC/3.2/bits/fstream.tcc +500 -0
  72. data/share/gccxml-0.9/GCC/3.2/bits/gthr-default.h +585 -0
  73. data/share/gccxml-0.9/GCC/3.2/bits/istream.tcc +1207 -0
  74. data/share/gccxml-0.9/GCC/3.2/bits/locale_facets.h +1810 -0
  75. data/share/gccxml-0.9/GCC/3.2/bits/locale_facets.tcc +2397 -0
  76. data/share/gccxml-0.9/GCC/3.2/bits/messages_members.h +108 -0
  77. data/share/gccxml-0.9/GCC/3.2/bits/ostream.tcc +713 -0
  78. data/share/gccxml-0.9/GCC/3.2/bits/sstream.tcc +241 -0
  79. data/share/gccxml-0.9/GCC/3.2/bits/stl_deque.h +1682 -0
  80. data/share/gccxml-0.9/GCC/3.2/bits/stl_list.h +989 -0
  81. data/share/gccxml-0.9/GCC/3.2/bits/stl_tree.h +1462 -0
  82. data/share/gccxml-0.9/GCC/3.2/bits/stl_vector.h +1085 -0
  83. data/share/gccxml-0.9/GCC/3.2/bits/valarray_meta.h +1063 -0
  84. data/share/gccxml-0.9/GCC/3.2/fstream +579 -0
  85. data/share/gccxml-0.9/GCC/3.2/pthread.h +16 -0
  86. data/share/gccxml-0.9/GCC/3.2/sstream +384 -0
  87. data/share/gccxml-0.9/GCC/3.3/bits/fstream.tcc +530 -0
  88. data/share/gccxml-0.9/GCC/3.3/bits/list.tcc +378 -0
  89. data/share/gccxml-0.9/GCC/3.3/bits/locale_facets.h +2050 -0
  90. data/share/gccxml-0.9/GCC/3.3/bits/messages_members.h +108 -0
  91. data/share/gccxml-0.9/GCC/3.3/bits/sstream.tcc +243 -0
  92. data/share/gccxml-0.9/GCC/3.3/bits/stl_deque.h +1603 -0
  93. data/share/gccxml-0.9/GCC/3.3/bits/stl_list.h +1167 -0
  94. data/share/gccxml-0.9/GCC/3.3/bits/stl_tree.h +1462 -0
  95. data/share/gccxml-0.9/GCC/3.3/bits/stl_vector.h +992 -0
  96. data/share/gccxml-0.9/GCC/3.3/bits/valarray_meta.h +1135 -0
  97. data/share/gccxml-0.9/GCC/3.3/fstream +842 -0
  98. data/share/gccxml-0.9/GCC/3.3/gccxml_builtins.h +22 -0
  99. data/share/gccxml-0.9/GCC/3.3/sstream +638 -0
  100. data/share/gccxml-0.9/GCC/3.4/bits/gthr-default.h +669 -0
  101. data/share/gccxml-0.9/GCC/3.4/gccxml_builtins.h +91 -0
  102. data/share/gccxml-0.9/GCC/4.0/emmintrin.h +5 -0
  103. data/share/gccxml-0.9/GCC/4.0/gccxml_apple_emmintrin.h +1037 -0
  104. data/share/gccxml-0.9/GCC/4.0/gccxml_apple_mmintrin.h +669 -0
  105. data/share/gccxml-0.9/GCC/4.0/gccxml_apple_xmmintrin.h +870 -0
  106. data/share/gccxml-0.9/GCC/4.0/gccxml_builtins.h +128 -0
  107. data/share/gccxml-0.9/GCC/4.0/gccxml_gnu_emmintrin.h +977 -0
  108. data/share/gccxml-0.9/GCC/4.0/gccxml_gnu_mmintrin.h +636 -0
  109. data/share/gccxml-0.9/GCC/4.0/gccxml_gnu_xmmintrin.h +833 -0
  110. data/share/gccxml-0.9/GCC/4.0/mmintrin.h +5 -0
  111. data/share/gccxml-0.9/GCC/4.0/xmmintrin.h +5 -0
  112. data/share/gccxml-0.9/GCC/4.1/bits/gthr-default.h +622 -0
  113. data/share/gccxml-0.9/GCC/4.1/emmintrin.h +5 -0
  114. data/share/gccxml-0.9/GCC/4.1/gccxml_apple_emmintrin.h +1509 -0
  115. data/share/gccxml-0.9/GCC/4.1/gccxml_apple_mmintrin.h +942 -0
  116. data/share/gccxml-0.9/GCC/4.1/gccxml_apple_xmmintrin.h +1192 -0
  117. data/share/gccxml-0.9/GCC/4.1/gccxml_builtins.h +131 -0
  118. data/share/gccxml-0.9/GCC/4.1/gccxml_gnu_emmintrin.h +1004 -0
  119. data/share/gccxml-0.9/GCC/4.1/gccxml_gnu_mmintrin.h +637 -0
  120. data/share/gccxml-0.9/GCC/4.1/gccxml_gnu_xmmintrin.h +834 -0
  121. data/share/gccxml-0.9/GCC/4.1/mmintrin.h +5 -0
  122. data/share/gccxml-0.9/GCC/4.1/xmmintrin.h +5 -0
  123. data/share/gccxml-0.9/GCC/4.2/emmintrin.h +5 -0
  124. data/share/gccxml-0.9/GCC/4.2/gccxml_apple_emmintrin.h +1509 -0
  125. data/share/gccxml-0.9/GCC/4.2/gccxml_apple_mmintrin.h +942 -0
  126. data/share/gccxml-0.9/GCC/4.2/gccxml_apple_xmmintrin.h +1192 -0
  127. data/share/gccxml-0.9/GCC/4.2/gccxml_builtins.h +131 -0
  128. data/share/gccxml-0.9/GCC/4.2/gccxml_gnu_emmintrin.h +1013 -0
  129. data/share/gccxml-0.9/GCC/4.2/gccxml_gnu_mmintrin.h +663 -0
  130. data/share/gccxml-0.9/GCC/4.2/gccxml_gnu_xmmintrin.h +860 -0
  131. data/share/gccxml-0.9/GCC/4.2/mmintrin.h +5 -0
  132. data/share/gccxml-0.9/GCC/4.2/xmmintrin.h +5 -0
  133. data/share/gccxml-0.9/GCC/4.3/emmintrin.h +1043 -0
  134. data/share/gccxml-0.9/GCC/4.3/gccxml_builtins.h +183 -0
  135. data/share/gccxml-0.9/GCC/4.3/mmintrin.h +663 -0
  136. data/share/gccxml-0.9/GCC/4.3/xmmintrin.h +867 -0
  137. data/share/gccxml-0.9/GCC/4.4/bits/c++config.h +1431 -0
  138. data/share/gccxml-0.9/GCC/4.4/emmintrin.h +1041 -0
  139. data/share/gccxml-0.9/GCC/4.4/gccxml_builtins.h +153 -0
  140. data/share/gccxml-0.9/GCC/4.4/mmintrin.h +662 -0
  141. data/share/gccxml-0.9/GCC/4.4/xmmintrin.h +864 -0
  142. data/share/gccxml-0.9/GCC/4.5/gccxml_builtins.h +154 -0
  143. data/share/gccxml-0.9/GCC/4.5/iomanip +349 -0
  144. data/share/gccxml-0.9/GCC/COPYING.RUNTIME +73 -0
  145. data/share/gccxml-0.9/GCC/COPYING3 +674 -0
  146. data/share/gccxml-0.9/IBM/8.0/adapt_headers.sh +34 -0
  147. data/share/gccxml-0.9/IBM/8.0/stdlib.h.patch +27 -0
  148. data/share/gccxml-0.9/IBM/8.0/xstring.patch +19 -0
  149. data/share/gccxml-0.9/IBM/README +8 -0
  150. data/share/gccxml-0.9/IBM/find_flags +51 -0
  151. data/share/gccxml-0.9/IBM/find_flags_common +48 -0
  152. data/share/gccxml-0.9/Intel/find_flags +56 -0
  153. data/share/gccxml-0.9/Intel/pthread.h +16 -0
  154. data/share/gccxml-0.9/MIPSpro/7.3/exception +9 -0
  155. data/share/gccxml-0.9/MIPSpro/7.3/gccxml_mpro_internals.h +21 -0
  156. data/share/gccxml-0.9/MIPSpro/7.3/iomanip +161 -0
  157. data/share/gccxml-0.9/MIPSpro/7.3/ostream +9 -0
  158. data/share/gccxml-0.9/MIPSpro/7.3/stddef.h +9 -0
  159. data/share/gccxml-0.9/MIPSpro/7.3/stl_config.h +9 -0
  160. data/share/gccxml-0.9/MIPSpro/7.3/stl_locale.h +17 -0
  161. data/share/gccxml-0.9/MIPSpro/7.3/stl_monetary.h +14 -0
  162. data/share/gccxml-0.9/MIPSpro/7.3/stl_numeric_facets.h +13 -0
  163. data/share/gccxml-0.9/MIPSpro/7.3/stl_threads.h +11 -0
  164. data/share/gccxml-0.9/MIPSpro/7.3/string +18 -0
  165. data/share/gccxml-0.9/MIPSpro/find_flags +70 -0
  166. data/share/gccxml-0.9/MIPSpro/mipspro_defs.cxx +63 -0
  167. data/share/gccxml-0.9/Sun/5.8/Cstd.patch +156 -0
  168. data/share/gccxml-0.9/Sun/5.8/adapt_headers.sh +32 -0
  169. data/share/gccxml-0.9/Sun/5.8/std-5.10.patch +22 -0
  170. data/share/gccxml-0.9/Sun/README +8 -0
  171. data/share/gccxml-0.9/Sun/find_flags +51 -0
  172. data/share/gccxml-0.9/Sun/find_flags_common +42 -0
  173. data/share/gccxml-0.9/gccxml_config +2 -0
  174. data/share/gccxml-0.9/gccxml_identify_compiler.cc +13 -0
  175. data/share/man/man1/gccxml.1 +246 -0
  176. metadata +243 -0
@@ -0,0 +1,864 @@
1
+ /* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
2
+ Free Software Foundation, Inc.
3
+
4
+ This file is part of GCC.
5
+
6
+ GCC is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 3, or (at your option)
9
+ any later version.
10
+
11
+ GCC is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ Under Section 7 of GPL version 3, you are granted additional
17
+ permissions described in the GCC Runtime Library Exception, version
18
+ 3.1, as published by the Free Software Foundation.
19
+
20
+ You should have received a copy of the GNU General Public License and
21
+ a copy of the GCC Runtime Library Exception along with this program;
22
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23
+ <http://www.gnu.org/licenses/>. */
24
+
25
+ /* Implemented from the specification included in the Intel C++ Compiler
26
+ User Guide and Reference, version 9.0. */
27
+
28
+ #ifndef _XMMINTRIN_H_INCLUDED
29
+ #define _XMMINTRIN_H_INCLUDED
30
+
31
+ #ifndef __SSE__
32
+ # error "SSE instruction set not enabled"
33
+ #else
34
+
35
+ /* We need type definitions from the MMX header file. */
36
+ #include <mmintrin.h>
37
+
38
+ /* Get _mm_malloc () and _mm_free (). */
39
+ #include <mm_malloc.h>
40
+
41
+ /* The Intel API is flexible enough that we must allow aliasing with other
42
+ vector types, and their scalar components. */
43
+ typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
44
+
45
+ /* Internal data types for implementing the intrinsics. */
46
+ typedef float __v4sf __attribute__ ((__vector_size__ (16)));
47
+
48
+ /* Create a selector for use with the SHUFPS instruction. */
49
+ #define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
50
+ (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
51
+
52
+ /* Constants for use with _mm_prefetch. */
53
+ enum _mm_hint
54
+ {
55
+ _MM_HINT_T0 = 3,
56
+ _MM_HINT_T1 = 2,
57
+ _MM_HINT_T2 = 1,
58
+ _MM_HINT_NTA = 0
59
+ };
60
+
61
+ /* Bits in the MXCSR. */
62
+ #define _MM_EXCEPT_MASK 0x003f
63
+ #define _MM_EXCEPT_INVALID 0x0001
64
+ #define _MM_EXCEPT_DENORM 0x0002
65
+ #define _MM_EXCEPT_DIV_ZERO 0x0004
66
+ #define _MM_EXCEPT_OVERFLOW 0x0008
67
+ #define _MM_EXCEPT_UNDERFLOW 0x0010
68
+ #define _MM_EXCEPT_INEXACT 0x0020
69
+
70
+ #define _MM_MASK_MASK 0x1f80
71
+ #define _MM_MASK_INVALID 0x0080
72
+ #define _MM_MASK_DENORM 0x0100
73
+ #define _MM_MASK_DIV_ZERO 0x0200
74
+ #define _MM_MASK_OVERFLOW 0x0400
75
+ #define _MM_MASK_UNDERFLOW 0x0800
76
+ #define _MM_MASK_INEXACT 0x1000
77
+
78
+ #define _MM_ROUND_MASK 0x6000
79
+ #define _MM_ROUND_NEAREST 0x0000
80
+ #define _MM_ROUND_DOWN 0x2000
81
+ #define _MM_ROUND_UP 0x4000
82
+ #define _MM_ROUND_TOWARD_ZERO 0x6000
83
+
84
+ #define _MM_FLUSH_ZERO_MASK 0x8000
85
+ #define _MM_FLUSH_ZERO_ON 0x8000
86
+ #define _MM_FLUSH_ZERO_OFF 0x0000
87
+
88
+ /* Create a vector of zeros. */
89
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
90
+ _mm_setzero_ps (void)
91
+ ;
92
+
93
+ /* Perform the respective operation on the lower SPFP (single-precision
94
+ floating-point) values of A and B; the upper three SPFP values are
95
+ passed through from A. */
96
+
97
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
98
+ _mm_add_ss (__m128 __A, __m128 __B)
99
+ ;
100
+
101
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
102
+ _mm_sub_ss (__m128 __A, __m128 __B)
103
+ ;
104
+
105
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
106
+ _mm_mul_ss (__m128 __A, __m128 __B)
107
+ ;
108
+
109
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
110
+ _mm_div_ss (__m128 __A, __m128 __B)
111
+ ;
112
+
113
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
114
+ _mm_sqrt_ss (__m128 __A)
115
+ ;
116
+
117
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
118
+ _mm_rcp_ss (__m128 __A)
119
+ ;
120
+
121
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
122
+ _mm_rsqrt_ss (__m128 __A)
123
+ ;
124
+
125
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
126
+ _mm_min_ss (__m128 __A, __m128 __B)
127
+ ;
128
+
129
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
130
+ _mm_max_ss (__m128 __A, __m128 __B)
131
+ ;
132
+
133
+ /* Perform the respective operation on the four SPFP values in A and B. */
134
+
135
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
136
+ _mm_add_ps (__m128 __A, __m128 __B)
137
+ ;
138
+
139
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
140
+ _mm_sub_ps (__m128 __A, __m128 __B)
141
+ ;
142
+
143
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
144
+ _mm_mul_ps (__m128 __A, __m128 __B)
145
+ ;
146
+
147
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
148
+ _mm_div_ps (__m128 __A, __m128 __B)
149
+ ;
150
+
151
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
152
+ _mm_sqrt_ps (__m128 __A)
153
+ ;
154
+
155
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
156
+ _mm_rcp_ps (__m128 __A)
157
+ ;
158
+
159
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
160
+ _mm_rsqrt_ps (__m128 __A)
161
+ ;
162
+
163
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
164
+ _mm_min_ps (__m128 __A, __m128 __B)
165
+ ;
166
+
167
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
168
+ _mm_max_ps (__m128 __A, __m128 __B)
169
+ ;
170
+
171
+ /* Perform logical bit-wise operations on 128-bit values. */
172
+
173
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
174
+ _mm_and_ps (__m128 __A, __m128 __B)
175
+ ;
176
+
177
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
178
+ _mm_andnot_ps (__m128 __A, __m128 __B)
179
+ ;
180
+
181
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
182
+ _mm_or_ps (__m128 __A, __m128 __B)
183
+ ;
184
+
185
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
186
+ _mm_xor_ps (__m128 __A, __m128 __B)
187
+ ;
188
+
189
+ /* Perform a comparison on the lower SPFP values of A and B. If the
190
+ comparison is true, place a mask of all ones in the result, otherwise a
191
+ mask of zeros. The upper three SPFP values are passed through from A. */
192
+
193
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
194
+ _mm_cmpeq_ss (__m128 __A, __m128 __B)
195
+ ;
196
+
197
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
198
+ _mm_cmplt_ss (__m128 __A, __m128 __B)
199
+ ;
200
+
201
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
202
+ _mm_cmple_ss (__m128 __A, __m128 __B)
203
+ ;
204
+
205
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
206
+ _mm_cmpgt_ss (__m128 __A, __m128 __B)
207
+ ;
208
+
209
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
210
+ _mm_cmpge_ss (__m128 __A, __m128 __B)
211
+ ;
212
+
213
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
214
+ _mm_cmpneq_ss (__m128 __A, __m128 __B)
215
+ ;
216
+
217
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
218
+ _mm_cmpnlt_ss (__m128 __A, __m128 __B)
219
+ ;
220
+
221
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
222
+ _mm_cmpnle_ss (__m128 __A, __m128 __B)
223
+ ;
224
+
225
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
226
+ _mm_cmpngt_ss (__m128 __A, __m128 __B)
227
+ ;
228
+
229
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
230
+ _mm_cmpnge_ss (__m128 __A, __m128 __B)
231
+ ;
232
+
233
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
234
+ _mm_cmpord_ss (__m128 __A, __m128 __B)
235
+ ;
236
+
237
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
238
+ _mm_cmpunord_ss (__m128 __A, __m128 __B)
239
+ ;
240
+
241
+ /* Perform a comparison on the four SPFP values of A and B. For each
242
+ element, if the comparison is true, place a mask of all ones in the
243
+ result, otherwise a mask of zeros. */
244
+
245
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
246
+ _mm_cmpeq_ps (__m128 __A, __m128 __B)
247
+ ;
248
+
249
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
250
+ _mm_cmplt_ps (__m128 __A, __m128 __B)
251
+ ;
252
+
253
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
254
+ _mm_cmple_ps (__m128 __A, __m128 __B)
255
+ ;
256
+
257
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
258
+ _mm_cmpgt_ps (__m128 __A, __m128 __B)
259
+ ;
260
+
261
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
262
+ _mm_cmpge_ps (__m128 __A, __m128 __B)
263
+ ;
264
+
265
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
266
+ _mm_cmpneq_ps (__m128 __A, __m128 __B)
267
+ ;
268
+
269
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
270
+ _mm_cmpnlt_ps (__m128 __A, __m128 __B)
271
+ ;
272
+
273
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
274
+ _mm_cmpnle_ps (__m128 __A, __m128 __B)
275
+ ;
276
+
277
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
278
+ _mm_cmpngt_ps (__m128 __A, __m128 __B)
279
+ ;
280
+
281
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
282
+ _mm_cmpnge_ps (__m128 __A, __m128 __B)
283
+ ;
284
+
285
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
286
+ _mm_cmpord_ps (__m128 __A, __m128 __B)
287
+ ;
288
+
289
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
290
+ _mm_cmpunord_ps (__m128 __A, __m128 __B)
291
+ ;
292
+
293
+ /* Compare the lower SPFP values of A and B and return 1 if true
294
+ and 0 if false. */
295
+
296
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
297
+ _mm_comieq_ss (__m128 __A, __m128 __B)
298
+ ;
299
+
300
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
301
+ _mm_comilt_ss (__m128 __A, __m128 __B)
302
+ ;
303
+
304
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
305
+ _mm_comile_ss (__m128 __A, __m128 __B)
306
+ ;
307
+
308
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
309
+ _mm_comigt_ss (__m128 __A, __m128 __B)
310
+ ;
311
+
312
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
313
+ _mm_comige_ss (__m128 __A, __m128 __B)
314
+ ;
315
+
316
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
317
+ _mm_comineq_ss (__m128 __A, __m128 __B)
318
+ ;
319
+
320
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
321
+ _mm_ucomieq_ss (__m128 __A, __m128 __B)
322
+ ;
323
+
324
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
325
+ _mm_ucomilt_ss (__m128 __A, __m128 __B)
326
+ ;
327
+
328
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
329
+ _mm_ucomile_ss (__m128 __A, __m128 __B)
330
+ ;
331
+
332
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
333
+ _mm_ucomigt_ss (__m128 __A, __m128 __B)
334
+ ;
335
+
336
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
337
+ _mm_ucomige_ss (__m128 __A, __m128 __B)
338
+ ;
339
+
340
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
341
+ _mm_ucomineq_ss (__m128 __A, __m128 __B)
342
+ ;
343
+
344
+ /* Convert the lower SPFP value to a 32-bit integer according to the current
345
+ rounding mode. */
346
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
347
+ _mm_cvtss_si32 (__m128 __A)
348
+ ;
349
+
350
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
351
+ _mm_cvt_ss2si (__m128 __A)
352
+ ;
353
+
354
+ #ifdef __x86_64__
355
+ /* Convert the lower SPFP value to a 64-bit integer according to the
356
+ current rounding mode. */
357
+
358
+ /* Intel intrinsic. */
359
+ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
360
+ _mm_cvtss_si64 (__m128 __A)
361
+ ;
362
+
363
+ /* Microsoft intrinsic. */
364
+ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
365
+ _mm_cvtss_si64x (__m128 __A)
366
+ ;
367
+ #endif
368
+
369
+ /* Convert the two lower SPFP values to 32-bit integers according to the
370
+ current rounding mode. Return the integers in packed form. */
371
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
372
+ _mm_cvtps_pi32 (__m128 __A)
373
+ ;
374
+
375
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
376
+ _mm_cvt_ps2pi (__m128 __A)
377
+ ;
378
+
379
+ /* Truncate the lower SPFP value to a 32-bit integer. */
380
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
381
+ _mm_cvttss_si32 (__m128 __A)
382
+ ;
383
+
384
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
385
+ _mm_cvtt_ss2si (__m128 __A)
386
+ ;
387
+
388
+ #ifdef __x86_64__
389
+ /* Truncate the lower SPFP value to a 64-bit integer. */
390
+
391
+ /* Intel intrinsic. */
392
+ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
393
+ _mm_cvttss_si64 (__m128 __A)
394
+ ;
395
+
396
+ /* Microsoft intrinsic. */
397
+ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
398
+ _mm_cvttss_si64x (__m128 __A)
399
+ ;
400
+ #endif
401
+
402
+ /* Truncate the two lower SPFP values to 32-bit integers. Return the
403
+ integers in packed form. */
404
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
405
+ _mm_cvttps_pi32 (__m128 __A)
406
+ ;
407
+
408
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
409
+ _mm_cvtt_ps2pi (__m128 __A)
410
+ ;
411
+
412
+ /* Convert B to a SPFP value and insert it as element zero in A. */
413
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
414
+ _mm_cvtsi32_ss (__m128 __A, int __B)
415
+ ;
416
+
417
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
418
+ _mm_cvt_si2ss (__m128 __A, int __B)
419
+ ;
420
+
421
+ #ifdef __x86_64__
422
+ /* Convert B to a SPFP value and insert it as element zero in A. */
423
+
424
+ /* Intel intrinsic. */
425
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
426
+ _mm_cvtsi64_ss (__m128 __A, long long __B)
427
+ ;
428
+
429
+ /* Microsoft intrinsic. */
430
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
431
+ _mm_cvtsi64x_ss (__m128 __A, long long __B)
432
+ ;
433
+ #endif
434
+
435
+ /* Convert the two 32-bit values in B to SPFP form and insert them
436
+ as the two lower elements in A. */
437
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
438
+ _mm_cvtpi32_ps (__m128 __A, __m64 __B)
439
+ ;
440
+
441
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
442
+ _mm_cvt_pi2ps (__m128 __A, __m64 __B)
443
+ ;
444
+
445
+ /* Convert the four signed 16-bit values in A to SPFP form. */
446
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
447
+ _mm_cvtpi16_ps (__m64 __A)
448
+ ;
449
+
450
+ /* Convert the four unsigned 16-bit values in A to SPFP form. */
451
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
452
+ _mm_cvtpu16_ps (__m64 __A)
453
+ ;
454
+
455
+ /* Convert the low four signed 8-bit values in A to SPFP form. */
456
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
457
+ _mm_cvtpi8_ps (__m64 __A)
458
+ ;
459
+
460
+ /* Convert the low four unsigned 8-bit values in A to SPFP form. */
461
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
462
+ _mm_cvtpu8_ps(__m64 __A)
463
+ ;
464
+
465
+ /* Convert the four signed 32-bit values in A and B to SPFP form. */
466
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
467
+ _mm_cvtpi32x2_ps(__m64 __A, __m64 __B)
468
+ ;
469
+
470
+ /* Convert the four SPFP values in A to four signed 16-bit integers. */
471
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
472
+ _mm_cvtps_pi16(__m128 __A)
473
+ ;
474
+
475
+ /* Convert the four SPFP values in A to four signed 8-bit integers. */
476
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
477
+ _mm_cvtps_pi8(__m128 __A)
478
+ ;
479
+
480
+ /* Selects four specific SPFP values from A and B based on MASK. */
481
+ #ifdef __OPTIMIZE__
482
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
483
+ _mm_shuffle_ps (__m128 __A, __m128 __B, int const __mask)
484
+ ;
485
+ #else
486
+ #define _mm_shuffle_ps(A, B, MASK) \
487
+ ((__m128) __builtin_ia32_shufps ((__v4sf)(__m128)(A), \
488
+ (__v4sf)(__m128)(B), (int)(MASK)))
489
+ #endif
490
+
491
+ /* Selects and interleaves the upper two SPFP values from A and B. */
492
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
493
+ _mm_unpackhi_ps (__m128 __A, __m128 __B)
494
+ ;
495
+
496
+ /* Selects and interleaves the lower two SPFP values from A and B. */
497
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
498
+ _mm_unpacklo_ps (__m128 __A, __m128 __B)
499
+ ;
500
+
501
+ /* Sets the upper two SPFP values with 64 bits of data loaded from P;
502
+ the lower two values are passed through from A. */
503
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
504
+ _mm_loadh_pi (__m128 __A, __m64 const *__P)
505
+ ;
506
+
507
+ /* Stores the upper two SPFP values of A into P. */
508
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
509
+ _mm_storeh_pi (__m64 *__P, __m128 __A)
510
+ ;
511
+
512
+ /* Moves the upper two values of B into the lower two values of A. */
513
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
514
+ _mm_movehl_ps (__m128 __A, __m128 __B)
515
+ ;
516
+
517
+ /* Moves the lower two values of B into the upper two values of A. */
518
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
519
+ _mm_movelh_ps (__m128 __A, __m128 __B)
520
+ ;
521
+
522
+ /* Sets the lower two SPFP values with 64 bits of data loaded from P;
523
+ the upper two values are passed through from A. */
524
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
525
+ _mm_loadl_pi (__m128 __A, __m64 const *__P)
526
+ ;
527
+
528
+ /* Stores the lower two SPFP values of A into P. */
529
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
530
+ _mm_storel_pi (__m64 *__P, __m128 __A)
531
+ ;
532
+
533
+ /* Creates a 4-bit mask from the most significant bits of the SPFP values. */
534
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
535
+ _mm_movemask_ps (__m128 __A)
536
+ ;
537
+
538
+ /* Return the contents of the control register as an unsigned int. Prototype only: a bare `;` stands where a body would be. */
539
+ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
540
+ _mm_getcsr (void)
541
+ ;
542
+
543
+ /* Read exception bits from the control register; the value is returned as an unsigned int. */
544
+ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
545
+ _MM_GET_EXCEPTION_STATE (void)
546
+ ;
547
+ /* NOTE(review): uncommented in the original; presumably reads the exception-mask bits of the control register (inferred from the name only) -- confirm. */
548
+ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
549
+ _MM_GET_EXCEPTION_MASK (void)
550
+ ;
551
+ /* NOTE(review): uncommented in the original; presumably reads the rounding-mode bits of the control register (inferred from the name only) -- confirm. */
552
+ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
553
+ _MM_GET_ROUNDING_MODE (void)
554
+ ;
555
+ /* NOTE(review): uncommented in the original; presumably reads the flush-to-zero mode bit of the control register (inferred from the name only) -- confirm. */
556
+ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
557
+ _MM_GET_FLUSH_ZERO_MODE (void)
558
+ ;
559
+
560
+ /* Set the control register to I. Returns nothing. Prototype only: a bare `;` stands where a body would be. */
561
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
562
+ _mm_setcsr (unsigned int __I)
563
+ ;
564
+
565
+ /* Set exception bits in the control register. Takes a single unsigned int (__mask) and returns nothing. */
566
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
567
+ _MM_SET_EXCEPTION_STATE(unsigned int __mask)
568
+ ;
569
+ /* NOTE(review): uncommented in the original; presumably sets the exception-mask bits of the control register (inferred from the name only) -- confirm. */
570
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
571
+ _MM_SET_EXCEPTION_MASK (unsigned int __mask)
572
+ ;
573
+ /* NOTE(review): uncommented in the original; presumably sets the rounding-mode bits of the control register (inferred from the name only) -- confirm. */
574
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
575
+ _MM_SET_ROUNDING_MODE (unsigned int __mode)
576
+ ;
577
+ /* NOTE(review): uncommented in the original; presumably sets the flush-to-zero mode bit of the control register (inferred from the name only) -- confirm. */
578
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
579
+ _MM_SET_FLUSH_ZERO_MODE (unsigned int __mode)
580
+ ;
581
+
582
+ /* Create a vector with element 0 as F and the rest zero; the vector is returned as an __m128. Prototype only: a bare `;` stands where a body would be. */
583
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
584
+ _mm_set_ss (float __F)
585
+ ;
586
+
587
+ /* Create a vector with all four elements equal to F; the vector is returned as an __m128. */
588
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
589
+ _mm_set1_ps (float __F)
590
+ ;
591
+ /* NOTE(review): uncommented in the original; same signature as _mm_set1_ps, so presumably an alternate spelling of it -- confirm. */
592
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
593
+ _mm_set_ps1 (float __F)
594
+ ;
595
+
596
+ /* Create a vector with element 0 as *P and the rest zero. P is a pointer to const float, so it is only read. Prototype only: a bare `;` stands where a body would be. */
597
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
598
+ _mm_load_ss (float const *__P)
599
+ ;
600
+
601
+ /* Create a vector with all four elements equal to *P; the vector is returned as an __m128. */
602
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
603
+ _mm_load1_ps (float const *__P)
604
+ ;
605
+ /* NOTE(review): uncommented in the original; same signature as _mm_load1_ps, so presumably an alternate spelling of it -- confirm. */
606
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
607
+ _mm_load_ps1 (float const *__P)
608
+ ;
609
+
610
+ /* Load four SPFP values (floats, per the parameter type) from P. The address must be 16-byte aligned. */
611
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
612
+ _mm_load_ps (float const *__P)
613
+ ;
614
+
615
+ /* Load four SPFP values from P. The address need not be 16-byte aligned, unlike _mm_load_ps. */
616
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
617
+ _mm_loadu_ps (float const *__P)
618
+ ;
619
+
620
+ /* Load four SPFP values in reverse order. The address must be aligned (presumably 16-byte, as for _mm_load_ps -- confirm). */
621
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
622
+ _mm_loadr_ps (float const *__P)
623
+ ;
624
+
625
+ /* Create the vector [Z Y X W] from four float arguments; the vector is returned as an __m128. Prototype only: a bare `;` stands where a body would be. */
626
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
627
+ _mm_set_ps (const float __Z, const float __Y, const float __X, const float __W)
628
+ ;
629
+
630
+ /* Create the vector [W X Y Z]. Note that the parameters are still declared in the order (__Z, __Y, __X, __W), as for _mm_set_ps. */
631
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
632
+ _mm_setr_ps (float __Z, float __Y, float __X, float __W)
633
+ ;
634
+
635
+ /* Stores the lower SPFP value of A through the float pointer P. Returns nothing. Prototype only: a bare `;` stands where a body would be. */
636
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
637
+ _mm_store_ss (float *__P, __m128 __A)
638
+ ;
639
+ /* NOTE(review): uncommented in the original; takes an __m128 and returns a float, presumably the lower SPFP value of A -- confirm. */
640
+ extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
641
+ _mm_cvtss_f32 (__m128 __A)
642
+ ;
643
+
644
+ /* Store four SPFP values from A through the float pointer P. The address must be 16-byte aligned. Prototype only: a bare `;` stands where a body would be. */
645
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
646
+ _mm_store_ps (float *__P, __m128 __A)
647
+ ;
648
+
649
+ /* Store four SPFP values. The address need not be 16-byte aligned, unlike _mm_store_ps. */
650
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
651
+ _mm_storeu_ps (float *__P, __m128 __A)
652
+ ;
653
+
654
+ /* Store the lower SPFP value of A across four words starting at P. */
655
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
656
+ _mm_store1_ps (float *__P, __m128 __A)
657
+ ;
658
+ /* NOTE(review): uncommented in the original; same signature as _mm_store1_ps, so presumably an alternate spelling of it -- confirm. */
659
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
660
+ _mm_store_ps1 (float *__P, __m128 __A)
661
+ ;
662
+
663
+ /* Store four SPFP values in reverse order. The address must be aligned (presumably 16-byte, as for _mm_store_ps -- confirm). */
664
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
665
+ _mm_storer_ps (float *__P, __m128 __A)
666
+ ;
667
+
668
+ /* Sets the low SPFP value of A from the low value of B; the result is returned as an __m128. Prototype only: a bare `;` stands where a body would be. */
669
+ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
670
+ _mm_move_ss (__m128 __A, __m128 __B)
671
+ ;
672
+
673
+ /* Extracts one of the four words of A. The selector N must be immediate. With __OPTIMIZE__ defined these are prototypes returning int; otherwise they are macros built on __builtin_ia32_vec_ext_v4hi. */
674
+ #ifdef __OPTIMIZE__
675
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
676
+ _mm_extract_pi16 (__m64 const __A, int const __N)
677
+ ;
678
+ /* Same signature as _mm_extract_pi16; the #else branch below defines this name as a macro that forwards to it. */
679
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
680
+ _m_pextrw (__m64 const __A, int const __N)
681
+ ;
682
+ #else
683
+ #define _mm_extract_pi16(A, N) \
684
+ ((int) __builtin_ia32_vec_ext_v4hi ((__v4hi)(__m64)(A), (int)(N)))
685
+
686
+ #define _m_pextrw(A, N) _mm_extract_pi16(A, N)
687
+ #endif
688
+
689
+ /* Inserts word D into one of four words of A. The selector N must be
690
+ immediate. With __OPTIMIZE__ defined these are prototypes returning __m64; otherwise they are macros built on __builtin_ia32_vec_set_v4hi. */
691
+ #ifdef __OPTIMIZE__
692
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
693
+ _mm_insert_pi16 (__m64 const __A, int const __D, int const __N)
694
+ ;
695
+ /* Same signature as _mm_insert_pi16; the #else branch below defines this name as a macro that forwards to it. */
696
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
697
+ _m_pinsrw (__m64 const __A, int const __D, int const __N)
698
+ ;
699
+ #else
700
+ #define _mm_insert_pi16(A, D, N) \
701
+ ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(__m64)(A), \
702
+ (int)(D), (int)(N)))
703
+
704
+ #define _m_pinsrw(A, D, N) _mm_insert_pi16(A, D, N)
705
+ #endif
706
+
707
+ /* Compute the element-wise maximum of signed 16-bit values in A and B; the result is returned as an __m64. Prototype only: a bare `;` stands where a body would be. */
708
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
709
+ _mm_max_pi16 (__m64 __A, __m64 __B)
710
+ ;
711
+ /* NOTE(review): uncommented in the original; same signature as _mm_max_pi16, so presumably an alternate name for it -- confirm. */
712
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
713
+ _m_pmaxsw (__m64 __A, __m64 __B)
714
+ ;
715
+
716
+ /* Compute the element-wise maximum of unsigned 8-bit values in A and B; the result is returned as an __m64. */
717
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
718
+ _mm_max_pu8 (__m64 __A, __m64 __B)
719
+ ;
720
+ /* NOTE(review): uncommented in the original; same signature as _mm_max_pu8, so presumably an alternate name for it -- confirm. */
721
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
722
+ _m_pmaxub (__m64 __A, __m64 __B)
723
+ ;
724
+
725
+ /* Compute the element-wise minimum of signed 16-bit values in A and B; the result is returned as an __m64. */
726
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
727
+ _mm_min_pi16 (__m64 __A, __m64 __B)
728
+ ;
729
+ /* NOTE(review): uncommented in the original; same signature as _mm_min_pi16, so presumably an alternate name for it -- confirm. */
730
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
731
+ _m_pminsw (__m64 __A, __m64 __B)
732
+ ;
733
+
734
+ /* Compute the element-wise minimum of unsigned 8-bit values in A and B; the result is returned as an __m64. */
735
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
736
+ _mm_min_pu8 (__m64 __A, __m64 __B)
737
+ ;
738
+ /* NOTE(review): uncommented in the original; same signature as _mm_min_pu8, so presumably an alternate name for it -- confirm. */
739
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
740
+ _m_pminub (__m64 __A, __m64 __B)
741
+ ;
742
+
743
+ /* Create an 8-bit mask of the signs of the 8-bit values in A; the mask is returned as an int. Prototype only: a bare `;` stands where a body would be. */
744
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
745
+ _mm_movemask_pi8 (__m64 __A)
746
+ ;
747
+ /* NOTE(review): uncommented in the original; same signature as _mm_movemask_pi8, so presumably an alternate name for it -- confirm. */
748
+ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
749
+ _m_pmovmskb (__m64 __A)
750
+ ;
751
+
752
+ /* Multiply four unsigned 16-bit values in A by four unsigned 16-bit values
753
+ in B and produce the high 16 bits of the 32-bit results. The result is returned as an __m64. Prototype only: a bare `;` stands where a body would be. */
754
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
755
+ _mm_mulhi_pu16 (__m64 __A, __m64 __B)
756
+ ;
757
+ /* NOTE(review): uncommented in the original; same signature as _mm_mulhi_pu16, so presumably an alternate name for it -- confirm. */
758
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
759
+ _m_pmulhuw (__m64 __A, __m64 __B)
760
+ ;
761
+
762
+ /* Return a combination of the four 16-bit values in A. The selector
763
+ must be an immediate. With __OPTIMIZE__ defined these are prototypes returning __m64; otherwise they are macros built on __builtin_ia32_pshufw. */
764
+ #ifdef __OPTIMIZE__
765
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
766
+ _mm_shuffle_pi16 (__m64 __A, int const __N)
767
+ ;
768
+ /* Same signature as _mm_shuffle_pi16; the #else branch below defines this name as a macro that forwards to it. */
769
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
770
+ _m_pshufw (__m64 __A, int const __N)
771
+ ;
772
+ #else
773
+ #define _mm_shuffle_pi16(A, N) \
774
+ ((__m64) __builtin_ia32_pshufw ((__v4hi)(__m64)(A), (int)(N)))
775
+
776
+ #define _m_pshufw(A, N) _mm_shuffle_pi16 (A, N)
777
+ #endif
778
+
779
+ /* Conditionally store byte elements of A into P. The high bit of each
780
+ byte in the selector N determines whether the corresponding byte from
781
+ A is stored. Returns nothing. Prototype only: a bare `;` stands where a body would be. */
782
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
783
+ _mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
784
+ ;
785
+ /* NOTE(review): uncommented in the original; same signature as _mm_maskmove_si64, so presumably an alternate name for it -- confirm. */
786
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
787
+ _m_maskmovq (__m64 __A, __m64 __N, char *__P)
788
+ ;
789
+
790
+ /* Compute the rounded averages of the unsigned 8-bit values in A and B; the result is returned as an __m64. Prototype only: a bare `;` stands where a body would be. */
791
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
792
+ _mm_avg_pu8 (__m64 __A, __m64 __B)
793
+ ;
794
+ /* NOTE(review): uncommented in the original; same signature as _mm_avg_pu8, so presumably an alternate name for it -- confirm. */
795
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
796
+ _m_pavgb (__m64 __A, __m64 __B)
797
+ ;
798
+
799
+ /* Compute the rounded averages of the unsigned 16-bit values in A and B; the result is returned as an __m64. */
800
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
801
+ _mm_avg_pu16 (__m64 __A, __m64 __B)
802
+ ;
803
+ /* NOTE(review): uncommented in the original; same signature as _mm_avg_pu16, so presumably an alternate name for it -- confirm. */
804
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
805
+ _m_pavgw (__m64 __A, __m64 __B)
806
+ ;
807
+
808
+ /* Compute the sum of the absolute differences of the unsigned 8-bit
809
+ values in A and B. Return the value in the lower 16-bit word; the
810
+ upper words are cleared. The result type is __m64. Prototype only: a bare `;` stands where a body would be. */
811
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
812
+ _mm_sad_pu8 (__m64 __A, __m64 __B)
813
+ ;
814
+ /* NOTE(review): uncommented in the original; same signature as _mm_sad_pu8, so presumably an alternate name for it -- confirm. */
815
+ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
816
+ _m_psadbw (__m64 __A, __m64 __B)
817
+ ;
818
+
819
+ /* Loads one cache line from address P to a location "closer" to the
820
+ processor. The selector I specifies the type of prefetch operation. With __OPTIMIZE__ defined this is a prototype taking an enum _mm_hint; otherwise it is a macro forwarding to __builtin_prefetch ((P), 0, (I)). */
821
+ #ifdef __OPTIMIZE__
822
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
823
+ _mm_prefetch (const void *__P, enum _mm_hint __I)
824
+ ;
825
+ #else
826
+ #define _mm_prefetch(P, I) \
827
+ __builtin_prefetch ((P), 0, (I))
828
+ #endif
829
+
830
+ /* Stores the data in A to the address P without polluting the caches. Returns nothing. Prototype only: a bare `;` stands where a body would be. */
831
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
832
+ _mm_stream_pi (__m64 *__P, __m64 __A)
833
+ ;
834
+
835
+ /* Likewise, but takes an __m128 A and a float pointer P. The address must be 16-byte aligned. */
836
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
837
+ _mm_stream_ps (float *__P, __m128 __A)
838
+ ;
839
+
840
+ /* Guarantees that every preceding store is globally visible before
841
+ any subsequent store. Takes no arguments and returns nothing. */
842
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
843
+ _mm_sfence (void)
844
+ ;
845
+
846
+ /* The execution of the next instruction is delayed by an implementation
847
+ specific amount of time. The instruction does not modify the
848
+ architectural state. Takes no arguments and returns nothing. */
849
+ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
850
+ _mm_pause (void)
851
+ ;
852
+
853
+ /* Transpose the 4x4 matrix composed of row[0-3]. NOTE(review): in this copy the macro body is empty, so it expands to `do { } while (0)` and leaves the rows untouched; presumably the body was removed along with the function bodies -- confirm this is intended. */
854
+ #define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
855
+ do { \
856
+ } while (0)
857
+
858
+ /* For backward source compatibility. */
859
+ #ifdef __SSE2__
860
+ # include <emmintrin.h>
861
+ #endif
862
+
863
+ #endif /* __SSE__ */
864
+ #endif /* _XMMINTRIN_H_INCLUDED */