victory 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +4 -6
- data/USAGE.md +261 -0
- data/ext/containers/bitset/LICENSE.txt +20 -0
- data/ext/containers/bitset/bitset.c +652 -0
- data/ext/containers/bitset/builtin.h +1390 -0
- data/ext/containers/bitset/exact-int.h +229 -0
- data/ext/containers/bitset/extconf.rb +4 -0
- data/ext/containers/xor_list/extconf.rb +1 -1
- data/ext/containers/xor_list/xor_list.c +1 -1
- data/extensions.rb +13 -0
- data/lib/algorithms/greedy.rb +26 -0
- data/lib/algorithms/sort.rb +1 -1
- data/lib/containers/bitset.rb +48 -0
- data/lib/containers/xor_list.rb +3 -0
- data/lib/victory/version.rb +1 -1
- data/lib/victory.rb +6 -8
- data/victory.gemspec +4 -10
- metadata +36 -12
@@ -0,0 +1,1390 @@
|
|
1
|
+
/* Builtins and Intrinsics
|
2
|
+
* Portable Snippets - https://github.com/nemequ/portable-snippets
|
3
|
+
* Created by Evan Nemerson <evan@nemerson.com>
|
4
|
+
*
|
5
|
+
* To the extent possible under law, the authors have waived all
|
6
|
+
* copyright and related or neighboring rights to this code. For
|
7
|
+
* details, see the Creative Commons Zero 1.0 Universal license at
|
8
|
+
* https://creativecommons.org/publicdomain/zero/1.0/
|
9
|
+
*
|
10
|
+
* Some of these implementations are based on code from
|
11
|
+
* https://graphics.stanford.edu/~seander/bithacks.html which is also
|
12
|
+
* public domain (and a fantastic web site).
|
13
|
+
*/
|
14
|
+
|
15
|
+
#if !defined(PSNIP_BUILTIN_H)
|
16
|
+
#define PSNIP_BUILTIN_H
|
17
|
+
|
18
|
+
#if defined(HEDLEY_GCC_HAS_BUILTIN)
|
19
|
+
# define PSNIP_BUILTIN_GNU_HAS_BUILTIN(builtin,major,minor) HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,0)
|
20
|
+
#elif defined(__clang__) && defined(__has_builtin)
|
21
|
+
# define PSNIP_BUILTIN_GNU_HAS_BUILTIN(builtin,major,minor) __has_builtin(builtin)
|
22
|
+
#elif defined(__GNUC__)
|
23
|
+
# define PSNIP_BUILTIN_GNU_HAS_BUILTIN(builtin,major,minor) (__GNUC__ > major || (major == __GNUC__ && __GNUC_MINOR__ >= minor))
|
24
|
+
#else
|
25
|
+
# define PSNIP_BUILTIN_GNU_HAS_BUILTIN(builtin,major,minor) (0)
|
26
|
+
#endif
|
27
|
+
|
28
|
+
#if defined(HEDLEY_CLANG_HAS_BUILTIN)
|
29
|
+
# define PSNIP_BUILTIN_CLANG_HAS_BUILTIN(builtin) HEDLEY_CLANG_HAS_BUILTIN(builtin)
|
30
|
+
#elif defined(__has_builtin)
|
31
|
+
# define PSNIP_BUILTIN_CLANG_HAS_BUILTIN(builtin) __has_builtin(builtin)
|
32
|
+
#else
|
33
|
+
# define PSNIP_BUILTIN_CLANG_HAS_BUILTIN(builtin) (0)
|
34
|
+
#endif
|
35
|
+
|
36
|
+
/* PSNIP_BUILTIN_MSVC_HAS_INTRIN(intrin, major, minor)
 *
 * Evaluates to non-zero when compiling with MSVC version major.minor or
 * newer, and to 0 on every non-MSVC compiler.  The `intrin` argument is
 * accepted for readability at the call site only; none of the definitions
 * below use it.
 *
 * _MSC_FULL_VER encodes the version as MMmmbbbbb (five build digits) from
 * Visual C++ 2005 (_MSC_VER 1400) onwards and as MMmmbbbb (four build
 * digits) on earlier releases, hence the two different sets of multipliers.
 */
#if defined(HEDLEY_MSVC_VERSION_CHECK)
#  define PSNIP_BUILTIN_MSVC_HAS_INTRIN(intrin,major,minor) HEDLEY_MSVC_VERSION_CHECK(major,minor,0)
#elif !defined(_MSC_VER)
#  define PSNIP_BUILTIN_MSVC_HAS_INTRIN(intrin,major,minor) (0)
#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
#  define PSNIP_BUILTIN_MSVC_HAS_INTRIN(intrin,major,minor) (_MSC_FULL_VER >= (((major) * 10000000) + ((minor) * 100000)))
#elif defined(_MSC_VER) && (_MSC_VER >= 1200)
#  define PSNIP_BUILTIN_MSVC_HAS_INTRIN(intrin,major,minor) (_MSC_FULL_VER >= (((major) * 1000000) + ((minor) * 10000)))
#else
#  define PSNIP_BUILTIN_MSVC_HAS_INTRIN(intrin,major,minor) (_MSC_VER >= (((major) * 100) + (minor)))
#endif
|
47
|
+
|
48
|
+
#if defined(_MSC_VER)
|
49
|
+
# include <intrin.h>
|
50
|
+
#endif
|
51
|
+
#include <limits.h>
|
52
|
+
#include <stdlib.h>
|
53
|
+
|
54
|
+
#if defined(__i386) || defined(_M_IX86) || \
|
55
|
+
defined(__amd64) || defined(_M_AMD64) || defined(__x86_64)
|
56
|
+
# if defined(_MSC_VER)
|
57
|
+
# define PSNIP_BUILTIN__ENABLE_X86
|
58
|
+
# elif defined(__GNUC__)
|
59
|
+
# define PSNIP_BUILTIN__ENABLE_X86
|
60
|
+
# include <x86intrin.h>
|
61
|
+
# endif
|
62
|
+
#endif
|
63
|
+
|
64
|
+
#if defined(__amd64) || defined(_M_AMD64) || defined(__x86_64)
|
65
|
+
# if defined(_MSC_VER)
|
66
|
+
# define PSNIP_BUILTIN__ENABLE_AMD64
|
67
|
+
# elif defined(__GNUC__)
|
68
|
+
# define PSNIP_BUILTIN__ENABLE_AMD64
|
69
|
+
# include <x86intrin.h>
|
70
|
+
# endif
|
71
|
+
#endif
|
72
|
+
|
73
|
+
#if \
|
74
|
+
!defined(psnip_int64_t) || !defined(psnip_uint64_t) || \
|
75
|
+
!defined(psnip_int32_t) || !defined(psnip_uint32_t) || \
|
76
|
+
!defined(psnip_int16_t) || !defined(psnip_uint16_t) || \
|
77
|
+
!defined(psnip_int8_t) || !defined(psnip_uint8_t)
|
78
|
+
# include "exact-int.h"
|
79
|
+
#endif
|
80
|
+
|
81
|
+
#if defined(HEDLEY_LIKELY) && defined(HEDLEY_UNLIKELY)
|
82
|
+
# define PSNIP_BUILTIN_LIKELY(expr) HEDLEY_LIKELY(expr)
|
83
|
+
# define PSNIP_BUILTIN_UNLIKELY(expr) HEDLEY_UNLIKELY(expr)
|
84
|
+
#elif PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_expect,3,0)
|
85
|
+
# define PSNIP_BUILTIN_LIKELY(expr) __builtin_expect(!!(expr), 1)
|
86
|
+
# define PSNIP_BUILTIN_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
|
87
|
+
#else
|
88
|
+
# define PSNIP_BUILTIN_LIKELY(expr) (!!(expr))
|
89
|
+
# define PSNIP_BUILTIN_UNLIKELY(expr) (!!(expr))
|
90
|
+
#endif
|
91
|
+
|
92
|
+
#if !defined(PSNIP_BUILTIN_STATIC_INLINE)
|
93
|
+
# if defined(__GNUC__)
|
94
|
+
# define PSNIP_BUILTIN__COMPILER_ATTRIBUTES __attribute__((__unused__))
|
95
|
+
# else
|
96
|
+
# define PSNIP_BUILTIN__COMPILER_ATTRIBUTES
|
97
|
+
# endif
|
98
|
+
|
99
|
+
# if defined(HEDLEY_INLINE)
|
100
|
+
# define PSNIP_BUILTIN__INLINE HEDLEY_INLINE
|
101
|
+
# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
|
102
|
+
# define PSNIP_BUILTIN__INLINE inline
|
103
|
+
# elif defined(__GNUC_STDC_INLINE__)
|
104
|
+
# define PSNIP_BUILTIN__INLINE __inline__
|
105
|
+
# elif defined(_MSC_VER) && _MSC_VER >= 1200
|
106
|
+
# define PSNIP_BUILTIN__INLINE __inline
|
107
|
+
# else
|
108
|
+
# define PSNIP_BUILTIN__INLINE
|
109
|
+
# endif
|
110
|
+
|
111
|
+
# define PSNIP_BUILTIN__FUNCTION PSNIP_BUILTIN__COMPILER_ATTRIBUTES static PSNIP_BUILTIN__INLINE
|
112
|
+
#endif
|
113
|
+
|
114
|
+
#define PSNIP_BUILTIN__SUFFIX_B 1
|
115
|
+
#define PSNIP_BUILTIN__SUFFIX_S 2
|
116
|
+
#define PSNIP_BUILTIN__SUFFIX_ 3
|
117
|
+
#define PSNIP_BUILTIN__SUFFIX_L 4
|
118
|
+
#define PSNIP_BUILTIN__SUFFIX_LL 5
|
119
|
+
|
120
|
+
#if !defined(PSNIP_BUILTIN__SIZEOF_CHAR)
|
121
|
+
# if CHAR_MIN == (-0x7fLL-1) && CHAR_MAX == 0x7fLL
|
122
|
+
# define PSNIP_BUILTIN__SIZEOF_CHAR 8
|
123
|
+
# elif CHAR_MIN == (-0x7fffLL-1) && CHAR_MAX == 0x7fffLL
|
124
|
+
# define PSNIP_BUILTIN__SIZEOF_CHAR 16
|
125
|
+
# elif CHAR_MIN == (-0x7fffffffLL-1) && CHAR_MAX == 0x7fffffffLL
|
126
|
+
# define PSNIP_BUILTIN__SIZEOF_CHAR 32
|
127
|
+
# elif CHAR_MIN == (-0x7fffffffffffffffLL-1) && CHAR_MAX == 0x7fffffffffffffffLL
|
128
|
+
# define PSNIP_BUILTIN__SIZEOF_CHAR 64
|
129
|
+
# endif
|
130
|
+
#endif
|
131
|
+
|
132
|
+
#if !defined(PSNIP_BUILTIN__SIZEOF_SHRT)
|
133
|
+
# if SHRT_MIN == (-0x7fLL-1) && SHRT_MAX == 0x7fLL
|
134
|
+
# define PSNIP_BUILTIN__SIZEOF_SHRT 8
|
135
|
+
# elif SHRT_MIN == (-0x7fffLL-1) && SHRT_MAX == 0x7fffLL
|
136
|
+
# define PSNIP_BUILTIN__SIZEOF_SHRT 16
|
137
|
+
# elif SHRT_MIN == (-0x7fffffffLL-1) && SHRT_MAX == 0x7fffffffLL
|
138
|
+
# define PSNIP_BUILTIN__SIZEOF_SHRT 32
|
139
|
+
# elif SHRT_MIN == (-0x7fffffffffffffffLL-1) && SHRT_MAX == 0x7fffffffffffffffLL
|
140
|
+
# define PSNIP_BUILTIN__SIZEOF_SHRT 64
|
141
|
+
# endif
|
142
|
+
#endif
|
143
|
+
|
144
|
+
#if !defined(PSNIP_BUILTIN__SIZEOF_INT)
|
145
|
+
# if INT_MIN == (-0x7fLL-1) && INT_MAX == 0x7fLL
|
146
|
+
# define PSNIP_BUILTIN__SIZEOF_INT 8
|
147
|
+
# elif INT_MIN == (-0x7fffLL-1) && INT_MAX == 0x7fffLL
|
148
|
+
# define PSNIP_BUILTIN__SIZEOF_INT 16
|
149
|
+
# elif INT_MIN == (-0x7fffffffLL-1) && INT_MAX == 0x7fffffffLL
|
150
|
+
# define PSNIP_BUILTIN__SIZEOF_INT 32
|
151
|
+
# elif INT_MIN == (-0x7fffffffffffffffLL-1) && INT_MAX == 0x7fffffffffffffffLL
|
152
|
+
# define PSNIP_BUILTIN__SIZEOF_INT 64
|
153
|
+
# endif
|
154
|
+
#endif
|
155
|
+
|
156
|
+
#if !defined(PSNIP_BUILTIN__SIZEOF_LONG)
|
157
|
+
# if LONG_MIN == (-0x7fLL-1) && LONG_MAX == 0x7fLL
|
158
|
+
# define PSNIP_BUILTIN__SIZEOF_LONG 8
|
159
|
+
# elif LONG_MIN == (-0x7fffLL-1) && LONG_MAX == 0x7fffLL
|
160
|
+
# define PSNIP_BUILTIN__SIZEOF_LONG 16
|
161
|
+
# elif LONG_MIN == (-0x7fffffffLL-1) && LONG_MAX == 0x7fffffffLL
|
162
|
+
# define PSNIP_BUILTIN__SIZEOF_LONG 32
|
163
|
+
# elif LONG_MIN == (-0x7fffffffffffffffLL-1) && LONG_MAX == 0x7fffffffffffffffLL
|
164
|
+
# define PSNIP_BUILTIN__SIZEOF_LONG 64
|
165
|
+
# endif
|
166
|
+
#endif
|
167
|
+
|
168
|
+
#if !defined(PSNIP_BUILTIN__SIZEOF_LLONG)
|
169
|
+
# if LLONG_MIN == (-0x7fLL-1) && LLONG_MAX == 0x7fLL
|
170
|
+
# define PSNIP_BUILTIN__SIZEOF_LLONG 8
|
171
|
+
# elif LLONG_MIN == (-0x7fffLL-1) && LLONG_MAX == 0x7fffLL
|
172
|
+
# define PSNIP_BUILTIN__SIZEOF_LLONG 16
|
173
|
+
# elif LLONG_MIN == (-0x7fffffffLL-1) && LLONG_MAX == 0x7fffffffLL
|
174
|
+
# define PSNIP_BUILTIN__SIZEOF_LLONG 32
|
175
|
+
# elif LLONG_MIN == (-0x7fffffffffffffffLL-1) && LLONG_MAX == 0x7fffffffffffffffLL
|
176
|
+
# define PSNIP_BUILTIN__SIZEOF_LLONG 64
|
177
|
+
# endif
|
178
|
+
#endif
|
179
|
+
|
180
|
+
#if !defined(PSNIP_BUILTIN_SUFFIX_INT8)
|
181
|
+
# if PSNIP_BUILTIN__SIZEOF_CHAR == 8
|
182
|
+
# define PSNIP_BUILTIN_SUFFIX_INT8 PSNIP_BUILTIN__SUFFIX_B
|
183
|
+
# elif PSNIP_BUILTIN__SIZEOF_SHRT == 8
|
184
|
+
# define PSNIP_BUILTIN_SUFFIX_INT8 PSNIP_BUILTIN__SUFFIX_S
|
185
|
+
# elif PSNIP_BUILTIN__SIZEOF_INT == 8
|
186
|
+
# define PSNIP_BUILTIN_SUFFIX_INT8 PSNIP_BUILTIN__SUFFIX_
|
187
|
+
# elif PSNIP_BUILTIN__SIZEOF_LONG == 8
|
188
|
+
# define PSNIP_BUILTIN_SUFFIX_INT8 PSNIP_BUILTIN__SUFFIX_L
|
189
|
+
# elif PSNIP_BUILTIN__SIZEOF_LLONG == 8
|
190
|
+
# define PSNIP_BUILTIN_SUFFIX_INT8 PSNIP_BUILTIN__SUFFIX_LL
|
191
|
+
# endif
|
192
|
+
#endif
|
193
|
+
|
194
|
+
#if !defined(PSNIP_BUILTIN_SUFFIX_INT16)
|
195
|
+
# if PSNIP_BUILTIN__SIZEOF_CHAR == 16
|
196
|
+
# define PSNIP_BUILTIN_SUFFIX_INT16 PSNIP_BUILTIN__SUFFIX_B
|
197
|
+
# elif PSNIP_BUILTIN__SIZEOF_SHRT == 16
|
198
|
+
# define PSNIP_BUILTIN_SUFFIX_INT16 PSNIP_BUILTIN__SUFFIX_S
|
199
|
+
# elif PSNIP_BUILTIN__SIZEOF_INT == 16
|
200
|
+
# define PSNIP_BUILTIN_SUFFIX_INT16 PSNIP_BUILTIN__SUFFIX_
|
201
|
+
# elif PSNIP_BUILTIN__SIZEOF_LONG == 16
|
202
|
+
# define PSNIP_BUILTIN_SUFFIX_INT16 PSNIP_BUILTIN__SUFFIX_L
|
203
|
+
# elif PSNIP_BUILTIN__SIZEOF_LLONG == 16
|
204
|
+
# define PSNIP_BUILTIN_SUFFIX_INT16 PSNIP_BUILTIN__SUFFIX_LL
|
205
|
+
# endif
|
206
|
+
#endif
|
207
|
+
|
208
|
+
#if !defined(PSNIP_BUILTIN_SUFFIX_INT32)
|
209
|
+
# if PSNIP_BUILTIN__SIZEOF_CHAR == 32
|
210
|
+
# define PSNIP_BUILTIN_SUFFIX_INT32 PSNIP_BUILTIN__SUFFIX_B
|
211
|
+
# elif PSNIP_BUILTIN__SIZEOF_SHRT == 32
|
212
|
+
# define PSNIP_BUILTIN_SUFFIX_INT32 PSNIP_BUILTIN__SUFFIX_S
|
213
|
+
# elif PSNIP_BUILTIN__SIZEOF_INT == 32
|
214
|
+
# define PSNIP_BUILTIN_SUFFIX_INT32 PSNIP_BUILTIN__SUFFIX_
|
215
|
+
# elif PSNIP_BUILTIN__SIZEOF_LONG == 32
|
216
|
+
# define PSNIP_BUILTIN_SUFFIX_INT32 PSNIP_BUILTIN__SUFFIX_L
|
217
|
+
# elif PSNIP_BUILTIN__SIZEOF_LLONG == 32
|
218
|
+
# define PSNIP_BUILTIN_SUFFIX_INT32 PSNIP_BUILTIN__SUFFIX_LL
|
219
|
+
# endif
|
220
|
+
#endif
|
221
|
+
|
222
|
+
#if !defined(PSNIP_BUILTIN_SUFFIX_INT64)
|
223
|
+
# if defined(__APPLE__) && PSNIP_BUILTIN__SIZEOF_LLONG == 64
|
224
|
+
# define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_LL
|
225
|
+
# elif PSNIP_BUILTIN__SIZEOF_CHAR == 64
|
226
|
+
# define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_B
|
227
|
+
# elif PSNIP_BUILTIN__SIZEOF_SHRT == 64
|
228
|
+
# define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_S
|
229
|
+
# elif PSNIP_BUILTIN__SIZEOF_INT == 64
|
230
|
+
# define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_
|
231
|
+
# elif PSNIP_BUILTIN__SIZEOF_LONG == 64
|
232
|
+
# define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_L
|
233
|
+
# elif PSNIP_BUILTIN__SIZEOF_LLONG == 64
|
234
|
+
# define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_LL
|
235
|
+
# endif
|
236
|
+
#endif
|
237
|
+
|
238
|
+
#if defined(PSNIP_BUILTIN_SUFFIX_INT8)
|
239
|
+
# if PSNIP_BUILTIN_SUFFIX_INT8 == 1
|
240
|
+
# define PSNIP_BUILTIN__VARIANT2_INT8(prefix,name) prefix##_builtin_##name##b
|
241
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT8 == 2
|
242
|
+
# define PSNIP_BUILTIN__VARIANT2_INT8(prefix,name) prefix##_builtin_##name##s
|
243
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT8 == 3
|
244
|
+
# define PSNIP_BUILTIN__VARIANT_INT8(prefix,name) prefix##_builtin_##name
|
245
|
+
# define PSNIP_BUILTIN__VARIANT2_INT8(prefix,name) prefix##_builtin_##name
|
246
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT8 == 4
|
247
|
+
# define PSNIP_BUILTIN__VARIANT_INT8(prefix,name) prefix##_builtin_##name##l
|
248
|
+
# define PSNIP_BUILTIN__VARIANT2_INT8(prefix,name) prefix##_builtin_##name##l
|
249
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT8 == 5
|
250
|
+
# define PSNIP_BUILTIN__VARIANT_INT8(prefix,name) prefix##_builtin_##name##ll
|
251
|
+
# define PSNIP_BUILTIN__VARIANT2_INT8(prefix,name) prefix##_builtin_##name##ll
|
252
|
+
# endif
|
253
|
+
#endif
|
254
|
+
|
255
|
+
#if defined(PSNIP_BUILTIN_SUFFIX_INT16)
|
256
|
+
# if PSNIP_BUILTIN_SUFFIX_INT16 == 1
|
257
|
+
# define PSNIP_BUILTIN__VARIANT2_INT16(prefix,name) prefix##_builtin_##name##b
|
258
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT16 == 2
|
259
|
+
# define PSNIP_BUILTIN__VARIANT2_INT16(prefix,name) prefix##_builtin_##name##s
|
260
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT16 == 3
|
261
|
+
# define PSNIP_BUILTIN__VARIANT_INT16(prefix,name) prefix##_builtin_##name
|
262
|
+
# define PSNIP_BUILTIN__VARIANT2_INT16(prefix,name) prefix##_builtin_##name
|
263
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT16 == 4
|
264
|
+
# define PSNIP_BUILTIN__VARIANT_INT16(prefix,name) prefix##_builtin_##name##l
|
265
|
+
# define PSNIP_BUILTIN__VARIANT2_INT16(prefix,name) prefix##_builtin_##name##l
|
266
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT16 == 5
|
267
|
+
# define PSNIP_BUILTIN__VARIANT_INT16(prefix,name) prefix##_builtin_##name##ll
|
268
|
+
# define PSNIP_BUILTIN__VARIANT2_INT16(prefix,name) prefix##_builtin_##name##ll
|
269
|
+
# endif
|
270
|
+
#endif
|
271
|
+
|
272
|
+
#if defined(PSNIP_BUILTIN_SUFFIX_INT32)
|
273
|
+
# if PSNIP_BUILTIN_SUFFIX_INT32 == 1
|
274
|
+
# define PSNIP_BUILTIN__VARIANT2_INT32(prefix,name) prefix##_builtin_##name##b
|
275
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT32 == 2
|
276
|
+
# define PSNIP_BUILTIN__VARIANT2_INT32(prefix,name) prefix##_builtin_##name##s
|
277
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT32 == 3
|
278
|
+
# define PSNIP_BUILTIN__VARIANT_INT32(prefix,name) prefix##_builtin_##name
|
279
|
+
# define PSNIP_BUILTIN__VARIANT2_INT32(prefix,name) prefix##_builtin_##name
|
280
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT32 == 4
|
281
|
+
# define PSNIP_BUILTIN__VARIANT_INT32(prefix,name) prefix##_builtin_##name##l
|
282
|
+
# define PSNIP_BUILTIN__VARIANT2_INT32(prefix,name) prefix##_builtin_##name##l
|
283
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT32 == 5
|
284
|
+
# define PSNIP_BUILTIN__VARIANT_INT32(prefix,name) prefix##_builtin_##name##ll
|
285
|
+
# define PSNIP_BUILTIN__VARIANT2_INT32(prefix,name) prefix##_builtin_##name##ll
|
286
|
+
# endif
|
287
|
+
#endif
|
288
|
+
|
289
|
+
#if defined(PSNIP_BUILTIN_SUFFIX_INT64)
|
290
|
+
# if PSNIP_BUILTIN_SUFFIX_INT64 == 1
|
291
|
+
# define PSNIP_BUILTIN__VARIANT2_INT64(prefix,name) prefix##_builtin_##name##b
|
292
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT64 == 2
|
293
|
+
# define PSNIP_BUILTIN__VARIANT2_INT64(prefix,name) prefix##_builtin_##name##s
|
294
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT64 == 3
|
295
|
+
# define PSNIP_BUILTIN__VARIANT_INT64(prefix,name) prefix##_builtin_##name
|
296
|
+
# define PSNIP_BUILTIN__VARIANT2_INT64(prefix,name) prefix##_builtin_##name
|
297
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT64 == 4
|
298
|
+
# define PSNIP_BUILTIN__VARIANT_INT64(prefix,name) prefix##_builtin_##name##l
|
299
|
+
# define PSNIP_BUILTIN__VARIANT2_INT64(prefix,name) prefix##_builtin_##name##l
|
300
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT64 == 5
|
301
|
+
# define PSNIP_BUILTIN__VARIANT_INT64(prefix,name) prefix##_builtin_##name##ll
|
302
|
+
# define PSNIP_BUILTIN__VARIANT2_INT64(prefix,name) prefix##_builtin_##name##ll
|
303
|
+
# endif
|
304
|
+
#endif
|
305
|
+
|
306
|
+
/******
|
307
|
+
*** GCC-style built-ins
|
308
|
+
******/
|
309
|
+
|
310
|
+
/*** __builtin_ffs ***/
|
311
|
+
|
312
|
+
/* Defines `int psnip_builtin_<f_n>(T x)`, a table-driven find-first-set.
 *
 * The value is scanned eight bits at a time starting from the least
 * significant end.  The first non-zero group is looked up in a 256-entry
 * table holding the 1-based position of its lowest set bit, and the group's
 * bit offset is added.  Returns 0 when no bit is set.
 */
#define PSNIP_BUILTIN__FFS_DEFINE_PORTABLE(f_n, T) \
  PSNIP_BUILTIN__FUNCTION \
  int psnip_builtin_##f_n(T x) { \
    static const char psnip_builtin_ffs_lookup[256] = { \
      0, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      7, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      8, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      7, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1 \
    }; \
    size_t shift; \
    \
    for (shift = 0; shift < (sizeof(T) * 8); shift += 8) { \
      const unsigned char octet = (unsigned char) ((x >> shift) & 0xff); \
      if (octet != 0) \
        return psnip_builtin_ffs_lookup[octet] + shift; \
    } \
    \
    return 0; \
  }
|
347
|
+
|
348
|
+
#if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_ffs, 3, 3)
|
349
|
+
# define psnip_builtin_ffs(x) __builtin_ffs(x)
|
350
|
+
# define psnip_builtin_ffsl(x) __builtin_ffsl(x)
|
351
|
+
# define psnip_builtin_ffsll(x) __builtin_ffsll(x)
|
352
|
+
# define psnip_builtin_ffs32(x) PSNIP_BUILTIN__VARIANT_INT32(_,ffs)(x)
|
353
|
+
# define psnip_builtin_ffs64(x) PSNIP_BUILTIN__VARIANT_INT64(_,ffs)(x)
|
354
|
+
#else
|
355
|
+
# if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanForward, 14, 0)
|
356
|
+
PSNIP_BUILTIN__FUNCTION
|
357
|
+
int psnip_builtin_ffsll(long long v) {
|
358
|
+
unsigned long r;
|
359
|
+
# if defined(_M_AMD64) || defined(_M_ARM)
|
360
|
+
if (_BitScanForward64(&r, (unsigned long long) v)) {
|
361
|
+
return (int) (r + 1);
|
362
|
+
}
|
363
|
+
# else
|
364
|
+
if (_BitScanForward(&r, (unsigned long) (v))) {
|
365
|
+
return (int) (r + 1);
|
366
|
+
} else if (_BitScanForward(&r, (unsigned long) (v >> 32))) {
|
367
|
+
return (int) (r + 33);
|
368
|
+
}
|
369
|
+
# endif
|
370
|
+
return 0;
|
371
|
+
}
|
372
|
+
|
373
|
+
PSNIP_BUILTIN__FUNCTION
|
374
|
+
int psnip_builtin_ffsl(long v) {
|
375
|
+
unsigned long r;
|
376
|
+
if (_BitScanForward(&r, (unsigned long) v)) {
|
377
|
+
return (int) (r + 1);
|
378
|
+
}
|
379
|
+
return 0;
|
380
|
+
}
|
381
|
+
|
382
|
+
PSNIP_BUILTIN__FUNCTION
|
383
|
+
int psnip_builtin_ffs(int v) {
|
384
|
+
return psnip_builtin_ffsl(v);
|
385
|
+
}
|
386
|
+
# else
|
387
|
+
PSNIP_BUILTIN__FFS_DEFINE_PORTABLE(ffs, int)
|
388
|
+
PSNIP_BUILTIN__FFS_DEFINE_PORTABLE(ffsl, long)
|
389
|
+
PSNIP_BUILTIN__FFS_DEFINE_PORTABLE(ffsll, long long)
|
390
|
+
# endif
|
391
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
392
|
+
# define __builtin_ffsll(v) psnip_builtin_ffsll(v)
|
393
|
+
# define __builtin_ffsl(v) psnip_builtin_ffsl(v)
|
394
|
+
# define __builtin_ffs(v) psnip_builtin_ffs(v)
|
395
|
+
# endif
|
396
|
+
#endif
|
397
|
+
|
398
|
+
#if !defined(psnip_builtin_ffs32)
|
399
|
+
# define psnip_builtin_ffs32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,ffs)(x)
|
400
|
+
#endif
|
401
|
+
|
402
|
+
#if !defined(psnip_builtin_ffs64)
|
403
|
+
# define psnip_builtin_ffs64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,ffs)(x)
|
404
|
+
#endif
|
405
|
+
|
406
|
+
/*** __builtin_clz ***/
|
407
|
+
|
408
|
+
/* Defines `int psnip_builtin_<f_n>(T x)`, a table-driven count-leading-zeros.
 *
 * The value is shifted right in steps of eight bits, starting with the
 * largest shift.  The first shift that leaves a non-zero remainder is looked
 * up in a 256-entry table giving the number of leading zero bits within an
 * 8-bit group, and the number of groups already skipped (times eight) is
 * added.  If every shifted remainder is zero, the value itself is looked up.
 * For x == 0 the function returns (sizeof(T) * 8) - 1.
 */
#define PSNIP_BUILTIN__CLZ_DEFINE_PORTABLE(f_n, T) \
  PSNIP_BUILTIN__FUNCTION \
  int psnip_builtin_##f_n(T x) { \
    static const char psnip_builtin_clz_lookup[256] = { \
      7, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, \
      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, \
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, \
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, \
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 \
    }; \
    const size_t top_shift = (sizeof(T) - 1) * 8; \
    size_t shift; \
    \
    for (shift = top_shift; shift != 0; shift -= 8) { \
      const T window = x >> shift; \
      if (window != 0) \
        return psnip_builtin_clz_lookup[window] + (top_shift - shift); \
    } \
    \
    if (x != 0) \
      return psnip_builtin_clz_lookup[x] + top_shift; \
    \
    return (int) ((sizeof(T) * 8) - 1); \
  }
|
445
|
+
|
446
|
+
#if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_clz, 3, 4)
|
447
|
+
# define psnip_builtin_clz(x) __builtin_clz(x)
|
448
|
+
# define psnip_builtin_clzl(x) __builtin_clzl(x)
|
449
|
+
# define psnip_builtin_clzll(x) __builtin_clzll(x)
|
450
|
+
# define psnip_builtin_clz32(x) PSNIP_BUILTIN__VARIANT_INT32(_,clz)(x)
|
451
|
+
# define psnip_builtin_clz64(x) PSNIP_BUILTIN__VARIANT_INT64(_,clz)(x)
|
452
|
+
#else
|
453
|
+
# if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanReverse,14,0)
|
454
|
+
PSNIP_BUILTIN__FUNCTION
|
455
|
+
int psnip_builtin_clzll(unsigned long long v) {
|
456
|
+
unsigned long r = 0;
|
457
|
+
# if defined(_M_AMD64) || defined(_M_ARM)
|
458
|
+
if (_BitScanReverse64(&r, v)) {
|
459
|
+
return 63 - r;
|
460
|
+
}
|
461
|
+
# else
|
462
|
+
if (_BitScanReverse(&r, (unsigned long) (v >> 32))) {
|
463
|
+
return 31 - r;
|
464
|
+
} else if (_BitScanReverse(&r, (unsigned long) v)) {
|
465
|
+
return 63 - r;
|
466
|
+
}
|
467
|
+
# endif
|
468
|
+
return 63;
|
469
|
+
}
|
470
|
+
|
471
|
+
PSNIP_BUILTIN__FUNCTION
|
472
|
+
int psnip_builtin_clzl(unsigned long v) {
|
473
|
+
unsigned long r = 0;
|
474
|
+
if (_BitScanReverse(&r, v)) {
|
475
|
+
return 31 - r;
|
476
|
+
}
|
477
|
+
return 31;
|
478
|
+
}
|
479
|
+
|
480
|
+
PSNIP_BUILTIN__FUNCTION
|
481
|
+
int psnip_builtin_clz(unsigned int v) {
|
482
|
+
return psnip_builtin_clzl(v);
|
483
|
+
}
|
484
|
+
# define psnip_builtin_clz32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,clz)(x)
|
485
|
+
# define psnip_builtin_clz64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,clz)(x)
|
486
|
+
# else
|
487
|
+
PSNIP_BUILTIN__FUNCTION
|
488
|
+
int psnip_builtin_clz32(psnip_uint32_t v) {
|
489
|
+
static const unsigned char MultiplyDeBruijnBitPosition[] = {
|
490
|
+
0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
|
491
|
+
8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
|
492
|
+
};
|
493
|
+
|
494
|
+
v |= v >> 1;
|
495
|
+
v |= v >> 2;
|
496
|
+
v |= v >> 4;
|
497
|
+
v |= v >> 8;
|
498
|
+
v |= v >> 16;
|
499
|
+
|
500
|
+
return
|
501
|
+
((sizeof(psnip_uint32_t) * CHAR_BIT) - 1) -
|
502
|
+
MultiplyDeBruijnBitPosition[(psnip_uint32_t)(v * 0x07C4ACDDU) >> 27];
|
503
|
+
}
|
504
|
+
|
505
|
+
PSNIP_BUILTIN__FUNCTION
|
506
|
+
int psnip_builtin_clz64(psnip_uint64_t v) {
|
507
|
+
static const unsigned char MultiplyDeBruijnBitPosition[] = {
|
508
|
+
0, 47, 1, 56, 48, 27, 2, 60, 57, 49, 41, 37, 28, 16, 3, 61,
|
509
|
+
54, 58, 35, 52, 50, 42, 21, 44, 38, 32, 29, 23, 17, 11, 4, 62,
|
510
|
+
46, 55, 26, 59, 40, 36, 15, 53, 34, 51, 20, 43, 31, 22, 10, 45,
|
511
|
+
25, 39, 14, 33, 19, 30, 9, 24, 13, 18, 8, 12, 7, 6, 5, 63
|
512
|
+
};
|
513
|
+
|
514
|
+
v |= v >> 1;
|
515
|
+
v |= v >> 2;
|
516
|
+
v |= v >> 4;
|
517
|
+
v |= v >> 8;
|
518
|
+
v |= v >> 16;
|
519
|
+
v |= v >> 32;
|
520
|
+
|
521
|
+
return
|
522
|
+
((sizeof(psnip_uint64_t) * CHAR_BIT) - 1) -
|
523
|
+
MultiplyDeBruijnBitPosition[(psnip_uint64_t)(v * 0x03F79D71B4CB0A89ULL) >> 58];
|
524
|
+
}
|
525
|
+
|
526
|
+
# if PSNIP_BUILTIN__SIZEOF_INT == 32
|
527
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_clz(unsigned int x) { return psnip_builtin_clz32(x); }
|
528
|
+
# elif PSNIP_BUILTIN__SIZEOF_INT == 64
|
529
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_clz(unsigned int x) { return psnip_builtin_clz64(x); }
|
530
|
+
# else
|
531
|
+
PSNIP_BUILTIN__CLZ_DEFINE_PORTABLE(clz, unsigned int)
|
532
|
+
# endif
|
533
|
+
|
534
|
+
# if PSNIP_BUILTIN__SIZEOF_LONG == 32
|
535
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_clzl(unsigned long x) { return psnip_builtin_clz32(x); }
|
536
|
+
# elif PSNIP_BUILTIN__SIZEOF_LONG == 64
|
537
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_clzl(unsigned long x) { return psnip_builtin_clz64(x); }
|
538
|
+
# else
|
539
|
+
PSNIP_BUILTIN__CLZ_DEFINE_PORTABLE(clzl, unsigned long)
|
540
|
+
# endif
|
541
|
+
|
542
|
+
# if PSNIP_BUILTIN__SIZEOF_LLONG == 32
|
543
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_clzll(unsigned long long x) { return psnip_builtin_clz32(x); }
|
544
|
+
# elif PSNIP_BUILTIN__SIZEOF_LLONG == 64
|
545
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_clzll(unsigned long long x) { return psnip_builtin_clz64(x); }
|
546
|
+
# else
|
547
|
+
PSNIP_BUILTIN__CLZ_DEFINE_PORTABLE(clzll, unsigned long long)
|
548
|
+
# endif
|
549
|
+
|
550
|
+
# endif
|
551
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
552
|
+
# define __builtin_clz(x) psnip_builtin_clz(x)
|
553
|
+
# define __builtin_clzl(x) psnip_builtin_clzl(x)
|
554
|
+
# define __builtin_clzll(x) psnip_builtin_clzll(x)
|
555
|
+
# endif
|
556
|
+
#endif
|
557
|
+
|
558
|
+
#if !defined(psnip_builtin_clz32)
|
559
|
+
# define psnip_builtin_clz32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,clz)(x)
|
560
|
+
#endif
|
561
|
+
|
562
|
+
#if !defined(psnip_builtin_clz64)
|
563
|
+
# define psnip_builtin_clz64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,clz)(x)
|
564
|
+
#endif
|
565
|
+
|
566
|
+
/*** __builtin_ctz ***/
|
567
|
+
|
568
|
+
/* Defines `int psnip_builtin_<f_n>(T x)`, a table-driven count-trailing-zeros.
 *
 * The value is scanned eight bits at a time starting from the least
 * significant end.  The first non-zero group is looked up in a 256-entry
 * table holding the 0-based position of its lowest set bit, and the group's
 * bit offset is added.  When no group is non-zero (x == 0) the function
 * returns sizeof(T) - 1; the result for a zero input is not meaningful.
 */
#define PSNIP_BUILTIN__CTZ_DEFINE_PORTABLE(f_n, T) \
  PSNIP_BUILTIN__FUNCTION \
  int psnip_builtin_##f_n(T x) { \
    static const char psnip_builtin_ctz_lookup[256] = { \
      0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 \
    }; \
    size_t shift; \
    \
    for (shift = 0; shift < (sizeof(T) * 8); shift += 8) { \
      const T octet = (x >> shift) & 0xff; \
      if (octet != 0) \
        return psnip_builtin_ctz_lookup[octet] + (char) shift; \
    } \
    \
    return (int) sizeof(T) - 1; \
  }
|
600
|
+
|
601
|
+
#if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_ctz, 3, 4)
|
602
|
+
# define psnip_builtin_ctz(x) __builtin_ctz(x)
|
603
|
+
# define psnip_builtin_ctzl(x) __builtin_ctzl(x)
|
604
|
+
# define psnip_builtin_ctzll(x) __builtin_ctzll(x)
|
605
|
+
# define psnip_builtin_ctz32(x) PSNIP_BUILTIN__VARIANT_INT32(_,ctz)(x)
|
606
|
+
# define psnip_builtin_ctz64(x) PSNIP_BUILTIN__VARIANT_INT64(_,ctz)(x)
|
607
|
+
#else
|
608
|
+
# if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanForward, 14, 0)
|
609
|
+
PSNIP_BUILTIN__FUNCTION
|
610
|
+
int psnip_builtin_ctzll(unsigned long long v) {
|
611
|
+
unsigned long r = 0;
|
612
|
+
# if defined(_M_AMD64) || defined(_M_ARM)
|
613
|
+
_BitScanForward64(&r, v);
|
614
|
+
return (int) r;
|
615
|
+
# else
|
616
|
+
if (_BitScanForward(&r, (unsigned int) (v)))
|
617
|
+
return (int) (r);
|
618
|
+
|
619
|
+
_BitScanForward(&r, (unsigned int) (v >> 32));
|
620
|
+
return (int) (r + 32);
|
621
|
+
# endif
|
622
|
+
}
|
623
|
+
|
624
|
+
PSNIP_BUILTIN__FUNCTION
|
625
|
+
int psnip_builtin_ctzl(unsigned long v) {
|
626
|
+
unsigned long r = 0;
|
627
|
+
_BitScanForward(&r, v);
|
628
|
+
return (int) r;
|
629
|
+
}
|
630
|
+
|
631
|
+
PSNIP_BUILTIN__FUNCTION
|
632
|
+
int psnip_builtin_ctz(unsigned int v) {
|
633
|
+
return psnip_builtin_ctzl(v);
|
634
|
+
}
|
635
|
+
# define psnip_builtin_ctz32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,ctz)(x)
|
636
|
+
# define psnip_builtin_ctz64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,ctz)(x)
|
637
|
+
# else
|
638
|
+
PSNIP_BUILTIN__FUNCTION
|
639
|
+
int psnip_builtin_ctz32(psnip_uint32_t v) {
|
640
|
+
static const unsigned char MultiplyDeBruijnBitPosition[] = {
|
641
|
+
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
|
642
|
+
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
|
643
|
+
};
|
644
|
+
|
645
|
+
return
|
646
|
+
MultiplyDeBruijnBitPosition[((psnip_uint32_t)((v & -v) * 0x077CB531U)) >> 27];
|
647
|
+
}
|
648
|
+
|
649
|
+
PSNIP_BUILTIN__FUNCTION
|
650
|
+
int psnip_builtin_ctz64(psnip_uint64_t v) {
|
651
|
+
static const unsigned char MultiplyDeBruijnBitPosition[] = {
|
652
|
+
0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
|
653
|
+
62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
|
654
|
+
63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
|
655
|
+
54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6
|
656
|
+
};
|
657
|
+
|
658
|
+
return
|
659
|
+
MultiplyDeBruijnBitPosition[((psnip_uint64_t)((v & -v) * 0x03f79d71b4ca8b09ULL)) >> 58];
|
660
|
+
}
|
661
|
+
|
662
|
+
# if PSNIP_BUILTIN__SIZEOF_INT == 32
|
663
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctz(unsigned int x) { return psnip_builtin_ctz32(x); }
|
664
|
+
# elif PSNIP_BUILTIN__SIZEOF_INT == 64
|
665
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctz(unsigned int x) { return psnip_builtin_ctz64(x); }
|
666
|
+
# else
|
667
|
+
PSNIP_BUILTIN__CTZ_DEFINE_PORTABLE(ctz, unsigned int)
|
668
|
+
# endif
|
669
|
+
|
670
|
+
# if PSNIP_BUILTIN__SIZEOF_LONG == 32
|
671
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctzl(unsigned long x) { return psnip_builtin_ctz32(x); }
|
672
|
+
# elif PSNIP_BUILTIN__SIZEOF_LONG == 64
|
673
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctzl(unsigned long x) { return psnip_builtin_ctz64(x); }
|
674
|
+
# else
|
675
|
+
PSNIP_BUILTIN__CTZ_DEFINE_PORTABLE(ctzl, unsigned long)
|
676
|
+
# endif
|
677
|
+
|
678
|
+
# if PSNIP_BUILTIN__SIZEOF_LLONG == 32
|
679
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctzll(unsigned long long x) { return psnip_builtin_ctz32(x); }
|
680
|
+
# elif PSNIP_BUILTIN__SIZEOF_LLONG == 64
|
681
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctzll(unsigned long long x) { return psnip_builtin_ctz64(x); }
|
682
|
+
# else
|
683
|
+
PSNIP_BUILTIN__CTZ_DEFINE_PORTABLE(ctzll, unsigned long long)
|
684
|
+
# endif
|
685
|
+
# endif
|
686
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
687
|
+
# define __builtin_ctz(x) psnip_builtin_ctz(x)
|
688
|
+
# define __builtin_ctzl(x) psnip_builtin_ctzl(x)
|
689
|
+
# define __builtin_ctzll(x) psnip_builtin_ctzll(x)
|
690
|
+
# endif
|
691
|
+
#endif
|
692
|
+
|
693
|
+
#if !defined(psnip_builtin_ctz32)
|
694
|
+
# define psnip_builtin_ctz32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,ctz)(x)
|
695
|
+
#endif
|
696
|
+
|
697
|
+
#if !defined(psnip_builtin_ctz64)
|
698
|
+
# define psnip_builtin_ctz64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,ctz)(x)
|
699
|
+
#endif
|
700
|
+
|
701
|
+
/*** __builtin_parity ***/
|
702
|
+
|
703
|
+
#define PSNIP_BUILTIN__PARITY_DEFINE_PORTABLE(f_n, T) \
|
704
|
+
PSNIP_BUILTIN__FUNCTION \
|
705
|
+
int psnip_builtin_##f_n(T v) { \
|
706
|
+
size_t i; \
|
707
|
+
for (i = (sizeof(T) * CHAR_BIT) / 2 ; i > 2 ; i /= 2) \
|
708
|
+
v ^= v >> i; \
|
709
|
+
v &= 0xf; \
|
710
|
+
return (0x6996 >> v) & 1; \
|
711
|
+
}
|
712
|
+
|
713
|
+
#if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_parity, 3, 4)
|
714
|
+
# define psnip_builtin_parity(x) __builtin_parity(x)
|
715
|
+
# define psnip_builtin_parityl(x) __builtin_parityl(x)
|
716
|
+
# define psnip_builtin_parityll(x) __builtin_parityll(x)
|
717
|
+
# define psnip_builtin_parity32(x) PSNIP_BUILTIN__VARIANT_INT32(_,parity)(x)
|
718
|
+
# define psnip_builtin_parity64(x) PSNIP_BUILTIN__VARIANT_INT64(_,parity)(x)
|
719
|
+
#else
|
720
|
+
PSNIP_BUILTIN__PARITY_DEFINE_PORTABLE(parity, unsigned int)
|
721
|
+
PSNIP_BUILTIN__PARITY_DEFINE_PORTABLE(parityl, unsigned long)
|
722
|
+
PSNIP_BUILTIN__PARITY_DEFINE_PORTABLE(parityll, unsigned long long)
|
723
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
724
|
+
# define __builtin_parity(x) psnip_builtin_parity(x)
|
725
|
+
# define __builtin_parityl(x) psnip_builtin_parityl(x)
|
726
|
+
# define __builtin_parityll(x) psnip_builtin_parityll(x)
|
727
|
+
# endif
|
728
|
+
#endif
|
729
|
+
|
730
|
+
#if !defined(psnip_builtin_parity32)
|
731
|
+
# define psnip_builtin_parity32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,parity)(x)
|
732
|
+
#endif
|
733
|
+
|
734
|
+
#if !defined(psnip_builtin_parity64)
|
735
|
+
# define psnip_builtin_parity64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,parity)(x)
|
736
|
+
#endif
|
737
|
+
|
738
|
+
/*** __builtin_popcount ***/
|
739
|
+
|
740
|
+
#define PSNIP_BUILTIN__POPCOUNT_DEFINE_PORTABLE(f_n, T) \
|
741
|
+
PSNIP_BUILTIN__FUNCTION \
|
742
|
+
int psnip_builtin_##f_n(T x) { \
|
743
|
+
x = x - ((x >> 1) & (T)~(T)0/3); \
|
744
|
+
x = (x & (T)~(T)0/15*3) + ((x >> 2) & (T)~(T)0/15*3); \
|
745
|
+
x = (x + (x >> 4)) & (T)~(T)0/255*15; \
|
746
|
+
return (T)(x * ((T)~(T)0/255)) >> (sizeof(T) - 1) * 8; \
|
747
|
+
}
|
748
|
+
|
749
|
+
#if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_popcount, 3, 4)
|
750
|
+
# define psnip_builtin_popcount(x) __builtin_popcount(x)
|
751
|
+
# define psnip_builtin_popcountl(x) __builtin_popcountl(x)
|
752
|
+
# define psnip_builtin_popcountll(x) __builtin_popcountll(x)
|
753
|
+
# define psnip_builtin_popcount32(x) PSNIP_BUILTIN__VARIANT_INT32(_,popcount)(x)
|
754
|
+
# define psnip_builtin_popcount64(x) PSNIP_BUILTIN__VARIANT_INT64(_,popcount)(x)
|
755
|
+
#else
|
756
|
+
PSNIP_BUILTIN__POPCOUNT_DEFINE_PORTABLE(popcount, unsigned int)
|
757
|
+
PSNIP_BUILTIN__POPCOUNT_DEFINE_PORTABLE(popcountl, unsigned long)
|
758
|
+
PSNIP_BUILTIN__POPCOUNT_DEFINE_PORTABLE(popcountll, unsigned long long)
|
759
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
760
|
+
# define __builtin_popcount(x) psnip_builtin_popcount(x)
|
761
|
+
# define __builtin_popcountl(x) psnip_builtin_popcountl(x)
|
762
|
+
# define __builtin_popcountll(x) psnip_builtin_popcountll(x)
|
763
|
+
# endif
|
764
|
+
#endif
|
765
|
+
|
766
|
+
#if !defined(psnip_builtin_popcount32)
|
767
|
+
# define psnip_builtin_popcount32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,popcount)(x)
|
768
|
+
#endif
|
769
|
+
|
770
|
+
#if !defined(psnip_builtin_popcount64)
|
771
|
+
# define psnip_builtin_popcount64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,popcount)(x)
|
772
|
+
#endif
|
773
|
+
|
774
|
+
/*** __builtin_clrsb ***/
|
775
|
+
|
776
|
+
/* Portable fallback for __builtin_clrsb{,l,ll}.
 *
 * Expands to psnip_builtin_<f_n>(T x), which returns the number of
 * redundant sign bits in x, i.e. how many bits directly below the
 * sign bit are copies of it.
 *
 *   f_n   - suffix of the generated function name
 *   clzfn - suffix of the psnip_builtin_* count-leading-zeros function
 *           used for operands of type T
 *   T     - signed integer type of the operand
 *
 * A negative operand is complemented first so that only leading zeros
 * have to be counted.  If the operand is zero at that point (x was 0
 * or -1) every bit below the sign bit is redundant, and the answer is
 * produced directly: count-leading-zeros is not guaranteed to be
 * defined for a zero argument (the GCC/Clang __builtin_clz family is
 * undefined there), so it must never be called with one.
 */
#define PSNIP_BUILTIN__CLRSB_DEFINE_PORTABLE(f_n, clzfn, T)     \
  PSNIP_BUILTIN__FUNCTION                                       \
  int psnip_builtin_##f_n(T x) {                                \
    if (x < 0)                                                  \
      x = ~x;                                                   \
    if (PSNIP_BUILTIN_UNLIKELY(x == 0))                         \
      return ((int) sizeof(x) * 8) - 1;                         \
    return psnip_builtin_##clzfn(x) - 1;                        \
  }
|
783
|
+
|
784
|
+
#if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_clrsb, 4, 7)
|
785
|
+
# define psnip_builtin_clrsb(x) __builtin_clrsb(x)
|
786
|
+
# if !defined(__INTEL_COMPILER)
|
787
|
+
# define psnip_builtin_clrsbl(x) __builtin_clrsbl(x)
|
788
|
+
# else
|
789
|
+
# if PSNIP_BUILTIN__SIZEOF_LONG == PSNIP_BUILTIN__SIZEOF_INT
|
790
|
+
# define psnip_builtin_clrsbl(x) ((long) __builtin_clrsb((int) x))
|
791
|
+
# elif PSNIP_BUILTIN__SIZEOF_LONG == PSNIP_BUILTIN__SIZEOF_LLONG
|
792
|
+
# define psnip_builtin_clrsbl(x) ((long) __builtin_clrsbll((long long) x))
|
793
|
+
# else
|
794
|
+
PSNIP_BUILTIN__CLRSB_DEFINE_PORTABLE(clrsbl, clzl, long)
|
795
|
+
# endif
|
796
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
797
|
+
# define __builtin_clrsbl(x) psnip_builtin_clrsbl(x)
|
798
|
+
# endif
|
799
|
+
# endif
|
800
|
+
# define psnip_builtin_clrsbll(x) __builtin_clrsbll(x)
|
801
|
+
# define psnip_builtin_clrsb32(x) PSNIP_BUILTIN__VARIANT_INT32(_,clrsb)(x)
|
802
|
+
# define psnip_builtin_clrsb64(x) PSNIP_BUILTIN__VARIANT_INT64(_,clrsb)(x)
|
803
|
+
#else
|
804
|
+
PSNIP_BUILTIN__CLRSB_DEFINE_PORTABLE(clrsb, clz, int)
|
805
|
+
PSNIP_BUILTIN__CLRSB_DEFINE_PORTABLE(clrsbl, clzl, long)
|
806
|
+
PSNIP_BUILTIN__CLRSB_DEFINE_PORTABLE(clrsbll, clzll, long long)
|
807
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
808
|
+
# define __builtin_clrsb(x) psnip_builtin_clrsb(x)
|
809
|
+
# define __builtin_clrsbl(x) psnip_builtin_clrsbl(x)
|
810
|
+
# define __builtin_clrsbll(x) psnip_builtin_clrsbll(x)
|
811
|
+
# endif
|
812
|
+
#endif
|
813
|
+
|
814
|
+
#if !defined(psnip_builtin_clrsb32)
|
815
|
+
# define psnip_builtin_clrsb32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,clrsb)(x)
|
816
|
+
#endif
|
817
|
+
|
818
|
+
#if !defined(psnip_builtin_clrsb64)
|
819
|
+
# define psnip_builtin_clrsb64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,clrsb)(x)
|
820
|
+
#endif
|
821
|
+
|
822
|
+
/*** __builtin_bitreverse ***/
|
823
|
+
|
824
|
+
#define PSNIP_BUILTIN__BITREVERSE_DEFINE_PORTABLE(f_n, T) \
|
825
|
+
PSNIP_BUILTIN__FUNCTION \
|
826
|
+
T psnip_builtin_##f_n(T x) { \
|
827
|
+
size_t s = sizeof(x) * CHAR_BIT; \
|
828
|
+
T mask = (T) 0U; \
|
829
|
+
mask = ~mask; \
|
830
|
+
while ((s >>= 1) > 0) { \
|
831
|
+
mask ^= (mask << s); \
|
832
|
+
x = ((x >> s) & mask) | ((x << s) & ~mask); \
|
833
|
+
} \
|
834
|
+
return x; \
|
835
|
+
}
|
836
|
+
|
837
|
+
#if PSNIP_BUILTIN_CLANG_HAS_BUILTIN(__builtin_bitreverse64) && !defined(__EMSCRIPTEN__)
|
838
|
+
# define psnip_builtin_bitreverse8(x) __builtin_bitreverse8(x)
|
839
|
+
# define psnip_builtin_bitreverse16(x) __builtin_bitreverse16(x)
|
840
|
+
# define psnip_builtin_bitreverse32(x) __builtin_bitreverse32(x)
|
841
|
+
# define psnip_builtin_bitreverse64(x) __builtin_bitreverse64(x)
|
842
|
+
#else
|
843
|
+
PSNIP_BUILTIN__FUNCTION
|
844
|
+
psnip_uint8_t psnip_builtin_bitreverse8(psnip_uint8_t v) {
|
845
|
+
return (psnip_uint8_t) ((v * 0x0202020202ULL & 0x010884422010ULL) % 1023);
|
846
|
+
}
|
847
|
+
PSNIP_BUILTIN__BITREVERSE_DEFINE_PORTABLE(bitreverse16, psnip_uint16_t)
|
848
|
+
PSNIP_BUILTIN__BITREVERSE_DEFINE_PORTABLE(bitreverse32, psnip_uint32_t)
|
849
|
+
PSNIP_BUILTIN__BITREVERSE_DEFINE_PORTABLE(bitreverse64, psnip_uint64_t)
|
850
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
851
|
+
# define __builtin_bitreverse8(x) psnip_builtin_bitreverse8(x)
|
852
|
+
# define __builtin_bitreverse16(x) psnip_builtin_bitreverse16(x)
|
853
|
+
# define __builtin_bitreverse32(x) psnip_builtin_bitreverse32(x)
|
854
|
+
# define __builtin_bitreverse64(x) psnip_builtin_bitreverse64(x)
|
855
|
+
# endif
|
856
|
+
#endif
|
857
|
+
|
858
|
+
/*** __builtin_addc ***/
|
859
|
+
|
860
|
+
/* Portable fallback for the Clang __builtin_addc family.
 *
 * Expands to psnip_builtin_<f_n>(T x, T y, T ci, T* co), which
 * returns x + y + ci reduced to the width of T and stores 1 in *co if
 * either of the two additions wrapped, 0 otherwise.
 *
 *   f_n - suffix of the generated function name
 *   T   - unsigned integer type of the operands
 *
 * The carry-in is treated as a full operand, so values other than 0
 * and 1 are handled too.  Each partial result is cast back to T
 * because types narrower than int are promoted before the addition;
 * a wrapped sum is then recognisable as being smaller than the value
 * it was added to.
 */
#define PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(f_n, T)             \
  PSNIP_BUILTIN__FUNCTION                                       \
  T psnip_builtin_##f_n(T x, T y, T ci, T* co) {                \
    const T partial = (T) (x + y);                              \
    const T total = (T) (partial + ci);                         \
    *co = (T) ((partial < x) | (total < partial));              \
    return total;                                               \
  }
|
874
|
+
|
875
|
+
#if PSNIP_BUILTIN_CLANG_HAS_BUILTIN(__builtin_addc)
|
876
|
+
# define psnip_builtin_addcb(x, y, ci, co) __builtin_addcb(x, y, ci, co)
|
877
|
+
# define psnip_builtin_addcs(x, y, ci, co) __builtin_addcs(x, y, ci, co)
|
878
|
+
# define psnip_builtin_addc(x, y, ci, co) __builtin_addc(x, y, ci, co)
|
879
|
+
# define psnip_builtin_addcl(x, y, ci, co) __builtin_addcl(x, y, ci, co)
|
880
|
+
# define psnip_builtin_addcll(x, y, ci, co) __builtin_addcll(x, y, ci, co)
|
881
|
+
# define psnip_builtin_addc8(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT8(_,addc)(x, y, ci, co)
|
882
|
+
# define psnip_builtin_addc16(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT16(_,addc)(x, y, ci, co)
|
883
|
+
# define psnip_builtin_addc32(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT32(_,addc)(x, y, ci, co)
|
884
|
+
# define psnip_builtin_addc64(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT64(_,addc)(x, y, ci, co)
|
885
|
+
#else
|
886
|
+
PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(addcb, unsigned char)
|
887
|
+
PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(addcs, unsigned short)
|
888
|
+
PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(addc, unsigned int)
|
889
|
+
PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(addcl, unsigned long)
|
890
|
+
PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(addcll, unsigned long long)
|
891
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
892
|
+
# define __builtin_addcb(x, y, ci, co) psnip_builtin_addcb(x, y, ci, co)
|
893
|
+
# define __builtin_addcs(x, y, ci, co) psnip_builtin_addcs(x, y, ci, co)
|
894
|
+
# define __builtin_addc(x, y, ci, co) psnip_builtin_addc(x, y, ci, co)
|
895
|
+
# define __builtin_addcl(x, y, ci, co) psnip_builtin_addcl(x, y, ci, co)
|
896
|
+
# define __builtin_addcll(x, y, ci, co) psnip_builtin_addcll(x, y, ci, co)
|
897
|
+
# endif
|
898
|
+
#endif
|
899
|
+
|
900
|
+
#if !defined(psnip_builtin_addc8)
|
901
|
+
# define psnip_builtin_addc8(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT8(psnip,addc)(x, y, ci, co)
|
902
|
+
#endif
|
903
|
+
|
904
|
+
#if !defined(psnip_builtin_addc16)
|
905
|
+
# define psnip_builtin_addc16(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT16(psnip,addc)(x, y, ci, co)
|
906
|
+
#endif
|
907
|
+
|
908
|
+
#if !defined(psnip_builtin_addc32)
|
909
|
+
# define psnip_builtin_addc32(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT32(psnip,addc)(x, y, ci, co)
|
910
|
+
#endif
|
911
|
+
|
912
|
+
#if !defined(psnip_builtin_addc64)
|
913
|
+
# define psnip_builtin_addc64(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT64(psnip,addc)(x, y, ci, co)
|
914
|
+
#endif
|
915
|
+
|
916
|
+
/*** __builtin_subc ***/
|
917
|
+
|
918
|
+
/* Portable fallback for the Clang __builtin_subc family.
 *
 * Expands to psnip_builtin_<f_n>(T x, T y, T ci, T* co), which
 * returns x - y - ci reduced to the width of T and stores 1 in *co if
 * either of the two subtractions borrowed, 0 otherwise.
 *
 *   f_n - suffix of the generated function name
 *   T   - unsigned integer type of the operands
 *
 * A borrow occurs exactly when the subtrahend is larger than the
 * value it is taken from, so both tests are made on the operands
 * *before* subtracting: x against y, then the intermediate difference
 * against the borrow-in.  Each partial result is cast back to T
 * because types narrower than int are promoted before the
 * subtraction.
 */
#define PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(f_n, T)             \
  PSNIP_BUILTIN__FUNCTION                                       \
  T psnip_builtin_##f_n(T x, T y, T ci, T* co) {                \
    const T partial = (T) (x - y);                              \
    const T total = (T) (partial - ci);                         \
    *co = (T) ((x < y) | (partial < ci));                       \
    return total;                                               \
  }
|
930
|
+
|
931
|
+
#if PSNIP_BUILTIN_CLANG_HAS_BUILTIN(__builtin_subc)
|
932
|
+
# define psnip_builtin_subcb(x, y, ci, co) __builtin_subcb(x, y, ci, co)
|
933
|
+
# define psnip_builtin_subcs(x, y, ci, co) __builtin_subcs(x, y, ci, co)
|
934
|
+
# define psnip_builtin_subc(x, y, ci, co) __builtin_subc(x, y, ci, co)
|
935
|
+
# define psnip_builtin_subcl(x, y, ci, co) __builtin_subcl(x, y, ci, co)
|
936
|
+
# define psnip_builtin_subcll(x, y, ci, co) __builtin_subcll(x, y, ci, co)
|
937
|
+
# define psnip_builtin_subc8(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT8(_,subc)(x, y, ci, co)
|
938
|
+
# define psnip_builtin_subc16(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT16(_,subc)(x, y, ci, co)
|
939
|
+
# define psnip_builtin_subc32(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT32(_,subc)(x, y, ci, co)
|
940
|
+
# define psnip_builtin_subc64(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT64(_,subc)(x, y, ci, co)
|
941
|
+
#else
|
942
|
+
PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(subcb, unsigned char)
|
943
|
+
PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(subcs, unsigned short)
|
944
|
+
PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(subc, unsigned int)
|
945
|
+
PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(subcl, unsigned long)
|
946
|
+
PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(subcll, unsigned long long)
|
947
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
948
|
+
# define __builtin_subcb(x, y, ci, co) psnip_builtin_subcb(x, y, ci, co)
|
949
|
+
# define __builtin_subcs(x, y, ci, co) psnip_builtin_subcs(x, y, ci, co)
|
950
|
+
# define __builtin_subc(x, y, ci, co) psnip_builtin_subc(x, y, ci, co)
|
951
|
+
# define __builtin_subcl(x, y, ci, co) psnip_builtin_subcl(x, y, ci, co)
|
952
|
+
# define __builtin_subcll(x, y, ci, co) psnip_builtin_subcll(x, y, ci, co)
|
953
|
+
# endif
|
954
|
+
#endif
|
955
|
+
|
956
|
+
#if !defined(psnip_builtin_subc8)
|
957
|
+
# define psnip_builtin_subc8(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT8(psnip,subc)(x, y, ci, co)
|
958
|
+
#endif
|
959
|
+
|
960
|
+
#if !defined(psnip_builtin_subc16)
|
961
|
+
# define psnip_builtin_subc16(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT16(psnip,subc)(x, y, ci, co)
|
962
|
+
#endif
|
963
|
+
|
964
|
+
#if !defined(psnip_builtin_subc32)
|
965
|
+
# define psnip_builtin_subc32(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT32(psnip,subc)(x, y, ci, co)
|
966
|
+
#endif
|
967
|
+
|
968
|
+
#if !defined(psnip_builtin_subc64)
|
969
|
+
# define psnip_builtin_subc64(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT64(psnip,subc)(x, y, ci, co)
|
970
|
+
#endif
|
971
|
+
|
972
|
+
/*** __builtin_bswap ***/
|
973
|
+
|
974
|
+
#if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_bswap16, 4, 8)
|
975
|
+
# define psnip_builtin_bswap16(x) __builtin_bswap16(x)
|
976
|
+
#else
|
977
|
+
# if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_byteswap_ushort,13,10)
|
978
|
+
# define psnip_builtin_bswap16(x) _byteswap_ushort(x)
|
979
|
+
# else
|
980
|
+
PSNIP_BUILTIN__FUNCTION
|
981
|
+
psnip_uint16_t
|
982
|
+
psnip_builtin_bswap16(psnip_uint16_t v) {
|
983
|
+
return
|
984
|
+
((v & (((psnip_uint16_t) 0xff) << 8)) >> 8) |
|
985
|
+
((v & (((psnip_uint16_t) 0xff) )) << 8);
|
986
|
+
}
|
987
|
+
# endif
|
988
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
989
|
+
# define __builtin_bswap16(x) psnip_builtin_bswap16(x)
|
990
|
+
# endif
|
991
|
+
#endif
|
992
|
+
|
993
|
+
#if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_bswap16, 4, 3)
|
994
|
+
# define psnip_builtin_bswap32(x) __builtin_bswap32(x)
|
995
|
+
# define psnip_builtin_bswap64(x) __builtin_bswap64(x)
|
996
|
+
#else
|
997
|
+
# if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_byteswap_ushort,13,10)
|
998
|
+
# define psnip_builtin_bswap32(x) _byteswap_ulong(x)
|
999
|
+
# define psnip_builtin_bswap64(x) _byteswap_uint64(x)
|
1000
|
+
# else
|
1001
|
+
PSNIP_BUILTIN__FUNCTION
|
1002
|
+
psnip_uint32_t
|
1003
|
+
psnip_builtin_bswap32(psnip_uint32_t v) {
|
1004
|
+
return
|
1005
|
+
((v & (((psnip_uint32_t) 0xff) << 24)) >> 24) |
|
1006
|
+
((v & (((psnip_uint32_t) 0xff) << 16)) >> 8) |
|
1007
|
+
((v & (((psnip_uint32_t) 0xff) << 8)) << 8) |
|
1008
|
+
((v & (((psnip_uint32_t) 0xff) )) << 24);
|
1009
|
+
}
|
1010
|
+
|
1011
|
+
PSNIP_BUILTIN__FUNCTION
|
1012
|
+
psnip_uint64_t
|
1013
|
+
psnip_builtin_bswap64(psnip_uint64_t v) {
|
1014
|
+
return
|
1015
|
+
((v & (((psnip_uint64_t) 0xff) << 56)) >> 56) |
|
1016
|
+
((v & (((psnip_uint64_t) 0xff) << 48)) >> 40) |
|
1017
|
+
((v & (((psnip_uint64_t) 0xff) << 40)) >> 24) |
|
1018
|
+
((v & (((psnip_uint64_t) 0xff) << 32)) >> 8) |
|
1019
|
+
((v & (((psnip_uint64_t) 0xff) << 24)) << 8) |
|
1020
|
+
((v & (((psnip_uint64_t) 0xff) << 16)) << 24) |
|
1021
|
+
((v & (((psnip_uint64_t) 0xff) << 8)) << 40) |
|
1022
|
+
((v & (((psnip_uint64_t) 0xff) )) << 56);
|
1023
|
+
}
|
1024
|
+
# endif
|
1025
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1026
|
+
# define __builtin_bswap32(x) psnip_builtin_bswap32(x)
|
1027
|
+
# define __builtin_bswap64(x) psnip_builtin_bswap64(x)
|
1028
|
+
# endif
|
1029
|
+
#endif
|
1030
|
+
|
1031
|
+
/******
|
1032
|
+
*** MSVC-style intrinsics
|
1033
|
+
******/
|
1034
|
+
|
1035
|
+
/*** _rotl ***/
|
1036
|
+
|
1037
|
+
/* Portable fallback for the MSVC _rotl family.
 *
 * Expands to psnip_intrin_<f_n>(T value, ST shift), which rotates
 * value left by shift bit positions.
 *
 *   f_n - suffix of the generated function name
 *   T   - unsigned integer type being rotated; its width in bits must
 *         be a power of two
 *   ST  - type of the shift-count parameter
 *
 * The count is reduced modulo the width of T before it is used.
 * Without the reduction a count of zero makes the complementary
 * right shift span the full width of T, and a count >= the width
 * does the same for the left shift; both are undefined behaviour for
 * 32- and 64-bit operands.  The complementary count is masked as
 * well so that it becomes 0 (not the full width) when count is 0.
 */
#define PSNIP_BUILTIN_ROTL_DEFINE_PORTABLE(f_n, T, ST)                    \
  PSNIP_BUILTIN__FUNCTION                                                 \
  T psnip_intrin_##f_n(T value, ST shift) {                               \
    const unsigned int width = (unsigned int) (sizeof(T) * 8);            \
    const unsigned int count = ((unsigned int) shift) & (width - 1U);     \
    return (T) ((value << count) |                                        \
                (value >> ((width - count) & (width - 1U))));             \
  }
|
1044
|
+
|
1045
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_rotl8, 14, 0)
|
1046
|
+
# define psnip_intrin_rotl8(value, shift) _rotl8(value, shift)
|
1047
|
+
# define psnip_intrin_rotl16(value, shift) _rotl16(value, shift)
|
1048
|
+
#else
|
1049
|
+
PSNIP_BUILTIN_ROTL_DEFINE_PORTABLE(rotl8, psnip_uint8_t, unsigned char)
|
1050
|
+
PSNIP_BUILTIN_ROTL_DEFINE_PORTABLE(rotl16, psnip_uint16_t, unsigned char)
|
1051
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1052
|
+
# if !defined(_rotl8)
|
1053
|
+
# define _rotl8(value, shift) psnip_intrin_rotl8(value, shift)
|
1054
|
+
# endif
|
1055
|
+
# if !defined(_rotl16)
|
1056
|
+
# define _rotl16(value, shift) psnip_intrin_rotl16(value, shift)
|
1057
|
+
# endif
|
1058
|
+
# endif
|
1059
|
+
#endif
|
1060
|
+
|
1061
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_rotl8, 13, 10)
|
1062
|
+
# define psnip_intrin_rotl(value, shift) _rotl(value, shift)
|
1063
|
+
# define psnip_intrin_rotl64(value, shift) _rotl64(value, shift)
|
1064
|
+
#else
|
1065
|
+
PSNIP_BUILTIN_ROTL_DEFINE_PORTABLE(rotl, psnip_uint32_t, int)
|
1066
|
+
PSNIP_BUILTIN_ROTL_DEFINE_PORTABLE(rotl64, psnip_uint64_t, int)
|
1067
|
+
|
1068
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1069
|
+
# if !defined(_rotl)
|
1070
|
+
# define _rotl(value, shift) psnip_intrin_rotl(value, shift)
|
1071
|
+
# endif
|
1072
|
+
# if !defined(_rotl64)
|
1073
|
+
# define _rotl64(value, shift) psnip_intrin_rotl64(value, shift)
|
1074
|
+
# endif
|
1075
|
+
# endif
|
1076
|
+
#endif
|
1077
|
+
|
1078
|
+
/*** _rotr ***/
|
1079
|
+
|
1080
|
+
/* Portable fallback for the MSVC _rotr family.
 *
 * Expands to psnip_intrin_<f_n>(T value, ST shift), which rotates
 * value right by shift bit positions.
 *
 *   f_n - suffix of the generated function name
 *   T   - unsigned integer type being rotated; its width in bits must
 *         be a power of two
 *   ST  - type of the shift-count parameter
 *
 * The count is reduced modulo the width of T before it is used.
 * Without the reduction a count of zero makes the complementary left
 * shift span the full width of T, and a count >= the width does the
 * same for the right shift; both are undefined behaviour for 32- and
 * 64-bit operands.  The complementary count is masked as well so
 * that it becomes 0 (not the full width) when count is 0.
 */
#define PSNIP_BUILTIN_ROTR_DEFINE_PORTABLE(f_n, T, ST)                    \
  PSNIP_BUILTIN__FUNCTION                                                 \
  T psnip_intrin_##f_n(T value, ST shift) {                               \
    const unsigned int width = (unsigned int) (sizeof(T) * 8);            \
    const unsigned int count = ((unsigned int) shift) & (width - 1U);     \
    return (T) ((value >> count) |                                        \
                (value << ((width - count) & (width - 1U))));             \
  }
|
1087
|
+
|
1088
|
+
PSNIP_BUILTIN_ROTR_DEFINE_PORTABLE(rotr8, psnip_uint8_t, unsigned char)
|
1089
|
+
PSNIP_BUILTIN_ROTR_DEFINE_PORTABLE(rotr16, psnip_uint16_t, unsigned char)
|
1090
|
+
|
1091
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_rotr8, 14, 0)
|
1092
|
+
# define psnip_intrin_rotr8(value, shift) _rotr8(value, shift)
|
1093
|
+
# define psnip_intrin_rotr16(value, shift) _rotr16(value, shift)
|
1094
|
+
#else
|
1095
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1096
|
+
# define _rotr8(value, shift) psnip_intrin_rotr8(value, shift)
|
1097
|
+
# define _rotr16(value, shift) psnip_intrin_rotr16(value, shift)
|
1098
|
+
# endif
|
1099
|
+
#endif
|
1100
|
+
|
1101
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_rotr8, 13, 10)
|
1102
|
+
# define psnip_intrin_rotr(value, shift) _rotr(value, shift)
|
1103
|
+
# define psnip_intrin_rotr64(value, shift) _rotr64(value, shift)
|
1104
|
+
#else
|
1105
|
+
PSNIP_BUILTIN_ROTR_DEFINE_PORTABLE(rotr, psnip_uint32_t, int)
|
1106
|
+
PSNIP_BUILTIN_ROTR_DEFINE_PORTABLE(rotr64, psnip_uint64_t, int)
|
1107
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1108
|
+
# if !defined(_rotr)
|
1109
|
+
# define _rotr(value, shift) psnip_intrin_rotr(value, shift)
|
1110
|
+
# endif
|
1111
|
+
# if !defined(_rotr64)
|
1112
|
+
# define _rotr64(value, shift) psnip_intrin_rotr64(value, shift)
|
1113
|
+
# endif
|
1114
|
+
# endif
|
1115
|
+
#endif
|
1116
|
+
|
1117
|
+
/*** _BitScanForward ***/
|
1118
|
+
|
1119
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanForward, 14, 0)
|
1120
|
+
# pragma intrinsic(_BitScanForward)
|
1121
|
+
PSNIP_BUILTIN__FUNCTION
|
1122
|
+
unsigned char psnip_intrin_BitScanForward(unsigned long* Index, psnip_uint32_t Mask) {
|
1123
|
+
const unsigned long M = (unsigned long) Mask;
|
1124
|
+
return _BitScanForward(Index, M);
|
1125
|
+
}
|
1126
|
+
#else
|
1127
|
+
PSNIP_BUILTIN__FUNCTION
|
1128
|
+
unsigned char psnip_intrin_BitScanForward(unsigned long* Index, psnip_uint32_t Mask) {
|
1129
|
+
return PSNIP_BUILTIN_UNLIKELY(Mask == 0) ? 0 : ((*Index = psnip_builtin_ctz32 (Mask)), 1);
|
1130
|
+
}
|
1131
|
+
|
1132
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1133
|
+
# define _BitScanForward(Index, Mask) psnip_intrin_BitScanForward(Index, Mask)
|
1134
|
+
# endif
|
1135
|
+
#endif
|
1136
|
+
|
1137
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanForward64, 14, 0) && (defined(_M_AMD64) || defined(_M_ARM))
|
1138
|
+
# pragma intrinsic(_BitScanForward64)
|
1139
|
+
# define psnip_intrin_BitScanForward64(Index, Mask) _BitScanForward64(Index, Mask)
|
1140
|
+
#else
|
1141
|
+
PSNIP_BUILTIN__FUNCTION
|
1142
|
+
unsigned char psnip_intrin_BitScanForward64(unsigned long* Index, psnip_uint64_t Mask) {
|
1143
|
+
return PSNIP_BUILTIN_UNLIKELY(Mask == 0) ? 0 : ((*Index = psnip_builtin_ctz64 (Mask)), 1);
|
1144
|
+
}
|
1145
|
+
|
1146
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1147
|
+
# define _BitScanForward64(Index, Mask) psnip_intrin_BitScanForward64(Index, Mask)
|
1148
|
+
# endif
|
1149
|
+
#endif
|
1150
|
+
|
1151
|
+
/*** _BitScanReverse ***/
|
1152
|
+
|
1153
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanReverse, 14, 0)
|
1154
|
+
# pragma intrinsic(_BitScanReverse)
|
1155
|
+
PSNIP_BUILTIN__FUNCTION
|
1156
|
+
unsigned char psnip_intrin_BitScanReverse(unsigned long* Index, psnip_uint32_t Mask) {
|
1157
|
+
const unsigned long M = (unsigned long) Mask;
|
1158
|
+
return _BitScanReverse(Index, M);
|
1159
|
+
}
|
1160
|
+
#else
|
1161
|
+
PSNIP_BUILTIN__FUNCTION
|
1162
|
+
unsigned char psnip_intrin_BitScanReverse(unsigned long* Index, psnip_uint32_t Mask) {
|
1163
|
+
return (PSNIP_BUILTIN_UNLIKELY(Mask == 0)) ? 0 : ((*Index = ((sizeof(Mask) * CHAR_BIT) - 1) - psnip_builtin_clz32 (Mask)), 1);
|
1164
|
+
}
|
1165
|
+
|
1166
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1167
|
+
# define _BitScanReverse(Index, Mask) psnip_intrin_BitScanReverse(Index, Mask)
|
1168
|
+
# endif
|
1169
|
+
#endif
|
1170
|
+
|
1171
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanReverse64, 14, 0) && (defined(_M_AMD64) || defined(_M_ARM))
|
1172
|
+
# pragma intrinsic(_BitScanReverse64)
|
1173
|
+
# define psnip_intrin_BitScanReverse64(Index, Mask) _BitScanReverse64(Index, Mask)
|
1174
|
+
#else
|
1175
|
+
PSNIP_BUILTIN__FUNCTION
|
1176
|
+
unsigned char psnip_intrin_BitScanReverse64(unsigned long* Index, psnip_uint64_t Mask) {
|
1177
|
+
return (PSNIP_BUILTIN_UNLIKELY(Mask == 0)) ? 0 : ((*Index = ((sizeof(Mask) * CHAR_BIT) - 1) - psnip_builtin_clz64 (Mask)), 1);
|
1178
|
+
}
|
1179
|
+
|
1180
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1181
|
+
# define _BitScanReverse64(Index, Mask) psnip_intrin_BitScanReverse64(Index, Mask)
|
1182
|
+
# endif
|
1183
|
+
#endif
|
1184
|
+
|
1185
|
+
/*** bittest ***/
|
1186
|
+
|
1187
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittest, 14, 0)
|
1188
|
+
# pragma intrinsic(_bittest)
|
1189
|
+
# define psnip_intrin_bittest(a, b) \
|
1190
|
+
__pragma(warning(push)) \
|
1191
|
+
__pragma(warning(disable:4057)) \
|
1192
|
+
_bittest(a, b) \
|
1193
|
+
__pragma(warning(pop))
|
1194
|
+
#else
|
1195
|
+
# define psnip_intrin_bittest(a, b) (((*(a)) >> (b)) & 1)
|
1196
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1197
|
+
# define _bittest(a, b) psnip_intrin_bittest(a, b)
|
1198
|
+
# endif
|
1199
|
+
#endif
|
1200
|
+
|
1201
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittest64, 14, 0) && (defined(_M_AMD64) || defined(_M_ARM))
|
1202
|
+
# pragma intrinsic(_bittest64)
|
1203
|
+
# define psnip_intrin_bittest64(a, b) _bittest64(a, b)
|
1204
|
+
#else
|
1205
|
+
# define psnip_intrin_bittest64(a, b) (((*(a)) >> (b)) & 1)
|
1206
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1207
|
+
# define _bittest64(a, b) psnip_intrin_bittest64(a, b)
|
1208
|
+
# endif
|
1209
|
+
#endif
|
1210
|
+
|
1211
|
+
/*** bittestandcomplement ***/
|
1212
|
+
|
1213
|
+
#define PSNIP_BUILTIN__BITTESTANDCOMPLEMENT_DEFINE_PORTABLE(f_n, T, UT) \
|
1214
|
+
PSNIP_BUILTIN__FUNCTION \
|
1215
|
+
unsigned char psnip_intrin_##f_n(T* a, T b) { \
|
1216
|
+
const char r = (*a >> b) & 1; \
|
1217
|
+
*a ^= ((UT) 1) << b; \
|
1218
|
+
return r; \
|
1219
|
+
}
|
1220
|
+
|
1221
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandcomplement, 14, 0)
|
1222
|
+
# pragma intrinsic(_bittestandcomplement)
|
1223
|
+
# define psnip_intrin_bittestandcomplement(a, b) \
|
1224
|
+
__pragma(warning(push)) \
|
1225
|
+
__pragma(warning(disable:4057)) \
|
1226
|
+
_bittestandcomplement(a, b) \
|
1227
|
+
__pragma(warning(pop))
|
1228
|
+
#else
|
1229
|
+
PSNIP_BUILTIN__BITTESTANDCOMPLEMENT_DEFINE_PORTABLE(bittestandcomplement, psnip_int32_t, psnip_uint32_t)
|
1230
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1231
|
+
# define _bittestandcomplement(a, b) psnip_intrin_bittestandcomplement(a, b)
|
1232
|
+
# endif
|
1233
|
+
#endif
|
1234
|
+
|
1235
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandcomplement64, 14, 0) && defined(_M_AMD64)
|
1236
|
+
# define psnip_intrin_bittestandcomplement64(a, b) _bittestandcomplement64(a, b)
|
1237
|
+
#else
|
1238
|
+
PSNIP_BUILTIN__BITTESTANDCOMPLEMENT_DEFINE_PORTABLE(bittestandcomplement64, psnip_int64_t, psnip_uint64_t)
|
1239
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1240
|
+
# define _bittestandcomplement64(a, b) psnip_intrin_bittestandcomplement64(a, b)
|
1241
|
+
# endif
|
1242
|
+
#endif
|
1243
|
+
|
1244
|
+
/*** bittestandreset ***/
|
1245
|
+
|
1246
|
+
#define PSNIP_BUILTIN__BITTESTANDRESET_DEFINE_PORTABLE(f_n, T, UT) \
|
1247
|
+
PSNIP_BUILTIN__FUNCTION \
|
1248
|
+
unsigned char psnip_intrin_##f_n(T* a, T b) { \
|
1249
|
+
const char r = (*a >> b) & 1; \
|
1250
|
+
*a &= ~(((UT) 1) << b); \
|
1251
|
+
return r; \
|
1252
|
+
}
|
1253
|
+
|
1254
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandreset, 14, 0)
|
1255
|
+
# pragma intrinsic(_bittestandreset)
|
1256
|
+
# define psnip_intrin_bittestandreset(a, b) \
|
1257
|
+
__pragma(warning(push)) \
|
1258
|
+
__pragma(warning(disable:4057)) \
|
1259
|
+
_bittestandreset(a, b) \
|
1260
|
+
__pragma(warning(pop))
|
1261
|
+
#else
|
1262
|
+
PSNIP_BUILTIN__BITTESTANDRESET_DEFINE_PORTABLE(bittestandreset, psnip_int32_t, psnip_uint32_t)
|
1263
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1264
|
+
# define _bittestandreset(a, b) psnip_intrin_bittestandreset(a, b)
|
1265
|
+
# endif
|
1266
|
+
#endif
|
1267
|
+
|
1268
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandreset64, 14, 0) && (defined(_M_AMD64) || defined(_M_IA64))
|
1269
|
+
# pragma intrinsic(_bittestandreset64)
|
1270
|
+
# define psnip_intrin_bittestandreset64(a, b) _bittestandreset64(a, b)
|
1271
|
+
#else
|
1272
|
+
PSNIP_BUILTIN__BITTESTANDRESET_DEFINE_PORTABLE(bittestandreset64, psnip_int64_t, psnip_uint64_t)
|
1273
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1274
|
+
# define _bittestandreset64(a, b) psnip_intrin_bittestandreset64(a, b)
|
1275
|
+
# endif
|
1276
|
+
#endif
|
1277
|
+
|
1278
|
+
/*** bittestandset ***/
|
1279
|
+
|
1280
|
+
#define PSNIP_BUILTIN__BITTESTANDSET_DEFINE_PORTABLE(f_n, T, UT) \
|
1281
|
+
PSNIP_BUILTIN__FUNCTION \
|
1282
|
+
unsigned char psnip_intrin_##f_n(T* a, T b) { \
|
1283
|
+
const char r = (*a >> b) & 1; \
|
1284
|
+
*a |= ((UT) 1) << b; \
|
1285
|
+
return r; \
|
1286
|
+
}
|
1287
|
+
|
1288
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandset, 14, 0)
|
1289
|
+
# pragma intrinsic(_bittestandset)
|
1290
|
+
# define psnip_intrin_bittestandset(a, b) \
|
1291
|
+
__pragma(warning(push)) \
|
1292
|
+
__pragma(warning(disable:4057)) \
|
1293
|
+
_bittestandset(a, b) \
|
1294
|
+
__pragma(warning(pop))
|
1295
|
+
#else
|
1296
|
+
PSNIP_BUILTIN__BITTESTANDSET_DEFINE_PORTABLE(bittestandset, psnip_int32_t, psnip_uint32_t)
|
1297
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1298
|
+
# define _bittestandset(a, b) psnip_intrin_bittestandset(a, b)
|
1299
|
+
# endif
|
1300
|
+
#endif
|
1301
|
+
|
1302
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandset64, 14, 0) && defined(_M_AMD64)
|
1303
|
+
# pragma intrinsic(_bittestandset64)
|
1304
|
+
# define psnip_intrin_bittestandset64(a, b) _bittestandset64(a, b)
|
1305
|
+
#else
|
1306
|
+
PSNIP_BUILTIN__BITTESTANDSET_DEFINE_PORTABLE(bittestandset64, psnip_int64_t, psnip_uint64_t)
|
1307
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1308
|
+
# define _bittestandset64(a, b) psnip_intrin_bittestandset64(a, b)
|
1309
|
+
# endif
|
1310
|
+
#endif
|
1311
|
+
|
1312
|
+
/*** shiftleft128 ***/
|
1313
|
+
|
1314
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(__shiftleft128, 14, 0) && defined(_M_AMD64)
|
1315
|
+
# define psnip_intrin_shiftleft128(LowPart, HighPart, Shift) __shiftleft128(LowPart, HighPart, Shift)
|
1316
|
+
#else
|
1317
|
+
# if defined(__SIZEOF_INT128__)
|
1318
|
+
PSNIP_BUILTIN__FUNCTION
|
1319
|
+
psnip_uint64_t psnip_intrin_shiftleft128(psnip_uint64_t LowPart, psnip_uint64_t HighPart, unsigned char Shift) {
|
1320
|
+
unsigned __int128 r = HighPart;
|
1321
|
+
r <<= 64;
|
1322
|
+
r |= LowPart;
|
1323
|
+
r <<= Shift % 64;
|
1324
|
+
return (psnip_uint64_t) (r >> 64);
|
1325
|
+
}
|
1326
|
+
# else
|
1327
|
+
PSNIP_BUILTIN__FUNCTION
|
1328
|
+
psnip_uint64_t psnip_intrin_shiftleft128(psnip_uint64_t LowPart, psnip_uint64_t HighPart, unsigned char Shift) {
|
1329
|
+
Shift %= 64;
|
1330
|
+
return PSNIP_BUILTIN_UNLIKELY(Shift == 0) ? HighPart : ((HighPart << Shift) | (LowPart >> (64 - Shift)));
|
1331
|
+
}
|
1332
|
+
# endif
|
1333
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1334
|
+
# define __shiftleft128(LowPart, HighPart, Shift) psnip_intrin_shiftleft128(LowPart, HighPart, Shift)
|
1335
|
+
# endif
|
1336
|
+
#endif
|
1337
|
+
|
1338
|
+
/*** shiftright128 ***/
|
1339
|
+
|
1340
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(__shiftright128, 14, 0) && defined(_M_AMD64)
|
1341
|
+
# define psnip_intrin_shiftright128(LowPart, HighPart, Shift) __shiftright128(LowPart, HighPart, Shift)
|
1342
|
+
#else
|
1343
|
+
# if defined(__SIZEOF_INT128__)
|
1344
|
+
PSNIP_BUILTIN__FUNCTION
|
1345
|
+
psnip_uint64_t psnip_intrin_shiftright128(psnip_uint64_t LowPart, psnip_uint64_t HighPart, unsigned char Shift) {
|
1346
|
+
unsigned __int128 r = HighPart;
|
1347
|
+
r <<= 64;
|
1348
|
+
r |= LowPart;
|
1349
|
+
r >>= Shift % 64;
|
1350
|
+
return (psnip_uint64_t) r;
|
1351
|
+
}
|
1352
|
+
# else
|
1353
|
+
PSNIP_BUILTIN__FUNCTION
|
1354
|
+
psnip_uint64_t psnip_intrin_shiftright128(psnip_uint64_t LowPart, psnip_uint64_t HighPart, unsigned char Shift) {
|
1355
|
+
Shift %= 64;
|
1356
|
+
|
1357
|
+
if (PSNIP_BUILTIN_UNLIKELY(Shift == 0))
|
1358
|
+
return LowPart;
|
1359
|
+
|
1360
|
+
return
|
1361
|
+
(HighPart << (64 - Shift)) |
|
1362
|
+
(LowPart >> Shift);
|
1363
|
+
}
|
1364
|
+
# endif
|
1365
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1366
|
+
# define __shiftright128(LowPart, HighPart, Shift) psnip_intrin_shiftright128(LowPart, HighPart, Shift)
|
1367
|
+
# endif
|
1368
|
+
#endif
|
1369
|
+
|
1370
|
+
/*** byteswap ***/
|
1371
|
+
|
1372
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_byteswap_ushort,13,10)
|
1373
|
+
# pragma intrinsic(_byteswap_ushort)
|
1374
|
+
# define psnip_intrin_byteswap_ushort(v) _byteswap_ushort(v)
|
1375
|
+
# pragma intrinsic(_byteswap_ulong)
|
1376
|
+
# define psnip_intrin_byteswap_ulong(v) _byteswap_ulong(v)
|
1377
|
+
# pragma intrinsic(_byteswap_uint64)
|
1378
|
+
# define psnip_intrin_byteswap_uint64(v) _byteswap_uint64(v)
|
1379
|
+
#else
|
1380
|
+
# define psnip_intrin_byteswap_ushort(v) psnip_builtin_bswap16(v)
|
1381
|
+
# define psnip_intrin_byteswap_ulong(v) psnip_builtin_bswap32(v)
|
1382
|
+
# define psnip_intrin_byteswap_uint64(v) psnip_builtin_bswap64(v)
|
1383
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1384
|
+
# define _byteswap_ushort(v) psnip_intrin_byteswap_ushort(v)
|
1385
|
+
# define _byteswap_ulong(v) psnip_intrin_byteswap_ulong(v)
|
1386
|
+
# define _byteswap_uint64(v) psnip_intrin_byteswap_uint64(v)
|
1387
|
+
# endif
|
1388
|
+
#endif
|
1389
|
+
|
1390
|
+
#endif /* defined(PSNIP_BUILTIN_H) */
|