victory 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1390 @@
1
+ /* Builtins and Intrinsics
2
+ * Portable Snippets - https://github.com/nemequ/portable-snippets
3
+ * Created by Evan Nemerson <evan@nemerson.com>
4
+ *
5
+ * To the extent possible under law, the authors have waived all
6
+ * copyright and related or neighboring rights to this code. For
7
+ * details, see the Creative Commons Zero 1.0 Universal license at
8
+ * https://creativecommons.org/publicdomain/zero/1.0/
9
+ *
10
+ * Some of these implementations are based on code from
11
+ * https://graphics.stanford.edu/~seander/bithacks.html which is also
12
+ * public domain (and a fantastic web site).
13
+ */
14
+
15
+ #if !defined(PSNIP_BUILTIN_H)
16
+ #define PSNIP_BUILTIN_H
17
+
18
/* PSNIP_BUILTIN_GNU_HAS_BUILTIN(builtin, major, minor)
 *
 * Preprocessor test for a GCC-style builtin.  Resolution order:
 *   1. HEDLEY_GCC_HAS_BUILTIN, when that macro is already defined;
 *   2. __has_builtin, when compiling with clang and it is available;
 *   3. a comparison of the GCC version against major.minor;
 *   4. otherwise the builtin is reported as unavailable. */
#ifdef HEDLEY_GCC_HAS_BUILTIN
#  define PSNIP_BUILTIN_GNU_HAS_BUILTIN(builtin,major,minor) \
     HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,0)
#elif defined(__clang__) && defined(__has_builtin)
#  define PSNIP_BUILTIN_GNU_HAS_BUILTIN(builtin,major,minor) \
     __has_builtin(builtin)
#elif defined(__GNUC__)
#  define PSNIP_BUILTIN_GNU_HAS_BUILTIN(builtin,major,minor) \
     (__GNUC__ > major || (major == __GNUC__ && __GNUC_MINOR__ >= minor))
#else
#  define PSNIP_BUILTIN_GNU_HAS_BUILTIN(builtin,major,minor) (0)
#endif
27
+
28
/* PSNIP_BUILTIN_CLANG_HAS_BUILTIN(builtin)
 *
 * Preprocessor test for a builtin by name only (no version fallback):
 * defers to HEDLEY_CLANG_HAS_BUILTIN if defined, else to __has_builtin
 * if the compiler provides it, else evaluates to 0. */
#ifdef HEDLEY_CLANG_HAS_BUILTIN
#  define PSNIP_BUILTIN_CLANG_HAS_BUILTIN(builtin) \
     HEDLEY_CLANG_HAS_BUILTIN(builtin)
#elif defined(__has_builtin)
#  define PSNIP_BUILTIN_CLANG_HAS_BUILTIN(builtin) \
     __has_builtin(builtin)
#else
#  define PSNIP_BUILTIN_CLANG_HAS_BUILTIN(builtin) (0)
#endif
35
+
36
/* PSNIP_BUILTIN_MSVC_HAS_INTRIN(intrin, major, minor)
 *
 * Preprocessor test for an MSVC intrinsic, expressed as a minimum
 * compiler version.  The `intrin` argument is not used by any expansion.
 * Outside MSVC the result is always 0.  The three MSVC expansions differ
 * only in how major/minor are scaled before being compared against
 * _MSC_FULL_VER (or _MSC_VER for compilers older than 12.00). */
#ifdef HEDLEY_MSVC_VERSION_CHECK
#  define PSNIP_BUILTIN_MSVC_HAS_INTRIN(intrin,major,minor) \
     HEDLEY_MSVC_VERSION_CHECK(major,minor,0)
#elif !defined(_MSC_VER)
#  define PSNIP_BUILTIN_MSVC_HAS_INTRIN(intrin,major,minor) (0)
#elif _MSC_VER >= 1400
#  define PSNIP_BUILTIN_MSVC_HAS_INTRIN(intrin,major,minor) \
     (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000)))
#elif _MSC_VER >= 1200
#  define PSNIP_BUILTIN_MSVC_HAS_INTRIN(intrin,major,minor) \
     (_MSC_FULL_VER >= ((major * 100000) + (minor * 1000)))
#else
#  define PSNIP_BUILTIN_MSVC_HAS_INTRIN(intrin,major,minor) \
     (_MSC_VER >= ((major * 100) + (minor)))
#endif
47
+
48
+ #if defined(_MSC_VER)
49
+ # include <intrin.h>
50
+ #endif
51
+ #include <limits.h>
52
+ #include <stdlib.h>
53
+
54
+ #if defined(__i386) || defined(_M_IX86) || \
55
+ defined(__amd64) || defined(_M_AMD64) || defined(__x86_64)
56
+ # if defined(_MSC_VER)
57
+ # define PSNIP_BUILTIN__ENABLE_X86
58
+ # elif defined(__GNUC__)
59
+ # define PSNIP_BUILTIN__ENABLE_X86
60
+ # include <x86intrin.h>
61
+ # endif
62
+ #endif
63
+
64
+ #if defined(__amd64) || defined(_M_AMD64) || defined(__x86_64)
65
+ # if defined(_MSC_VER)
66
+ # define PSNIP_BUILTIN__ENABLE_AMD64
67
+ # elif defined(__GNUC__)
68
+ # define PSNIP_BUILTIN__ENABLE_AMD64
69
+ # include <x86intrin.h>
70
+ # endif
71
+ #endif
72
+
73
+ #if \
74
+ !defined(psnip_int64_t) || !defined(psnip_uint64_t) || \
75
+ !defined(psnip_int32_t) || !defined(psnip_uint32_t) || \
76
+ !defined(psnip_int16_t) || !defined(psnip_uint16_t) || \
77
+ !defined(psnip_int8_t) || !defined(psnip_uint8_t)
78
+ # include "exact-int.h"
79
+ #endif
80
+
81
+ #if defined(HEDLEY_LIKELY) && defined(HEDLEY_UNLIKELY)
82
+ # define PSNIP_BUILTIN_LIKELY(expr) HEDLEY_LIKELY(expr)
83
+ # define PSNIP_BUILTIN_UNLIKELY(expr) HEDLEY_UNLIKELY(expr)
84
+ #elif PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_expect,3,0)
85
+ # define PSNIP_BUILTIN_LIKELY(expr) __builtin_expect(!!(expr), 1)
86
+ # define PSNIP_BUILTIN_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
87
+ #else
88
+ # define PSNIP_BUILTIN_LIKELY(expr) (!!(expr))
89
+ # define PSNIP_BUILTIN_UNLIKELY(expr) (!!(expr))
90
+ #endif
91
+
92
/* PSNIP_BUILTIN__FUNCTION
 *
 * Declaration prefix placed in front of every function this header
 * defines.  By default it expands to
 *   [unused attribute on GNU compilers] static [best available inline keyword]
 *
 * Overrides:
 *   - If PSNIP_BUILTIN__FUNCTION is already defined it is left untouched.
 *   - If PSNIP_BUILTIN_STATIC_INLINE is defined it is used as the prefix.
 *     Previously defining it merely skipped this whole section, which left
 *     PSNIP_BUILTIN__FUNCTION undefined and broke every function
 *     definition that follows. */
#if !defined(PSNIP_BUILTIN__FUNCTION)
#  if defined(PSNIP_BUILTIN_STATIC_INLINE)
#    define PSNIP_BUILTIN__FUNCTION PSNIP_BUILTIN_STATIC_INLINE
#  else
     /* Silence "defined but not used" warnings for the static helpers. */
#    if defined(__GNUC__)
#      define PSNIP_BUILTIN__COMPILER_ATTRIBUTES __attribute__((__unused__))
#    else
#      define PSNIP_BUILTIN__COMPILER_ATTRIBUTES
#    endif

     /* Pick the inline keyword the compiler understands, if any. */
#    if defined(HEDLEY_INLINE)
#      define PSNIP_BUILTIN__INLINE HEDLEY_INLINE
#    elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#      define PSNIP_BUILTIN__INLINE inline
#    elif defined(__GNUC_STDC_INLINE__)
#      define PSNIP_BUILTIN__INLINE __inline__
#    elif defined(_MSC_VER) && _MSC_VER >= 1200
#      define PSNIP_BUILTIN__INLINE __inline
#    else
#      define PSNIP_BUILTIN__INLINE
#    endif

#    define PSNIP_BUILTIN__FUNCTION PSNIP_BUILTIN__COMPILER_ATTRIBUTES static PSNIP_BUILTIN__INLINE
#  endif
#endif
113
+
114
+ #define PSNIP_BUILTIN__SUFFIX_B 1 /* "b" name suffix; chosen when char has the requested width */
115
+ #define PSNIP_BUILTIN__SUFFIX_S 2 /* "s" name suffix; chosen when short has the requested width */
116
+ #define PSNIP_BUILTIN__SUFFIX_ 3 /* no name suffix; chosen when int has the requested width */
117
+ #define PSNIP_BUILTIN__SUFFIX_L 4 /* "l" name suffix; chosen when long has the requested width */
118
+ #define PSNIP_BUILTIN__SUFFIX_LL 5 /* "ll" name suffix; chosen when long long has the requested width */
119
+
120
/* PSNIP_BUILTIN__SIZEOF_CHAR
 *
 * Width of char in bits (8, 16, 32 or 64), derived from <limits.h>.
 * Left undefined when none of the candidates matches; may be predefined
 * by the user to skip detection.
 *
 * The range of signed char is used for the probe: plain char may be
 * unsigned (CHAR_MIN == 0), in which case a CHAR_MIN/CHAR_MAX comparison
 * matches nothing, whereas signed char always has the same width as
 * char and a signed range. */
#if !defined(PSNIP_BUILTIN__SIZEOF_CHAR)
#  if   SCHAR_MIN == (-0x7fLL-1) && SCHAR_MAX == 0x7fLL
#    define PSNIP_BUILTIN__SIZEOF_CHAR 8
#  elif SCHAR_MIN == (-0x7fffLL-1) && SCHAR_MAX == 0x7fffLL
#    define PSNIP_BUILTIN__SIZEOF_CHAR 16
#  elif SCHAR_MIN == (-0x7fffffffLL-1) && SCHAR_MAX == 0x7fffffffLL
#    define PSNIP_BUILTIN__SIZEOF_CHAR 32
#  elif SCHAR_MIN == (-0x7fffffffffffffffLL-1) && SCHAR_MAX == 0x7fffffffffffffffLL
#    define PSNIP_BUILTIN__SIZEOF_CHAR 64
#  endif
#endif
131
+
132
/* Widths, in bits, of short / int / long / long long.
 *
 * Each PSNIP_BUILTIN__SIZEOF_* macro is derived by matching the type's
 * <limits.h> range against the 8-, 16-, 32- and 64-bit two's-complement
 * ranges.  A macro is left undefined when nothing matches, and detection
 * is skipped for any macro the user has already defined. */

#ifndef PSNIP_BUILTIN__SIZEOF_SHRT
#  if   (SHRT_MIN == (-0x7fLL-1)) && (SHRT_MAX == 0x7fLL)
#    define PSNIP_BUILTIN__SIZEOF_SHRT 8
#  elif (SHRT_MIN == (-0x7fffLL-1)) && (SHRT_MAX == 0x7fffLL)
#    define PSNIP_BUILTIN__SIZEOF_SHRT 16
#  elif (SHRT_MIN == (-0x7fffffffLL-1)) && (SHRT_MAX == 0x7fffffffLL)
#    define PSNIP_BUILTIN__SIZEOF_SHRT 32
#  elif (SHRT_MIN == (-0x7fffffffffffffffLL-1)) && (SHRT_MAX == 0x7fffffffffffffffLL)
#    define PSNIP_BUILTIN__SIZEOF_SHRT 64
#  endif
#endif

#ifndef PSNIP_BUILTIN__SIZEOF_INT
#  if   (INT_MIN == (-0x7fLL-1)) && (INT_MAX == 0x7fLL)
#    define PSNIP_BUILTIN__SIZEOF_INT 8
#  elif (INT_MIN == (-0x7fffLL-1)) && (INT_MAX == 0x7fffLL)
#    define PSNIP_BUILTIN__SIZEOF_INT 16
#  elif (INT_MIN == (-0x7fffffffLL-1)) && (INT_MAX == 0x7fffffffLL)
#    define PSNIP_BUILTIN__SIZEOF_INT 32
#  elif (INT_MIN == (-0x7fffffffffffffffLL-1)) && (INT_MAX == 0x7fffffffffffffffLL)
#    define PSNIP_BUILTIN__SIZEOF_INT 64
#  endif
#endif

#ifndef PSNIP_BUILTIN__SIZEOF_LONG
#  if   (LONG_MIN == (-0x7fLL-1)) && (LONG_MAX == 0x7fLL)
#    define PSNIP_BUILTIN__SIZEOF_LONG 8
#  elif (LONG_MIN == (-0x7fffLL-1)) && (LONG_MAX == 0x7fffLL)
#    define PSNIP_BUILTIN__SIZEOF_LONG 16
#  elif (LONG_MIN == (-0x7fffffffLL-1)) && (LONG_MAX == 0x7fffffffLL)
#    define PSNIP_BUILTIN__SIZEOF_LONG 32
#  elif (LONG_MIN == (-0x7fffffffffffffffLL-1)) && (LONG_MAX == 0x7fffffffffffffffLL)
#    define PSNIP_BUILTIN__SIZEOF_LONG 64
#  endif
#endif

#ifndef PSNIP_BUILTIN__SIZEOF_LLONG
#  if   (LLONG_MIN == (-0x7fLL-1)) && (LLONG_MAX == 0x7fLL)
#    define PSNIP_BUILTIN__SIZEOF_LLONG 8
#  elif (LLONG_MIN == (-0x7fffLL-1)) && (LLONG_MAX == 0x7fffLL)
#    define PSNIP_BUILTIN__SIZEOF_LLONG 16
#  elif (LLONG_MIN == (-0x7fffffffLL-1)) && (LLONG_MAX == 0x7fffffffLL)
#    define PSNIP_BUILTIN__SIZEOF_LLONG 32
#  elif (LLONG_MIN == (-0x7fffffffffffffffLL-1)) && (LLONG_MAX == 0x7fffffffffffffffLL)
#    define PSNIP_BUILTIN__SIZEOF_LLONG 64
#  endif
#endif
179
+
180
/* PSNIP_BUILTIN_SUFFIX_INT{8,16,32,64}
 *
 * For each fixed width, records which native type has exactly that many
 * bits, encoded as one of the PSNIP_BUILTIN__SUFFIX_* constants.  The
 * types are probed in the order char, short, int, long, long long and
 * the first match wins; nothing is defined when no type matches.  Any
 * of these macros may be predefined to override detection. */

#ifndef PSNIP_BUILTIN_SUFFIX_INT8
#  if   (PSNIP_BUILTIN__SIZEOF_CHAR  == 8)
#    define PSNIP_BUILTIN_SUFFIX_INT8 PSNIP_BUILTIN__SUFFIX_B
#  elif (PSNIP_BUILTIN__SIZEOF_SHRT  == 8)
#    define PSNIP_BUILTIN_SUFFIX_INT8 PSNIP_BUILTIN__SUFFIX_S
#  elif (PSNIP_BUILTIN__SIZEOF_INT   == 8)
#    define PSNIP_BUILTIN_SUFFIX_INT8 PSNIP_BUILTIN__SUFFIX_
#  elif (PSNIP_BUILTIN__SIZEOF_LONG  == 8)
#    define PSNIP_BUILTIN_SUFFIX_INT8 PSNIP_BUILTIN__SUFFIX_L
#  elif (PSNIP_BUILTIN__SIZEOF_LLONG == 8)
#    define PSNIP_BUILTIN_SUFFIX_INT8 PSNIP_BUILTIN__SUFFIX_LL
#  endif
#endif

#ifndef PSNIP_BUILTIN_SUFFIX_INT16
#  if   (PSNIP_BUILTIN__SIZEOF_CHAR  == 16)
#    define PSNIP_BUILTIN_SUFFIX_INT16 PSNIP_BUILTIN__SUFFIX_B
#  elif (PSNIP_BUILTIN__SIZEOF_SHRT  == 16)
#    define PSNIP_BUILTIN_SUFFIX_INT16 PSNIP_BUILTIN__SUFFIX_S
#  elif (PSNIP_BUILTIN__SIZEOF_INT   == 16)
#    define PSNIP_BUILTIN_SUFFIX_INT16 PSNIP_BUILTIN__SUFFIX_
#  elif (PSNIP_BUILTIN__SIZEOF_LONG  == 16)
#    define PSNIP_BUILTIN_SUFFIX_INT16 PSNIP_BUILTIN__SUFFIX_L
#  elif (PSNIP_BUILTIN__SIZEOF_LLONG == 16)
#    define PSNIP_BUILTIN_SUFFIX_INT16 PSNIP_BUILTIN__SUFFIX_LL
#  endif
#endif

#ifndef PSNIP_BUILTIN_SUFFIX_INT32
#  if   (PSNIP_BUILTIN__SIZEOF_CHAR  == 32)
#    define PSNIP_BUILTIN_SUFFIX_INT32 PSNIP_BUILTIN__SUFFIX_B
#  elif (PSNIP_BUILTIN__SIZEOF_SHRT  == 32)
#    define PSNIP_BUILTIN_SUFFIX_INT32 PSNIP_BUILTIN__SUFFIX_S
#  elif (PSNIP_BUILTIN__SIZEOF_INT   == 32)
#    define PSNIP_BUILTIN_SUFFIX_INT32 PSNIP_BUILTIN__SUFFIX_
#  elif (PSNIP_BUILTIN__SIZEOF_LONG  == 32)
#    define PSNIP_BUILTIN_SUFFIX_INT32 PSNIP_BUILTIN__SUFFIX_L
#  elif (PSNIP_BUILTIN__SIZEOF_LLONG == 32)
#    define PSNIP_BUILTIN_SUFFIX_INT32 PSNIP_BUILTIN__SUFFIX_LL
#  endif
#endif

/* On Apple platforms a 64-bit long long is selected ahead of the usual
 * probing order. */
#ifndef PSNIP_BUILTIN_SUFFIX_INT64
#  if   defined(__APPLE__) && (PSNIP_BUILTIN__SIZEOF_LLONG == 64)
#    define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_LL
#  elif (PSNIP_BUILTIN__SIZEOF_CHAR  == 64)
#    define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_B
#  elif (PSNIP_BUILTIN__SIZEOF_SHRT  == 64)
#    define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_S
#  elif (PSNIP_BUILTIN__SIZEOF_INT   == 64)
#    define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_
#  elif (PSNIP_BUILTIN__SIZEOF_LONG  == 64)
#    define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_L
#  elif (PSNIP_BUILTIN__SIZEOF_LLONG == 64)
#    define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_LL
#  endif
#endif
237
+
238
/* PSNIP_BUILTIN__VARIANT_INTn(prefix,name) and
 * PSNIP_BUILTIN__VARIANT2_INTn(prefix,name)
 *
 * Paste together `<prefix>_builtin_<name><suffix>`, where <suffix> is the
 * type suffix recorded in PSNIP_BUILTIN_SUFFIX_INTn ("b", "s", none, "l"
 * or "ll").  The VARIANT2 form exists for every suffix; the VARIANT form
 * is only defined for the int / long / long long suffixes. */

#ifdef PSNIP_BUILTIN_SUFFIX_INT8
#  if   PSNIP_BUILTIN_SUFFIX_INT8 == PSNIP_BUILTIN__SUFFIX_B
#    define PSNIP_BUILTIN__VARIANT2_INT8(prefix,name) prefix##_builtin_##name##b
#  elif PSNIP_BUILTIN_SUFFIX_INT8 == PSNIP_BUILTIN__SUFFIX_S
#    define PSNIP_BUILTIN__VARIANT2_INT8(prefix,name) prefix##_builtin_##name##s
#  elif PSNIP_BUILTIN_SUFFIX_INT8 == PSNIP_BUILTIN__SUFFIX_
#    define PSNIP_BUILTIN__VARIANT_INT8(prefix,name)  prefix##_builtin_##name
#    define PSNIP_BUILTIN__VARIANT2_INT8(prefix,name) prefix##_builtin_##name
#  elif PSNIP_BUILTIN_SUFFIX_INT8 == PSNIP_BUILTIN__SUFFIX_L
#    define PSNIP_BUILTIN__VARIANT_INT8(prefix,name)  prefix##_builtin_##name##l
#    define PSNIP_BUILTIN__VARIANT2_INT8(prefix,name) prefix##_builtin_##name##l
#  elif PSNIP_BUILTIN_SUFFIX_INT8 == PSNIP_BUILTIN__SUFFIX_LL
#    define PSNIP_BUILTIN__VARIANT_INT8(prefix,name)  prefix##_builtin_##name##ll
#    define PSNIP_BUILTIN__VARIANT2_INT8(prefix,name) prefix##_builtin_##name##ll
#  endif
#endif

#ifdef PSNIP_BUILTIN_SUFFIX_INT16
#  if   PSNIP_BUILTIN_SUFFIX_INT16 == PSNIP_BUILTIN__SUFFIX_B
#    define PSNIP_BUILTIN__VARIANT2_INT16(prefix,name) prefix##_builtin_##name##b
#  elif PSNIP_BUILTIN_SUFFIX_INT16 == PSNIP_BUILTIN__SUFFIX_S
#    define PSNIP_BUILTIN__VARIANT2_INT16(prefix,name) prefix##_builtin_##name##s
#  elif PSNIP_BUILTIN_SUFFIX_INT16 == PSNIP_BUILTIN__SUFFIX_
#    define PSNIP_BUILTIN__VARIANT_INT16(prefix,name)  prefix##_builtin_##name
#    define PSNIP_BUILTIN__VARIANT2_INT16(prefix,name) prefix##_builtin_##name
#  elif PSNIP_BUILTIN_SUFFIX_INT16 == PSNIP_BUILTIN__SUFFIX_L
#    define PSNIP_BUILTIN__VARIANT_INT16(prefix,name)  prefix##_builtin_##name##l
#    define PSNIP_BUILTIN__VARIANT2_INT16(prefix,name) prefix##_builtin_##name##l
#  elif PSNIP_BUILTIN_SUFFIX_INT16 == PSNIP_BUILTIN__SUFFIX_LL
#    define PSNIP_BUILTIN__VARIANT_INT16(prefix,name)  prefix##_builtin_##name##ll
#    define PSNIP_BUILTIN__VARIANT2_INT16(prefix,name) prefix##_builtin_##name##ll
#  endif
#endif

#ifdef PSNIP_BUILTIN_SUFFIX_INT32
#  if   PSNIP_BUILTIN_SUFFIX_INT32 == PSNIP_BUILTIN__SUFFIX_B
#    define PSNIP_BUILTIN__VARIANT2_INT32(prefix,name) prefix##_builtin_##name##b
#  elif PSNIP_BUILTIN_SUFFIX_INT32 == PSNIP_BUILTIN__SUFFIX_S
#    define PSNIP_BUILTIN__VARIANT2_INT32(prefix,name) prefix##_builtin_##name##s
#  elif PSNIP_BUILTIN_SUFFIX_INT32 == PSNIP_BUILTIN__SUFFIX_
#    define PSNIP_BUILTIN__VARIANT_INT32(prefix,name)  prefix##_builtin_##name
#    define PSNIP_BUILTIN__VARIANT2_INT32(prefix,name) prefix##_builtin_##name
#  elif PSNIP_BUILTIN_SUFFIX_INT32 == PSNIP_BUILTIN__SUFFIX_L
#    define PSNIP_BUILTIN__VARIANT_INT32(prefix,name)  prefix##_builtin_##name##l
#    define PSNIP_BUILTIN__VARIANT2_INT32(prefix,name) prefix##_builtin_##name##l
#  elif PSNIP_BUILTIN_SUFFIX_INT32 == PSNIP_BUILTIN__SUFFIX_LL
#    define PSNIP_BUILTIN__VARIANT_INT32(prefix,name)  prefix##_builtin_##name##ll
#    define PSNIP_BUILTIN__VARIANT2_INT32(prefix,name) prefix##_builtin_##name##ll
#  endif
#endif

#ifdef PSNIP_BUILTIN_SUFFIX_INT64
#  if   PSNIP_BUILTIN_SUFFIX_INT64 == PSNIP_BUILTIN__SUFFIX_B
#    define PSNIP_BUILTIN__VARIANT2_INT64(prefix,name) prefix##_builtin_##name##b
#  elif PSNIP_BUILTIN_SUFFIX_INT64 == PSNIP_BUILTIN__SUFFIX_S
#    define PSNIP_BUILTIN__VARIANT2_INT64(prefix,name) prefix##_builtin_##name##s
#  elif PSNIP_BUILTIN_SUFFIX_INT64 == PSNIP_BUILTIN__SUFFIX_
#    define PSNIP_BUILTIN__VARIANT_INT64(prefix,name)  prefix##_builtin_##name
#    define PSNIP_BUILTIN__VARIANT2_INT64(prefix,name) prefix##_builtin_##name
#  elif PSNIP_BUILTIN_SUFFIX_INT64 == PSNIP_BUILTIN__SUFFIX_L
#    define PSNIP_BUILTIN__VARIANT_INT64(prefix,name)  prefix##_builtin_##name##l
#    define PSNIP_BUILTIN__VARIANT2_INT64(prefix,name) prefix##_builtin_##name##l
#  elif PSNIP_BUILTIN_SUFFIX_INT64 == PSNIP_BUILTIN__SUFFIX_LL
#    define PSNIP_BUILTIN__VARIANT_INT64(prefix,name)  prefix##_builtin_##name##ll
#    define PSNIP_BUILTIN__VARIANT2_INT64(prefix,name) prefix##_builtin_##name##ll
#  endif
#endif
305
+
306
+ /******
307
+ *** GCC-style built-ins
308
+ ******/
309
+
310
+ /*** __builtin_ffs ***/
311
+
312
/* PSNIP_BUILTIN__FFS_DEFINE_PORTABLE(f_n, T)
 *
 * Defines `int psnip_builtin_<f_n>(T x)`, a table-driven find-first-set:
 * the result is one plus the index of the least significant set bit of
 * x, or 0 when x has no bit set.  The value is scanned one byte at a
 * time starting from the low end; the table holds the answer for every
 * possible byte.  The scan assumes 8-bit bytes (sizeof(T) * 8). */
#define PSNIP_BUILTIN__FFS_DEFINE_PORTABLE(f_n, T)                      \
  PSNIP_BUILTIN__FUNCTION                                               \
  int psnip_builtin_##f_n(T x) {                                        \
    static const char psnip_builtin_ffs_lookup[256] = {                 \
      0, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1,                   \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1,                   \
      6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1,                   \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1,                   \
      7, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1,                   \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1,                   \
      6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1,                   \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1,                   \
      8, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1,                   \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1,                   \
      6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1,                   \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1,                   \
      7, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1,                   \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1,                   \
      6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1,                   \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1                    \
    };                                                                  \
    size_t psnip_shift;                                                 \
    unsigned char psnip_byte;                                           \
                                                                        \
    for (psnip_shift = 0 ;                                              \
         psnip_shift < (sizeof(T) * 8) ;                                \
         psnip_shift += 8) {                                            \
      psnip_byte = (unsigned char) ((x >> psnip_shift) & 0xff);         \
      if (psnip_byte != 0)                                              \
        return psnip_builtin_ffs_lookup[psnip_byte] + psnip_shift;      \
    }                                                                   \
                                                                        \
    return 0;                                                           \
  }
347
+
348
+ #if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_ffs, 3, 3)
349
+ # define psnip_builtin_ffs(x) __builtin_ffs(x)
350
+ # define psnip_builtin_ffsl(x) __builtin_ffsl(x)
351
+ # define psnip_builtin_ffsll(x) __builtin_ffsll(x)
352
+ # define psnip_builtin_ffs32(x) PSNIP_BUILTIN__VARIANT_INT32(_,ffs)(x)
353
+ # define psnip_builtin_ffs64(x) PSNIP_BUILTIN__VARIANT_INT64(_,ffs)(x)
354
+ #else
355
+ # if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanForward, 14, 0)
356
+ PSNIP_BUILTIN__FUNCTION
357
+ int psnip_builtin_ffsll(long long v) {
358
+ unsigned long r;
359
+ # if defined(_M_AMD64) || defined(_M_ARM)
360
+ if (_BitScanForward64(&r, (unsigned long long) v)) {
361
+ return (int) (r + 1);
362
+ }
363
+ # else
364
+ if (_BitScanForward(&r, (unsigned long) (v))) {
365
+ return (int) (r + 1);
366
+ } else if (_BitScanForward(&r, (unsigned long) (v >> 32))) {
367
+ return (int) (r + 33);
368
+ }
369
+ # endif
370
+ return 0;
371
+ }
372
+
373
+ PSNIP_BUILTIN__FUNCTION
374
+ int psnip_builtin_ffsl(long v) {
375
+ unsigned long r;
376
+ if (_BitScanForward(&r, (unsigned long) v)) {
377
+ return (int) (r + 1);
378
+ }
379
+ return 0;
380
+ }
381
+
382
+ PSNIP_BUILTIN__FUNCTION
383
+ int psnip_builtin_ffs(int v) {
384
+ return psnip_builtin_ffsl(v);
385
+ }
386
+ # else
387
+ PSNIP_BUILTIN__FFS_DEFINE_PORTABLE(ffs, int)
388
+ PSNIP_BUILTIN__FFS_DEFINE_PORTABLE(ffsl, long)
389
+ PSNIP_BUILTIN__FFS_DEFINE_PORTABLE(ffsll, long long)
390
+ # endif
391
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
392
+ # define __builtin_ffsll(v) psnip_builtin_ffsll(v)
393
+ # define __builtin_ffsl(v) psnip_builtin_ffsl(v)
394
+ # define __builtin_ffs(v) psnip_builtin_ffs(v)
395
+ # endif
396
+ #endif
397
+
398
/* Fixed-width aliases: unless already defined, map the 32- and 64-bit
 * names onto the psnip_builtin_ffs* variant for the matching native
 * type. */
#ifndef psnip_builtin_ffs32
#  define psnip_builtin_ffs32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,ffs)(x)
#endif

#ifndef psnip_builtin_ffs64
#  define psnip_builtin_ffs64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,ffs)(x)
#endif
405
+
406
+ /*** __builtin_clz ***/
407
+
408
/* PSNIP_BUILTIN__CLZ_DEFINE_PORTABLE(f_n, T)
 *
 * Defines `int psnip_builtin_<f_n>(T x)`, a table-driven count of
 * leading zero bits.  The value is examined from the most significant
 * byte downwards; the table gives the number of leading zeros inside a
 * single byte.  For x == 0 the function returns the bit width of T
 * minus one.  The byte arithmetic assumes 8-bit bytes. */
#define PSNIP_BUILTIN__CLZ_DEFINE_PORTABLE(f_n, T)                      \
  PSNIP_BUILTIN__FUNCTION                                               \
  int psnip_builtin_##f_n(T x) {                                        \
    static const char psnip_builtin_clz_lookup[256] = {                 \
      7, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,                   \
      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,                   \
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,                   \
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,                   \
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,                   \
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,                   \
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,                   \
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,                   \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                   \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                   \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                   \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                   \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                   \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                   \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                   \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0                    \
    };                                                                  \
    size_t psnip_shift;                                                 \
    T psnip_top;                                                        \
                                                                        \
    /* Every byte above the lowest one, highest first. */               \
    for (psnip_shift = (sizeof(T) * 8) - 8 ;                            \
         psnip_shift != 0 ;                                             \
         psnip_shift -= 8) {                                            \
      psnip_top = x >> psnip_shift;                                     \
      if (psnip_top != 0)                                               \
        return psnip_builtin_clz_lookup[psnip_top] +                    \
          (((sizeof(T) - 1) * 8) - psnip_shift);                        \
    }                                                                   \
                                                                        \
    /* Only the lowest byte can still hold a set bit. */                \
    if (x != 0)                                                         \
      return psnip_builtin_clz_lookup[x] +                              \
        ((sizeof(T) - 1) * 8);                                          \
                                                                        \
    return (int) ((sizeof(T) * 8) - 1);                                 \
  }
445
+
446
+ #if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_clz, 3, 4)
447
+ # define psnip_builtin_clz(x) __builtin_clz(x)
448
+ # define psnip_builtin_clzl(x) __builtin_clzl(x)
449
+ # define psnip_builtin_clzll(x) __builtin_clzll(x)
450
+ # define psnip_builtin_clz32(x) PSNIP_BUILTIN__VARIANT_INT32(_,clz)(x)
451
+ # define psnip_builtin_clz64(x) PSNIP_BUILTIN__VARIANT_INT64(_,clz)(x)
452
+ #else
453
+ # if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanReverse,14,0)
454
+ PSNIP_BUILTIN__FUNCTION
455
+ int psnip_builtin_clzll(unsigned long long v) {
456
+ unsigned long r = 0;
457
+ # if defined(_M_AMD64) || defined(_M_ARM)
458
+ if (_BitScanReverse64(&r, v)) {
459
+ return 63 - r;
460
+ }
461
+ # else
462
+ if (_BitScanReverse(&r, (unsigned long) (v >> 32))) {
463
+ return 31 - r;
464
+ } else if (_BitScanReverse(&r, (unsigned long) v)) {
465
+ return 63 - r;
466
+ }
467
+ # endif
468
+ return 63;
469
+ }
470
+
471
+ PSNIP_BUILTIN__FUNCTION
472
+ int psnip_builtin_clzl(unsigned long v) {
473
+ unsigned long r = 0;
474
+ if (_BitScanReverse(&r, v)) {
475
+ return 31 - r;
476
+ }
477
+ return 31;
478
+ }
479
+
480
+ PSNIP_BUILTIN__FUNCTION
481
+ int psnip_builtin_clz(unsigned int v) {
482
+ return psnip_builtin_clzl(v);
483
+ }
484
+ # define psnip_builtin_clz32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,clz)(x)
485
+ # define psnip_builtin_clz64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,clz)(x)
486
+ # else
487
+ PSNIP_BUILTIN__FUNCTION
488
+ int psnip_builtin_clz32(psnip_uint32_t v) {
489
+ static const unsigned char MultiplyDeBruijnBitPosition[] = {
490
+ 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
491
+ 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
492
+ };
493
+
494
+ v |= v >> 1;
495
+ v |= v >> 2;
496
+ v |= v >> 4;
497
+ v |= v >> 8;
498
+ v |= v >> 16;
499
+
500
+ return
501
+ ((sizeof(psnip_uint32_t) * CHAR_BIT) - 1) -
502
+ MultiplyDeBruijnBitPosition[(psnip_uint32_t)(v * 0x07C4ACDDU) >> 27];
503
+ }
504
+
505
+ PSNIP_BUILTIN__FUNCTION
506
+ int psnip_builtin_clz64(psnip_uint64_t v) {
507
+ static const unsigned char MultiplyDeBruijnBitPosition[] = {
508
+ 0, 47, 1, 56, 48, 27, 2, 60, 57, 49, 41, 37, 28, 16, 3, 61,
509
+ 54, 58, 35, 52, 50, 42, 21, 44, 38, 32, 29, 23, 17, 11, 4, 62,
510
+ 46, 55, 26, 59, 40, 36, 15, 53, 34, 51, 20, 43, 31, 22, 10, 45,
511
+ 25, 39, 14, 33, 19, 30, 9, 24, 13, 18, 8, 12, 7, 6, 5, 63
512
+ };
513
+
514
+ v |= v >> 1;
515
+ v |= v >> 2;
516
+ v |= v >> 4;
517
+ v |= v >> 8;
518
+ v |= v >> 16;
519
+ v |= v >> 32;
520
+
521
+ return
522
+ ((sizeof(psnip_uint64_t) * CHAR_BIT) - 1) -
523
+ MultiplyDeBruijnBitPosition[(psnip_uint64_t)(v * 0x03F79D71B4CB0A89ULL) >> 58];
524
+ }
525
+
526
+ # if PSNIP_BUILTIN__SIZEOF_INT == 32
527
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_clz(unsigned int x) { return psnip_builtin_clz32(x); }
528
+ # elif PSNIP_BUILTIN__SIZEOF_INT == 64
529
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_clz(unsigned int x) { return psnip_builtin_clz64(x); }
530
+ # else
531
+ PSNIP_BUILTIN__CLZ_DEFINE_PORTABLE(clz, unsigned int)
532
+ # endif
533
+
534
+ # if PSNIP_BUILTIN__SIZEOF_LONG == 32
535
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_clzl(unsigned long x) { return psnip_builtin_clz32(x); }
536
+ # elif PSNIP_BUILTIN__SIZEOF_LONG == 64
537
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_clzl(unsigned long x) { return psnip_builtin_clz64(x); }
538
+ # else
539
+ PSNIP_BUILTIN__CLZ_DEFINE_PORTABLE(clzl, unsigned long)
540
+ # endif
541
+
542
+ # if PSNIP_BUILTIN__SIZEOF_LLONG == 32
543
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_clzll(unsigned long long x) { return psnip_builtin_clz32(x); }
544
+ # elif PSNIP_BUILTIN__SIZEOF_LLONG == 64
545
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_clzll(unsigned long long x) { return psnip_builtin_clz64(x); }
546
+ # else
547
+ PSNIP_BUILTIN__CLZ_DEFINE_PORTABLE(clzll, unsigned long long)
548
+ # endif
549
+
550
+ # endif
551
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
552
+ # define __builtin_clz(x) psnip_builtin_clz(x)
553
+ # define __builtin_clzl(x) psnip_builtin_clzl(x)
554
+ # define __builtin_clzll(x) psnip_builtin_clzll(x)
555
+ # endif
556
+ #endif
557
+
558
/* Fixed-width aliases: unless a macro of that name already exists, map
 * the 32- and 64-bit names onto the psnip_builtin_clz* variant for the
 * matching native type. */
#ifndef psnip_builtin_clz32
#  define psnip_builtin_clz32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,clz)(x)
#endif

#ifndef psnip_builtin_clz64
#  define psnip_builtin_clz64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,clz)(x)
#endif
565
+
566
+ /*** __builtin_ctz ***/
567
+
568
/* PSNIP_BUILTIN__CTZ_DEFINE_PORTABLE(f_n, T)
 *
 * Defines `int psnip_builtin_<f_n>(T x)`, a table-driven count of
 * trailing zero bits.  The value is scanned one byte at a time from the
 * low end; the table gives the number of trailing zeros inside a single
 * byte.  When no byte is non-zero the function returns sizeof(T) - 1.
 * The byte arithmetic assumes 8-bit bytes. */
#define PSNIP_BUILTIN__CTZ_DEFINE_PORTABLE(f_n, T)                      \
  PSNIP_BUILTIN__FUNCTION                                               \
  int psnip_builtin_##f_n(T x) {                                        \
    static const char psnip_builtin_ctz_lookup[256] = {                 \
      0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,                   \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,                   \
      5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,                   \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,                   \
      6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,                   \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,                   \
      5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,                   \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,                   \
      7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,                   \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,                   \
      5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,                   \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,                   \
      6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,                   \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,                   \
      5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,                   \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0                    \
    };                                                                  \
    size_t psnip_shift;                                                 \
    T psnip_byte;                                                       \
                                                                        \
    for (psnip_shift = 0 ;                                              \
         psnip_shift < (sizeof(T) * 8) ;                                \
         psnip_shift += 8) {                                            \
      psnip_byte = (x >> psnip_shift) & 0xff;                           \
      if (psnip_byte != 0)                                              \
        return psnip_builtin_ctz_lookup[psnip_byte] +                   \
          (char) psnip_shift;                                           \
    }                                                                   \
                                                                        \
    return (int) sizeof(T) - 1;                                         \
  }
600
+
601
+ #if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_ctz, 3, 4)
602
+ # define psnip_builtin_ctz(x) __builtin_ctz(x)
603
+ # define psnip_builtin_ctzl(x) __builtin_ctzl(x)
604
+ # define psnip_builtin_ctzll(x) __builtin_ctzll(x)
605
+ # define psnip_builtin_ctz32(x) PSNIP_BUILTIN__VARIANT_INT32(_,ctz)(x)
606
+ # define psnip_builtin_ctz64(x) PSNIP_BUILTIN__VARIANT_INT64(_,ctz)(x)
607
+ #else
608
+ # if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanForward, 14, 0)
609
+ PSNIP_BUILTIN__FUNCTION
610
+ int psnip_builtin_ctzll(unsigned long long v) {
611
+ unsigned long r = 0;
612
+ # if defined(_M_AMD64) || defined(_M_ARM)
613
+ _BitScanForward64(&r, v);
614
+ return (int) r;
615
+ # else
616
+ if (_BitScanForward(&r, (unsigned int) (v)))
617
+ return (int) (r);
618
+
619
+ _BitScanForward(&r, (unsigned int) (v >> 32));
620
+ return (int) (r + 32);
621
+ # endif
622
+ }
623
+
624
+ PSNIP_BUILTIN__FUNCTION
625
+ int psnip_builtin_ctzl(unsigned long v) {
626
+ unsigned long r = 0;
627
+ _BitScanForward(&r, v);
628
+ return (int) r;
629
+ }
630
+
631
+ PSNIP_BUILTIN__FUNCTION
632
+ int psnip_builtin_ctz(unsigned int v) {
633
+ return psnip_builtin_ctzl(v);
634
+ }
635
+ # define psnip_builtin_ctz32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,ctz)(x)
636
+ # define psnip_builtin_ctz64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,ctz)(x)
637
+ # else
638
+ PSNIP_BUILTIN__FUNCTION
639
+ int psnip_builtin_ctz32(psnip_uint32_t v) {
640
+ static const unsigned char MultiplyDeBruijnBitPosition[] = {
641
+ 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
642
+ 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
643
+ };
644
+
645
+ return
646
+ MultiplyDeBruijnBitPosition[((psnip_uint32_t)((v & -v) * 0x077CB531U)) >> 27];
647
+ }
648
+
649
+ PSNIP_BUILTIN__FUNCTION
650
+ int psnip_builtin_ctz64(psnip_uint64_t v) {
651
+ static const unsigned char MultiplyDeBruijnBitPosition[] = {
652
+ 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
653
+ 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
654
+ 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
655
+ 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6
656
+ };
657
+
658
+ return
659
+ MultiplyDeBruijnBitPosition[((psnip_uint64_t)((v & -v) * 0x03f79d71b4ca8b09ULL)) >> 58];
660
+ }
661
+
662
+ # if PSNIP_BUILTIN__SIZEOF_INT == 32
663
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctz(unsigned int x) { return psnip_builtin_ctz32(x); }
664
+ # elif PSNIP_BUILTIN__SIZEOF_INT == 64
665
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctz(unsigned int x) { return psnip_builtin_ctz64(x); }
666
+ # else
667
+ PSNIP_BUILTIN__CTZ_DEFINE_PORTABLE(ctz, unsigned int)
668
+ # endif
669
+
670
+ # if PSNIP_BUILTIN__SIZEOF_LONG == 32
671
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctzl(unsigned long x) { return psnip_builtin_ctz32(x); }
672
+ # elif PSNIP_BUILTIN__SIZEOF_LONG == 64
673
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctzl(unsigned long x) { return psnip_builtin_ctz64(x); }
674
+ # else
675
+ PSNIP_BUILTIN__CTZ_DEFINE_PORTABLE(ctzl, unsigned long)
676
+ # endif
677
+
678
+ # if PSNIP_BUILTIN__SIZEOF_LLONG == 32
679
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctzll(unsigned long long x) { return psnip_builtin_ctz32(x); }
680
+ # elif PSNIP_BUILTIN__SIZEOF_LLONG == 64
681
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctzll(unsigned long long x) { return psnip_builtin_ctz64(x); }
682
+ # else
683
+ PSNIP_BUILTIN__CTZ_DEFINE_PORTABLE(ctzll, unsigned long long)
684
+ # endif
685
+ # endif
686
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
687
+ # define __builtin_ctz(x) psnip_builtin_ctz(x)
688
+ # define __builtin_ctzl(x) psnip_builtin_ctzl(x)
689
+ # define __builtin_ctzll(x) psnip_builtin_ctzll(x)
690
+ # endif
691
+ #endif
692
+
693
/* Fixed-width aliases: unless a macro of that name already exists, map
 * the 32- and 64-bit names onto the psnip_builtin_ctz* variant for the
 * matching native type. */
#ifndef psnip_builtin_ctz32
#  define psnip_builtin_ctz32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,ctz)(x)
#endif

#ifndef psnip_builtin_ctz64
#  define psnip_builtin_ctz64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,ctz)(x)
#endif
700
+
701
+ /*** __builtin_parity ***/
702
+
703
/* PSNIP_BUILTIN__PARITY_DEFINE_PORTABLE(f_n, T)
 *
 * Defines `int psnip_builtin_<f_n>(T v)`, which returns 1 when v has an
 * odd number of set bits and 0 otherwise.  The value is XOR-folded onto
 * itself, halving the distance each time, until only the low four bits
 * matter; 0x6996 is then used as a 16-entry bit table holding the
 * parity of each possible nibble. */
#define PSNIP_BUILTIN__PARITY_DEFINE_PORTABLE(f_n, T)                   \
  PSNIP_BUILTIN__FUNCTION                                               \
  int psnip_builtin_##f_n(T v) {                                        \
    size_t psnip_fold = (sizeof(T) * CHAR_BIT) / 2;                     \
                                                                        \
    while (psnip_fold > 2) {                                            \
      v ^= v >> psnip_fold;                                             \
      psnip_fold /= 2;                                                  \
    }                                                                   \
                                                                        \
    v &= 0xf;                                                           \
    return (0x6996 >> v) & 1;                                           \
  }
712
+
713
+ #if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_parity, 3, 4)
714
+ # define psnip_builtin_parity(x) __builtin_parity(x)
715
+ # define psnip_builtin_parityl(x) __builtin_parityl(x)
716
+ # define psnip_builtin_parityll(x) __builtin_parityll(x)
717
+ # define psnip_builtin_parity32(x) PSNIP_BUILTIN__VARIANT_INT32(_,parity)(x)
718
+ # define psnip_builtin_parity64(x) PSNIP_BUILTIN__VARIANT_INT64(_,parity)(x)
719
+ #else
720
+ PSNIP_BUILTIN__PARITY_DEFINE_PORTABLE(parity, unsigned int)
721
+ PSNIP_BUILTIN__PARITY_DEFINE_PORTABLE(parityl, unsigned long)
722
+ PSNIP_BUILTIN__PARITY_DEFINE_PORTABLE(parityll, unsigned long long)
723
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
724
+ # define __builtin_parity(x) psnip_builtin_parity(x)
725
+ # define __builtin_parityl(x) psnip_builtin_parityl(x)
726
+ # define __builtin_parityll(x) psnip_builtin_parityll(x)
727
+ # endif
728
+ #endif
729
+
730
/* Fixed-width aliases: unless already defined, map the 32- and 64-bit
 * names onto the psnip_builtin_parity* variant for the matching native
 * type. */
#ifndef psnip_builtin_parity32
#  define psnip_builtin_parity32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,parity)(x)
#endif

#ifndef psnip_builtin_parity64
#  define psnip_builtin_parity64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,parity)(x)
#endif
737
+
738
+ /*** __builtin_popcount ***/
739
+
740
/* PSNIP_BUILTIN__POPCOUNT_DEFINE_PORTABLE(f_n, T)
 *
 * Defines `int psnip_builtin_<f_n>(T x)`, which returns the number of
 * set bits in x.  Counts are accumulated in parallel inside the word:
 * first per 2-bit field, then per 4-bit field, then per byte; the final
 * multiplication sums all byte counts into the top byte, which is then
 * shifted down.  The masks are derived from the all-ones value of T so
 * the same code serves every width. */
#define PSNIP_BUILTIN__POPCOUNT_DEFINE_PORTABLE(f_n, T)                 \
  PSNIP_BUILTIN__FUNCTION                                               \
  int psnip_builtin_##f_n(T x) {                                        \
    const T psnip_pairs   = (T)~(T)0/3;                                 \
    const T psnip_nibbles = (T)~(T)0/15*3;                              \
    const T psnip_bytes   = (T)~(T)0/255*15;                            \
    const T psnip_ones    = (T)~(T)0/255;                               \
                                                                        \
    x = x - ((x >> 1) & psnip_pairs);                                   \
    x = (x & psnip_nibbles) + ((x >> 2) & psnip_nibbles);               \
    x = (x + (x >> 4)) & psnip_bytes;                                   \
                                                                        \
    return (T)(x * psnip_ones) >> (sizeof(T) - 1) * 8;                  \
  }
748
+
749
+ #if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_popcount, 3, 4)
750
+ # define psnip_builtin_popcount(x) __builtin_popcount(x)
751
+ # define psnip_builtin_popcountl(x) __builtin_popcountl(x)
752
+ # define psnip_builtin_popcountll(x) __builtin_popcountll(x)
753
+ # define psnip_builtin_popcount32(x) PSNIP_BUILTIN__VARIANT_INT32(_,popcount)(x)
754
+ # define psnip_builtin_popcount64(x) PSNIP_BUILTIN__VARIANT_INT64(_,popcount)(x)
755
+ #else
756
+ PSNIP_BUILTIN__POPCOUNT_DEFINE_PORTABLE(popcount, unsigned int)
757
+ PSNIP_BUILTIN__POPCOUNT_DEFINE_PORTABLE(popcountl, unsigned long)
758
+ PSNIP_BUILTIN__POPCOUNT_DEFINE_PORTABLE(popcountll, unsigned long long)
759
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
760
+ # define __builtin_popcount(x) psnip_builtin_popcount(x)
761
+ # define __builtin_popcountl(x) psnip_builtin_popcountl(x)
762
+ # define __builtin_popcountll(x) psnip_builtin_popcountll(x)
763
+ # endif
764
+ #endif
765
+
766
+ #if !defined(psnip_builtin_popcount32)
767
+ # define psnip_builtin_popcount32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,popcount)(x)
768
+ #endif
769
+
770
+ #if !defined(psnip_builtin_popcount64)
771
+ # define psnip_builtin_popcount64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,popcount)(x)
772
+ #endif
773
+
774
+ /*** __builtin_clrsb ***/
775
+
776
/* PSNIP_BUILTIN__CLRSB_DEFINE_PORTABLE(f_n, clzfn, T)
 *
 * Emits `int psnip_builtin_<f_n>(T x)` for the signed type T: the
 * number of redundant sign bits in x, i.e. how many bits directly
 * below the sign bit are copies of it.
 *
 * Negative inputs are complemented so the problem becomes "count the
 * leading zeros, minus the sign bit itself".  Both 0 and -1 fold to 0,
 * and a zero argument must never reach psnip_builtin_<clzfn>: when
 * that maps to __builtin_clz the result for 0 is undefined.  Those two
 * inputs therefore take the explicit branch, yielding width - 1.
 * (The previous version only special-cased -1, so clrsb(0) relied on
 * clz(0).)
 */
#define PSNIP_BUILTIN__CLRSB_DEFINE_PORTABLE(f_n, clzfn, T) \
  PSNIP_BUILTIN__FUNCTION \
  int psnip_builtin_##f_n(T x) { \
    const T folded = (x < 0) ? (T) ~x : x; \
    return (PSNIP_BUILTIN_UNLIKELY(folded == 0) ? \
            ((int) sizeof(x) * 8) : \
            psnip_builtin_##clzfn(folded)) - 1; \
  }
783
+
784
+ #if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_clrsb, 4, 7)
785
+ # define psnip_builtin_clrsb(x) __builtin_clrsb(x)
786
+ # if !defined(__INTEL_COMPILER)
787
+ # define psnip_builtin_clrsbl(x) __builtin_clrsbl(x)
788
+ # else
789
+ # if PSNIP_BUILTIN__SIZEOF_LONG == PSNIP_BUILTIN__SIZEOF_INT
790
+ # define psnip_builtin_clrsbl(x) ((long) __builtin_clrsb((int) x))
791
+ # elif PSNIP_BUILTIN__SIZEOF_LONG == PSNIP_BUILTIN__SIZEOF_LLONG
792
+ # define psnip_builtin_clrsbl(x) ((long) __builtin_clrsbll((long long) x))
793
+ # else
794
+ PSNIP_BUILTIN__CLRSB_DEFINE_PORTABLE(clrsbl, clzl, long)
795
+ # endif
796
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
797
+ # define __builtin_clrsbl(x) psnip_builtin_clrsbl(x)
798
+ # endif
799
+ # endif
800
+ # define psnip_builtin_clrsbll(x) __builtin_clrsbll(x)
801
+ # define psnip_builtin_clrsb32(x) PSNIP_BUILTIN__VARIANT_INT32(_,clrsb)(x)
802
+ # define psnip_builtin_clrsb64(x) PSNIP_BUILTIN__VARIANT_INT64(_,clrsb)(x)
803
+ #else
804
+ PSNIP_BUILTIN__CLRSB_DEFINE_PORTABLE(clrsb, clz, int)
805
+ PSNIP_BUILTIN__CLRSB_DEFINE_PORTABLE(clrsbl, clzl, long)
806
+ PSNIP_BUILTIN__CLRSB_DEFINE_PORTABLE(clrsbll, clzll, long long)
807
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
808
+ # define __builtin_clrsb(x) psnip_builtin_clrsb(x)
809
+ # define __builtin_clrsbl(x) psnip_builtin_clrsbl(x)
810
+ # define __builtin_clrsbll(x) psnip_builtin_clrsbll(x)
811
+ # endif
812
+ #endif
813
+
814
+ #if !defined(psnip_builtin_clrsb32)
815
+ # define psnip_builtin_clrsb32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,clrsb)(x)
816
+ #endif
817
+
818
+ #if !defined(psnip_builtin_clrsb64)
819
+ # define psnip_builtin_clrsb64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,clrsb)(x)
820
+ #endif
821
+
822
+ /*** __builtin_bitreverse ***/
823
+
824
/* PSNIP_BUILTIN__BITREVERSE_DEFINE_PORTABLE(f_n, T)
 *
 * Emits `T psnip_builtin_<f_n>(T x)`, which reverses the bit order of
 * x by repeated halving: the two halves are swapped first, then the
 * quarters inside each half, and so on down to single bits.  `mask`
 * starts as all ones and is refined on each pass so that it selects
 * the low group of every pair of s-bit groups.
 */
#define PSNIP_BUILTIN__BITREVERSE_DEFINE_PORTABLE(f_n, T) \
  PSNIP_BUILTIN__FUNCTION \
  T psnip_builtin_##f_n(T x) { \
    T mask = (T) ~(T) 0U; \
    size_t s; \
    for (s = (sizeof(x) * CHAR_BIT) >> 1; s > 0; s >>= 1) { \
      mask = (T) (mask ^ (mask << s)); \
      x = (T) (((x >> s) & mask) | ((x << s) & ~mask)); \
    } \
    return x; \
  }
836
+
837
+ #if PSNIP_BUILTIN_CLANG_HAS_BUILTIN(__builtin_bitreverse64) && !defined(__EMSCRIPTEN__)
838
+ # define psnip_builtin_bitreverse8(x) __builtin_bitreverse8(x)
839
+ # define psnip_builtin_bitreverse16(x) __builtin_bitreverse16(x)
840
+ # define psnip_builtin_bitreverse32(x) __builtin_bitreverse32(x)
841
+ # define psnip_builtin_bitreverse64(x) __builtin_bitreverse64(x)
842
+ #else
843
+ PSNIP_BUILTIN__FUNCTION
844
+ psnip_uint8_t psnip_builtin_bitreverse8(psnip_uint8_t v) {
845
+ return (psnip_uint8_t) ((v * 0x0202020202ULL & 0x010884422010ULL) % 1023);
846
+ }
847
+ PSNIP_BUILTIN__BITREVERSE_DEFINE_PORTABLE(bitreverse16, psnip_uint16_t)
848
+ PSNIP_BUILTIN__BITREVERSE_DEFINE_PORTABLE(bitreverse32, psnip_uint32_t)
849
+ PSNIP_BUILTIN__BITREVERSE_DEFINE_PORTABLE(bitreverse64, psnip_uint64_t)
850
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
851
+ # define __builtin_bitreverse8(x) psnip_builtin_bitreverse8(x)
852
+ # define __builtin_bitreverse16(x) psnip_builtin_bitreverse16(x)
853
+ # define __builtin_bitreverse32(x) psnip_builtin_bitreverse32(x)
854
+ # define __builtin_bitreverse64(x) psnip_builtin_bitreverse64(x)
855
+ # endif
856
+ #endif
857
+
858
+ /*** __builtin_addc ***/
859
+
860
/* PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(f_n, T)
 *
 * Emits `T psnip_builtin_<f_n>(T x, T y, T ci, T* co)` for the unsigned
 * type T: add-with-carry.
 *
 *   x, y  operands
 *   ci    carry in (added to the sum as a value)
 *   co    receives 1 if either addition wrapped, otherwise 0
 *   returns x + y + ci reduced modulo the range of T
 *
 * The sum is formed in two steps and each step is checked for
 * wrap-around (an unsigned result smaller than an operand means it
 * wrapped).  The previous version only noticed a wrap in the second
 * step when the intermediate sum was exactly the maximum value, which
 * is correct for ci == 1 but loses the carry for larger ci.
 * The explicit (T) casts matter for types narrower than int, where the
 * arithmetic is performed in int and has to be reduced back to T.
 */
#define PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(f_n, T) \
  PSNIP_BUILTIN__FUNCTION \
  T psnip_builtin_##f_n(T x, T y, T ci, T* co) { \
    const T partial = (T) (x + y); \
    const T sum     = (T) (partial + ci); \
    *co = (T) ((partial < x) | (sum < partial)); \
    return sum; \
  }
874
+
875
+ #if PSNIP_BUILTIN_CLANG_HAS_BUILTIN(__builtin_addc)
876
+ # define psnip_builtin_addcb(x, y, ci, co) __builtin_addcb(x, y, ci, co)
877
+ # define psnip_builtin_addcs(x, y, ci, co) __builtin_addcs(x, y, ci, co)
878
+ # define psnip_builtin_addc(x, y, ci, co) __builtin_addc(x, y, ci, co)
879
+ # define psnip_builtin_addcl(x, y, ci, co) __builtin_addcl(x, y, ci, co)
880
+ # define psnip_builtin_addcll(x, y, ci, co) __builtin_addcll(x, y, ci, co)
881
+ # define psnip_builtin_addc8(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT8(_,addc)(x, y, ci, co)
882
+ # define psnip_builtin_addc16(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT16(_,addc)(x, y, ci, co)
883
+ # define psnip_builtin_addc32(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT32(_,addc)(x, y, ci, co)
884
+ # define psnip_builtin_addc64(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT64(_,addc)(x, y, ci, co)
885
+ #else
886
+ PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(addcb, unsigned char)
887
+ PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(addcs, unsigned short)
888
+ PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(addc, unsigned int)
889
+ PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(addcl, unsigned long)
890
+ PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(addcll, unsigned long long)
891
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
892
+ # define __builtin_addcb(x, y, ci, co) psnip_builtin_addcb(x, y, ci, co)
893
+ # define __builtin_addcs(x, y, ci, co) psnip_builtin_addcs(x, y, ci, co)
894
+ # define __builtin_addc(x, y, ci, co) psnip_builtin_addc(x, y, ci, co)
895
+ # define __builtin_addcl(x, y, ci, co) psnip_builtin_addcl(x, y, ci, co)
896
+ # define __builtin_addcll(x, y, ci, co) psnip_builtin_addcll(x, y, ci, co)
897
+ # endif
898
+ #endif
899
+
900
+ #if !defined(psnip_builtin_addc8)
901
+ # define psnip_builtin_addc8(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT8(psnip,addc)(x, y, ci, co)
902
+ #endif
903
+
904
+ #if !defined(psnip_builtin_addc16)
905
+ # define psnip_builtin_addc16(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT16(psnip,addc)(x, y, ci, co)
906
+ #endif
907
+
908
+ #if !defined(psnip_builtin_addc32)
909
+ # define psnip_builtin_addc32(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT32(psnip,addc)(x, y, ci, co)
910
+ #endif
911
+
912
+ #if !defined(psnip_builtin_addc64)
913
+ # define psnip_builtin_addc64(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT64(psnip,addc)(x, y, ci, co)
914
+ #endif
915
+
916
+ /*** __builtin_subc ***/
917
+
918
/* PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(f_n, T)
 *
 * Emits `T psnip_builtin_<f_n>(T x, T y, T ci, T* co)` for the unsigned
 * type T: subtract-with-borrow.
 *
 *   x, y  minuend and subtrahend
 *   ci    borrow in (subtracted from the difference as a value)
 *   co    receives 1 if either subtraction borrowed, otherwise 0
 *   returns x - y - ci reduced modulo the range of T
 *
 * A borrow occurs in x - y when x < y, and in (x - y) - ci when the
 * intermediate difference is smaller than ci.  The previous version
 * tested the result *after* decrementing and compared it with 0, so it
 * reported a borrow for e.g. 6 - 5 - 1 (result 0, no borrow) and
 * missed it for 5 - 5 - 1 (result wraps to the maximum value).
 * The explicit (T) casts matter for types narrower than int, where the
 * arithmetic is performed in int and has to be reduced back to T.
 */
#define PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(f_n, T) \
  PSNIP_BUILTIN__FUNCTION \
  T psnip_builtin_##f_n(T x, T y, T ci, T* co) { \
    const T partial = (T) (x - y); \
    const T diff    = (T) (partial - ci); \
    *co = (T) ((x < y) | (partial < ci)); \
    return diff; \
  }
930
+
931
+ #if PSNIP_BUILTIN_CLANG_HAS_BUILTIN(__builtin_subc)
932
+ # define psnip_builtin_subcb(x, y, ci, co) __builtin_subcb(x, y, ci, co)
933
+ # define psnip_builtin_subcs(x, y, ci, co) __builtin_subcs(x, y, ci, co)
934
+ # define psnip_builtin_subc(x, y, ci, co) __builtin_subc(x, y, ci, co)
935
+ # define psnip_builtin_subcl(x, y, ci, co) __builtin_subcl(x, y, ci, co)
936
+ # define psnip_builtin_subcll(x, y, ci, co) __builtin_subcll(x, y, ci, co)
937
+ # define psnip_builtin_subc8(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT8(_,subc)(x, y, ci, co)
938
+ # define psnip_builtin_subc16(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT16(_,subc)(x, y, ci, co)
939
+ # define psnip_builtin_subc32(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT32(_,subc)(x, y, ci, co)
940
+ # define psnip_builtin_subc64(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT64(_,subc)(x, y, ci, co)
941
+ #else
942
+ PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(subcb, unsigned char)
943
+ PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(subcs, unsigned short)
944
+ PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(subc, unsigned int)
945
+ PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(subcl, unsigned long)
946
+ PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(subcll, unsigned long long)
947
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
948
+ # define __builtin_subcb(x, y, ci, co) psnip_builtin_subcb(x, y, ci, co)
949
+ # define __builtin_subcs(x, y, ci, co) psnip_builtin_subcs(x, y, ci, co)
950
+ # define __builtin_subc(x, y, ci, co) psnip_builtin_subc(x, y, ci, co)
951
+ # define __builtin_subcl(x, y, ci, co) psnip_builtin_subcl(x, y, ci, co)
952
+ # define __builtin_subcll(x, y, ci, co) psnip_builtin_subcll(x, y, ci, co)
953
+ # endif
954
+ #endif
955
+
956
+ #if !defined(psnip_builtin_subc8)
957
+ # define psnip_builtin_subc8(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT8(psnip,subc)(x, y, ci, co)
958
+ #endif
959
+
960
+ #if !defined(psnip_builtin_subc16)
961
+ # define psnip_builtin_subc16(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT16(psnip,subc)(x, y, ci, co)
962
+ #endif
963
+
964
+ #if !defined(psnip_builtin_subc32)
965
+ # define psnip_builtin_subc32(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT32(psnip,subc)(x, y, ci, co)
966
+ #endif
967
+
968
+ #if !defined(psnip_builtin_subc64)
969
+ # define psnip_builtin_subc64(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT64(psnip,subc)(x, y, ci, co)
970
+ #endif
971
+
972
+ /*** __builtin_bswap ***/
973
+
974
+ #if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_bswap16, 4, 8)
975
+ # define psnip_builtin_bswap16(x) __builtin_bswap16(x)
976
+ #else
977
+ # if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_byteswap_ushort,13,10)
978
+ # define psnip_builtin_bswap16(x) _byteswap_ushort(x)
979
+ # else
980
+ PSNIP_BUILTIN__FUNCTION
981
+ psnip_uint16_t
982
+ psnip_builtin_bswap16(psnip_uint16_t v) {
983
+ return
984
+ ((v & (((psnip_uint16_t) 0xff) << 8)) >> 8) |
985
+ ((v & (((psnip_uint16_t) 0xff) )) << 8);
986
+ }
987
+ # endif
988
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
989
+ # define __builtin_bswap16(x) psnip_builtin_bswap16(x)
990
+ # endif
991
+ #endif
992
+
993
+ #if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_bswap16, 4, 3)
994
+ # define psnip_builtin_bswap32(x) __builtin_bswap32(x)
995
+ # define psnip_builtin_bswap64(x) __builtin_bswap64(x)
996
+ #else
997
+ # if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_byteswap_ushort,13,10)
998
+ # define psnip_builtin_bswap32(x) _byteswap_ulong(x)
999
+ # define psnip_builtin_bswap64(x) _byteswap_uint64(x)
1000
+ # else
1001
+ PSNIP_BUILTIN__FUNCTION
1002
+ psnip_uint32_t
1003
+ psnip_builtin_bswap32(psnip_uint32_t v) {
1004
+ return
1005
+ ((v & (((psnip_uint32_t) 0xff) << 24)) >> 24) |
1006
+ ((v & (((psnip_uint32_t) 0xff) << 16)) >> 8) |
1007
+ ((v & (((psnip_uint32_t) 0xff) << 8)) << 8) |
1008
+ ((v & (((psnip_uint32_t) 0xff) )) << 24);
1009
+ }
1010
+
1011
+ PSNIP_BUILTIN__FUNCTION
1012
+ psnip_uint64_t
1013
+ psnip_builtin_bswap64(psnip_uint64_t v) {
1014
+ return
1015
+ ((v & (((psnip_uint64_t) 0xff) << 56)) >> 56) |
1016
+ ((v & (((psnip_uint64_t) 0xff) << 48)) >> 40) |
1017
+ ((v & (((psnip_uint64_t) 0xff) << 40)) >> 24) |
1018
+ ((v & (((psnip_uint64_t) 0xff) << 32)) >> 8) |
1019
+ ((v & (((psnip_uint64_t) 0xff) << 24)) << 8) |
1020
+ ((v & (((psnip_uint64_t) 0xff) << 16)) << 24) |
1021
+ ((v & (((psnip_uint64_t) 0xff) << 8)) << 40) |
1022
+ ((v & (((psnip_uint64_t) 0xff) )) << 56);
1023
+ }
1024
+ # endif
1025
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1026
+ # define __builtin_bswap32(x) psnip_builtin_bswap32(x)
1027
+ # define __builtin_bswap64(x) psnip_builtin_bswap64(x)
1028
+ # endif
1029
+ #endif
1030
+
1031
+ /******
1032
+ *** MSVC-style intrinsics
1033
+ ******/
1034
+
1035
+ /*** _rotl ***/
1036
+
1037
/* PSNIP_BUILTIN_ROTL_DEFINE_PORTABLE(f_n, T, ST)
 *
 * Emits `T psnip_intrin_<f_n>(T value, ST shift)`: rotate `value` left
 * by `shift` bit positions.  T is an unsigned type whose width is a
 * power of two (8/16/32/64 as instantiated in this file).
 *
 * The count is reduced modulo the width of T, and the complementary
 * right-shift count is reduced the same way.  This keeps both shifts
 * strictly below the width: the previous form shifted by
 * `width - shift`, which is a full-width shift (undefined in C) for
 * shift == 0 and out of range for shift >= width.  For 0 < shift <
 * width the result is unchanged.
 */
#define PSNIP_BUILTIN_ROTL_DEFINE_PORTABLE(f_n, T, ST) \
  PSNIP_BUILTIN__FUNCTION \
  T psnip_intrin_##f_n(T value, ST shift) { \
    const unsigned int mask  = (unsigned int) (sizeof(T) * 8) - 1U; \
    const unsigned int left  = ((unsigned int) shift) & mask; \
    const unsigned int right = (0U - left) & mask; \
    return (T) ((value << left) | (value >> right)); \
  }
1044
+
1045
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_rotl8, 14, 0)
1046
+ # define psnip_intrin_rotl8(value, shift) _rotl8(value, shift)
1047
+ # define psnip_intrin_rotl16(value, shift) _rotl16(value, shift)
1048
+ #else
1049
+ PSNIP_BUILTIN_ROTL_DEFINE_PORTABLE(rotl8, psnip_uint8_t, unsigned char)
1050
+ PSNIP_BUILTIN_ROTL_DEFINE_PORTABLE(rotl16, psnip_uint16_t, unsigned char)
1051
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1052
+ # if !defined(_rotl8)
1053
+ # define _rotl8(value, shift) psnip_intrin_rotl8(value, shift)
1054
+ # endif
1055
+ # if !defined(_rotl16)
1056
+ # define _rotl16(value, shift) psnip_intrin_rotl16(value, shift)
1057
+ # endif
1058
+ # endif
1059
+ #endif
1060
+
1061
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_rotl8, 13, 10)
1062
+ # define psnip_intrin_rotl(value, shift) _rotl(value, shift)
1063
+ # define psnip_intrin_rotl64(value, shift) _rotl64(value, shift)
1064
+ #else
1065
+ PSNIP_BUILTIN_ROTL_DEFINE_PORTABLE(rotl, psnip_uint32_t, int)
1066
+ PSNIP_BUILTIN_ROTL_DEFINE_PORTABLE(rotl64, psnip_uint64_t, int)
1067
+
1068
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1069
+ # if !defined(_rotl)
1070
+ # define _rotl(value, shift) psnip_intrin_rotl(value, shift)
1071
+ # endif
1072
+ # if !defined(_rotl64)
1073
+ # define _rotl64(value, shift) psnip_intrin_rotl64(value, shift)
1074
+ # endif
1075
+ # endif
1076
+ #endif
1077
+
1078
+ /*** _rotr ***/
1079
+
1080
/* PSNIP_BUILTIN_ROTR_DEFINE_PORTABLE(f_n, T, ST)
 *
 * Emits `T psnip_intrin_<f_n>(T value, ST shift)`: rotate `value`
 * right by `shift` bit positions.  T is an unsigned type whose width
 * is a power of two (8/16/32/64 as instantiated in this file).
 *
 * The count is reduced modulo the width of T, and the complementary
 * left-shift count is reduced the same way.  This keeps both shifts
 * strictly below the width: the previous form shifted by
 * `width - shift`, which is a full-width shift (undefined in C) for
 * shift == 0 and out of range for shift >= width.  For 0 < shift <
 * width the result is unchanged.
 */
#define PSNIP_BUILTIN_ROTR_DEFINE_PORTABLE(f_n, T, ST) \
  PSNIP_BUILTIN__FUNCTION \
  T psnip_intrin_##f_n(T value, ST shift) { \
    const unsigned int mask  = (unsigned int) (sizeof(T) * 8) - 1U; \
    const unsigned int right = ((unsigned int) shift) & mask; \
    const unsigned int left  = (0U - right) & mask; \
    return (T) ((value >> right) | (value << left)); \
  }
1087
+
1088
+ PSNIP_BUILTIN_ROTR_DEFINE_PORTABLE(rotr8, psnip_uint8_t, unsigned char)
1089
+ PSNIP_BUILTIN_ROTR_DEFINE_PORTABLE(rotr16, psnip_uint16_t, unsigned char)
1090
+
1091
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_rotr8, 14, 0)
1092
+ # define psnip_intrin_rotr8(value, shift) _rotr8(value, shift)
1093
+ # define psnip_intrin_rotr16(value, shift) _rotr16(value, shift)
1094
+ #else
1095
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1096
+ # define _rotr8(value, shift) psnip_intrin_rotr8(value, shift)
1097
+ # define _rotr16(value, shift) psnip_intrin_rotr16(value, shift)
1098
+ # endif
1099
+ #endif
1100
+
1101
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_rotr8, 13, 10)
1102
+ # define psnip_intrin_rotr(value, shift) _rotr(value, shift)
1103
+ # define psnip_intrin_rotr64(value, shift) _rotr64(value, shift)
1104
+ #else
1105
+ PSNIP_BUILTIN_ROTR_DEFINE_PORTABLE(rotr, psnip_uint32_t, int)
1106
+ PSNIP_BUILTIN_ROTR_DEFINE_PORTABLE(rotr64, psnip_uint64_t, int)
1107
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1108
+ # if !defined(_rotr)
1109
+ # define _rotr(value, shift) psnip_intrin_rotr(value, shift)
1110
+ # endif
1111
+ # if !defined(_rotr64)
1112
+ # define _rotr64(value, shift) psnip_intrin_rotr64(value, shift)
1113
+ # endif
1114
+ # endif
1115
+ #endif
1116
+
1117
+ /*** _BitScanForward ***/
1118
+
1119
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanForward, 14, 0)
1120
+ # pragma intrinsic(_BitScanForward)
1121
+ PSNIP_BUILTIN__FUNCTION
1122
+ unsigned char psnip_intrin_BitScanForward(unsigned long* Index, psnip_uint32_t Mask) {
1123
+ const unsigned long M = (unsigned long) Mask;
1124
+ return _BitScanForward(Index, M);
1125
+ }
1126
+ #else
1127
+ PSNIP_BUILTIN__FUNCTION
1128
+ unsigned char psnip_intrin_BitScanForward(unsigned long* Index, psnip_uint32_t Mask) {
1129
+ return PSNIP_BUILTIN_UNLIKELY(Mask == 0) ? 0 : ((*Index = psnip_builtin_ctz32 (Mask)), 1);
1130
+ }
1131
+
1132
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1133
+ # define _BitScanForward(Index, Mask) psnip_intrin_BitScanForward(Index, Mask)
1134
+ # endif
1135
+ #endif
1136
+
1137
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanForward64, 14, 0) && (defined(_M_AMD64) || defined(_M_ARM))
1138
+ # pragma intrinsic(_BitScanForward64)
1139
+ # define psnip_intrin_BitScanForward64(Index, Mask) _BitScanForward64(Index, Mask)
1140
+ #else
1141
+ PSNIP_BUILTIN__FUNCTION
1142
+ unsigned char psnip_intrin_BitScanForward64(unsigned long* Index, psnip_uint64_t Mask) {
1143
+ return PSNIP_BUILTIN_UNLIKELY(Mask == 0) ? 0 : ((*Index = psnip_builtin_ctz64 (Mask)), 1);
1144
+ }
1145
+
1146
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1147
+ # define _BitScanForward64(Index, Mask) psnip_intrin_BitScanForward64(Index, Mask)
1148
+ # endif
1149
+ #endif
1150
+
1151
+ /*** _BitScanReverse ***/
1152
+
1153
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanReverse, 14, 0)
1154
+ # pragma intrinsic(_BitScanReverse)
1155
+ PSNIP_BUILTIN__FUNCTION
1156
+ unsigned char psnip_intrin_BitScanReverse(unsigned long* Index, psnip_uint32_t Mask) {
1157
+ const unsigned long M = (unsigned long) Mask;
1158
+ return _BitScanReverse(Index, M);
1159
+ }
1160
+ #else
1161
+ PSNIP_BUILTIN__FUNCTION
1162
+ unsigned char psnip_intrin_BitScanReverse(unsigned long* Index, psnip_uint32_t Mask) {
1163
+ return (PSNIP_BUILTIN_UNLIKELY(Mask == 0)) ? 0 : ((*Index = ((sizeof(Mask) * CHAR_BIT) - 1) - psnip_builtin_clz32 (Mask)), 1);
1164
+ }
1165
+
1166
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1167
+ # define _BitScanReverse(Index, Mask) psnip_intrin_BitScanReverse(Index, Mask)
1168
+ # endif
1169
+ #endif
1170
+
1171
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanReverse64, 14, 0) && (defined(_M_AMD64) || defined(_M_ARM))
1172
+ # pragma intrinsic(_BitScanReverse64)
1173
+ # define psnip_intrin_BitScanReverse64(Index, Mask) _BitScanReverse64(Index, Mask)
1174
+ #else
1175
+ PSNIP_BUILTIN__FUNCTION
1176
+ unsigned char psnip_intrin_BitScanReverse64(unsigned long* Index, psnip_uint64_t Mask) {
1177
+ return (PSNIP_BUILTIN_UNLIKELY(Mask == 0)) ? 0 : ((*Index = ((sizeof(Mask) * CHAR_BIT) - 1) - psnip_builtin_clz64 (Mask)), 1);
1178
+ }
1179
+
1180
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1181
+ # define _BitScanReverse64(Index, Mask) psnip_intrin_BitScanReverse64(Index, Mask)
1182
+ # endif
1183
+ #endif
1184
+
1185
+ /*** bittest ***/
1186
+
1187
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittest, 14, 0)
1188
+ # pragma intrinsic(_bittest)
1189
+ # define psnip_intrin_bittest(a, b) \
1190
+ __pragma(warning(push)) \
1191
+ __pragma(warning(disable:4057)) \
1192
+ _bittest(a, b) \
1193
+ __pragma(warning(pop))
1194
+ #else
1195
+ # define psnip_intrin_bittest(a, b) (((*(a)) >> (b)) & 1)
1196
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1197
+ # define _bittest(a, b) psnip_intrin_bittest(a, b)
1198
+ # endif
1199
+ #endif
1200
+
1201
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittest64, 14, 0) && (defined(_M_AMD64) || defined(_M_ARM))
1202
+ # pragma intrinsic(_bittest64)
1203
+ # define psnip_intrin_bittest64(a, b) _bittest64(a, b)
1204
+ #else
1205
+ # define psnip_intrin_bittest64(a, b) (((*(a)) >> (b)) & 1)
1206
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1207
+ # define _bittest64(a, b) psnip_intrin_bittest64(a, b)
1208
+ # endif
1209
+ #endif
1210
+
1211
+ /*** bittestandcomplement ***/
1212
+
1213
/* PSNIP_BUILTIN__BITTESTANDCOMPLEMENT_DEFINE_PORTABLE(f_n, T, UT)
 *
 * Emits `unsigned char psnip_intrin_<f_n>(T* a, T b)`: reads bit b of
 * *a, flips that bit in place, and returns the value it had before the
 * flip.  UT is the type in which the single-bit mask is built (the
 * instantiations in this file pass the unsigned counterpart of T).
 */
#define PSNIP_BUILTIN__BITTESTANDCOMPLEMENT_DEFINE_PORTABLE(f_n, T, UT) \
  PSNIP_BUILTIN__FUNCTION \
  unsigned char psnip_intrin_##f_n(T* a, T b) { \
    const char previous = (*a >> b) & 1; \
    *a = *a ^ (((UT) 1) << b); \
    return previous; \
  }
1220
+
1221
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandcomplement, 14, 0)
1222
+ # pragma intrinsic(_bittestandcomplement)
1223
+ # define psnip_intrin_bittestandcomplement(a, b) \
1224
+ __pragma(warning(push)) \
1225
+ __pragma(warning(disable:4057)) \
1226
+ _bittestandcomplement(a, b) \
1227
+ __pragma(warning(pop))
1228
+ #else
1229
+ PSNIP_BUILTIN__BITTESTANDCOMPLEMENT_DEFINE_PORTABLE(bittestandcomplement, psnip_int32_t, psnip_uint32_t)
1230
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1231
+ # define _bittestandcomplement(a, b) psnip_intrin_bittestandcomplement(a, b)
1232
+ # endif
1233
+ #endif
1234
+
1235
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandcomplement64, 14, 0) && defined(_M_AMD64)
1236
+ # define psnip_intrin_bittestandcomplement64(a, b) _bittestandcomplement64(a, b)
1237
+ #else
1238
+ PSNIP_BUILTIN__BITTESTANDCOMPLEMENT_DEFINE_PORTABLE(bittestandcomplement64, psnip_int64_t, psnip_uint64_t)
1239
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1240
+ # define _bittestandcomplement64(a, b) psnip_intrin_bittestandcomplement64(a, b)
1241
+ # endif
1242
+ #endif
1243
+
1244
+ /*** bittestandreset ***/
1245
+
1246
/* PSNIP_BUILTIN__BITTESTANDRESET_DEFINE_PORTABLE(f_n, T, UT)
 *
 * Emits `unsigned char psnip_intrin_<f_n>(T* a, T b)`: reads bit b of
 * *a, clears that bit in place, and returns the value it had before
 * being cleared.  UT is the type in which the single-bit mask is built
 * (the instantiations in this file pass the unsigned counterpart of
 * T).
 */
#define PSNIP_BUILTIN__BITTESTANDRESET_DEFINE_PORTABLE(f_n, T, UT) \
  PSNIP_BUILTIN__FUNCTION \
  unsigned char psnip_intrin_##f_n(T* a, T b) { \
    const char previous = (*a >> b) & 1; \
    *a = *a & ~(((UT) 1) << b); \
    return previous; \
  }
1253
+
1254
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandreset, 14, 0)
1255
+ # pragma intrinsic(_bittestandreset)
1256
+ # define psnip_intrin_bittestandreset(a, b) \
1257
+ __pragma(warning(push)) \
1258
+ __pragma(warning(disable:4057)) \
1259
+ _bittestandreset(a, b) \
1260
+ __pragma(warning(pop))
1261
+ #else
1262
+ PSNIP_BUILTIN__BITTESTANDRESET_DEFINE_PORTABLE(bittestandreset, psnip_int32_t, psnip_uint32_t)
1263
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1264
+ # define _bittestandreset(a, b) psnip_intrin_bittestandreset(a, b)
1265
+ # endif
1266
+ #endif
1267
+
1268
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandreset64, 14, 0) && (defined(_M_AMD64) || defined(_M_IA64))
1269
+ # pragma intrinsic(_bittestandreset64)
1270
+ # define psnip_intrin_bittestandreset64(a, b) _bittestandreset64(a, b)
1271
+ #else
1272
+ PSNIP_BUILTIN__BITTESTANDRESET_DEFINE_PORTABLE(bittestandreset64, psnip_int64_t, psnip_uint64_t)
1273
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1274
+ # define _bittestandreset64(a, b) psnip_intrin_bittestandreset64(a, b)
1275
+ # endif
1276
+ #endif
1277
+
1278
+ /*** bittestandset ***/
1279
+
1280
/* PSNIP_BUILTIN__BITTESTANDSET_DEFINE_PORTABLE(f_n, T, UT)
 *
 * Emits `unsigned char psnip_intrin_<f_n>(T* a, T b)`: reads bit b of
 * *a, sets that bit in place, and returns the value it had before
 * being set.  UT is the type in which the single-bit mask is built
 * (the instantiations in this file pass the unsigned counterpart of
 * T).
 */
#define PSNIP_BUILTIN__BITTESTANDSET_DEFINE_PORTABLE(f_n, T, UT) \
  PSNIP_BUILTIN__FUNCTION \
  unsigned char psnip_intrin_##f_n(T* a, T b) { \
    const char previous = (*a >> b) & 1; \
    *a = *a | (((UT) 1) << b); \
    return previous; \
  }
1287
+
1288
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandset, 14, 0)
1289
+ # pragma intrinsic(_bittestandset)
1290
+ # define psnip_intrin_bittestandset(a, b) \
1291
+ __pragma(warning(push)) \
1292
+ __pragma(warning(disable:4057)) \
1293
+ _bittestandset(a, b) \
1294
+ __pragma(warning(pop))
1295
+ #else
1296
+ PSNIP_BUILTIN__BITTESTANDSET_DEFINE_PORTABLE(bittestandset, psnip_int32_t, psnip_uint32_t)
1297
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1298
+ # define _bittestandset(a, b) psnip_intrin_bittestandset(a, b)
1299
+ # endif
1300
+ #endif
1301
+
1302
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandset64, 14, 0) && defined(_M_AMD64)
1303
+ # pragma intrinsic(_bittestandset64)
1304
+ # define psnip_intrin_bittestandset64(a, b) _bittestandset64(a, b)
1305
+ #else
1306
+ PSNIP_BUILTIN__BITTESTANDSET_DEFINE_PORTABLE(bittestandset64, psnip_int64_t, psnip_uint64_t)
1307
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1308
+ # define _bittestandset64(a, b) psnip_intrin_bittestandset64(a, b)
1309
+ # endif
1310
+ #endif
1311
+
1312
+ /*** shiftleft128 ***/
1313
+
1314
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(__shiftleft128, 14, 0) && defined(_M_AMD64)
1315
+ # define psnip_intrin_shiftleft128(LowPart, HighPart, Shift) __shiftleft128(LowPart, HighPart, Shift)
1316
+ #else
1317
+ # if defined(__SIZEOF_INT128__)
1318
+ PSNIP_BUILTIN__FUNCTION
1319
+ psnip_uint64_t psnip_intrin_shiftleft128(psnip_uint64_t LowPart, psnip_uint64_t HighPart, unsigned char Shift) {
1320
+ unsigned __int128 r = HighPart;
1321
+ r <<= 64;
1322
+ r |= LowPart;
1323
+ r <<= Shift % 64;
1324
+ return (psnip_uint64_t) (r >> 64);
1325
+ }
1326
+ # else
1327
+ PSNIP_BUILTIN__FUNCTION
1328
+ psnip_uint64_t psnip_intrin_shiftleft128(psnip_uint64_t LowPart, psnip_uint64_t HighPart, unsigned char Shift) {
1329
+ Shift %= 64;
1330
+ return PSNIP_BUILTIN_UNLIKELY(Shift == 0) ? HighPart : ((HighPart << Shift) | (LowPart >> (64 - Shift)));
1331
+ }
1332
+ # endif
1333
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1334
+ # define __shiftleft128(LowPart, HighPart, Shift) psnip_intrin_shiftleft128(LowPart, HighPart, Shift)
1335
+ # endif
1336
+ #endif
1337
+
1338
+ /*** shiftright128 ***/
1339
+
1340
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(__shiftright128, 14, 0) && defined(_M_AMD64)
1341
+ # define psnip_intrin_shiftright128(LowPart, HighPart, Shift) __shiftright128(LowPart, HighPart, Shift)
1342
+ #else
1343
+ # if defined(__SIZEOF_INT128__)
1344
+ PSNIP_BUILTIN__FUNCTION
1345
+ psnip_uint64_t psnip_intrin_shiftright128(psnip_uint64_t LowPart, psnip_uint64_t HighPart, unsigned char Shift) {
1346
+ unsigned __int128 r = HighPart;
1347
+ r <<= 64;
1348
+ r |= LowPart;
1349
+ r >>= Shift % 64;
1350
+ return (psnip_uint64_t) r;
1351
+ }
1352
+ # else
1353
+ PSNIP_BUILTIN__FUNCTION
1354
+ psnip_uint64_t psnip_intrin_shiftright128(psnip_uint64_t LowPart, psnip_uint64_t HighPart, unsigned char Shift) {
1355
+ Shift %= 64;
1356
+
1357
+ if (PSNIP_BUILTIN_UNLIKELY(Shift == 0))
1358
+ return LowPart;
1359
+
1360
+ return
1361
+ (HighPart << (64 - Shift)) |
1362
+ (LowPart >> Shift);
1363
+ }
1364
+ # endif
1365
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1366
+ # define __shiftright128(LowPart, HighPart, Shift) psnip_intrin_shiftright128(LowPart, HighPart, Shift)
1367
+ # endif
1368
+ #endif
1369
+
1370
+ /*** byteswap ***/
1371
+
1372
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_byteswap_ushort,13,10)
1373
+ # pragma intrinsic(_byteswap_ushort)
1374
+ # define psnip_intrin_byteswap_ushort(v) _byteswap_ushort(v)
1375
+ # pragma intrinsic(_byteswap_ulong)
1376
+ # define psnip_intrin_byteswap_ulong(v) _byteswap_ulong(v)
1377
+ # pragma intrinsic(_byteswap_uint64)
1378
+ # define psnip_intrin_byteswap_uint64(v) _byteswap_uint64(v)
1379
+ #else
1380
+ # define psnip_intrin_byteswap_ushort(v) psnip_builtin_bswap16(v)
1381
+ # define psnip_intrin_byteswap_ulong(v) psnip_builtin_bswap32(v)
1382
+ # define psnip_intrin_byteswap_uint64(v) psnip_builtin_bswap64(v)
1383
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1384
+ # define _byteswap_ushort(v) psnip_intrin_byteswap_ushort(v)
1385
+ # define _byteswap_ulong(v) psnip_intrin_byteswap_ulong(v)
1386
+ # define _byteswap_uint64(v) psnip_intrin_byteswap_uint64(v)
1387
+ # endif
1388
+ #endif
1389
+
1390
+ #endif /* defined(PSNIP_BUILTIN_H) */