thread_safety 0.1.2 → 0.1.3

@@ -0,0 +1,667 @@
+ /*
+  * Vendored from Ruby v3_4_8 (tag: v3_4_8)
+  * Source: https://github.com/ruby/ruby/blob/v3_4_8/internal/bits.h
+  *
+  * Reason: Ruby 3.4's modular GC was designed for in-tree builds only.
+  * The BUILDING_MODULAR_GC guards that provide inline implementations
+  * (avoiding these internal headers) were added in Ruby 4.0:
+  * - Commit 9130023cf5: "Remove dependency on bits.h in default.c when BUILDING_MODULAR_GC"
+  * - Commit 04f538c144: "Remove dependency on sanitizers.h in default.c when BUILDING_MODULAR_GC"
+  * These commits are only in Ruby 4.0+ and were not backported to 3.4.x.
+  *
+  * License: Ruby's License (BSD-2-Clause) - see https://www.ruby-lang.org/en/about/license.txt
+  *
+  * Maintenance: When updating to a new Ruby 3.4.x patch release, re-vendor this
+  * file from the corresponding tag. This file can be removed once
+  * Ruby 4.0+ is the minimum supported version.
+  */
+ #ifndef INTERNAL_BITS_H /*-*-C-*-vi:se ft=c:*/
+ #define INTERNAL_BITS_H
+ /**
+  * @author Ruby developers <ruby-core@ruby-lang.org>
+  * @copyright This file is a part of the programming language Ruby.
+  *            Permission is hereby granted, to either redistribute and/or
+  *            modify this file, provided that the conditions mentioned in the
+  *            file COPYING are met. Consult the file for details.
+  * @brief Internal header for bitwise integer algorithms.
+  * @see Henry S. Warren Jr., "Hacker's Delight" (2nd ed.), 2013.
+  * @see SEI CERT C Coding Standard INT32-C. "Ensure that operations on
+  *      signed integers do not result in overflow"
+  * @see https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html
+  * @see https://clang.llvm.org/docs/LanguageExtensions.html#builtin-rotateleft
+  * @see https://clang.llvm.org/docs/LanguageExtensions.html#builtin-rotateright
+  * @see https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/byteswap-uint64-byteswap-ulong-byteswap-ushort
+  * @see https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/rotl-rotl64-rotr-rotr64
+  * @see https://docs.microsoft.com/en-us/cpp/intrinsics/bitscanforward-bitscanforward64
+  * @see https://docs.microsoft.com/en-us/cpp/intrinsics/bitscanreverse-bitscanreverse64
+  * @see https://docs.microsoft.com/en-us/cpp/intrinsics/lzcnt16-lzcnt-lzcnt64
+  * @see https://docs.microsoft.com/en-us/cpp/intrinsics/popcnt16-popcnt-popcnt64
+  * @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_lzcnt_u32
+  * @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tzcnt_u32
+  * @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rotl64
+  * @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rotr64
+  * @see https://stackoverflow.com/a/776523
+  */
+ #include "ruby/internal/config.h"
+ #include <limits.h> /* for CHAR_BITS */
+ #include <stdint.h> /* for uintptr_t */
+ #include "internal/compilers.h" /* for MSC_VERSION_SINCE */
+
+ #if MSC_VERSION_SINCE(1310)
+ # include <stdlib.h> /* for _byteswap_uint64 */
+ #endif
+
+ #if defined(HAVE_X86INTRIN_H)
+ # include <x86intrin.h> /* for _lzcnt_u64 */
+ #elif MSC_VERSION_SINCE(1310)
+ # include <intrin.h> /* for the following intrinsics */
+ #endif
+
+ #if defined(_MSC_VER) && defined(__AVX__)
+ # pragma intrinsic(__popcnt)
+ # pragma intrinsic(__popcnt64)
+ #endif
+
+ #if defined(_MSC_VER) && defined(__AVX2__)
+ # pragma intrinsic(__lzcnt)
+ # pragma intrinsic(__lzcnt64)
+ #endif
+
+ #if MSC_VERSION_SINCE(1310)
+ # pragma intrinsic(_rotl)
+ # pragma intrinsic(_rotr)
+ # ifdef _WIN64
+ # pragma intrinsic(_rotl64)
+ # pragma intrinsic(_rotr64)
+ # endif
+ #endif
+
+ #if MSC_VERSION_SINCE(1400)
+ # pragma intrinsic(_BitScanForward)
+ # pragma intrinsic(_BitScanReverse)
+ # ifdef _WIN64
+ # pragma intrinsic(_BitScanForward64)
+ # pragma intrinsic(_BitScanReverse64)
+ # endif
+ #endif
+
+ #include "ruby/ruby.h" /* for VALUE */
+ #include "internal/static_assert.h" /* for STATIC_ASSERT */
+
+ /* The most significant bit of the lower part of half-long integer.
+  * If sizeof(long) == 4, this is 0x8000.
+  * If sizeof(long) == 8, this is 0x80000000.
+  */
+ #define HALF_LONG_MSB ((SIGNED_VALUE)1<<((SIZEOF_LONG*CHAR_BIT-1)/2))
+
+ #define SIGNED_INTEGER_TYPE_P(T) (0 > ((T)0)-1)
+
+ #define SIGNED_INTEGER_MIN(T) \
+     ((sizeof(T) == sizeof(int8_t)) ? ((T)INT8_MIN) : \
+     ((sizeof(T) == sizeof(int16_t)) ? ((T)INT16_MIN) : \
+     ((sizeof(T) == sizeof(int32_t)) ? ((T)INT32_MIN) : \
+     ((sizeof(T) == sizeof(int64_t)) ? ((T)INT64_MIN) : \
+      0))))
+
+ #define SIGNED_INTEGER_MAX(T) ((T)(SIGNED_INTEGER_MIN(T) ^ ((T)~(T)0)))
+
+ #define UNSIGNED_INTEGER_MAX(T) ((T)~(T)0)
+
+ #ifndef MUL_OVERFLOW_SIGNED_INTEGER_P
+ #if __has_builtin(__builtin_mul_overflow_p)
+ # define MUL_OVERFLOW_P(a, b) \
+     __builtin_mul_overflow_p((a), (b), (__typeof__(a * b))0)
+ #elif __has_builtin(__builtin_mul_overflow)
+ # define MUL_OVERFLOW_P(a, b) \
+     __extension__ ({ __typeof__(a) c; __builtin_mul_overflow((a), (b), &c); })
+ #endif
+
+ #define MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, min, max) ( \
+     (a) == 0 ? 0 : \
+     (a) == -1 ? (b) < -(max) : \
+     (a) > 0 ? \
+       ((b) > 0 ? (max) / (a) < (b) : (min) / (a) > (b)) : \
+       ((b) > 0 ? (min) / (a) < (b) : (max) / (a) > (b)))
+
+ #if __has_builtin(__builtin_mul_overflow_p)
+ /* __builtin_mul_overflow_p can take bitfield */
+ /* and GCC permits bitfields for integers other than int */
+ # define MUL_OVERFLOW_FIXNUM_P(a, b) \
+     __extension__ ({ \
+         struct { long fixnum : sizeof(long) * CHAR_BIT - 1; } c = { 0 }; \
+         __builtin_mul_overflow_p((a), (b), c.fixnum); \
+     })
+ #else
+ # define MUL_OVERFLOW_FIXNUM_P(a, b) \
+     MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, FIXNUM_MIN, FIXNUM_MAX)
+ #endif
+
+ #if defined(MUL_OVERFLOW_P) && defined(USE___BUILTIN_MUL_OVERFLOW_LONG_LONG)
+ # define MUL_OVERFLOW_LONG_LONG_P(a, b) MUL_OVERFLOW_P(a, b)
+ #else
+ # define MUL_OVERFLOW_LONG_LONG_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, LLONG_MIN, LLONG_MAX)
+ #endif
+
+ #ifdef MUL_OVERFLOW_P
+ # define MUL_OVERFLOW_LONG_P(a, b) MUL_OVERFLOW_P(a, b)
+ # define MUL_OVERFLOW_INT_P(a, b) MUL_OVERFLOW_P(a, b)
+ #else
+ # define MUL_OVERFLOW_LONG_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, LONG_MIN, LONG_MAX)
+ # define MUL_OVERFLOW_INT_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, INT_MIN, INT_MAX)
+ #endif
+ #endif
+
+ #ifndef ADD_OVERFLOW_SIGNED_INTEGER_P
+ #if __has_builtin(__builtin_add_overflow_p)
+ # define ADD_OVERFLOW_P(a, b) \
+     __builtin_add_overflow_p((a), (b), (__typeof__(a * b))0)
+ #elif __has_builtin(__builtin_add_overflow)
+ # define ADD_OVERFLOW_P(a, b) \
+     __extension__ ({ __typeof__(a) c; __builtin_add_overflow((a), (b), &c); })
+ #endif
+
+ #define ADD_OVERFLOW_SIGNED_INTEGER_P(a, b, min, max) ( \
+     (a) > 0 ? (b) > (max) - (a) : (b) < (min) - (a))
+
+ #if __has_builtin(__builtin_add_overflow_p)
+ /* __builtin_add_overflow_p can take bitfield */
+ /* and GCC permits bitfields for integers other than int */
+ # define ADD_OVERFLOW_FIXNUM_P(a, b) \
+     __extension__ ({ \
+         struct { long fixnum : sizeof(long) * CHAR_BIT - 1; } c = { 0 }; \
+         __builtin_add_overflow_p((a), (b), c.fixnum); \
+     })
+ #else
+ # define ADD_OVERFLOW_FIXNUM_P(a, b) \
+     ADD_OVERFLOW_SIGNED_INTEGER_P(a, b, FIXNUM_MIN, FIXNUM_MAX)
+ #endif
+
+ #if defined(ADD_OVERFLOW_P) && defined(USE___BUILTIN_ADD_OVERFLOW_LONG_LONG)
+ # define ADD_OVERFLOW_LONG_LONG_P(a, b) ADD_OVERFLOW_P(a, b)
+ #else
+ # define ADD_OVERFLOW_LONG_LONG_P(a, b) ADD_OVERFLOW_SIGNED_INTEGER_P(a, b, LLONG_MIN, LLONG_MAX)
+ #endif
+
+ #ifdef ADD_OVERFLOW_P
+ # define ADD_OVERFLOW_LONG_P(a, b) ADD_OVERFLOW_P(a, b)
+ # define ADD_OVERFLOW_INT_P(a, b) ADD_OVERFLOW_P(a, b)
+ #else
+ # define ADD_OVERFLOW_LONG_P(a, b) ADD_OVERFLOW_SIGNED_INTEGER_P(a, b, LONG_MIN, LONG_MAX)
+ # define ADD_OVERFLOW_INT_P(a, b) ADD_OVERFLOW_SIGNED_INTEGER_P(a, b, INT_MIN, INT_MAX)
+ #endif
+ #endif
+
+ #ifndef SUB_OVERFLOW_SIGNED_INTEGER_P
+ #if __has_builtin(__builtin_sub_overflow_p)
+ # define SUB_OVERFLOW_P(a, b) \
+     __builtin_sub_overflow_p((a), (b), (__typeof__(a * b))0)
+ #elif __has_builtin(__builtin_sub_overflow)
+ # define SUB_OVERFLOW_P(a, b) \
+     __extension__ ({ __typeof__(a) c; __builtin_sub_overflow((a), (b), &c); })
+ #endif
+
+ #define SUB_OVERFLOW_SIGNED_INTEGER_P(a, b, min, max) ( \
+     (b) > 0 ? (a) < (min) + (b) : (a) > (max) + (b))
+
+ #if __has_builtin(__builtin_sub_overflow_p)
+ /* __builtin_sub_overflow_p can take bitfield */
+ /* and GCC permits bitfields for integers other than int */
+ # define SUB_OVERFLOW_FIXNUM_P(a, b) \
+     __extension__ ({ \
+         struct { long fixnum : sizeof(long) * CHAR_BIT - 1; } c = { 0 }; \
+         __builtin_sub_overflow_p((a), (b), c.fixnum); \
+     })
+ #else
+ # define SUB_OVERFLOW_FIXNUM_P(a, b) \
+     SUB_OVERFLOW_SIGNED_INTEGER_P(a, b, FIXNUM_MIN, FIXNUM_MAX)
+ #endif
+
+ #if defined(SUB_OVERFLOW_P) && defined(USE___BUILTIN_SUB_OVERFLOW_LONG_LONG)
+ # define SUB_OVERFLOW_LONG_LONG_P(a, b) SUB_OVERFLOW_P(a, b)
+ #else
+ # define SUB_OVERFLOW_LONG_LONG_P(a, b) SUB_OVERFLOW_SIGNED_INTEGER_P(a, b, LLONG_MIN, LLONG_MAX)
+ #endif
+
+ #ifdef SUB_OVERFLOW_P
+ # define SUB_OVERFLOW_LONG_P(a, b) SUB_OVERFLOW_P(a, b)
+ # define SUB_OVERFLOW_INT_P(a, b) SUB_OVERFLOW_P(a, b)
+ #else
+ # define SUB_OVERFLOW_LONG_P(a, b) SUB_OVERFLOW_SIGNED_INTEGER_P(a, b, LONG_MIN, LONG_MAX)
+ # define SUB_OVERFLOW_INT_P(a, b) SUB_OVERFLOW_SIGNED_INTEGER_P(a, b, INT_MIN, INT_MAX)
+ #endif
+ #endif
+
+ #ifdef HAVE_UINT128_T
+ # define bit_length(x) \
+     (unsigned int) \
+     (sizeof(x) <= sizeof(int32_t) ? 32 - nlz_int32((uint32_t)(x)) : \
+      sizeof(x) <= sizeof(int64_t) ? 64 - nlz_int64((uint64_t)(x)) : \
+      128 - nlz_int128((uint128_t)(x)))
+ #else
+ # define bit_length(x) \
+     (unsigned int) \
+     (sizeof(x) <= sizeof(int32_t) ? 32 - nlz_int32((uint32_t)(x)) : \
+      64 - nlz_int64((uint64_t)(x)))
+ #endif
+
+ #ifndef swap16
+ # define swap16 ruby_swap16
+ #endif
+
+ #ifndef swap32
+ # define swap32 ruby_swap32
+ #endif
+
+ #ifndef swap64
+ # define swap64 ruby_swap64
+ #endif
+
+ static inline uint16_t ruby_swap16(uint16_t);
+ static inline uint32_t ruby_swap32(uint32_t);
+ static inline uint64_t ruby_swap64(uint64_t);
+ static inline unsigned nlz_int(unsigned x);
+ static inline unsigned nlz_long(unsigned long x);
+ static inline unsigned nlz_long_long(unsigned long long x);
+ static inline unsigned nlz_intptr(uintptr_t x);
+ static inline unsigned nlz_int32(uint32_t x);
+ static inline unsigned nlz_int64(uint64_t x);
+ #ifdef HAVE_UINT128_T
+ static inline unsigned nlz_int128(uint128_t x);
+ #endif
+ static inline unsigned rb_popcount32(uint32_t x);
+ static inline unsigned rb_popcount64(uint64_t x);
+ static inline unsigned rb_popcount_intptr(uintptr_t x);
+ static inline int ntz_int32(uint32_t x);
+ static inline int ntz_int64(uint64_t x);
+ static inline int ntz_intptr(uintptr_t x);
+ static inline VALUE RUBY_BIT_ROTL(VALUE, int);
+ static inline VALUE RUBY_BIT_ROTR(VALUE, int);
+
+ static inline uint16_t
+ ruby_swap16(uint16_t x)
+ {
+ #if __has_builtin(__builtin_bswap16)
+     return __builtin_bswap16(x);
+
+ #elif MSC_VERSION_SINCE(1310)
+     return _byteswap_ushort(x);
+
+ #else
+     return (x << 8) | (x >> 8);
+
+ #endif
+ }
+
+ static inline uint32_t
+ ruby_swap32(uint32_t x)
+ {
+ #if __has_builtin(__builtin_bswap32)
+     return __builtin_bswap32(x);
+
+ #elif MSC_VERSION_SINCE(1310)
+     return _byteswap_ulong(x);
+
+ #else
+     x = ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >> 16);
+     x = ((x & 0x00FF00FF) << 8) | ((x & 0xFF00FF00) >> 8);
+     return x;
+
+ #endif
+ }
+
+ static inline uint64_t
+ ruby_swap64(uint64_t x)
+ {
+ #if __has_builtin(__builtin_bswap64)
+     return __builtin_bswap64(x);
+
+ #elif MSC_VERSION_SINCE(1310)
+     return _byteswap_uint64(x);
+
+ #else
+     x = ((x & 0x00000000FFFFFFFFULL) << 32) | ((x & 0xFFFFFFFF00000000ULL) >> 32);
+     x = ((x & 0x0000FFFF0000FFFFULL) << 16) | ((x & 0xFFFF0000FFFF0000ULL) >> 16);
+     x = ((x & 0x00FF00FF00FF00FFULL) << 8) | ((x & 0xFF00FF00FF00FF00ULL) >> 8);
+     return x;
+
+ #endif
+ }
+
+ static inline unsigned int
+ nlz_int32(uint32_t x)
+ {
+ #if defined(_MSC_VER) && defined(__AVX2__)
+     /* Note: It seems there is no such thing like __LZCNT__ predefined in MSVC.
+      * AMD CPUs have had this instruction for decades (since K10) but for
+      * Intel, Haswell is the oldest one. We need to use __AVX2__ for maximum
+      * safety. */
+     return (unsigned int)__lzcnt(x);
+
+ #elif defined(__x86_64__) && defined(__LZCNT__)
+     return (unsigned int)_lzcnt_u32(x);
+
+ #elif MSC_VERSION_SINCE(1400) /* &&! defined(__AVX2__) */
+     unsigned long r;
+     return _BitScanReverse(&r, x) ? (31 - (int)r) : 32;
+
+ #elif __has_builtin(__builtin_clz)
+     STATIC_ASSERT(sizeof_int, sizeof(int) * CHAR_BIT == 32);
+     return x ? (unsigned int)__builtin_clz(x) : 32;
+
+ #else
+     uint32_t y;
+     unsigned n = 32;
+     y = x >> 16; if (y) {n -= 16; x = y;}
+     y = x >> 8; if (y) {n -= 8; x = y;}
+     y = x >> 4; if (y) {n -= 4; x = y;}
+     y = x >> 2; if (y) {n -= 2; x = y;}
+     y = x >> 1; if (y) {return n - 2;}
+     return (unsigned int)(n - x);
+ #endif
+ }
+
+ static inline unsigned int
+ nlz_int64(uint64_t x)
+ {
+ #if defined(_MSC_VER) && defined(__AVX2__)
+     return (unsigned int)__lzcnt64(x);
+
+ #elif defined(__x86_64__) && defined(__LZCNT__)
+     return (unsigned int)_lzcnt_u64(x);
+
+ #elif defined(_WIN64) && MSC_VERSION_SINCE(1400) /* &&! defined(__AVX2__) */
+     unsigned long r;
+     return _BitScanReverse64(&r, x) ? (63u - (unsigned int)r) : 64;
+
+ #elif __has_builtin(__builtin_clzl)
+     if (x == 0) {
+         return 64;
+     }
+     else if (sizeof(long) * CHAR_BIT == 64) {
+         return (unsigned int)__builtin_clzl((unsigned long)x);
+     }
+     else if (sizeof(long long) * CHAR_BIT == 64) {
+         return (unsigned int)__builtin_clzll((unsigned long long)x);
+     }
+     else {
+         /* :FIXME: Is there a way to make this branch a compile-time error? */
+         UNREACHABLE_RETURN(~0);
+     }
+
+ #else
+     uint64_t y;
+     unsigned int n = 64;
+     y = x >> 32; if (y) {n -= 32; x = y;}
+     y = x >> 16; if (y) {n -= 16; x = y;}
+     y = x >> 8; if (y) {n -= 8; x = y;}
+     y = x >> 4; if (y) {n -= 4; x = y;}
+     y = x >> 2; if (y) {n -= 2; x = y;}
+     y = x >> 1; if (y) {return n - 2;}
+     return (unsigned int)(n - x);
+
+ #endif
+ }
+
+ #ifdef HAVE_UINT128_T
+ static inline unsigned int
+ nlz_int128(uint128_t x)
+ {
+     uint64_t y = (uint64_t)(x >> 64);
+
+     if (x == 0) {
+         return 128;
+     }
+     else if (y == 0) {
+         return (unsigned int)nlz_int64(x) + 64;
+     }
+     else {
+         return (unsigned int)nlz_int64(y);
+     }
+ }
+ #endif
+
+ static inline unsigned int
+ nlz_int(unsigned int x)
+ {
+     if (sizeof(unsigned int) * CHAR_BIT == 32) {
+         return nlz_int32((uint32_t)x);
+     }
+     else if (sizeof(unsigned int) * CHAR_BIT == 64) {
+         return nlz_int64((uint64_t)x);
+     }
+     else {
+         UNREACHABLE_RETURN(~0);
+     }
+ }
+
+ static inline unsigned int
+ nlz_long(unsigned long x)
+ {
+     if (sizeof(unsigned long) * CHAR_BIT == 32) {
+         return nlz_int32((uint32_t)x);
+     }
+     else if (sizeof(unsigned long) * CHAR_BIT == 64) {
+         return nlz_int64((uint64_t)x);
+     }
+     else {
+         UNREACHABLE_RETURN(~0);
+     }
+ }
+
+ static inline unsigned int
+ nlz_long_long(unsigned long long x)
+ {
+     if (sizeof(unsigned long long) * CHAR_BIT == 64) {
+         return nlz_int64((uint64_t)x);
+     }
+ #ifdef HAVE_UINT128_T
+     else if (sizeof(unsigned long long) * CHAR_BIT == 128) {
+         return nlz_int128((uint128_t)x);
+     }
+ #endif
+     else {
+         UNREACHABLE_RETURN(~0);
+     }
+ }
+
+ static inline unsigned int
+ nlz_intptr(uintptr_t x)
+ {
+     if (sizeof(uintptr_t) == sizeof(unsigned int)) {
+         return nlz_int((unsigned int)x);
+     }
+     if (sizeof(uintptr_t) == sizeof(unsigned long)) {
+         return nlz_long((unsigned long)x);
+     }
+     if (sizeof(uintptr_t) == sizeof(unsigned long long)) {
+         return nlz_long_long((unsigned long long)x);
+     }
+     else {
+         UNREACHABLE_RETURN(~0);
+     }
+ }
+
+ static inline unsigned int
+ rb_popcount32(uint32_t x)
+ {
+ #if defined(_MSC_VER) && defined(__AVX__)
+     /* Note: CPUs since Nehalem and Barcelona have had this instruction so SSE
+      * 4.2 should suffice, but it seems there is no such thing like __SSE_4_2__
+      * predefined macro in MSVC. They do have __AVX__ so use it instead. */
+     return (unsigned int)__popcnt(x);
+
+ #elif __has_builtin(__builtin_popcount)
+     STATIC_ASSERT(sizeof_int, sizeof(int) * CHAR_BIT >= 32);
+     return (unsigned int)__builtin_popcount(x);
+
+ #else
+     x = (x & 0x55555555) + (x >> 1 & 0x55555555);
+     x = (x & 0x33333333) + (x >> 2 & 0x33333333);
+     x = (x & 0x07070707) + (x >> 4 & 0x07070707);
+     x = (x & 0x000f000f) + (x >> 8 & 0x000f000f);
+     x = (x & 0x0000001f) + (x >>16 & 0x0000001f);
+     return (unsigned int)x;
+
+ #endif
+ }
+
+ static inline unsigned int
+ rb_popcount64(uint64_t x)
+ {
+ #if defined(_MSC_VER) && defined(__AVX__)
+     return (unsigned int)__popcnt64(x);
+
+ #elif __has_builtin(__builtin_popcount)
+     if (sizeof(long) * CHAR_BIT == 64) {
+         return (unsigned int)__builtin_popcountl((unsigned long)x);
+     }
+     else if (sizeof(long long) * CHAR_BIT == 64) {
+         return (unsigned int)__builtin_popcountll((unsigned long long)x);
+     }
+     else {
+         /* :FIXME: Is there a way to make this branch a compile-time error? */
+         UNREACHABLE_RETURN(~0);
+     }
+
+ #else
+     x = (x & 0x5555555555555555) + (x >> 1 & 0x5555555555555555);
+     x = (x & 0x3333333333333333) + (x >> 2 & 0x3333333333333333);
+     x = (x & 0x0707070707070707) + (x >> 4 & 0x0707070707070707);
+     x = (x & 0x000f000f000f000f) + (x >> 8 & 0x000f000f000f000f);
+     x = (x & 0x0000001f0000001f) + (x >>16 & 0x0000001f0000001f);
+     x = (x & 0x000000000000003f) + (x >>32 & 0x000000000000003f);
+     return (unsigned int)x;
+
+ #endif
+ }
+
+ static inline unsigned int
+ rb_popcount_intptr(uintptr_t x)
+ {
+     if (sizeof(uintptr_t) * CHAR_BIT == 64) {
+         return rb_popcount64((uint64_t)x);
+     }
+     else if (sizeof(uintptr_t) * CHAR_BIT == 32) {
+         return rb_popcount32((uint32_t)x);
+     }
+     else {
+         UNREACHABLE_RETURN(~0);
+     }
+ }
+
+ static inline int
+ ntz_int32(uint32_t x)
+ {
+ #if defined(__x86_64__) && defined(__BMI__)
+     return (unsigned)_tzcnt_u32(x);
+
+ #elif MSC_VERSION_SINCE(1400)
+     /* :FIXME: Is there any way to issue TZCNT instead of BSF, apart from using
+      * assembly? Because issuing LZCNT seems possible (see nlz.h). */
+     unsigned long r;
+     return _BitScanForward(&r, x) ? (int)r : 32;
+
+ #elif __has_builtin(__builtin_ctz)
+     STATIC_ASSERT(sizeof_int, sizeof(int) * CHAR_BIT == 32);
+     return x ? (unsigned)__builtin_ctz(x) : 32;
+
+ #else
+     return rb_popcount32((~x) & (x-1));
+
+ #endif
+ }
+
+ static inline int
+ ntz_int64(uint64_t x)
+ {
+ #if defined(__x86_64__) && defined(__BMI__)
+     return (unsigned)_tzcnt_u64(x);
+
+ #elif defined(_WIN64) && MSC_VERSION_SINCE(1400)
+     unsigned long r;
+     return _BitScanForward64(&r, x) ? (int)r : 64;
+
+ #elif __has_builtin(__builtin_ctzl)
+     if (x == 0) {
+         return 64;
+     }
+     else if (sizeof(long) * CHAR_BIT == 64) {
+         return (unsigned)__builtin_ctzl((unsigned long)x);
+     }
+     else if (sizeof(long long) * CHAR_BIT == 64) {
+         return (unsigned)__builtin_ctzll((unsigned long long)x);
+     }
+     else {
+         /* :FIXME: Is there a way to make this branch a compile-time error? */
+         UNREACHABLE_RETURN(~0);
+     }
+
+ #else
+     return rb_popcount64((~x) & (x-1));
+
+ #endif
+ }
+
+ static inline int
+ ntz_intptr(uintptr_t x)
+ {
+     if (sizeof(uintptr_t) * CHAR_BIT == 64) {
+         return ntz_int64((uint64_t)x);
+     }
+     else if (sizeof(uintptr_t) * CHAR_BIT == 32) {
+         return ntz_int32((uint32_t)x);
+     }
+     else {
+         UNREACHABLE_RETURN(~0);
+     }
+ }
+
+ static inline VALUE
+ RUBY_BIT_ROTL(VALUE v, int n)
+ {
+ #if __has_builtin(__builtin_rotateleft32) && (SIZEOF_VALUE * CHAR_BIT == 32)
+     return __builtin_rotateleft32(v, n);
+
+ #elif __has_builtin(__builtin_rotateleft64) && (SIZEOF_VALUE * CHAR_BIT == 64)
+     return __builtin_rotateleft64(v, n);
+
+ #elif MSC_VERSION_SINCE(1310) && (SIZEOF_VALUE * CHAR_BIT == 32)
+     return _rotl(v, n);
+
+ #elif MSC_VERSION_SINCE(1310) && (SIZEOF_VALUE * CHAR_BIT == 64)
+     return _rotl64(v, n);
+
+ #elif defined(_lrotl) && (SIZEOF_VALUE == SIZEOF_LONG)
+     return _lrotl(v, n);
+
+ #else
+     const int m = (sizeof(VALUE) * CHAR_BIT) - 1;
+     return (v << (n & m)) | (v >> (-n & m));
+ #endif
+ }
+
+ static inline VALUE
+ RUBY_BIT_ROTR(VALUE v, int n)
+ {
+ #if __has_builtin(__builtin_rotateright32) && (SIZEOF_VALUE * CHAR_BIT == 32)
+     return __builtin_rotateright32(v, n);
+
+ #elif __has_builtin(__builtin_rotateright64) && (SIZEOF_VALUE * CHAR_BIT == 64)
+     return __builtin_rotateright64(v, n);
+
+ #elif MSC_VERSION_SINCE(1310) && (SIZEOF_VALUE * CHAR_BIT == 32)
+     return _rotr(v, n);
+
+ #elif MSC_VERSION_SINCE(1310) && (SIZEOF_VALUE * CHAR_BIT == 64)
+     return _rotr64(v, n);
+
+ #elif defined(_lrotr) && (SIZEOF_VALUE == SIZEOF_LONG)
+     return _lrotr(v, n);
+
+ #else
+     const int m = (sizeof(VALUE) * CHAR_BIT) - 1;
+     return (v << (-n & m)) | (v >> (n & m));
+ #endif
+ }
+
+ #endif /* INTERNAL_BITS_H */
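
For orientation, the following standalone sketch (illustrative only, and not part of the vendored header or of the gem) re-implements two of the portable fallback paths above: the shift-and-test leading-zero count from the #else branch of nlz_int32() and the masked-shift rotation from the #else branch of RUBY_BIT_ROTL(). It compiles with any C compiler and needs none of the Ruby headers; the demo_nlz32 and demo_rotl32 names are invented for this example.

    /* Illustrative sketch only: a plain-C restatement of the fallback
     * branches above, so their semantics can be checked in isolation. */
    #include <assert.h>
    #include <limits.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Same shift-and-test cascade as the #else branch of nlz_int32(). */
    static unsigned demo_nlz32(uint32_t x)
    {
        uint32_t y;
        unsigned n = 32;
        y = x >> 16; if (y) {n -= 16; x = y;}
        y = x >> 8;  if (y) {n -= 8;  x = y;}
        y = x >> 4;  if (y) {n -= 4;  x = y;}
        y = x >> 2;  if (y) {n -= 2;  x = y;}
        y = x >> 1;  if (y) {return n - 2;}
        return n - x;
    }

    /* Same masked-shift rotation as the #else branch of RUBY_BIT_ROTL(),
     * written for uint32_t instead of VALUE. */
    static uint32_t demo_rotl32(uint32_t v, int n)
    {
        const int m = (int)(sizeof(v) * CHAR_BIT) - 1;
        return (v << (n & m)) | (v >> (-n & m));
    }

    int main(void)
    {
        assert(demo_nlz32(0) == 32);          /* no bits set    */
        assert(demo_nlz32(1) == 31);          /* only bit 0 set */
        assert(demo_nlz32(0x80000000u) == 0); /* top bit set    */
        assert(demo_rotl32(0x80000001u, 1) == 0x00000003u);
        puts("fallback bit helpers behave as expected");
        return 0;
    }

Masking both shift counts with m keeps them in the 0..31 range, so the expression never shifts a 32-bit value by its full width (which would be undefined behaviour); that is why the header uses this form as its last-resort rotate.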