bitset 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.markdown +12 -5
- data/Rakefile +3 -1
- data/VERSION +1 -1
- data/bitset.gemspec +20 -29
- data/ext/bitset/bitset.c +82 -43
- data/ext/bitset/builtin.h +1390 -0
- data/ext/bitset/exact-int.h +229 -0
- data/ext/bitset/extconf.rb +1 -1
- data/lib/{bitset/bitset.rb → bitset.rb} +2 -0
- data/spec/bitset_spec.rb +31 -2
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d7264d2aacb2a035cc9d03a759e3d7df802490d7
|
4
|
+
data.tar.gz: b67c9f93d7b5dc611807cdd148ffdcf871588bbf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f5374f4557cf95b59488c7e84f9af6865b8c2b92dc605369e7e756e4be6fc1abfbbbef85f6198fe192ed43a4594bd59519da98bcb8577bd8024961429ac1c6a9
|
7
|
+
data.tar.gz: 77c7dd54f09f8f7f0e439324a8e01c22959e6d6a7e2d517607eb803724099173012fbd85cf736aef61a8e3519d2ac01fde340e298094e66a917f0418e4f0841a
|
data/README.markdown
CHANGED
@@ -52,6 +52,8 @@ Obviously you can also set and clear bits...
|
|
52
52
|
>> bitset.clear(1, 5)
|
53
53
|
=> 00010001
|
54
54
|
|
55
|
+
Arrays of ints can also be passed to #clear and #set (c/o brendon9x).
|
56
|
+
|
55
57
|
The point of a bitset is to be, effectively, an array of single bits. It should
|
56
58
|
support basic set and bitwise operations. So, let's look at a few of those.
|
57
59
|
|
@@ -89,8 +91,10 @@ support basic set and bitwise operations. So, let's look at a few of those.
|
|
89
91
|
>> a.set? 6
|
90
92
|
=> true
|
91
93
|
|
92
|
-
# Return a new Bitset composed of bits #1, #3, #5, #4, and #1
|
93
|
-
|
94
|
+
# Return a new Bitset composed of bits #1, #3, #5, #4, and #1
|
95
|
+
# again. Unlike Array#values_at, this function currently only
|
96
|
+
# accepts an array of Fixnums as its argument.
|
97
|
+
>> a.values_at [1,3,5,4,1]
|
94
98
|
=> 00110
|
95
99
|
|
96
100
|
# Tell whether all of the given bit numbers are clear
|
@@ -116,15 +120,15 @@ support basic set and bitwise operations. So, let's look at a few of those.
|
|
116
120
|
7
|
117
121
|
|
118
122
|
# Return an array of the positions of all set bits
|
119
|
-
>> b.each_set
|
123
|
+
>> b.each_set # AKA b.to_a
|
120
124
|
=> [1, 3, 5, 7]
|
121
125
|
|
122
126
|
# The following methods modify a Bitset in place very quickly:
|
123
127
|
>> a.intersect!(b) # like a &= b
|
124
128
|
>> a.union!(b) # like a |= b
|
125
129
|
>> a.difference!(b) # like a -= b
|
126
|
-
>> a.xor!(b) #
|
127
|
-
>> a.reset!
|
130
|
+
>> a.xor!(b) # like a ^= b
|
131
|
+
>> a.reset! # Zeroes all bits
|
128
132
|
|
129
133
|
# Above, "like" does not mean "identical to." a |= b creates a new
|
130
134
|
# Bitset object. a.union!(b) changes an existing object which
|
@@ -134,6 +138,9 @@ support basic set and bitwise operations. So, let's look at a few of those.
|
|
134
138
|
# equivalents between bitsets of different sizes will raise an
|
135
139
|
# ArgumentError.
|
136
140
|
|
141
|
+
>> b.to_binary_array
|
142
|
+
=> [0, 1, 0, 1, 0, 1, 0, 1]
|
143
|
+
|
137
144
|
# b.dup and b.clone are also available.
|
138
145
|
|
139
146
|
# Marshal.dump and Marshal.load are also supported. If you want to
|
data/Rakefile
CHANGED
@@ -15,9 +15,11 @@ Jeweler::Tasks.new do |gem|
|
|
15
15
|
gem.homepage = "http://github.com/ericboesch/bitset"
|
16
16
|
gem.license = "MIT"
|
17
17
|
gem.summary = 'Bitset implementation.'
|
18
|
-
gem.description = 'A fast C-based Bitset. It supports the standard set operations as well as operations you may expect on bit arrays
|
18
|
+
gem.description = 'A fast C-based Bitset. It supports the standard set operations as well as operations you may expect on bit arrays,such as popcount.'
|
19
19
|
gem.email = "eric.boesch@nist.gov"
|
20
20
|
gem.authors = ["Tyler McMullen"]
|
21
|
+
# Other significant contributions from Eric Boesch, Gabriel Formica, and Brendon McLean.
|
22
|
+
|
21
23
|
end
|
22
24
|
Jeweler::RubygemsDotOrgTasks.new
|
23
25
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0.
|
1
|
+
1.0.1
|
data/bitset.gemspec
CHANGED
@@ -2,49 +2,40 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
+
# stub: bitset 1.0.1 ruby lib
|
6
|
+
# stub: ext/bitset/extconf.rb
|
5
7
|
|
6
8
|
Gem::Specification.new do |s|
|
7
|
-
s.name =
|
8
|
-
s.version = "0.
|
9
|
+
s.name = "bitset".freeze
|
10
|
+
s.version = "1.0.1"
|
9
11
|
|
10
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
-
s.
|
12
|
-
s.
|
13
|
-
s.
|
14
|
-
s.
|
15
|
-
s.
|
12
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
13
|
+
s.require_paths = ["lib".freeze]
|
14
|
+
s.authors = ["Tyler McMullen".freeze]
|
15
|
+
s.date = "2017-05-26"
|
16
|
+
s.description = "A fast C-based Bitset. It supports the standard set operations as well as operations you may expect on bit arrays,such as popcount.".freeze
|
17
|
+
s.email = "eric.boesch@nist.gov".freeze
|
18
|
+
s.extensions = ["ext/bitset/extconf.rb".freeze]
|
16
19
|
s.extra_rdoc_files = [
|
17
20
|
"LICENSE.txt",
|
18
|
-
"README.
|
21
|
+
"README.markdown"
|
19
22
|
]
|
20
23
|
s.files = [
|
21
24
|
"LICENSE.txt",
|
22
|
-
"README.
|
25
|
+
"README.markdown",
|
23
26
|
"Rakefile",
|
24
27
|
"VERSION",
|
25
28
|
"bitset.gemspec",
|
26
29
|
"ext/bitset/bitset.c",
|
30
|
+
"ext/bitset/builtin.h",
|
31
|
+
"ext/bitset/exact-int.h",
|
27
32
|
"ext/bitset/extconf.rb",
|
28
|
-
"lib/bitset
|
33
|
+
"lib/bitset.rb",
|
29
34
|
"spec/bitset_spec.rb"
|
30
35
|
]
|
31
|
-
s.homepage =
|
32
|
-
s.licenses = ["MIT"]
|
33
|
-
s.
|
34
|
-
s.
|
35
|
-
s.summary = %q{Bitset implementation.}
|
36
|
-
s.test_files = [
|
37
|
-
"spec/bitset_spec.rb"
|
38
|
-
]
|
39
|
-
|
40
|
-
if s.respond_to? :specification_version then
|
41
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
42
|
-
s.specification_version = 3
|
43
|
-
|
44
|
-
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
45
|
-
else
|
46
|
-
end
|
47
|
-
else
|
48
|
-
end
|
36
|
+
s.homepage = "http://github.com/ericboesch/bitset".freeze
|
37
|
+
s.licenses = ["MIT".freeze]
|
38
|
+
s.rubygems_version = "2.6.12".freeze
|
39
|
+
s.summary = "Bitset implementation.".freeze
|
49
40
|
end
|
50
41
|
|
data/ext/bitset/bitset.c
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#include "ruby.h"
|
2
|
+
#include "builtin.h"
|
2
3
|
|
3
4
|
#include <stdint.h>
|
4
5
|
#include <string.h>
|
@@ -105,11 +106,21 @@ static VALUE rb_bitset_aset(VALUE self, VALUE index, VALUE value) {
|
|
105
106
|
static VALUE rb_bitset_set(int argc, VALUE * argv, VALUE self) {
|
106
107
|
int i;
|
107
108
|
Bitset * bs = get_bitset(self);
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
109
|
+
|
110
|
+
if (argc == 1 && rb_obj_is_kind_of(argv[0], rb_const_get(rb_cObject, rb_intern("Array")))) {
|
111
|
+
for(i = 0; i < RARRAY_LEN(argv[0]); i++) {
|
112
|
+
VALUE index = RARRAY_PTR(argv[0])[i];
|
113
|
+
int idx = NUM2INT(index);
|
114
|
+
validate_index(bs, idx);
|
115
|
+
_set_bit(bs, idx);
|
116
|
+
}
|
117
|
+
} else {
|
118
|
+
for(i = 0; i < argc; i++) {
|
119
|
+
VALUE index = argv[i];
|
120
|
+
int idx = NUM2INT(index);
|
121
|
+
validate_index(bs, idx);
|
122
|
+
_set_bit(bs, idx);
|
123
|
+
}
|
113
124
|
}
|
114
125
|
return Qtrue;
|
115
126
|
}
|
@@ -117,11 +128,21 @@ static VALUE rb_bitset_set(int argc, VALUE * argv, VALUE self) {
|
|
117
128
|
static VALUE rb_bitset_clear(int argc, VALUE * argv, VALUE self) {
|
118
129
|
int i;
|
119
130
|
Bitset * bs = get_bitset(self);
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
131
|
+
|
132
|
+
if (argc == 1 && rb_obj_is_kind_of(argv[0], rb_const_get(rb_cObject, rb_intern("Array")))) {
|
133
|
+
for(i = 0; i < RARRAY_LEN(argv[0]); i++) {
|
134
|
+
VALUE index = RARRAY_PTR(argv[0])[i];
|
135
|
+
int idx = NUM2INT(index);
|
136
|
+
validate_index(bs, idx);
|
137
|
+
_clear_bit(bs, idx);
|
138
|
+
}
|
139
|
+
} else {
|
140
|
+
for(i = 0; i < argc; i++) {
|
141
|
+
VALUE index = argv[i];
|
142
|
+
int idx = NUM2INT(index);
|
143
|
+
validate_index(bs, idx);
|
144
|
+
_clear_bit(bs, idx);
|
145
|
+
}
|
125
146
|
}
|
126
147
|
return Qtrue;
|
127
148
|
}
|
@@ -158,7 +179,7 @@ static VALUE rb_bitset_cardinality(VALUE self) {
|
|
158
179
|
int max = INTS(bs);
|
159
180
|
int count = 0;
|
160
181
|
for(i = 0; i < max; i++) {
|
161
|
-
count +=
|
182
|
+
count += psnip_builtin_popcount64(bs->data[i]);
|
162
183
|
}
|
163
184
|
return INT2NUM(count);
|
164
185
|
}
|
@@ -166,13 +187,14 @@ static VALUE rb_bitset_cardinality(VALUE self) {
|
|
166
187
|
static VALUE rb_bitset_intersect(VALUE self, VALUE other) {
|
167
188
|
Bitset * bs = get_bitset(self);
|
168
189
|
Bitset * other_bs = get_bitset(other);
|
169
|
-
|
190
|
+
Bitset * new_bs;
|
191
|
+
int max = INTS(bs);
|
192
|
+
int i;
|
170
193
|
|
171
|
-
|
194
|
+
verify_equal_size(bs, other_bs);
|
195
|
+
new_bs = bitset_new();
|
172
196
|
bitset_setup(new_bs, bs->len);
|
173
197
|
|
174
|
-
int max = INTS(bs);
|
175
|
-
int i;
|
176
198
|
for(i = 0; i < max; i++) {
|
177
199
|
uint64_t segment = bs->data[i];
|
178
200
|
uint64_t other_segment = other_bs->data[i];
|
@@ -185,13 +207,14 @@ static VALUE rb_bitset_intersect(VALUE self, VALUE other) {
|
|
185
207
|
static VALUE rb_bitset_union(VALUE self, VALUE other) {
|
186
208
|
Bitset * bs = get_bitset(self);
|
187
209
|
Bitset * other_bs = get_bitset(other);
|
188
|
-
|
210
|
+
Bitset * new_bs;
|
211
|
+
int max = INTS(bs);
|
212
|
+
int i;
|
189
213
|
|
190
|
-
|
214
|
+
verify_equal_size(bs, other_bs);
|
215
|
+
new_bs = bitset_new();
|
191
216
|
bitset_setup(new_bs, bs->len);
|
192
217
|
|
193
|
-
int max = INTS(bs);
|
194
|
-
int i;
|
195
218
|
for(i = 0; i < max; i++) {
|
196
219
|
uint64_t segment = bs->data[i];
|
197
220
|
uint64_t other_segment = other_bs->data[i];
|
@@ -204,13 +227,14 @@ static VALUE rb_bitset_union(VALUE self, VALUE other) {
|
|
204
227
|
static VALUE rb_bitset_difference(VALUE self, VALUE other) {
|
205
228
|
Bitset * bs = get_bitset(self);
|
206
229
|
Bitset * other_bs = get_bitset(other);
|
207
|
-
|
230
|
+
Bitset * new_bs;
|
231
|
+
int max = INTS(bs);
|
232
|
+
int i;
|
208
233
|
|
209
|
-
|
234
|
+
verify_equal_size(bs, other_bs);
|
235
|
+
new_bs = bitset_new();
|
210
236
|
bitset_setup(new_bs, bs->len);
|
211
237
|
|
212
|
-
int max = INTS(bs);
|
213
|
-
int i;
|
214
238
|
for(i = 0; i < max; i++) {
|
215
239
|
uint64_t segment = bs->data[i];
|
216
240
|
uint64_t other_segment = other_bs->data[i];
|
@@ -223,13 +247,14 @@ static VALUE rb_bitset_difference(VALUE self, VALUE other) {
|
|
223
247
|
static VALUE rb_bitset_xor(VALUE self, VALUE other) {
|
224
248
|
Bitset * bs = get_bitset(self);
|
225
249
|
Bitset * other_bs = get_bitset(other);
|
226
|
-
|
250
|
+
Bitset * new_bs;
|
251
|
+
int max = INTS(bs);
|
252
|
+
int i;
|
227
253
|
|
228
|
-
|
254
|
+
verify_equal_size(bs, other_bs);
|
255
|
+
new_bs = bitset_new();
|
229
256
|
bitset_setup(new_bs, bs->len);
|
230
257
|
|
231
|
-
int max = INTS(bs);
|
232
|
-
int i;
|
233
258
|
for(i = 0; i < max; i++) {
|
234
259
|
uint64_t segment = bs->data[i];
|
235
260
|
uint64_t other_segment = other_bs->data[i];
|
@@ -241,13 +266,11 @@ static VALUE rb_bitset_xor(VALUE self, VALUE other) {
|
|
241
266
|
|
242
267
|
static VALUE rb_bitset_not(VALUE self) {
|
243
268
|
Bitset * bs = get_bitset(self);
|
244
|
-
|
245
269
|
Bitset * new_bs = bitset_new();
|
246
|
-
bitset_setup(new_bs, bs->len);
|
247
|
-
|
248
270
|
int max = INTS(bs);
|
249
|
-
|
250
271
|
int i;
|
272
|
+
|
273
|
+
bitset_setup(new_bs, bs->len);
|
251
274
|
for(i = 0; i < max; i++) {
|
252
275
|
uint64_t segment = bs->data[i];
|
253
276
|
new_bs->data[i] = ~segment;
|
@@ -274,11 +297,11 @@ static VALUE rb_bitset_to_s(VALUE self) {
|
|
274
297
|
static VALUE rb_bitset_from_s(VALUE self, VALUE s) {
|
275
298
|
int length = RSTRING_LEN(s);
|
276
299
|
char* data = StringValuePtr(s);
|
277
|
-
|
278
300
|
Bitset * new_bs = bitset_new();
|
301
|
+
int i;
|
302
|
+
|
279
303
|
bitset_setup(new_bs, length);
|
280
304
|
|
281
|
-
int i;
|
282
305
|
for (i = 0; i < length; i++) {
|
283
306
|
if (data[i] == '1') {
|
284
307
|
_set_bit(new_bs, i);
|
@@ -298,7 +321,7 @@ static VALUE rb_bitset_hamming(VALUE self, VALUE other) {
|
|
298
321
|
for(i = 0; i < max; i++) {
|
299
322
|
uint64_t segment = bs->data[i];
|
300
323
|
uint64_t other_segment = other_bs->data[i];
|
301
|
-
count +=
|
324
|
+
count += psnip_builtin_popcount64(segment ^ other_segment);
|
302
325
|
}
|
303
326
|
|
304
327
|
return INT2NUM(count);
|
@@ -318,7 +341,7 @@ static VALUE rb_bitset_each(VALUE self) {
|
|
318
341
|
static VALUE rb_bitset_marshall_dump(VALUE self) {
|
319
342
|
Bitset * bs = get_bitset(self);
|
320
343
|
VALUE hash = rb_hash_new();
|
321
|
-
VALUE data = rb_str_new(bs->data, BYTES(bs));
|
344
|
+
VALUE data = rb_str_new((const char *) bs->data, BYTES(bs));
|
322
345
|
|
323
346
|
rb_hash_aset(hash, ID2SYM(rb_intern("len")), UINT2NUM(bs->len));
|
324
347
|
rb_hash_aset(hash, ID2SYM(rb_intern("data")), data);
|
@@ -340,13 +363,25 @@ static VALUE rb_bitset_marshall_load(VALUE self, VALUE hash) {
|
|
340
363
|
return Qnil;
|
341
364
|
}
|
342
365
|
|
366
|
+
static VALUE rb_bitset_to_binary_array(VALUE self) {
|
367
|
+
Bitset * bs = get_bitset(self);
|
368
|
+
int i;
|
369
|
+
|
370
|
+
VALUE array = rb_ary_new2(bs->len / 2);
|
371
|
+
for(i = 0; i < bs->len; i++) {
|
372
|
+
rb_ary_push(array, INT2NUM(_get_bit(bs, i) > 0 ? 1 : 0));
|
373
|
+
}
|
374
|
+
|
375
|
+
return array;
|
376
|
+
}
|
377
|
+
|
343
378
|
static VALUE rb_bitset_dup(VALUE self) {
|
344
379
|
Bitset * bs = get_bitset(self);
|
380
|
+
int max = INTS(bs);
|
345
381
|
|
346
382
|
Bitset * new_bs = bitset_new();
|
347
383
|
bitset_setup(new_bs, bs->len);
|
348
384
|
|
349
|
-
int max = INTS(bs);
|
350
385
|
memcpy(new_bs->data, bs->data, max * sizeof(bs->data[0]));
|
351
386
|
return Data_Wrap_Struct(cBitset, 0, bitset_free, new_bs);
|
352
387
|
}
|
@@ -372,7 +407,7 @@ static VALUE rb_bitset_each_set(VALUE self) {
|
|
372
407
|
VALUE v;
|
373
408
|
|
374
409
|
if (!(segment & 1)) {
|
375
|
-
int shift =
|
410
|
+
int shift = psnip_builtin_ctz64(segment);
|
376
411
|
bit_position += shift;
|
377
412
|
segment >>= shift;
|
378
413
|
}
|
@@ -404,10 +439,9 @@ static VALUE rb_bitset_empty_p(VALUE self) {
|
|
404
439
|
return Qtrue;
|
405
440
|
}
|
406
441
|
|
407
|
-
static VALUE
|
442
|
+
static VALUE rb_bitset_values_at(VALUE self, VALUE index_array) {
|
408
443
|
int i;
|
409
444
|
Bitset * bs = get_bitset(self);
|
410
|
-
struct RArray *arr = RARRAY(index_array);
|
411
445
|
int blen = bs->len;
|
412
446
|
int alen = RARRAY_LEN(index_array);
|
413
447
|
VALUE *ptr = RARRAY_PTR(index_array);
|
@@ -443,10 +477,10 @@ static VALUE rb_bitset_equal(VALUE self, VALUE other) {
|
|
443
477
|
int i;
|
444
478
|
Bitset * bs = get_bitset(self);
|
445
479
|
Bitset * other_bs = get_bitset(other);
|
480
|
+
int max = INTS(bs);
|
446
481
|
|
447
482
|
if (bs->len != other_bs->len)
|
448
483
|
return Qfalse;
|
449
|
-
int max = INTS(bs);
|
450
484
|
for(i = 0; i < max; i++) {
|
451
485
|
if (bs->data[i] != other_bs->data[i]) {
|
452
486
|
return Qfalse;
|
@@ -464,10 +498,10 @@ inline uint64_t difference(uint64_t a, uint64_t b) { return a & ~b; }
|
|
464
498
|
static VALUE mutable(VALUE self, VALUE other, bitwise_op operator) {
|
465
499
|
Bitset * bs = get_bitset(self);
|
466
500
|
Bitset * other_bs = get_bitset(other);
|
467
|
-
verify_equal_size(bs, other_bs);
|
468
|
-
|
469
501
|
int max = INTS(bs);
|
470
502
|
int i;
|
503
|
+
verify_equal_size(bs, other_bs);
|
504
|
+
|
471
505
|
for(i = 0; i < max; i++) {
|
472
506
|
uint64_t segment = bs->data[i];
|
473
507
|
uint64_t other_segment = other_bs->data[i];
|
@@ -539,11 +573,16 @@ void Init_bitset() {
|
|
539
573
|
rb_define_singleton_method(cBitset, "from_s", rb_bitset_from_s, 1);
|
540
574
|
rb_define_method(cBitset, "marshal_dump", rb_bitset_marshall_dump, 0);
|
541
575
|
rb_define_method(cBitset, "marshal_load", rb_bitset_marshall_load, 1);
|
576
|
+
rb_define_method(cBitset, "to_binary_array", rb_bitset_to_binary_array, 0);
|
542
577
|
rb_define_method(cBitset, "dup", rb_bitset_dup, 0);
|
543
578
|
rb_define_alias(cBitset, "clone", "dup");
|
544
579
|
rb_define_method(cBitset, "each_set", rb_bitset_each_set, 0);
|
580
|
+
rb_define_alias(cBitset, "to_a", "each_set");
|
581
|
+
/* #each_set allows an optional block, and #to_a normally doesn't.
|
582
|
+
But an alias is simpler than having two different functions. */
|
545
583
|
rb_define_method(cBitset, "empty?", rb_bitset_empty_p, 0);
|
546
|
-
rb_define_method(cBitset, "
|
584
|
+
rb_define_method(cBitset, "values_at", rb_bitset_values_at, 1);
|
585
|
+
rb_define_alias(cBitset, "select_bits", "values_at");
|
547
586
|
rb_define_method(cBitset, "reverse", rb_bitset_reverse, 0);
|
548
587
|
rb_define_method(cBitset, "==", rb_bitset_equal, 1);
|
549
588
|
}
|
@@ -0,0 +1,1390 @@
|
|
1
|
+
/* Builtins and Intrinsics
|
2
|
+
* Portable Snippets - https://github.com/nemequ/portable-snippets
|
3
|
+
* Created by Evan Nemerson <evan@nemerson.com>
|
4
|
+
*
|
5
|
+
* To the extent possible under law, the authors have waived all
|
6
|
+
* copyright and related or neighboring rights to this code. For
|
7
|
+
* details, see the Creative Commons Zero 1.0 Universal license at
|
8
|
+
* https://creativecommons.org/publicdomain/zero/1.0/
|
9
|
+
*
|
10
|
+
* Some of these implementations are based on code from
|
11
|
+
* https://graphics.stanford.edu/~seander/bithacks.html which is also
|
12
|
+
* public domain (and a fantastic web site).
|
13
|
+
*/
|
14
|
+
|
15
|
+
#if !defined(PSNIP_BUILTIN_H)
|
16
|
+
#define PSNIP_BUILTIN_H
|
17
|
+
|
18
|
+
#if defined(HEDLEY_GCC_HAS_BUILTIN)
|
19
|
+
# define PSNIP_BUILTIN_GNU_HAS_BUILTIN(builtin,major,minor) HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,0)
|
20
|
+
#elif defined(__clang__) && defined(__has_builtin)
|
21
|
+
# define PSNIP_BUILTIN_GNU_HAS_BUILTIN(builtin,major,minor) __has_builtin(builtin)
|
22
|
+
#elif defined(__GNUC__)
|
23
|
+
# define PSNIP_BUILTIN_GNU_HAS_BUILTIN(builtin,major,minor) (__GNUC__ > major || (major == __GNUC__ && __GNUC_MINOR__ >= minor))
|
24
|
+
#else
|
25
|
+
# define PSNIP_BUILTIN_GNU_HAS_BUILTIN(builtin,major,minor) (0)
|
26
|
+
#endif
|
27
|
+
|
28
|
+
#if defined(HEDLEY_CLANG_HAS_BUILTIN)
|
29
|
+
# define PSNIP_BUILTIN_CLANG_HAS_BUILTIN(builtin) HEDLEY_CLANG_HAS_BUILTIN(builtin)
|
30
|
+
#elif defined(__has_builtin)
|
31
|
+
# define PSNIP_BUILTIN_CLANG_HAS_BUILTIN(builtin) __has_builtin(builtin)
|
32
|
+
#else
|
33
|
+
# define PSNIP_BUILTIN_CLANG_HAS_BUILTIN(builtin) (0)
|
34
|
+
#endif
|
35
|
+
|
36
|
+
#if defined(HEDLEY_MSVC_VERSION_CHECK)
|
37
|
+
# define PSNIP_BUILTIN_MSVC_HAS_INTRIN(intrin,major,minor) HEDLEY_MSVC_VERSION_CHECK(major,minor,0)
|
38
|
+
#elif !defined(_MSC_VER)
|
39
|
+
# define PSNIP_BUILTIN_MSVC_HAS_INTRIN(intrin,major,minor) (0)
|
40
|
+
#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
|
41
|
+
# define PSNIP_BUILTIN_MSVC_HAS_INTRIN(intrin,major,minor) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000)))
|
42
|
+
#elif defined(_MSC_VER) && (_MSC_VER >= 1200)
|
43
|
+
# define PSNIP_BUILTIN_MSVC_HAS_INTRIN(intrin,major,minor) (_MSC_FULL_VER >= ((major * 100000) + (minor * 1000)))
|
44
|
+
#else
|
45
|
+
# define PSNIP_BUILTIN_MSVC_HAS_INTRIN(intrin,major,minor) (_MSC_VER >= ((major * 100) + (minor)))
|
46
|
+
#endif
|
47
|
+
|
48
|
+
#if defined(_MSC_VER)
|
49
|
+
# include <intrin.h>
|
50
|
+
#endif
|
51
|
+
#include <limits.h>
|
52
|
+
#include <stdlib.h>
|
53
|
+
|
54
|
+
#if defined(__i386) || defined(_M_IX86) || \
|
55
|
+
defined(__amd64) || defined(_M_AMD64) || defined(__x86_64)
|
56
|
+
# if defined(_MSC_VER)
|
57
|
+
# define PSNIP_BUILTIN__ENABLE_X86
|
58
|
+
# elif defined(__GNUC__)
|
59
|
+
# define PSNIP_BUILTIN__ENABLE_X86
|
60
|
+
# include <x86intrin.h>
|
61
|
+
# endif
|
62
|
+
#endif
|
63
|
+
|
64
|
+
#if defined(__amd64) || defined(_M_AMD64) || defined(__x86_64)
|
65
|
+
# if defined(_MSC_VER)
|
66
|
+
# define PSNIP_BUILTIN__ENABLE_AMD64
|
67
|
+
# elif defined(__GNUC__)
|
68
|
+
# define PSNIP_BUILTIN__ENABLE_AMD64
|
69
|
+
# include <x86intrin.h>
|
70
|
+
# endif
|
71
|
+
#endif
|
72
|
+
|
73
|
+
#if \
|
74
|
+
!defined(psnip_int64_t) || !defined(psnip_uint64_t) || \
|
75
|
+
!defined(psnip_int32_t) || !defined(psnip_uint32_t) || \
|
76
|
+
!defined(psnip_int16_t) || !defined(psnip_uint16_t) || \
|
77
|
+
!defined(psnip_int8_t) || !defined(psnip_uint8_t)
|
78
|
+
# include "exact-int.h"
|
79
|
+
#endif
|
80
|
+
|
81
|
+
#if defined(HEDLEY_LIKELY) && defined(HEDLEY_UNLIKELY)
|
82
|
+
# define PSNIP_BUILTIN_LIKELY(expr) HEDLEY_LIKELY(expr)
|
83
|
+
# define PSNIP_BUILTIN_UNLIKELY(expr) HEDLEY_UNLIKELY(expr)
|
84
|
+
#elif PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_expect,3,0)
|
85
|
+
# define PSNIP_BUILTIN_LIKELY(expr) __builtin_expect(!!(expr), 1)
|
86
|
+
# define PSNIP_BUILTIN_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
|
87
|
+
#else
|
88
|
+
# define PSNIP_BUILTIN_LIKELY(expr) (!!(expr))
|
89
|
+
# define PSNIP_BUILTIN_UNLIKELY(expr) (!!(expr))
|
90
|
+
#endif
|
91
|
+
|
92
|
+
#if !defined(PSNIP_BUILTIN_STATIC_INLINE)
|
93
|
+
# if defined(__GNUC__)
|
94
|
+
# define PSNIP_BUILTIN__COMPILER_ATTRIBUTES __attribute__((__unused__))
|
95
|
+
# else
|
96
|
+
# define PSNIP_BUILTIN__COMPILER_ATTRIBUTES
|
97
|
+
# endif
|
98
|
+
|
99
|
+
# if defined(HEDLEY_INLINE)
|
100
|
+
# define PSNIP_BUILTIN__INLINE HEDLEY_INLINE
|
101
|
+
# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
|
102
|
+
# define PSNIP_BUILTIN__INLINE inline
|
103
|
+
# elif defined(__GNUC_STDC_INLINE__)
|
104
|
+
# define PSNIP_BUILTIN__INLINE __inline__
|
105
|
+
# elif defined(_MSC_VER) && _MSC_VER >= 1200
|
106
|
+
# define PSNIP_BUILTIN__INLINE __inline
|
107
|
+
# else
|
108
|
+
# define PSNIP_BUILTIN__INLINE
|
109
|
+
# endif
|
110
|
+
|
111
|
+
# define PSNIP_BUILTIN__FUNCTION PSNIP_BUILTIN__COMPILER_ATTRIBUTES static PSNIP_BUILTIN__INLINE
|
112
|
+
#endif
|
113
|
+
|
114
|
+
#define PSNIP_BUILTIN__SUFFIX_B 1
|
115
|
+
#define PSNIP_BUILTIN__SUFFIX_S 2
|
116
|
+
#define PSNIP_BUILTIN__SUFFIX_ 3
|
117
|
+
#define PSNIP_BUILTIN__SUFFIX_L 4
|
118
|
+
#define PSNIP_BUILTIN__SUFFIX_LL 5
|
119
|
+
|
120
|
+
#if !defined(PSNIP_BUILTIN__SIZEOF_CHAR)
|
121
|
+
# if CHAR_MIN == (-0x7fLL-1) && CHAR_MAX == 0x7fLL
|
122
|
+
# define PSNIP_BUILTIN__SIZEOF_CHAR 8
|
123
|
+
# elif CHAR_MIN == (-0x7fffLL-1) && CHAR_MAX == 0x7fffLL
|
124
|
+
# define PSNIP_BUILTIN__SIZEOF_CHAR 16
|
125
|
+
# elif CHAR_MIN == (-0x7fffffffLL-1) && CHAR_MAX == 0x7fffffffLL
|
126
|
+
# define PSNIP_BUILTIN__SIZEOF_CHAR 32
|
127
|
+
# elif CHAR_MIN == (-0x7fffffffffffffffLL-1) && CHAR_MAX == 0x7fffffffffffffffLL
|
128
|
+
# define PSNIP_BUILTIN__SIZEOF_CHAR 64
|
129
|
+
# endif
|
130
|
+
#endif
|
131
|
+
|
132
|
+
#if !defined(PSNIP_BUILTIN__SIZEOF_SHRT)
|
133
|
+
# if SHRT_MIN == (-0x7fLL-1) && SHRT_MAX == 0x7fLL
|
134
|
+
# define PSNIP_BUILTIN__SIZEOF_SHRT 8
|
135
|
+
# elif SHRT_MIN == (-0x7fffLL-1) && SHRT_MAX == 0x7fffLL
|
136
|
+
# define PSNIP_BUILTIN__SIZEOF_SHRT 16
|
137
|
+
# elif SHRT_MIN == (-0x7fffffffLL-1) && SHRT_MAX == 0x7fffffffLL
|
138
|
+
# define PSNIP_BUILTIN__SIZEOF_SHRT 32
|
139
|
+
# elif SHRT_MIN == (-0x7fffffffffffffffLL-1) && SHRT_MAX == 0x7fffffffffffffffLL
|
140
|
+
# define PSNIP_BUILTIN__SIZEOF_SHRT 64
|
141
|
+
# endif
|
142
|
+
#endif
|
143
|
+
|
144
|
+
#if !defined(PSNIP_BUILTIN__SIZEOF_INT)
|
145
|
+
# if INT_MIN == (-0x7fLL-1) && INT_MAX == 0x7fLL
|
146
|
+
# define PSNIP_BUILTIN__SIZEOF_INT 8
|
147
|
+
# elif INT_MIN == (-0x7fffLL-1) && INT_MAX == 0x7fffLL
|
148
|
+
# define PSNIP_BUILTIN__SIZEOF_INT 16
|
149
|
+
# elif INT_MIN == (-0x7fffffffLL-1) && INT_MAX == 0x7fffffffLL
|
150
|
+
# define PSNIP_BUILTIN__SIZEOF_INT 32
|
151
|
+
# elif INT_MIN == (-0x7fffffffffffffffLL-1) && INT_MAX == 0x7fffffffffffffffLL
|
152
|
+
# define PSNIP_BUILTIN__SIZEOF_INT 64
|
153
|
+
# endif
|
154
|
+
#endif
|
155
|
+
|
156
|
+
#if !defined(PSNIP_BUILTIN__SIZEOF_LONG)
|
157
|
+
# if LONG_MIN == (-0x7fLL-1) && LONG_MAX == 0x7fLL
|
158
|
+
# define PSNIP_BUILTIN__SIZEOF_LONG 8
|
159
|
+
# elif LONG_MIN == (-0x7fffLL-1) && LONG_MAX == 0x7fffLL
|
160
|
+
# define PSNIP_BUILTIN__SIZEOF_LONG 16
|
161
|
+
# elif LONG_MIN == (-0x7fffffffLL-1) && LONG_MAX == 0x7fffffffLL
|
162
|
+
# define PSNIP_BUILTIN__SIZEOF_LONG 32
|
163
|
+
# elif LONG_MIN == (-0x7fffffffffffffffLL-1) && LONG_MAX == 0x7fffffffffffffffLL
|
164
|
+
# define PSNIP_BUILTIN__SIZEOF_LONG 64
|
165
|
+
# endif
|
166
|
+
#endif
|
167
|
+
|
168
|
+
#if !defined(PSNIP_BUILTIN__SIZEOF_LLONG)
|
169
|
+
# if LLONG_MIN == (-0x7fLL-1) && LLONG_MAX == 0x7fLL
|
170
|
+
# define PSNIP_BUILTIN__SIZEOF_LLONG 8
|
171
|
+
# elif LLONG_MIN == (-0x7fffLL-1) && LLONG_MAX == 0x7fffLL
|
172
|
+
# define PSNIP_BUILTIN__SIZEOF_LLONG 16
|
173
|
+
# elif LLONG_MIN == (-0x7fffffffLL-1) && LLONG_MAX == 0x7fffffffLL
|
174
|
+
# define PSNIP_BUILTIN__SIZEOF_LLONG 32
|
175
|
+
# elif LLONG_MIN == (-0x7fffffffffffffffLL-1) && LLONG_MAX == 0x7fffffffffffffffLL
|
176
|
+
# define PSNIP_BUILTIN__SIZEOF_LLONG 64
|
177
|
+
# endif
|
178
|
+
#endif
|
179
|
+
|
180
|
+
#if !defined(PSNIP_BUILTIN_SUFFIX_INT8)
|
181
|
+
# if PSNIP_BUILTIN__SIZEOF_CHAR == 8
|
182
|
+
# define PSNIP_BUILTIN_SUFFIX_INT8 PSNIP_BUILTIN__SUFFIX_B
|
183
|
+
# elif PSNIP_BUILTIN__SIZEOF_SHRT == 8
|
184
|
+
# define PSNIP_BUILTIN_SUFFIX_INT8 PSNIP_BUILTIN__SUFFIX_S
|
185
|
+
# elif PSNIP_BUILTIN__SIZEOF_INT == 8
|
186
|
+
# define PSNIP_BUILTIN_SUFFIX_INT8 PSNIP_BUILTIN__SUFFIX_
|
187
|
+
# elif PSNIP_BUILTIN__SIZEOF_LONG == 8
|
188
|
+
# define PSNIP_BUILTIN_SUFFIX_INT8 PSNIP_BUILTIN__SUFFIX_L
|
189
|
+
# elif PSNIP_BUILTIN__SIZEOF_LLONG == 8
|
190
|
+
# define PSNIP_BUILTIN_SUFFIX_INT8 PSNIP_BUILTIN__SUFFIX_LL
|
191
|
+
# endif
|
192
|
+
#endif
|
193
|
+
|
194
|
+
#if !defined(PSNIP_BUILTIN_SUFFIX_INT16)
|
195
|
+
# if PSNIP_BUILTIN__SIZEOF_CHAR == 16
|
196
|
+
# define PSNIP_BUILTIN_SUFFIX_INT16 PSNIP_BUILTIN__SUFFIX_B
|
197
|
+
# elif PSNIP_BUILTIN__SIZEOF_SHRT == 16
|
198
|
+
# define PSNIP_BUILTIN_SUFFIX_INT16 PSNIP_BUILTIN__SUFFIX_S
|
199
|
+
# elif PSNIP_BUILTIN__SIZEOF_INT == 16
|
200
|
+
# define PSNIP_BUILTIN_SUFFIX_INT16 PSNIP_BUILTIN__SUFFIX_
|
201
|
+
# elif PSNIP_BUILTIN__SIZEOF_LONG == 16
|
202
|
+
# define PSNIP_BUILTIN_SUFFIX_INT16 PSNIP_BUILTIN__SUFFIX_L
|
203
|
+
# elif PSNIP_BUILTIN__SIZEOF_LLONG == 16
|
204
|
+
# define PSNIP_BUILTIN_SUFFIX_INT16 PSNIP_BUILTIN__SUFFIX_LL
|
205
|
+
# endif
|
206
|
+
#endif
|
207
|
+
|
208
|
+
#if !defined(PSNIP_BUILTIN_SUFFIX_INT32)
|
209
|
+
# if PSNIP_BUILTIN__SIZEOF_CHAR == 32
|
210
|
+
# define PSNIP_BUILTIN_SUFFIX_INT32 PSNIP_BUILTIN__SUFFIX_B
|
211
|
+
# elif PSNIP_BUILTIN__SIZEOF_SHRT == 32
|
212
|
+
# define PSNIP_BUILTIN_SUFFIX_INT32 PSNIP_BUILTIN__SUFFIX_S
|
213
|
+
# elif PSNIP_BUILTIN__SIZEOF_INT == 32
|
214
|
+
# define PSNIP_BUILTIN_SUFFIX_INT32 PSNIP_BUILTIN__SUFFIX_
|
215
|
+
# elif PSNIP_BUILTIN__SIZEOF_LONG == 32
|
216
|
+
# define PSNIP_BUILTIN_SUFFIX_INT32 PSNIP_BUILTIN__SUFFIX_L
|
217
|
+
# elif PSNIP_BUILTIN__SIZEOF_LLONG == 32
|
218
|
+
# define PSNIP_BUILTIN_SUFFIX_INT32 PSNIP_BUILTIN__SUFFIX_LL
|
219
|
+
# endif
|
220
|
+
#endif
|
221
|
+
|
222
|
+
#if !defined(PSNIP_BUILTIN_SUFFIX_INT64)
|
223
|
+
# if defined(__APPLE__) && PSNIP_BUILTIN__SIZEOF_LLONG == 64
|
224
|
+
# define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_LL
|
225
|
+
# elif PSNIP_BUILTIN__SIZEOF_CHAR == 64
|
226
|
+
# define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_B
|
227
|
+
# elif PSNIP_BUILTIN__SIZEOF_SHRT == 64
|
228
|
+
# define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_S
|
229
|
+
# elif PSNIP_BUILTIN__SIZEOF_INT == 64
|
230
|
+
# define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_
|
231
|
+
# elif PSNIP_BUILTIN__SIZEOF_LONG == 64
|
232
|
+
# define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_L
|
233
|
+
# elif PSNIP_BUILTIN__SIZEOF_LLONG == 64
|
234
|
+
# define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_LL
|
235
|
+
# endif
|
236
|
+
#endif
|
237
|
+
|
238
|
+
#if defined(PSNIP_BUILTIN_SUFFIX_INT8)
|
239
|
+
# if PSNIP_BUILTIN_SUFFIX_INT8 == 1
|
240
|
+
# define PSNIP_BUILTIN__VARIANT2_INT8(prefix,name) prefix##_builtin_##name##b
|
241
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT8 == 2
|
242
|
+
# define PSNIP_BUILTIN__VARIANT2_INT8(prefix,name) prefix##_builtin_##name##s
|
243
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT8 == 3
|
244
|
+
# define PSNIP_BUILTIN__VARIANT_INT8(prefix,name) prefix##_builtin_##name
|
245
|
+
# define PSNIP_BUILTIN__VARIANT2_INT8(prefix,name) prefix##_builtin_##name
|
246
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT8 == 4
|
247
|
+
# define PSNIP_BUILTIN__VARIANT_INT8(prefix,name) prefix##_builtin_##name##l
|
248
|
+
# define PSNIP_BUILTIN__VARIANT2_INT8(prefix,name) prefix##_builtin_##name##l
|
249
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT8 == 5
|
250
|
+
# define PSNIP_BUILTIN__VARIANT_INT8(prefix,name) prefix##_builtin_##name##ll
|
251
|
+
# define PSNIP_BUILTIN__VARIANT2_INT8(prefix,name) prefix##_builtin_##name##ll
|
252
|
+
# endif
|
253
|
+
#endif
|
254
|
+
|
255
|
+
#if defined(PSNIP_BUILTIN_SUFFIX_INT16)
|
256
|
+
# if PSNIP_BUILTIN_SUFFIX_INT16 == 1
|
257
|
+
# define PSNIP_BUILTIN__VARIANT2_INT16(prefix,name) prefix##_builtin_##name##b
|
258
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT16 == 2
|
259
|
+
# define PSNIP_BUILTIN__VARIANT2_INT16(prefix,name) prefix##_builtin_##name##s
|
260
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT16 == 3
|
261
|
+
# define PSNIP_BUILTIN__VARIANT_INT16(prefix,name) prefix##_builtin_##name
|
262
|
+
# define PSNIP_BUILTIN__VARIANT2_INT16(prefix,name) prefix##_builtin_##name
|
263
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT16 == 4
|
264
|
+
# define PSNIP_BUILTIN__VARIANT_INT16(prefix,name) prefix##_builtin_##name##l
|
265
|
+
# define PSNIP_BUILTIN__VARIANT2_INT16(prefix,name) prefix##_builtin_##name##l
|
266
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT16 == 5
|
267
|
+
# define PSNIP_BUILTIN__VARIANT_INT16(prefix,name) prefix##_builtin_##name##ll
|
268
|
+
# define PSNIP_BUILTIN__VARIANT2_INT16(prefix,name) prefix##_builtin_##name##ll
|
269
|
+
# endif
|
270
|
+
#endif
|
271
|
+
|
272
|
+
#if defined(PSNIP_BUILTIN_SUFFIX_INT32)
|
273
|
+
# if PSNIP_BUILTIN_SUFFIX_INT32 == 1
|
274
|
+
# define PSNIP_BUILTIN__VARIANT2_INT32(prefix,name) prefix##_builtin_##name##b
|
275
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT32 == 2
|
276
|
+
# define PSNIP_BUILTIN__VARIANT2_INT32(prefix,name) prefix##_builtin_##name##s
|
277
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT32 == 3
|
278
|
+
# define PSNIP_BUILTIN__VARIANT_INT32(prefix,name) prefix##_builtin_##name
|
279
|
+
# define PSNIP_BUILTIN__VARIANT2_INT32(prefix,name) prefix##_builtin_##name
|
280
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT32 == 4
|
281
|
+
# define PSNIP_BUILTIN__VARIANT_INT32(prefix,name) prefix##_builtin_##name##l
|
282
|
+
# define PSNIP_BUILTIN__VARIANT2_INT32(prefix,name) prefix##_builtin_##name##l
|
283
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT32 == 5
|
284
|
+
# define PSNIP_BUILTIN__VARIANT_INT32(prefix,name) prefix##_builtin_##name##ll
|
285
|
+
# define PSNIP_BUILTIN__VARIANT2_INT32(prefix,name) prefix##_builtin_##name##ll
|
286
|
+
# endif
|
287
|
+
#endif
|
288
|
+
|
289
|
+
#if defined(PSNIP_BUILTIN_SUFFIX_INT64)
|
290
|
+
# if PSNIP_BUILTIN_SUFFIX_INT64 == 1
|
291
|
+
# define PSNIP_BUILTIN__VARIANT2_INT64(prefix,name) prefix##_builtin_##name##b
|
292
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT64 == 2
|
293
|
+
# define PSNIP_BUILTIN__VARIANT2_INT64(prefix,name) prefix##_builtin_##name##s
|
294
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT64 == 3
|
295
|
+
# define PSNIP_BUILTIN__VARIANT_INT64(prefix,name) prefix##_builtin_##name
|
296
|
+
# define PSNIP_BUILTIN__VARIANT2_INT64(prefix,name) prefix##_builtin_##name
|
297
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT64 == 4
|
298
|
+
# define PSNIP_BUILTIN__VARIANT_INT64(prefix,name) prefix##_builtin_##name##l
|
299
|
+
# define PSNIP_BUILTIN__VARIANT2_INT64(prefix,name) prefix##_builtin_##name##l
|
300
|
+
# elif PSNIP_BUILTIN_SUFFIX_INT64 == 5
|
301
|
+
# define PSNIP_BUILTIN__VARIANT_INT64(prefix,name) prefix##_builtin_##name##ll
|
302
|
+
# define PSNIP_BUILTIN__VARIANT2_INT64(prefix,name) prefix##_builtin_##name##ll
|
303
|
+
# endif
|
304
|
+
#endif
|
305
|
+
|
306
|
+
/******
|
307
|
+
*** GCC-style built-ins
|
308
|
+
******/
|
309
|
+
|
310
|
+
/*** __builtin_ffs ***/
|
311
|
+
|
312
|
+
/* Defines psnip_builtin_<f_n>(T x): a table-driven "find first set".
 *
 * The value is examined one octet at a time starting from the least
 * significant one.  The result is the 1-based position of the lowest set
 * bit, or 0 when no bit is set.
 *
 *   f_n  suffix of the generated function name
 *   T    integer type of the argument
 */
#define PSNIP_BUILTIN__FFS_DEFINE_PORTABLE(f_n, T) \
  PSNIP_BUILTIN__FUNCTION \
  int psnip_builtin_##f_n(T x) { \
    /* 1-based index of the lowest set bit for every octet value (0 -> 0). */ \
    static const char psnip_builtin_ffs_lookup[256] = { \
      0, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      7, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      8, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      7, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1  \
    }; \
    size_t shift; \
    \
    for (shift = 0; shift < (sizeof(T) * 8); shift += 8) { \
      const unsigned char octet = (unsigned char) ((x >> shift) & 0xff); \
      if (octet != 0) \
        return psnip_builtin_ffs_lookup[octet] + shift; \
    } \
    \
    return 0; \
  }
|
347
|
+
|
348
|
+
#if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_ffs, 3, 3)
|
349
|
+
# define psnip_builtin_ffs(x) __builtin_ffs(x)
|
350
|
+
# define psnip_builtin_ffsl(x) __builtin_ffsl(x)
|
351
|
+
# define psnip_builtin_ffsll(x) __builtin_ffsll(x)
|
352
|
+
# define psnip_builtin_ffs32(x) PSNIP_BUILTIN__VARIANT_INT32(_,ffs)(x)
|
353
|
+
# define psnip_builtin_ffs64(x) PSNIP_BUILTIN__VARIANT_INT64(_,ffs)(x)
|
354
|
+
#else
|
355
|
+
# if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanForward, 14, 0)
|
356
|
+
PSNIP_BUILTIN__FUNCTION
|
357
|
+
int psnip_builtin_ffsll(long long v) {
|
358
|
+
unsigned long r;
|
359
|
+
# if defined(_M_AMD64) || defined(_M_ARM)
|
360
|
+
if (_BitScanForward64(&r, (unsigned long long) v)) {
|
361
|
+
return (int) (r + 1);
|
362
|
+
}
|
363
|
+
# else
|
364
|
+
if (_BitScanForward(&r, (unsigned long) (v))) {
|
365
|
+
return (int) (r + 1);
|
366
|
+
} else if (_BitScanForward(&r, (unsigned long) (v >> 32))) {
|
367
|
+
return (int) (r + 33);
|
368
|
+
}
|
369
|
+
# endif
|
370
|
+
return 0;
|
371
|
+
}
|
372
|
+
|
373
|
+
PSNIP_BUILTIN__FUNCTION
|
374
|
+
int psnip_builtin_ffsl(long v) {
|
375
|
+
unsigned long r;
|
376
|
+
if (_BitScanForward(&r, (unsigned long) v)) {
|
377
|
+
return (int) (r + 1);
|
378
|
+
}
|
379
|
+
return 0;
|
380
|
+
}
|
381
|
+
|
382
|
+
PSNIP_BUILTIN__FUNCTION
|
383
|
+
int psnip_builtin_ffs(int v) {
|
384
|
+
return psnip_builtin_ffsl(v);
|
385
|
+
}
|
386
|
+
# else
|
387
|
+
PSNIP_BUILTIN__FFS_DEFINE_PORTABLE(ffs, int)
|
388
|
+
PSNIP_BUILTIN__FFS_DEFINE_PORTABLE(ffsl, long)
|
389
|
+
PSNIP_BUILTIN__FFS_DEFINE_PORTABLE(ffsll, long long)
|
390
|
+
# endif
|
391
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
392
|
+
# define __builtin_ffsll(v) psnip_builtin_ffsll(v)
|
393
|
+
# define __builtin_ffsl(v) psnip_builtin_ffsl(v)
|
394
|
+
# define __builtin_ffs(v) psnip_builtin_ffs(v)
|
395
|
+
# endif
|
396
|
+
#endif
|
397
|
+
|
398
|
+
#if !defined(psnip_builtin_ffs32)
|
399
|
+
# define psnip_builtin_ffs32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,ffs)(x)
|
400
|
+
#endif
|
401
|
+
|
402
|
+
#if !defined(psnip_builtin_ffs64)
|
403
|
+
# define psnip_builtin_ffs64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,ffs)(x)
|
404
|
+
#endif
|
405
|
+
|
406
|
+
/*** __builtin_clz ***/
|
407
|
+
|
408
|
+
/* Defines psnip_builtin_<f_n>(T x): a table-driven "count leading zeros".
 *
 * The value is examined one octet at a time starting from the most
 * significant one; the first non-zero octet is looked up in a table of
 * per-octet leading-zero counts.  For x == 0 the function returns
 * (bit width of T) - 1.
 *
 *   f_n  suffix of the generated function name
 *   T    integer type of the argument (instantiated with unsigned types)
 */
#define PSNIP_BUILTIN__CLZ_DEFINE_PORTABLE(f_n, T) \
  PSNIP_BUILTIN__FUNCTION \
  int psnip_builtin_##f_n(T x) { \
    /* Leading zero count within a single octet. */ \
    static const char psnip_builtin_clz_lookup[256] = { \
      7, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, \
      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, \
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, \
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, \
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  \
    }; \
    /* Bit offset of the most significant octet of T. */ \
    const size_t top = (sizeof(T) - 1) * 8; \
    size_t shift; \
    \
    /* Every octet above the lowest one, most significant first. */ \
    for (shift = top; shift != 0; shift -= 8) { \
      const T high = x >> shift; \
      if (high != 0) \
        return psnip_builtin_clz_lookup[high] + (top - shift); \
    } \
    \
    /* Only the lowest octet can still hold a set bit. */ \
    if (x != 0) \
      return psnip_builtin_clz_lookup[x] + top; \
    \
    return (int) ((sizeof(T) * 8) - 1); \
  }
|
445
|
+
|
446
|
+
#if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_clz, 3, 4)
|
447
|
+
# define psnip_builtin_clz(x) __builtin_clz(x)
|
448
|
+
# define psnip_builtin_clzl(x) __builtin_clzl(x)
|
449
|
+
# define psnip_builtin_clzll(x) __builtin_clzll(x)
|
450
|
+
# define psnip_builtin_clz32(x) PSNIP_BUILTIN__VARIANT_INT32(_,clz)(x)
|
451
|
+
# define psnip_builtin_clz64(x) PSNIP_BUILTIN__VARIANT_INT64(_,clz)(x)
|
452
|
+
#else
|
453
|
+
# if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanReverse,14,0)
|
454
|
+
PSNIP_BUILTIN__FUNCTION
|
455
|
+
int psnip_builtin_clzll(unsigned long long v) {
|
456
|
+
unsigned long r = 0;
|
457
|
+
# if defined(_M_AMD64) || defined(_M_ARM)
|
458
|
+
if (_BitScanReverse64(&r, v)) {
|
459
|
+
return 63 - r;
|
460
|
+
}
|
461
|
+
# else
|
462
|
+
if (_BitScanReverse(&r, (unsigned long) (v >> 32))) {
|
463
|
+
return 31 - r;
|
464
|
+
} else if (_BitScanReverse(&r, (unsigned long) v)) {
|
465
|
+
return 63 - r;
|
466
|
+
}
|
467
|
+
# endif
|
468
|
+
return 63;
|
469
|
+
}
|
470
|
+
|
471
|
+
PSNIP_BUILTIN__FUNCTION
|
472
|
+
int psnip_builtin_clzl(unsigned long v) {
|
473
|
+
unsigned long r = 0;
|
474
|
+
if (_BitScanReverse(&r, v)) {
|
475
|
+
return 31 - r;
|
476
|
+
}
|
477
|
+
return 31;
|
478
|
+
}
|
479
|
+
|
480
|
+
PSNIP_BUILTIN__FUNCTION
|
481
|
+
int psnip_builtin_clz(unsigned int v) {
|
482
|
+
return psnip_builtin_clzl(v);
|
483
|
+
}
|
484
|
+
# define psnip_builtin_clz32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,clz)(x)
|
485
|
+
# define psnip_builtin_clz64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,clz)(x)
|
486
|
+
# else
|
487
|
+
PSNIP_BUILTIN__FUNCTION
|
488
|
+
int psnip_builtin_clz32(psnip_uint32_t v) {
|
489
|
+
static const unsigned char MultiplyDeBruijnBitPosition[] = {
|
490
|
+
0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
|
491
|
+
8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
|
492
|
+
};
|
493
|
+
|
494
|
+
v |= v >> 1;
|
495
|
+
v |= v >> 2;
|
496
|
+
v |= v >> 4;
|
497
|
+
v |= v >> 8;
|
498
|
+
v |= v >> 16;
|
499
|
+
|
500
|
+
return
|
501
|
+
((sizeof(psnip_uint32_t) * CHAR_BIT) - 1) -
|
502
|
+
MultiplyDeBruijnBitPosition[(psnip_uint32_t)(v * 0x07C4ACDDU) >> 27];
|
503
|
+
}
|
504
|
+
|
505
|
+
PSNIP_BUILTIN__FUNCTION
|
506
|
+
int psnip_builtin_clz64(psnip_uint64_t v) {
|
507
|
+
static const unsigned char MultiplyDeBruijnBitPosition[] = {
|
508
|
+
0, 47, 1, 56, 48, 27, 2, 60, 57, 49, 41, 37, 28, 16, 3, 61,
|
509
|
+
54, 58, 35, 52, 50, 42, 21, 44, 38, 32, 29, 23, 17, 11, 4, 62,
|
510
|
+
46, 55, 26, 59, 40, 36, 15, 53, 34, 51, 20, 43, 31, 22, 10, 45,
|
511
|
+
25, 39, 14, 33, 19, 30, 9, 24, 13, 18, 8, 12, 7, 6, 5, 63
|
512
|
+
};
|
513
|
+
|
514
|
+
v |= v >> 1;
|
515
|
+
v |= v >> 2;
|
516
|
+
v |= v >> 4;
|
517
|
+
v |= v >> 8;
|
518
|
+
v |= v >> 16;
|
519
|
+
v |= v >> 32;
|
520
|
+
|
521
|
+
return
|
522
|
+
((sizeof(psnip_uint64_t) * CHAR_BIT) - 1) -
|
523
|
+
MultiplyDeBruijnBitPosition[(psnip_uint64_t)(v * 0x03F79D71B4CB0A89ULL) >> 58];
|
524
|
+
}
|
525
|
+
|
526
|
+
# if PSNIP_BUILTIN__SIZEOF_INT == 32
|
527
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_clz(unsigned int x) { return psnip_builtin_clz32(x); }
|
528
|
+
# elif PSNIP_BUILTIN__SIZEOF_INT == 64
|
529
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_clz(unsigned int x) { return psnip_builtin_clz64(x); }
|
530
|
+
# else
|
531
|
+
PSNIP_BUILTIN__CLZ_DEFINE_PORTABLE(clz, unsigned int)
|
532
|
+
# endif
|
533
|
+
|
534
|
+
# if PSNIP_BUILTIN__SIZEOF_LONG == 32
|
535
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_clzl(unsigned long x) { return psnip_builtin_clz32(x); }
|
536
|
+
# elif PSNIP_BUILTIN__SIZEOF_LONG == 64
|
537
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_clzl(unsigned long x) { return psnip_builtin_clz64(x); }
|
538
|
+
# else
|
539
|
+
PSNIP_BUILTIN__CLZ_DEFINE_PORTABLE(clzl, unsigned long)
|
540
|
+
# endif
|
541
|
+
|
542
|
+
# if PSNIP_BUILTIN__SIZEOF_LLONG == 32
|
543
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_clzll(unsigned long long x) { return psnip_builtin_clz32(x); }
|
544
|
+
# elif PSNIP_BUILTIN__SIZEOF_LLONG == 64
|
545
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_clzll(unsigned long long x) { return psnip_builtin_clz64(x); }
|
546
|
+
# else
|
547
|
+
PSNIP_BUILTIN__CLZ_DEFINE_PORTABLE(clzll, unsigned long long)
|
548
|
+
# endif
|
549
|
+
|
550
|
+
# endif
|
551
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
552
|
+
# define __builtin_clz(x) psnip_builtin_clz(x)
|
553
|
+
# define __builtin_clzl(x) psnip_builtin_clzl(x)
|
554
|
+
# define __builtin_clzll(x) psnip_builtin_clzll(x)
|
555
|
+
# endif
|
556
|
+
#endif
|
557
|
+
|
558
|
+
#if !defined(psnip_builtin_clz32)
|
559
|
+
# define psnip_builtin_clz32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,clz)(x)
|
560
|
+
#endif
|
561
|
+
|
562
|
+
#if !defined(psnip_builtin_clz64)
|
563
|
+
# define psnip_builtin_clz64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,clz)(x)
|
564
|
+
#endif
|
565
|
+
|
566
|
+
/*** __builtin_ctz ***/
|
567
|
+
|
568
|
+
/* Defines psnip_builtin_<f_n>(T x): a table-driven "count trailing zeros".
 *
 * The value is examined one octet at a time starting from the least
 * significant one; the first non-zero octet is looked up in a table of
 * per-octet trailing-zero counts.  When x has no set bit the function
 * returns sizeof(T) - 1.
 *
 *   f_n  suffix of the generated function name
 *   T    integer type of the argument (instantiated with unsigned types)
 */
#define PSNIP_BUILTIN__CTZ_DEFINE_PORTABLE(f_n, T) \
  PSNIP_BUILTIN__FUNCTION \
  int psnip_builtin_##f_n(T x) { \
    /* Trailing zero count within a single octet. */ \
    static const char psnip_builtin_ctz_lookup[256] = { \
      0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0  \
    }; \
    size_t shift; \
    \
    for (shift = 0; shift < (sizeof(T) * 8); shift += 8) { \
      const T octet = (x >> shift) & 0xff; \
      if (octet != 0) \
        return psnip_builtin_ctz_lookup[octet] + (char) shift; \
    } \
    \
    return (int) sizeof(T) - 1; \
  }
|
600
|
+
|
601
|
+
#if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_ctz, 3, 4)
|
602
|
+
# define psnip_builtin_ctz(x) __builtin_ctz(x)
|
603
|
+
# define psnip_builtin_ctzl(x) __builtin_ctzl(x)
|
604
|
+
# define psnip_builtin_ctzll(x) __builtin_ctzll(x)
|
605
|
+
# define psnip_builtin_ctz32(x) PSNIP_BUILTIN__VARIANT_INT32(_,ctz)(x)
|
606
|
+
# define psnip_builtin_ctz64(x) PSNIP_BUILTIN__VARIANT_INT64(_,ctz)(x)
|
607
|
+
#else
|
608
|
+
# if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanForward, 14, 0)
|
609
|
+
PSNIP_BUILTIN__FUNCTION
|
610
|
+
int psnip_builtin_ctzll(unsigned long long v) {
|
611
|
+
unsigned long r = 0;
|
612
|
+
# if defined(_M_AMD64) || defined(_M_ARM)
|
613
|
+
_BitScanForward64(&r, v);
|
614
|
+
return (int) r;
|
615
|
+
# else
|
616
|
+
if (_BitScanForward(&r, (unsigned int) (v)))
|
617
|
+
return (int) (r);
|
618
|
+
|
619
|
+
_BitScanForward(&r, (unsigned int) (v >> 32));
|
620
|
+
return (int) (r + 32);
|
621
|
+
# endif
|
622
|
+
}
|
623
|
+
|
624
|
+
PSNIP_BUILTIN__FUNCTION
|
625
|
+
int psnip_builtin_ctzl(unsigned long v) {
|
626
|
+
unsigned long r = 0;
|
627
|
+
_BitScanForward(&r, v);
|
628
|
+
return (int) r;
|
629
|
+
}
|
630
|
+
|
631
|
+
PSNIP_BUILTIN__FUNCTION
|
632
|
+
int psnip_builtin_ctz(unsigned int v) {
|
633
|
+
return psnip_builtin_ctzl(v);
|
634
|
+
}
|
635
|
+
# define psnip_builtin_ctz32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,ctz)(x)
|
636
|
+
# define psnip_builtin_ctz64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,ctz)(x)
|
637
|
+
# else
|
638
|
+
PSNIP_BUILTIN__FUNCTION
|
639
|
+
int psnip_builtin_ctz32(psnip_uint32_t v) {
|
640
|
+
static const unsigned char MultiplyDeBruijnBitPosition[] = {
|
641
|
+
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
|
642
|
+
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
|
643
|
+
};
|
644
|
+
|
645
|
+
return
|
646
|
+
MultiplyDeBruijnBitPosition[((psnip_uint32_t)((v & -v) * 0x077CB531U)) >> 27];
|
647
|
+
}
|
648
|
+
|
649
|
+
PSNIP_BUILTIN__FUNCTION
|
650
|
+
int psnip_builtin_ctz64(psnip_uint64_t v) {
|
651
|
+
static const unsigned char MultiplyDeBruijnBitPosition[] = {
|
652
|
+
0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
|
653
|
+
62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
|
654
|
+
63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
|
655
|
+
54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6
|
656
|
+
};
|
657
|
+
|
658
|
+
return
|
659
|
+
MultiplyDeBruijnBitPosition[((psnip_uint64_t)((v & -v) * 0x03f79d71b4ca8b09ULL)) >> 58];
|
660
|
+
}
|
661
|
+
|
662
|
+
# if PSNIP_BUILTIN__SIZEOF_INT == 32
|
663
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctz(unsigned int x) { return psnip_builtin_ctz32(x); }
|
664
|
+
# elif PSNIP_BUILTIN__SIZEOF_INT == 64
|
665
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctz(unsigned int x) { return psnip_builtin_ctz64(x); }
|
666
|
+
# else
|
667
|
+
PSNIP_BUILTIN__CTZ_DEFINE_PORTABLE(ctz, unsigned int)
|
668
|
+
# endif
|
669
|
+
|
670
|
+
# if PSNIP_BUILTIN__SIZEOF_LONG == 32
|
671
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctzl(unsigned long x) { return psnip_builtin_ctz32(x); }
|
672
|
+
# elif PSNIP_BUILTIN__SIZEOF_LONG == 64
|
673
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctzl(unsigned long x) { return psnip_builtin_ctz64(x); }
|
674
|
+
# else
|
675
|
+
PSNIP_BUILTIN__CTZ_DEFINE_PORTABLE(ctzl, unsigned long)
|
676
|
+
# endif
|
677
|
+
|
678
|
+
# if PSNIP_BUILTIN__SIZEOF_LLONG == 32
|
679
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctzll(unsigned long long x) { return psnip_builtin_ctz32(x); }
|
680
|
+
# elif PSNIP_BUILTIN__SIZEOF_LLONG == 64
|
681
|
+
PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctzll(unsigned long long x) { return psnip_builtin_ctz64(x); }
|
682
|
+
# else
|
683
|
+
PSNIP_BUILTIN__CTZ_DEFINE_PORTABLE(ctzll, unsigned long long)
|
684
|
+
# endif
|
685
|
+
# endif
|
686
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
687
|
+
# define __builtin_ctz(x) psnip_builtin_ctz(x)
|
688
|
+
# define __builtin_ctzl(x) psnip_builtin_ctzl(x)
|
689
|
+
# define __builtin_ctzll(x) psnip_builtin_ctzll(x)
|
690
|
+
# endif
|
691
|
+
#endif
|
692
|
+
|
693
|
+
#if !defined(psnip_builtin_ctz32)
|
694
|
+
# define psnip_builtin_ctz32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,ctz)(x)
|
695
|
+
#endif
|
696
|
+
|
697
|
+
#if !defined(psnip_builtin_ctz64)
|
698
|
+
# define psnip_builtin_ctz64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,ctz)(x)
|
699
|
+
#endif
|
700
|
+
|
701
|
+
/*** __builtin_parity ***/
|
702
|
+
|
703
|
+
/* Defines psnip_builtin_<f_n>(T v): parity of the set bits of v
 * (1 when an odd number of bits is set, 0 otherwise).
 *
 * The value is XOR-folded onto itself, halving the distance each time,
 * until only the low four bits matter; those index the 16-entry bit table
 * 0x6996.
 *
 *   f_n  suffix of the generated function name
 *   T    unsigned integer type of the argument
 */
#define PSNIP_BUILTIN__PARITY_DEFINE_PORTABLE(f_n, T) \
  PSNIP_BUILTIN__FUNCTION \
  int psnip_builtin_##f_n(T v) { \
    size_t fold = (sizeof(T) * CHAR_BIT) / 2; \
    while (fold > 2) { \
      v ^= v >> fold; \
      fold /= 2; \
    } \
    return (0x6996 >> (v & 0xf)) & 1; \
  }
|
712
|
+
|
713
|
+
#if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_parity, 3, 4)
|
714
|
+
# define psnip_builtin_parity(x) __builtin_parity(x)
|
715
|
+
# define psnip_builtin_parityl(x) __builtin_parityl(x)
|
716
|
+
# define psnip_builtin_parityll(x) __builtin_parityll(x)
|
717
|
+
# define psnip_builtin_parity32(x) PSNIP_BUILTIN__VARIANT_INT32(_,parity)(x)
|
718
|
+
# define psnip_builtin_parity64(x) PSNIP_BUILTIN__VARIANT_INT64(_,parity)(x)
|
719
|
+
#else
|
720
|
+
PSNIP_BUILTIN__PARITY_DEFINE_PORTABLE(parity, unsigned int)
|
721
|
+
PSNIP_BUILTIN__PARITY_DEFINE_PORTABLE(parityl, unsigned long)
|
722
|
+
PSNIP_BUILTIN__PARITY_DEFINE_PORTABLE(parityll, unsigned long long)
|
723
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
724
|
+
# define __builtin_parity(x) psnip_builtin_parity(x)
|
725
|
+
# define __builtin_parityl(x) psnip_builtin_parityl(x)
|
726
|
+
# define __builtin_parityll(x) psnip_builtin_parityll(x)
|
727
|
+
# endif
|
728
|
+
#endif
|
729
|
+
|
730
|
+
#if !defined(psnip_builtin_parity32)
|
731
|
+
# define psnip_builtin_parity32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,parity)(x)
|
732
|
+
#endif
|
733
|
+
|
734
|
+
#if !defined(psnip_builtin_parity64)
|
735
|
+
# define psnip_builtin_parity64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,parity)(x)
|
736
|
+
#endif
|
737
|
+
|
738
|
+
/*** __builtin_popcount ***/
|
739
|
+
|
740
|
+
/* Defines psnip_builtin_<f_n>(T x): number of set bits in x.
 *
 * Adjacent bit counts are summed in parallel into 2-bit, 4-bit and 8-bit
 * fields; a multiplication then adds all octet counts into the top octet,
 * which is shifted down to produce the result.
 *
 *   f_n  suffix of the generated function name
 *   T    unsigned integer type of the argument
 */
#define PSNIP_BUILTIN__POPCOUNT_DEFINE_PORTABLE(f_n, T) \
  PSNIP_BUILTIN__FUNCTION \
  int psnip_builtin_##f_n(T x) { \
    const T all_ones  = (T)~(T)0; \
    const T pairs     = all_ones / 3;         /* 0101...      */ \
    const T quads     = all_ones / 15 * 3;    /* 0011 0011... */ \
    const T nibbles   = all_ones / 255 * 15;  /* 00001111...  */ \
    const T per_octet = all_ones / 255;       /* 00000001...  */ \
    \
    x = x - ((x >> 1) & pairs); \
    x = (x & quads) + ((x >> 2) & quads); \
    x = (x + (x >> 4)) & nibbles; \
    return (T)(x * per_octet) >> ((sizeof(T) - 1) * 8); \
  }
|
748
|
+
|
749
|
+
#if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_popcount, 3, 4)
|
750
|
+
# define psnip_builtin_popcount(x) __builtin_popcount(x)
|
751
|
+
# define psnip_builtin_popcountl(x) __builtin_popcountl(x)
|
752
|
+
# define psnip_builtin_popcountll(x) __builtin_popcountll(x)
|
753
|
+
# define psnip_builtin_popcount32(x) PSNIP_BUILTIN__VARIANT_INT32(_,popcount)(x)
|
754
|
+
# define psnip_builtin_popcount64(x) PSNIP_BUILTIN__VARIANT_INT64(_,popcount)(x)
|
755
|
+
#else
|
756
|
+
PSNIP_BUILTIN__POPCOUNT_DEFINE_PORTABLE(popcount, unsigned int)
|
757
|
+
PSNIP_BUILTIN__POPCOUNT_DEFINE_PORTABLE(popcountl, unsigned long)
|
758
|
+
PSNIP_BUILTIN__POPCOUNT_DEFINE_PORTABLE(popcountll, unsigned long long)
|
759
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
760
|
+
# define __builtin_popcount(x) psnip_builtin_popcount(x)
|
761
|
+
# define __builtin_popcountl(x) psnip_builtin_popcountl(x)
|
762
|
+
# define __builtin_popcountll(x) psnip_builtin_popcountll(x)
|
763
|
+
# endif
|
764
|
+
#endif
|
765
|
+
|
766
|
+
#if !defined(psnip_builtin_popcount32)
|
767
|
+
# define psnip_builtin_popcount32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,popcount)(x)
|
768
|
+
#endif
|
769
|
+
|
770
|
+
#if !defined(psnip_builtin_popcount64)
|
771
|
+
# define psnip_builtin_popcount64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,popcount)(x)
|
772
|
+
#endif
|
773
|
+
|
774
|
+
/*** __builtin_clrsb ***/
|
775
|
+
|
776
|
+
/* Defines psnip_builtin_<f_n>(T x): number of leading redundant sign bits,
 * i.e. how many bits directly below the most significant bit are equal
 * to it.
 *
 *   f_n    suffix of the generated function name
 *   clzfn  suffix of the psnip_builtin_* count-leading-zeros helper for the
 *          unsigned counterpart of T
 *   T      signed integer type of the argument
 *
 * 0 and -1 consist solely of copies of the sign bit, so both yield
 * (bit width of T) - 1.  They are answered directly because both would
 * otherwise reach the clz helper as 0 (for -1 through ~x); clz of 0 is
 * undefined for the GCC builtin and would give width - 2 with a helper that
 * returns width - 1 for zero.
 */
#define PSNIP_BUILTIN__CLRSB_DEFINE_PORTABLE(f_n, clzfn, T) \
  PSNIP_BUILTIN__FUNCTION \
  int psnip_builtin_##f_n(T x) { \
    if (x == 0 || PSNIP_BUILTIN_UNLIKELY(x == -1)) \
      return ((int) sizeof(x) * 8) - 1; \
    /* Negative values are complemented so that sign-bit copies become */ \
    /* leading zeros; one of those zeros is the sign bit itself.       */ \
    return psnip_builtin_##clzfn((x < 0) ? ~x : x) - 1; \
  }
|
783
|
+
|
784
|
+
#if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_clrsb, 4, 7)
|
785
|
+
# define psnip_builtin_clrsb(x) __builtin_clrsb(x)
|
786
|
+
# if !defined(__INTEL_COMPILER)
|
787
|
+
# define psnip_builtin_clrsbl(x) __builtin_clrsbl(x)
|
788
|
+
# else
|
789
|
+
# if PSNIP_BUILTIN__SIZEOF_LONG == PSNIP_BUILTIN__SIZEOF_INT
|
790
|
+
# define psnip_builtin_clrsbl(x) ((long) __builtin_clrsb((int) x))
|
791
|
+
# elif PSNIP_BUILTIN__SIZEOF_LONG == PSNIP_BUILTIN__SIZEOF_LLONG
|
792
|
+
# define psnip_builtin_clrsbl(x) ((long) __builtin_clrsbll((long long) x))
|
793
|
+
# else
|
794
|
+
PSNIP_BUILTIN__CLRSB_DEFINE_PORTABLE(clrsbl, clzl, long)
|
795
|
+
# endif
|
796
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
797
|
+
# define __builtin_clrsbl(x) psnip_builtin_clrsbl(x)
|
798
|
+
# endif
|
799
|
+
# endif
|
800
|
+
# define psnip_builtin_clrsbll(x) __builtin_clrsbll(x)
|
801
|
+
# define psnip_builtin_clrsb32(x) PSNIP_BUILTIN__VARIANT_INT32(_,clrsb)(x)
|
802
|
+
# define psnip_builtin_clrsb64(x) PSNIP_BUILTIN__VARIANT_INT64(_,clrsb)(x)
|
803
|
+
#else
|
804
|
+
PSNIP_BUILTIN__CLRSB_DEFINE_PORTABLE(clrsb, clz, int)
|
805
|
+
PSNIP_BUILTIN__CLRSB_DEFINE_PORTABLE(clrsbl, clzl, long)
|
806
|
+
PSNIP_BUILTIN__CLRSB_DEFINE_PORTABLE(clrsbll, clzll, long long)
|
807
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
808
|
+
# define __builtin_clrsb(x) psnip_builtin_clrsb(x)
|
809
|
+
# define __builtin_clrsbl(x) psnip_builtin_clrsbl(x)
|
810
|
+
# define __builtin_clrsbll(x) psnip_builtin_clrsbll(x)
|
811
|
+
# endif
|
812
|
+
#endif
|
813
|
+
|
814
|
+
#if !defined(psnip_builtin_clrsb32)
|
815
|
+
# define psnip_builtin_clrsb32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,clrsb)(x)
|
816
|
+
#endif
|
817
|
+
|
818
|
+
#if !defined(psnip_builtin_clrsb64)
|
819
|
+
# define psnip_builtin_clrsb64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,clrsb)(x)
|
820
|
+
#endif
|
821
|
+
|
822
|
+
/*** __builtin_bitreverse ***/
|
823
|
+
|
824
|
+
/* Defines psnip_builtin_<f_n>(T x): x with the order of its bits reversed.
 *
 * Works by swapping progressively smaller groups: first the two halves,
 * then the quarters within each half, and so on down to single bits.  The
 * mask is refined each round so that it selects the lower group of every
 * pair.
 *
 *   f_n  suffix of the generated function name
 *   T    unsigned integer type of the argument and result
 */
#define PSNIP_BUILTIN__BITREVERSE_DEFINE_PORTABLE(f_n, T) \
  PSNIP_BUILTIN__FUNCTION \
  T psnip_builtin_##f_n(T x) { \
    T low_groups = (T) ~(T) 0U; \
    size_t span; \
    \
    for (span = (sizeof(x) * CHAR_BIT) >> 1; span > 0; span >>= 1) { \
      low_groups ^= (low_groups << span); \
      x = ((x >> span) & low_groups) | ((x << span) & ~low_groups); \
    } \
    return x; \
  }
|
836
|
+
|
837
|
+
#if PSNIP_BUILTIN_CLANG_HAS_BUILTIN(__builtin_bitreverse64) && !defined(__EMSCRIPTEN__)
|
838
|
+
# define psnip_builtin_bitreverse8(x) __builtin_bitreverse8(x)
|
839
|
+
# define psnip_builtin_bitreverse16(x) __builtin_bitreverse16(x)
|
840
|
+
# define psnip_builtin_bitreverse32(x) __builtin_bitreverse32(x)
|
841
|
+
# define psnip_builtin_bitreverse64(x) __builtin_bitreverse64(x)
|
842
|
+
#else
|
843
|
+
PSNIP_BUILTIN__FUNCTION
|
844
|
+
psnip_uint8_t psnip_builtin_bitreverse8(psnip_uint8_t v) {
|
845
|
+
return (psnip_uint8_t) ((v * 0x0202020202ULL & 0x010884422010ULL) % 1023);
|
846
|
+
}
|
847
|
+
PSNIP_BUILTIN__BITREVERSE_DEFINE_PORTABLE(bitreverse16, psnip_uint16_t)
|
848
|
+
PSNIP_BUILTIN__BITREVERSE_DEFINE_PORTABLE(bitreverse32, psnip_uint32_t)
|
849
|
+
PSNIP_BUILTIN__BITREVERSE_DEFINE_PORTABLE(bitreverse64, psnip_uint64_t)
|
850
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
851
|
+
# define __builtin_bitreverse8(x) psnip_builtin_bitreverse8(x)
|
852
|
+
# define __builtin_bitreverse16(x) psnip_builtin_bitreverse16(x)
|
853
|
+
# define __builtin_bitreverse32(x) psnip_builtin_bitreverse32(x)
|
854
|
+
# define __builtin_bitreverse64(x) psnip_builtin_bitreverse64(x)
|
855
|
+
# endif
|
856
|
+
#endif
|
857
|
+
|
858
|
+
/*** __builtin_addc ***/
|
859
|
+
|
860
|
+
/* Defines psnip_builtin_<f_n>(x, y, ci, co): add with carry.
 *
 * Returns x + y + ci in type T (wrapping) and stores the carry-out
 * (0 or 1) in *co.
 *
 *   f_n  suffix of the generated function name
 *   T    unsigned integer type of operands, carries and result
 */
#define PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(f_n, T) \
  PSNIP_BUILTIN__FUNCTION \
  T psnip_builtin_##f_n(T x, T y, T ci, T* co) { \
    const T limit = (T) ~(T) 0; \
    T sum = (T) x + y; \
    \
    /* x + y wraps exactly when x exceeds the headroom left above y. */ \
    *co = (T) (x > (limit - y)); \
    if (ci) { \
      /* A sum already at the maximum wraps once the carry is added. */ \
      if (sum == limit) \
        *co = 1; \
      sum += ci; \
    } \
    return sum; \
  }
|
874
|
+
|
875
|
+
#if PSNIP_BUILTIN_CLANG_HAS_BUILTIN(__builtin_addc)
|
876
|
+
# define psnip_builtin_addcb(x, y, ci, co) __builtin_addcb(x, y, ci, co)
|
877
|
+
# define psnip_builtin_addcs(x, y, ci, co) __builtin_addcs(x, y, ci, co)
|
878
|
+
# define psnip_builtin_addc(x, y, ci, co) __builtin_addc(x, y, ci, co)
|
879
|
+
# define psnip_builtin_addcl(x, y, ci, co) __builtin_addcl(x, y, ci, co)
|
880
|
+
# define psnip_builtin_addcll(x, y, ci, co) __builtin_addcll(x, y, ci, co)
|
881
|
+
# define psnip_builtin_addc8(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT8(_,addc)(x, y, ci, co)
|
882
|
+
# define psnip_builtin_addc16(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT16(_,addc)(x, y, ci, co)
|
883
|
+
# define psnip_builtin_addc32(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT32(_,addc)(x, y, ci, co)
|
884
|
+
# define psnip_builtin_addc64(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT64(_,addc)(x, y, ci, co)
|
885
|
+
#else
|
886
|
+
PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(addcb, unsigned char)
|
887
|
+
PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(addcs, unsigned short)
|
888
|
+
PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(addc, unsigned int)
|
889
|
+
PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(addcl, unsigned long)
|
890
|
+
PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(addcll, unsigned long long)
|
891
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
892
|
+
# define __builtin_addcb(x, y, ci, co) psnip_builtin_addcb(x, y, ci, co)
|
893
|
+
# define __builtin_addcs(x, y, ci, co) psnip_builtin_addcs(x, y, ci, co)
|
894
|
+
# define __builtin_addc(x, y, ci, co) psnip_builtin_addc(x, y, ci, co)
|
895
|
+
# define __builtin_addcl(x, y, ci, co) psnip_builtin_addcl(x, y, ci, co)
|
896
|
+
# define __builtin_addcll(x, y, ci, co) psnip_builtin_addcll(x, y, ci, co)
|
897
|
+
# endif
|
898
|
+
#endif
|
899
|
+
|
900
|
+
#if !defined(psnip_builtin_addc8)
|
901
|
+
# define psnip_builtin_addc8(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT8(psnip,addc)(x, y, ci, co)
|
902
|
+
#endif
|
903
|
+
|
904
|
+
#if !defined(psnip_builtin_addc16)
|
905
|
+
# define psnip_builtin_addc16(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT16(psnip,addc)(x, y, ci, co)
|
906
|
+
#endif
|
907
|
+
|
908
|
+
#if !defined(psnip_builtin_addc32)
|
909
|
+
# define psnip_builtin_addc32(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT32(psnip,addc)(x, y, ci, co)
|
910
|
+
#endif
|
911
|
+
|
912
|
+
#if !defined(psnip_builtin_addc64)
|
913
|
+
# define psnip_builtin_addc64(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT64(psnip,addc)(x, y, ci, co)
|
914
|
+
#endif
|
915
|
+
|
916
|
+
/*** __builtin_subc ***/
|
917
|
+
|
918
|
+
/* Defines psnip_builtin_<f_n>(x, y, ci, co): subtract with borrow.
 *
 * Returns x - y (minus one more when ci is non-zero) in type T (wrapping)
 * and stores the borrow-out (0 or 1) in *co.
 *
 *   f_n  suffix of the generated function name
 *   T    unsigned integer type of operands, borrows and result
 *
 * The borrow caused by the incoming borrow has to be detected BEFORE the
 * decrement: it occurs exactly when x - y is 0, because only then does the
 * extra "- 1" wrap below zero.  Testing after the decrement would report a
 * borrow for x - y == 1 (result 0, no wrap) and miss the real one for
 * x - y == 0.
 */
#define PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(f_n, T) \
  PSNIP_BUILTIN__FUNCTION \
  T psnip_builtin_##f_n(T x, T y, T ci, T* co) { \
    T r = x - y; \
    *co = x < y; \
    if (ci) { \
      if (r == 0) \
        *co = 1; \
      r--; \
    } \
    return r; \
  }
|
930
|
+
|
931
|
+
#if PSNIP_BUILTIN_CLANG_HAS_BUILTIN(__builtin_subc)
|
932
|
+
# define psnip_builtin_subcb(x, y, ci, co) __builtin_subcb(x, y, ci, co)
|
933
|
+
# define psnip_builtin_subcs(x, y, ci, co) __builtin_subcs(x, y, ci, co)
|
934
|
+
# define psnip_builtin_subc(x, y, ci, co) __builtin_subc(x, y, ci, co)
|
935
|
+
# define psnip_builtin_subcl(x, y, ci, co) __builtin_subcl(x, y, ci, co)
|
936
|
+
# define psnip_builtin_subcll(x, y, ci, co) __builtin_subcll(x, y, ci, co)
|
937
|
+
# define psnip_builtin_subc8(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT8(_,subc)(x, y, ci, co)
|
938
|
+
# define psnip_builtin_subc16(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT16(_,subc)(x, y, ci, co)
|
939
|
+
# define psnip_builtin_subc32(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT32(_,subc)(x, y, ci, co)
|
940
|
+
# define psnip_builtin_subc64(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT64(_,subc)(x, y, ci, co)
|
941
|
+
#else
|
942
|
+
PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(subcb, unsigned char)
|
943
|
+
PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(subcs, unsigned short)
|
944
|
+
PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(subc, unsigned int)
|
945
|
+
PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(subcl, unsigned long)
|
946
|
+
PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(subcll, unsigned long long)
|
947
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
948
|
+
# define __builtin_subcb(x, y, ci, co) psnip_builtin_subcb(x, y, ci, co)
|
949
|
+
# define __builtin_subcs(x, y, ci, co) psnip_builtin_subcs(x, y, ci, co)
|
950
|
+
# define __builtin_subc(x, y, ci, co) psnip_builtin_subc(x, y, ci, co)
|
951
|
+
# define __builtin_subcl(x, y, ci, co) psnip_builtin_subcl(x, y, ci, co)
|
952
|
+
# define __builtin_subcll(x, y, ci, co) psnip_builtin_subcll(x, y, ci, co)
|
953
|
+
# endif
|
954
|
+
#endif
|
955
|
+
|
956
|
+
#if !defined(psnip_builtin_subc8)
|
957
|
+
# define psnip_builtin_subc8(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT8(psnip,subc)(x, y, ci, co)
|
958
|
+
#endif
|
959
|
+
|
960
|
+
#if !defined(psnip_builtin_subc16)
|
961
|
+
# define psnip_builtin_subc16(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT16(psnip,subc)(x, y, ci, co)
|
962
|
+
#endif
|
963
|
+
|
964
|
+
#if !defined(psnip_builtin_subc32)
|
965
|
+
# define psnip_builtin_subc32(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT32(psnip,subc)(x, y, ci, co)
|
966
|
+
#endif
|
967
|
+
|
968
|
+
#if !defined(psnip_builtin_subc64)
|
969
|
+
# define psnip_builtin_subc64(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT64(psnip,subc)(x, y, ci, co)
|
970
|
+
#endif
|
971
|
+
|
972
|
+
/*** __builtin_bswap ***/
|
973
|
+
|
974
|
+
#if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_bswap16, 4, 8)
|
975
|
+
# define psnip_builtin_bswap16(x) __builtin_bswap16(x)
|
976
|
+
#else
|
977
|
+
# if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_byteswap_ushort,13,10)
|
978
|
+
# define psnip_builtin_bswap16(x) _byteswap_ushort(x)
|
979
|
+
# else
|
980
|
+
PSNIP_BUILTIN__FUNCTION
|
981
|
+
psnip_uint16_t
|
982
|
+
psnip_builtin_bswap16(psnip_uint16_t v) {
|
983
|
+
return
|
984
|
+
((v & (((psnip_uint16_t) 0xff) << 8)) >> 8) |
|
985
|
+
((v & (((psnip_uint16_t) 0xff) )) << 8);
|
986
|
+
}
|
987
|
+
# endif
|
988
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
989
|
+
# define __builtin_bswap16(x) psnip_builtin_bswap16(x)
|
990
|
+
# endif
|
991
|
+
#endif
|
992
|
+
|
993
|
+
#if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_bswap16, 4, 3)
|
994
|
+
# define psnip_builtin_bswap32(x) __builtin_bswap32(x)
|
995
|
+
# define psnip_builtin_bswap64(x) __builtin_bswap64(x)
|
996
|
+
#else
|
997
|
+
# if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_byteswap_ushort,13,10)
|
998
|
+
# define psnip_builtin_bswap32(x) _byteswap_ulong(x)
|
999
|
+
# define psnip_builtin_bswap64(x) _byteswap_uint64(x)
|
1000
|
+
# else
|
1001
|
+
PSNIP_BUILTIN__FUNCTION
|
1002
|
+
psnip_uint32_t
|
1003
|
+
psnip_builtin_bswap32(psnip_uint32_t v) {
|
1004
|
+
return
|
1005
|
+
((v & (((psnip_uint32_t) 0xff) << 24)) >> 24) |
|
1006
|
+
((v & (((psnip_uint32_t) 0xff) << 16)) >> 8) |
|
1007
|
+
((v & (((psnip_uint32_t) 0xff) << 8)) << 8) |
|
1008
|
+
((v & (((psnip_uint32_t) 0xff) )) << 24);
|
1009
|
+
}
|
1010
|
+
|
1011
|
+
PSNIP_BUILTIN__FUNCTION
|
1012
|
+
psnip_uint64_t
|
1013
|
+
psnip_builtin_bswap64(psnip_uint64_t v) {
|
1014
|
+
return
|
1015
|
+
((v & (((psnip_uint64_t) 0xff) << 56)) >> 56) |
|
1016
|
+
((v & (((psnip_uint64_t) 0xff) << 48)) >> 40) |
|
1017
|
+
((v & (((psnip_uint64_t) 0xff) << 40)) >> 24) |
|
1018
|
+
((v & (((psnip_uint64_t) 0xff) << 32)) >> 8) |
|
1019
|
+
((v & (((psnip_uint64_t) 0xff) << 24)) << 8) |
|
1020
|
+
((v & (((psnip_uint64_t) 0xff) << 16)) << 24) |
|
1021
|
+
((v & (((psnip_uint64_t) 0xff) << 8)) << 40) |
|
1022
|
+
((v & (((psnip_uint64_t) 0xff) )) << 56);
|
1023
|
+
}
|
1024
|
+
# endif
|
1025
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1026
|
+
# define __builtin_bswap32(x) psnip_builtin_bswap32(x)
|
1027
|
+
# define __builtin_bswap64(x) psnip_builtin_bswap64(x)
|
1028
|
+
# endif
|
1029
|
+
#endif
|
1030
|
+
|
1031
|
+
/******
|
1032
|
+
*** MSVC-style intrinsics
|
1033
|
+
******/
|
1034
|
+
|
1035
|
+
/*** _rotl ***/
|
1036
|
+
|
1037
|
+
#define PSNIP_BUILTIN_ROTL_DEFINE_PORTABLE(f_n, T, ST) \
|
1038
|
+
PSNIP_BUILTIN__FUNCTION \
|
1039
|
+
T psnip_intrin_##f_n(T value, ST shift) { \
|
1040
|
+
return \
|
1041
|
+
(value >> ((sizeof(T) * 8) - shift)) | \
|
1042
|
+
(value << shift); \
|
1043
|
+
}
|
1044
|
+
|
1045
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_rotl8, 14, 0)
|
1046
|
+
# define psnip_intrin_rotl8(value, shift) _rotl8(value, shift)
|
1047
|
+
# define psnip_intrin_rotl16(value, shift) _rotl16(value, shift)
|
1048
|
+
#else
|
1049
|
+
PSNIP_BUILTIN_ROTL_DEFINE_PORTABLE(rotl8, psnip_uint8_t, unsigned char)
|
1050
|
+
PSNIP_BUILTIN_ROTL_DEFINE_PORTABLE(rotl16, psnip_uint16_t, unsigned char)
|
1051
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1052
|
+
# if !defined(_rotl8)
|
1053
|
+
# define _rotl8(value, shift) psnip_intrin_rotl8(value, shift)
|
1054
|
+
# endif
|
1055
|
+
# if !defined(_rotl16)
|
1056
|
+
# define _rotl16(value, shift) psnip_intrin_rotl16(value, shift)
|
1057
|
+
# endif
|
1058
|
+
# endif
|
1059
|
+
#endif
|
1060
|
+
|
1061
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_rotl8, 13, 10)
|
1062
|
+
# define psnip_intrin_rotl(value, shift) _rotl(value, shift)
|
1063
|
+
# define psnip_intrin_rotl64(value, shift) _rotl64(value, shift)
|
1064
|
+
#else
|
1065
|
+
PSNIP_BUILTIN_ROTL_DEFINE_PORTABLE(rotl, psnip_uint32_t, int)
|
1066
|
+
PSNIP_BUILTIN_ROTL_DEFINE_PORTABLE(rotl64, psnip_uint64_t, int)
|
1067
|
+
|
1068
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1069
|
+
# if !defined(_rotl)
|
1070
|
+
# define _rotl(value, shift) psnip_intrin_rotl(value, shift)
|
1071
|
+
# endif
|
1072
|
+
# if !defined(_rotl64)
|
1073
|
+
# define _rotl64(value, shift) psnip_intrin_rotl64(value, shift)
|
1074
|
+
# endif
|
1075
|
+
# endif
|
1076
|
+
#endif
|
1077
|
+
|
1078
|
+
/*** _rotr ***/
|
1079
|
+
|
1080
|
+
#define PSNIP_BUILTIN_ROTR_DEFINE_PORTABLE(f_n, T, ST) \
|
1081
|
+
PSNIP_BUILTIN__FUNCTION \
|
1082
|
+
T psnip_intrin_##f_n(T value, ST shift) { \
|
1083
|
+
return \
|
1084
|
+
(value << ((sizeof(T) * 8) - shift)) | \
|
1085
|
+
(value >> shift); \
|
1086
|
+
}
|
1087
|
+
|
1088
|
+
PSNIP_BUILTIN_ROTR_DEFINE_PORTABLE(rotr8, psnip_uint8_t, unsigned char)
|
1089
|
+
PSNIP_BUILTIN_ROTR_DEFINE_PORTABLE(rotr16, psnip_uint16_t, unsigned char)
|
1090
|
+
|
1091
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_rotr8, 14, 0)
|
1092
|
+
# define psnip_intrin_rotr8(value, shift) _rotr8(value, shift)
|
1093
|
+
# define psnip_intrin_rotr16(value, shift) _rotr16(value, shift)
|
1094
|
+
#else
|
1095
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1096
|
+
# define _rotr8(value, shift) psnip_intrin_rotr8(value, shift)
|
1097
|
+
# define _rotr16(value, shift) psnip_intrin_rotr16(value, shift)
|
1098
|
+
# endif
|
1099
|
+
#endif
|
1100
|
+
|
1101
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_rotr8, 13, 10)
|
1102
|
+
# define psnip_intrin_rotr(value, shift) _rotr(value, shift)
|
1103
|
+
# define psnip_intrin_rotr64(value, shift) _rotr64(value, shift)
|
1104
|
+
#else
|
1105
|
+
PSNIP_BUILTIN_ROTR_DEFINE_PORTABLE(rotr, psnip_uint32_t, int)
|
1106
|
+
PSNIP_BUILTIN_ROTR_DEFINE_PORTABLE(rotr64, psnip_uint64_t, int)
|
1107
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1108
|
+
# if !defined(_rotr)
|
1109
|
+
# define _rotr(value, shift) psnip_intrin_rotr(value, shift)
|
1110
|
+
# endif
|
1111
|
+
# if !defined(_rotr64)
|
1112
|
+
# define _rotr64(value, shift) psnip_intrin_rotr64(value, shift)
|
1113
|
+
# endif
|
1114
|
+
# endif
|
1115
|
+
#endif
|
1116
|
+
|
1117
|
+
/*** _BitScanForward ***/
|
1118
|
+
|
1119
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanForward, 14, 0)
|
1120
|
+
# pragma intrinsic(_BitScanForward)
|
1121
|
+
PSNIP_BUILTIN__FUNCTION
|
1122
|
+
unsigned char psnip_intrin_BitScanForward(unsigned long* Index, psnip_uint32_t Mask) {
|
1123
|
+
const unsigned long M = (unsigned long) Mask;
|
1124
|
+
return _BitScanForward(Index, M);
|
1125
|
+
}
|
1126
|
+
#else
|
1127
|
+
PSNIP_BUILTIN__FUNCTION
|
1128
|
+
unsigned char psnip_intrin_BitScanForward(unsigned long* Index, psnip_uint32_t Mask) {
|
1129
|
+
return PSNIP_BUILTIN_UNLIKELY(Mask == 0) ? 0 : ((*Index = psnip_builtin_ctz32 (Mask)), 1);
|
1130
|
+
}
|
1131
|
+
|
1132
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1133
|
+
# define _BitScanForward(Index, Mask) psnip_intrin_BitScanForward(Index, Mask)
|
1134
|
+
# endif
|
1135
|
+
#endif
|
1136
|
+
|
1137
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanForward64, 14, 0) && (defined(_M_AMD64) || defined(_M_ARM))
|
1138
|
+
# pragma intrinsic(_BitScanForward64)
|
1139
|
+
# define psnip_intrin_BitScanForward64(Index, Mask) _BitScanForward64(Index, Mask)
|
1140
|
+
#else
|
1141
|
+
PSNIP_BUILTIN__FUNCTION
|
1142
|
+
unsigned char psnip_intrin_BitScanForward64(unsigned long* Index, psnip_uint64_t Mask) {
|
1143
|
+
return PSNIP_BUILTIN_UNLIKELY(Mask == 0) ? 0 : ((*Index = psnip_builtin_ctz64 (Mask)), 1);
|
1144
|
+
}
|
1145
|
+
|
1146
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1147
|
+
# define _BitScanForward64(Index, Mask) psnip_intrin_BitScanForward64(Index, Mask)
|
1148
|
+
# endif
|
1149
|
+
#endif
|
1150
|
+
|
1151
|
+
/*** _BitScanReverse ***/
|
1152
|
+
|
1153
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanReverse, 14, 0)
|
1154
|
+
# pragma intrinsic(_BitScanReverse)
|
1155
|
+
PSNIP_BUILTIN__FUNCTION
|
1156
|
+
unsigned char psnip_intrin_BitScanReverse(unsigned long* Index, psnip_uint32_t Mask) {
|
1157
|
+
const unsigned long M = (unsigned long) Mask;
|
1158
|
+
return _BitScanReverse(Index, M);
|
1159
|
+
}
|
1160
|
+
#else
|
1161
|
+
PSNIP_BUILTIN__FUNCTION
|
1162
|
+
unsigned char psnip_intrin_BitScanReverse(unsigned long* Index, psnip_uint32_t Mask) {
|
1163
|
+
return (PSNIP_BUILTIN_UNLIKELY(Mask == 0)) ? 0 : ((*Index = ((sizeof(Mask) * CHAR_BIT) - 1) - psnip_builtin_clz32 (Mask)), 1);
|
1164
|
+
}
|
1165
|
+
|
1166
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1167
|
+
# define _BitScanReverse(Index, Mask) psnip_intrin_BitScanReverse(Index, Mask)
|
1168
|
+
# endif
|
1169
|
+
#endif
|
1170
|
+
|
1171
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanReverse64, 14, 0) && (defined(_M_AMD64) || defined(_M_ARM))
|
1172
|
+
# pragma intrinsic(_BitScanReverse64)
|
1173
|
+
# define psnip_intrin_BitScanReverse64(Index, Mask) _BitScanReverse64(Index, Mask)
|
1174
|
+
#else
|
1175
|
+
PSNIP_BUILTIN__FUNCTION
|
1176
|
+
unsigned char psnip_intrin_BitScanReverse64(unsigned long* Index, psnip_uint64_t Mask) {
|
1177
|
+
return (PSNIP_BUILTIN_UNLIKELY(Mask == 0)) ? 0 : ((*Index = ((sizeof(Mask) * CHAR_BIT) - 1) - psnip_builtin_clz64 (Mask)), 1);
|
1178
|
+
}
|
1179
|
+
|
1180
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1181
|
+
# define _BitScanReverse64(Index, Mask) psnip_intrin_BitScanReverse64(Index, Mask)
|
1182
|
+
# endif
|
1183
|
+
#endif
|
1184
|
+
|
1185
|
+
/*** bittest ***/
|
1186
|
+
|
1187
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittest, 14, 0)
|
1188
|
+
# pragma intrinsic(_bittest)
|
1189
|
+
# define psnip_intrin_bittest(a, b) \
|
1190
|
+
__pragma(warning(push)) \
|
1191
|
+
__pragma(warning(disable:4057)) \
|
1192
|
+
_bittest(a, b) \
|
1193
|
+
__pragma(warning(pop))
|
1194
|
+
#else
|
1195
|
+
# define psnip_intrin_bittest(a, b) (((*(a)) >> (b)) & 1)
|
1196
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1197
|
+
# define _bittest(a, b) psnip_intrin_bittest(a, b)
|
1198
|
+
# endif
|
1199
|
+
#endif
|
1200
|
+
|
1201
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittest64, 14, 0) && (defined(_M_AMD64) || defined(_M_ARM))
|
1202
|
+
# pragma intrinsic(_bittest64)
|
1203
|
+
# define psnip_intrin_bittest64(a, b) _bittest64(a, b)
|
1204
|
+
#else
|
1205
|
+
# define psnip_intrin_bittest64(a, b) (((*(a)) >> (b)) & 1)
|
1206
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1207
|
+
# define _bittest64(a, b) psnip_intrin_bittest64(a, b)
|
1208
|
+
# endif
|
1209
|
+
#endif
|
1210
|
+
|
1211
|
+
/*** bittestandcomplement ***/
|
1212
|
+
|
1213
|
+
#define PSNIP_BUILTIN__BITTESTANDCOMPLEMENT_DEFINE_PORTABLE(f_n, T, UT) \
|
1214
|
+
PSNIP_BUILTIN__FUNCTION \
|
1215
|
+
unsigned char psnip_intrin_##f_n(T* a, T b) { \
|
1216
|
+
const char r = (*a >> b) & 1; \
|
1217
|
+
*a ^= ((UT) 1) << b; \
|
1218
|
+
return r; \
|
1219
|
+
}
|
1220
|
+
|
1221
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandcomplement, 14, 0)
|
1222
|
+
# pragma intrinsic(_bittestandcomplement)
|
1223
|
+
# define psnip_intrin_bittestandcomplement(a, b) \
|
1224
|
+
__pragma(warning(push)) \
|
1225
|
+
__pragma(warning(disable:4057)) \
|
1226
|
+
_bittestandcomplement(a, b) \
|
1227
|
+
__pragma(warning(pop))
|
1228
|
+
#else
|
1229
|
+
PSNIP_BUILTIN__BITTESTANDCOMPLEMENT_DEFINE_PORTABLE(bittestandcomplement, psnip_int32_t, psnip_uint32_t)
|
1230
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1231
|
+
# define _bittestandcomplement(a, b) psnip_intrin_bittestandcomplement(a, b)
|
1232
|
+
# endif
|
1233
|
+
#endif
|
1234
|
+
|
1235
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandcomplement64, 14, 0) && defined(_M_AMD64)
|
1236
|
+
# define psnip_intrin_bittestandcomplement64(a, b) _bittestandcomplement64(a, b)
|
1237
|
+
#else
|
1238
|
+
PSNIP_BUILTIN__BITTESTANDCOMPLEMENT_DEFINE_PORTABLE(bittestandcomplement64, psnip_int64_t, psnip_uint64_t)
|
1239
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1240
|
+
# define _bittestandcomplement64(a, b) psnip_intrin_bittestandcomplement64(a, b)
|
1241
|
+
# endif
|
1242
|
+
#endif
|
1243
|
+
|
1244
|
+
/*** bittestandreset ***/
|
1245
|
+
|
1246
|
+
#define PSNIP_BUILTIN__BITTESTANDRESET_DEFINE_PORTABLE(f_n, T, UT) \
|
1247
|
+
PSNIP_BUILTIN__FUNCTION \
|
1248
|
+
unsigned char psnip_intrin_##f_n(T* a, T b) { \
|
1249
|
+
const char r = (*a >> b) & 1; \
|
1250
|
+
*a &= ~(((UT) 1) << b); \
|
1251
|
+
return r; \
|
1252
|
+
}
|
1253
|
+
|
1254
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandreset, 14, 0)
|
1255
|
+
# pragma intrinsic(_bittestandreset)
|
1256
|
+
# define psnip_intrin_bittestandreset(a, b) \
|
1257
|
+
__pragma(warning(push)) \
|
1258
|
+
__pragma(warning(disable:4057)) \
|
1259
|
+
_bittestandreset(a, b) \
|
1260
|
+
__pragma(warning(pop))
|
1261
|
+
#else
|
1262
|
+
PSNIP_BUILTIN__BITTESTANDRESET_DEFINE_PORTABLE(bittestandreset, psnip_int32_t, psnip_uint32_t)
|
1263
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1264
|
+
# define _bittestandreset(a, b) psnip_intrin_bittestandreset(a, b)
|
1265
|
+
# endif
|
1266
|
+
#endif
|
1267
|
+
|
1268
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandreset64, 14, 0) && (defined(_M_AMD64) || defined(_M_IA64))
|
1269
|
+
# pragma intrinsic(_bittestandreset64)
|
1270
|
+
# define psnip_intrin_bittestandreset64(a, b) _bittestandreset64(a, b)
|
1271
|
+
#else
|
1272
|
+
PSNIP_BUILTIN__BITTESTANDRESET_DEFINE_PORTABLE(bittestandreset64, psnip_int64_t, psnip_uint64_t)
|
1273
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1274
|
+
# define _bittestandreset64(a, b) psnip_intrin_bittestandreset64(a, b)
|
1275
|
+
# endif
|
1276
|
+
#endif
|
1277
|
+
|
1278
|
+
/*** bittestandset ***/
|
1279
|
+
|
1280
|
+
#define PSNIP_BUILTIN__BITTESTANDSET_DEFINE_PORTABLE(f_n, T, UT) \
|
1281
|
+
PSNIP_BUILTIN__FUNCTION \
|
1282
|
+
unsigned char psnip_intrin_##f_n(T* a, T b) { \
|
1283
|
+
const char r = (*a >> b) & 1; \
|
1284
|
+
*a |= ((UT) 1) << b; \
|
1285
|
+
return r; \
|
1286
|
+
}
|
1287
|
+
|
1288
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandset, 14, 0)
|
1289
|
+
# pragma intrinsic(_bittestandset)
|
1290
|
+
# define psnip_intrin_bittestandset(a, b) \
|
1291
|
+
__pragma(warning(push)) \
|
1292
|
+
__pragma(warning(disable:4057)) \
|
1293
|
+
_bittestandset(a, b) \
|
1294
|
+
__pragma(warning(pop))
|
1295
|
+
#else
|
1296
|
+
PSNIP_BUILTIN__BITTESTANDSET_DEFINE_PORTABLE(bittestandset, psnip_int32_t, psnip_uint32_t)
|
1297
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1298
|
+
# define _bittestandset(a, b) psnip_intrin_bittestandset(a, b)
|
1299
|
+
# endif
|
1300
|
+
#endif
|
1301
|
+
|
1302
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandset64, 14, 0) && defined(_M_AMD64)
|
1303
|
+
# pragma intrinsic(_bittestandset64)
|
1304
|
+
# define psnip_intrin_bittestandset64(a, b) _bittestandset64(a, b)
|
1305
|
+
#else
|
1306
|
+
PSNIP_BUILTIN__BITTESTANDSET_DEFINE_PORTABLE(bittestandset64, psnip_int64_t, psnip_uint64_t)
|
1307
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1308
|
+
# define _bittestandset64(a, b) psnip_intrin_bittestandset64(a, b)
|
1309
|
+
# endif
|
1310
|
+
#endif
|
1311
|
+
|
1312
|
+
/*** shiftleft128 ***/
|
1313
|
+
|
1314
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(__shiftleft128, 14, 0) && defined(_M_AMD64)
|
1315
|
+
# define psnip_intrin_shiftleft128(LowPart, HighPart, Shift) __shiftleft128(LowPart, HighPart, Shift)
|
1316
|
+
#else
|
1317
|
+
# if defined(__SIZEOF_INT128__)
|
1318
|
+
PSNIP_BUILTIN__FUNCTION
|
1319
|
+
psnip_uint64_t psnip_intrin_shiftleft128(psnip_uint64_t LowPart, psnip_uint64_t HighPart, unsigned char Shift) {
|
1320
|
+
unsigned __int128 r = HighPart;
|
1321
|
+
r <<= 64;
|
1322
|
+
r |= LowPart;
|
1323
|
+
r <<= Shift % 64;
|
1324
|
+
return (psnip_uint64_t) (r >> 64);
|
1325
|
+
}
|
1326
|
+
# else
|
1327
|
+
PSNIP_BUILTIN__FUNCTION
|
1328
|
+
psnip_uint64_t psnip_intrin_shiftleft128(psnip_uint64_t LowPart, psnip_uint64_t HighPart, unsigned char Shift) {
|
1329
|
+
Shift %= 64;
|
1330
|
+
return PSNIP_BUILTIN_UNLIKELY(Shift == 0) ? HighPart : ((HighPart << Shift) | (LowPart >> (64 - Shift)));
|
1331
|
+
}
|
1332
|
+
# endif
|
1333
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1334
|
+
# define __shiftleft128(LowPart, HighPart, Shift) psnip_intrin_shiftleft128(LowPart, HighPart, Shift)
|
1335
|
+
# endif
|
1336
|
+
#endif
|
1337
|
+
|
1338
|
+
/*** shiftright128 ***/
|
1339
|
+
|
1340
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(__shiftright128, 14, 0) && defined(_M_AMD64)
|
1341
|
+
# define psnip_intrin_shiftright128(LowPart, HighPart, Shift) __shiftright128(LowPart, HighPart, Shift)
|
1342
|
+
#else
|
1343
|
+
# if defined(__SIZEOF_INT128__)
|
1344
|
+
PSNIP_BUILTIN__FUNCTION
|
1345
|
+
psnip_uint64_t psnip_intrin_shiftright128(psnip_uint64_t LowPart, psnip_uint64_t HighPart, unsigned char Shift) {
|
1346
|
+
unsigned __int128 r = HighPart;
|
1347
|
+
r <<= 64;
|
1348
|
+
r |= LowPart;
|
1349
|
+
r >>= Shift % 64;
|
1350
|
+
return (psnip_uint64_t) r;
|
1351
|
+
}
|
1352
|
+
# else
|
1353
|
+
PSNIP_BUILTIN__FUNCTION
|
1354
|
+
psnip_uint64_t psnip_intrin_shiftright128(psnip_uint64_t LowPart, psnip_uint64_t HighPart, unsigned char Shift) {
|
1355
|
+
Shift %= 64;
|
1356
|
+
|
1357
|
+
if (PSNIP_BUILTIN_UNLIKELY(Shift == 0))
|
1358
|
+
return LowPart;
|
1359
|
+
|
1360
|
+
return
|
1361
|
+
(HighPart << (64 - Shift)) |
|
1362
|
+
(LowPart >> Shift);
|
1363
|
+
}
|
1364
|
+
# endif
|
1365
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1366
|
+
# define __shiftright128(LowPart, HighPart, Shift) psnip_intrin_shiftright128(LowPart, HighPart, Shift)
|
1367
|
+
# endif
|
1368
|
+
#endif
|
1369
|
+
|
1370
|
+
/*** byteswap ***/
|
1371
|
+
|
1372
|
+
#if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_byteswap_ushort,13,10)
|
1373
|
+
# pragma intrinsic(_byteswap_ushort)
|
1374
|
+
# define psnip_intrin_byteswap_ushort(v) _byteswap_ushort(v)
|
1375
|
+
# pragma intrinsic(_byteswap_ulong)
|
1376
|
+
# define psnip_intrin_byteswap_ulong(v) _byteswap_ulong(v)
|
1377
|
+
# pragma intrinsic(_byteswap_uint64)
|
1378
|
+
# define psnip_intrin_byteswap_uint64(v) _byteswap_uint64(v)
|
1379
|
+
#else
|
1380
|
+
# define psnip_intrin_byteswap_ushort(v) psnip_builtin_bswap16(v)
|
1381
|
+
# define psnip_intrin_byteswap_ulong(v) psnip_builtin_bswap32(v)
|
1382
|
+
# define psnip_intrin_byteswap_uint64(v) psnip_builtin_bswap64(v)
|
1383
|
+
# if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
|
1384
|
+
# define _byteswap_ushort(v) psnip_intrin_byteswap_ushort(v)
|
1385
|
+
# define _byteswap_ulong(v) psnip_intrin_byteswap_ulong(v)
|
1386
|
+
# define _byteswap_uint64(v) psnip_intrin_byteswap_uint64(v)
|
1387
|
+
# endif
|
1388
|
+
#endif
|
1389
|
+
|
1390
|
+
#endif /* defined(PSNIP_BUILTIN_H) */
|