bitset 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 67a16f9ffaa21475905ca59bde6734954735e3c7
4
- data.tar.gz: c734af4e2b220c01b760f1e93fd05940ca79569d
3
+ metadata.gz: d7264d2aacb2a035cc9d03a759e3d7df802490d7
4
+ data.tar.gz: b67c9f93d7b5dc611807cdd148ffdcf871588bbf
5
5
  SHA512:
6
- metadata.gz: 6603741fd0c7f3a5e348e3db792695fdfc5926062567d401e49b2c66f8ae7e44dc8e1eed66fc8d69859305dc265a3abe2377dad5486ee234da872fa427a9119b
7
- data.tar.gz: 50f4c82523b31f15d176d7dc26a47be1610135bd4b826f2633abe960394c3fea407d67b7b624dc27148004374f1035968880fb007753c7efd2e440166d429431
6
+ metadata.gz: f5374f4557cf95b59488c7e84f9af6865b8c2b92dc605369e7e756e4be6fc1abfbbbef85f6198fe192ed43a4594bd59519da98bcb8577bd8024961429ac1c6a9
7
+ data.tar.gz: 77c7dd54f09f8f7f0e439324a8e01c22959e6d6a7e2d517607eb803724099173012fbd85cf736aef61a8e3519d2ac01fde340e298094e66a917f0418e4f0841a
@@ -52,6 +52,8 @@ Obviously you can also set and clear bits...
52
52
  >> bitset.clear(1, 5)
53
53
  => 00010001
54
54
 
55
+ Arrays of ints can also be passed to #clear and #set (courtesy of brendon9x).
56
+
55
57
  The point of a bitset is to be, effectively, an array of single bits. It should
56
58
  support basic set and bitwise operations. So, let's look at a few of those.
57
59
 
@@ -89,8 +91,10 @@ support basic set and bitwise operations. So, let's look at a few of those.
89
91
  >> a.set? 6
90
92
  => true
91
93
 
92
- # Return a new Bitset composed of bits #1, #3, #5, #4, and #1 again
93
- >> a.select_bits [1,3,5,4,1]
94
+ # Return a new Bitset composed of bits #1, #3, #5, #4, and #1
95
+ # again. Unlike Array#values_at, this function currently only
96
+ # accepts an array of Fixnums as its argument.
97
+ >> a.values_at [1,3,5,4,1]
94
98
  => 00110
95
99
 
96
100
  # Tell whether all of the given bit numbers are clear
@@ -116,15 +120,15 @@ support basic set and bitwise operations. So, let's look at a few of those.
116
120
  7
117
121
 
118
122
  # Return an array of the positions of all set bits
119
- >> b.each_set
123
+ >> b.each_set # AKA b.to_a
120
124
  => [1, 3, 5, 7]
121
125
 
122
126
  # The following methods modify a Bitset in place very quickly:
123
127
  >> a.intersect!(b) # like a &= b
124
128
  >> a.union!(b) # like a |= b
125
129
  >> a.difference!(b) # like a -= b
126
- >> a.xor!(b) # alias a.symmetric_difference!(b), like a ^= b
127
- >> a.reset!
130
+ >> a.xor!(b) # like a ^= b
131
+ >> a.reset! # Zeroes all bits
128
132
 
129
133
  # Above, "like" does not mean "identical to." a |= b creates a new
130
134
  # Bitset object. a.union!(b) changes an existing object which
@@ -134,6 +138,9 @@ support basic set and bitwise operations. So, let's look at a few of those.
134
138
  # equivalents between bitsets of different sizes will raise an
135
139
  # ArgumentError.
136
140
 
141
+ >> b.to_binary_array
142
+ => [0, 1, 0, 1, 0, 1, 0, 1]
143
+
137
144
  # b.dup and b.clone are also available.
138
145
 
139
146
  # Marshal.dump and Marshal.load are also supported. If you want to
data/Rakefile CHANGED
@@ -15,9 +15,11 @@ Jeweler::Tasks.new do |gem|
15
15
  gem.homepage = "http://github.com/ericboesch/bitset"
16
16
  gem.license = "MIT"
17
17
  gem.summary = 'Bitset implementation.'
18
- gem.description = 'A fast C-based Bitset. It supports the standard set operations as well as operations you may expect on bit arrays. (popcount, for instance)'
18
+ gem.description = 'A fast C-based Bitset. It supports the standard set operations as well as operations you may expect on bit arrays,such as popcount.'
19
19
  gem.email = "eric.boesch@nist.gov"
20
20
  gem.authors = ["Tyler McMullen"]
21
+ # Other significant contributions from Eric Boesch, Gabriel Formica, and Brendon McLean.
22
+
21
23
  end
22
24
  Jeweler::RubygemsDotOrgTasks.new
23
25
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.0
1
+ 1.0.1
@@ -2,49 +2,40 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
+ # stub: bitset 1.0.1 ruby lib
6
+ # stub: ext/bitset/extconf.rb
5
7
 
6
8
  Gem::Specification.new do |s|
7
- s.name = %q{bitset}
8
- s.version = "0.3.0"
9
+ s.name = "bitset".freeze
10
+ s.version = "1.0.1"
9
11
 
10
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
- s.authors = ["Tyler McMullen", "Eric Boesch"]
12
- s.date = %q{2016-02-22}
13
- s.description = %q{A fast C-based Bitset. It supports the standard set operations as well as operations you may expect on bit arrays. (popcount, for instance)}
14
- s.email = %q{ericboesch@gmail.com}
15
- s.extensions = ["ext/bitset/extconf.rb"]
12
+ s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
13
+ s.require_paths = ["lib".freeze]
14
+ s.authors = ["Tyler McMullen".freeze]
15
+ s.date = "2017-05-26"
16
+ s.description = "A fast C-based Bitset. It supports the standard set operations as well as operations you may expect on bit arrays,such as popcount.".freeze
17
+ s.email = "eric.boesch@nist.gov".freeze
18
+ s.extensions = ["ext/bitset/extconf.rb".freeze]
16
19
  s.extra_rdoc_files = [
17
20
  "LICENSE.txt",
18
- "README.rdoc"
21
+ "README.markdown"
19
22
  ]
20
23
  s.files = [
21
24
  "LICENSE.txt",
22
- "README.rdoc",
25
+ "README.markdown",
23
26
  "Rakefile",
24
27
  "VERSION",
25
28
  "bitset.gemspec",
26
29
  "ext/bitset/bitset.c",
30
+ "ext/bitset/builtin.h",
31
+ "ext/bitset/exact-int.h",
27
32
  "ext/bitset/extconf.rb",
28
- "lib/bitset/bitset.rb",
33
+ "lib/bitset.rb",
29
34
  "spec/bitset_spec.rb"
30
35
  ]
31
- s.homepage = %q{http://github.com/ericboesch/bitset}
32
- s.licenses = ["MIT"]
33
- s.require_paths = ["lib"]
34
- s.rubygems_version = %q{1.3.7}
35
- s.summary = %q{Bitset implementation.}
36
- s.test_files = [
37
- "spec/bitset_spec.rb"
38
- ]
39
-
40
- if s.respond_to? :specification_version then
41
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
42
- s.specification_version = 3
43
-
44
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
45
- else
46
- end
47
- else
48
- end
36
+ s.homepage = "http://github.com/ericboesch/bitset".freeze
37
+ s.licenses = ["MIT".freeze]
38
+ s.rubygems_version = "2.6.12".freeze
39
+ s.summary = "Bitset implementation.".freeze
49
40
  end
50
41
 
@@ -1,4 +1,5 @@
1
1
  #include "ruby.h"
2
+ #include "builtin.h"
2
3
 
3
4
  #include <stdint.h>
4
5
  #include <string.h>
@@ -105,11 +106,21 @@ static VALUE rb_bitset_aset(VALUE self, VALUE index, VALUE value) {
105
106
  static VALUE rb_bitset_set(int argc, VALUE * argv, VALUE self) {
106
107
  int i;
107
108
  Bitset * bs = get_bitset(self);
108
- for(i = 0; i < argc; i++) {
109
- VALUE index = argv[i];
110
- int idx = NUM2INT(index);
111
- validate_index(bs, idx);
112
- _set_bit(bs, idx);
109
+
110
+ if (argc == 1 && rb_obj_is_kind_of(argv[0], rb_const_get(rb_cObject, rb_intern("Array")))) {
111
+ for(i = 0; i < RARRAY_LEN(argv[0]); i++) {
112
+ VALUE index = RARRAY_PTR(argv[0])[i];
113
+ int idx = NUM2INT(index);
114
+ validate_index(bs, idx);
115
+ _set_bit(bs, idx);
116
+ }
117
+ } else {
118
+ for(i = 0; i < argc; i++) {
119
+ VALUE index = argv[i];
120
+ int idx = NUM2INT(index);
121
+ validate_index(bs, idx);
122
+ _set_bit(bs, idx);
123
+ }
113
124
  }
114
125
  return Qtrue;
115
126
  }
@@ -117,11 +128,21 @@ static VALUE rb_bitset_set(int argc, VALUE * argv, VALUE self) {
117
128
  static VALUE rb_bitset_clear(int argc, VALUE * argv, VALUE self) {
118
129
  int i;
119
130
  Bitset * bs = get_bitset(self);
120
- for(i = 0; i < argc; i++) {
121
- VALUE index = argv[i];
122
- int idx = NUM2INT(index);
123
- validate_index(bs, idx);
124
- _clear_bit(bs, idx);
131
+
132
+ if (argc == 1 && rb_obj_is_kind_of(argv[0], rb_const_get(rb_cObject, rb_intern("Array")))) {
133
+ for(i = 0; i < RARRAY_LEN(argv[0]); i++) {
134
+ VALUE index = RARRAY_PTR(argv[0])[i];
135
+ int idx = NUM2INT(index);
136
+ validate_index(bs, idx);
137
+ _clear_bit(bs, idx);
138
+ }
139
+ } else {
140
+ for(i = 0; i < argc; i++) {
141
+ VALUE index = argv[i];
142
+ int idx = NUM2INT(index);
143
+ validate_index(bs, idx);
144
+ _clear_bit(bs, idx);
145
+ }
125
146
  }
126
147
  return Qtrue;
127
148
  }
@@ -158,7 +179,7 @@ static VALUE rb_bitset_cardinality(VALUE self) {
158
179
  int max = INTS(bs);
159
180
  int count = 0;
160
181
  for(i = 0; i < max; i++) {
161
- count += __builtin_popcountll(bs->data[i]);
182
+ count += psnip_builtin_popcount64(bs->data[i]);
162
183
  }
163
184
  return INT2NUM(count);
164
185
  }
@@ -166,13 +187,14 @@ static VALUE rb_bitset_cardinality(VALUE self) {
166
187
  static VALUE rb_bitset_intersect(VALUE self, VALUE other) {
167
188
  Bitset * bs = get_bitset(self);
168
189
  Bitset * other_bs = get_bitset(other);
169
- verify_equal_size(bs, other_bs);
190
+ Bitset * new_bs;
191
+ int max = INTS(bs);
192
+ int i;
170
193
 
171
- Bitset * new_bs = bitset_new();
194
+ verify_equal_size(bs, other_bs);
195
+ new_bs = bitset_new();
172
196
  bitset_setup(new_bs, bs->len);
173
197
 
174
- int max = INTS(bs);
175
- int i;
176
198
  for(i = 0; i < max; i++) {
177
199
  uint64_t segment = bs->data[i];
178
200
  uint64_t other_segment = other_bs->data[i];
@@ -185,13 +207,14 @@ static VALUE rb_bitset_intersect(VALUE self, VALUE other) {
185
207
  static VALUE rb_bitset_union(VALUE self, VALUE other) {
186
208
  Bitset * bs = get_bitset(self);
187
209
  Bitset * other_bs = get_bitset(other);
188
- verify_equal_size(bs, other_bs);
210
+ Bitset * new_bs;
211
+ int max = INTS(bs);
212
+ int i;
189
213
 
190
- Bitset * new_bs = bitset_new();
214
+ verify_equal_size(bs, other_bs);
215
+ new_bs = bitset_new();
191
216
  bitset_setup(new_bs, bs->len);
192
217
 
193
- int max = INTS(bs);
194
- int i;
195
218
  for(i = 0; i < max; i++) {
196
219
  uint64_t segment = bs->data[i];
197
220
  uint64_t other_segment = other_bs->data[i];
@@ -204,13 +227,14 @@ static VALUE rb_bitset_union(VALUE self, VALUE other) {
204
227
  static VALUE rb_bitset_difference(VALUE self, VALUE other) {
205
228
  Bitset * bs = get_bitset(self);
206
229
  Bitset * other_bs = get_bitset(other);
207
- verify_equal_size(bs, other_bs);
230
+ Bitset * new_bs;
231
+ int max = INTS(bs);
232
+ int i;
208
233
 
209
- Bitset * new_bs = bitset_new();
234
+ verify_equal_size(bs, other_bs);
235
+ new_bs = bitset_new();
210
236
  bitset_setup(new_bs, bs->len);
211
237
 
212
- int max = INTS(bs);
213
- int i;
214
238
  for(i = 0; i < max; i++) {
215
239
  uint64_t segment = bs->data[i];
216
240
  uint64_t other_segment = other_bs->data[i];
@@ -223,13 +247,14 @@ static VALUE rb_bitset_difference(VALUE self, VALUE other) {
223
247
  static VALUE rb_bitset_xor(VALUE self, VALUE other) {
224
248
  Bitset * bs = get_bitset(self);
225
249
  Bitset * other_bs = get_bitset(other);
226
- verify_equal_size(bs, other_bs);
250
+ Bitset * new_bs;
251
+ int max = INTS(bs);
252
+ int i;
227
253
 
228
- Bitset * new_bs = bitset_new();
254
+ verify_equal_size(bs, other_bs);
255
+ new_bs = bitset_new();
229
256
  bitset_setup(new_bs, bs->len);
230
257
 
231
- int max = INTS(bs);
232
- int i;
233
258
  for(i = 0; i < max; i++) {
234
259
  uint64_t segment = bs->data[i];
235
260
  uint64_t other_segment = other_bs->data[i];
@@ -241,13 +266,11 @@ static VALUE rb_bitset_xor(VALUE self, VALUE other) {
241
266
 
242
267
  static VALUE rb_bitset_not(VALUE self) {
243
268
  Bitset * bs = get_bitset(self);
244
-
245
269
  Bitset * new_bs = bitset_new();
246
- bitset_setup(new_bs, bs->len);
247
-
248
270
  int max = INTS(bs);
249
-
250
271
  int i;
272
+
273
+ bitset_setup(new_bs, bs->len);
251
274
  for(i = 0; i < max; i++) {
252
275
  uint64_t segment = bs->data[i];
253
276
  new_bs->data[i] = ~segment;
@@ -274,11 +297,11 @@ static VALUE rb_bitset_to_s(VALUE self) {
274
297
  static VALUE rb_bitset_from_s(VALUE self, VALUE s) {
275
298
  int length = RSTRING_LEN(s);
276
299
  char* data = StringValuePtr(s);
277
-
278
300
  Bitset * new_bs = bitset_new();
301
+ int i;
302
+
279
303
  bitset_setup(new_bs, length);
280
304
 
281
- int i;
282
305
  for (i = 0; i < length; i++) {
283
306
  if (data[i] == '1') {
284
307
  _set_bit(new_bs, i);
@@ -298,7 +321,7 @@ static VALUE rb_bitset_hamming(VALUE self, VALUE other) {
298
321
  for(i = 0; i < max; i++) {
299
322
  uint64_t segment = bs->data[i];
300
323
  uint64_t other_segment = other_bs->data[i];
301
- count += __builtin_popcountll(segment ^ other_segment);
324
+ count += psnip_builtin_popcount64(segment ^ other_segment);
302
325
  }
303
326
 
304
327
  return INT2NUM(count);
@@ -318,7 +341,7 @@ static VALUE rb_bitset_each(VALUE self) {
318
341
  static VALUE rb_bitset_marshall_dump(VALUE self) {
319
342
  Bitset * bs = get_bitset(self);
320
343
  VALUE hash = rb_hash_new();
321
- VALUE data = rb_str_new(bs->data, BYTES(bs));
344
+ VALUE data = rb_str_new((const char *) bs->data, BYTES(bs));
322
345
 
323
346
  rb_hash_aset(hash, ID2SYM(rb_intern("len")), UINT2NUM(bs->len));
324
347
  rb_hash_aset(hash, ID2SYM(rb_intern("data")), data);
@@ -340,13 +363,25 @@ static VALUE rb_bitset_marshall_load(VALUE self, VALUE hash) {
340
363
  return Qnil;
341
364
  }
342
365
 
366
+ static VALUE rb_bitset_to_binary_array(VALUE self) {
367
+ Bitset * bs = get_bitset(self);
368
+ int i;
369
+
370
+ VALUE array = rb_ary_new2(bs->len / 2);
371
+ for(i = 0; i < bs->len; i++) {
372
+ rb_ary_push(array, INT2NUM(_get_bit(bs, i) > 0 ? 1 : 0));
373
+ }
374
+
375
+ return array;
376
+ }
377
+
343
378
  static VALUE rb_bitset_dup(VALUE self) {
344
379
  Bitset * bs = get_bitset(self);
380
+ int max = INTS(bs);
345
381
 
346
382
  Bitset * new_bs = bitset_new();
347
383
  bitset_setup(new_bs, bs->len);
348
384
 
349
- int max = INTS(bs);
350
385
  memcpy(new_bs->data, bs->data, max * sizeof(bs->data[0]));
351
386
  return Data_Wrap_Struct(cBitset, 0, bitset_free, new_bs);
352
387
  }
@@ -372,7 +407,7 @@ static VALUE rb_bitset_each_set(VALUE self) {
372
407
  VALUE v;
373
408
 
374
409
  if (!(segment & 1)) {
375
- int shift = __builtin_ctzll(segment);
410
+ int shift = psnip_builtin_ctz64(segment);
376
411
  bit_position += shift;
377
412
  segment >>= shift;
378
413
  }
@@ -404,10 +439,9 @@ static VALUE rb_bitset_empty_p(VALUE self) {
404
439
  return Qtrue;
405
440
  }
406
441
 
407
- static VALUE rb_bitset_select_bits(VALUE self, VALUE index_array) {
442
+ static VALUE rb_bitset_values_at(VALUE self, VALUE index_array) {
408
443
  int i;
409
444
  Bitset * bs = get_bitset(self);
410
- struct RArray *arr = RARRAY(index_array);
411
445
  int blen = bs->len;
412
446
  int alen = RARRAY_LEN(index_array);
413
447
  VALUE *ptr = RARRAY_PTR(index_array);
@@ -443,10 +477,10 @@ static VALUE rb_bitset_equal(VALUE self, VALUE other) {
443
477
  int i;
444
478
  Bitset * bs = get_bitset(self);
445
479
  Bitset * other_bs = get_bitset(other);
480
+ int max = INTS(bs);
446
481
 
447
482
  if (bs->len != other_bs->len)
448
483
  return Qfalse;
449
- int max = INTS(bs);
450
484
  for(i = 0; i < max; i++) {
451
485
  if (bs->data[i] != other_bs->data[i]) {
452
486
  return Qfalse;
@@ -464,10 +498,10 @@ inline uint64_t difference(uint64_t a, uint64_t b) { return a & ~b; }
464
498
  static VALUE mutable(VALUE self, VALUE other, bitwise_op operator) {
465
499
  Bitset * bs = get_bitset(self);
466
500
  Bitset * other_bs = get_bitset(other);
467
- verify_equal_size(bs, other_bs);
468
-
469
501
  int max = INTS(bs);
470
502
  int i;
503
+ verify_equal_size(bs, other_bs);
504
+
471
505
  for(i = 0; i < max; i++) {
472
506
  uint64_t segment = bs->data[i];
473
507
  uint64_t other_segment = other_bs->data[i];
@@ -539,11 +573,16 @@ void Init_bitset() {
539
573
  rb_define_singleton_method(cBitset, "from_s", rb_bitset_from_s, 1);
540
574
  rb_define_method(cBitset, "marshal_dump", rb_bitset_marshall_dump, 0);
541
575
  rb_define_method(cBitset, "marshal_load", rb_bitset_marshall_load, 1);
576
+ rb_define_method(cBitset, "to_binary_array", rb_bitset_to_binary_array, 0);
542
577
  rb_define_method(cBitset, "dup", rb_bitset_dup, 0);
543
578
  rb_define_alias(cBitset, "clone", "dup");
544
579
  rb_define_method(cBitset, "each_set", rb_bitset_each_set, 0);
580
+ rb_define_alias(cBitset, "to_a", "each_set");
581
+ /* #each_set allows an optional block, and #to_a normally doesn't.
582
+ But an alias is simpler than having two different functions. */
545
583
  rb_define_method(cBitset, "empty?", rb_bitset_empty_p, 0);
546
- rb_define_method(cBitset, "select_bits", rb_bitset_select_bits, 1);
584
+ rb_define_method(cBitset, "values_at", rb_bitset_values_at, 1);
585
+ rb_define_alias(cBitset, "select_bits", "values_at");
547
586
  rb_define_method(cBitset, "reverse", rb_bitset_reverse, 0);
548
587
  rb_define_method(cBitset, "==", rb_bitset_equal, 1);
549
588
  }
@@ -0,0 +1,1390 @@
1
+ /* Builtins and Intrinsics
2
+ * Portable Snippets - https://github.com/nemequ/portable-snippets
3
+ * Created by Evan Nemerson <evan@nemerson.com>
4
+ *
5
+ * To the extent possible under law, the authors have waived all
6
+ * copyright and related or neighboring rights to this code. For
7
+ * details, see the Creative Commons Zero 1.0 Universal license at
8
+ * https://creativecommons.org/publicdomain/zero/1.0/
9
+ *
10
+ * Some of these implementations are based on code from
11
+ * https://graphics.stanford.edu/~seander/bithacks.html which is also
12
+ * public domain (and a fantastic web site).
13
+ */
14
+
15
+ #if !defined(PSNIP_BUILTIN_H)
16
+ #define PSNIP_BUILTIN_H
17
+
18
+ #if defined(HEDLEY_GCC_HAS_BUILTIN)
19
+ # define PSNIP_BUILTIN_GNU_HAS_BUILTIN(builtin,major,minor) HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,0)
20
+ #elif defined(__clang__) && defined(__has_builtin)
21
+ # define PSNIP_BUILTIN_GNU_HAS_BUILTIN(builtin,major,minor) __has_builtin(builtin)
22
+ #elif defined(__GNUC__)
23
+ # define PSNIP_BUILTIN_GNU_HAS_BUILTIN(builtin,major,minor) (__GNUC__ > major || (major == __GNUC__ && __GNUC_MINOR__ >= minor))
24
+ #else
25
+ # define PSNIP_BUILTIN_GNU_HAS_BUILTIN(builtin,major,minor) (0)
26
+ #endif
27
+
28
+ #if defined(HEDLEY_CLANG_HAS_BUILTIN)
29
+ # define PSNIP_BUILTIN_CLANG_HAS_BUILTIN(builtin) HEDLEY_CLANG_HAS_BUILTIN(builtin)
30
+ #elif defined(__has_builtin)
31
+ # define PSNIP_BUILTIN_CLANG_HAS_BUILTIN(builtin) __has_builtin(builtin)
32
+ #else
33
+ # define PSNIP_BUILTIN_CLANG_HAS_BUILTIN(builtin) (0)
34
+ #endif
35
+
36
+ #if defined(HEDLEY_MSVC_VERSION_CHECK)
37
+ # define PSNIP_BUILTIN_MSVC_HAS_INTRIN(intrin,major,minor) HEDLEY_MSVC_VERSION_CHECK(major,minor,0)
38
+ #elif !defined(_MSC_VER)
39
+ # define PSNIP_BUILTIN_MSVC_HAS_INTRIN(intrin,major,minor) (0)
40
+ #elif defined(_MSC_VER) && (_MSC_VER >= 1400)
41
+ # define PSNIP_BUILTIN_MSVC_HAS_INTRIN(intrin,major,minor) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000)))
42
+ #elif defined(_MSC_VER) && (_MSC_VER >= 1200)
43
+ # define PSNIP_BUILTIN_MSVC_HAS_INTRIN(intrin,major,minor) (_MSC_FULL_VER >= ((major * 100000) + (minor * 1000)))
44
+ #else
45
+ # define PSNIP_BUILTIN_MSVC_HAS_INTRIN(intrin,major,minor) (_MSC_VER >= ((major * 100) + (minor)))
46
+ #endif
47
+
48
+ #if defined(_MSC_VER)
49
+ # include <intrin.h>
50
+ #endif
51
+ #include <limits.h>
52
+ #include <stdlib.h>
53
+
54
+ #if defined(__i386) || defined(_M_IX86) || \
55
+ defined(__amd64) || defined(_M_AMD64) || defined(__x86_64)
56
+ # if defined(_MSC_VER)
57
+ # define PSNIP_BUILTIN__ENABLE_X86
58
+ # elif defined(__GNUC__)
59
+ # define PSNIP_BUILTIN__ENABLE_X86
60
+ # include <x86intrin.h>
61
+ # endif
62
+ #endif
63
+
64
+ #if defined(__amd64) || defined(_M_AMD64) || defined(__x86_64)
65
+ # if defined(_MSC_VER)
66
+ # define PSNIP_BUILTIN__ENABLE_AMD64
67
+ # elif defined(__GNUC__)
68
+ # define PSNIP_BUILTIN__ENABLE_AMD64
69
+ # include <x86intrin.h>
70
+ # endif
71
+ #endif
72
+
73
+ #if \
74
+ !defined(psnip_int64_t) || !defined(psnip_uint64_t) || \
75
+ !defined(psnip_int32_t) || !defined(psnip_uint32_t) || \
76
+ !defined(psnip_int16_t) || !defined(psnip_uint16_t) || \
77
+ !defined(psnip_int8_t) || !defined(psnip_uint8_t)
78
+ # include "exact-int.h"
79
+ #endif
80
+
81
+ #if defined(HEDLEY_LIKELY) && defined(HEDLEY_UNLIKELY)
82
+ # define PSNIP_BUILTIN_LIKELY(expr) HEDLEY_LIKELY(expr)
83
+ # define PSNIP_BUILTIN_UNLIKELY(expr) HEDLEY_UNLIKELY(expr)
84
+ #elif PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_expect,3,0)
85
+ # define PSNIP_BUILTIN_LIKELY(expr) __builtin_expect(!!(expr), 1)
86
+ # define PSNIP_BUILTIN_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
87
+ #else
88
+ # define PSNIP_BUILTIN_LIKELY(expr) (!!(expr))
89
+ # define PSNIP_BUILTIN_UNLIKELY(expr) (!!(expr))
90
+ #endif
91
+
92
+ #if !defined(PSNIP_BUILTIN_STATIC_INLINE)
93
+ # if defined(__GNUC__)
94
+ # define PSNIP_BUILTIN__COMPILER_ATTRIBUTES __attribute__((__unused__))
95
+ # else
96
+ # define PSNIP_BUILTIN__COMPILER_ATTRIBUTES
97
+ # endif
98
+
99
+ # if defined(HEDLEY_INLINE)
100
+ # define PSNIP_BUILTIN__INLINE HEDLEY_INLINE
101
+ # elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
102
+ # define PSNIP_BUILTIN__INLINE inline
103
+ # elif defined(__GNUC_STDC_INLINE__)
104
+ # define PSNIP_BUILTIN__INLINE __inline__
105
+ # elif defined(_MSC_VER) && _MSC_VER >= 1200
106
+ # define PSNIP_BUILTIN__INLINE __inline
107
+ # else
108
+ # define PSNIP_BUILTIN__INLINE
109
+ # endif
110
+
111
+ # define PSNIP_BUILTIN__FUNCTION PSNIP_BUILTIN__COMPILER_ATTRIBUTES static PSNIP_BUILTIN__INLINE
112
+ #endif
113
+
114
+ #define PSNIP_BUILTIN__SUFFIX_B 1
115
+ #define PSNIP_BUILTIN__SUFFIX_S 2
116
+ #define PSNIP_BUILTIN__SUFFIX_ 3
117
+ #define PSNIP_BUILTIN__SUFFIX_L 4
118
+ #define PSNIP_BUILTIN__SUFFIX_LL 5
119
+
120
+ #if !defined(PSNIP_BUILTIN__SIZEOF_CHAR)
121
+ # if CHAR_MIN == (-0x7fLL-1) && CHAR_MAX == 0x7fLL
122
+ # define PSNIP_BUILTIN__SIZEOF_CHAR 8
123
+ # elif CHAR_MIN == (-0x7fffLL-1) && CHAR_MAX == 0x7fffLL
124
+ # define PSNIP_BUILTIN__SIZEOF_CHAR 16
125
+ # elif CHAR_MIN == (-0x7fffffffLL-1) && CHAR_MAX == 0x7fffffffLL
126
+ # define PSNIP_BUILTIN__SIZEOF_CHAR 32
127
+ # elif CHAR_MIN == (-0x7fffffffffffffffLL-1) && CHAR_MAX == 0x7fffffffffffffffLL
128
+ # define PSNIP_BUILTIN__SIZEOF_CHAR 64
129
+ # endif
130
+ #endif
131
+
132
+ #if !defined(PSNIP_BUILTIN__SIZEOF_SHRT)
133
+ # if SHRT_MIN == (-0x7fLL-1) && SHRT_MAX == 0x7fLL
134
+ # define PSNIP_BUILTIN__SIZEOF_SHRT 8
135
+ # elif SHRT_MIN == (-0x7fffLL-1) && SHRT_MAX == 0x7fffLL
136
+ # define PSNIP_BUILTIN__SIZEOF_SHRT 16
137
+ # elif SHRT_MIN == (-0x7fffffffLL-1) && SHRT_MAX == 0x7fffffffLL
138
+ # define PSNIP_BUILTIN__SIZEOF_SHRT 32
139
+ # elif SHRT_MIN == (-0x7fffffffffffffffLL-1) && SHRT_MAX == 0x7fffffffffffffffLL
140
+ # define PSNIP_BUILTIN__SIZEOF_SHRT 64
141
+ # endif
142
+ #endif
143
+
144
+ #if !defined(PSNIP_BUILTIN__SIZEOF_INT)
145
+ # if INT_MIN == (-0x7fLL-1) && INT_MAX == 0x7fLL
146
+ # define PSNIP_BUILTIN__SIZEOF_INT 8
147
+ # elif INT_MIN == (-0x7fffLL-1) && INT_MAX == 0x7fffLL
148
+ # define PSNIP_BUILTIN__SIZEOF_INT 16
149
+ # elif INT_MIN == (-0x7fffffffLL-1) && INT_MAX == 0x7fffffffLL
150
+ # define PSNIP_BUILTIN__SIZEOF_INT 32
151
+ # elif INT_MIN == (-0x7fffffffffffffffLL-1) && INT_MAX == 0x7fffffffffffffffLL
152
+ # define PSNIP_BUILTIN__SIZEOF_INT 64
153
+ # endif
154
+ #endif
155
+
156
+ #if !defined(PSNIP_BUILTIN__SIZEOF_LONG)
157
+ # if LONG_MIN == (-0x7fLL-1) && LONG_MAX == 0x7fLL
158
+ # define PSNIP_BUILTIN__SIZEOF_LONG 8
159
+ # elif LONG_MIN == (-0x7fffLL-1) && LONG_MAX == 0x7fffLL
160
+ # define PSNIP_BUILTIN__SIZEOF_LONG 16
161
+ # elif LONG_MIN == (-0x7fffffffLL-1) && LONG_MAX == 0x7fffffffLL
162
+ # define PSNIP_BUILTIN__SIZEOF_LONG 32
163
+ # elif LONG_MIN == (-0x7fffffffffffffffLL-1) && LONG_MAX == 0x7fffffffffffffffLL
164
+ # define PSNIP_BUILTIN__SIZEOF_LONG 64
165
+ # endif
166
+ #endif
167
+
168
+ #if !defined(PSNIP_BUILTIN__SIZEOF_LLONG)
169
+ # if LLONG_MIN == (-0x7fLL-1) && LLONG_MAX == 0x7fLL
170
+ # define PSNIP_BUILTIN__SIZEOF_LLONG 8
171
+ # elif LLONG_MIN == (-0x7fffLL-1) && LLONG_MAX == 0x7fffLL
172
+ # define PSNIP_BUILTIN__SIZEOF_LLONG 16
173
+ # elif LLONG_MIN == (-0x7fffffffLL-1) && LLONG_MAX == 0x7fffffffLL
174
+ # define PSNIP_BUILTIN__SIZEOF_LLONG 32
175
+ # elif LLONG_MIN == (-0x7fffffffffffffffLL-1) && LLONG_MAX == 0x7fffffffffffffffLL
176
+ # define PSNIP_BUILTIN__SIZEOF_LLONG 64
177
+ # endif
178
+ #endif
179
+
180
+ #if !defined(PSNIP_BUILTIN_SUFFIX_INT8)
181
+ # if PSNIP_BUILTIN__SIZEOF_CHAR == 8
182
+ # define PSNIP_BUILTIN_SUFFIX_INT8 PSNIP_BUILTIN__SUFFIX_B
183
+ # elif PSNIP_BUILTIN__SIZEOF_SHRT == 8
184
+ # define PSNIP_BUILTIN_SUFFIX_INT8 PSNIP_BUILTIN__SUFFIX_S
185
+ # elif PSNIP_BUILTIN__SIZEOF_INT == 8
186
+ # define PSNIP_BUILTIN_SUFFIX_INT8 PSNIP_BUILTIN__SUFFIX_
187
+ # elif PSNIP_BUILTIN__SIZEOF_LONG == 8
188
+ # define PSNIP_BUILTIN_SUFFIX_INT8 PSNIP_BUILTIN__SUFFIX_L
189
+ # elif PSNIP_BUILTIN__SIZEOF_LLONG == 8
190
+ # define PSNIP_BUILTIN_SUFFIX_INT8 PSNIP_BUILTIN__SUFFIX_LL
191
+ # endif
192
+ #endif
193
+
194
+ #if !defined(PSNIP_BUILTIN_SUFFIX_INT16)
195
+ # if PSNIP_BUILTIN__SIZEOF_CHAR == 16
196
+ # define PSNIP_BUILTIN_SUFFIX_INT16 PSNIP_BUILTIN__SUFFIX_B
197
+ # elif PSNIP_BUILTIN__SIZEOF_SHRT == 16
198
+ # define PSNIP_BUILTIN_SUFFIX_INT16 PSNIP_BUILTIN__SUFFIX_S
199
+ # elif PSNIP_BUILTIN__SIZEOF_INT == 16
200
+ # define PSNIP_BUILTIN_SUFFIX_INT16 PSNIP_BUILTIN__SUFFIX_
201
+ # elif PSNIP_BUILTIN__SIZEOF_LONG == 16
202
+ # define PSNIP_BUILTIN_SUFFIX_INT16 PSNIP_BUILTIN__SUFFIX_L
203
+ # elif PSNIP_BUILTIN__SIZEOF_LLONG == 16
204
+ # define PSNIP_BUILTIN_SUFFIX_INT16 PSNIP_BUILTIN__SUFFIX_LL
205
+ # endif
206
+ #endif
207
+
208
+ #if !defined(PSNIP_BUILTIN_SUFFIX_INT32)
209
+ # if PSNIP_BUILTIN__SIZEOF_CHAR == 32
210
+ # define PSNIP_BUILTIN_SUFFIX_INT32 PSNIP_BUILTIN__SUFFIX_B
211
+ # elif PSNIP_BUILTIN__SIZEOF_SHRT == 32
212
+ # define PSNIP_BUILTIN_SUFFIX_INT32 PSNIP_BUILTIN__SUFFIX_S
213
+ # elif PSNIP_BUILTIN__SIZEOF_INT == 32
214
+ # define PSNIP_BUILTIN_SUFFIX_INT32 PSNIP_BUILTIN__SUFFIX_
215
+ # elif PSNIP_BUILTIN__SIZEOF_LONG == 32
216
+ # define PSNIP_BUILTIN_SUFFIX_INT32 PSNIP_BUILTIN__SUFFIX_L
217
+ # elif PSNIP_BUILTIN__SIZEOF_LLONG == 32
218
+ # define PSNIP_BUILTIN_SUFFIX_INT32 PSNIP_BUILTIN__SUFFIX_LL
219
+ # endif
220
+ #endif
221
+
222
+ #if !defined(PSNIP_BUILTIN_SUFFIX_INT64)
223
+ # if defined(__APPLE__) && PSNIP_BUILTIN__SIZEOF_LLONG == 64
224
+ # define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_LL
225
+ # elif PSNIP_BUILTIN__SIZEOF_CHAR == 64
226
+ # define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_B
227
+ # elif PSNIP_BUILTIN__SIZEOF_SHRT == 64
228
+ # define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_S
229
+ # elif PSNIP_BUILTIN__SIZEOF_INT == 64
230
+ # define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_
231
+ # elif PSNIP_BUILTIN__SIZEOF_LONG == 64
232
+ # define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_L
233
+ # elif PSNIP_BUILTIN__SIZEOF_LLONG == 64
234
+ # define PSNIP_BUILTIN_SUFFIX_INT64 PSNIP_BUILTIN__SUFFIX_LL
235
+ # endif
236
+ #endif
237
+
238
+ #if defined(PSNIP_BUILTIN_SUFFIX_INT8)
239
+ # if PSNIP_BUILTIN_SUFFIX_INT8 == 1
240
+ # define PSNIP_BUILTIN__VARIANT2_INT8(prefix,name) prefix##_builtin_##name##b
241
+ # elif PSNIP_BUILTIN_SUFFIX_INT8 == 2
242
+ # define PSNIP_BUILTIN__VARIANT2_INT8(prefix,name) prefix##_builtin_##name##s
243
+ # elif PSNIP_BUILTIN_SUFFIX_INT8 == 3
244
+ # define PSNIP_BUILTIN__VARIANT_INT8(prefix,name) prefix##_builtin_##name
245
+ # define PSNIP_BUILTIN__VARIANT2_INT8(prefix,name) prefix##_builtin_##name
246
+ # elif PSNIP_BUILTIN_SUFFIX_INT8 == 4
247
+ # define PSNIP_BUILTIN__VARIANT_INT8(prefix,name) prefix##_builtin_##name##l
248
+ # define PSNIP_BUILTIN__VARIANT2_INT8(prefix,name) prefix##_builtin_##name##l
249
+ # elif PSNIP_BUILTIN_SUFFIX_INT8 == 5
250
+ # define PSNIP_BUILTIN__VARIANT_INT8(prefix,name) prefix##_builtin_##name##ll
251
+ # define PSNIP_BUILTIN__VARIANT2_INT8(prefix,name) prefix##_builtin_##name##ll
252
+ # endif
253
+ #endif
254
+
255
+ #if defined(PSNIP_BUILTIN_SUFFIX_INT16)
256
+ # if PSNIP_BUILTIN_SUFFIX_INT16 == 1
257
+ # define PSNIP_BUILTIN__VARIANT2_INT16(prefix,name) prefix##_builtin_##name##b
258
+ # elif PSNIP_BUILTIN_SUFFIX_INT16 == 2
259
+ # define PSNIP_BUILTIN__VARIANT2_INT16(prefix,name) prefix##_builtin_##name##s
260
+ # elif PSNIP_BUILTIN_SUFFIX_INT16 == 3
261
+ # define PSNIP_BUILTIN__VARIANT_INT16(prefix,name) prefix##_builtin_##name
262
+ # define PSNIP_BUILTIN__VARIANT2_INT16(prefix,name) prefix##_builtin_##name
263
+ # elif PSNIP_BUILTIN_SUFFIX_INT16 == 4
264
+ # define PSNIP_BUILTIN__VARIANT_INT16(prefix,name) prefix##_builtin_##name##l
265
+ # define PSNIP_BUILTIN__VARIANT2_INT16(prefix,name) prefix##_builtin_##name##l
266
+ # elif PSNIP_BUILTIN_SUFFIX_INT16 == 5
267
+ # define PSNIP_BUILTIN__VARIANT_INT16(prefix,name) prefix##_builtin_##name##ll
268
+ # define PSNIP_BUILTIN__VARIANT2_INT16(prefix,name) prefix##_builtin_##name##ll
269
+ # endif
270
+ #endif
271
+
272
+ #if defined(PSNIP_BUILTIN_SUFFIX_INT32)
273
+ # if PSNIP_BUILTIN_SUFFIX_INT32 == 1
274
+ # define PSNIP_BUILTIN__VARIANT2_INT32(prefix,name) prefix##_builtin_##name##b
275
+ # elif PSNIP_BUILTIN_SUFFIX_INT32 == 2
276
+ # define PSNIP_BUILTIN__VARIANT2_INT32(prefix,name) prefix##_builtin_##name##s
277
+ # elif PSNIP_BUILTIN_SUFFIX_INT32 == 3
278
+ # define PSNIP_BUILTIN__VARIANT_INT32(prefix,name) prefix##_builtin_##name
279
+ # define PSNIP_BUILTIN__VARIANT2_INT32(prefix,name) prefix##_builtin_##name
280
+ # elif PSNIP_BUILTIN_SUFFIX_INT32 == 4
281
+ # define PSNIP_BUILTIN__VARIANT_INT32(prefix,name) prefix##_builtin_##name##l
282
+ # define PSNIP_BUILTIN__VARIANT2_INT32(prefix,name) prefix##_builtin_##name##l
283
+ # elif PSNIP_BUILTIN_SUFFIX_INT32 == 5
284
+ # define PSNIP_BUILTIN__VARIANT_INT32(prefix,name) prefix##_builtin_##name##ll
285
+ # define PSNIP_BUILTIN__VARIANT2_INT32(prefix,name) prefix##_builtin_##name##ll
286
+ # endif
287
+ #endif
288
+
289
+ #if defined(PSNIP_BUILTIN_SUFFIX_INT64)
290
+ # if PSNIP_BUILTIN_SUFFIX_INT64 == 1
291
+ # define PSNIP_BUILTIN__VARIANT2_INT64(prefix,name) prefix##_builtin_##name##b
292
+ # elif PSNIP_BUILTIN_SUFFIX_INT64 == 2
293
+ # define PSNIP_BUILTIN__VARIANT2_INT64(prefix,name) prefix##_builtin_##name##s
294
+ # elif PSNIP_BUILTIN_SUFFIX_INT64 == 3
295
+ # define PSNIP_BUILTIN__VARIANT_INT64(prefix,name) prefix##_builtin_##name
296
+ # define PSNIP_BUILTIN__VARIANT2_INT64(prefix,name) prefix##_builtin_##name
297
+ # elif PSNIP_BUILTIN_SUFFIX_INT64 == 4
298
+ # define PSNIP_BUILTIN__VARIANT_INT64(prefix,name) prefix##_builtin_##name##l
299
+ # define PSNIP_BUILTIN__VARIANT2_INT64(prefix,name) prefix##_builtin_##name##l
300
+ # elif PSNIP_BUILTIN_SUFFIX_INT64 == 5
301
+ # define PSNIP_BUILTIN__VARIANT_INT64(prefix,name) prefix##_builtin_##name##ll
302
+ # define PSNIP_BUILTIN__VARIANT2_INT64(prefix,name) prefix##_builtin_##name##ll
303
+ # endif
304
+ #endif
305
+
306
+ /******
307
+ *** GCC-style built-ins
308
+ ******/
309
+
310
+ /*** __builtin_ffs ***/
311
+
312
/* Defines psnip_builtin_<f_n>(T x), a portable find-first-set.
 *
 * The generated function returns the 1-based position of the least
 * significant set bit of x, or 0 when no bit is set.  x is inspected one
 * byte at a time starting from the low end; the first non-zero byte is
 * resolved through a 256-entry table holding the 1-based position of the
 * lowest set bit of every byte value (entry 0 is unused). */
#define PSNIP_BUILTIN__FFS_DEFINE_PORTABLE(f_n, T) \
  PSNIP_BUILTIN__FUNCTION \
  int psnip_builtin_##f_n(T x) { \
    static const char psnip_builtin_ffs_lookup[256] = { \
      0, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      7, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      8, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      7, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      6, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, \
      5, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1 \
    }; \
    size_t shift; \
    \
    /* Walk the value byte by byte, least significant byte first. */ \
    for (shift = 0 ; shift < (sizeof(T) * 8) ; shift += 8) { \
      const unsigned char chunk = (unsigned char) ((x >> shift) & 0xff); \
      if (chunk != 0) \
        return psnip_builtin_ffs_lookup[chunk] + shift; \
    } \
    \
    return 0; \
  }
347
+
348
+ #if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_ffs, 3, 3)
349
+ # define psnip_builtin_ffs(x) __builtin_ffs(x)
350
+ # define psnip_builtin_ffsl(x) __builtin_ffsl(x)
351
+ # define psnip_builtin_ffsll(x) __builtin_ffsll(x)
352
+ # define psnip_builtin_ffs32(x) PSNIP_BUILTIN__VARIANT_INT32(_,ffs)(x)
353
+ # define psnip_builtin_ffs64(x) PSNIP_BUILTIN__VARIANT_INT64(_,ffs)(x)
354
+ #else
355
+ # if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanForward, 14, 0)
356
+ PSNIP_BUILTIN__FUNCTION
357
+ int psnip_builtin_ffsll(long long v) {
358
+ unsigned long r;
359
+ # if defined(_M_AMD64) || defined(_M_ARM)
360
+ if (_BitScanForward64(&r, (unsigned long long) v)) {
361
+ return (int) (r + 1);
362
+ }
363
+ # else
364
+ if (_BitScanForward(&r, (unsigned long) (v))) {
365
+ return (int) (r + 1);
366
+ } else if (_BitScanForward(&r, (unsigned long) (v >> 32))) {
367
+ return (int) (r + 33);
368
+ }
369
+ # endif
370
+ return 0;
371
+ }
372
+
373
+ PSNIP_BUILTIN__FUNCTION
374
+ int psnip_builtin_ffsl(long v) {
375
+ unsigned long r;
376
+ if (_BitScanForward(&r, (unsigned long) v)) {
377
+ return (int) (r + 1);
378
+ }
379
+ return 0;
380
+ }
381
+
382
+ PSNIP_BUILTIN__FUNCTION
383
+ int psnip_builtin_ffs(int v) {
384
+ return psnip_builtin_ffsl(v);
385
+ }
386
+ # else
387
+ PSNIP_BUILTIN__FFS_DEFINE_PORTABLE(ffs, int)
388
+ PSNIP_BUILTIN__FFS_DEFINE_PORTABLE(ffsl, long)
389
+ PSNIP_BUILTIN__FFS_DEFINE_PORTABLE(ffsll, long long)
390
+ # endif
391
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
392
+ # define __builtin_ffsll(v) psnip_builtin_ffsll(v)
393
+ # define __builtin_ffsl(v) psnip_builtin_ffsl(v)
394
+ # define __builtin_ffs(v) psnip_builtin_ffs(v)
395
+ # endif
396
+ #endif
397
+
398
+ #if !defined(psnip_builtin_ffs32)
399
+ # define psnip_builtin_ffs32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,ffs)(x)
400
+ #endif
401
+
402
+ #if !defined(psnip_builtin_ffs64)
403
+ # define psnip_builtin_ffs64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,ffs)(x)
404
+ #endif
405
+
406
+ /*** __builtin_clz ***/
407
+
408
/* Defines psnip_builtin_<f_n>(T x), a portable count-leading-zeros.
 *
 * x is examined from the most significant byte downwards.  The first
 * non-zero prefix is looked up in a 256-entry table giving the number of
 * leading zero bits inside a byte, and the number of all-zero bytes skipped
 * above it is added (8 bits each).  For x == 0 the function returns the bit
 * width of T minus one. */
#define PSNIP_BUILTIN__CLZ_DEFINE_PORTABLE(f_n, T) \
  PSNIP_BUILTIN__FUNCTION \
  int psnip_builtin_##f_n(T x) { \
    static const char psnip_builtin_clz_lookup[256] = { \
      7, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, \
      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, \
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, \
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, \
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 \
    }; \
    /* Shift that brings the most significant byte down to bits 0..7. */ \
    const size_t top_shift = (sizeof(T) - 1) * 8; \
    size_t shift; \
    \
    for (shift = top_shift ; shift != 0 ; shift -= 8) { \
      /* All higher bytes were zero, so this prefix fits in one byte. */ \
      const T prefix = x >> shift; \
      if (prefix != 0) \
        return psnip_builtin_clz_lookup[prefix] + \
          (top_shift - shift); \
    } \
    \
    /* Only the lowest byte can still hold a set bit. */ \
    if (x != 0) \
      return psnip_builtin_clz_lookup[x] + \
        top_shift; \
    \
    return (int) ((sizeof(T) * 8) - 1); \
  }
445
+
446
+ #if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_clz, 3, 4)
447
+ # define psnip_builtin_clz(x) __builtin_clz(x)
448
+ # define psnip_builtin_clzl(x) __builtin_clzl(x)
449
+ # define psnip_builtin_clzll(x) __builtin_clzll(x)
450
+ # define psnip_builtin_clz32(x) PSNIP_BUILTIN__VARIANT_INT32(_,clz)(x)
451
+ # define psnip_builtin_clz64(x) PSNIP_BUILTIN__VARIANT_INT64(_,clz)(x)
452
+ #else
453
+ # if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanReverse,14,0)
454
+ PSNIP_BUILTIN__FUNCTION
455
+ int psnip_builtin_clzll(unsigned long long v) {
456
+ unsigned long r = 0;
457
+ # if defined(_M_AMD64) || defined(_M_ARM)
458
+ if (_BitScanReverse64(&r, v)) {
459
+ return 63 - r;
460
+ }
461
+ # else
462
+ if (_BitScanReverse(&r, (unsigned long) (v >> 32))) {
463
+ return 31 - r;
464
+ } else if (_BitScanReverse(&r, (unsigned long) v)) {
465
+ return 63 - r;
466
+ }
467
+ # endif
468
+ return 63;
469
+ }
470
+
471
+ PSNIP_BUILTIN__FUNCTION
472
+ int psnip_builtin_clzl(unsigned long v) {
473
+ unsigned long r = 0;
474
+ if (_BitScanReverse(&r, v)) {
475
+ return 31 - r;
476
+ }
477
+ return 31;
478
+ }
479
+
480
+ PSNIP_BUILTIN__FUNCTION
481
+ int psnip_builtin_clz(unsigned int v) {
482
+ return psnip_builtin_clzl(v);
483
+ }
484
+ # define psnip_builtin_clz32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,clz)(x)
485
+ # define psnip_builtin_clz64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,clz)(x)
486
+ # else
487
+ PSNIP_BUILTIN__FUNCTION
488
+ int psnip_builtin_clz32(psnip_uint32_t v) {
489
+ static const unsigned char MultiplyDeBruijnBitPosition[] = {
490
+ 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
491
+ 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
492
+ };
493
+
494
+ v |= v >> 1;
495
+ v |= v >> 2;
496
+ v |= v >> 4;
497
+ v |= v >> 8;
498
+ v |= v >> 16;
499
+
500
+ return
501
+ ((sizeof(psnip_uint32_t) * CHAR_BIT) - 1) -
502
+ MultiplyDeBruijnBitPosition[(psnip_uint32_t)(v * 0x07C4ACDDU) >> 27];
503
+ }
504
+
505
+ PSNIP_BUILTIN__FUNCTION
506
+ int psnip_builtin_clz64(psnip_uint64_t v) {
507
+ static const unsigned char MultiplyDeBruijnBitPosition[] = {
508
+ 0, 47, 1, 56, 48, 27, 2, 60, 57, 49, 41, 37, 28, 16, 3, 61,
509
+ 54, 58, 35, 52, 50, 42, 21, 44, 38, 32, 29, 23, 17, 11, 4, 62,
510
+ 46, 55, 26, 59, 40, 36, 15, 53, 34, 51, 20, 43, 31, 22, 10, 45,
511
+ 25, 39, 14, 33, 19, 30, 9, 24, 13, 18, 8, 12, 7, 6, 5, 63
512
+ };
513
+
514
+ v |= v >> 1;
515
+ v |= v >> 2;
516
+ v |= v >> 4;
517
+ v |= v >> 8;
518
+ v |= v >> 16;
519
+ v |= v >> 32;
520
+
521
+ return
522
+ ((sizeof(psnip_uint64_t) * CHAR_BIT) - 1) -
523
+ MultiplyDeBruijnBitPosition[(psnip_uint64_t)(v * 0x03F79D71B4CB0A89ULL) >> 58];
524
+ }
525
+
526
+ # if PSNIP_BUILTIN__SIZEOF_INT == 32
527
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_clz(unsigned int x) { return psnip_builtin_clz32(x); }
528
+ # elif PSNIP_BUILTIN__SIZEOF_INT == 64
529
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_clz(unsigned int x) { return psnip_builtin_clz64(x); }
530
+ # else
531
+ PSNIP_BUILTIN__CLZ_DEFINE_PORTABLE(clz, unsigned int)
532
+ # endif
533
+
534
+ # if PSNIP_BUILTIN__SIZEOF_LONG == 32
535
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_clzl(unsigned long x) { return psnip_builtin_clz32(x); }
536
+ # elif PSNIP_BUILTIN__SIZEOF_LONG == 64
537
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_clzl(unsigned long x) { return psnip_builtin_clz64(x); }
538
+ # else
539
+ PSNIP_BUILTIN__CLZ_DEFINE_PORTABLE(clzl, unsigned long)
540
+ # endif
541
+
542
+ # if PSNIP_BUILTIN__SIZEOF_LLONG == 32
543
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_clzll(unsigned long long x) { return psnip_builtin_clz32(x); }
544
+ # elif PSNIP_BUILTIN__SIZEOF_LLONG == 64
545
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_clzll(unsigned long long x) { return psnip_builtin_clz64(x); }
546
+ # else
547
+ PSNIP_BUILTIN__CLZ_DEFINE_PORTABLE(clzll, unsigned long long)
548
+ # endif
549
+
550
+ # endif
551
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
552
+ # define __builtin_clz(x) psnip_builtin_clz(x)
553
+ # define __builtin_clzl(x) psnip_builtin_clzl(x)
554
+ # define __builtin_clzll(x) psnip_builtin_clzll(x)
555
+ # endif
556
+ #endif
557
+
558
+ #if !defined(psnip_builtin_clz32)
559
+ # define psnip_builtin_clz32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,clz)(x)
560
+ #endif
561
+
562
+ #if !defined(psnip_builtin_clz64)
563
+ # define psnip_builtin_clz64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,clz)(x)
564
+ #endif
565
+
566
+ /*** __builtin_ctz ***/
567
+
568
/* Defines psnip_builtin_<f_n>(T x), a portable count-trailing-zeros.
 *
 * x is examined one byte at a time from the low end.  The first non-zero
 * byte is looked up in a 256-entry table holding the number of trailing
 * zero bits of each byte value, and the bit offset of that byte is added.
 *
 * For x == 0 (where the GCC builtin leaves the result undefined) the
 * function returns the bit width of T minus one, matching what the
 * portable CLZ helper returns for zero.  The previous fallback computed
 * sizeof(T) - 1, i.e. a byte count rather than a bit count. */
#define PSNIP_BUILTIN__CTZ_DEFINE_PORTABLE(f_n, T) \
  PSNIP_BUILTIN__FUNCTION \
  int psnip_builtin_##f_n(T x) { \
    static const char psnip_builtin_ctz_lookup[256] = { \
      0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, \
      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 \
    }; \
    size_t s = 0; \
    T r; \
    \
    do { \
      r = (x >> s) & 0xff; \
      if (r != 0) \
        return psnip_builtin_ctz_lookup[r] + (int) s; \
    } while ((s += 8) < (sizeof(T) * 8)); \
    \
    /* No bit set: report width-in-bits minus one. */ \
    return (int) (sizeof(T) * 8) - 1; \
  }
600
+
601
+ #if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_ctz, 3, 4)
602
+ # define psnip_builtin_ctz(x) __builtin_ctz(x)
603
+ # define psnip_builtin_ctzl(x) __builtin_ctzl(x)
604
+ # define psnip_builtin_ctzll(x) __builtin_ctzll(x)
605
+ # define psnip_builtin_ctz32(x) PSNIP_BUILTIN__VARIANT_INT32(_,ctz)(x)
606
+ # define psnip_builtin_ctz64(x) PSNIP_BUILTIN__VARIANT_INT64(_,ctz)(x)
607
+ #else
608
+ # if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanForward, 14, 0)
609
+ PSNIP_BUILTIN__FUNCTION
610
+ int psnip_builtin_ctzll(unsigned long long v) {
611
+ unsigned long r = 0;
612
+ # if defined(_M_AMD64) || defined(_M_ARM)
613
+ _BitScanForward64(&r, v);
614
+ return (int) r;
615
+ # else
616
+ if (_BitScanForward(&r, (unsigned int) (v)))
617
+ return (int) (r);
618
+
619
+ _BitScanForward(&r, (unsigned int) (v >> 32));
620
+ return (int) (r + 32);
621
+ # endif
622
+ }
623
+
624
+ PSNIP_BUILTIN__FUNCTION
625
+ int psnip_builtin_ctzl(unsigned long v) {
626
+ unsigned long r = 0;
627
+ _BitScanForward(&r, v);
628
+ return (int) r;
629
+ }
630
+
631
+ PSNIP_BUILTIN__FUNCTION
632
+ int psnip_builtin_ctz(unsigned int v) {
633
+ return psnip_builtin_ctzl(v);
634
+ }
635
+ # define psnip_builtin_ctz32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,ctz)(x)
636
+ # define psnip_builtin_ctz64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,ctz)(x)
637
+ # else
638
+ PSNIP_BUILTIN__FUNCTION
639
+ int psnip_builtin_ctz32(psnip_uint32_t v) {
640
+ static const unsigned char MultiplyDeBruijnBitPosition[] = {
641
+ 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
642
+ 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
643
+ };
644
+
645
+ return
646
+ MultiplyDeBruijnBitPosition[((psnip_uint32_t)((v & -v) * 0x077CB531U)) >> 27];
647
+ }
648
+
649
+ PSNIP_BUILTIN__FUNCTION
650
+ int psnip_builtin_ctz64(psnip_uint64_t v) {
651
+ static const unsigned char MultiplyDeBruijnBitPosition[] = {
652
+ 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
653
+ 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
654
+ 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
655
+ 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6
656
+ };
657
+
658
+ return
659
+ MultiplyDeBruijnBitPosition[((psnip_uint64_t)((v & -v) * 0x03f79d71b4ca8b09ULL)) >> 58];
660
+ }
661
+
662
+ # if PSNIP_BUILTIN__SIZEOF_INT == 32
663
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctz(unsigned int x) { return psnip_builtin_ctz32(x); }
664
+ # elif PSNIP_BUILTIN__SIZEOF_INT == 64
665
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctz(unsigned int x) { return psnip_builtin_ctz64(x); }
666
+ # else
667
+ PSNIP_BUILTIN__CTZ_DEFINE_PORTABLE(ctz, unsigned int)
668
+ # endif
669
+
670
+ # if PSNIP_BUILTIN__SIZEOF_LONG == 32
671
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctzl(unsigned long x) { return psnip_builtin_ctz32(x); }
672
+ # elif PSNIP_BUILTIN__SIZEOF_LONG == 64
673
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctzl(unsigned long x) { return psnip_builtin_ctz64(x); }
674
+ # else
675
+ PSNIP_BUILTIN__CTZ_DEFINE_PORTABLE(ctzl, unsigned long)
676
+ # endif
677
+
678
+ # if PSNIP_BUILTIN__SIZEOF_LLONG == 32
679
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctzll(unsigned long long x) { return psnip_builtin_ctz32(x); }
680
+ # elif PSNIP_BUILTIN__SIZEOF_LLONG == 64
681
+ PSNIP_BUILTIN__FUNCTION int psnip_builtin_ctzll(unsigned long long x) { return psnip_builtin_ctz64(x); }
682
+ # else
683
+ PSNIP_BUILTIN__CTZ_DEFINE_PORTABLE(ctzll, unsigned long long)
684
+ # endif
685
+ # endif
686
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
687
+ # define __builtin_ctz(x) psnip_builtin_ctz(x)
688
+ # define __builtin_ctzl(x) psnip_builtin_ctzl(x)
689
+ # define __builtin_ctzll(x) psnip_builtin_ctzll(x)
690
+ # endif
691
+ #endif
692
+
693
+ #if !defined(psnip_builtin_ctz32)
694
+ # define psnip_builtin_ctz32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,ctz)(x)
695
+ #endif
696
+
697
+ #if !defined(psnip_builtin_ctz64)
698
+ # define psnip_builtin_ctz64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,ctz)(x)
699
+ #endif
700
+
701
+ /*** __builtin_parity ***/
702
+
703
/* Defines psnip_builtin_<f_n>(T v), a portable parity: 1 when v has an odd
 * number of set bits, 0 otherwise.
 *
 * The value is repeatedly XOR-folded onto its lower half until four bits
 * remain.  The constant 0x6996 is then used as a 16-entry bit table whose
 * bit n is the parity of the 4-bit number n. */
#define PSNIP_BUILTIN__PARITY_DEFINE_PORTABLE(f_n, T) \
  PSNIP_BUILTIN__FUNCTION \
  int psnip_builtin_##f_n(T v) { \
    size_t half = (sizeof(T) * CHAR_BIT) / 2; \
    while (half > 2) { \
      v ^= v >> half; \
      half /= 2; \
    } \
    return (0x6996 >> (v & 0xf)) & 1; \
  }
712
+
713
+ #if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_parity, 3, 4)
714
+ # define psnip_builtin_parity(x) __builtin_parity(x)
715
+ # define psnip_builtin_parityl(x) __builtin_parityl(x)
716
+ # define psnip_builtin_parityll(x) __builtin_parityll(x)
717
+ # define psnip_builtin_parity32(x) PSNIP_BUILTIN__VARIANT_INT32(_,parity)(x)
718
+ # define psnip_builtin_parity64(x) PSNIP_BUILTIN__VARIANT_INT64(_,parity)(x)
719
+ #else
720
+ PSNIP_BUILTIN__PARITY_DEFINE_PORTABLE(parity, unsigned int)
721
+ PSNIP_BUILTIN__PARITY_DEFINE_PORTABLE(parityl, unsigned long)
722
+ PSNIP_BUILTIN__PARITY_DEFINE_PORTABLE(parityll, unsigned long long)
723
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
724
+ # define __builtin_parity(x) psnip_builtin_parity(x)
725
+ # define __builtin_parityl(x) psnip_builtin_parityl(x)
726
+ # define __builtin_parityll(x) psnip_builtin_parityll(x)
727
+ # endif
728
+ #endif
729
+
730
+ #if !defined(psnip_builtin_parity32)
731
+ # define psnip_builtin_parity32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,parity)(x)
732
+ #endif
733
+
734
+ #if !defined(psnip_builtin_parity64)
735
+ # define psnip_builtin_parity64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,parity)(x)
736
+ #endif
737
+
738
+ /*** __builtin_popcount ***/
739
+
740
/* Defines psnip_builtin_<f_n>(T x), a portable population count.
 *
 * Bit counts are accumulated in parallel: first per 2-bit pair, then per
 * 4-bit nibble, then per byte.  Multiplying by a value with 0x01 in every
 * byte sums all byte counts into the most significant byte, which is then
 * shifted down.  All masks are derived from the all-ones value of T so the
 * same text works for every width the macro is instantiated with. */
#define PSNIP_BUILTIN__POPCOUNT_DEFINE_PORTABLE(f_n, T) \
  PSNIP_BUILTIN__FUNCTION \
  int psnip_builtin_##f_n(T x) { \
    const T pair_mask   = (T)~(T)0/3;      /* 0x55... */ \
    const T nibble_mask = (T)~(T)0/15*3;   /* 0x33... */ \
    const T byte_mask   = (T)~(T)0/255*15; /* 0x0f... */ \
    const T byte_ones   = (T)~(T)0/255;    /* 0x01... */ \
    x -= (x >> 1) & pair_mask; \
    x = (x & nibble_mask) + ((x >> 2) & nibble_mask); \
    x = (x + (x >> 4)) & byte_mask; \
    return (T)(x * byte_ones) >> (sizeof(T) - 1) * 8; \
  }
748
+
749
+ #if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_popcount, 3, 4)
750
+ # define psnip_builtin_popcount(x) __builtin_popcount(x)
751
+ # define psnip_builtin_popcountl(x) __builtin_popcountl(x)
752
+ # define psnip_builtin_popcountll(x) __builtin_popcountll(x)
753
+ # define psnip_builtin_popcount32(x) PSNIP_BUILTIN__VARIANT_INT32(_,popcount)(x)
754
+ # define psnip_builtin_popcount64(x) PSNIP_BUILTIN__VARIANT_INT64(_,popcount)(x)
755
+ #else
756
+ PSNIP_BUILTIN__POPCOUNT_DEFINE_PORTABLE(popcount, unsigned int)
757
+ PSNIP_BUILTIN__POPCOUNT_DEFINE_PORTABLE(popcountl, unsigned long)
758
+ PSNIP_BUILTIN__POPCOUNT_DEFINE_PORTABLE(popcountll, unsigned long long)
759
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
760
+ # define __builtin_popcount(x) psnip_builtin_popcount(x)
761
+ # define __builtin_popcountl(x) psnip_builtin_popcountl(x)
762
+ # define __builtin_popcountll(x) psnip_builtin_popcountll(x)
763
+ # endif
764
+ #endif
765
+
766
+ #if !defined(psnip_builtin_popcount32)
767
+ # define psnip_builtin_popcount32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,popcount)(x)
768
+ #endif
769
+
770
+ #if !defined(psnip_builtin_popcount64)
771
+ # define psnip_builtin_popcount64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,popcount)(x)
772
+ #endif
773
+
774
+ /*** __builtin_clrsb ***/
775
+
776
/* Defines psnip_builtin_<f_n>(T x), a portable count-leading-redundant-
 * sign-bits: the number of bits directly below the sign bit that are equal
 * to it.
 *
 *   f_n    suffix of the generated function name
 *   clzfn  suffix of the psnip_builtin_* count-leading-zeros function that
 *          accepts the unsigned counterpart of T
 *   T      signed integer type
 *
 * Negative inputs are complemented so that, in either case, the redundant
 * sign bits become leading zeros; the answer is then clz - 1.  When the
 * folded value is zero (x == 0 or x == -1) every bit below the sign bit is
 * redundant, so width - 1 is returned directly.  Handling both values here
 * avoids calling clz with a zero argument, which the GCC builtin leaves
 * undefined; previously only x == -1 was special-cased. */
#define PSNIP_BUILTIN__CLRSB_DEFINE_PORTABLE(f_n, clzfn, T) \
  PSNIP_BUILTIN__FUNCTION \
  int psnip_builtin_##f_n(T x) { \
    const T folded = (x < 0) ? ~x : x; \
    if (PSNIP_BUILTIN_UNLIKELY(folded == 0)) \
      return ((int) sizeof(x) * 8) - 1; \
    return psnip_builtin_##clzfn(folded) - 1; \
  }
783
+
784
+ #if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_clrsb, 4, 7)
785
+ # define psnip_builtin_clrsb(x) __builtin_clrsb(x)
786
+ # if !defined(__INTEL_COMPILER)
787
+ # define psnip_builtin_clrsbl(x) __builtin_clrsbl(x)
788
+ # else
789
+ # if PSNIP_BUILTIN__SIZEOF_LONG == PSNIP_BUILTIN__SIZEOF_INT
790
+ # define psnip_builtin_clrsbl(x) ((long) __builtin_clrsb((int) x))
791
+ # elif PSNIP_BUILTIN__SIZEOF_LONG == PSNIP_BUILTIN__SIZEOF_LLONG
792
+ # define psnip_builtin_clrsbl(x) ((long) __builtin_clrsbll((long long) x))
793
+ # else
794
+ PSNIP_BUILTIN__CLRSB_DEFINE_PORTABLE(clrsbl, clzl, long)
795
+ # endif
796
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
797
+ # define __builtin_clrsbl(x) psnip_builtin_clrsbl(x)
798
+ # endif
799
+ # endif
800
+ # define psnip_builtin_clrsbll(x) __builtin_clrsbll(x)
801
+ # define psnip_builtin_clrsb32(x) PSNIP_BUILTIN__VARIANT_INT32(_,clrsb)(x)
802
+ # define psnip_builtin_clrsb64(x) PSNIP_BUILTIN__VARIANT_INT64(_,clrsb)(x)
803
+ #else
804
+ PSNIP_BUILTIN__CLRSB_DEFINE_PORTABLE(clrsb, clz, int)
805
+ PSNIP_BUILTIN__CLRSB_DEFINE_PORTABLE(clrsbl, clzl, long)
806
+ PSNIP_BUILTIN__CLRSB_DEFINE_PORTABLE(clrsbll, clzll, long long)
807
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
808
+ # define __builtin_clrsb(x) psnip_builtin_clrsb(x)
809
+ # define __builtin_clrsbl(x) psnip_builtin_clrsbl(x)
810
+ # define __builtin_clrsbll(x) psnip_builtin_clrsbll(x)
811
+ # endif
812
+ #endif
813
+
814
+ #if !defined(psnip_builtin_clrsb32)
815
+ # define psnip_builtin_clrsb32(x) PSNIP_BUILTIN__VARIANT_INT32(psnip,clrsb)(x)
816
+ #endif
817
+
818
+ #if !defined(psnip_builtin_clrsb64)
819
+ # define psnip_builtin_clrsb64(x) PSNIP_BUILTIN__VARIANT_INT64(psnip,clrsb)(x)
820
+ #endif
821
+
822
+ /*** __builtin_bitreverse ***/
823
+
824
/* Defines psnip_builtin_<f_n>(T x), which returns x with its bit order
 * reversed.
 *
 * The swap width starts at half the width of T and is halved each round.
 * In every round the mask is XOR-ed with itself shifted left by the current
 * width and then used to exchange neighbouring groups of that many bits. */
#define PSNIP_BUILTIN__BITREVERSE_DEFINE_PORTABLE(f_n, T) \
  PSNIP_BUILTIN__FUNCTION \
  T psnip_builtin_##f_n(T x) { \
    T mask = (T) ~((T) 0U); \
    size_t width; \
    for (width = (sizeof(x) * CHAR_BIT) >> 1 ; width > 0 ; width >>= 1) { \
      mask ^= (mask << width); \
      x = ((x >> width) & mask) | ((x << width) & ~mask); \
    } \
    return x; \
  }
836
+
837
+ #if PSNIP_BUILTIN_CLANG_HAS_BUILTIN(__builtin_bitreverse64) && !defined(__EMSCRIPTEN__)
838
+ # define psnip_builtin_bitreverse8(x) __builtin_bitreverse8(x)
839
+ # define psnip_builtin_bitreverse16(x) __builtin_bitreverse16(x)
840
+ # define psnip_builtin_bitreverse32(x) __builtin_bitreverse32(x)
841
+ # define psnip_builtin_bitreverse64(x) __builtin_bitreverse64(x)
842
+ #else
843
+ PSNIP_BUILTIN__FUNCTION
844
+ psnip_uint8_t psnip_builtin_bitreverse8(psnip_uint8_t v) {
845
+ return (psnip_uint8_t) ((v * 0x0202020202ULL & 0x010884422010ULL) % 1023);
846
+ }
847
+ PSNIP_BUILTIN__BITREVERSE_DEFINE_PORTABLE(bitreverse16, psnip_uint16_t)
848
+ PSNIP_BUILTIN__BITREVERSE_DEFINE_PORTABLE(bitreverse32, psnip_uint32_t)
849
+ PSNIP_BUILTIN__BITREVERSE_DEFINE_PORTABLE(bitreverse64, psnip_uint64_t)
850
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
851
+ # define __builtin_bitreverse8(x) psnip_builtin_bitreverse8(x)
852
+ # define __builtin_bitreverse16(x) psnip_builtin_bitreverse16(x)
853
+ # define __builtin_bitreverse32(x) psnip_builtin_bitreverse32(x)
854
+ # define __builtin_bitreverse64(x) psnip_builtin_bitreverse64(x)
855
+ # endif
856
+ #endif
857
+
858
+ /*** __builtin_addc ***/
859
+
860
/* Defines psnip_builtin_<f_n>(x, y, ci, co): add with carry for the
 * unsigned type T.
 *
 *   x, y  operands
 *   ci    carry in; any non-zero value enables the carry path and the value
 *         itself is added to the sum
 *   co    out: set to 1 when the addition wraps, otherwise 0
 *
 * Returns the sum truncated to T.  A carry is detected either from x + y
 * alone (x exceeds the headroom left above y) or, when a carry-in is
 * present, from x + y already being the all-ones value. */
#define PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(f_n, T) \
  PSNIP_BUILTIN__FUNCTION \
  T psnip_builtin_##f_n(T x, T y, T ci, T* co) { \
    const T all_ones = (T) ~((T) 0); \
    T sum = (T) x + y; \
    T carry = (T) (x > (all_ones - y)); \
    if (ci) { \
      if (sum == all_ones) \
        carry = 1; \
      sum += ci; \
    } \
    *co = carry; \
    return sum; \
  }
874
+
875
+ #if PSNIP_BUILTIN_CLANG_HAS_BUILTIN(__builtin_addc)
876
+ # define psnip_builtin_addcb(x, y, ci, co) __builtin_addcb(x, y, ci, co)
877
+ # define psnip_builtin_addcs(x, y, ci, co) __builtin_addcs(x, y, ci, co)
878
+ # define psnip_builtin_addc(x, y, ci, co) __builtin_addc(x, y, ci, co)
879
+ # define psnip_builtin_addcl(x, y, ci, co) __builtin_addcl(x, y, ci, co)
880
+ # define psnip_builtin_addcll(x, y, ci, co) __builtin_addcll(x, y, ci, co)
881
+ # define psnip_builtin_addc8(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT8(_,addc)(x, y, ci, co)
882
+ # define psnip_builtin_addc16(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT16(_,addc)(x, y, ci, co)
883
+ # define psnip_builtin_addc32(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT32(_,addc)(x, y, ci, co)
884
+ # define psnip_builtin_addc64(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT64(_,addc)(x, y, ci, co)
885
+ #else
886
+ PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(addcb, unsigned char)
887
+ PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(addcs, unsigned short)
888
+ PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(addc, unsigned int)
889
+ PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(addcl, unsigned long)
890
+ PSNIP_BUILTIN__ADDC_DEFINE_PORTABLE(addcll, unsigned long long)
891
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
892
+ # define __builtin_addcb(x, y, ci, co) psnip_builtin_addcb(x, y, ci, co)
893
+ # define __builtin_addcs(x, y, ci, co) psnip_builtin_addcs(x, y, ci, co)
894
+ # define __builtin_addc(x, y, ci, co) psnip_builtin_addc(x, y, ci, co)
895
+ # define __builtin_addcl(x, y, ci, co) psnip_builtin_addcl(x, y, ci, co)
896
+ # define __builtin_addcll(x, y, ci, co) psnip_builtin_addcll(x, y, ci, co)
897
+ # endif
898
+ #endif
899
+
900
+ #if !defined(psnip_builtin_addc8)
901
+ # define psnip_builtin_addc8(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT8(psnip,addc)(x, y, ci, co)
902
+ #endif
903
+
904
+ #if !defined(psnip_builtin_addc16)
905
+ # define psnip_builtin_addc16(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT16(psnip,addc)(x, y, ci, co)
906
+ #endif
907
+
908
+ #if !defined(psnip_builtin_addc32)
909
+ # define psnip_builtin_addc32(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT32(psnip,addc)(x, y, ci, co)
910
+ #endif
911
+
912
+ #if !defined(psnip_builtin_addc64)
913
+ # define psnip_builtin_addc64(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT64(psnip,addc)(x, y, ci, co)
914
+ #endif
915
+
916
+ /*** __builtin_subc ***/
917
+
918
/* Defines psnip_builtin_<f_n>(x, y, ci, co): subtract with borrow for the
 * unsigned type T.
 *
 *   x, y  minuend and subtrahend
 *   ci    borrow in; any non-zero value subtracts one more from the result
 *   co    out: set to 1 when the subtraction borrows (wraps), otherwise 0
 *
 * Returns x - y - (ci ? 1 : 0) truncated to T.
 *
 * A borrow occurs when x < y, or when x == y and a borrow-in is present
 * (0 - 1 wraps).  The second condition must be tested on the difference
 * BEFORE the borrow-in is subtracted; testing afterwards reported a borrow
 * for x - y == 1 (result 0, no wrap) and missed it for x == y (result
 * wraps to all-ones). */
#define PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(f_n, T) \
  PSNIP_BUILTIN__FUNCTION \
  T psnip_builtin_##f_n(T x, T y, T ci, T* co) { \
    T r = x - y; \
    *co = x < y; \
    if (ci) { \
      if (r == 0) \
        *co = 1; \
      r--; \
    } \
    return r; \
  }
930
+
931
+ #if PSNIP_BUILTIN_CLANG_HAS_BUILTIN(__builtin_subc)
932
+ # define psnip_builtin_subcb(x, y, ci, co) __builtin_subcb(x, y, ci, co)
933
+ # define psnip_builtin_subcs(x, y, ci, co) __builtin_subcs(x, y, ci, co)
934
+ # define psnip_builtin_subc(x, y, ci, co) __builtin_subc(x, y, ci, co)
935
+ # define psnip_builtin_subcl(x, y, ci, co) __builtin_subcl(x, y, ci, co)
936
+ # define psnip_builtin_subcll(x, y, ci, co) __builtin_subcll(x, y, ci, co)
937
+ # define psnip_builtin_subc8(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT8(_,subc)(x, y, ci, co)
938
+ # define psnip_builtin_subc16(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT16(_,subc)(x, y, ci, co)
939
+ # define psnip_builtin_subc32(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT32(_,subc)(x, y, ci, co)
940
+ # define psnip_builtin_subc64(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT64(_,subc)(x, y, ci, co)
941
+ #else
942
+ PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(subcb, unsigned char)
943
+ PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(subcs, unsigned short)
944
+ PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(subc, unsigned int)
945
+ PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(subcl, unsigned long)
946
+ PSNIP_BUILTIN__SUBC_DEFINE_PORTABLE(subcll, unsigned long long)
947
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
948
+ # define __builtin_subcb(x, y, ci, co) psnip_builtin_subcb(x, y, ci, co)
949
+ # define __builtin_subcs(x, y, ci, co) psnip_builtin_subcs(x, y, ci, co)
950
+ # define __builtin_subc(x, y, ci, co) psnip_builtin_subc(x, y, ci, co)
951
+ # define __builtin_subcl(x, y, ci, co) psnip_builtin_subcl(x, y, ci, co)
952
+ # define __builtin_subcll(x, y, ci, co) psnip_builtin_subcll(x, y, ci, co)
953
+ # endif
954
+ #endif
955
+
956
+ #if !defined(psnip_builtin_subc8)
957
+ # define psnip_builtin_subc8(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT8(psnip,subc)(x, y, ci, co)
958
+ #endif
959
+
960
+ #if !defined(psnip_builtin_subc16)
961
+ # define psnip_builtin_subc16(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT16(psnip,subc)(x, y, ci, co)
962
+ #endif
963
+
964
+ #if !defined(psnip_builtin_subc32)
965
+ # define psnip_builtin_subc32(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT32(psnip,subc)(x, y, ci, co)
966
+ #endif
967
+
968
+ #if !defined(psnip_builtin_subc64)
969
+ # define psnip_builtin_subc64(x, y, ci, co) PSNIP_BUILTIN__VARIANT2_INT64(psnip,subc)(x, y, ci, co)
970
+ #endif
971
+
972
+ /*** __builtin_bswap ***/
973
+
974
+ #if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_bswap16, 4, 8)
975
+ # define psnip_builtin_bswap16(x) __builtin_bswap16(x)
976
+ #else
977
+ # if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_byteswap_ushort,13,10)
978
+ # define psnip_builtin_bswap16(x) _byteswap_ushort(x)
979
+ # else
980
+ PSNIP_BUILTIN__FUNCTION
981
+ psnip_uint16_t
982
+ psnip_builtin_bswap16(psnip_uint16_t v) {
983
+ return
984
+ ((v & (((psnip_uint16_t) 0xff) << 8)) >> 8) |
985
+ ((v & (((psnip_uint16_t) 0xff) )) << 8);
986
+ }
987
+ # endif
988
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
989
+ # define __builtin_bswap16(x) psnip_builtin_bswap16(x)
990
+ # endif
991
+ #endif
992
+
993
+ #if PSNIP_BUILTIN_GNU_HAS_BUILTIN(__builtin_bswap16, 4, 3)
994
+ # define psnip_builtin_bswap32(x) __builtin_bswap32(x)
995
+ # define psnip_builtin_bswap64(x) __builtin_bswap64(x)
996
+ #else
997
+ # if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_byteswap_ushort,13,10)
998
+ # define psnip_builtin_bswap32(x) _byteswap_ulong(x)
999
+ # define psnip_builtin_bswap64(x) _byteswap_uint64(x)
1000
+ # else
1001
+ PSNIP_BUILTIN__FUNCTION
1002
+ psnip_uint32_t
1003
+ psnip_builtin_bswap32(psnip_uint32_t v) {
1004
+ return
1005
+ ((v & (((psnip_uint32_t) 0xff) << 24)) >> 24) |
1006
+ ((v & (((psnip_uint32_t) 0xff) << 16)) >> 8) |
1007
+ ((v & (((psnip_uint32_t) 0xff) << 8)) << 8) |
1008
+ ((v & (((psnip_uint32_t) 0xff) )) << 24);
1009
+ }
1010
+
1011
+ PSNIP_BUILTIN__FUNCTION
1012
+ psnip_uint64_t
1013
+ psnip_builtin_bswap64(psnip_uint64_t v) {
1014
+ return
1015
+ ((v & (((psnip_uint64_t) 0xff) << 56)) >> 56) |
1016
+ ((v & (((psnip_uint64_t) 0xff) << 48)) >> 40) |
1017
+ ((v & (((psnip_uint64_t) 0xff) << 40)) >> 24) |
1018
+ ((v & (((psnip_uint64_t) 0xff) << 32)) >> 8) |
1019
+ ((v & (((psnip_uint64_t) 0xff) << 24)) << 8) |
1020
+ ((v & (((psnip_uint64_t) 0xff) << 16)) << 24) |
1021
+ ((v & (((psnip_uint64_t) 0xff) << 8)) << 40) |
1022
+ ((v & (((psnip_uint64_t) 0xff) )) << 56);
1023
+ }
1024
+ # endif
1025
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1026
+ # define __builtin_bswap32(x) psnip_builtin_bswap32(x)
1027
+ # define __builtin_bswap64(x) psnip_builtin_bswap64(x)
1028
+ # endif
1029
+ #endif
1030
+
1031
+ /******
1032
+ *** MSVC-style intrinsics
1033
+ ******/
1034
+
1035
+ /*** _rotl ***/
1036
+
1037
/* Defines psnip_intrin_<f_n>(value, shift): rotate `value` left by `shift`
 * bit positions within the width of T.
 *
 *   f_n  suffix of the generated function name
 *   T    unsigned integer type being rotated
 *   ST   type of the shift count
 *
 * The count is reduced modulo the bit width of T before use.  Without the
 * reduction a count of 0 produced `value >> width`, and counts at or above
 * the width (or negative) produced out-of-range shifts; both are undefined
 * behaviour for types at least as wide as int.  For counts strictly between
 * 0 and the width the result is unchanged.  The mask assumes the width of T
 * is a power of two (true for the 8/16/32/64-bit instantiations). */
#define PSNIP_BUILTIN_ROTL_DEFINE_PORTABLE(f_n, T, ST) \
  PSNIP_BUILTIN__FUNCTION \
  T psnip_intrin_##f_n(T value, ST shift) { \
    const unsigned int mask = (unsigned int) ((sizeof(T) * 8) - 1); \
    const unsigned int left = ((unsigned int) shift) & mask; \
    /* (width - left) mod width; becomes 0 when left is 0. */ \
    const unsigned int right = (0U - left) & mask; \
    return (T) \
      ((value >> right) | \
       (value << left)); \
  }
1044
+
1045
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_rotl8, 14, 0)
1046
+ # define psnip_intrin_rotl8(value, shift) _rotl8(value, shift)
1047
+ # define psnip_intrin_rotl16(value, shift) _rotl16(value, shift)
1048
+ #else
1049
+ PSNIP_BUILTIN_ROTL_DEFINE_PORTABLE(rotl8, psnip_uint8_t, unsigned char)
1050
+ PSNIP_BUILTIN_ROTL_DEFINE_PORTABLE(rotl16, psnip_uint16_t, unsigned char)
1051
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1052
+ # if !defined(_rotl8)
1053
+ # define _rotl8(value, shift) psnip_intrin_rotl8(value, shift)
1054
+ # endif
1055
+ # if !defined(_rotl16)
1056
+ # define _rotl16(value, shift) psnip_intrin_rotl16(value, shift)
1057
+ # endif
1058
+ # endif
1059
+ #endif
1060
+
1061
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_rotl8, 13, 10)
1062
+ # define psnip_intrin_rotl(value, shift) _rotl(value, shift)
1063
+ # define psnip_intrin_rotl64(value, shift) _rotl64(value, shift)
1064
+ #else
1065
+ PSNIP_BUILTIN_ROTL_DEFINE_PORTABLE(rotl, psnip_uint32_t, int)
1066
+ PSNIP_BUILTIN_ROTL_DEFINE_PORTABLE(rotl64, psnip_uint64_t, int)
1067
+
1068
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1069
+ # if !defined(_rotl)
1070
+ # define _rotl(value, shift) psnip_intrin_rotl(value, shift)
1071
+ # endif
1072
+ # if !defined(_rotl64)
1073
+ # define _rotl64(value, shift) psnip_intrin_rotl64(value, shift)
1074
+ # endif
1075
+ # endif
1076
+ #endif
1077
+
1078
+ /*** _rotr ***/
1079
+
1080
/* Defines psnip_intrin_<f_n>(value, shift): rotate `value` right by `shift`
 * bit positions within the width of T.
 *
 *   f_n  suffix of the generated function name
 *   T    unsigned integer type being rotated
 *   ST   type of the shift count
 *
 * The count is reduced modulo the bit width of T before use.  Without the
 * reduction a count of 0 produced `value << width`, and counts at or above
 * the width (or negative) produced out-of-range shifts; both are undefined
 * behaviour for types at least as wide as int.  For counts strictly between
 * 0 and the width the result is unchanged.  The mask assumes the width of T
 * is a power of two (true for the 8/16/32/64-bit instantiations). */
#define PSNIP_BUILTIN_ROTR_DEFINE_PORTABLE(f_n, T, ST) \
  PSNIP_BUILTIN__FUNCTION \
  T psnip_intrin_##f_n(T value, ST shift) { \
    const unsigned int mask = (unsigned int) ((sizeof(T) * 8) - 1); \
    const unsigned int right = ((unsigned int) shift) & mask; \
    /* (width - right) mod width; becomes 0 when right is 0. */ \
    const unsigned int left = (0U - right) & mask; \
    return (T) \
      ((value << left) | \
       (value >> right)); \
  }
1087
+
1088
+ PSNIP_BUILTIN_ROTR_DEFINE_PORTABLE(rotr8, psnip_uint8_t, unsigned char)
1089
+ PSNIP_BUILTIN_ROTR_DEFINE_PORTABLE(rotr16, psnip_uint16_t, unsigned char)
1090
+
1091
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_rotr8, 14, 0)
1092
+ # define psnip_intrin_rotr8(value, shift) _rotr8(value, shift)
1093
+ # define psnip_intrin_rotr16(value, shift) _rotr16(value, shift)
1094
+ #else
1095
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1096
+ # define _rotr8(value, shift) psnip_intrin_rotr8(value, shift)
1097
+ # define _rotr16(value, shift) psnip_intrin_rotr16(value, shift)
1098
+ # endif
1099
+ #endif
1100
+
1101
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_rotr8, 13, 10)
1102
+ # define psnip_intrin_rotr(value, shift) _rotr(value, shift)
1103
+ # define psnip_intrin_rotr64(value, shift) _rotr64(value, shift)
1104
+ #else
1105
+ PSNIP_BUILTIN_ROTR_DEFINE_PORTABLE(rotr, psnip_uint32_t, int)
1106
+ PSNIP_BUILTIN_ROTR_DEFINE_PORTABLE(rotr64, psnip_uint64_t, int)
1107
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE)
1108
+ # if !defined(_rotr)
1109
+ # define _rotr(value, shift) psnip_intrin_rotr(value, shift)
1110
+ # endif
1111
+ # if !defined(_rotr64)
1112
+ # define _rotr64(value, shift) psnip_intrin_rotr64(value, shift)
1113
+ # endif
1114
+ # endif
1115
+ #endif
1116
+
1117
+ /*** _BitScanForward: psnip_intrin_BitScanForward(Index, Mask) for 32-bit masks; the fallback returns 0 and leaves *Index untouched when Mask is 0, otherwise stores a bit index in *Index and returns 1 ***/
1118
+
1119
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanForward, 14, 0)
1120
+ # pragma intrinsic(_BitScanForward)
1121
+ PSNIP_BUILTIN__FUNCTION
1122
+ unsigned char psnip_intrin_BitScanForward(unsigned long* Index, psnip_uint32_t Mask) { /* thin wrapper around the compiler intrinsic */
1123
+ const unsigned long M = (unsigned long) Mask; /* the intrinsic is called with an unsigned long mask */
1124
+ return _BitScanForward(Index, M);
1125
+ }
1126
+ #else
1127
+ PSNIP_BUILTIN__FUNCTION
1128
+ unsigned char psnip_intrin_BitScanForward(unsigned long* Index, psnip_uint32_t Mask) { /* fallback built on psnip_builtin_ctz32 (defined outside this chunk; presumably a count of trailing zero bits) */
1129
+ return PSNIP_BUILTIN_UNLIKELY(Mask == 0) ? 0 : ((*Index = psnip_builtin_ctz32 (Mask)), 1); /* comma expression: assign *Index, then yield 1 */
1130
+ }
1131
+
1132
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE) /* opt-in: also provide the MSVC spelling */
1133
+ # define _BitScanForward(Index, Mask) psnip_intrin_BitScanForward(Index, Mask)
1134
+ # endif
1135
+ #endif
1136
+
1137
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanForward64, 14, 0) && (defined(_M_AMD64) || defined(_M_ARM)) /* 64-bit variant; the intrinsic is used only when one of these target macros is defined */
1138
+ # pragma intrinsic(_BitScanForward64)
1139
+ # define psnip_intrin_BitScanForward64(Index, Mask) _BitScanForward64(Index, Mask)
1140
+ #else
1141
+ PSNIP_BUILTIN__FUNCTION
1142
+ unsigned char psnip_intrin_BitScanForward64(unsigned long* Index, psnip_uint64_t Mask) { /* fallback built on psnip_builtin_ctz64 (defined outside this chunk; presumably a count of trailing zero bits) */
1143
+ return PSNIP_BUILTIN_UNLIKELY(Mask == 0) ? 0 : ((*Index = psnip_builtin_ctz64 (Mask)), 1); /* returns 0 without writing *Index when Mask is 0; otherwise assigns *Index and yields 1 */
1144
+ }
1145
+
1146
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE) /* opt-in: also provide the MSVC spelling */
1147
+ # define _BitScanForward64(Index, Mask) psnip_intrin_BitScanForward64(Index, Mask)
1148
+ # endif
1149
+ #endif
1150
+
1151
+ /*** _BitScanReverse: psnip_intrin_BitScanReverse(Index, Mask) for 32-bit masks; the fallback returns 0 and leaves *Index untouched when Mask is 0, otherwise stores a bit index in *Index and returns 1 ***/
1152
+
1153
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanReverse, 14, 0)
1154
+ # pragma intrinsic(_BitScanReverse)
1155
+ PSNIP_BUILTIN__FUNCTION
1156
+ unsigned char psnip_intrin_BitScanReverse(unsigned long* Index, psnip_uint32_t Mask) { /* thin wrapper around the compiler intrinsic */
1157
+ const unsigned long M = (unsigned long) Mask; /* the intrinsic is called with an unsigned long mask */
1158
+ return _BitScanReverse(Index, M);
1159
+ }
1160
+ #else
1161
+ PSNIP_BUILTIN__FUNCTION
1162
+ unsigned char psnip_intrin_BitScanReverse(unsigned long* Index, psnip_uint32_t Mask) { /* fallback built on psnip_builtin_clz32 (defined outside this chunk; presumably a count of leading zero bits) */
1163
+ return (PSNIP_BUILTIN_UNLIKELY(Mask == 0)) ? 0 : ((*Index = ((sizeof(Mask) * CHAR_BIT) - 1) - psnip_builtin_clz32 (Mask)), 1); /* index = (bit width of Mask - 1) - clz result */
1164
+ }
1165
+
1166
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE) /* opt-in: also provide the MSVC spelling */
1167
+ # define _BitScanReverse(Index, Mask) psnip_intrin_BitScanReverse(Index, Mask)
1168
+ # endif
1169
+ #endif
1170
+
1171
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_BitScanReverse64, 14, 0) && (defined(_M_AMD64) || defined(_M_ARM)) /* 64-bit variant; the intrinsic is used only when one of these target macros is defined */
1172
+ # pragma intrinsic(_BitScanReverse64)
1173
+ # define psnip_intrin_BitScanReverse64(Index, Mask) _BitScanReverse64(Index, Mask)
1174
+ #else
1175
+ PSNIP_BUILTIN__FUNCTION
1176
+ unsigned char psnip_intrin_BitScanReverse64(unsigned long* Index, psnip_uint64_t Mask) { /* fallback built on psnip_builtin_clz64 (defined outside this chunk; presumably a count of leading zero bits) */
1177
+ return (PSNIP_BUILTIN_UNLIKELY(Mask == 0)) ? 0 : ((*Index = ((sizeof(Mask) * CHAR_BIT) - 1) - psnip_builtin_clz64 (Mask)), 1); /* returns 0 without writing *Index when Mask is 0; otherwise index = (bit width of Mask - 1) - clz result */
1178
+ }
1179
+
1180
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE) /* opt-in: also provide the MSVC spelling */
1181
+ # define _BitScanReverse64(Index, Mask) psnip_intrin_BitScanReverse64(Index, Mask)
1182
+ # endif
1183
+ #endif
1184
+
1185
+ /*** bittest: psnip_intrin_bittest(a, b) / psnip_intrin_bittest64(a, b) read bit b of the value a points to ***/
1186
+
1187
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittest, 14, 0)
1188
+ # pragma intrinsic(_bittest)
1189
+ # define psnip_intrin_bittest(a, b) /* wraps the intrinsic call with warning 4057 disabled, restoring the warning state afterwards */ \
1190
+ __pragma(warning(push)) \
1191
+ __pragma(warning(disable:4057)) \
1192
+ _bittest(a, b) \
1193
+ __pragma(warning(pop))
1194
+ #else
1195
+ # define psnip_intrin_bittest(a, b) (((*(a)) >> (b)) & 1) /* macro, not a function: b is not range-checked and the result has the promoted type of *(a) */
1196
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE) /* opt-in: also provide the MSVC spelling */
1197
+ # define _bittest(a, b) psnip_intrin_bittest(a, b)
1198
+ # endif
1199
+ #endif
1200
+
1201
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittest64, 14, 0) && (defined(_M_AMD64) || defined(_M_ARM)) /* 64-bit variant; the intrinsic is used only when one of these target macros is defined */
1202
+ # pragma intrinsic(_bittest64)
1203
+ # define psnip_intrin_bittest64(a, b) _bittest64(a, b)
1204
+ #else
1205
+ # define psnip_intrin_bittest64(a, b) (((*(a)) >> (b)) & 1) /* same expression as the 32-bit fallback; the width comes from the type a points to */
1206
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE) /* opt-in: also provide the MSVC spelling */
1207
+ # define _bittest64(a, b) psnip_intrin_bittest64(a, b)
1208
+ # endif
1209
+ #endif
1210
+
1211
+ /*** bittestandcomplement ***/
1212
+
1213
+ #define PSNIP_BUILTIN__BITTESTANDCOMPLEMENT_DEFINE_PORTABLE(f_n, T, UT) /* Defines psnip_intrin_<f_n>(a, b): toggle bit b of *a and return the bit's previous value; UT is the unsigned type used to build the mask. */ \
1214
+ PSNIP_BUILTIN__FUNCTION \
1215
+ unsigned char psnip_intrin_##f_n(T* a, T b) { \
1216
+ const char psnip_prev_bit_ = (*a >> b) & 1; /* sample the bit before modifying *a */ \
1217
+ *a = *a ^ (((UT) 1) << b); \
1218
+ return psnip_prev_bit_; \
1219
+ }
1220
+
1221
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandcomplement, 14, 0)
1222
+ # pragma intrinsic(_bittestandcomplement)
1223
+ # define psnip_intrin_bittestandcomplement(a, b) /* wraps the intrinsic call with warning 4057 disabled, restoring the warning state afterwards */ \
1224
+ __pragma(warning(push)) \
1225
+ __pragma(warning(disable:4057)) \
1226
+ _bittestandcomplement(a, b) \
1227
+ __pragma(warning(pop))
1228
+ #else
1229
+ PSNIP_BUILTIN__BITTESTANDCOMPLEMENT_DEFINE_PORTABLE(bittestandcomplement, psnip_int32_t, psnip_uint32_t) /* defines the function psnip_intrin_bittestandcomplement() on psnip_int32_t */
1230
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE) /* opt-in: also provide the MSVC spelling */
1231
+ # define _bittestandcomplement(a, b) psnip_intrin_bittestandcomplement(a, b)
1232
+ # endif
1233
+ #endif
1234
+
1235
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandcomplement64, 14, 0) && defined(_M_AMD64) /* 64-bit variant; NOTE(review): unlike the sibling sections there is no pragma intrinsic line here - confirm that is intended */
1236
+ # define psnip_intrin_bittestandcomplement64(a, b) _bittestandcomplement64(a, b)
1237
+ #else
1238
+ PSNIP_BUILTIN__BITTESTANDCOMPLEMENT_DEFINE_PORTABLE(bittestandcomplement64, psnip_int64_t, psnip_uint64_t) /* defines the function psnip_intrin_bittestandcomplement64() on psnip_int64_t */
1239
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE) /* opt-in: also provide the MSVC spelling */
1240
+ # define _bittestandcomplement64(a, b) psnip_intrin_bittestandcomplement64(a, b)
1241
+ # endif
1242
+ #endif
1243
+
1244
+ /*** bittestandreset ***/
1245
+
1246
+ #define PSNIP_BUILTIN__BITTESTANDRESET_DEFINE_PORTABLE(f_n, T, UT) /* Defines psnip_intrin_<f_n>(a, b): clear bit b of *a and return the bit's previous value; UT is the unsigned type used to build the mask. */ \
1247
+ PSNIP_BUILTIN__FUNCTION \
1248
+ unsigned char psnip_intrin_##f_n(T* a, T b) { \
1249
+ const char psnip_prev_bit_ = (*a >> b) & 1; /* sample the bit before modifying *a */ \
1250
+ *a = *a & ~(((UT) 1) << b); \
1251
+ return psnip_prev_bit_; \
1252
+ }
1253
+
1254
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandreset, 14, 0)
1255
+ # pragma intrinsic(_bittestandreset)
1256
+ # define psnip_intrin_bittestandreset(a, b) /* wraps the intrinsic call with warning 4057 disabled, restoring the warning state afterwards */ \
1257
+ __pragma(warning(push)) \
1258
+ __pragma(warning(disable:4057)) \
1259
+ _bittestandreset(a, b) \
1260
+ __pragma(warning(pop))
1261
+ #else
1262
+ PSNIP_BUILTIN__BITTESTANDRESET_DEFINE_PORTABLE(bittestandreset, psnip_int32_t, psnip_uint32_t) /* defines the function psnip_intrin_bittestandreset() on psnip_int32_t */
1263
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE) /* opt-in: also provide the MSVC spelling */
1264
+ # define _bittestandreset(a, b) psnip_intrin_bittestandreset(a, b)
1265
+ # endif
1266
+ #endif
1267
+
1268
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandreset64, 14, 0) && (defined(_M_AMD64) || defined(_M_IA64)) /* 64-bit variant; NOTE(review): this is the only bittestand*64 section that also accepts _M_IA64 - confirm */
1269
+ # pragma intrinsic(_bittestandreset64)
1270
+ # define psnip_intrin_bittestandreset64(a, b) _bittestandreset64(a, b)
1271
+ #else
1272
+ PSNIP_BUILTIN__BITTESTANDRESET_DEFINE_PORTABLE(bittestandreset64, psnip_int64_t, psnip_uint64_t) /* defines the function psnip_intrin_bittestandreset64() on psnip_int64_t */
1273
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE) /* opt-in: also provide the MSVC spelling */
1274
+ # define _bittestandreset64(a, b) psnip_intrin_bittestandreset64(a, b)
1275
+ # endif
1276
+ #endif
1277
+
1278
+ /*** bittestandset ***/
1279
+
1280
+ #define PSNIP_BUILTIN__BITTESTANDSET_DEFINE_PORTABLE(f_n, T, UT) /* Defines psnip_intrin_<f_n>(a, b): set bit b of *a and return the bit's previous value; UT is the unsigned type used to build the mask. */ \
1281
+ PSNIP_BUILTIN__FUNCTION \
1282
+ unsigned char psnip_intrin_##f_n(T* a, T b) { \
1283
+ const char psnip_prev_bit_ = (*a >> b) & 1; /* sample the bit before modifying *a */ \
1284
+ *a = *a | (((UT) 1) << b); \
1285
+ return psnip_prev_bit_; \
1286
+ }
1287
+
1288
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandset, 14, 0)
1289
+ # pragma intrinsic(_bittestandset)
1290
+ # define psnip_intrin_bittestandset(a, b) /* wraps the intrinsic call with warning 4057 disabled, restoring the warning state afterwards */ \
1291
+ __pragma(warning(push)) \
1292
+ __pragma(warning(disable:4057)) \
1293
+ _bittestandset(a, b) \
1294
+ __pragma(warning(pop))
1295
+ #else
1296
+ PSNIP_BUILTIN__BITTESTANDSET_DEFINE_PORTABLE(bittestandset, psnip_int32_t, psnip_uint32_t) /* defines the function psnip_intrin_bittestandset() on psnip_int32_t */
1297
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE) /* opt-in: also provide the MSVC spelling */
1298
+ # define _bittestandset(a, b) psnip_intrin_bittestandset(a, b)
1299
+ # endif
1300
+ #endif
1301
+
1302
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_bittestandset64, 14, 0) && defined(_M_AMD64) /* 64-bit variant; the intrinsic is used only when _M_AMD64 is defined */
1303
+ # pragma intrinsic(_bittestandset64)
1304
+ # define psnip_intrin_bittestandset64(a, b) _bittestandset64(a, b)
1305
+ #else
1306
+ PSNIP_BUILTIN__BITTESTANDSET_DEFINE_PORTABLE(bittestandset64, psnip_int64_t, psnip_uint64_t) /* defines the function psnip_intrin_bittestandset64() on psnip_int64_t */
1307
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE) /* opt-in: also provide the MSVC spelling */
1308
+ # define _bittestandset64(a, b) psnip_intrin_bittestandset64(a, b)
1309
+ # endif
1310
+ #endif
1311
+
1312
+ /*** shiftleft128: treat HighPart:LowPart as one 128-bit value, shift it left by (Shift % 64) bits and return the upper 64 bits ***/
1313
+
1314
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(__shiftleft128, 14, 0) && defined(_M_AMD64)
1315
+ # define psnip_intrin_shiftleft128(LowPart, HighPart, Shift) __shiftleft128(LowPart, HighPart, Shift)
1316
+ #else
1317
+ # if defined(__SIZEOF_INT128__) /* a 128-bit integer type is available: do the shift directly */
1318
+ PSNIP_BUILTIN__FUNCTION
1319
+ psnip_uint64_t psnip_intrin_shiftleft128(psnip_uint64_t LowPart, psnip_uint64_t HighPart, unsigned char Shift) {
1320
+ unsigned __int128 r = HighPart; /* assemble HighPart:LowPart in r */
1321
+ r <<= 64;
1322
+ r |= LowPart;
1323
+ r <<= Shift % 64; /* only the low six bits of Shift are used */
1324
+ return (psnip_uint64_t) (r >> 64); /* upper half of the shifted value */
1325
+ }
1326
+ # else
1327
+ PSNIP_BUILTIN__FUNCTION
1328
+ psnip_uint64_t psnip_intrin_shiftleft128(psnip_uint64_t LowPart, psnip_uint64_t HighPart, unsigned char Shift) {
1329
+ Shift %= 64; /* only the low six bits of Shift are used */
1330
+ return PSNIP_BUILTIN_UNLIKELY(Shift == 0) ? HighPart : ((HighPart << Shift) | (LowPart >> (64 - Shift))); /* Shift == 0 is handled separately because LowPart >> 64 would be an out-of-range shift */
1331
+ }
1332
+ # endif
1333
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE) /* opt-in: also provide the MSVC spelling */
1334
+ # define __shiftleft128(LowPart, HighPart, Shift) psnip_intrin_shiftleft128(LowPart, HighPart, Shift)
1335
+ # endif
1336
+ #endif
1337
+
1338
+ /*** shiftright128: treat HighPart:LowPart as one 128-bit value, shift it right by (Shift % 64) bits and return the lower 64 bits ***/
1339
+
1340
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(__shiftright128, 14, 0) && defined(_M_AMD64)
1341
+ # define psnip_intrin_shiftright128(LowPart, HighPart, Shift) __shiftright128(LowPart, HighPart, Shift)
1342
+ #else
1343
+ # if defined(__SIZEOF_INT128__) /* a 128-bit integer type is available: do the shift directly */
1344
+ PSNIP_BUILTIN__FUNCTION
1345
+ psnip_uint64_t psnip_intrin_shiftright128(psnip_uint64_t LowPart, psnip_uint64_t HighPart, unsigned char Shift) {
1346
+ unsigned __int128 r = HighPart; /* assemble HighPart:LowPart in r */
1347
+ r <<= 64;
1348
+ r |= LowPart;
1349
+ r >>= Shift % 64; /* only the low six bits of Shift are used */
1350
+ return (psnip_uint64_t) r; /* the cast keeps the lower half of the shifted value */
1351
+ }
1352
+ # else
1353
+ PSNIP_BUILTIN__FUNCTION
1354
+ psnip_uint64_t psnip_intrin_shiftright128(psnip_uint64_t LowPart, psnip_uint64_t HighPart, unsigned char Shift) {
1355
+ Shift %= 64; /* only the low six bits of Shift are used */
1356
+
1357
+ if (PSNIP_BUILTIN_UNLIKELY(Shift == 0)) /* handled separately because HighPart << 64 would be an out-of-range shift */
1358
+ return LowPart;
1359
+
1360
+ return
1361
+ (HighPart << (64 - Shift)) | /* low bits of HighPart move into the top of the result */
1362
+ (LowPart >> Shift);
1363
+ }
1364
+ # endif
1365
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE) /* opt-in: also provide the MSVC spelling */
1366
+ # define __shiftright128(LowPart, HighPart, Shift) psnip_intrin_shiftright128(LowPart, HighPart, Shift)
1367
+ # endif
1368
+ #endif
1369
+
1370
+ /*** byteswap: psnip_intrin_byteswap_ushort / _ulong / _uint64 map either to the MSVC intrinsics or to the psnip_builtin_bswap16 / 32 / 64 helpers ***/
1371
+
1372
+ #if PSNIP_BUILTIN_MSVC_HAS_INTRIN(_byteswap_ushort,13,10) /* this single probe gates all three intrinsics below */
1373
+ # pragma intrinsic(_byteswap_ushort)
1374
+ # define psnip_intrin_byteswap_ushort(v) _byteswap_ushort(v)
1375
+ # pragma intrinsic(_byteswap_ulong)
1376
+ # define psnip_intrin_byteswap_ulong(v) _byteswap_ulong(v)
1377
+ # pragma intrinsic(_byteswap_uint64)
1378
+ # define psnip_intrin_byteswap_uint64(v) _byteswap_uint64(v)
1379
+ #else
1380
+ # define psnip_intrin_byteswap_ushort(v) psnip_builtin_bswap16(v) /* the psnip_builtin_bswap* helpers are defined outside this chunk */
1381
+ # define psnip_intrin_byteswap_ulong(v) psnip_builtin_bswap32(v)
1382
+ # define psnip_intrin_byteswap_uint64(v) psnip_builtin_bswap64(v)
1383
+ # if defined(PSNIP_BUILTIN_EMULATE_NATIVE) /* opt-in: also provide the MSVC spellings */
1384
+ # define _byteswap_ushort(v) psnip_intrin_byteswap_ushort(v)
1385
+ # define _byteswap_ulong(v) psnip_intrin_byteswap_ulong(v)
1386
+ # define _byteswap_uint64(v) psnip_intrin_byteswap_uint64(v)
1387
+ # endif
1388
+ #endif
1389
+
1390
+ #endif /* defined(PSNIP_BUILTIN_H) */