numo-random 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,750 @@
1
+ /*
2
+ * PCG Random Number Generation for C++
3
+ *
4
+ * Copyright 2014 Melissa O'Neill <oneill@pcg-random.org>
5
+ *
6
+ * Licensed under the Apache License, Version 2.0 (the "License");
7
+ * you may not use this file except in compliance with the License.
8
+ * You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ *
18
+ * For additional information about the PCG random number generation scheme,
19
+ * including its license and other licensing options, visit
20
+ *
21
+ * http://www.pcg-random.org
22
+ */
23
+
24
+ /*
25
+ * This code provides a C++ class that can provide 128-bit (or higher)
26
+ * integers. To produce 2K-bit integers, it uses two K-bit integers,
27
+ * placed in a union that allows the code to also see them as four K/2 bit
28
+ * integers (and access them either directly by name, or by index).
29
+ *
30
+ * It may seem like we're reinventing the wheel here, because several
31
+ * libraries already exist that support large integers, but most existing
32
+ * libraries provide a very generic multiprecision code, but here we're
33
+ * operating at a fixed size. Also, most other libraries are fairly
34
+ * heavyweight. So we use a direct implementation. Sadly, it's much slower
35
+ * than hand-coded assembly or direct CPU support.
36
+ */
37
+
38
+ #ifndef PCG_UINT128_HPP_INCLUDED
39
+ #define PCG_UINT128_HPP_INCLUDED 1
40
+
41
#include <cassert>
#include <climits>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <initializer_list>
#include <type_traits>
#include <utility>
48
+
49
+ /*
50
+ * We want to lay the type out the same way that a native type would be laid
51
+ * out, which means we must know the machine's endian, at compile time.
52
+ * This ugliness attempts to do so.
53
+ */
54
+
55
/*
 * Define PCG_LITTLE_ENDIAN to 1 (little-endian target) or 0 (big-endian
 * target) unless the user has already defined it.
 *
 * Detection order:
 *   1. __BYTE_ORDER__, when the compiler provides it.
 *   2. The __LITTLE_ENDIAN__ / _LITTLE_ENDIAN and __BIG_ENDIAN__ /
 *      _BIG_ENDIAN convenience macros.
 *   3. Known architecture macros.  The _M_* names are the ones Microsoft's
 *      compiler predefines; it provides none of the macros tested above,
 *      and all of its listed targets are little-endian.
 * If nothing matches, compilation stops with an #error.
 */
#ifndef PCG_LITTLE_ENDIAN
    #if defined(__BYTE_ORDER__)
        #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
            #define PCG_LITTLE_ENDIAN 1
        #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
            #define PCG_LITTLE_ENDIAN 0
        #else
            #error __BYTE_ORDER__ does not match a standard endian, pick a side
        #endif
    #elif __LITTLE_ENDIAN__ || _LITTLE_ENDIAN
        #define PCG_LITTLE_ENDIAN 1
    #elif __BIG_ENDIAN__ || _BIG_ENDIAN
        #define PCG_LITTLE_ENDIAN 0
    #elif __x86_64 || __x86_64__ || _M_X64 || __i386 || __i386__ || _M_IX86 \
          || _M_ARM || _M_ARM64
        #define PCG_LITTLE_ENDIAN 1
    #elif __powerpc__ || __POWERPC__ || __ppc__ || __PPC__ \
          || __m68k__ || __mc68000__
        #define PCG_LITTLE_ENDIAN 0
    #else
        #error Unable to determine target endianness
    #endif
#endif
77
+
78
+ namespace pcg_extras {
79
+
80
+ // Recent versions of GCC have intrinsics we can use to quickly calculate
81
+ // the number of leading and trailing zeros in a number. If possible, we
82
+ // use them, otherwise we fall back to old-fashioned bit twiddling to figure
83
+ // them out.
84
+
85
// bitcount_t holds bit counts and shift amounts.  It is an 8-bit unsigned
// integer unless the user supplies a different type via PCG_BITCOUNT_T.
#ifdef PCG_BITCOUNT_T
typedef PCG_BITCOUNT_T bitcount_t;
#else
typedef uint8_t bitcount_t;
#endif
90
+
91
+ /*
92
+ * Provide some useful helper functions
93
+ * * flog2 floor(log2(x))
94
+ * * trailingzeros number of trailing zero bits
95
+ */
96
+
97
+ #ifdef __GNUC__ // Any GNU-compatible compiler supporting C++11 has
98
+ // some useful intrinsics we can use.
99
+
100
+ inline bitcount_t flog2(uint32_t v)
101
+ {
102
+ return 31 - __builtin_clz(v);
103
+ }
104
+
105
+ inline bitcount_t trailingzeros(uint32_t v)
106
+ {
107
+ return __builtin_ctz(v);
108
+ }
109
+
110
+ inline bitcount_t flog2(uint64_t v)
111
+ {
112
+ #if UINT64_MAX == ULONG_MAX
113
+ return 63 - __builtin_clzl(v);
114
+ #elif UINT64_MAX == ULLONG_MAX
115
+ return 63 - __builtin_clzll(v);
116
+ #else
117
+ #error Cannot find a function for uint64_t
118
+ #endif
119
+ }
120
+
121
+ inline bitcount_t trailingzeros(uint64_t v)
122
+ {
123
+ #if UINT64_MAX == ULONG_MAX
124
+ return __builtin_ctzl(v);
125
+ #elif UINT64_MAX == ULLONG_MAX
126
+ return __builtin_ctzll(v);
127
+ #else
128
+ #error Cannot find a function for uint64_t
129
+ #endif
130
+ }
131
+
132
+ #else // Otherwise, we fall back to bit twiddling
133
+ // implementations
134
+
135
+ inline bitcount_t flog2(uint32_t v)
136
+ {
137
+ // Based on code by Eric Cole and Mark Dickinson, which appears at
138
+ // https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn
139
+
140
+ static const uint8_t multiplyDeBruijnBitPos[32] = {
141
+ 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
142
+ 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
143
+ };
144
+
145
+ v |= v >> 1; // first round down to one less than a power of 2
146
+ v |= v >> 2;
147
+ v |= v >> 4;
148
+ v |= v >> 8;
149
+ v |= v >> 16;
150
+
151
+ return multiplyDeBruijnBitPos[(uint32_t)(v * 0x07C4ACDDU) >> 27];
152
+ }
153
+
154
+ inline bitcount_t trailingzeros(uint32_t v)
155
+ {
156
+ static const uint8_t multiplyDeBruijnBitPos[32] = {
157
+ 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
158
+ 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
159
+ };
160
+
161
+ return multiplyDeBruijnBitPos[((uint32_t)((v & -v) * 0x077CB531U)) >> 27];
162
+ }
163
+
164
+ inline bitcount_t flog2(uint64_t v)
165
+ {
166
+ uint32_t high = v >> 32;
167
+ uint32_t low = uint32_t(v);
168
+
169
+ return high ? 32+flog2(high) : flog2(low);
170
+ }
171
+
172
+ inline bitcount_t trailingzeros(uint64_t v)
173
+ {
174
+ uint32_t high = v >> 32;
175
+ uint32_t low = uint32_t(v);
176
+
177
+ return low ? trailingzeros(low) : trailingzeros(high)+32;
178
+ }
179
+
180
+ #endif
181
+
182
+ template <typename UInt>
183
+ inline bitcount_t clog2(UInt v)
184
+ {
185
+ return flog2(v) + ((v & (-v)) != v);
186
+ }
187
+
188
// Compute x + y + carryin, with wraparound, for an unsigned type.
// *carryout is set to true when the full sum did not fit in UInt.
template <typename UInt>
inline UInt addwithcarry(UInt x, UInt y, bool carryin, bool* carryout)
{
    const UInt y_plus_carry = y + carryin;
    const UInt sum = x + y_plus_carry;

    // Either partial addition may have wrapped around.
    const bool first_wrapped = y_plus_carry < y;
    const bool second_wrapped = sum < x;
    *carryout = first_wrapped || second_wrapped;
    return sum;
}
196
+
197
// Compute x - y - carryin, with wraparound, for an unsigned type.
// *carryout is set to true when the subtraction had to borrow.
template <typename UInt>
inline UInt subwithcarry(UInt x, UInt y, bool carryin, bool* carryout)
{
    const UInt y_plus_borrow = y + carryin;
    const UInt difference = x - y_plus_borrow;

    // Either adding the incoming borrow wrapped, or the subtraction did.
    const bool add_wrapped = y_plus_borrow < y;
    const bool sub_wrapped = difference > x;
    *carryout = add_wrapped || sub_wrapped;
    return difference;
}
205
+
206
+
207
+ template <typename UInt, typename UIntX2>
208
+ class uint_x4 {
209
+ // private:
210
+ public:
211
+ union {
212
+ #if PCG_LITTLE_ENDIAN
213
+ struct {
214
+ UInt v0, v1, v2, v3;
215
+ } w;
216
+ struct {
217
+ UIntX2 v01, v23;
218
+ } d;
219
+ #else
220
+ struct {
221
+ UInt v3, v2, v1, v0;
222
+ } w;
223
+ struct {
224
+ UIntX2 v23, v01;
225
+ } d;
226
+ #endif
227
+ // For the array access versions, the code that uses the array
228
+ // must handle endian itself. Yuck.
229
+ UInt wa[4];
230
+ UIntX2 da[2];
231
+ };
232
+
233
+ public:
234
+ uint_x4() = default;
235
+
236
+ constexpr uint_x4(UInt v3, UInt v2, UInt v1, UInt v0)
237
+ #if PCG_LITTLE_ENDIAN
238
+ : w{v0, v1, v2, v3}
239
+ #else
240
+ : w{v3, v2, v1, v0}
241
+ #endif
242
+ {
243
+ // Nothing (else) to do
244
+ }
245
+
246
+ constexpr uint_x4(UIntX2 v23, UIntX2 v01)
247
+ #if PCG_LITTLE_ENDIAN
248
+ : d{v01,v23}
249
+ #else
250
+ : d{v23,v01}
251
+ #endif
252
+ {
253
+ // Nothing (else) to do
254
+ }
255
+
256
+ template<class Integral,
257
+ typename std::enable_if<(std::is_integral<Integral>::value
258
+ && sizeof(Integral) <= sizeof(UIntX2))
259
+ >::type* = nullptr>
260
+ constexpr uint_x4(Integral v01)
261
+ #if PCG_LITTLE_ENDIAN
262
+ : d{UIntX2(v01),0UL}
263
+ #else
264
+ : d{0UL,UIntX2(v01)}
265
+ #endif
266
+ {
267
+ // Nothing (else) to do
268
+ }
269
+
270
+ explicit constexpr operator uint64_t() const
271
+ {
272
+ return d.v01;
273
+ }
274
+
275
+ explicit constexpr operator uint32_t() const
276
+ {
277
+ return w.v0;
278
+ }
279
+
280
+ explicit constexpr operator int() const
281
+ {
282
+ return w.v0;
283
+ }
284
+
285
+ explicit constexpr operator uint16_t() const
286
+ {
287
+ return w.v0;
288
+ }
289
+
290
+ explicit constexpr operator uint8_t() const
291
+ {
292
+ return w.v0;
293
+ }
294
+
295
+ typedef typename std::conditional<std::is_same<uint64_t,
296
+ unsigned long>::value,
297
+ unsigned long long,
298
+ unsigned long>::type
299
+ uint_missing_t;
300
+
301
+ explicit constexpr operator uint_missing_t() const
302
+ {
303
+ return d.v01;
304
+ }
305
+
306
+ explicit constexpr operator bool() const
307
+ {
308
+ return d.v01 || d.v23;
309
+ }
310
+
311
+ template<typename U, typename V>
312
+ friend uint_x4<U,V> operator*(const uint_x4<U,V>&, const uint_x4<U,V>&);
313
+
314
+ template<typename U, typename V>
315
+ friend std::pair< uint_x4<U,V>,uint_x4<U,V> >
316
+ divmod(const uint_x4<U,V>&, const uint_x4<U,V>&);
317
+
318
+ template<typename U, typename V>
319
+ friend uint_x4<U,V> operator+(const uint_x4<U,V>&, const uint_x4<U,V>&);
320
+
321
+ template<typename U, typename V>
322
+ friend uint_x4<U,V> operator-(const uint_x4<U,V>&, const uint_x4<U,V>&);
323
+
324
+ template<typename U, typename V>
325
+ friend uint_x4<U,V> operator<<(const uint_x4<U,V>&, const uint_x4<U,V>&);
326
+
327
+ template<typename U, typename V>
328
+ friend uint_x4<U,V> operator>>(const uint_x4<U,V>&, const uint_x4<U,V>&);
329
+
330
+ template<typename U, typename V>
331
+ friend uint_x4<U,V> operator&(const uint_x4<U,V>&, const uint_x4<U,V>&);
332
+
333
+ template<typename U, typename V>
334
+ friend uint_x4<U,V> operator|(const uint_x4<U,V>&, const uint_x4<U,V>&);
335
+
336
+ template<typename U, typename V>
337
+ friend uint_x4<U,V> operator^(const uint_x4<U,V>&, const uint_x4<U,V>&);
338
+
339
+ template<typename U, typename V>
340
+ friend bool operator==(const uint_x4<U,V>&, const uint_x4<U,V>&);
341
+
342
+ template<typename U, typename V>
343
+ friend bool operator!=(const uint_x4<U,V>&, const uint_x4<U,V>&);
344
+
345
+ template<typename U, typename V>
346
+ friend bool operator<(const uint_x4<U,V>&, const uint_x4<U,V>&);
347
+
348
+ template<typename U, typename V>
349
+ friend bool operator<=(const uint_x4<U,V>&, const uint_x4<U,V>&);
350
+
351
+ template<typename U, typename V>
352
+ friend bool operator>(const uint_x4<U,V>&, const uint_x4<U,V>&);
353
+
354
+ template<typename U, typename V>
355
+ friend bool operator>=(const uint_x4<U,V>&, const uint_x4<U,V>&);
356
+
357
+ template<typename U, typename V>
358
+ friend uint_x4<U,V> operator~(const uint_x4<U,V>&);
359
+
360
+ template<typename U, typename V>
361
+ friend uint_x4<U,V> operator-(const uint_x4<U,V>&);
362
+
363
+ template<typename U, typename V>
364
+ friend bitcount_t flog2(const uint_x4<U,V>&);
365
+
366
+ template<typename U, typename V>
367
+ friend bitcount_t trailingzeros(const uint_x4<U,V>&);
368
+
369
+ uint_x4& operator*=(const uint_x4& rhs)
370
+ {
371
+ uint_x4 result = *this * rhs;
372
+ return *this = result;
373
+ }
374
+
375
+ uint_x4& operator/=(const uint_x4& rhs)
376
+ {
377
+ uint_x4 result = *this / rhs;
378
+ return *this = result;
379
+ }
380
+
381
+ uint_x4& operator%=(const uint_x4& rhs)
382
+ {
383
+ uint_x4 result = *this % rhs;
384
+ return *this = result;
385
+ }
386
+
387
+ uint_x4& operator+=(const uint_x4& rhs)
388
+ {
389
+ uint_x4 result = *this + rhs;
390
+ return *this = result;
391
+ }
392
+
393
+ uint_x4& operator-=(const uint_x4& rhs)
394
+ {
395
+ uint_x4 result = *this - rhs;
396
+ return *this = result;
397
+ }
398
+
399
+ uint_x4& operator&=(const uint_x4& rhs)
400
+ {
401
+ uint_x4 result = *this & rhs;
402
+ return *this = result;
403
+ }
404
+
405
+ uint_x4& operator|=(const uint_x4& rhs)
406
+ {
407
+ uint_x4 result = *this | rhs;
408
+ return *this = result;
409
+ }
410
+
411
+ uint_x4& operator^=(const uint_x4& rhs)
412
+ {
413
+ uint_x4 result = *this ^ rhs;
414
+ return *this = result;
415
+ }
416
+
417
+ uint_x4& operator>>=(bitcount_t shift)
418
+ {
419
+ uint_x4 result = *this >> shift;
420
+ return *this = result;
421
+ }
422
+
423
+ uint_x4& operator<<=(bitcount_t shift)
424
+ {
425
+ uint_x4 result = *this << shift;
426
+ return *this = result;
427
+ }
428
+
429
+ };
430
+
431
+ template<typename U, typename V>
432
+ bitcount_t flog2(const uint_x4<U,V>& v)
433
+ {
434
+ #if PCG_LITTLE_ENDIAN
435
+ for (uint8_t i = 4; i !=0; /* dec in loop */) {
436
+ --i;
437
+ #else
438
+ for (uint8_t i = 0; i < 4; ++i) {
439
+ #endif
440
+ if (v.wa[i] == 0)
441
+ continue;
442
+ return flog2(v.wa[i]) + (sizeof(U)*CHAR_BIT)*i;
443
+ }
444
+ abort();
445
+ }
446
+
447
+ template<typename U, typename V>
448
+ bitcount_t trailingzeros(const uint_x4<U,V>& v)
449
+ {
450
+ #if PCG_LITTLE_ENDIAN
451
+ for (uint8_t i = 0; i < 4; ++i) {
452
+ #else
453
+ for (uint8_t i = 4; i !=0; /* dec in loop */) {
454
+ --i;
455
+ #endif
456
+ if (v.wa[i] != 0)
457
+ return trailingzeros(v.wa[i]) + (sizeof(U)*CHAR_BIT)*i;
458
+ }
459
+ return (sizeof(U)*CHAR_BIT)*4;
460
+ }
461
+
462
+ template <typename UInt, typename UIntX2>
463
+ std::pair< uint_x4<UInt,UIntX2>, uint_x4<UInt,UIntX2> >
464
+ divmod(const uint_x4<UInt,UIntX2>& orig_dividend,
465
+ const uint_x4<UInt,UIntX2>& divisor)
466
+ {
467
+ // If the dividend is less than the divisor, the answer is always zero.
468
+ // This takes care of boundary cases like 0/x (which would otherwise be
469
+ // problematic because we can't take the log of zero. (The boundary case
470
+ // of division by zero is undefined.)
471
+ if (orig_dividend < divisor)
472
+ return { uint_x4<UInt,UIntX2>(0UL), orig_dividend };
473
+
474
+ auto dividend = orig_dividend;
475
+
476
+ auto log2_divisor = flog2(divisor);
477
+ auto log2_dividend = flog2(dividend);
478
+ // assert(log2_dividend >= log2_divisor);
479
+ bitcount_t logdiff = log2_dividend - log2_divisor;
480
+
481
+ constexpr uint_x4<UInt,UIntX2> ONE(1UL);
482
+ if (logdiff == 0)
483
+ return { ONE, dividend - divisor };
484
+
485
+ // Now we change the log difference to
486
+ // floor(log2(divisor)) - ceil(log2(dividend))
487
+ // to ensure that we *underestimate* the result.
488
+ logdiff -= 1;
489
+
490
+ uint_x4<UInt,UIntX2> quotient(0UL);
491
+
492
+ auto qfactor = ONE << logdiff;
493
+ auto factor = divisor << logdiff;
494
+
495
+ do {
496
+ dividend -= factor;
497
+ quotient += qfactor;
498
+ while (dividend < factor) {
499
+ factor >>= 1;
500
+ qfactor >>= 1;
501
+ }
502
+ } while (dividend >= divisor);
503
+
504
+ return { quotient, dividend };
505
+ }
506
+
507
+ template <typename UInt, typename UIntX2>
508
+ uint_x4<UInt,UIntX2> operator/(const uint_x4<UInt,UIntX2>& dividend,
509
+ const uint_x4<UInt,UIntX2>& divisor)
510
+ {
511
+ return divmod(dividend, divisor).first;
512
+ }
513
+
514
+ template <typename UInt, typename UIntX2>
515
+ uint_x4<UInt,UIntX2> operator%(const uint_x4<UInt,UIntX2>& dividend,
516
+ const uint_x4<UInt,UIntX2>& divisor)
517
+ {
518
+ return divmod(dividend, divisor).second;
519
+ }
520
+
521
+
522
+ template <typename UInt, typename UIntX2>
523
+ uint_x4<UInt,UIntX2> operator*(const uint_x4<UInt,UIntX2>& a,
524
+ const uint_x4<UInt,UIntX2>& b)
525
+ {
526
+ uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
527
+ bool carryin = false;
528
+ bool carryout;
529
+ UIntX2 a0b0 = UIntX2(a.w.v0) * UIntX2(b.w.v0);
530
+ r.w.v0 = UInt(a0b0);
531
+ r.w.v1 = UInt(a0b0 >> 32);
532
+
533
+ UIntX2 a1b0 = UIntX2(a.w.v1) * UIntX2(b.w.v0);
534
+ r.w.v2 = UInt(a1b0 >> 32);
535
+ r.w.v1 = addwithcarry(r.w.v1, UInt(a1b0), carryin, &carryout);
536
+ carryin = carryout;
537
+ r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout);
538
+ carryin = carryout;
539
+ r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout);
540
+
541
+ UIntX2 a0b1 = UIntX2(a.w.v0) * UIntX2(b.w.v1);
542
+ carryin = false;
543
+ r.w.v2 = addwithcarry(r.w.v2, UInt(a0b1 >> 32), carryin, &carryout);
544
+ carryin = carryout;
545
+ r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout);
546
+
547
+ carryin = false;
548
+ r.w.v1 = addwithcarry(r.w.v1, UInt(a0b1), carryin, &carryout);
549
+ carryin = carryout;
550
+ r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout);
551
+ carryin = carryout;
552
+ r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout);
553
+
554
+ UIntX2 a1b1 = UIntX2(a.w.v1) * UIntX2(b.w.v1);
555
+ carryin = false;
556
+ r.w.v2 = addwithcarry(r.w.v2, UInt(a1b1), carryin, &carryout);
557
+ carryin = carryout;
558
+ r.w.v3 = addwithcarry(r.w.v3, UInt(a1b1 >> 32), carryin, &carryout);
559
+
560
+ r.d.v23 += a.d.v01 * b.d.v23 + a.d.v23 * b.d.v01;
561
+
562
+ return r;
563
+ }
564
+
565
+
566
+ template <typename UInt, typename UIntX2>
567
+ uint_x4<UInt,UIntX2> operator+(const uint_x4<UInt,UIntX2>& a,
568
+ const uint_x4<UInt,UIntX2>& b)
569
+ {
570
+ uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
571
+
572
+ bool carryin = false;
573
+ bool carryout;
574
+ r.w.v0 = addwithcarry(a.w.v0, b.w.v0, carryin, &carryout);
575
+ carryin = carryout;
576
+ r.w.v1 = addwithcarry(a.w.v1, b.w.v1, carryin, &carryout);
577
+ carryin = carryout;
578
+ r.w.v2 = addwithcarry(a.w.v2, b.w.v2, carryin, &carryout);
579
+ carryin = carryout;
580
+ r.w.v3 = addwithcarry(a.w.v3, b.w.v3, carryin, &carryout);
581
+
582
+ return r;
583
+ }
584
+
585
+ template <typename UInt, typename UIntX2>
586
+ uint_x4<UInt,UIntX2> operator-(const uint_x4<UInt,UIntX2>& a,
587
+ const uint_x4<UInt,UIntX2>& b)
588
+ {
589
+ uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
590
+
591
+ bool carryin = false;
592
+ bool carryout;
593
+ r.w.v0 = subwithcarry(a.w.v0, b.w.v0, carryin, &carryout);
594
+ carryin = carryout;
595
+ r.w.v1 = subwithcarry(a.w.v1, b.w.v1, carryin, &carryout);
596
+ carryin = carryout;
597
+ r.w.v2 = subwithcarry(a.w.v2, b.w.v2, carryin, &carryout);
598
+ carryin = carryout;
599
+ r.w.v3 = subwithcarry(a.w.v3, b.w.v3, carryin, &carryout);
600
+
601
+ return r;
602
+ }
603
+
604
+
605
+ template <typename UInt, typename UIntX2>
606
+ uint_x4<UInt,UIntX2> operator&(const uint_x4<UInt,UIntX2>& a,
607
+ const uint_x4<UInt,UIntX2>& b)
608
+ {
609
+ return uint_x4<UInt,UIntX2>(a.d.v23 & b.d.v23, a.d.v01 & b.d.v01);
610
+ }
611
+
612
+ template <typename UInt, typename UIntX2>
613
+ uint_x4<UInt,UIntX2> operator|(const uint_x4<UInt,UIntX2>& a,
614
+ const uint_x4<UInt,UIntX2>& b)
615
+ {
616
+ return uint_x4<UInt,UIntX2>(a.d.v23 | b.d.v23, a.d.v01 | b.d.v01);
617
+ }
618
+
619
+ template <typename UInt, typename UIntX2>
620
+ uint_x4<UInt,UIntX2> operator^(const uint_x4<UInt,UIntX2>& a,
621
+ const uint_x4<UInt,UIntX2>& b)
622
+ {
623
+ return uint_x4<UInt,UIntX2>(a.d.v23 ^ b.d.v23, a.d.v01 ^ b.d.v01);
624
+ }
625
+
626
+ template <typename UInt, typename UIntX2>
627
+ uint_x4<UInt,UIntX2> operator~(const uint_x4<UInt,UIntX2>& v)
628
+ {
629
+ return uint_x4<UInt,UIntX2>(~v.d.v23, ~v.d.v01);
630
+ }
631
+
632
+ template <typename UInt, typename UIntX2>
633
+ uint_x4<UInt,UIntX2> operator-(const uint_x4<UInt,UIntX2>& v)
634
+ {
635
+ return uint_x4<UInt,UIntX2>(0UL,0UL) - v;
636
+ }
637
+
638
+ template <typename UInt, typename UIntX2>
639
+ bool operator==(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
640
+ {
641
+ return (a.d.v01 == b.d.v01) && (a.d.v23 == b.d.v23);
642
+ }
643
+
644
+ template <typename UInt, typename UIntX2>
645
+ bool operator!=(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
646
+ {
647
+ return !operator==(a,b);
648
+ }
649
+
650
+
651
+ template <typename UInt, typename UIntX2>
652
+ bool operator<(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
653
+ {
654
+ return (a.d.v23 < b.d.v23)
655
+ || ((a.d.v23 == b.d.v23) && (a.d.v01 < b.d.v01));
656
+ }
657
+
658
+ template <typename UInt, typename UIntX2>
659
+ bool operator>(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
660
+ {
661
+ return operator<(b,a);
662
+ }
663
+
664
+ template <typename UInt, typename UIntX2>
665
+ bool operator<=(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
666
+ {
667
+ return !(operator<(b,a));
668
+ }
669
+
670
+ template <typename UInt, typename UIntX2>
671
+ bool operator>=(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
672
+ {
673
+ return !(operator<(a,b));
674
+ }
675
+
676
+
677
+
678
+ template <typename UInt, typename UIntX2>
679
+ uint_x4<UInt,UIntX2> operator<<(const uint_x4<UInt,UIntX2>& v,
680
+ const bitcount_t shift)
681
+ {
682
+ uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
683
+ const bitcount_t bits = sizeof(UInt) * CHAR_BIT;
684
+ const bitcount_t bitmask = bits - 1;
685
+ const bitcount_t shiftdiv = shift / bits;
686
+ const bitcount_t shiftmod = shift & bitmask;
687
+
688
+ if (shiftmod) {
689
+ UInt carryover = 0;
690
+ #if PCG_LITTLE_ENDIAN
691
+ for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
692
+ #else
693
+ for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) {
694
+ --out, --in;
695
+ #endif
696
+ r.wa[out] = (v.wa[in] << shiftmod) | carryover;
697
+ carryover = (v.wa[in] >> (bits - shiftmod));
698
+ }
699
+ } else {
700
+ #if PCG_LITTLE_ENDIAN
701
+ for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
702
+ #else
703
+ for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) {
704
+ --out, --in;
705
+ #endif
706
+ r.wa[out] = v.wa[in];
707
+ }
708
+ }
709
+
710
+ return r;
711
+ }
712
+
713
+ template <typename UInt, typename UIntX2>
714
+ uint_x4<UInt,UIntX2> operator>>(const uint_x4<UInt,UIntX2>& v,
715
+ const bitcount_t shift)
716
+ {
717
+ uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
718
+ const bitcount_t bits = sizeof(UInt) * CHAR_BIT;
719
+ const bitcount_t bitmask = bits - 1;
720
+ const bitcount_t shiftdiv = shift / bits;
721
+ const bitcount_t shiftmod = shift & bitmask;
722
+
723
+ if (shiftmod) {
724
+ UInt carryover = 0;
725
+ #if PCG_LITTLE_ENDIAN
726
+ for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) {
727
+ --out, --in;
728
+ #else
729
+ for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
730
+ #endif
731
+ r.wa[out] = (v.wa[in] >> shiftmod) | carryover;
732
+ carryover = (v.wa[in] << (bits - shiftmod));
733
+ }
734
+ } else {
735
+ #if PCG_LITTLE_ENDIAN
736
+ for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) {
737
+ --out, --in;
738
+ #else
739
+ for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
740
+ #endif
741
+ r.wa[out] = v.wa[in];
742
+ }
743
+ }
744
+
745
+ return r;
746
+ }
747
+
748
+ } // namespace pcg_extras
749
+
750
+ #endif // PCG_UINT128_HPP_INCLUDED