@zigc/lib 0.16.0 → 0.17.0-dev.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/c/math.zig +20 -7
- package/compiler/test_runner.zig +1 -1
- package/compiler_rt/divmodei4.zig +40 -17
- package/compiler_rt/exp.zig +1 -4
- package/compiler_rt/exp2.zig +1 -4
- package/compiler_rt/exp_f128.zig +377 -0
- package/compiler_rt/limb64.zig +875 -15
- package/compiler_rt/mulXi3.zig +1 -1
- package/compiler_rt/ssp.zig +1 -1
- package/compiler_rt/udivmodei4.zig +28 -0
- package/package.json +1 -1
- package/std/Io/Writer.zig +41 -41
- package/std/os/emscripten.zig +1 -1
- package/std/os/linux/arc.zig +144 -0
- package/std/os/linux.zig +14 -2
- package/libc/mingw/math/fdiml.c +0 -24
- package/libc/musl/src/math/fdimf.c +0 -10
- package/libc/musl/src/math/fdiml.c +0 -18
package/compiler_rt/limb64.zig
CHANGED
|
@@ -7,6 +7,7 @@ const divCeil = std.math.divCeil;
|
|
|
7
7
|
|
|
8
8
|
const builtin = @import("builtin");
|
|
9
9
|
const compiler_rt = @import("../compiler_rt.zig");
|
|
10
|
+
const symbol = @import("../compiler_rt.zig").symbol;
|
|
10
11
|
|
|
11
12
|
const endian = builtin.cpu.arch.endian();
|
|
12
13
|
|
|
@@ -24,10 +25,44 @@ inline fn limbSet(limbs: []u64, i: usize, value: u64) void {
|
|
|
24
25
|
}
|
|
25
26
|
}
|
|
26
27
|
|
|
27
|
-
fn
|
|
28
|
+
/// Returns how many 64-bit limbs are needed to hold `bits` value bits,
/// i.e. `bits / 64` rounded up.
fn usedLimbCount(bits: u16) u16 {
    // The divisor is the non-zero constant 64 and the type is unsigned,
    // so `divCeil` has no reachable error path here.
    const limbs_needed = divCeil(u16, bits, 64) catch unreachable;
    return limbs_needed;
}
|
|
30
31
|
|
|
32
|
+
/// Returns the number of 64-bit limbs occupied by the storage of a `bits`-wide
/// integer, based on the byte size `std.zig.target.intByteSize` reports for the
/// build target. This can be larger than `usedLimbCount(bits)`.
fn limbCount(bits: u16) u16 {
    const storage_bytes = std.zig.target.intByteSize(&builtin.target, bits);
    // `@divExact` requires the storage size to be a whole number of 8-byte limbs.
    return @divExact(storage_bytes, 8);
}
|
|
35
|
+
|
|
36
|
+
/// Returns the mutable slice of limbs that actually carry value bits for a
/// `bits`-wide integer stored at `ptr`.
///
/// The slice has `usedLimbCount(bits)` elements. On little-endian targets it
/// starts at `ptr[0]`; on big-endian targets it is the tail of the
/// `limbCount(bits)`-limb storage.
fn varLimbs(ptr: [*]u64, bits: u16) []u64 {
    const used = usedLimbCount(bits);
    const total = limbCount(bits);
    // Index of the first used limb inside the full storage.
    const first: u16 = switch (endian) {
        .little => 0,
        .big => total - used,
    };
    return ptr[first..][0..used];
}
|
|
44
|
+
|
|
45
|
+
/// Read-only counterpart of `varLimbs`: returns the `usedLimbCount(bits)` limbs
/// that carry value bits of the `bits`-wide integer stored at `ptr`.
///
/// Little-endian targets use the leading limbs of the storage, big-endian
/// targets the trailing ones.
fn constLimbs(ptr: [*]const u64, bits: u16) []const u64 {
    const used = usedLimbCount(bits);
    const total = limbCount(bits);
    if (endian == .big) {
        // Used limbs sit at the end of the `total`-limb storage.
        return ptr[total - used .. total];
    }
    return ptr[0..used];
}
|
|
53
|
+
|
|
54
|
+
/// Fills the padding limbs of the integer stored at `out_ptr` with the
/// extension of its value.
///
/// Padding limbs are the limbs that belong to the storage (`limbCount(bits)`)
/// but carry no value bits (`usedLimbCount(bits)`). They are set to all ones
/// when `is_signed` is true and the most significant used limb has its top bit
/// set, and to zero otherwise. Returns immediately when there is no padding.
///
/// The location of the padding mirrors `varLimbs`/`constLimbs`: after the used
/// limbs on little-endian targets, before them on big-endian targets.
/// NOTE(review): on big-endian this assumes the first used limb is the most
/// significant one (the order `limbGet`/`limbSet` are expected to implement) —
/// confirm against those helpers.
fn fixLastLimb(out_ptr: [*]u64, is_signed: bool, bits: u16) void {
    const used = usedLimbCount(bits);
    const total = limbCount(bits);
    if (used == total) return;
    const storage = out_ptr[0..total];

    // Limbs that hold no value bits.
    const padding: []u64 = switch (endian) {
        .little => storage[used..],
        .big => storage[0 .. total - used],
    };
    // Most significant limb that does hold value bits.
    const top_limb: u64 = switch (endian) {
        .little => storage[used - 1],
        .big => storage[total - used],
    };

    const fill: u64 = if (is_signed and @as(i64, @bitCast(top_limb)) < 0) ~@as(u64, 0) else 0;
    @memset(padding, fill);
}
|
|
65
|
+
|
|
31
66
|
fn Limbs(T: type) type {
|
|
32
67
|
const int_info = @typeInfo(T).int;
|
|
33
68
|
const limb_cnt = comptime limbCount(int_info.bits);
|
|
@@ -55,14 +90,14 @@ fn limbWrap(limb: u64, is_signed: bool, bits: u16) u64 {
|
|
|
55
90
|
}
|
|
56
91
|
|
|
57
92
|
comptime {
|
|
58
|
-
|
|
93
|
+
symbol(&__addo_limb64, "__addo_limb64");
|
|
59
94
|
}
|
|
60
95
|
|
|
61
96
|
fn __addo_limb64(out_ptr: [*]u64, a_ptr: [*]const u64, b_ptr: [*]const u64, is_signed: bool, bits: u16) callconv(.c) bool {
|
|
62
|
-
const limb_cnt =
|
|
63
|
-
const out = out_ptr
|
|
64
|
-
const a = a_ptr
|
|
65
|
-
const b = b_ptr
|
|
97
|
+
const limb_cnt = usedLimbCount(bits);
|
|
98
|
+
const out = varLimbs(out_ptr, bits);
|
|
99
|
+
const a = constLimbs(a_ptr, bits);
|
|
100
|
+
const b = constLimbs(b_ptr, bits);
|
|
66
101
|
|
|
67
102
|
var carry: u1 = 0;
|
|
68
103
|
var i: usize = 0;
|
|
@@ -91,11 +126,13 @@ fn __addo_limb64(out_ptr: [*]u64, a_ptr: [*]const u64, b_ptr: [*]const u64, is_s
|
|
|
91
126
|
|
|
92
127
|
if (bits % 64 == 0) {
|
|
93
128
|
limbSet(out, i, limb);
|
|
129
|
+
fixLastLimb(out_ptr, is_signed, bits);
|
|
94
130
|
return carry != 0;
|
|
95
131
|
} else {
|
|
96
132
|
assert(carry == 0);
|
|
97
133
|
const wrapped_limb = limbWrap(limb, is_signed, bits);
|
|
98
134
|
limbSet(out, i, wrapped_limb);
|
|
135
|
+
fixLastLimb(out_ptr, is_signed, bits);
|
|
99
136
|
return wrapped_limb != limb;
|
|
100
137
|
}
|
|
101
138
|
}
|
|
@@ -124,17 +161,20 @@ test __addo_limb64 {
|
|
|
124
161
|
try test__addo_limb64(i64, maxInt(i64), 1, .{ minInt(i64), true });
|
|
125
162
|
try test__addo_limb64(i65, maxInt(i65), 1, .{ minInt(i65), true });
|
|
126
163
|
try test__addo_limb64(i255, -3, 2, .{ -1, false });
|
|
164
|
+
|
|
165
|
+
try test__addo_limb64(u150, maxInt(u150), 2, .{ 1, true });
|
|
166
|
+
try test__addo_limb64(i150, -3, 2, .{ -1, false });
|
|
127
167
|
}
|
|
128
168
|
|
|
129
169
|
comptime {
|
|
130
|
-
|
|
170
|
+
symbol(&__subo_limb64, "__subo_limb64");
|
|
131
171
|
}
|
|
132
172
|
|
|
133
173
|
fn __subo_limb64(out_ptr: [*]u64, a_ptr: [*]const u64, b_ptr: [*]const u64, is_signed: bool, bits: u16) callconv(.c) bool {
|
|
134
|
-
const limb_cnt =
|
|
135
|
-
const out = out_ptr
|
|
136
|
-
const a = a_ptr
|
|
137
|
-
const b = b_ptr
|
|
174
|
+
const limb_cnt = usedLimbCount(bits);
|
|
175
|
+
const out = varLimbs(out_ptr, bits);
|
|
176
|
+
const a = constLimbs(a_ptr, bits);
|
|
177
|
+
const b = constLimbs(b_ptr, bits);
|
|
138
178
|
|
|
139
179
|
var borrow: u1 = 0;
|
|
140
180
|
var i: usize = 0;
|
|
@@ -163,10 +203,12 @@ fn __subo_limb64(out_ptr: [*]u64, a_ptr: [*]const u64, b_ptr: [*]const u64, is_s
|
|
|
163
203
|
|
|
164
204
|
if (bits % 64 == 0) {
|
|
165
205
|
limbSet(out, i, limb);
|
|
206
|
+
fixLastLimb(out_ptr, is_signed, bits);
|
|
166
207
|
return borrow != 0;
|
|
167
208
|
} else {
|
|
168
209
|
const wrapped_limb = limbWrap(limb, is_signed, bits);
|
|
169
210
|
limbSet(out, i, wrapped_limb);
|
|
211
|
+
fixLastLimb(out_ptr, is_signed, bits);
|
|
170
212
|
return borrow != 0 or wrapped_limb != limb;
|
|
171
213
|
}
|
|
172
214
|
}
|
|
@@ -195,19 +237,22 @@ test __subo_limb64 {
|
|
|
195
237
|
try test__subo_limb64(i64, minInt(i64), 1, .{ maxInt(i64), true });
|
|
196
238
|
try test__subo_limb64(i65, minInt(i65), 1, .{ maxInt(i65), true });
|
|
197
239
|
try test__subo_limb64(i255, -1, 2, .{ -3, false });
|
|
240
|
+
|
|
241
|
+
try test__subo_limb64(u150, 2, maxInt(u150), .{ 3, true });
|
|
242
|
+
try test__subo_limb64(i150, -3, 2, .{ -5, false });
|
|
198
243
|
}
|
|
199
244
|
|
|
200
245
|
comptime {
|
|
201
|
-
|
|
246
|
+
symbol(&__cmp_limb64, "__cmp_limb64");
|
|
202
247
|
}
|
|
203
248
|
|
|
204
249
|
// a < b -> -1
|
|
205
250
|
// a == b -> 0
|
|
206
251
|
// a > b -> 1
|
|
207
252
|
fn __cmp_limb64(a_ptr: [*]const u64, b_ptr: [*]const u64, is_signed: bool, bits: u16) callconv(.c) i8 {
|
|
208
|
-
const limb_cnt =
|
|
209
|
-
const a = a_ptr
|
|
210
|
-
const b = b_ptr
|
|
253
|
+
const limb_cnt = usedLimbCount(bits);
|
|
254
|
+
const a = constLimbs(a_ptr, bits);
|
|
255
|
+
const b = constLimbs(b_ptr, bits);
|
|
211
256
|
|
|
212
257
|
var i: usize = 0;
|
|
213
258
|
if (is_signed) {
|
|
@@ -263,4 +308,819 @@ test __cmp_limb64 {
|
|
|
263
308
|
try test__cmp_limb64(i255, -3, 2, -1);
|
|
264
309
|
try test__cmp_limb64(i255, -5, -5, 0);
|
|
265
310
|
try test__cmp_limb64(i255, 2, -3, 1);
|
|
311
|
+
|
|
312
|
+
try test__cmp_limb64(u150, maxInt(u150) - 5, maxInt(u150) - 5, 0);
|
|
313
|
+
try test__cmp_limb64(i150, minInt(i150), -5, -1);
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
comptime {
|
|
317
|
+
symbol(&__and_limb64, "__and_limb64");
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
/// Bitwise AND of two `bits`-wide integers stored as 64-bit limbs.
///
/// Every storage limb (`limbCount(bits)`, padding included) of `a_ptr` and
/// `b_ptr` is combined and written to the same position in `out_ptr`.
fn __and_limb64(out_ptr: [*]u64, a_ptr: [*]const u64, b_ptr: [*]const u64, bits: u16) callconv(.c) void {
    const total = limbCount(bits);
    const dst = out_ptr[0..total];
    const lhs = a_ptr[0..total];
    const rhs = b_ptr[0..total];

    for (0..total) |idx| {
        const combined = limbGet(lhs, idx) & limbGet(rhs, idx);
        limbSet(dst, idx, combined);
    }
}
|
|
331
|
+
|
|
332
|
+
/// Test driver: runs `__and_limb64` on the limb form of `a` and `b` and expects
/// the result to equal the limb form of `expected`.
fn test__and_limb64(comptime T: type, a: T, b: T, expected: T) !void {
    var lhs = asLimbs(a);
    var rhs = asLimbs(b);
    var result: Limbs(T) = undefined;
    __and_limb64(&result, &lhs, &rhs, @typeInfo(T).int.bits);

    try testing.expectEqual(asLimbs(expected), result);
}
|
|
343
|
+
|
|
344
|
+
test __and_limb64 {
|
|
345
|
+
try test__and_limb64(u64, 1, 2, 0);
|
|
346
|
+
try test__and_limb64(u64, maxInt(u64), 2, 2);
|
|
347
|
+
try test__and_limb64(u65, maxInt(u65), 2, 2);
|
|
348
|
+
try test__and_limb64(u255, maxInt(u255), 7, 7);
|
|
349
|
+
|
|
350
|
+
try test__and_limb64(i64, 1, 2, 0);
|
|
351
|
+
try test__and_limb64(i64, -1, 2, 2);
|
|
352
|
+
try test__and_limb64(i65, minInt(i65), -1, minInt(i65));
|
|
353
|
+
try test__and_limb64(i255, -1, 2, 2);
|
|
354
|
+
|
|
355
|
+
try test__and_limb64(u150, maxInt(u150), 7, 7);
|
|
356
|
+
try test__and_limb64(i150, -2, 3, 2);
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
comptime {
|
|
360
|
+
symbol(&__or_limb64, "__or_limb64");
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
/// Bitwise OR of two `bits`-wide integers stored as 64-bit limbs.
///
/// Every storage limb (`limbCount(bits)`, padding included) of `a_ptr` and
/// `b_ptr` is combined and written to the same position in `out_ptr`.
fn __or_limb64(out_ptr: [*]u64, a_ptr: [*]const u64, b_ptr: [*]const u64, bits: u16) callconv(.c) void {
    const total = limbCount(bits);
    const dst = out_ptr[0..total];
    const lhs = a_ptr[0..total];
    const rhs = b_ptr[0..total];

    for (0..total) |idx| {
        const combined = limbGet(lhs, idx) | limbGet(rhs, idx);
        limbSet(dst, idx, combined);
    }
}
|
|
374
|
+
|
|
375
|
+
/// Test driver: runs `__or_limb64` on the limb form of `a` and `b` and expects
/// the result to equal the limb form of `expected`.
fn test__or_limb64(comptime T: type, a: T, b: T, expected: T) !void {
    var lhs = asLimbs(a);
    var rhs = asLimbs(b);
    var result: Limbs(T) = undefined;
    __or_limb64(&result, &lhs, &rhs, @typeInfo(T).int.bits);

    try testing.expectEqual(asLimbs(expected), result);
}
|
|
386
|
+
|
|
387
|
+
test __or_limb64 {
|
|
388
|
+
try test__or_limb64(u64, 1, 2, 3);
|
|
389
|
+
try test__or_limb64(u64, maxInt(u64), 2, maxInt(u64));
|
|
390
|
+
try test__or_limb64(u65, maxInt(u65), 2, maxInt(u65));
|
|
391
|
+
try test__or_limb64(u255, 1, 2, 3);
|
|
392
|
+
|
|
393
|
+
try test__or_limb64(i64, 1, 2, 3);
|
|
394
|
+
try test__or_limb64(i64, -1, 2, -1);
|
|
395
|
+
try test__or_limb64(i65, minInt(i65), 1, minInt(i65) + 1);
|
|
396
|
+
try test__or_limb64(i255, -3, 2, -1);
|
|
397
|
+
|
|
398
|
+
try test__or_limb64(u150, maxInt(u150) - 1, 3, maxInt(u150));
|
|
399
|
+
try test__or_limb64(i150, -2, 3, -1);
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
comptime {
|
|
403
|
+
symbol(&__xor_limb64, "__xor_limb64");
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
/// Bitwise XOR of two `bits`-wide integers stored as 64-bit limbs.
///
/// Every storage limb (`limbCount(bits)`, padding included) of `a_ptr` and
/// `b_ptr` is combined and written to the same position in `out_ptr`.
fn __xor_limb64(out_ptr: [*]u64, a_ptr: [*]const u64, b_ptr: [*]const u64, bits: u16) callconv(.c) void {
    const total = limbCount(bits);
    const dst = out_ptr[0..total];
    const lhs = a_ptr[0..total];
    const rhs = b_ptr[0..total];

    for (0..total) |idx| {
        const combined = limbGet(lhs, idx) ^ limbGet(rhs, idx);
        limbSet(dst, idx, combined);
    }
}
|
|
417
|
+
|
|
418
|
+
/// Test driver: runs `__xor_limb64` on the limb form of `a` and `b` and expects
/// the result to equal the limb form of `expected`.
fn test__xor_limb64(comptime T: type, a: T, b: T, expected: T) !void {
    var lhs = asLimbs(a);
    var rhs = asLimbs(b);
    var result: Limbs(T) = undefined;
    __xor_limb64(&result, &lhs, &rhs, @typeInfo(T).int.bits);

    try testing.expectEqual(asLimbs(expected), result);
}
|
|
429
|
+
|
|
430
|
+
test __xor_limb64 {
|
|
431
|
+
try test__xor_limb64(u64, 1, 2, 3);
|
|
432
|
+
try test__xor_limb64(u64, 3, 2, 1);
|
|
433
|
+
try test__xor_limb64(u65, maxInt(u65), 2, maxInt(u65) - 2);
|
|
434
|
+
try test__xor_limb64(u255, 7, 3, 4);
|
|
435
|
+
|
|
436
|
+
try test__xor_limb64(i64, 3, 2, 1);
|
|
437
|
+
try test__xor_limb64(i64, -1, 2, -3);
|
|
438
|
+
try test__xor_limb64(i65, minInt(i65), -1, maxInt(i65));
|
|
439
|
+
try test__xor_limb64(i255, -3, 2, -1);
|
|
440
|
+
|
|
441
|
+
try test__xor_limb64(u150, maxInt(u150) - 1, 3, maxInt(u150) - 2);
|
|
442
|
+
try test__xor_limb64(i150, -2, 3, -3);
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
comptime {
|
|
446
|
+
symbol(&__not_limb64, "__not_limb64");
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
/// Bitwise NOT of a `bits`-wide integer stored as 64-bit limbs.
///
/// All used limbs of `a_ptr` are inverted into `out_ptr`. For an unsigned
/// integer whose width is not a multiple of 64, the most significant used limb
/// is additionally passed through `limbWrap`. Padding limbs of the result are
/// then rewritten by `fixLastLimb`.
fn __not_limb64(out_ptr: [*]u64, a_ptr: [*]const u64, is_signed: bool, bits: u16) callconv(.c) void {
    const used = usedLimbCount(bits);
    const dst = varLimbs(out_ptr, bits);
    const src = constLimbs(a_ptr, bits);

    // Index of the most significant used limb; it is handled separately below.
    const top = used - 1;
    for (0..top) |idx| {
        limbSet(dst, idx, ~limbGet(src, idx));
    }

    const inverted: u64 = ~limbGet(src, top);
    const needs_wrap = !is_signed and bits % 64 != 0;
    limbSet(dst, top, if (needs_wrap) limbWrap(inverted, is_signed, bits) else inverted);

    fixLastLimb(out_ptr, is_signed, bits);
}
|
|
466
|
+
|
|
467
|
+
/// Test driver: runs `__not_limb64` on the limb form of `a` and expects the
/// result to equal the limb form of `expected`.
fn test__not_limb64(comptime T: type, a: T, expected: T) !void {
    const info = @typeInfo(T).int;

    var operand = asLimbs(a);
    var result: Limbs(T) = undefined;
    __not_limb64(&result, &operand, info.signedness == .signed, info.bits);

    try testing.expectEqual(asLimbs(expected), result);
}
|
|
478
|
+
|
|
479
|
+
test __not_limb64 {
|
|
480
|
+
try test__not_limb64(u64, 1, maxInt(u64) - 1);
|
|
481
|
+
try test__not_limb64(u64, 3, maxInt(u64) - 3);
|
|
482
|
+
try test__not_limb64(u65, maxInt(u65), 0);
|
|
483
|
+
try test__not_limb64(u255, 7, maxInt(u255) - 7);
|
|
484
|
+
|
|
485
|
+
try test__not_limb64(i64, 3, -4);
|
|
486
|
+
try test__not_limb64(i64, -1, 0);
|
|
487
|
+
try test__not_limb64(i65, minInt(i65), maxInt(i65));
|
|
488
|
+
try test__not_limb64(i255, -3, 2);
|
|
489
|
+
|
|
490
|
+
try test__not_limb64(u150, maxInt(u150), 0);
|
|
491
|
+
try test__not_limb64(i150, maxInt(i150), minInt(i150));
|
|
492
|
+
}
|
|
493
|
+
|
|
494
|
+
comptime {
|
|
495
|
+
symbol(&__shlo_limb64, "__shlo_limb64");
|
|
496
|
+
}
|
|
497
|
+
|
|
498
|
+
/// Shifts the `bits`-wide integer at `a_ptr` left by `shift` bits, stores the
/// (wrapped) result at `out_ptr` and reports whether the shift overflowed.
///
/// `shift` must be smaller than `bits` (asserted). The return value is true
/// when any bit shifted out of the value differs from the sign fill of the
/// result (zero for unsigned integers), or when wrapping the top limb to
/// `bits` changed it.
///
/// NOTE(review): limbs of the output are written before all input limbs have
/// been read, so `out_ptr` aliasing `a_ptr` looks unsafe when `shift >= 64` —
/// confirm callers never pass the same buffer in that case.
fn __shlo_limb64(out_ptr: [*]u64, a_ptr: [*]const u64, shift: u16, is_signed: bool, bits: u16) callconv(.c) bool {
    const used = usedLimbCount(bits);
    const dst = varLimbs(out_ptr, bits);
    const src = constLimbs(a_ptr, bits);

    assert(shift < bits);

    const whole_limbs = shift / 64;
    const sub_limb = shift % 64;
    const top = used - 1;

    // Bits pushed out of the previous limb, to be merged into the next one.
    var spill: u64 = 0;
    for (0..top) |idx| {
        if (idx < whole_limbs) {
            limbSet(dst, idx, 0);
            continue;
        }
        const word = limbGet(src, idx - whole_limbs);
        limbSet(dst, idx, (word << @intCast(sub_limb)) | spill);
        spill = if (sub_limb != 0) (word >> @intCast(64 - sub_limb)) else 0;
    }

    // The most significant used limb is wrapped to the integer's width.
    const top_word = limbGet(src, top - whole_limbs);
    const unwrapped = (top_word << @intCast(sub_limb)) | spill;
    const shifted_out: u64 = if (sub_limb != 0) (top_word >> @intCast(64 - sub_limb)) else 0;

    const top_limb = if (bits % 64 == 0) unwrapped else limbWrap(unwrapped, is_signed, bits);
    limbSet(dst, top, top_limb);

    // Every bit that left the value must look like the sign fill of the result.
    const sign_fill: u64 = if (is_signed and (top_limb >> 63) == 1) ~@as(u64, 0) else 0;
    const expected_out: u64 = if (sub_limb == 0) 0 else sign_fill >> @intCast(64 - sub_limb);

    var overflowed = shifted_out != expected_out or (bits % 64 != 0 and unwrapped != top_limb);

    // Input limbs that were shifted out entirely.
    const first_dropped = used - whole_limbs;
    for (first_dropped..used) |idx| {
        if (limbGet(src, idx) != sign_fill) overflowed = true;
    }

    fixLastLimb(out_ptr, is_signed, bits);
    return overflowed;
}
|
|
543
|
+
|
|
544
|
+
/// Test driver: runs `__shlo_limb64` on the limb form of `a` and expects both
/// the shifted value (`expected[0]`) and the overflow flag (`expected[1]`).
fn test__shlo_limb64(comptime T: type, a: T, shift: u16, expected: struct { T, bool }) !void {
    const info = @typeInfo(T).int;

    var operand = asLimbs(a);
    var result: Limbs(T) = undefined;
    const overflowed = __shlo_limb64(&result, &operand, shift, info.signedness == .signed, info.bits);

    try testing.expectEqual(asLimbs(expected[0]), result);
    try testing.expectEqual(expected[1], overflowed);
}
|
|
556
|
+
|
|
557
|
+
test __shlo_limb64 {
|
|
558
|
+
try test__shlo_limb64(u64, 0x1234_5678_9ABC_DEF0, 4, .{ 0x2345_6789_ABCD_EF00, true });
|
|
559
|
+
try test__shlo_limb64(u64, 0x8000_0000_0000_0001, 63, .{ 0x8000_0000_0000_0000, true });
|
|
560
|
+
try test__shlo_limb64(u65, 1, 64, .{ 0x1_0000_0000_0000_0000, false });
|
|
561
|
+
try test__shlo_limb64(u65, 0x1_0000_0000_0000_0000, 1, .{ 0, true });
|
|
562
|
+
try test__shlo_limb64(u128, 0x1234_5678_9ABC_DEF0_1234_5678_9ABC_DEF0, 4, .{ 0x2345_6789_ABCD_EF01_2345_6789_ABCD_EF00, true });
|
|
563
|
+
try test__shlo_limb64(u255, maxInt(u255), 1, .{ maxInt(u255) - 1, true });
|
|
564
|
+
try test__shlo_limb64(u633, 1 << 299, 333, .{ 1 << 632, false });
|
|
565
|
+
try test__shlo_limb64(u633, 1 << 300, 333, .{ 0, true });
|
|
566
|
+
try test__shlo_limb64(u633, 1 << 298, 333, .{ 1 << 631, false });
|
|
567
|
+
|
|
568
|
+
try test__shlo_limb64(i64, -2, 1, .{ -4, false });
|
|
569
|
+
try test__shlo_limb64(i64, minInt(i64), 1, .{ 0, true });
|
|
570
|
+
try test__shlo_limb64(i64, minInt(i64), 63, .{ 0, true });
|
|
571
|
+
try test__shlo_limb64(i65, minInt(i63), 1, .{ minInt(i64), false });
|
|
572
|
+
try test__shlo_limb64(i65, -1, 17, .{ -1 << 17, false });
|
|
573
|
+
try test__shlo_limb64(i65, -3, 64, .{ -1 << 64, true });
|
|
574
|
+
try test__shlo_limb64(i128, -0x1234_5678_9ABC_DEF0_1234_5678_9ABC_DEF0, 4, .{ -0x2345_6789_ABCD_EF01_2345_6789_ABCD_EF00, true });
|
|
575
|
+
try test__shlo_limb64(i255, -3, 1, .{ -6, false });
|
|
576
|
+
try test__shlo_limb64(i633, 1 << 298, 333, .{ 1 << 631, false });
|
|
577
|
+
try test__shlo_limb64(i633, 1 << 299, 333, .{ minInt(i633), true });
|
|
578
|
+
try test__shlo_limb64(i633, 1 << 300, 333, .{ 0, true });
|
|
579
|
+
try test__shlo_limb64(i633, 1 << 297, 333, .{ 1 << 630, false });
|
|
580
|
+
try test__shlo_limb64(i633, -1 << 299, 333, .{ -1 << 632, false });
|
|
581
|
+
try test__shlo_limb64(i633, -1 << 300, 333, .{ 0, true });
|
|
582
|
+
try test__shlo_limb64(i633, -1 << 298, 333, .{ -1 << 631, false });
|
|
583
|
+
|
|
584
|
+
try test__shlo_limb64(u150, maxInt(u150), 1, .{ maxInt(u150) - 1, true });
|
|
585
|
+
try test__shlo_limb64(i150, -3, 1, .{ -6, false });
|
|
586
|
+
}
|
|
587
|
+
|
|
588
|
+
comptime {
|
|
589
|
+
symbol(&__shr_limb64, "__shr_limb64");
|
|
590
|
+
}
|
|
591
|
+
|
|
592
|
+
/// Shifts the `bits`-wide integer at `a_ptr` right by `shift` bits and stores
/// the result at `out_ptr`.
///
/// `shift` must be smaller than `bits` (asserted). Vacated high bits are filled
/// with ones when `is_signed` is true and the top bit of the most significant
/// used limb is set, and with zeros otherwise. Limbs are processed from most
/// to least significant.
///
/// NOTE(review): with `shift >= 64` the top output limbs are written before
/// the corresponding input limbs are read, so `out_ptr == a_ptr` only looks
/// safe for `shift < 64` — confirm against callers.
fn __shr_limb64(out_ptr: [*]u64, a_ptr: [*]const u64, shift: u16, is_signed: bool, bits: u16) callconv(.c) void {
    const used = usedLimbCount(bits);
    const dst = varLimbs(out_ptr, bits);
    const src = constLimbs(a_ptr, bits);

    assert(shift < bits);

    const whole_limbs = shift / 64;
    const sub_limb = shift % 64;

    const top_word = limbGet(src, used - 1);
    const sign_fill: u64 = if (is_signed and (top_word >> 63) == 1) ~@as(u64, 0) else 0;

    // Bits entering a limb from the limb above it; starts out as the sign fill.
    var spill: u64 = if (sub_limb != 0) (sign_fill << @intCast(64 - sub_limb)) else 0;

    var pos: usize = used;
    while (pos > 0) {
        pos -= 1;
        if (pos + whole_limbs >= used) {
            // No source limb exists this far up: the limb is pure sign fill.
            limbSet(dst, pos, sign_fill);
            continue;
        }
        const word = limbGet(src, pos + whole_limbs);
        limbSet(dst, pos, (word >> @intCast(sub_limb)) | spill);
        spill = if (sub_limb != 0) (word << @intCast(64 - sub_limb)) else 0;
    }

    fixLastLimb(out_ptr, is_signed, bits);
}
|
|
620
|
+
|
|
621
|
+
/// Test driver: runs `__shr_limb64` on the limb form of `a` and expects the
/// result to equal the limb form of `expected`.
fn test__shr_limb64(comptime T: type, a: T, shift: u16, expected: T) !void {
    const info = @typeInfo(T).int;

    var operand = asLimbs(a);
    var result: Limbs(T) = undefined;
    __shr_limb64(&result, &operand, shift, info.signedness == .signed, info.bits);

    try testing.expectEqual(asLimbs(expected), result);
}
|
|
632
|
+
|
|
633
|
+
test __shr_limb64 {
|
|
634
|
+
try test__shr_limb64(u64, 0x1234_5678_9ABC_DEF0, 4, 0x0123_4567_89AB_CDEF);
|
|
635
|
+
try test__shr_limb64(u64, 0x8000_0000_0000_0001, 63, 1);
|
|
636
|
+
try test__shr_limb64(u65, 0x1_0000_0000_0000_0000, 64, 1);
|
|
637
|
+
try test__shr_limb64(u65, 0x1_0000_0000_0000_0001, 1, 0x0_8000_0000_0000_0000);
|
|
638
|
+
try test__shr_limb64(u128, 0x1234_5678_9ABC_DEF0_1234_5678_9ABC_DEF0, 4, 0x0123_4567_89AB_CDEF_0123_4567_89AB_CDEF);
|
|
639
|
+
try test__shr_limb64(u255, maxInt(u255), 1, maxInt(u254));
|
|
640
|
+
try test__shr_limb64(u633, 1 << 333, 333, 1);
|
|
641
|
+
try test__shr_limb64(u633, 1 << 334, 333, 2);
|
|
642
|
+
try test__shr_limb64(u633, 1 << 332, 333, 0);
|
|
643
|
+
|
|
644
|
+
try test__shr_limb64(i64, -2, 1, -1);
|
|
645
|
+
try test__shr_limb64(i64, minInt(i64), 63, -1);
|
|
646
|
+
try test__shr_limb64(i65, minInt(i65), 1, minInt(i65) | (1 << 63));
|
|
647
|
+
try test__shr_limb64(i65, -1, 17, -1);
|
|
648
|
+
try test__shr_limb64(i128, -0x1234_5678_9ABC_DEF0_1234_5678_9ABC_DEF0, 4, -0x0123_4567_89AB_CDEF_0123_4567_89AB_CDEF);
|
|
649
|
+
try test__shr_limb64(i255, -3, 1, -2);
|
|
650
|
+
try test__shr_limb64(i633, 1 << 333, 333, 1);
|
|
651
|
+
try test__shr_limb64(i633, 1 << 334, 333, 2);
|
|
652
|
+
try test__shr_limb64(i633, 1 << 332, 333, 0);
|
|
653
|
+
try test__shr_limb64(i633, -1 << 333, 333, -1);
|
|
654
|
+
try test__shr_limb64(i633, -1 << 334, 333, -2);
|
|
655
|
+
try test__shr_limb64(i633, -1 << 332, 333, -1);
|
|
656
|
+
|
|
657
|
+
try test__shr_limb64(u150, maxInt(u150), 1, maxInt(u149));
|
|
658
|
+
try test__shr_limb64(i150, -3, 1, -2);
|
|
659
|
+
}
|
|
660
|
+
|
|
661
|
+
comptime {
|
|
662
|
+
symbol(&__clz_limb64, "__clz_limb64");
|
|
663
|
+
}
|
|
664
|
+
|
|
665
|
+
/// Counts the leading zero bits of the `bits`-wide integer stored at `a_ptr`.
///
/// When `bits` is not a multiple of 64, the most significant used limb only
/// contributes its low `bits % 64` bits: a non-zero limb is shifted left so
/// those bits sit at the top before `@clz` is applied. The remaining limbs are
/// scanned from most to least significant until a non-zero limb is found.
fn __clz_limb64(a_ptr: [*]const u64, bits: u16) callconv(.c) u16 {
    const used = usedLimbCount(bits);
    const src = constLimbs(a_ptr, bits);

    const partial = bits % 64;
    var zeros: u16 = 0;
    // Number of limbs (counted from the least significant) still to inspect.
    var remaining: usize = used;

    if (partial != 0) {
        const top_word = limbGet(src, used - 1);
        if (top_word != 0) {
            return @clz(top_word << @intCast(64 - partial));
        }
        zeros += partial;
        remaining -= 1;
    }

    while (remaining > 0) {
        remaining -= 1;
        const word = limbGet(src, remaining);
        if (word != 0) return zeros + @clz(word);
        zeros += 64;
    }

    return zeros;
}
|
|
695
|
+
|
|
696
|
+
/// Test driver: expects `__clz_limb64` on the limb form of `a` to return
/// `expected`.
fn test__clz_limb64(comptime T: type, a: T, expected: u16) !void {
    var operand = asLimbs(a);
    const actual = __clz_limb64(&operand, @typeInfo(T).int.bits);

    try testing.expectEqual(expected, actual);
}
|
|
704
|
+
|
|
705
|
+
test __clz_limb64 {
|
|
706
|
+
try test__clz_limb64(u64, 0, 64);
|
|
707
|
+
try test__clz_limb64(u65, 1 << 64, 0);
|
|
708
|
+
try test__clz_limb64(u65, 1 << 9, 55);
|
|
709
|
+
try test__clz_limb64(u128, 1 << 31, 96);
|
|
710
|
+
try test__clz_limb64(u255, 1 << 62, 192);
|
|
711
|
+
|
|
712
|
+
try test__clz_limb64(i64, -1, 0);
|
|
713
|
+
try test__clz_limb64(i65, minInt(i65), 0);
|
|
714
|
+
try test__clz_limb64(i65, 1 << 32, 32);
|
|
715
|
+
try test__clz_limb64(i128, 0, 128);
|
|
716
|
+
try test__clz_limb64(i255, 1 << 130, 124);
|
|
717
|
+
|
|
718
|
+
try test__clz_limb64(u150, 1 << 31, 118);
|
|
719
|
+
try test__clz_limb64(i150, maxInt(u65) - 1, 85);
|
|
720
|
+
}
|
|
721
|
+
|
|
722
|
+
comptime {
|
|
723
|
+
symbol(&__ctz_limb64, "__ctz_limb64");
|
|
724
|
+
}
|
|
725
|
+
|
|
726
|
+
/// Counts the trailing zero bits of the `bits`-wide integer stored at `a_ptr`.
///
/// Limbs are scanned from least to most significant. If every limb is zero,
/// the most significant used limb contributes `bits % 64` zeros when `bits` is
/// not a multiple of 64, so the total is `bits`.
fn __ctz_limb64(a_ptr: [*]const u64, bits: u16) callconv(.c) u16 {
    const used = usedLimbCount(bits);
    const src = constLimbs(a_ptr, bits);

    const top = used - 1;
    var zeros: u16 = 0;
    for (0..top) |idx| {
        const word = limbGet(src, idx);
        if (word != 0) return zeros + @ctz(word);
        zeros += 64;
    }

    // Only the low `bits % 64` bits of a partial top limb belong to the value.
    const top_word = limbGet(src, top);
    const partial = bits % 64;
    const top_zeros: u16 = if (partial != 0 and top_word == 0) partial else @ctz(top_word);
    return zeros + top_zeros;
}
|
|
751
|
+
|
|
752
|
+
/// Test driver: expects `__ctz_limb64` on the limb form of `a` to return
/// `expected`.
fn test__ctz_limb64(comptime T: type, a: T, expected: u16) !void {
    var operand = asLimbs(a);
    const actual = __ctz_limb64(&operand, @typeInfo(T).int.bits);

    try testing.expectEqual(expected, actual);
}
|
|
760
|
+
|
|
761
|
+
test __ctz_limb64 {
|
|
762
|
+
try test__ctz_limb64(u64, 1 << 17, 17);
|
|
763
|
+
try test__ctz_limb64(u65, 1 << 64, 64);
|
|
764
|
+
try test__ctz_limb64(u65, 0, 65);
|
|
765
|
+
try test__ctz_limb64(u128, 1 << 100, 100);
|
|
766
|
+
try test__ctz_limb64(u255, 1 << 200, 200);
|
|
767
|
+
|
|
768
|
+
try test__ctz_limb64(i64, -1 << 9, 9);
|
|
769
|
+
try test__ctz_limb64(i65, minInt(i65), 64);
|
|
770
|
+
try test__ctz_limb64(i65, 0, 65);
|
|
771
|
+
try test__ctz_limb64(i128, -1 << 73, 73);
|
|
772
|
+
try test__ctz_limb64(i255, 1 << 130, 130);
|
|
773
|
+
|
|
774
|
+
try test__ctz_limb64(u150, 1 << 101, 101);
|
|
775
|
+
try test__ctz_limb64(i150, -1 << 74, 74);
|
|
776
|
+
}
|
|
777
|
+
|
|
778
|
+
comptime {
|
|
779
|
+
symbol(&__popcount_limb64, "__popcount_limb64");
|
|
780
|
+
}
|
|
781
|
+
|
|
782
|
+
/// Counts the set bits of the `bits`-wide integer stored at `a_ptr`.
///
/// When `bits` is not a multiple of 64, the most significant used limb is
/// shifted left by `64 - bits % 64` first, so bits above the integer's width
/// are dropped before counting.
fn __popcount_limb64(a_ptr: [*]const u64, bits: u16) callconv(.c) u16 {
    const used = usedLimbCount(bits);
    const src = constLimbs(a_ptr, bits);

    const top = used - 1;
    var ones: u16 = 0;
    for (0..top) |idx| {
        ones += @popCount(limbGet(src, idx));
    }

    const partial = bits % 64;
    const top_word = limbGet(src, top);
    const value_bits = if (partial != 0) top_word << @intCast(64 - partial) else top_word;
    return ones + @popCount(value_bits);
}
|
|
800
|
+
|
|
801
|
+
/// Test driver: expects `__popcount_limb64` on the limb form of `a` to return
/// `expected`.
fn test__popcount_limb64(comptime T: type, a: T, expected: u16) !void {
    var operand = asLimbs(a);
    const actual = __popcount_limb64(&operand, @typeInfo(T).int.bits);

    try testing.expectEqual(expected, actual);
}
|
|
809
|
+
|
|
810
|
+
test __popcount_limb64 {
|
|
811
|
+
try test__popcount_limb64(u64, 0xF0F0_0000_0000_0001, 9);
|
|
812
|
+
try test__popcount_limb64(u65, 1 << 64, 1);
|
|
813
|
+
try test__popcount_limb64(u65, maxInt(u65), 65);
|
|
814
|
+
try test__popcount_limb64(u128, (1 << 100) | (1 << 5) | 1, 3);
|
|
815
|
+
try test__popcount_limb64(u255, maxInt(u255), 255);
|
|
816
|
+
|
|
817
|
+
try test__popcount_limb64(i64, -1, 64);
|
|
818
|
+
try test__popcount_limb64(i65, minInt(i65), 1);
|
|
819
|
+
try test__popcount_limb64(i65, -1, 65);
|
|
820
|
+
try test__popcount_limb64(i128, -1 << 7, 121);
|
|
821
|
+
try test__popcount_limb64(i255, -1 << 200, 55);
|
|
822
|
+
|
|
823
|
+
try test__popcount_limb64(u150, (1 << 149) | (1 << 65) | 1, 3);
|
|
824
|
+
try test__popcount_limb64(i150, -1 << 7, 143);
|
|
825
|
+
}
|
|
826
|
+
|
|
827
|
+
comptime {
|
|
828
|
+
symbol(&__bitreverse_limb64, "__bitreverse_limb64");
|
|
829
|
+
}
|
|
830
|
+
|
|
831
|
+
/// Reverses the bit order of the `bits`-wide integer at `a_ptr` into `out_ptr`.
///
/// Each used limb is bit-reversed and stored at the mirrored limb position.
/// When `bits` is not a multiple of 64, the result is then shifted right in
/// place by `64 - bits % 64` via `__shr_limb64`. Padding limbs are finally
/// rewritten by `fixLastLimb`.
fn __bitreverse_limb64(out_ptr: [*]u64, a_ptr: [*]const u64, is_signed: bool, bits: u16) callconv(.c) void {
    const used = usedLimbCount(bits);
    const dst = varLimbs(out_ptr, bits);
    const src = constLimbs(a_ptr, bits);

    for (0..used) |from| {
        const to = used - 1 - from;
        limbSet(dst, to, @bitReverse(limbGet(src, from)));
    }

    const partial = bits % 64;
    if (partial != 0) {
        __shr_limb64(out_ptr, out_ptr, 64 - partial, is_signed, bits);
    }
    fixLastLimb(out_ptr, is_signed, bits);
}
|
|
847
|
+
|
|
848
|
+
/// Test driver: runs `__bitreverse_limb64` on the limb form of `a` and expects
/// the result to equal the limb form of `expected`.
fn test__bitreverse_limb64(comptime T: type, a: T, expected: T) !void {
    const info = @typeInfo(T).int;

    var operand = asLimbs(a);
    var result: Limbs(T) = undefined;
    __bitreverse_limb64(&result, &operand, info.signedness == .signed, info.bits);

    try testing.expectEqual(asLimbs(expected), result);
}
|
|
859
|
+
|
|
860
|
+
test __bitreverse_limb64 {
|
|
861
|
+
try test__bitreverse_limb64(u64, 1 << 7, 1 << 56);
|
|
862
|
+
try test__bitreverse_limb64(u65, 1 << 64, 1);
|
|
863
|
+
try test__bitreverse_limb64(u65, 1 << 9, 1 << 55);
|
|
864
|
+
try test__bitreverse_limb64(u128, 1 << 100, 1 << 27);
|
|
865
|
+
try test__bitreverse_limb64(u255, 1 << 200, 1 << 54);
|
|
866
|
+
|
|
867
|
+
try test__bitreverse_limb64(i64, -1, -1);
|
|
868
|
+
try test__bitreverse_limb64(i65, 1 << 32, 1 << 32);
|
|
869
|
+
try test__bitreverse_limb64(i65, minInt(i65), 1);
|
|
870
|
+
try test__bitreverse_limb64(i128, 1 << 63, 1 << 64);
|
|
871
|
+
try test__bitreverse_limb64(i255, 1 << 130, 1 << 124);
|
|
872
|
+
|
|
873
|
+
try test__bitreverse_limb64(u150, 1 << 9, 1 << 140);
|
|
874
|
+
try test__bitreverse_limb64(i150, minInt(i150), 1);
|
|
875
|
+
}
|
|
876
|
+
|
|
877
|
+
comptime {
|
|
878
|
+
symbol(&__byteswap_limb64, "__byteswap_limb64");
|
|
879
|
+
}
|
|
880
|
+
|
|
881
|
+
/// Reverses the byte order of the `bits`-wide integer at `a_ptr` into `out_ptr`.
///
/// `bits` must be a multiple of 8 (asserted). Each used limb is byte-swapped
/// and stored at the mirrored limb position. When `bits` is not a multiple of
/// 64, the result is then shifted right in place by `64 - bits % 64` via
/// `__shr_limb64`. Padding limbs are finally rewritten by `fixLastLimb`.
fn __byteswap_limb64(out_ptr: [*]u64, a_ptr: [*]const u64, is_signed: bool, bits: u16) callconv(.c) void {
    const used = usedLimbCount(bits);
    const dst = varLimbs(out_ptr, bits);
    const src = constLimbs(a_ptr, bits);

    assert(bits % 8 == 0);

    for (0..used) |from| {
        const to = used - 1 - from;
        limbSet(dst, to, @byteSwap(limbGet(src, from)));
    }

    const partial = bits % 64;
    if (partial != 0) {
        __shr_limb64(out_ptr, out_ptr, 64 - partial, is_signed, bits);
    }
    fixLastLimb(out_ptr, is_signed, bits);
}
|
|
899
|
+
|
|
900
|
+
/// Test driver: runs `__byteswap_limb64` on the limb form of `a` and expects
/// the result to equal the limb form of `expected`.
fn test__byteswap_limb64(comptime T: type, a: T, expected: T) !void {
    const info = @typeInfo(T).int;

    var operand = asLimbs(a);
    var result: Limbs(T) = undefined;
    __byteswap_limb64(&result, &operand, info.signedness == .signed, info.bits);

    try testing.expectEqual(asLimbs(expected), result);
}
|
|
911
|
+
|
|
912
|
+
test __byteswap_limb64 {
    // Every row is `.{ T, input, expected }` and is forwarded unchanged to
    // `test__byteswap_limb64`, in the order listed.
    inline for (.{
        // Unsigned operands.
        .{ u64, 0x0123_4567_89AB_CDEF, 0xEFCD_AB89_6745_2301 },
        .{ u72, 0x01_23_45_67_89_AB_CD_EF_11, 0x11_EF_CD_AB_89_67_45_23_01 },
        .{ u128, 1 << 72, 1 << 48 },
        .{ u248, 1, 1 << 240 },
        .{ u256, 1 << 120, 1 << 128 },

        // Signed operands.
        .{ i64, minInt(i64), 128 },
        .{ i72, 1, 1 << 64 },
        .{ i72, -1, -1 },
        .{ i128, 1 << 56, 1 << 64 },
        .{ i248, minInt(i248), 128 },

        // 152-bit operands (three used limbs, the last one partially filled).
        .{ u152, 1, 1 << 144 },
        .{ i152, 1 << 56, 1 << 88 },
    }) |case| {
        try test__byteswap_limb64(case[0], case[1], case[2]);
    }
}
|
|
928
|
+
|
|
929
|
+
// Hand `__mulo_limb64` to compiler_rt's `symbol` helper under its own name
// (presumably this is what exports it from the library; see `symbol`).
comptime {
    symbol(&__mulo_limb64, "__mulo_limb64");
}
|
|
932
|
+
|
|
933
|
+
/// Adds `v0` into the three-limb little-end-first accumulator `x`, starting
/// at limb index `start` and rippling the carry into the following limbs.
///
/// A carry out of `x[2]` is dropped. A `start` of 3 or more leaves `x`
/// untouched.
inline fn add3(x: *[3]u64, start: usize, v0: u64) void {
    var addend = v0;
    var idx = start;
    while (idx < x.len) {
        const sum, const carry_out = @addWithOverflow(x[idx], addend);
        x[idx] = sum;
        // No carry means the remaining limbs are unaffected.
        if (carry_out == 0) return;
        addend = 1;
        idx += 1;
    }
}
|
|
943
|
+
|
|
944
|
+
/// Multiplies `a` by `b` through `muldXi` from `mulXi3.zig` and returns the
/// result split into two limbs as `.{ low, high }`, independent of the
/// target's byte order.
fn mulwide(a: u64, b: u64) [2]u64 {
    // The bit-cast yields the two halves in memory order, so which array
    // slot holds the low half depends on the target endianness.
    const halves: [2]u64 = @bitCast(@import("mulXi3.zig").muldXi(u64, a, b));
    const lo: usize = switch (endian) {
        .little => 0,
        .big => 1,
    };
    return .{ halves[lo], halves[1 - lo] };
}
|
|
952
|
+
|
|
953
|
+
/// Multiplies the two `bits`-wide integers at `a_ptr` and `b_ptr` (stored as
/// 64-bit limbs), writes the wrapped product to `out_ptr`, and returns `true`
/// when the exact product does not fit in `bits` bits.
///
/// The product is formed column by column (schoolbook multiplication): output
/// limb `k` is the sum of every `a[i] * b[k - i]` plus the carry from the
/// previous column. Columns below `limb_cnt` are stored. The columns above are
/// only inspected to decide whether the result overflowed.
///
/// The output is zeroed before the operands are read, so `out_ptr` must not
/// alias `a_ptr` or `b_ptr`.
fn __mulo_limb64(out_ptr: [*]u64, a_ptr: [*]const u64, b_ptr: [*]const u64, is_signed: bool, bits: u16) callconv(.c) bool {
    const limb_cnt = usedLimbCount(bits);

    const out = varLimbs(out_ptr, bits);
    const a = constLimbs(a_ptr, bits);
    const b = constLimbs(b_ptr, bits);

    @memset(out, 0);

    const all_ones = ~@as(u64, 0);
    // The sign is read from bit 63 of the most significant used limb, i.e.
    // signed operands are expected to be sign-extended through that limb.
    const a_neg = is_signed and ((limbGet(a, limb_cnt - 1) >> 63) != 0);
    const b_neg = is_signed and ((limbGet(b, limb_cnt - 1) >> 63) != 0);

    // Three-limb column sum carried from one column to the next.
    var carry: [3]u64 = @splat(0);
    // Whether every upper-half limb seen so far is zero / all ones.
    var hi_zero = true;
    var hi_ones = true;
    var hi_borrow: u1 = 0;
    // Most significant low-half limb, before wrapping to `bits`.
    var raw_last: u64 = 0;

    var k: usize = 0;
    while (k < 2 * limb_cnt) : (k += 1) {
        var acc = carry;

        // The bounds on `i` keep both `i` and `k - i` below `limb_cnt`, so
        // no further range check is needed inside the loop.
        var i: usize = if (k < limb_cnt) 0 else k - (limb_cnt - 1);
        while (i < limb_cnt and i <= k) : (i += 1) {
            const p = mulwide(limbGet(a, i), limbGet(b, k - i));
            add3(&acc, 0, p[0]);
            add3(&acc, 1, p[1]);
        }

        var limb = acc[0];
        if (k < limb_cnt) {
            limbSet(out, k, limb);
            if (k == limb_cnt - 1) raw_last = limb;
        } else {
            if (is_signed) {
                // The limbs were multiplied as unsigned numbers. Reading a
                // negative operand as unsigned adds 2^(64 * limb_cnt) to it,
                // which adds the other operand into the upper half of the
                // product; subtract that back out limb by limb.
                const h = k - limb_cnt;

                const s0 = @subWithOverflow(limb, if (a_neg) limbGet(b, h) else 0);
                const s1 = @subWithOverflow(s0[0], if (b_neg) limbGet(a, h) else 0);
                const s2 = @subWithOverflow(s1[0], hi_borrow);

                limb = s2[0];
                hi_borrow = @intFromBool(s0[1] != 0 or s1[1] != 0 or s2[1] != 0);
            }

            hi_zero = hi_zero and limb == 0;
            hi_ones = hi_ones and limb == all_ones;
        }

        // Shift the accumulator down one limb for the next column.
        carry = .{ acc[1], acc[2], 0 };
    }

    // A partially used top limb is wrapped to `bits` and stored back; any
    // change made by the wrap means bits were lost.
    const partial_top = bits % 64 != 0;
    const last = if (partial_top) limbWrap(raw_last, is_signed, bits) else raw_last;
    if (partial_top) {
        limbSet(out, limb_cnt - 1, last);
    }

    fixLastLimb(out_ptr, is_signed, bits);

    const truncated = raw_last != last;
    if (!is_signed) {
        // Unsigned: the upper half must be zero.
        return !hi_zero or truncated;
    }

    // Signed: the upper half must repeat the sign bit of the stored result.
    const result_negative = (last >> 63) == 1;
    return truncated or if (result_negative) !hi_ones else !hi_zero;
}
|
|
1023
|
+
|
|
1024
|
+
/// Runs `__mulo_limb64` on `a` and `b` (converted with `asLimbs`) and checks
/// both results against `expected`: `expected[0]` is the expected product
/// value and `expected[1]` the expected overflow flag. Signedness and bit
/// width are taken from the integer type `T`.
fn test__mulo_limb64(comptime T: type, a: T, b: T, expected: struct { T, bool }) !void {
    const info = @typeInfo(T).int;

    const lhs = asLimbs(a);
    const rhs = asLimbs(b);
    var product: Limbs(T) = undefined;
    const overflowed = __mulo_limb64(&product, &lhs, &rhs, info.signedness == .signed, info.bits);

    try testing.expectEqual(asLimbs(expected[0]), product);
    try testing.expectEqual(expected[1], overflowed);
}
|
|
1037
|
+
|
|
1038
|
+
test __mulo_limb64 {
    // Every row is `.{ T, a, b, expected_product, expected_overflow }` and is
    // forwarded to `test__mulo_limb64` in the order listed.
    inline for (.{
        // Unsigned operands.
        .{ u64, 3, 5, 15, false },
        .{ u64, maxInt(u64), 2, maxInt(u64) - 1, true },
        .{ u65, 1 << 32, 1 << 32, 1 << 64, false },
        .{ u65, 1 << 64, 2, 0, true },
        .{ u128, 1 << 80, 1 << 40, 1 << 120, false },
        .{ u128, 1 << 100, 1 << 40, 0, true },
        .{ u255, 7, 9, 63, false },
        .{ u255, maxInt(u255), 2, maxInt(u255) - 1, true },

        // Signed operands.
        .{ i64, -3, 2, -6, false },
        .{ i64, maxInt(i64), 2, -2, true },
        .{ i65, 1 << 63, 2, minInt(i65), true },
        .{ i65, -1 << 32, 1 << 16, -1 << 48, false },
        .{ i128, 1 << 100, 1 << 27, minInt(i128), true },
        .{ i128, -1 << 80, 1 << 40, -1 << 120, false },
        .{ i255, -3, 2, -6, false },
        .{ i255, maxInt(i255), 2, -2, true },

        // 200-bit unsigned boundary cases.
        .{ u200, 0, maxInt(u200), 0, false },
        .{ u200, 1, maxInt(u200), maxInt(u200), false },
        .{ u200, 1 << 100, 1 << 99, 1 << 199, false },
        .{ u200, 1 << 100, 1 << 100, 0, true },
        .{ u200, maxInt(u200), maxInt(u200), 1, true },

        // 200-bit signed boundary cases.
        .{ i200, 0, -1, 0, false },
        .{ i200, -1, -1, 1, false },
        .{ i200, -1, minInt(i200), minInt(i200), true },
        .{ i200, maxInt(i200), 2, -2, true },
        .{ i200, 1 << 100, 1 << 98, 1 << 198, false },
        .{ i200, 1 << 100, 1 << 99, minInt(i200), true },
        .{ i200, maxInt(i200), maxInt(i200), 1, true },
        .{ i200, minInt(i200), minInt(i200), 0, true },

        // 150-bit operands.
        .{ u150, maxInt(u150), 2, maxInt(u150) - 1, true },
        .{ i150, maxInt(i150), 2, -2, true },
    }) |case| {
        try test__mulo_limb64(case[0], case[1], case[2], .{ case[3], case[4] });
    }
}
|
|
1075
|
+
|
|
1076
|
+
// Hand `__abs_limb64` to compiler_rt's `symbol` helper under its own name
// (presumably this is what exports it from the library; see `symbol`).
comptime {
    symbol(&__abs_limb64, "__abs_limb64");
}
|
|
1079
|
+
|
|
1080
|
+
/// Writes the absolute value of the signed `bits`-wide integer at `a_ptr`
/// to `out_ptr`, both laid out as 64-bit limbs.
///
/// This works on `limbCount(bits)` limbs, i.e. the full storage size of the
/// integer rather than only the limbs `bits` occupies. The sign is taken from
/// bit 63 of the most significant of those limbs.
/// NOTE(review): this assumes the input is sign-extended through all of its
/// storage limbs, including any padding; confirm with the callers.
fn __abs_limb64(out_ptr: [*]u64, a_ptr: [*]const u64, bits: u16) callconv(.c) void {
    const limb_cnt = limbCount(bits);
    const out = out_ptr[0..limb_cnt];
    const a = a_ptr[0..limb_cnt];

    const is_negative = (limbGet(a, limb_cnt - 1) >> 63) != 0;
    if (!is_negative) {
        // Non-negative input is already its own absolute value.
        // NOTE(review): `@memcpy` requires non-overlapping operands; confirm
        // that callers never pass `out_ptr == a_ptr`.
        @memcpy(out, a);
        return;
    }

    // Two's-complement negation: invert every limb and add one, rippling the
    // carry from the least significant limb upwards.
    var carry: u1 = 1;
    for (0..limb_cnt) |i| {
        const negated, const carry_out = @addWithOverflow(~limbGet(a, i), carry);
        limbSet(out, i, negated);
        carry = carry_out;
    }
}
|
|
1099
|
+
|
|
1100
|
+
/// Runs `__abs_limb64` on `a` (converted with `asLimbs`) and expects the
/// output limbs to equal the limbs of `expected`, which has the unsigned type
/// of the same bit width as `T`. `T` must be a signed integer type (checked
/// at compile time).
fn test__abs_limb64(comptime T: type, a: T, expected: @Int(.unsigned, @typeInfo(T).int.bits)) !void {
    const info = @typeInfo(T).int;
    comptime assert(info.signedness == .signed);

    const input = asLimbs(a);
    var magnitude: Limbs(@TypeOf(expected)) = undefined;
    __abs_limb64(&magnitude, &input, info.bits);

    try testing.expectEqual(asLimbs(expected), magnitude);
}
|
|
1111
|
+
|
|
1112
|
+
test __abs_limb64 {
    // Every row is `.{ T, input, expected }` and is forwarded unchanged to
    // `test__abs_limb64`, in the order listed.
    inline for (.{
        .{ i64, 0, 0 },
        .{ i64, -1, 1 },
        .{ i64, minInt(i64), 1 << 63 },
        .{ i65, -1, 1 },
        .{ i65, minInt(i65), 1 << 64 },
        .{ i65, maxInt(i65), maxInt(i65) },
        .{ i128, -1 << 80, 1 << 80 },
        .{ i128, 1 << 64, 1 << 64 },
        .{ i200, -1 << 198, 1 << 198 },
        .{ i255, -5, 5 },
        .{ i255, minInt(i255), 1 << 254 },

        // 150-bit operand.
        .{ i150, -40, 40 },
    }) |case| {
        try test__abs_limb64(case[0], case[1], case[2]);
    }
}
|