toilscript 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/NOTICE +94 -0
- package/README.md +114 -0
- package/bin/asc.js +35 -0
- package/bin/asinit.js +468 -0
- package/dist/asc.d.ts +4 -0
- package/dist/toilscript.d.ts +4 -0
- package/dist/transform.cjs +1 -0
- package/dist/transform.d.ts +1 -0
- package/dist/transform.js +1 -0
- package/lib/binaryen.d.ts +2 -0
- package/lib/binaryen.js +2 -0
- package/package.json +114 -0
- package/std/README.md +6 -0
- package/std/assembly/array.ts +550 -0
- package/std/assembly/arraybuffer.ts +77 -0
- package/std/assembly/atomics.ts +127 -0
- package/std/assembly/bindings/asyncify.ts +16 -0
- package/std/assembly/bindings/dom.ts +291 -0
- package/std/assembly/bindings/node.ts +6 -0
- package/std/assembly/bitflags.ts +53 -0
- package/std/assembly/builtins.ts +2650 -0
- package/std/assembly/byteslice.ts +177 -0
- package/std/assembly/compat.ts +2 -0
- package/std/assembly/console.ts +42 -0
- package/std/assembly/crypto.ts +9 -0
- package/std/assembly/dataview.ts +181 -0
- package/std/assembly/date.ts +375 -0
- package/std/assembly/diagnostics.ts +11 -0
- package/std/assembly/encoding.ts +151 -0
- package/std/assembly/endian.ts +45 -0
- package/std/assembly/error.ts +44 -0
- package/std/assembly/fixedarray.ts +173 -0
- package/std/assembly/fixedmap.ts +326 -0
- package/std/assembly/fixedset.ts +275 -0
- package/std/assembly/function.ts +42 -0
- package/std/assembly/index.d.ts +2891 -0
- package/std/assembly/iterator.ts +35 -0
- package/std/assembly/map.ts +269 -0
- package/std/assembly/math.ts +3289 -0
- package/std/assembly/memory.ts +123 -0
- package/std/assembly/number.ts +388 -0
- package/std/assembly/object.ts +36 -0
- package/std/assembly/performance.ts +9 -0
- package/std/assembly/pointer.ts +80 -0
- package/std/assembly/polyfills.ts +27 -0
- package/std/assembly/process.ts +50 -0
- package/std/assembly/reference.ts +48 -0
- package/std/assembly/regexp.ts +12 -0
- package/std/assembly/rt/README.md +83 -0
- package/std/assembly/rt/common.ts +81 -0
- package/std/assembly/rt/index-incremental.ts +2 -0
- package/std/assembly/rt/index-memory.ts +1 -0
- package/std/assembly/rt/index-minimal.ts +2 -0
- package/std/assembly/rt/index-stub.ts +1 -0
- package/std/assembly/rt/index.d.ts +37 -0
- package/std/assembly/rt/itcms.ts +419 -0
- package/std/assembly/rt/memory-runtime.ts +94 -0
- package/std/assembly/rt/rtrace.ts +15 -0
- package/std/assembly/rt/stub.ts +133 -0
- package/std/assembly/rt/tcms.ts +254 -0
- package/std/assembly/rt/tlsf.ts +592 -0
- package/std/assembly/rt.ts +90 -0
- package/std/assembly/set.ts +225 -0
- package/std/assembly/shared/feature.ts +68 -0
- package/std/assembly/shared/runtime.ts +13 -0
- package/std/assembly/shared/target.ts +11 -0
- package/std/assembly/shared/tsconfig.json +11 -0
- package/std/assembly/shared/typeinfo.ts +72 -0
- package/std/assembly/staticarray.ts +423 -0
- package/std/assembly/string.ts +850 -0
- package/std/assembly/symbol.ts +114 -0
- package/std/assembly/table.ts +16 -0
- package/std/assembly/tsconfig.json +6 -0
- package/std/assembly/typedarray.ts +1954 -0
- package/std/assembly/uri.ts +17 -0
- package/std/assembly/util/bytes.ts +107 -0
- package/std/assembly/util/casemap.ts +497 -0
- package/std/assembly/util/error.ts +58 -0
- package/std/assembly/util/hash.ts +117 -0
- package/std/assembly/util/math.ts +1922 -0
- package/std/assembly/util/memory.ts +290 -0
- package/std/assembly/util/number.ts +873 -0
- package/std/assembly/util/sort.ts +313 -0
- package/std/assembly/util/string.ts +1202 -0
- package/std/assembly/util/uri.ts +275 -0
- package/std/assembly/vector.ts +4 -0
- package/std/assembly.json +16 -0
- package/std/portable/index.d.ts +461 -0
- package/std/portable/index.js +416 -0
- package/std/portable.json +11 -0
- package/std/types/assembly/index.d.ts +1 -0
- package/std/types/assembly/package.json +3 -0
- package/std/types/portable/index.d.ts +1 -0
- package/std/types/portable/package.json +3 -0
- package/tsconfig-base.json +13 -0
- package/util/README.md +23 -0
- package/util/browser/fs.js +1 -0
- package/util/browser/module.js +5 -0
- package/util/browser/path.js +520 -0
- package/util/browser/process.js +59 -0
- package/util/browser/url.js +23 -0
- package/util/cpu.d.ts +9 -0
- package/util/cpu.js +42 -0
- package/util/find.d.ts +6 -0
- package/util/find.js +20 -0
- package/util/node.d.ts +21 -0
- package/util/node.js +34 -0
- package/util/options.d.ts +70 -0
- package/util/options.js +262 -0
- package/util/terminal.d.ts +52 -0
- package/util/terminal.js +35 -0
- package/util/text.d.ts +26 -0
- package/util/text.js +114 -0
- package/util/tsconfig.json +9 -0
- package/util/web.d.ts +11 -0
- package/util/web.js +33 -0
|
@@ -0,0 +1,1922 @@
|
|
|
1
|
+
//
|
|
2
|
+
// Lookup data for exp2f
|
|
3
|
+
//
|
|
4
|
+
|
|
5
|
+
// @ts-ignore: decorator
|
|
6
|
+
@inline const EXP2F_TABLE_BITS = 5;
|
|
7
|
+
|
|
8
|
+
// @ts-ignore: decorator
|
|
9
|
+
@lazy @inline const EXP2F_DATA_TAB = memory.data<u64>([
|
|
10
|
+
// exp2f_data_tab[i] = uint(2^(i/N)) - (i << 52-BITS)
|
|
11
|
+
// used for computing 2^(k/N) for an int |k| < 150 N as
|
|
12
|
+
// double(tab[k%N] + (k << 52-BITS))
|
|
13
|
+
0x3FF0000000000000, 0x3FEFD9B0D3158574, 0x3FEFB5586CF9890F, 0x3FEF9301D0125B51,
|
|
14
|
+
0x3FEF72B83C7D517B, 0x3FEF54873168B9AA, 0x3FEF387A6E756238, 0x3FEF1E9DF51FDEE1,
|
|
15
|
+
0x3FEF06FE0A31B715, 0x3FEEF1A7373AA9CB, 0x3FEEDEA64C123422, 0x3FEECE086061892D,
|
|
16
|
+
0x3FEEBFDAD5362A27, 0x3FEEB42B569D4F82, 0x3FEEAB07DD485429, 0x3FEEA47EB03A5585,
|
|
17
|
+
0x3FEEA09E667F3BCD, 0x3FEE9F75E8EC5F74, 0x3FEEA11473EB0187, 0x3FEEA589994CCE13,
|
|
18
|
+
0x3FEEACE5422AA0DB, 0x3FEEB737B0CDC5E5, 0x3FEEC49182A3F090, 0x3FEED503B23E255D,
|
|
19
|
+
0x3FEEE89F995AD3AD, 0x3FEEFF76F2FB5E47, 0x3FEF199BDD85529C, 0x3FEF3720DCEF9069,
|
|
20
|
+
0x3FEF5818DCFBA487, 0x3FEF7C97337B9B5F, 0x3FEFA4AFA2A490DA, 0x3FEFD0765B6E4540
|
|
21
|
+
]);
|
|
22
|
+
|
|
23
|
+
// ULP error: 0.502 (nearest rounding.)
|
|
24
|
+
// Relative error: 1.69 * 2^-34 in [-1/64, 1/64] (before rounding.)
|
|
25
|
+
// Wrong count: 168353 (all nearest rounding wrong results with fma.)
|
|
26
|
+
// @ts-ignore: decorator
|
|
27
|
+
@inline
|
|
28
|
+
export function exp2f_lut(x: f32): f32 {
|
|
29
|
+
const
|
|
30
|
+
N = 1 << EXP2F_TABLE_BITS,
|
|
31
|
+
N_MASK = N - 1,
|
|
32
|
+
shift = reinterpret<f64>(0x4338000000000000) / N, // 0x1.8p+52
|
|
33
|
+
Ox127f = reinterpret<f32>(0x7F000000);
|
|
34
|
+
|
|
35
|
+
const
|
|
36
|
+
C0 = reinterpret<f64>(0x3FAC6AF84B912394), // 0x1.c6af84b912394p-5
|
|
37
|
+
C1 = reinterpret<f64>(0x3FCEBFCE50FAC4F3), // 0x1.ebfce50fac4f3p-3
|
|
38
|
+
C2 = reinterpret<f64>(0x3FE62E42FF0C52D6); // 0x1.62e42ff0c52d6p-1
|
|
39
|
+
|
|
40
|
+
let xd = <f64>x;
|
|
41
|
+
let ix = reinterpret<u32>(x);
|
|
42
|
+
let ux = ix >> 20 & 0x7FF;
|
|
43
|
+
if (ux >= 0x430) {
|
|
44
|
+
// |x| >= 128 or x is nan.
|
|
45
|
+
if (ix == 0xFF800000) return 0; // x == -Inf -> 0
|
|
46
|
+
if (ux >= 0x7F8) return x + x; // x == Inf/NaN -> Inf/NaN
|
|
47
|
+
if (x > 0) return x * Ox127f; // x > 0 -> HugeVal (Owerflow)
|
|
48
|
+
if (x <= -150) return 0; // x <= -150 -> 0 (Underflow)
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
// x = k/N + r with r in [-1/(2N), 1/(2N)] and int k.
|
|
52
|
+
let kd = xd + shift;
|
|
53
|
+
let ki = reinterpret<u64>(kd);
|
|
54
|
+
let r = xd - (kd - shift);
|
|
55
|
+
let t: u64, y: f64, s: f64;
|
|
56
|
+
|
|
57
|
+
// exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1)
|
|
58
|
+
t = load<u64>(EXP2F_DATA_TAB + ((<usize>ki & N_MASK) << alignof<u64>()));
|
|
59
|
+
t += ki << (52 - EXP2F_TABLE_BITS);
|
|
60
|
+
s = reinterpret<f64>(t);
|
|
61
|
+
y = C2 * r + 1;
|
|
62
|
+
y += (C0 * r + C1) * (r * r);
|
|
63
|
+
y *= s;
|
|
64
|
+
|
|
65
|
+
return <f32>y;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
// ULP error: 0.502 (nearest rounding.)
|
|
69
|
+
// Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.)
|
|
70
|
+
// Wrong count: 170635 (all nearest rounding wrong results with fma.)
|
|
71
|
+
// @ts-ignore: decorator
|
|
72
|
+
@inline
|
|
73
|
+
export function expf_lut(x: f32): f32 {
|
|
74
|
+
const
|
|
75
|
+
N = 1 << EXP2F_TABLE_BITS,
|
|
76
|
+
N_MASK = N - 1,
|
|
77
|
+
shift = reinterpret<f64>(0x4338000000000000), // 0x1.8p+52
|
|
78
|
+
InvLn2N = reinterpret<f64>(0x3FF71547652B82FE) * N, // 0x1.71547652b82fep+0
|
|
79
|
+
Ox1p127f = reinterpret<f32>(0x7F000000);
|
|
80
|
+
|
|
81
|
+
const
|
|
82
|
+
C0 = reinterpret<f64>(0x3FAC6AF84B912394) / N / N / N, // 0x1.c6af84b912394p-5
|
|
83
|
+
C1 = reinterpret<f64>(0x3FCEBFCE50FAC4F3) / N / N, // 0x1.ebfce50fac4f3p-3
|
|
84
|
+
C2 = reinterpret<f64>(0x3FE62E42FF0C52D6) / N; // 0x1.62e42ff0c52d6p-1
|
|
85
|
+
|
|
86
|
+
let xd = <f64>x;
|
|
87
|
+
let ix = reinterpret<u32>(x);
|
|
88
|
+
let ux = ix >> 20 & 0x7FF;
|
|
89
|
+
if (ux >= 0x42B) {
|
|
90
|
+
// |x| >= 88 or x is nan.
|
|
91
|
+
if (ix == 0xFF800000) return 0; // x == -Inf -> 0
|
|
92
|
+
if (ux >= 0x7F8) return x + x; // x == Inf/NaN -> Inf/NaN
|
|
93
|
+
if (x > reinterpret<f32>(0x42B17217)) return x * Ox1p127f; // x > log(0x1p128) ~= 88.72 -> HugeVal (Owerflow)
|
|
94
|
+
if (x < reinterpret<f32>(0xC2CFF1B4)) return 0; // x < log(0x1p-150) ~= -103.97 -> 0 (Underflow)
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
// x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k.
|
|
98
|
+
let z = InvLn2N * xd;
|
|
99
|
+
|
|
100
|
+
// Round and convert z to int, the result is in [-150*N, 128*N] and
|
|
101
|
+
// ideally ties-to-even rule is used, otherwise the magnitude of r
|
|
102
|
+
// can be bigger which gives larger approximation error.
|
|
103
|
+
let kd = <f64>(z + shift);
|
|
104
|
+
let ki = reinterpret<u64>(kd);
|
|
105
|
+
let r = z - (kd - shift);
|
|
106
|
+
let s: f64, y: f64, t: u64;
|
|
107
|
+
|
|
108
|
+
// exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1)
|
|
109
|
+
t = load<u64>(EXP2F_DATA_TAB + ((<usize>ki & N_MASK) << alignof<u64>()));
|
|
110
|
+
t += ki << (52 - EXP2F_TABLE_BITS);
|
|
111
|
+
s = reinterpret<f64>(t);
|
|
112
|
+
z = C0 * r + C1;
|
|
113
|
+
y = C2 * r + 1;
|
|
114
|
+
y += z * (r * r);
|
|
115
|
+
y *= s;
|
|
116
|
+
|
|
117
|
+
return <f32>y;
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
//
|
|
121
|
+
// Lookup data for log2f
|
|
122
|
+
//
|
|
123
|
+
|
|
124
|
+
// @ts-ignore: decorator
|
|
125
|
+
@inline const LOG2F_TABLE_BITS = 4;
|
|
126
|
+
|
|
127
|
+
// @ts-ignore: decorator
|
|
128
|
+
@lazy @inline const LOG2F_DATA_TAB = memory.data<u64>([
|
|
129
|
+
0x3FF661EC79F8F3BE, 0xBFDEFEC65B963019, // 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2,
|
|
130
|
+
0x3FF571ED4AAF883D, 0xBFDB0B6832D4FCA4, // 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2,
|
|
131
|
+
0x3FF49539F0F010B0, 0xBFD7418B0A1FB77B, // 0x1.49539f0f010bp+0 , -0x1.7418b0a1fb77bp-2,
|
|
132
|
+
0x3FF3C995B0B80385, 0xBFD39DE91A6DCF7B, // 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2,
|
|
133
|
+
0x3FF30D190C8864A5, 0xBFD01D9BF3F2B631, // 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2,
|
|
134
|
+
0x3FF25E227B0B8EA0, 0xBFC97C1D1B3B7AF0, // 0x1.25e227b0b8eap+0 , -0x1.97c1d1b3b7afp-3 ,
|
|
135
|
+
0x3FF1BB4A4A1A343F, 0xBFC2F9E393AF3C9F, // 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3,
|
|
136
|
+
0x3FF12358F08AE5BA, 0xBFB960CBBF788D5C, // 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4,
|
|
137
|
+
0x3FF0953F419900A7, 0xBFAA6F9DB6475FCE, // 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5,
|
|
138
|
+
0x3FF0000000000000, 0, // 0x1p+0, 0x0,
|
|
139
|
+
0x3FEE608CFD9A47AC, 0x3FB338CA9F24F53D, // 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4,
|
|
140
|
+
0x3FECA4B31F026AA0, 0x3FC476A9543891BA, // 0x1.ca4b31f026aap-1 , 0x1.476a9543891bap-3,
|
|
141
|
+
0x3FEB2036576AFCE6, 0x3FCE840B4AC4E4D2, // 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3,
|
|
142
|
+
0x3FE9C2D163A1AA2D, 0x3FD40645F0C6651C, // 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2,
|
|
143
|
+
0x3FE886E6037841ED, 0x3FD88E9C2C1B9FF8, // 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2,
|
|
144
|
+
0x3FE767DCF5534862, 0x3FDCE0A44EB17BCC // 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2
|
|
145
|
+
]);
|
|
146
|
+
|
|
147
|
+
// ULP error: 0.752 (nearest rounding.)
|
|
148
|
+
// Relative error: 1.9 * 2^-26 (before rounding.)
|
|
149
|
+
// @ts-ignore: decorator
|
|
150
|
+
@inline
|
|
151
|
+
export function log2f_lut(x: f32): f32 {
|
|
152
|
+
const
|
|
153
|
+
N_MASK = (1 << LOG2F_TABLE_BITS) - 1,
|
|
154
|
+
Ox1p23f = reinterpret<f32>(0x4B000000); // 0x1p23f
|
|
155
|
+
|
|
156
|
+
const
|
|
157
|
+
A0 = reinterpret<f64>(0xBFD712B6F70A7E4D), // -0x1.712b6f70a7e4dp-2
|
|
158
|
+
A1 = reinterpret<f64>(0x3FDECABF496832E0), // 0x1.ecabf496832ep-2
|
|
159
|
+
A2 = reinterpret<f64>(0xBFE715479FFAE3DE), // -0x1.715479ffae3dep-1
|
|
160
|
+
A3 = reinterpret<f64>(0x3FF715475F35C8B8); // 0x1.715475f35c8b8p0
|
|
161
|
+
|
|
162
|
+
let ux = reinterpret<u32>(x);
|
|
163
|
+
// Fix sign of zero with downward rounding when x==1.
|
|
164
|
+
// if (WANT_ROUNDING && predict_false(ix == 0x3f800000)) return 0;
|
|
165
|
+
if (ux - 0x00800000 >= 0x7F800000 - 0x00800000) {
|
|
166
|
+
// x < 0x1p-126 or inf or nan.
|
|
167
|
+
if (ux * 2 == 0) return -Infinity;
|
|
168
|
+
if (ux == 0x7F800000) return x; // log2(inf) == inf.
|
|
169
|
+
if ((ux >> 31) || ux * 2 >= 0xFF000000) return (x - x) / (x - x);
|
|
170
|
+
// x is subnormal, normalize it.
|
|
171
|
+
ux = reinterpret<u32>(x * Ox1p23f);
|
|
172
|
+
ux -= 23 << 23;
|
|
173
|
+
}
|
|
174
|
+
// x = 2^k z; where z is in range [OFF,2*OFF] and exact.
|
|
175
|
+
// The range is split into N subintervals.
|
|
176
|
+
// The ith subinterval contains z and c is near its center.
|
|
177
|
+
let tmp = ux - 0x3F330000;
|
|
178
|
+
let i = (tmp >> (23 - LOG2F_TABLE_BITS)) & N_MASK;
|
|
179
|
+
let top = tmp & 0xFF800000;
|
|
180
|
+
let iz = ux - top;
|
|
181
|
+
let k = <i32>tmp >> 23;
|
|
182
|
+
|
|
183
|
+
let invc = load<f64>(LOG2F_DATA_TAB + (i << (1 + alignof<f64>())), 0 << alignof<f64>());
|
|
184
|
+
let logc = load<f64>(LOG2F_DATA_TAB + (i << (1 + alignof<f64>())), 1 << alignof<f64>());
|
|
185
|
+
let z = <f64>reinterpret<f32>(iz);
|
|
186
|
+
|
|
187
|
+
// log2(x) = log1p(z/c-1)/ln2 + log2(c) + k
|
|
188
|
+
let r = z * invc - 1;
|
|
189
|
+
let y0 = logc + <f64>k;
|
|
190
|
+
|
|
191
|
+
// Pipelined polynomial evaluation to approximate log1p(r)/ln2.
|
|
192
|
+
let y = A1 * r + A2;
|
|
193
|
+
let p = A3 * r + y0;
|
|
194
|
+
let r2 = r * r;
|
|
195
|
+
y += A0 * r2;
|
|
196
|
+
y = y * r2 + p;
|
|
197
|
+
|
|
198
|
+
return <f32>y;
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
//
|
|
202
|
+
// Lookup data for logf. See: https://git.musl-libc.org/cgit/musl/tree/src/math/logf.c
|
|
203
|
+
//
|
|
204
|
+
|
|
205
|
+
// @ts-ignore: decorator
|
|
206
|
+
@inline const LOGF_TABLE_BITS = 4;
|
|
207
|
+
|
|
208
|
+
// @ts-ignore: decorator
|
|
209
|
+
@lazy @inline const LOGF_DATA_TAB = memory.data<u64>([
|
|
210
|
+
0x3FF661EC79F8F3BE, 0xBFD57BF7808CAADE, // 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2,
|
|
211
|
+
0x3FF571ED4AAF883D, 0xBFD2BEF0A7C06DDB, // 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2,
|
|
212
|
+
0x3FF49539F0F010B0, 0xBFD01EAE7F513A67, // 0x1.49539f0f010bp+0 , -0x1.01eae7f513a67p-2,
|
|
213
|
+
0x3FF3C995B0B80385, 0xBFCB31D8A68224E9, // 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3,
|
|
214
|
+
0x3FF30D190C8864A5, 0xBFC6574F0AC07758, // 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3,
|
|
215
|
+
0x3FF25E227B0B8EA0, 0xBFC1AA2BC79C8100, // 0x1.25e227b0b8eap+0 , -0x1.1aa2bc79c81p-3 ,
|
|
216
|
+
0x3FF1BB4A4A1A343F, 0xBFBA4E76CE8C0E5E, // 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4,
|
|
217
|
+
0x3FF12358F08AE5BA, 0xBFB1973C5A611CCC, // 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4,
|
|
218
|
+
0x3FF0953F419900A7, 0xBFA252F438E10C1E, // 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5,
|
|
219
|
+
0x3FF0000000000000, 0, // 0x1p+0, 0,
|
|
220
|
+
0x3FEE608CFD9A47AC, 0x3FAAA5AA5DF25984, // 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5,
|
|
221
|
+
0x3FECA4B31F026AA0, 0x3FBC5E53AA362EB4, // 0x1.ca4b31f026aap-1 , 0x1.c5e53aa362eb4p-4,
|
|
222
|
+
0x3FEB2036576AFCE6, 0x3FC526E57720DB08, // 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3,
|
|
223
|
+
0x3FE9C2D163A1AA2D, 0x3FCBC2860D224770, // 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 ,
|
|
224
|
+
0x3FE886E6037841ED, 0x3FD1058BC8A07EE1, // 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2,
|
|
225
|
+
0x3FE767DCF5534862, 0x3FD4043057B6EE09 // 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2
|
|
226
|
+
]);
|
|
227
|
+
|
|
228
|
+
// ULP error: 0.818 (nearest rounding.)
|
|
229
|
+
// Relative error: 1.957 * 2^-26 (before rounding.)
|
|
230
|
+
// @ts-ignore: decorator
|
|
231
|
+
@inline
|
|
232
|
+
export function logf_lut(x: f32): f32 {
|
|
233
|
+
const
|
|
234
|
+
N_MASK = (1 << LOGF_TABLE_BITS) - 1,
|
|
235
|
+
Ox1p23f = reinterpret<f32>(0x4B000000); // 0x1p23f
|
|
236
|
+
|
|
237
|
+
const
|
|
238
|
+
Ln2 = reinterpret<f64>(0x3FE62E42FEFA39EF), // 0x1.62e42fefa39efp-1;
|
|
239
|
+
A0 = reinterpret<f64>(0xBFD00EA348B88334), // -0x1.00ea348b88334p-2
|
|
240
|
+
A1 = reinterpret<f64>(0x3FD5575B0BE00B6A), // 0x1.5575b0be00b6ap-2
|
|
241
|
+
A2 = reinterpret<f64>(0xBFDFFFFEF20A4123); // -0x1.ffffef20a4123p-2
|
|
242
|
+
|
|
243
|
+
let ux = reinterpret<u32>(x);
|
|
244
|
+
// Fix sign of zero with downward rounding when x==1.
|
|
245
|
+
// if (WANT_ROUNDING && ux == 0x3f800000) return 0;
|
|
246
|
+
if (ux - 0x00800000 >= 0x7F800000 - 0x00800000) {
|
|
247
|
+
// x < 0x1p-126 or inf or nan.
|
|
248
|
+
if ((ux << 1) == 0) return -Infinity;
|
|
249
|
+
if (ux == 0x7F800000) return x; // log(inf) == inf.
|
|
250
|
+
if ((ux >> 31) || (ux << 1) >= 0xFF000000) return (x - x) / (x - x);
|
|
251
|
+
// x is subnormal, normalize it.
|
|
252
|
+
ux = reinterpret<u32>(x * Ox1p23f);
|
|
253
|
+
ux -= 23 << 23;
|
|
254
|
+
}
|
|
255
|
+
// x = 2^k z; where z is in range [OFF,2*OFF] and exact.
|
|
256
|
+
// The range is split into N subintervals.
|
|
257
|
+
// The ith subinterval contains z and c is near its center.
|
|
258
|
+
let tmp = ux - 0x3F330000;
|
|
259
|
+
let i = (tmp >> (23 - LOGF_TABLE_BITS)) & N_MASK;
|
|
260
|
+
let k = <i32>tmp >> 23;
|
|
261
|
+
let iz = ux - (tmp & 0x1FF << 23);
|
|
262
|
+
|
|
263
|
+
let invc = load<f64>(LOGF_DATA_TAB + (i << (1 + alignof<f64>())), 0 << alignof<f64>());
|
|
264
|
+
let logc = load<f64>(LOGF_DATA_TAB + (i << (1 + alignof<f64>())), 1 << alignof<f64>());
|
|
265
|
+
|
|
266
|
+
let z = <f64>reinterpret<f32>(iz);
|
|
267
|
+
|
|
268
|
+
// log(x) = log1p(z/c-1) + log(c) + k*Ln2
|
|
269
|
+
let r = z * invc - 1;
|
|
270
|
+
let y0 = logc + <f64>k * Ln2;
|
|
271
|
+
|
|
272
|
+
// Pipelined polynomial evaluation to approximate log1p(r).
|
|
273
|
+
let r2 = r * r;
|
|
274
|
+
let y = A1 * r + A2;
|
|
275
|
+
y += A0 * r2;
|
|
276
|
+
y = y * r2 + (y0 + r);
|
|
277
|
+
|
|
278
|
+
return <f32>y;
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
//
|
|
282
|
+
// Lookup data for powf. See: https://git.musl-libc.org/cgit/musl/tree/src/math/powf.c
|
|
283
|
+
//
|
|
284
|
+
|
|
285
|
+
// @ts-ignore: decorator
|
|
286
|
+
@inline
|
|
287
|
+
function zeroinfnanf(ux: u32): bool {
|
|
288
|
+
return (ux << 1) - 1 >= (<u32>0x7f800000 << 1) - 1;
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
// Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
|
|
292
|
+
// the bit representation of a non-zero finite floating-point value.
|
|
293
|
+
// @ts-ignore: decorator
|
|
294
|
+
@inline
|
|
295
|
+
function checkintf(iy: u32): i32 {
|
|
296
|
+
let e = iy >> 23 & 0xFF;
|
|
297
|
+
if (e < 0x7F ) return 0;
|
|
298
|
+
if (e > 0x7F + 23) return 2;
|
|
299
|
+
e = 1 << (0x7F + 23 - e);
|
|
300
|
+
if (iy & (e - 1)) return 0;
|
|
301
|
+
if (iy & e ) return 1;
|
|
302
|
+
return 2;
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
// Subnormal input is normalized so ix has negative biased exponent.
|
|
306
|
+
// Output is multiplied by N (POWF_SCALE) if TOINT_INTRINICS is set.
|
|
307
|
+
// @ts-ignore: decorator
|
|
308
|
+
@inline
|
|
309
|
+
function log2f_inline(ux: u32): f64 {
|
|
310
|
+
const N_MASK = (1 << LOG2F_TABLE_BITS) - 1;
|
|
311
|
+
|
|
312
|
+
const
|
|
313
|
+
A0 = reinterpret<f64>(0x3FD27616C9496E0B), // 0x1.27616c9496e0bp-2
|
|
314
|
+
A1 = reinterpret<f64>(0xBFD71969A075C67A), // -0x1.71969a075c67ap-2
|
|
315
|
+
A2 = reinterpret<f64>(0x3FDEC70A6CA7BADD), // 0x1.ec70a6ca7baddp-2
|
|
316
|
+
A3 = reinterpret<f64>(0xBFE7154748BEF6C8), // -0x1.7154748bef6c8p-1
|
|
317
|
+
A4 = reinterpret<f64>(0x3FF71547652AB82B); // 0x1.71547652ab82bp+0
|
|
318
|
+
|
|
319
|
+
// x = 2^k z; where z is in range [OFF,2*OFF] and exact.
|
|
320
|
+
// The range is split into N subintervals.
|
|
321
|
+
// The ith subinterval contains z and c is near its center.
|
|
322
|
+
let tmp = ux - 0x3F330000;
|
|
323
|
+
let i = usize((tmp >> (23 - LOG2F_TABLE_BITS)) & N_MASK);
|
|
324
|
+
let top = tmp & 0xFF800000;
|
|
325
|
+
let uz = ux - top;
|
|
326
|
+
let k = <i32>top >> 23;
|
|
327
|
+
|
|
328
|
+
let invc = load<f64>(LOG2F_DATA_TAB + (i << (1 + alignof<f64>())), 0 << alignof<f64>());
|
|
329
|
+
let logc = load<f64>(LOG2F_DATA_TAB + (i << (1 + alignof<f64>())), 1 << alignof<f64>());
|
|
330
|
+
let z = <f64>reinterpret<f32>(uz);
|
|
331
|
+
|
|
332
|
+
// log2(x) = log1p(z/c-1)/ln2 + log2(c) + k
|
|
333
|
+
let r = z * invc - 1;
|
|
334
|
+
let y0 = logc + <f64>k;
|
|
335
|
+
|
|
336
|
+
// Pipelined polynomial evaluation to approximate log1p(r)/ln2.
|
|
337
|
+
let y = A0 * r + A1;
|
|
338
|
+
let p = A2 * r + A3;
|
|
339
|
+
let q = A4 * r + y0;
|
|
340
|
+
|
|
341
|
+
r *= r;
|
|
342
|
+
q += p * r;
|
|
343
|
+
y = y * (r * r) + q;
|
|
344
|
+
|
|
345
|
+
return y;
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
// The output of log2 and thus the input of exp2 is either scaled by N
|
|
349
|
+
// (in case of fast toint intrinsics) or not. The unscaled xd must be
|
|
350
|
+
// in [-1021,1023], sign_bias sets the sign of the result.
|
|
351
|
+
// @ts-ignore: decorator
|
|
352
|
+
@inline
|
|
353
|
+
function exp2f_inline(xd: f64, signBias: u32): f32 {
|
|
354
|
+
const
|
|
355
|
+
N = 1 << EXP2F_TABLE_BITS,
|
|
356
|
+
N_MASK = N - 1,
|
|
357
|
+
shift = reinterpret<f64>(0x4338000000000000) / N; // 0x1.8p+52
|
|
358
|
+
|
|
359
|
+
const
|
|
360
|
+
C0 = reinterpret<f64>(0x3FAC6AF84B912394), // 0x1.c6af84b912394p-5
|
|
361
|
+
C1 = reinterpret<f64>(0x3FCEBFCE50FAC4F3), // 0x1.ebfce50fac4f3p-3
|
|
362
|
+
C2 = reinterpret<f64>(0x3FE62E42FF0C52D6); // 0x1.62e42ff0c52d6p-1
|
|
363
|
+
|
|
364
|
+
// x = k/N + r with r in [-1/(2N), 1/(2N)]
|
|
365
|
+
let kd = <f64>(xd + shift);
|
|
366
|
+
let ki = reinterpret<u64>(kd);
|
|
367
|
+
let r = xd - (kd - shift);
|
|
368
|
+
let t: u64, z: f64, y: f64, s: f64;
|
|
369
|
+
|
|
370
|
+
// exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1)
|
|
371
|
+
t = load<u64>(EXP2F_DATA_TAB + ((<usize>ki & N_MASK) << alignof<u64>()));
|
|
372
|
+
t += (ki + signBias) << (52 - EXP2F_TABLE_BITS);
|
|
373
|
+
s = reinterpret<f64>(t);
|
|
374
|
+
z = C0 * r + C1;
|
|
375
|
+
y = C2 * r + 1;
|
|
376
|
+
y += z * (r * r);
|
|
377
|
+
y *= s;
|
|
378
|
+
return <f32>y;
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
// @ts-ignore: decorator
|
|
382
|
+
@inline
|
|
383
|
+
function xflowf(sign: u32, y: f32): f32 {
|
|
384
|
+
return select<f32>(-y, y, sign) * y;
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
// @ts-ignore: decorator
|
|
388
|
+
@inline
|
|
389
|
+
function oflowf(sign: u32): f32 {
|
|
390
|
+
return xflowf(sign, reinterpret<f32>(0x70000000)); // 0x1p97f
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
// @ts-ignore: decorator
|
|
394
|
+
@inline
|
|
395
|
+
function uflowf(sign: u32): f32 {
|
|
396
|
+
return xflowf(sign, reinterpret<f32>(0x10000000)); // 0x1p-95f
|
|
397
|
+
}
|
|
398
|
+
|
|
399
|
+
// @ts-ignore: decorator
|
|
400
|
+
@inline
|
|
401
|
+
export function powf_lut(x: f32, y: f32): f32 {
|
|
402
|
+
const
|
|
403
|
+
Ox1p23f = reinterpret<f32>(0x4B000000), // 0x1p23f
|
|
404
|
+
UPPER_LIMIT = reinterpret<f64>(0x405FFFFFFFD1D571), // 0x1.fffffffd1d571p+6
|
|
405
|
+
LOWER_LIMIT = -150.0,
|
|
406
|
+
SIGN_BIAS = 1 << (EXP2F_TABLE_BITS + 11);
|
|
407
|
+
|
|
408
|
+
let signBias: u32 = 0;
|
|
409
|
+
let ix = reinterpret<u32>(x);
|
|
410
|
+
let iy = reinterpret<u32>(y);
|
|
411
|
+
let ny = 0;
|
|
412
|
+
|
|
413
|
+
if (i32(ix - 0x00800000 >= 0x7f800000 - 0x00800000) | (ny = i32(zeroinfnanf(iy)))) {
|
|
414
|
+
// Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan).
|
|
415
|
+
if (ny) {
|
|
416
|
+
if ((iy << 1) == 0) return 1.0;
|
|
417
|
+
if (ix == 0x3F800000) return NaN; // original: 1.0
|
|
418
|
+
if ((ix << 1) > (<u32>0x7F800000 << 1) || (iy << 1) > (<u32>0x7F800000 << 1)) return x + y;
|
|
419
|
+
if ((ix << 1) == (0x3F800000 << 1)) return NaN; // original: 1.0
|
|
420
|
+
if (((ix << 1) < (0x3F800000 << 1)) == !(iy >> 31)) return 0; // |x| < 1 && y==inf or |x| > 1 && y==-inf.
|
|
421
|
+
return y * y;
|
|
422
|
+
}
|
|
423
|
+
if (zeroinfnanf(ix)) {
|
|
424
|
+
let x2 = x * x;
|
|
425
|
+
if ((ix >> 31) && checkintf(iy) == 1) x2 = -x2;
|
|
426
|
+
return <i32>iy < 0 ? 1 / x2 : x2;
|
|
427
|
+
}
|
|
428
|
+
// x and y are non-zero finite.
|
|
429
|
+
if (<i32>ix < 0) {
|
|
430
|
+
// Finite x < 0.
|
|
431
|
+
let yint = checkintf(iy);
|
|
432
|
+
if (yint == 0) return (x - x) / (x - x);
|
|
433
|
+
if (yint == 1) signBias = SIGN_BIAS;
|
|
434
|
+
ix &= 0x7FFFFFFF;
|
|
435
|
+
}
|
|
436
|
+
if (ix < 0x00800000) {
|
|
437
|
+
// Normalize subnormal x so exponent becomes negative.
|
|
438
|
+
ix = reinterpret<u32>(x * Ox1p23f);
|
|
439
|
+
ix &= 0x7FFFFFFF;
|
|
440
|
+
ix -= 23 << 23;
|
|
441
|
+
}
|
|
442
|
+
}
|
|
443
|
+
let logx = log2f_inline(ix);
|
|
444
|
+
let ylogx = y * logx; // cannot overflow, y is single prec.
|
|
445
|
+
if ((reinterpret<u64>(ylogx) >> 47 & 0xFFFF) >= 0x80BF) { // reinterpret<u64>(126.0) >> 47
|
|
446
|
+
// |y * log(x)| >= 126
|
|
447
|
+
if (ylogx > UPPER_LIMIT) return oflowf(signBias); // overflow
|
|
448
|
+
if (ylogx <= LOWER_LIMIT) return uflowf(signBias); // underflow
|
|
449
|
+
}
|
|
450
|
+
return exp2f_inline(ylogx, signBias);
|
|
451
|
+
}
|
|
452
|
+
|
|
453
|
+
//
|
|
454
|
+
// Lookup data for exp. See: https://git.musl-libc.org/cgit/musl/tree/src/math/exp.c
|
|
455
|
+
//
|
|
456
|
+
|
|
457
|
+
// @ts-ignore: decorator
|
|
458
|
+
@inline const EXP_TABLE_BITS = 7;
|
|
459
|
+
|
|
460
|
+
// @ts-ignore: decorator
|
|
461
|
+
@lazy @inline const EXP_DATA_TAB = memory.data<u64>([
|
|
462
|
+
0x0000000000000000, 0x3FF0000000000000,
|
|
463
|
+
0x3C9B3B4F1A88BF6E, 0x3FEFF63DA9FB3335,
|
|
464
|
+
0xBC7160139CD8DC5D, 0x3FEFEC9A3E778061,
|
|
465
|
+
0xBC905E7A108766D1, 0x3FEFE315E86E7F85,
|
|
466
|
+
0x3C8CD2523567F613, 0x3FEFD9B0D3158574,
|
|
467
|
+
0xBC8BCE8023F98EFA, 0x3FEFD06B29DDF6DE,
|
|
468
|
+
0x3C60F74E61E6C861, 0x3FEFC74518759BC8,
|
|
469
|
+
0x3C90A3E45B33D399, 0x3FEFBE3ECAC6F383,
|
|
470
|
+
0x3C979AA65D837B6D, 0x3FEFB5586CF9890F,
|
|
471
|
+
0x3C8EB51A92FDEFFC, 0x3FEFAC922B7247F7,
|
|
472
|
+
0x3C3EBE3D702F9CD1, 0x3FEFA3EC32D3D1A2,
|
|
473
|
+
0xBC6A033489906E0B, 0x3FEF9B66AFFED31B,
|
|
474
|
+
0xBC9556522A2FBD0E, 0x3FEF9301D0125B51,
|
|
475
|
+
0xBC5080EF8C4EEA55, 0x3FEF8ABDC06C31CC,
|
|
476
|
+
0xBC91C923B9D5F416, 0x3FEF829AAEA92DE0,
|
|
477
|
+
0x3C80D3E3E95C55AF, 0x3FEF7A98C8A58E51,
|
|
478
|
+
0xBC801B15EAA59348, 0x3FEF72B83C7D517B,
|
|
479
|
+
0xBC8F1FF055DE323D, 0x3FEF6AF9388C8DEA,
|
|
480
|
+
0x3C8B898C3F1353BF, 0x3FEF635BEB6FCB75,
|
|
481
|
+
0xBC96D99C7611EB26, 0x3FEF5BE084045CD4,
|
|
482
|
+
0x3C9AECF73E3A2F60, 0x3FEF54873168B9AA,
|
|
483
|
+
0xBC8FE782CB86389D, 0x3FEF4D5022FCD91D,
|
|
484
|
+
0x3C8A6F4144A6C38D, 0x3FEF463B88628CD6,
|
|
485
|
+
0x3C807A05B0E4047D, 0x3FEF3F49917DDC96,
|
|
486
|
+
0x3C968EFDE3A8A894, 0x3FEF387A6E756238,
|
|
487
|
+
0x3C875E18F274487D, 0x3FEF31CE4FB2A63F,
|
|
488
|
+
0x3C80472B981FE7F2, 0x3FEF2B4565E27CDD,
|
|
489
|
+
0xBC96B87B3F71085E, 0x3FEF24DFE1F56381,
|
|
490
|
+
0x3C82F7E16D09AB31, 0x3FEF1E9DF51FDEE1,
|
|
491
|
+
0xBC3D219B1A6FBFFA, 0x3FEF187FD0DAD990,
|
|
492
|
+
0x3C8B3782720C0AB4, 0x3FEF1285A6E4030B,
|
|
493
|
+
0x3C6E149289CECB8F, 0x3FEF0CAFA93E2F56,
|
|
494
|
+
0x3C834D754DB0ABB6, 0x3FEF06FE0A31B715,
|
|
495
|
+
0x3C864201E2AC744C, 0x3FEF0170FC4CD831,
|
|
496
|
+
0x3C8FDD395DD3F84A, 0x3FEEFC08B26416FF,
|
|
497
|
+
0xBC86A3803B8E5B04, 0x3FEEF6C55F929FF1,
|
|
498
|
+
0xBC924AEDCC4B5068, 0x3FEEF1A7373AA9CB,
|
|
499
|
+
0xBC9907F81B512D8E, 0x3FEEECAE6D05D866,
|
|
500
|
+
0xBC71D1E83E9436D2, 0x3FEEE7DB34E59FF7,
|
|
501
|
+
0xBC991919B3CE1B15, 0x3FEEE32DC313A8E5,
|
|
502
|
+
0x3C859F48A72A4C6D, 0x3FEEDEA64C123422,
|
|
503
|
+
0xBC9312607A28698A, 0x3FEEDA4504AC801C,
|
|
504
|
+
0xBC58A78F4817895B, 0x3FEED60A21F72E2A,
|
|
505
|
+
0xBC7C2C9B67499A1B, 0x3FEED1F5D950A897,
|
|
506
|
+
0x3C4363ED60C2AC11, 0x3FEECE086061892D,
|
|
507
|
+
0x3C9666093B0664EF, 0x3FEECA41ED1D0057,
|
|
508
|
+
0x3C6ECCE1DAA10379, 0x3FEEC6A2B5C13CD0,
|
|
509
|
+
0x3C93FF8E3F0F1230, 0x3FEEC32AF0D7D3DE,
|
|
510
|
+
0x3C7690CEBB7AAFB0, 0x3FEEBFDAD5362A27,
|
|
511
|
+
0x3C931DBDEB54E077, 0x3FEEBCB299FDDD0D,
|
|
512
|
+
0xBC8F94340071A38E, 0x3FEEB9B2769D2CA7,
|
|
513
|
+
0xBC87DECCDC93A349, 0x3FEEB6DAA2CF6642,
|
|
514
|
+
0xBC78DEC6BD0F385F, 0x3FEEB42B569D4F82,
|
|
515
|
+
0xBC861246EC7B5CF6, 0x3FEEB1A4CA5D920F,
|
|
516
|
+
0x3C93350518FDD78E, 0x3FEEAF4736B527DA,
|
|
517
|
+
0x3C7B98B72F8A9B05, 0x3FEEAD12D497C7FD,
|
|
518
|
+
0x3C9063E1E21C5409, 0x3FEEAB07DD485429,
|
|
519
|
+
0x3C34C7855019C6EA, 0x3FEEA9268A5946B7,
|
|
520
|
+
0x3C9432E62B64C035, 0x3FEEA76F15AD2148,
|
|
521
|
+
0xBC8CE44A6199769F, 0x3FEEA5E1B976DC09,
|
|
522
|
+
0xBC8C33C53BEF4DA8, 0x3FEEA47EB03A5585,
|
|
523
|
+
0xBC845378892BE9AE, 0x3FEEA34634CCC320,
|
|
524
|
+
0xBC93CEDD78565858, 0x3FEEA23882552225,
|
|
525
|
+
0x3C5710AA807E1964, 0x3FEEA155D44CA973,
|
|
526
|
+
0xBC93B3EFBF5E2228, 0x3FEEA09E667F3BCD,
|
|
527
|
+
0xBC6A12AD8734B982, 0x3FEEA012750BDABF,
|
|
528
|
+
0xBC6367EFB86DA9EE, 0x3FEE9FB23C651A2F,
|
|
529
|
+
0xBC80DC3D54E08851, 0x3FEE9F7DF9519484,
|
|
530
|
+
0xBC781F647E5A3ECF, 0x3FEE9F75E8EC5F74,
|
|
531
|
+
0xBC86EE4AC08B7DB0, 0x3FEE9F9A48A58174,
|
|
532
|
+
0xBC8619321E55E68A, 0x3FEE9FEB564267C9,
|
|
533
|
+
0x3C909CCB5E09D4D3, 0x3FEEA0694FDE5D3F,
|
|
534
|
+
0xBC7B32DCB94DA51D, 0x3FEEA11473EB0187,
|
|
535
|
+
0x3C94ECFD5467C06B, 0x3FEEA1ED0130C132,
|
|
536
|
+
0x3C65EBE1ABD66C55, 0x3FEEA2F336CF4E62,
|
|
537
|
+
0xBC88A1C52FB3CF42, 0x3FEEA427543E1A12,
|
|
538
|
+
0xBC9369B6F13B3734, 0x3FEEA589994CCE13,
|
|
539
|
+
0xBC805E843A19FF1E, 0x3FEEA71A4623C7AD,
|
|
540
|
+
0xBC94D450D872576E, 0x3FEEA8D99B4492ED,
|
|
541
|
+
0x3C90AD675B0E8A00, 0x3FEEAAC7D98A6699,
|
|
542
|
+
0x3C8DB72FC1F0EAB4, 0x3FEEACE5422AA0DB,
|
|
543
|
+
0xBC65B6609CC5E7FF, 0x3FEEAF3216B5448C,
|
|
544
|
+
0x3C7BF68359F35F44, 0x3FEEB1AE99157736,
|
|
545
|
+
0xBC93091FA71E3D83, 0x3FEEB45B0B91FFC6,
|
|
546
|
+
0xBC5DA9B88B6C1E29, 0x3FEEB737B0CDC5E5,
|
|
547
|
+
0xBC6C23F97C90B959, 0x3FEEBA44CBC8520F,
|
|
548
|
+
0xBC92434322F4F9AA, 0x3FEEBD829FDE4E50,
|
|
549
|
+
0xBC85CA6CD7668E4B, 0x3FEEC0F170CA07BA,
|
|
550
|
+
0x3C71AFFC2B91CE27, 0x3FEEC49182A3F090,
|
|
551
|
+
0x3C6DD235E10A73BB, 0x3FEEC86319E32323,
|
|
552
|
+
0xBC87C50422622263, 0x3FEECC667B5DE565,
|
|
553
|
+
0x3C8B1C86E3E231D5, 0x3FEED09BEC4A2D33,
|
|
554
|
+
0xBC91BBD1D3BCBB15, 0x3FEED503B23E255D,
|
|
555
|
+
0x3C90CC319CEE31D2, 0x3FEED99E1330B358,
|
|
556
|
+
0x3C8469846E735AB3, 0x3FEEDE6B5579FDBF,
|
|
557
|
+
0xBC82DFCD978E9DB4, 0x3FEEE36BBFD3F37A,
|
|
558
|
+
0x3C8C1A7792CB3387, 0x3FEEE89F995AD3AD,
|
|
559
|
+
0xBC907B8F4AD1D9FA, 0x3FEEEE07298DB666,
|
|
560
|
+
0xBC55C3D956DCAEBA, 0x3FEEF3A2B84F15FB,
|
|
561
|
+
0xBC90A40E3DA6F640, 0x3FEEF9728DE5593A,
|
|
562
|
+
0xBC68D6F438AD9334, 0x3FEEFF76F2FB5E47,
|
|
563
|
+
0xBC91EEE26B588A35, 0x3FEF05B030A1064A,
|
|
564
|
+
0x3C74FFD70A5FDDCD, 0x3FEF0C1E904BC1D2,
|
|
565
|
+
0xBC91BDFBFA9298AC, 0x3FEF12C25BD71E09,
|
|
566
|
+
0x3C736EAE30AF0CB3, 0x3FEF199BDD85529C,
|
|
567
|
+
0x3C8EE3325C9FFD94, 0x3FEF20AB5FFFD07A,
|
|
568
|
+
0x3C84E08FD10959AC, 0x3FEF27F12E57D14B,
|
|
569
|
+
0x3C63CDAF384E1A67, 0x3FEF2F6D9406E7B5,
|
|
570
|
+
0x3C676B2C6C921968, 0x3FEF3720DCEF9069,
|
|
571
|
+
0xBC808A1883CCB5D2, 0x3FEF3F0B555DC3FA,
|
|
572
|
+
0xBC8FAD5D3FFFFA6F, 0x3FEF472D4A07897C,
|
|
573
|
+
0xBC900DAE3875A949, 0x3FEF4F87080D89F2,
|
|
574
|
+
0x3C74A385A63D07A7, 0x3FEF5818DCFBA487,
|
|
575
|
+
0xBC82919E2040220F, 0x3FEF60E316C98398,
|
|
576
|
+
0x3C8E5A50D5C192AC, 0x3FEF69E603DB3285,
|
|
577
|
+
0x3C843A59AC016B4B, 0x3FEF7321F301B460,
|
|
578
|
+
0xBC82D52107B43E1F, 0x3FEF7C97337B9B5F,
|
|
579
|
+
0xBC892AB93B470DC9, 0x3FEF864614F5A129,
|
|
580
|
+
0x3C74B604603A88D3, 0x3FEF902EE78B3FF6,
|
|
581
|
+
0x3C83C5EC519D7271, 0x3FEF9A51FBC74C83,
|
|
582
|
+
0xBC8FF7128FD391F0, 0x3FEFA4AFA2A490DA,
|
|
583
|
+
0xBC8DAE98E223747D, 0x3FEFAF482D8E67F1,
|
|
584
|
+
0x3C8EC3BC41AA2008, 0x3FEFBA1BEE615A27,
|
|
585
|
+
0x3C842B94C3A9EB32, 0x3FEFC52B376BBA97,
|
|
586
|
+
0x3C8A64A931D185EE, 0x3FEFD0765B6E4540,
|
|
587
|
+
0xBC8E37BAE43BE3ED, 0x3FEFDBFDAD9CBE14,
|
|
588
|
+
0x3C77893B4D91CD9D, 0x3FEFE7C1819E90D8,
|
|
589
|
+
0x3C5305C14160CC89, 0x3FEFF3C22B8F71F1
|
|
590
|
+
]);
|
|
591
|
+
|
|
592
|
+
// Handle cases that may overflow or underflow when computing the result that
|
|
593
|
+
// is scale*(1+TMP) without intermediate rounding. The bit representation of
|
|
594
|
+
// scale is in SBITS, however it has a computed exponent that may have
|
|
595
|
+
// overflown into the sign bit so that needs to be adjusted before using it as
|
|
596
|
+
// a double. (int32_t)KI is the k used in the argument reduction and exponent
|
|
597
|
+
// adjustment of scale, positive k here means the result may overflow and
|
|
598
|
+
// negative k means the result may underflow.
|
|
599
|
+
// @ts-ignore: decorator
|
|
600
|
+
@inline
|
|
601
|
+
function specialcase(tmp: f64, sbits: u64, ki: u64): f64 {
|
|
602
|
+
const
|
|
603
|
+
Ox1p_1022 = reinterpret<f64>(0x0010000000000000), // 0x1p-1022
|
|
604
|
+
Ox1p1009 = reinterpret<f64>(0x7F00000000000000); // 0x1p1009
|
|
605
|
+
|
|
606
|
+
let scale: f64;
|
|
607
|
+
if (!(ki & 0x80000000)) {
|
|
608
|
+
// k > 0, the exponent of scale might have overflowed by <= 460.
|
|
609
|
+
sbits -= u64(1009) << 52;
|
|
610
|
+
scale = reinterpret<f64>(sbits);
|
|
611
|
+
return Ox1p1009 * (scale + scale * tmp); // 0x1p1009
|
|
612
|
+
}
|
|
613
|
+
// k < 0, need special care in the subnormal range.
|
|
614
|
+
sbits += u64(1022) << 52;
|
|
615
|
+
// Note: sbits is signed scale.
|
|
616
|
+
scale = reinterpret<f64>(sbits);
|
|
617
|
+
let y = scale + scale * tmp;
|
|
618
|
+
if (abs(y) < 1.0) {
|
|
619
|
+
// Round y to the right precision before scaling it into the subnormal
|
|
620
|
+
// range to avoid double rounding that can cause 0.5+E/2 ulp error where
|
|
621
|
+
// E is the worst-case ulp error outside the subnormal range. So this
|
|
622
|
+
// is only useful if the goal is better than 1 ulp worst-case error.
|
|
623
|
+
let one = copysign(1.0, y);
|
|
624
|
+
let lo = scale - y + scale * tmp;
|
|
625
|
+
let hi = one + y;
|
|
626
|
+
lo = one - hi + y + lo;
|
|
627
|
+
y = (hi + lo) - one;
|
|
628
|
+
// Fix the sign of 0.
|
|
629
|
+
if (y == 0.0) y = reinterpret<f64>(sbits & 0x8000000000000000);
|
|
630
|
+
}
|
|
631
|
+
return y * Ox1p_1022;
|
|
632
|
+
}
|
|
633
|
+
|
|
634
|
+
// @ts-ignore: decorator
|
|
635
|
+
@inline
|
|
636
|
+
export function exp_lut(x: f64): f64 {
|
|
637
|
+
const
|
|
638
|
+
N = 1 << EXP_TABLE_BITS,
|
|
639
|
+
N_MASK = N - 1;
|
|
640
|
+
|
|
641
|
+
const
|
|
642
|
+
InvLn2N = reinterpret<f64>(0x3FF71547652B82FE) * N, // 0x1.71547652b82fep0
|
|
643
|
+
NegLn2hiN = reinterpret<f64>(0xBF762E42FEFA0000), // -0x1.62e42fefa0000p-8
|
|
644
|
+
NegLn2loN = reinterpret<f64>(0xBD0CF79ABC9E3B3A), // -0x1.cf79abc9e3b3ap-47
|
|
645
|
+
shift = reinterpret<f64>(0x4338000000000000); // 0x1.8p52;
|
|
646
|
+
|
|
647
|
+
const
|
|
648
|
+
C2 = reinterpret<f64>(0x3FDFFFFFFFFFFDBD), // __exp_data.poly[0] (0x1.ffffffffffdbdp-2)
|
|
649
|
+
C3 = reinterpret<f64>(0x3FC555555555543C), // __exp_data.poly[1] (0x1.555555555543cp-3)
|
|
650
|
+
C4 = reinterpret<f64>(0x3FA55555CF172B91), // __exp_data.poly[2] (0x1.55555cf172b91p-5)
|
|
651
|
+
C5 = reinterpret<f64>(0x3F81111167A4D017); // __exp_data.poly[3] (0x1.1111167a4d017p-7)
|
|
652
|
+
|
|
653
|
+
let ux = reinterpret<u64>(x);
|
|
654
|
+
let abstop = u32(ux >> 52) & 0x7FF;
|
|
655
|
+
if (abstop - 0x3C9 >= 0x03F) {
|
|
656
|
+
if (abstop - 0x3C9 >= 0x80000000) return 1;
|
|
657
|
+
if (abstop >= 0x409) {
|
|
658
|
+
if (ux == 0xFFF0000000000000) return 0;
|
|
659
|
+
if (abstop >= 0x7FF) {
|
|
660
|
+
return 1.0 + x;
|
|
661
|
+
} else {
|
|
662
|
+
return select<f64>(0, Infinity, <i64>ux < 0);
|
|
663
|
+
}
|
|
664
|
+
}
|
|
665
|
+
// Large x is special cased below.
|
|
666
|
+
abstop = 0;
|
|
667
|
+
}
|
|
668
|
+
|
|
669
|
+
// exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]
|
|
670
|
+
// x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]
|
|
671
|
+
let z = InvLn2N * x;
|
|
672
|
+
// #if TOINT_INTRINSICS
|
|
673
|
+
// kd = roundtoint(z);
|
|
674
|
+
// ki = converttoint(z);
|
|
675
|
+
// #elif EXP_USE_TOINT_NARROW
|
|
676
|
+
// // z - kd is in [-0.5-2^-16, 0.5] in all rounding modes.
|
|
677
|
+
// let kd = z + shift;
|
|
678
|
+
// let ki = reinterpret<u64>(kd) >> 16;
|
|
679
|
+
// let kd = <f64><i32>ki;
|
|
680
|
+
// #else
|
|
681
|
+
// z - kd is in [-1, 1] in non-nearest rounding modes.
|
|
682
|
+
let kd = z + shift;
|
|
683
|
+
let ki = reinterpret<u64>(kd);
|
|
684
|
+
kd -= shift;
|
|
685
|
+
// #endif
|
|
686
|
+
let r = x + kd * NegLn2hiN + kd * NegLn2loN;
|
|
687
|
+
// 2^(k/N) ~= scale * (1 + tail).
|
|
688
|
+
let idx = usize((ki & N_MASK) << 1);
|
|
689
|
+
let top = ki << (52 - EXP_TABLE_BITS);
|
|
690
|
+
|
|
691
|
+
let tail = reinterpret<f64>(load<u64>(EXP_DATA_TAB + (idx << alignof<u64>()))); // T[idx]
|
|
692
|
+
// This is only a valid scale when -1023*N < k < 1024*N
|
|
693
|
+
let sbits = load<u64>(EXP_DATA_TAB + (idx << alignof<u64>()), 1 << alignof<u64>()) + top; // T[idx + 1]
|
|
694
|
+
// exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1).
|
|
695
|
+
// Evaluation is optimized assuming superscalar pipelined execution.
|
|
696
|
+
let r2 = r * r;
|
|
697
|
+
// Without fma the worst case error is 0.25/N ulp larger.
|
|
698
|
+
// Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp.
|
|
699
|
+
let tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
|
|
700
|
+
if (abstop == 0) return specialcase(tmp, sbits, ki);
|
|
701
|
+
let scale = reinterpret<f64>(sbits);
|
|
702
|
+
// Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
|
|
703
|
+
// is no spurious underflow here even without fma.
|
|
704
|
+
return scale + scale * tmp;
|
|
705
|
+
}
|
|
706
|
+
|
|
707
|
+
//
|
|
708
|
+
// Lookup data for exp2. See: https://git.musl-libc.org/cgit/musl/tree/src/math/exp2.c
|
|
709
|
+
//
|
|
710
|
+
|
|
711
|
+
// Handle cases that may overflow or underflow when computing the result that
|
|
712
|
+
// is scale*(1+TMP) without intermediate rounding. The bit representation of
|
|
713
|
+
// scale is in SBITS, however it has a computed exponent that may have
|
|
714
|
+
// overflown into the sign bit so that needs to be adjusted before using it as
|
|
715
|
+
// a double. (int32_t)KI is the k used in the argument reduction and exponent
|
|
716
|
+
// adjustment of scale, positive k here means the result may overflow and
|
|
717
|
+
// negative k means the result may underflow.
|
|
718
|
+
// @ts-ignore: decorator
|
|
719
|
+
@inline
|
|
720
|
+
function specialcase2(tmp: f64, sbits: u64, ki: u64): f64 {
|
|
721
|
+
const Ox1p_1022 = reinterpret<f64>(0x10000000000000); // 0x1p-1022
|
|
722
|
+
let scale: f64;
|
|
723
|
+
if ((ki & 0x80000000) == 0) {
|
|
724
|
+
// k > 0, the exponent of scale might have overflowed by 1
|
|
725
|
+
sbits -= u64(1) << 52;
|
|
726
|
+
scale = reinterpret<f64>(sbits);
|
|
727
|
+
return 2 * (scale * tmp + scale);
|
|
728
|
+
}
|
|
729
|
+
// k < 0, need special care in the subnormal range
|
|
730
|
+
sbits += u64(1022) << 52;
|
|
731
|
+
scale = reinterpret<f64>(sbits);
|
|
732
|
+
let y = scale * tmp + scale;
|
|
733
|
+
if (y < 1.0) {
|
|
734
|
+
// Round y to the right precision before scaling it into the subnormal
|
|
735
|
+
// range to avoid double rounding that can cause 0.5+E/2 ulp error where
|
|
736
|
+
// E is the worst-case ulp error outside the subnormal range. So this
|
|
737
|
+
// is only useful if the goal is better than 1 ulp worst-case error.
|
|
738
|
+
let hi: f64, lo: f64;
|
|
739
|
+
lo = scale - y + scale * tmp;
|
|
740
|
+
hi = 1.0 + y;
|
|
741
|
+
lo = 1.0 - hi + y + lo;
|
|
742
|
+
y = (hi + lo) - 1.0;
|
|
743
|
+
}
|
|
744
|
+
return y * Ox1p_1022;
|
|
745
|
+
}
|
|
746
|
+
|
|
747
|
+
// @ts-ignore: decorator
|
|
748
|
+
@inline
|
|
749
|
+
export function exp2_lut(x: f64): f64 {
|
|
750
|
+
const
|
|
751
|
+
N = 1 << EXP_TABLE_BITS,
|
|
752
|
+
N_MASK = N - 1,
|
|
753
|
+
shift = reinterpret<f64>(0x4338000000000000) / N; // 0x1.8p52
|
|
754
|
+
|
|
755
|
+
const
|
|
756
|
+
C1 = reinterpret<f64>(0x3FE62E42FEFA39EF), // 0x1.62e42fefa39efp-1
|
|
757
|
+
C2 = reinterpret<f64>(0x3FCEBFBDFF82C424), // 0x1.ebfbdff82c424p-3
|
|
758
|
+
C3 = reinterpret<f64>(0x3FAC6B08D70CF4B5), // 0x1.c6b08d70cf4b5p-5
|
|
759
|
+
C4 = reinterpret<f64>(0x3F83B2ABD24650CC), // 0x1.3b2abd24650ccp-7
|
|
760
|
+
C5 = reinterpret<f64>(0x3F55D7E09B4E3A84); // 0x1.5d7e09b4e3a84p-10
|
|
761
|
+
|
|
762
|
+
let ux = reinterpret<u64>(x);
|
|
763
|
+
let abstop = u32(ux >> 52) & 0x7ff;
|
|
764
|
+
if (abstop - 0x3C9 >= 0x03F) {
|
|
765
|
+
if (abstop - 0x3C9 >= 0x80000000) return 1.0;
|
|
766
|
+
if (abstop >= 0x409) {
|
|
767
|
+
if (ux == 0xFFF0000000000000) return 0;
|
|
768
|
+
if (abstop >= 0x7FF) return 1.0 + x;
|
|
769
|
+
if (<i64>ux >= 0) return Infinity;
|
|
770
|
+
else if (ux >= 0xC090CC0000000000) return 0;
|
|
771
|
+
}
|
|
772
|
+
if ((ux << 1) > 0x811A000000000000) abstop = 0; // Large x is special cased below.
|
|
773
|
+
}
|
|
774
|
+
|
|
775
|
+
// exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)].
|
|
776
|
+
// x = k/N + r, with int k and r in [-1/2N, 1/2N]
|
|
777
|
+
let kd = x + shift;
|
|
778
|
+
let ki = reinterpret<u64>(kd);
|
|
779
|
+
kd -= shift; // k/N for int k
|
|
780
|
+
let r = x - kd;
|
|
781
|
+
// 2^(k/N) ~= scale * (1 + tail)
|
|
782
|
+
let idx = usize((ki & N_MASK) << 1);
|
|
783
|
+
let top = ki << (52 - EXP_TABLE_BITS);
|
|
784
|
+
|
|
785
|
+
let tail = reinterpret<f64>(load<u64>(EXP_DATA_TAB + (idx << alignof<u64>()), 0 << alignof<u64>())); // T[idx])
|
|
786
|
+
// This is only a valid scale when -1023*N < k < 1024*N
|
|
787
|
+
let sbits = load<u64>(EXP_DATA_TAB + (idx << alignof<u64>()), 1 << alignof<u64>()) + top; // T[idx + 1]
|
|
788
|
+
// exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1).
|
|
789
|
+
// Evaluation is optimized assuming superscalar pipelined execution
|
|
790
|
+
let r2 = r * r;
|
|
791
|
+
// Without fma the worst case error is 0.5/N ulp larger.
|
|
792
|
+
// Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp.
|
|
793
|
+
let tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
|
|
794
|
+
if (abstop == 0) return specialcase2(tmp, sbits, ki);
|
|
795
|
+
let scale = reinterpret<f64>(sbits);
|
|
796
|
+
// Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
|
|
797
|
+
// is no spurious underflow here even without fma.
|
|
798
|
+
return scale * tmp + scale;
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
//
|
|
802
|
+
// Lookup data for log2. See: https://git.musl-libc.org/cgit/musl/tree/src/math/log2.c
|
|
803
|
+
//
|
|
804
|
+
|
|
805
|
+
// @ts-ignore: decorator
|
|
806
|
+
@inline const LOG2_TABLE_BITS = 6;
|
|
807
|
+
|
|
808
|
+
/* Algorithm:
|
|
809
|
+
|
|
810
|
+
x = 2^k z
|
|
811
|
+
log2(x) = k + log2(c) + log2(z/c)
|
|
812
|
+
log2(z/c) = poly(z/c - 1)
|
|
813
|
+
|
|
814
|
+
where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls
|
|
815
|
+
into the ith one, then table entries are computed as
|
|
816
|
+
|
|
817
|
+
tab[i].invc = 1/c
|
|
818
|
+
tab[i].logc = (double)log2(c)
|
|
819
|
+
tab2[i].chi = (double)c
|
|
820
|
+
tab2[i].clo = (double)(c - (double)c)
|
|
821
|
+
|
|
822
|
+
where c is near the center of the subinterval and is chosen by trying +-2^29
|
|
823
|
+
floating point invc candidates around 1/center and selecting one for which
|
|
824
|
+
|
|
825
|
+
1) the rounding error in 0x1.8p10 + logc is 0,
|
|
826
|
+
2) the rounding error in z - chi - clo is < 0x1p-64 and
|
|
827
|
+
3) the rounding error in (double)log2(c) is minimized (< 0x1p-68).
|
|
828
|
+
|
|
829
|
+
Note: 1) ensures that k + logc can be computed without rounding error, 2)
|
|
830
|
+
ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to a
|
|
831
|
+
single rounding error when there is no fast fma for z*invc - 1, 3) ensures
|
|
832
|
+
that logc + poly(z/c - 1) has small error, however near x == 1 when
|
|
833
|
+
|log2(x)| < 0x1p-4, this is not enough so that is special cased. */
|
|
834
|
+
|
|
835
|
+
// @ts-ignore: decorator
|
|
836
|
+
@lazy @inline const LOG2_DATA_TAB1 = memory.data<u64>([
|
|
837
|
+
// invc , logc
|
|
838
|
+
0x3FF724286BB1ACF8, 0xBFE1095FEECDB000,
|
|
839
|
+
0x3FF6E1F766D2CCA1, 0xBFE08494BD76D000,
|
|
840
|
+
0x3FF6A13D0E30D48A, 0xBFE00143AEE8F800,
|
|
841
|
+
0x3FF661EC32D06C85, 0xBFDEFEC5360B4000,
|
|
842
|
+
0x3FF623FA951198F8, 0xBFDDFDD91AB7E000,
|
|
843
|
+
0x3FF5E75BA4CF026C, 0xBFDCFFAE0CC79000,
|
|
844
|
+
0x3FF5AC055A214FB8, 0xBFDC043811FDA000,
|
|
845
|
+
0x3FF571ED0F166E1E, 0xBFDB0B67323AE000,
|
|
846
|
+
0x3FF53909590BF835, 0xBFDA152F5A2DB000,
|
|
847
|
+
0x3FF5014FED61ADDD, 0xBFD9217F5AF86000,
|
|
848
|
+
0x3FF4CAB88E487BD0, 0xBFD8304DB0719000,
|
|
849
|
+
0x3FF49539B4334FEE, 0xBFD74189F9A9E000,
|
|
850
|
+
0x3FF460CBDFAFD569, 0xBFD6552BB5199000,
|
|
851
|
+
0x3FF42D664EE4B953, 0xBFD56B23A29B1000,
|
|
852
|
+
0x3FF3FB01111DD8A6, 0xBFD483650F5FA000,
|
|
853
|
+
0x3FF3C995B70C5836, 0xBFD39DE937F6A000,
|
|
854
|
+
0x3FF3991C4AB6FD4A, 0xBFD2BAA1538D6000,
|
|
855
|
+
0x3FF3698E0CE099B5, 0xBFD1D98340CA4000,
|
|
856
|
+
0x3FF33AE48213E7B2, 0xBFD0FA853A40E000,
|
|
857
|
+
0x3FF30D191985BDB1, 0xBFD01D9C32E73000,
|
|
858
|
+
0x3FF2E025CAB271D7, 0xBFCE857DA2FA6000,
|
|
859
|
+
0x3FF2B404CF13CD82, 0xBFCCD3C8633D8000,
|
|
860
|
+
0x3FF288B02C7CCB50, 0xBFCB26034C14A000,
|
|
861
|
+
0x3FF25E2263944DE5, 0xBFC97C1C2F4FE000,
|
|
862
|
+
0x3FF234563D8615B1, 0xBFC7D6023F800000,
|
|
863
|
+
0x3FF20B46E33EAF38, 0xBFC633A71A05E000,
|
|
864
|
+
0x3FF1E2EEFDCDA3DD, 0xBFC494F5E9570000,
|
|
865
|
+
0x3FF1BB4A580B3930, 0xBFC2F9E424E0A000,
|
|
866
|
+
0x3FF19453847F2200, 0xBFC162595AFDC000,
|
|
867
|
+
0x3FF16E06C0D5D73C, 0xBFBF9C9A75BD8000,
|
|
868
|
+
0x3FF1485F47B7E4C2, 0xBFBC7B575BF9C000,
|
|
869
|
+
0x3FF12358AD0085D1, 0xBFB960C60FF48000,
|
|
870
|
+
0x3FF0FEF00F532227, 0xBFB64CE247B60000,
|
|
871
|
+
0x3FF0DB2077D03A8F, 0xBFB33F78B2014000,
|
|
872
|
+
0x3FF0B7E6D65980D9, 0xBFB0387D1A42C000,
|
|
873
|
+
0x3FF0953EFE7B408D, 0xBFAA6F9208B50000,
|
|
874
|
+
0x3FF07325CAC53B83, 0xBFA47A954F770000,
|
|
875
|
+
0x3FF05197E40D1B5C, 0xBF9D23A8C50C0000,
|
|
876
|
+
0x3FF03091C1208EA2, 0xBF916A2629780000,
|
|
877
|
+
0x3FF0101025B37E21, 0xBF7720F8D8E80000,
|
|
878
|
+
0x3FEFC07EF9CAA76B, 0x3F86FE53B1500000,
|
|
879
|
+
0x3FEF4465D3F6F184, 0x3FA11CCCE10F8000,
|
|
880
|
+
0x3FEECC079F84107F, 0x3FAC4DFC8C8B8000,
|
|
881
|
+
0x3FEE573A99975AE8, 0x3FB3AA321E574000,
|
|
882
|
+
0x3FEDE5D6F0BD3DE6, 0x3FB918A0D08B8000,
|
|
883
|
+
0x3FED77B681FF38B3, 0x3FBE72E9DA044000,
|
|
884
|
+
0x3FED0CB5724DE943, 0x3FC1DCD2507F6000,
|
|
885
|
+
0x3FECA4B2DC0E7563, 0x3FC476AB03DEA000,
|
|
886
|
+
0x3FEC3F8EE8D6CB51, 0x3FC7074377E22000,
|
|
887
|
+
0x3FEBDD2B4F020C4C, 0x3FC98EDE8BA94000,
|
|
888
|
+
0x3FEB7D6C006015CA, 0x3FCC0DB86AD2E000,
|
|
889
|
+
0x3FEB20366E2E338F, 0x3FCE840AAFCEE000,
|
|
890
|
+
0x3FEAC57026295039, 0x3FD0790AB4678000,
|
|
891
|
+
0x3FEA6D01BC2731DD, 0x3FD1AC056801C000,
|
|
892
|
+
0x3FEA16D3BC3FF18B, 0x3FD2DB11D4FEE000,
|
|
893
|
+
0x3FE9C2D14967FEAD, 0x3FD406464EC58000,
|
|
894
|
+
0x3FE970E4F47C9902, 0x3FD52DBE093AF000,
|
|
895
|
+
0x3FE920FB3982BCF2, 0x3FD651902050D000,
|
|
896
|
+
0x3FE8D30187F759F1, 0x3FD771D2CDEAF000,
|
|
897
|
+
0x3FE886E5EBB9F66D, 0x3FD88E9C857D9000,
|
|
898
|
+
0x3FE83C97B658B994, 0x3FD9A80155E16000,
|
|
899
|
+
0x3FE7F405FFC61022, 0x3FDABE186ED3D000,
|
|
900
|
+
0x3FE7AD22181415CA, 0x3FDBD0F2AEA0E000,
|
|
901
|
+
0x3FE767DCF99EFF8C, 0x3FDCE0A43DBF4000
|
|
902
|
+
]);
|
|
903
|
+
|
|
904
|
+
// @ts-ignore: decorator
|
|
905
|
+
@lazy @inline const LOG2_DATA_TAB2 = memory.data<u64>([
|
|
906
|
+
// chi , clo
|
|
907
|
+
0x3FE6200012B90A8E, 0x3C8904AB0644B605,
|
|
908
|
+
0x3FE66000045734A6, 0x3C61FF9BEA62F7A9,
|
|
909
|
+
0x3FE69FFFC325F2C5, 0x3C827ECFCB3C90BA,
|
|
910
|
+
0x3FE6E00038B95A04, 0x3C88FF8856739326,
|
|
911
|
+
0x3FE71FFFE09994E3, 0x3C8AFD40275F82B1,
|
|
912
|
+
0x3FE7600015590E10, 0xBC72FD75B4238341,
|
|
913
|
+
0x3FE7A00012655BD5, 0x3C7808E67C242B76,
|
|
914
|
+
0x3FE7E0003259E9A6, 0xBC6208E426F622B7,
|
|
915
|
+
0x3FE81FFFEDB4B2D2, 0xBC8402461EA5C92F,
|
|
916
|
+
0x3FE860002DFAFCC3, 0x3C6DF7F4A2F29A1F,
|
|
917
|
+
0x3FE89FFFF78C6B50, 0xBC8E0453094995FD,
|
|
918
|
+
0x3FE8E00039671566, 0xBC8A04F3BEC77B45,
|
|
919
|
+
0x3FE91FFFE2BF1745, 0xBC77FA34400E203C,
|
|
920
|
+
0x3FE95FFFCC5C9FD1, 0xBC76FF8005A0695D,
|
|
921
|
+
0x3FE9A0003BBA4767, 0x3C70F8C4C4EC7E03,
|
|
922
|
+
0x3FE9DFFFE7B92DA5, 0x3C8E7FD9478C4602,
|
|
923
|
+
0x3FEA1FFFD72EFDAF, 0xBC6A0C554DCDAE7E,
|
|
924
|
+
0x3FEA5FFFDE04FF95, 0x3C867DA98CE9B26B,
|
|
925
|
+
0x3FEA9FFFCA5E8D2B, 0xBC8284C9B54C13DE,
|
|
926
|
+
0x3FEADFFFDDAD03EA, 0x3C5812C8EA602E3C,
|
|
927
|
+
0x3FEB1FFFF10D3D4D, 0xBC8EFADDAD27789C,
|
|
928
|
+
0x3FEB5FFFCE21165A, 0x3C53CB1719C61237,
|
|
929
|
+
0x3FEB9FFFD950E674, 0x3C73F7D94194CE00,
|
|
930
|
+
0x3FEBE000139CA8AF, 0x3C750AC4215D9BC0,
|
|
931
|
+
0x3FEC20005B46DF99, 0x3C6BEEA653E9C1C9,
|
|
932
|
+
0x3FEC600040B9F7AE, 0xBC7C079F274A70D6,
|
|
933
|
+
0x3FECA0006255FD8A, 0xBC7A0B4076E84C1F,
|
|
934
|
+
0x3FECDFFFD94C095D, 0x3C88F933F99AB5D7,
|
|
935
|
+
0x3FED1FFFF975D6CF, 0xBC582C08665FE1BE,
|
|
936
|
+
0x3FED5FFFA2561C93, 0xBC7B04289BD295F3,
|
|
937
|
+
0x3FED9FFF9D228B0C, 0x3C870251340FA236,
|
|
938
|
+
0x3FEDE00065BC7E16, 0xBC75011E16A4D80C,
|
|
939
|
+
0x3FEE200002F64791, 0x3C89802F09EF62E0,
|
|
940
|
+
0x3FEE600057D7A6D8, 0xBC7E0B75580CF7FA,
|
|
941
|
+
0x3FEEA00027EDC00C, 0xBC8C848309459811,
|
|
942
|
+
0x3FEEE0006CF5CB7C, 0xBC8F8027951576F4,
|
|
943
|
+
0x3FEF2000782B7DCC, 0xBC8F81D97274538F,
|
|
944
|
+
0x3FEF6000260C450A, 0xBC4071002727FFDC,
|
|
945
|
+
0x3FEF9FFFE88CD533, 0xBC581BDCE1FDA8B0,
|
|
946
|
+
0x3FEFDFFFD50F8689, 0x3C87F91ACB918E6E,
|
|
947
|
+
0x3FF0200004292367, 0x3C9B7FF365324681,
|
|
948
|
+
0x3FF05FFFE3E3D668, 0x3C86FA08DDAE957B,
|
|
949
|
+
0x3FF0A0000A85A757, 0xBC57E2DE80D3FB91,
|
|
950
|
+
0x3FF0E0001A5F3FCC, 0xBC91823305C5F014,
|
|
951
|
+
0x3FF11FFFF8AFBAF5, 0xBC8BFABB6680BAC2,
|
|
952
|
+
0x3FF15FFFE54D91AD, 0xBC9D7F121737E7EF,
|
|
953
|
+
0x3FF1A00011AC36E1, 0x3C9C000A0516F5FF,
|
|
954
|
+
0x3FF1E00019C84248, 0xBC9082FBE4DA5DA0,
|
|
955
|
+
0x3FF220000FFE5E6E, 0xBC88FDD04C9CFB43,
|
|
956
|
+
0x3FF26000269FD891, 0x3C8CFE2A7994D182,
|
|
957
|
+
0x3FF2A00029A6E6DA, 0xBC700273715E8BC5,
|
|
958
|
+
0x3FF2DFFFE0293E39, 0x3C9B7C39DAB2A6F9,
|
|
959
|
+
0x3FF31FFFF7DCF082, 0x3C7DF1336EDC5254,
|
|
960
|
+
0x3FF35FFFF05A8B60, 0xBC9E03564CCD31EB,
|
|
961
|
+
0x3FF3A0002E0EAECC, 0x3C75F0E74BD3A477,
|
|
962
|
+
0x3FF3E000043BB236, 0x3C9C7DCB149D8833,
|
|
963
|
+
0x3FF4200002D187FF, 0x3C7E08AFCF2D3D28,
|
|
964
|
+
0x3FF460000D387CB1, 0x3C820837856599A6,
|
|
965
|
+
0x3FF4A00004569F89, 0xBC89FA5C904FBCD2,
|
|
966
|
+
0x3FF4E000043543F3, 0xBC781125ED175329,
|
|
967
|
+
0x3FF51FFFCC027F0F, 0x3C9883D8847754DC,
|
|
968
|
+
0x3FF55FFFFD87B36F, 0xBC8709E731D02807,
|
|
969
|
+
0x3FF59FFFF21DF7BA, 0x3C87F79F68727B02,
|
|
970
|
+
0x3FF5DFFFEBFC3481, 0xBC9180902E30E93E
|
|
971
|
+
]);
|
|
972
|
+
|
|
973
|
+
// @ts-ignore: decorator
|
|
974
|
+
@inline
|
|
975
|
+
export function log2_lut(x: f64): f64 {
|
|
976
|
+
const N_MASK = (1 << LOG2_TABLE_BITS) - 1;
|
|
977
|
+
|
|
978
|
+
const
|
|
979
|
+
LO: u64 = 0x3FEEA4AF00000000, // reinterpret<u64>(1.0 - 0x1.5b51p-5)
|
|
980
|
+
HI: u64 = 0x3FF0B55900000000; // reinterpret<u64>(1.0 + 0x1.6ab2p-5)
|
|
981
|
+
|
|
982
|
+
const
|
|
983
|
+
InvLn2hi = reinterpret<f64>(0x3FF7154765200000), // 0x1.7154765200000p+0
|
|
984
|
+
InvLn2lo = reinterpret<f64>(0x3DE705FC2EEFA200), // 0x1.705fc2eefa200p-33
|
|
985
|
+
Ox1p52 = reinterpret<f64>(0x4330000000000000); // 0x1p52
|
|
986
|
+
|
|
987
|
+
const
|
|
988
|
+
B0 = reinterpret<f64>(0xBFE71547652B82FE), // -0x1.71547652b82fep-1
|
|
989
|
+
B1 = reinterpret<f64>(0x3FDEC709DC3A03F7), // 0x1.ec709dc3a03f7p-2
|
|
990
|
+
B2 = reinterpret<f64>(0xBFD71547652B7C3F), // -0x1.71547652b7c3fp-2
|
|
991
|
+
B3 = reinterpret<f64>(0x3FD2776C50F05BE4), // 0x1.2776c50f05be4p-2
|
|
992
|
+
B4 = reinterpret<f64>(0xBFCEC709DD768FE5), // -0x1.ec709dd768fe5p-3
|
|
993
|
+
B5 = reinterpret<f64>(0x3FCA61761EC4E736), // 0x1.a61761ec4e736p-3
|
|
994
|
+
B6 = reinterpret<f64>(0xBFC7153FBC64A79B), // -0x1.7153fbc64a79bp-3
|
|
995
|
+
B7 = reinterpret<f64>(0x3FC484D154F01B4A), // 0x1.484d154f01b4ap-3
|
|
996
|
+
B8 = reinterpret<f64>(0xBFC289E4A72C383C), // -0x1.289e4a72c383cp-3
|
|
997
|
+
B9 = reinterpret<f64>(0x3FC0B32F285AEE66); // 0x1.0b32f285aee66p-3
|
|
998
|
+
|
|
999
|
+
const
|
|
1000
|
+
A0 = reinterpret<f64>(0xBFE71547652B8339), // -0x1.71547652b8339p-1
|
|
1001
|
+
A1 = reinterpret<f64>(0x3FDEC709DC3A04BE), // 0x1.ec709dc3a04bep-2
|
|
1002
|
+
A2 = reinterpret<f64>(0xBFD7154764702FFB), // -0x1.7154764702ffbp-2
|
|
1003
|
+
A3 = reinterpret<f64>(0x3FD2776C50034C48), // 0x1.2776c50034c48p-2
|
|
1004
|
+
A4 = reinterpret<f64>(0xBFCEC7B328EA92BC), // -0x1.ec7b328ea92bcp-3
|
|
1005
|
+
A5 = reinterpret<f64>(0x3FCA6225E117F92E); // 0x1.a6225e117f92ep-3
|
|
1006
|
+
|
|
1007
|
+
let ix = reinterpret<u64>(x);
|
|
1008
|
+
if (ix - LO < HI - LO) {
|
|
1009
|
+
let r = x - 1.0;
|
|
1010
|
+
// #if __FP_FAST_FMA
|
|
1011
|
+
// hi = r * InvLn2hi;
|
|
1012
|
+
// lo = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -hi);
|
|
1013
|
+
// #else
|
|
1014
|
+
let rhi = reinterpret<f64>(reinterpret<u64>(r) & 0xFFFFFFFF00000000);
|
|
1015
|
+
let rlo = r - rhi;
|
|
1016
|
+
let hi = rhi * InvLn2hi;
|
|
1017
|
+
let lo = rlo * InvLn2hi + r * InvLn2lo;
|
|
1018
|
+
// #endif
|
|
1019
|
+
let r2 = r * r; // rounding error: 0x1p-62
|
|
1020
|
+
let r4 = r2 * r2;
|
|
1021
|
+
// Worst-case error is less than 0.54 ULP (0.55 ULP without fma)
|
|
1022
|
+
let p = r2 * (B0 + r * B1);
|
|
1023
|
+
let y = hi + p;
|
|
1024
|
+
lo += hi - y + p;
|
|
1025
|
+
lo += r4 * (B2 + r * B3 + r2 * (B4 + r * B5) +
|
|
1026
|
+
r4 * (B6 + r * B7 + r2 * (B8 + r * B9)));
|
|
1027
|
+
return y + lo;
|
|
1028
|
+
}
|
|
1029
|
+
let top = u32(ix >> 48);
|
|
1030
|
+
if (top - 0x0010 >= 0x7ff0 - 0x0010) {
|
|
1031
|
+
// x < 0x1p-1022 or inf or nan.
|
|
1032
|
+
if ((ix << 1) == 0) return -1.0 / (x * x);
|
|
1033
|
+
if (ix == 0x7FF0000000000000) return x; // log(inf) == inf
|
|
1034
|
+
if ((top & 0x8000) || (top & 0x7FF0) == 0x7FF0) return (x - x) / (x - x);
|
|
1035
|
+
// x is subnormal, normalize it.
|
|
1036
|
+
ix = reinterpret<u64>(x * Ox1p52);
|
|
1037
|
+
ix -= u64(52) << 52;
|
|
1038
|
+
}
|
|
1039
|
+
|
|
1040
|
+
// x = 2^k z; where z is in range [OFF,2*OFF) and exact.
|
|
1041
|
+
// The range is split into N subintervals.
|
|
1042
|
+
// The ith subinterval contains z and c is near its center.
|
|
1043
|
+
let tmp = ix - 0x3FE6000000000000;
|
|
1044
|
+
let i = <usize>((tmp >> (52 - LOG2_TABLE_BITS)) & N_MASK);
|
|
1045
|
+
let k = <i64>tmp >> 52;
|
|
1046
|
+
let iz = ix - (tmp & 0xFFF0000000000000);
|
|
1047
|
+
|
|
1048
|
+
let invc = load<f64>(LOG2_DATA_TAB1 + (i << (1 + alignof<f64>())), 0 << alignof<f64>()); // T[i].invc;
|
|
1049
|
+
let logc = load<f64>(LOG2_DATA_TAB1 + (i << (1 + alignof<f64>())), 1 << alignof<f64>()); // T[i].logc;
|
|
1050
|
+
let z = reinterpret<f64>(iz);
|
|
1051
|
+
let kd = <f64>k;
|
|
1052
|
+
|
|
1053
|
+
// log2(x) = log2(z/c) + log2(c) + k.
|
|
1054
|
+
// r ~= z/c - 1, |r| < 1/(2*N).
|
|
1055
|
+
// #if __FP_FAST_FMA
|
|
1056
|
+
// // rounding error: 0x1p-55/N.
|
|
1057
|
+
// r = __builtin_fma(z, invc, -1.0);
|
|
1058
|
+
// t1 = r * InvLn2hi;
|
|
1059
|
+
// t2 = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -t1);
|
|
1060
|
+
// #else
|
|
1061
|
+
// rounding error: 0x1p-55/N + 0x1p-65.
|
|
1062
|
+
let chi = load<f64>(LOG2_DATA_TAB2 + (i << (1 + alignof<f64>())), 0 << alignof<f64>()); // T[i].chi;
|
|
1063
|
+
let clo = load<f64>(LOG2_DATA_TAB2 + (i << (1 + alignof<f64>())), 1 << alignof<f64>()); // T[i].clo;
|
|
1064
|
+
|
|
1065
|
+
let r = (z - chi - clo) * invc;
|
|
1066
|
+
let rhi = reinterpret<f64>(reinterpret<u64>(r) & 0xFFFFFFFF00000000);
|
|
1067
|
+
let rlo = r - rhi;
|
|
1068
|
+
let t1 = rhi * InvLn2hi;
|
|
1069
|
+
let t2 = rlo * InvLn2hi + r * InvLn2lo;
|
|
1070
|
+
// #endif
|
|
1071
|
+
|
|
1072
|
+
// hi + lo = r/ln2 + log2(c) + k
|
|
1073
|
+
let t3 = kd + logc;
|
|
1074
|
+
let hi = t3 + t1;
|
|
1075
|
+
let lo = t3 - hi + t1 + t2;
|
|
1076
|
+
|
|
1077
|
+
// log2(r+1) = r/ln2 + r^2*poly(r)
|
|
1078
|
+
// Evaluation is optimized assuming superscalar pipelined execution
|
|
1079
|
+
let r2 = r * r; // rounding error: 0x1p-54/N^2
|
|
1080
|
+
// Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
|
|
1081
|
+
// ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma).
|
|
1082
|
+
let p = A0 + r * A1 + r2 * (A2 + r * A3) + (r2 * r2) * (A4 + r * A5);
|
|
1083
|
+
return lo + r2 * p + hi;
|
|
1084
|
+
}
|
|
1085
|
+
|
|
1086
|
+
//
|
|
1087
|
+
// Lookup data for log. See: https://git.musl-libc.org/cgit/musl/tree/src/math/log.c
|
|
1088
|
+
//
|
|
1089
|
+
|
|
1090
|
+
// @ts-ignore: decorator
|
|
1091
|
+
@inline const LOG_TABLE_BITS = 7;
|
|
1092
|
+
|
|
1093
|
+
/* Algorithm:
|
|
1094
|
+
|
|
1095
|
+
x = 2^k z
|
|
1096
|
+
log(x) = k ln2 + log(c) + log(z/c)
|
|
1097
|
+
log(z/c) = poly(z/c - 1)
|
|
1098
|
+
|
|
1099
|
+
where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls
|
|
1100
|
+
into the ith one, then table entries are computed as
|
|
1101
|
+
|
|
1102
|
+
tab[i].invc = 1/c
|
|
1103
|
+
tab[i].logc = (double)log(c)
|
|
1104
|
+
tab2[i].chi = (double)c
|
|
1105
|
+
tab2[i].clo = (double)(c - (double)c)
|
|
1106
|
+
|
|
1107
|
+
where c is near the center of the subinterval and is chosen by trying +-2^29
|
|
1108
|
+
floating point invc candidates around 1/center and selecting one for which
|
|
1109
|
+
|
|
1110
|
+
1) the rounding error in 0x1.8p9 + logc is 0,
|
|
1111
|
+
2) the rounding error in z - chi - clo is < 0x1p-66 and
|
|
1112
|
+
3) the rounding error in (double)log(c) is minimized (< 0x1p-66).
|
|
1113
|
+
|
|
1114
|
+
Note: 1) ensures that k*ln2hi + logc can be computed without rounding error,
|
|
1115
|
+
2) ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to
|
|
1116
|
+
a single rounding error when there is no fast fma for z*invc - 1, 3) ensures
|
|
1117
|
+
that logc + poly(z/c - 1) has small error, however near x == 1 when
|
|
1118
|
+
|log(x)| < 0x1p-4, this is not enough so that is special cased.*/
|
|
1119
|
+
|
|
1120
|
+
// @ts-ignore: decorator
|
|
1121
|
+
@lazy @inline const LOG_DATA_TAB1 = memory.data<u64>([
|
|
1122
|
+
// invc , logc
|
|
1123
|
+
0x3FF734F0C3E0DE9F, 0xBFD7CC7F79E69000,
|
|
1124
|
+
0x3FF713786A2CE91F, 0xBFD76FEEC20D0000,
|
|
1125
|
+
0x3FF6F26008FAB5A0, 0xBFD713E31351E000,
|
|
1126
|
+
0x3FF6D1A61F138C7D, 0xBFD6B85B38287800,
|
|
1127
|
+
0x3FF6B1490BC5B4D1, 0xBFD65D5590807800,
|
|
1128
|
+
0x3FF69147332F0CBA, 0xBFD602D076180000,
|
|
1129
|
+
0x3FF6719F18224223, 0xBFD5A8CA86909000,
|
|
1130
|
+
0x3FF6524F99A51ED9, 0xBFD54F4356035000,
|
|
1131
|
+
0x3FF63356AA8F24C4, 0xBFD4F637C36B4000,
|
|
1132
|
+
0x3FF614B36B9DDC14, 0xBFD49DA7FDA85000,
|
|
1133
|
+
0x3FF5F66452C65C4C, 0xBFD445923989A800,
|
|
1134
|
+
0x3FF5D867B5912C4F, 0xBFD3EDF439B0B800,
|
|
1135
|
+
0x3FF5BABCCB5B90DE, 0xBFD396CE448F7000,
|
|
1136
|
+
0x3FF59D61F2D91A78, 0xBFD3401E17BDA000,
|
|
1137
|
+
0x3FF5805612465687, 0xBFD2E9E2EF468000,
|
|
1138
|
+
0x3FF56397CEE76BD3, 0xBFD2941B3830E000,
|
|
1139
|
+
0x3FF54725E2A77F93, 0xBFD23EC58CDA8800,
|
|
1140
|
+
0x3FF52AFF42064583, 0xBFD1E9E129279000,
|
|
1141
|
+
0x3FF50F22DBB2BDDF, 0xBFD1956D2B48F800,
|
|
1142
|
+
0x3FF4F38F4734DED7, 0xBFD141679AB9F800,
|
|
1143
|
+
0x3FF4D843CFDE2840, 0xBFD0EDD094EF9800,
|
|
1144
|
+
0x3FF4BD3EC078A3C8, 0xBFD09AA518DB1000,
|
|
1145
|
+
0x3FF4A27FC3E0258A, 0xBFD047E65263B800,
|
|
1146
|
+
0x3FF4880524D48434, 0xBFCFEB224586F000,
|
|
1147
|
+
0x3FF46DCE1B192D0B, 0xBFCF474A7517B000,
|
|
1148
|
+
0x3FF453D9D3391854, 0xBFCEA4443D103000,
|
|
1149
|
+
0x3FF43A2744B4845A, 0xBFCE020D44E9B000,
|
|
1150
|
+
0x3FF420B54115F8FB, 0xBFCD60A22977F000,
|
|
1151
|
+
0x3FF40782DA3EF4B1, 0xBFCCC00104959000,
|
|
1152
|
+
0x3FF3EE8F5D57FE8F, 0xBFCC202956891000,
|
|
1153
|
+
0x3FF3D5D9A00B4CE9, 0xBFCB81178D811000,
|
|
1154
|
+
0x3FF3BD60C010C12B, 0xBFCAE2C9CCD3D000,
|
|
1155
|
+
0x3FF3A5242B75DAB8, 0xBFCA45402E129000,
|
|
1156
|
+
0x3FF38D22CD9FD002, 0xBFC9A877681DF000,
|
|
1157
|
+
0x3FF3755BC5847A1C, 0xBFC90C6D69483000,
|
|
1158
|
+
0x3FF35DCE49AD36E2, 0xBFC87120A645C000,
|
|
1159
|
+
0x3FF34679984DD440, 0xBFC7D68FB4143000,
|
|
1160
|
+
0x3FF32F5CCEFFCB24, 0xBFC73CB83C627000,
|
|
1161
|
+
0x3FF3187775A10D49, 0xBFC6A39A9B376000,
|
|
1162
|
+
0x3FF301C8373E3990, 0xBFC60B3154B7A000,
|
|
1163
|
+
0x3FF2EB4EBB95F841, 0xBFC5737D76243000,
|
|
1164
|
+
0x3FF2D50A0219A9D1, 0xBFC4DC7B8FC23000,
|
|
1165
|
+
0x3FF2BEF9A8B7FD2A, 0xBFC4462C51D20000,
|
|
1166
|
+
0x3FF2A91C7A0C1BAB, 0xBFC3B08ABC830000,
|
|
1167
|
+
0x3FF293726014B530, 0xBFC31B996B490000,
|
|
1168
|
+
0x3FF27DFA5757A1F5, 0xBFC2875490A44000,
|
|
1169
|
+
0x3FF268B39B1D3BBF, 0xBFC1F3B9F879A000,
|
|
1170
|
+
0x3FF2539D838FF5BD, 0xBFC160C8252CA000,
|
|
1171
|
+
0x3FF23EB7AAC9083B, 0xBFC0CE7F57F72000,
|
|
1172
|
+
0x3FF22A012BA940B6, 0xBFC03CDC49FEA000,
|
|
1173
|
+
0x3FF2157996CC4132, 0xBFBF57BDBC4B8000,
|
|
1174
|
+
0x3FF201201DD2FC9B, 0xBFBE370896404000,
|
|
1175
|
+
0x3FF1ECF4494D480B, 0xBFBD17983EF94000,
|
|
1176
|
+
0x3FF1D8F5528F6569, 0xBFBBF9674ED8A000,
|
|
1177
|
+
0x3FF1C52311577E7C, 0xBFBADC79202F6000,
|
|
1178
|
+
0x3FF1B17C74CB26E9, 0xBFB9C0C3E7288000,
|
|
1179
|
+
0x3FF19E010C2C1AB6, 0xBFB8A646B372C000,
|
|
1180
|
+
0x3FF18AB07BB670BD, 0xBFB78D01B3AC0000,
|
|
1181
|
+
0x3FF1778A25EFBCB6, 0xBFB674F145380000,
|
|
1182
|
+
0x3FF1648D354C31DA, 0xBFB55E0E6D878000,
|
|
1183
|
+
0x3FF151B990275FDD, 0xBFB4485CDEA1E000,
|
|
1184
|
+
0x3FF13F0EA432D24C, 0xBFB333D94D6AA000,
|
|
1185
|
+
0x3FF12C8B7210F9DA, 0xBFB22079F8C56000,
|
|
1186
|
+
0x3FF11A3028ECB531, 0xBFB10E4698622000,
|
|
1187
|
+
0x3FF107FBDA8434AF, 0xBFAFFA6C6AD20000,
|
|
1188
|
+
0x3FF0F5EE0F4E6BB3, 0xBFADDA8D4A774000,
|
|
1189
|
+
0x3FF0E4065D2A9FCE, 0xBFABBCECE4850000,
|
|
1190
|
+
0x3FF0D244632CA521, 0xBFA9A1894012C000,
|
|
1191
|
+
0x3FF0C0A77CE2981A, 0xBFA788583302C000,
|
|
1192
|
+
0x3FF0AF2F83C636D1, 0xBFA5715E67D68000,
|
|
1193
|
+
0x3FF09DDB98A01339, 0xBFA35C8A49658000,
|
|
1194
|
+
0x3FF08CABAF52E7DF, 0xBFA149E364154000,
|
|
1195
|
+
0x3FF07B9F2F4E28FB, 0xBF9E72C082EB8000,
|
|
1196
|
+
0x3FF06AB58C358F19, 0xBF9A55F152528000,
|
|
1197
|
+
0x3FF059EEA5ECF92C, 0xBF963D62CF818000,
|
|
1198
|
+
0x3FF04949CDD12C90, 0xBF9228FB8CAA0000,
|
|
1199
|
+
0x3FF038C6C6F0ADA9, 0xBF8C317B20F90000,
|
|
1200
|
+
0x3FF02865137932A9, 0xBF8419355DAA0000,
|
|
1201
|
+
0x3FF0182427EA7348, 0xBF781203C2EC0000,
|
|
1202
|
+
0x3FF008040614B195, 0xBF60040979240000,
|
|
1203
|
+
0x3FEFE01FF726FA1A, 0x3F6FEFF384900000,
|
|
1204
|
+
0x3FEFA11CC261EA74, 0x3F87DC41353D0000,
|
|
1205
|
+
0x3FEF6310B081992E, 0x3F93CEA3C4C28000,
|
|
1206
|
+
0x3FEF25F63CEEADCD, 0x3F9B9FC114890000,
|
|
1207
|
+
0x3FEEE9C8039113E7, 0x3FA1B0D8CE110000,
|
|
1208
|
+
0x3FEEAE8078CBB1AB, 0x3FA58A5BD001C000,
|
|
1209
|
+
0x3FEE741AA29D0C9B, 0x3FA95C8340D88000,
|
|
1210
|
+
0x3FEE3A91830A99B5, 0x3FAD276AEF578000,
|
|
1211
|
+
0x3FEE01E009609A56, 0x3FB07598E598C000,
|
|
1212
|
+
0x3FEDCA01E577BB98, 0x3FB253F5E30D2000,
|
|
1213
|
+
0x3FED92F20B7C9103, 0x3FB42EDD8B380000,
|
|
1214
|
+
0x3FED5CAC66FB5CCE, 0x3FB606598757C000,
|
|
1215
|
+
0x3FED272CAA5EDE9D, 0x3FB7DA76356A0000,
|
|
1216
|
+
0x3FECF26E3E6B2CCD, 0x3FB9AB434E1C6000,
|
|
1217
|
+
0x3FECBE6DA2A77902, 0x3FBB78C7BB0D6000,
|
|
1218
|
+
0x3FEC8B266D37086D, 0x3FBD431332E72000,
|
|
1219
|
+
0x3FEC5894BD5D5804, 0x3FBF0A3171DE6000,
|
|
1220
|
+
0x3FEC26B533BB9F8C, 0x3FC067152B914000,
|
|
1221
|
+
0x3FEBF583EEECE73F, 0x3FC147858292B000,
|
|
1222
|
+
0x3FEBC4FD75DB96C1, 0x3FC2266ECDCA3000,
|
|
1223
|
+
0x3FEB951E0C864A28, 0x3FC303D7A6C55000,
|
|
1224
|
+
0x3FEB65E2C5EF3E2C, 0x3FC3DFC33C331000,
|
|
1225
|
+
0x3FEB374867C9888B, 0x3FC4BA366B7A8000,
|
|
1226
|
+
0x3FEB094B211D304A, 0x3FC5933928D1F000,
|
|
1227
|
+
0x3FEADBE885F2EF7E, 0x3FC66ACD2418F000,
|
|
1228
|
+
0x3FEAAF1D31603DA2, 0x3FC740F8EC669000,
|
|
1229
|
+
0x3FEA82E63FD358A7, 0x3FC815C0F51AF000,
|
|
1230
|
+
0x3FEA5740EF09738B, 0x3FC8E92954F68000,
|
|
1231
|
+
0x3FEA2C2A90AB4B27, 0x3FC9BB3602F84000,
|
|
1232
|
+
0x3FEA01A01393F2D1, 0x3FCA8BED1C2C0000,
|
|
1233
|
+
0x3FE9D79F24DB3C1B, 0x3FCB5B515C01D000,
|
|
1234
|
+
0x3FE9AE2505C7B190, 0x3FCC2967CCBCC000,
|
|
1235
|
+
0x3FE9852EF297CE2F, 0x3FCCF635D5486000,
|
|
1236
|
+
0x3FE95CBAEEA44B75, 0x3FCDC1BD3446C000,
|
|
1237
|
+
0x3FE934C69DE74838, 0x3FCE8C01B8CFE000,
|
|
1238
|
+
0x3FE90D4F2F6752E6, 0x3FCF5509C0179000,
|
|
1239
|
+
0x3FE8E6528EFFD79D, 0x3FD00E6C121FB800,
|
|
1240
|
+
0x3FE8BFCE9FCC007C, 0x3FD071B80E93D000,
|
|
1241
|
+
0x3FE899C0DABEC30E, 0x3FD0D46B9E867000,
|
|
1242
|
+
0x3FE87427AA2317FB, 0x3FD13687334BD000,
|
|
1243
|
+
0x3FE84F00ACB39A08, 0x3FD1980D67234800,
|
|
1244
|
+
0x3FE82A49E8653E55, 0x3FD1F8FFE0CC8000,
|
|
1245
|
+
0x3FE8060195F40260, 0x3FD2595FD7636800,
|
|
1246
|
+
0x3FE7E22563E0A329, 0x3FD2B9300914A800,
|
|
1247
|
+
0x3FE7BEB377DCB5AD, 0x3FD3187210436000,
|
|
1248
|
+
0x3FE79BAA679725C2, 0x3FD377266DEC1800,
|
|
1249
|
+
0x3FE77907F2170657, 0x3FD3D54FFBAF3000,
|
|
1250
|
+
0x3FE756CADBD6130C, 0x3FD432EEE32FE000
|
|
1251
|
+
]);
|
|
1252
|
+
|
|
1253
|
+
// @ts-ignore: decorator
|
|
1254
|
+
@lazy @inline const LOG_DATA_TAB2 = memory.data<u64>([
|
|
1255
|
+
// chi , clo
|
|
1256
|
+
0x3FE61000014FB66B, 0x3C7E026C91425B3C,
|
|
1257
|
+
0x3FE63000034DB495, 0x3C8DBFEA48005D41,
|
|
1258
|
+
0x3FE650000D94D478, 0x3C8E7FA786D6A5B7,
|
|
1259
|
+
0x3FE67000074E6FAD, 0x3C61FCEA6B54254C,
|
|
1260
|
+
0x3FE68FFFFEDF0FAE, 0xBC7C7E274C590EFD,
|
|
1261
|
+
0x3FE6B0000763C5BC, 0xBC8AC16848DCDA01,
|
|
1262
|
+
0x3FE6D0001E5CC1F6, 0x3C833F1C9D499311,
|
|
1263
|
+
0x3FE6EFFFEB05F63E, 0xBC7E80041AE22D53,
|
|
1264
|
+
0x3FE710000E869780, 0x3C7BFF6671097952,
|
|
1265
|
+
0x3FE72FFFFC67E912, 0x3C8C00E226BD8724,
|
|
1266
|
+
0x3FE74FFFDF81116A, 0xBC6E02916EF101D2,
|
|
1267
|
+
0x3FE770000F679C90, 0xBC67FC71CD549C74,
|
|
1268
|
+
0x3FE78FFFFA7EC835, 0x3C81BEC19EF50483,
|
|
1269
|
+
0x3FE7AFFFFE20C2E6, 0xBC707E1729CC6465,
|
|
1270
|
+
0x3FE7CFFFED3FC900, 0xBC808072087B8B1C,
|
|
1271
|
+
0x3FE7EFFFE9261A76, 0x3C8DC0286D9DF9AE,
|
|
1272
|
+
0x3FE81000049CA3E8, 0x3C897FD251E54C33,
|
|
1273
|
+
0x3FE8300017932C8F, 0xBC8AFEE9B630F381,
|
|
1274
|
+
0x3FE850000633739C, 0x3C89BFBF6B6535BC,
|
|
1275
|
+
0x3FE87000204289C6, 0xBC8BBF65F3117B75,
|
|
1276
|
+
0x3FE88FFFEBF57904, 0xBC89006EA23DCB57,
|
|
1277
|
+
0x3FE8B00022BC04DF, 0xBC7D00DF38E04B0A,
|
|
1278
|
+
0x3FE8CFFFE50C1B8A, 0xBC88007146FF9F05,
|
|
1279
|
+
0x3FE8EFFFFC918E43, 0x3C83817BD07A7038,
|
|
1280
|
+
0x3FE910001EFA5FC7, 0x3C893E9176DFB403,
|
|
1281
|
+
0x3FE9300013467BB9, 0x3C7F804E4B980276,
|
|
1282
|
+
0x3FE94FFFE6EE076F, 0xBC8F7EF0D9FF622E,
|
|
1283
|
+
0x3FE96FFFDE3C12D1, 0xBC7082AA962638BA,
|
|
1284
|
+
0x3FE98FFFF4458A0D, 0xBC87801B9164A8EF,
|
|
1285
|
+
0x3FE9AFFFDD982E3E, 0xBC8740E08A5A9337,
|
|
1286
|
+
0x3FE9CFFFED49FB66, 0x3C3FCE08C19BE000,
|
|
1287
|
+
0x3FE9F00020F19C51, 0xBC8A3FAA27885B0A,
|
|
1288
|
+
0x3FEA10001145B006, 0x3C74FF489958DA56,
|
|
1289
|
+
0x3FEA300007BBF6FA, 0x3C8CBEAB8A2B6D18,
|
|
1290
|
+
0x3FEA500010971D79, 0x3C88FECADD787930,
|
|
1291
|
+
0x3FEA70001DF52E48, 0xBC8F41763DD8ABDB,
|
|
1292
|
+
0x3FEA90001C593352, 0xBC8EBF0284C27612,
|
|
1293
|
+
0x3FEAB0002A4F3E4B, 0xBC69FD043CFF3F5F,
|
|
1294
|
+
0x3FEACFFFD7AE1ED1, 0xBC823EE7129070B4,
|
|
1295
|
+
0x3FEAEFFFEE510478, 0x3C6A063EE00EDEA3,
|
|
1296
|
+
0x3FEB0FFFDB650D5B, 0x3C5A06C8381F0AB9,
|
|
1297
|
+
0x3FEB2FFFFEAACA57, 0xBC79011E74233C1D,
|
|
1298
|
+
0x3FEB4FFFD995BADC, 0xBC79FF1068862A9F,
|
|
1299
|
+
0x3FEB7000249E659C, 0x3C8AFF45D0864F3E,
|
|
1300
|
+
0x3FEB8FFFF9871640, 0x3C7CFE7796C2C3F9,
|
|
1301
|
+
0x3FEBAFFFD204CB4F, 0xBC63FF27EEF22BC4,
|
|
1302
|
+
0x3FEBCFFFD2415C45, 0xBC6CFFB7EE3BEA21,
|
|
1303
|
+
0x3FEBEFFFF86309DF, 0xBC814103972E0B5C,
|
|
1304
|
+
0x3FEC0FFFE1B57653, 0x3C8BC16494B76A19,
|
|
1305
|
+
0x3FEC2FFFF1FA57E3, 0xBC64FEEF8D30C6ED,
|
|
1306
|
+
0x3FEC4FFFDCBFE424, 0xBC843F68BCEC4775,
|
|
1307
|
+
0x3FEC6FFFED54B9F7, 0x3C847EA3F053E0EC,
|
|
1308
|
+
0x3FEC8FFFEB998FD5, 0x3C7383068DF992F1,
|
|
1309
|
+
0x3FECB0002125219A, 0xBC68FD8E64180E04,
|
|
1310
|
+
0x3FECCFFFDD94469C, 0x3C8E7EBE1CC7EA72,
|
|
1311
|
+
0x3FECEFFFEAFDC476, 0x3C8EBE39AD9F88FE,
|
|
1312
|
+
0x3FED1000169AF82B, 0x3C757D91A8B95A71,
|
|
1313
|
+
0x3FED30000D0FF71D, 0x3C89C1906970C7DA,
|
|
1314
|
+
0x3FED4FFFEA790FC4, 0xBC580E37C558FE0C,
|
|
1315
|
+
0x3FED70002EDC87E5, 0xBC7F80D64DC10F44,
|
|
1316
|
+
0x3FED900021DC82AA, 0xBC747C8F94FD5C5C,
|
|
1317
|
+
0x3FEDAFFFD86B0283, 0x3C8C7F1DC521617E,
|
|
1318
|
+
0x3FEDD000296C4739, 0x3C88019EB2FFB153,
|
|
1319
|
+
0x3FEDEFFFE54490F5, 0x3C6E00D2C652CC89,
|
|
1320
|
+
0x3FEE0FFFCDABF694, 0xBC7F8340202D69D2,
|
|
1321
|
+
0x3FEE2FFFDB52C8DD, 0x3C7B00C1CA1B0864,
|
|
1322
|
+
0x3FEE4FFFF24216EF, 0x3C72FFA8B094AB51,
|
|
1323
|
+
0x3FEE6FFFE88A5E11, 0xBC57F673B1EFBE59,
|
|
1324
|
+
0x3FEE9000119EFF0D, 0xBC84808D5E0BC801,
|
|
1325
|
+
0x3FEEAFFFDFA51744, 0x3C780006D54320B5,
|
|
1326
|
+
0x3FEED0001A127FA1, 0xBC5002F860565C92,
|
|
1327
|
+
0x3FEEF00007BABCC4, 0xBC8540445D35E611,
|
|
1328
|
+
0x3FEF0FFFF57A8D02, 0xBC4FFB3139EF9105,
|
|
1329
|
+
0x3FEF30001EE58AC7, 0x3C8A81ACF2731155,
|
|
1330
|
+
0x3FEF4FFFF5823494, 0x3C8A3F41D4D7C743,
|
|
1331
|
+
0x3FEF6FFFFCA94C6B, 0xBC6202F41C987875,
|
|
1332
|
+
0x3FEF8FFFE1F9C441, 0x3C777DD1F477E74B,
|
|
1333
|
+
0x3FEFAFFFD2E0E37E, 0xBC6F01199A7CA331,
|
|
1334
|
+
0x3FEFD0001C77E49E, 0x3C7181EE4BCEACB1,
|
|
1335
|
+
0x3FEFEFFFF7E0C331, 0xBC6E05370170875A,
|
|
1336
|
+
0x3FF00FFFF465606E, 0xBC8A7EAD491C0ADA,
|
|
1337
|
+
0x3FF02FFFF3867A58, 0xBC977F69C3FCB2E0,
|
|
1338
|
+
0x3FF04FFFFDFC0D17, 0x3C97BFFE34CB945B,
|
|
1339
|
+
0x3FF0700003CD4D82, 0x3C820083C0E456CB,
|
|
1340
|
+
0x3FF08FFFF9F2CBE8, 0xBC6DFFDFBE37751A,
|
|
1341
|
+
0x3FF0B000010CDA65, 0xBC913F7FAEE626EB,
|
|
1342
|
+
0x3FF0D00001A4D338, 0x3C807DFA79489FF7,
|
|
1343
|
+
0x3FF0EFFFFADAFDFD, 0xBC77040570D66BC0,
|
|
1344
|
+
0x3FF110000BBAFD96, 0x3C8E80D4846D0B62,
|
|
1345
|
+
0x3FF12FFFFAE5F45D, 0x3C9DBFFA64FD36EF,
|
|
1346
|
+
0x3FF150000DD59AD9, 0x3C9A0077701250AE,
|
|
1347
|
+
0x3FF170000F21559A, 0x3C8DFDF9E2E3DEEE,
|
|
1348
|
+
0x3FF18FFFFC275426, 0x3C910030DC3B7273,
|
|
1349
|
+
0x3FF1B000123D3C59, 0x3C997F7980030188,
|
|
1350
|
+
0x3FF1CFFFF8299EB7, 0xBC65F932AB9F8C67,
|
|
1351
|
+
0x3FF1EFFFF48AD400, 0x3C937FBF9DA75BEB,
|
|
1352
|
+
0x3FF210000C8B86A4, 0x3C9F806B91FD5B22,
|
|
1353
|
+
0x3FF2300003854303, 0x3C93FFC2EB9FBF33,
|
|
1354
|
+
0x3FF24FFFFFBCF684, 0x3C7601E77E2E2E72,
|
|
1355
|
+
0x3FF26FFFF52921D9, 0x3C7FFCBB767F0C61,
|
|
1356
|
+
0x3FF2900014933A3C, 0xBC7202CA3C02412B,
|
|
1357
|
+
0x3FF2B00014556313, 0xBC92808233F21F02,
|
|
1358
|
+
0x3FF2CFFFEBFE523B, 0xBC88FF7E384FDCF2,
|
|
1359
|
+
0x3FF2F0000BB8AD96, 0xBC85FF51503041C5,
|
|
1360
|
+
0x3FF30FFFFB7AE2AF, 0xBC810071885E289D,
|
|
1361
|
+
0x3FF32FFFFEAC5F7F, 0xBC91FF5D3FB7B715,
|
|
1362
|
+
0x3FF350000CA66756, 0x3C957F82228B82BD,
|
|
1363
|
+
0x3FF3700011FBF721, 0x3C8000BAC40DD5CC,
|
|
1364
|
+
0x3FF38FFFF9592FB9, 0xBC943F9D2DB2A751,
|
|
1365
|
+
0x3FF3B00004DDD242, 0x3C857F6B707638E1,
|
|
1366
|
+
0x3FF3CFFFF5B2C957, 0x3C7A023A10BF1231,
|
|
1367
|
+
0x3FF3EFFFEAB0B418, 0x3C987F6D66B152B0,
|
|
1368
|
+
0x3FF410001532AFF4, 0x3C67F8375F198524,
|
|
1369
|
+
0x3FF4300017478B29, 0x3C8301E672DC5143,
|
|
1370
|
+
0x3FF44FFFE795B463, 0x3C89FF69B8B2895A,
|
|
1371
|
+
0x3FF46FFFE80475E0, 0xBC95C0B19BC2F254,
|
|
1372
|
+
0x3FF48FFFEF6FC1E7, 0x3C9B4009F23A2A72,
|
|
1373
|
+
0x3FF4AFFFE5BEA704, 0xBC94FFB7BF0D7D45,
|
|
1374
|
+
0x3FF4D000171027DE, 0xBC99C06471DC6A3D,
|
|
1375
|
+
0x3FF4F0000FF03EE2, 0x3C977F890B85531C,
|
|
1376
|
+
0x3FF5100012DC4BD1, 0x3C6004657166A436,
|
|
1377
|
+
0x3FF530001605277A, 0xBC96BFCECE233209,
|
|
1378
|
+
0x3FF54FFFECDB704C, 0xBC8902720505A1D7,
|
|
1379
|
+
0x3FF56FFFEF5F54A9, 0x3C9BBFE60EC96412,
|
|
1380
|
+
0x3FF5900017E61012, 0x3C887EC581AFEF90,
|
|
1381
|
+
0x3FF5B00003C93E92, 0xBC9F41080ABF0CC0,
|
|
1382
|
+
0x3FF5D0001D4919BC, 0xBC98812AFB254729,
|
|
1383
|
+
0x3FF5EFFFE7B87A89, 0xBC947EB780ED6904
|
|
1384
|
+
]);
|
|
1385
|
+
|
|
1386
|
+
// @ts-ignore: decorator
|
|
1387
|
+
@inline
|
|
1388
|
+
export function log_lut(x: f64): f64 {
|
|
1389
|
+
const N_MASK = (1 << LOG_TABLE_BITS) - 1;
|
|
1390
|
+
|
|
1391
|
+
const
|
|
1392
|
+
B0 = reinterpret<f64>(0xBFE0000000000000), // -0x1p-1
|
|
1393
|
+
B1 = reinterpret<f64>(0x3FD5555555555577), // 0x1.5555555555577p-2
|
|
1394
|
+
B2 = reinterpret<f64>(0xBFCFFFFFFFFFFDCB), // -0x1.ffffffffffdcbp-3
|
|
1395
|
+
B3 = reinterpret<f64>(0x3FC999999995DD0C), // 0x1.999999995dd0cp-3
|
|
1396
|
+
B4 = reinterpret<f64>(0xBFC55555556745A7), // -0x1.55555556745a7p-3
|
|
1397
|
+
B5 = reinterpret<f64>(0x3FC24924A344DE30), // 0x1.24924a344de3p-3
|
|
1398
|
+
B6 = reinterpret<f64>(0xBFBFFFFFA4423D65), // -0x1.fffffa4423d65p-4
|
|
1399
|
+
B7 = reinterpret<f64>(0x3FBC7184282AD6CA), // 0x1.c7184282ad6cap-4
|
|
1400
|
+
B8 = reinterpret<f64>(0xBFB999EB43B068FF), // -0x1.999eb43b068ffp-4
|
|
1401
|
+
B9 = reinterpret<f64>(0x3FB78182F7AFD085), // 0x1.78182f7afd085p-4
|
|
1402
|
+
B10 = reinterpret<f64>(0xBFB5521375D145CD); // -0x1.5521375d145cdp-4
|
|
1403
|
+
|
|
1404
|
+
const
|
|
1405
|
+
A0 = reinterpret<f64>(0xBFE0000000000001), // -0x1.0000000000001p-1
|
|
1406
|
+
A1 = reinterpret<f64>(0x3FD555555551305B), // 0x1.555555551305bp-2
|
|
1407
|
+
A2 = reinterpret<f64>(0xBFCFFFFFFFEB4590), // -0x1.fffffffeb459p-3
|
|
1408
|
+
A3 = reinterpret<f64>(0x3FC999B324F10111), // 0x1.999b324f10111p-3
|
|
1409
|
+
A4 = reinterpret<f64>(0xBFC55575E506C89F); // -0x1.55575e506c89fp-3
|
|
1410
|
+
|
|
1411
|
+
const
|
|
1412
|
+
LO: u64 = 0x3FEE000000000000,
|
|
1413
|
+
HI: u64 = 0x3FF1090000000000;
|
|
1414
|
+
|
|
1415
|
+
const
|
|
1416
|
+
Ln2hi = reinterpret<f64>(0x3FE62E42FEFA3800), // 0x1.62e42fefa3800p-1
|
|
1417
|
+
Ln2lo = reinterpret<f64>(0x3D2EF35793C76730), // 0x1.ef35793c76730p-45
|
|
1418
|
+
Ox1p27 = reinterpret<f64>(0x41A0000000000000), // 0x1p27
|
|
1419
|
+
Ox1p52 = reinterpret<f64>(0x4330000000000000); // 0x1p52
|
|
1420
|
+
|
|
1421
|
+
let ix = reinterpret<u64>(x);
|
|
1422
|
+
if (ix - LO < HI - LO) {
|
|
1423
|
+
let r = x - 1.0;
|
|
1424
|
+
let r2 = r * r;
|
|
1425
|
+
let r3 = r2 * r;
|
|
1426
|
+
let y =
|
|
1427
|
+
r3 * (B1 + r * B2 + r2 * B3 +
|
|
1428
|
+
r3 * (B4 + r * B5 + r2 * B6 +
|
|
1429
|
+
r3 * (B7 + r * B8 + r2 * B9 + r3 * B10)));
|
|
1430
|
+
// Worst-case error is around 0.507 ULP
|
|
1431
|
+
let w = r * Ox1p27;
|
|
1432
|
+
let rhi = r + w - w;
|
|
1433
|
+
let rlo = r - rhi;
|
|
1434
|
+
w = rhi * rhi * B0; // B[0] == -0.5
|
|
1435
|
+
let hi = r + w;
|
|
1436
|
+
let lo = r - hi + w;
|
|
1437
|
+
lo += B0 * rlo * (rhi + r);
|
|
1438
|
+
return y + lo + hi;
|
|
1439
|
+
}
|
|
1440
|
+
let top = u32(ix >> 48);
|
|
1441
|
+
if (top - 0x0010 >= 0x7FF0 - 0x0010) {
|
|
1442
|
+
// x < 0x1p-1022 or inf or nan
|
|
1443
|
+
if ((ix << 1) == 0) return -1.0 / (x * x);
|
|
1444
|
+
if (ix == reinterpret<u64>(Infinity)) return x; // log(inf) == inf
|
|
1445
|
+
if ((top & 0x8000) || (top & 0x7FF0) == 0x7FF0) return (x - x) / (x - x);
|
|
1446
|
+
// x is subnormal, normalize it
|
|
1447
|
+
ix = reinterpret<u64>(x * Ox1p52);
|
|
1448
|
+
ix -= u64(52) << 52;
|
|
1449
|
+
}
|
|
1450
|
+
|
|
1451
|
+
// x = 2^k z; where z is in range [OFF,2*OFF) and exact.
|
|
1452
|
+
// The range is split into N subintervals.
|
|
1453
|
+
// The ith subinterval contains z and c is near its center.
|
|
1454
|
+
let tmp = ix - 0x3FE6000000000000;
|
|
1455
|
+
let i = <usize>((tmp >> (52 - LOG_TABLE_BITS)) & N_MASK);
|
|
1456
|
+
let k = <i64>tmp >> 52;
|
|
1457
|
+
let iz = ix - (tmp & (u64(0xFFF) << 52));
|
|
1458
|
+
|
|
1459
|
+
let invc = load<f64>(LOG_DATA_TAB1 + (i << (1 + alignof<f64>())), 0 << alignof<f64>()); // T[i].invc;
|
|
1460
|
+
let logc = load<f64>(LOG_DATA_TAB1 + (i << (1 + alignof<f64>())), 1 << alignof<f64>()); // T[i].logc;
|
|
1461
|
+
let z = reinterpret<f64>(iz);
|
|
1462
|
+
|
|
1463
|
+
// log(x) = log1p(z/c-1) + log(c) + k*Ln2.
|
|
1464
|
+
// r ~= z/c - 1, |r| < 1/(2*N)
|
|
1465
|
+
// #if __FP_FAST_FMA
|
|
1466
|
+
// // rounding error: 0x1p-55/N
|
|
1467
|
+
// r = __builtin_fma(z, invc, -1.0);
|
|
1468
|
+
// #else
|
|
1469
|
+
// rounding error: 0x1p-55/N + 0x1p-66
|
|
1470
|
+
const chi = load<f64>(LOG_DATA_TAB2 + (i << (1 + alignof<f64>())), 0 << alignof<f64>()); // T2[i].chi
|
|
1471
|
+
const clo = load<f64>(LOG_DATA_TAB2 + (i << (1 + alignof<f64>())), 1 << alignof<f64>()); // T2[i].clo
|
|
1472
|
+
let r = (z - chi - clo) * invc;
|
|
1473
|
+
// #endif
|
|
1474
|
+
let kd = <f64>k;
|
|
1475
|
+
|
|
1476
|
+
// hi + lo = r + log(c) + k*Ln2
|
|
1477
|
+
let w = kd * Ln2hi + logc;
|
|
1478
|
+
let hi = w + r;
|
|
1479
|
+
let lo = w - hi + r + kd * Ln2lo;
|
|
1480
|
+
|
|
1481
|
+
// log(x) = lo + (log1p(r) - r) + hi
|
|
1482
|
+
let r2 = r * r; // rounding error: 0x1p-54/N^2
|
|
1483
|
+
// Worst case error if |y| > 0x1p-5:
|
|
1484
|
+
// 0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
|
|
1485
|
+
// Worst case error if |y| > 0x1p-4:
|
|
1486
|
+
// 0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma).
|
|
1487
|
+
return lo + r2 * A0 + r * r2 * (A1 + r * A2 + r2 * (A3 + r * A4)) + hi;
|
|
1488
|
+
}
|
|
1489
|
+
|
|
1490
|
+
//
|
|
1491
|
+
// Lookup data for pow. See: https://git.musl-libc.org/cgit/musl/tree/src/math/pow.c
|
|
1492
|
+
//
|
|
1493
|
+
|
|
1494
|
+
// @ts-ignore: decorator
|
|
1495
|
+
@inline const POW_LOG_TABLE_BITS = 7;
|
|
1496
|
+
|
|
1497
|
+
/* Algorithm:
|
|
1498
|
+
|
|
1499
|
+
x = 2^k z
|
|
1500
|
+
log(x) = k ln2 + log(c) + log(z/c)
|
|
1501
|
+
log(z/c) = poly(z/c - 1)
|
|
1502
|
+
|
|
1503
|
+
where z is in [0x1.69555p-1; 0x1.69555p0] which is split into N subintervals
|
|
1504
|
+
and z falls into the ith one, then table entries are computed as
|
|
1505
|
+
|
|
1506
|
+
tab[i].invc = 1/c
|
|
1507
|
+
tab[i].logc = round(0x1p43*log(c))/0x1p43
|
|
1508
|
+
tab[i].logctail = (double)(log(c) - logc)
|
|
1509
|
+
|
|
1510
|
+
where c is chosen near the center of the subinterval such that 1/c has only a
|
|
1511
|
+
few precision bits so z/c - 1 is exactly representible as double:
|
|
1512
|
+
|
|
1513
|
+
1/c = center < 1 ? round(N/center)/N : round(2*N/center)/N/2
|
|
1514
|
+
|
|
1515
|
+
Note: |z/c - 1| < 1/N for the chosen c, |log(c) - logc - logctail| < 0x1p-97,
|
|
1516
|
+
the last few bits of logc are rounded away so k*ln2hi + logc has no rounding
|
|
1517
|
+
error and the interval for z is selected such that near x == 1, where log(x)
|
|
1518
|
+
is tiny, large cancellation error is avoided in logc + poly(z/c - 1). */
|
|
1519
|
+
|
|
1520
|
+
// @ts-ignore: decorator
|
|
1521
|
+
@lazy @inline const POW_LOG_DATA_TAB = memory.data<u64>([
|
|
1522
|
+
// invc ,pad, logc , logctail
|
|
1523
|
+
0x3FF6A00000000000, 0, 0xBFD62C82F2B9C800, 0x3CFAB42428375680,
|
|
1524
|
+
0x3FF6800000000000, 0, 0xBFD5D1BDBF580800, 0xBD1CA508D8E0F720,
|
|
1525
|
+
0x3FF6600000000000, 0, 0xBFD5767717455800, 0xBD2362A4D5B6506D,
|
|
1526
|
+
0x3FF6400000000000, 0, 0xBFD51AAD872DF800, 0xBCE684E49EB067D5,
|
|
1527
|
+
0x3FF6200000000000, 0, 0xBFD4BE5F95777800, 0xBD041B6993293EE0,
|
|
1528
|
+
0x3FF6000000000000, 0, 0xBFD4618BC21C6000, 0x3D13D82F484C84CC,
|
|
1529
|
+
0x3FF5E00000000000, 0, 0xBFD404308686A800, 0x3CDC42F3ED820B3A,
|
|
1530
|
+
0x3FF5C00000000000, 0, 0xBFD3A64C55694800, 0x3D20B1C686519460,
|
|
1531
|
+
0x3FF5A00000000000, 0, 0xBFD347DD9A988000, 0x3D25594DD4C58092,
|
|
1532
|
+
0x3FF5800000000000, 0, 0xBFD2E8E2BAE12000, 0x3D267B1E99B72BD8,
|
|
1533
|
+
0x3FF5600000000000, 0, 0xBFD2895A13DE8800, 0x3D15CA14B6CFB03F,
|
|
1534
|
+
0x3FF5600000000000, 0, 0xBFD2895A13DE8800, 0x3D15CA14B6CFB03F,
|
|
1535
|
+
0x3FF5400000000000, 0, 0xBFD22941FBCF7800, 0xBD165A242853DA76,
|
|
1536
|
+
0x3FF5200000000000, 0, 0xBFD1C898C1699800, 0xBD1FAFBC68E75404,
|
|
1537
|
+
0x3FF5000000000000, 0, 0xBFD1675CABABA800, 0x3D1F1FC63382A8F0,
|
|
1538
|
+
0x3FF4E00000000000, 0, 0xBFD1058BF9AE4800, 0xBD26A8C4FD055A66,
|
|
1539
|
+
0x3FF4C00000000000, 0, 0xBFD0A324E2739000, 0xBD0C6BEE7EF4030E,
|
|
1540
|
+
0x3FF4A00000000000, 0, 0xBFD0402594B4D000, 0xBCF036B89EF42D7F,
|
|
1541
|
+
0x3FF4A00000000000, 0, 0xBFD0402594B4D000, 0xBCF036B89EF42D7F,
|
|
1542
|
+
0x3FF4800000000000, 0, 0xBFCFB9186D5E4000, 0x3D0D572AAB993C87,
|
|
1543
|
+
0x3FF4600000000000, 0, 0xBFCEF0ADCBDC6000, 0x3D2B26B79C86AF24,
|
|
1544
|
+
0x3FF4400000000000, 0, 0xBFCE27076E2AF000, 0xBD172F4F543FFF10,
|
|
1545
|
+
0x3FF4200000000000, 0, 0xBFCD5C216B4FC000, 0x3D21BA91BBCA681B,
|
|
1546
|
+
0x3FF4000000000000, 0, 0xBFCC8FF7C79AA000, 0x3D27794F689F8434,
|
|
1547
|
+
0x3FF4000000000000, 0, 0xBFCC8FF7C79AA000, 0x3D27794F689F8434,
|
|
1548
|
+
0x3FF3E00000000000, 0, 0xBFCBC286742D9000, 0x3D194EB0318BB78F,
|
|
1549
|
+
0x3FF3C00000000000, 0, 0xBFCAF3C94E80C000, 0x3CBA4E633FCD9066,
|
|
1550
|
+
0x3FF3A00000000000, 0, 0xBFCA23BC1FE2B000, 0xBD258C64DC46C1EA,
|
|
1551
|
+
0x3FF3A00000000000, 0, 0xBFCA23BC1FE2B000, 0xBD258C64DC46C1EA,
|
|
1552
|
+
0x3FF3800000000000, 0, 0xBFC9525A9CF45000, 0xBD2AD1D904C1D4E3,
|
|
1553
|
+
0x3FF3600000000000, 0, 0xBFC87FA06520D000, 0x3D2BBDBF7FDBFA09,
|
|
1554
|
+
0x3FF3400000000000, 0, 0xBFC7AB890210E000, 0x3D2BDB9072534A58,
|
|
1555
|
+
0x3FF3400000000000, 0, 0xBFC7AB890210E000, 0x3D2BDB9072534A58,
|
|
1556
|
+
0x3FF3200000000000, 0, 0xBFC6D60FE719D000, 0xBD10E46AA3B2E266,
|
|
1557
|
+
0x3FF3000000000000, 0, 0xBFC5FF3070A79000, 0xBD1E9E439F105039,
|
|
1558
|
+
0x3FF3000000000000, 0, 0xBFC5FF3070A79000, 0xBD1E9E439F105039,
|
|
1559
|
+
0x3FF2E00000000000, 0, 0xBFC526E5E3A1B000, 0xBD20DE8B90075B8F,
|
|
1560
|
+
0x3FF2C00000000000, 0, 0xBFC44D2B6CCB8000, 0x3D170CC16135783C,
|
|
1561
|
+
0x3FF2C00000000000, 0, 0xBFC44D2B6CCB8000, 0x3D170CC16135783C,
|
|
1562
|
+
0x3FF2A00000000000, 0, 0xBFC371FC201E9000, 0x3CF178864D27543A,
|
|
1563
|
+
0x3FF2800000000000, 0, 0xBFC29552F81FF000, 0xBD248D301771C408,
|
|
1564
|
+
0x3FF2600000000000, 0, 0xBFC1B72AD52F6000, 0xBD2E80A41811A396,
|
|
1565
|
+
0x3FF2600000000000, 0, 0xBFC1B72AD52F6000, 0xBD2E80A41811A396,
|
|
1566
|
+
0x3FF2400000000000, 0, 0xBFC0D77E7CD09000, 0x3D0A699688E85BF4,
|
|
1567
|
+
0x3FF2400000000000, 0, 0xBFC0D77E7CD09000, 0x3D0A699688E85BF4,
|
|
1568
|
+
0x3FF2200000000000, 0, 0xBFBFEC9131DBE000, 0xBD2575545CA333F2,
|
|
1569
|
+
0x3FF2000000000000, 0, 0xBFBE27076E2B0000, 0x3D2A342C2AF0003C,
|
|
1570
|
+
0x3FF2000000000000, 0, 0xBFBE27076E2B0000, 0x3D2A342C2AF0003C,
|
|
1571
|
+
0x3FF1E00000000000, 0, 0xBFBC5E548F5BC000, 0xBD1D0C57585FBE06,
|
|
1572
|
+
0x3FF1C00000000000, 0, 0xBFBA926D3A4AE000, 0x3D253935E85BAAC8,
|
|
1573
|
+
0x3FF1C00000000000, 0, 0xBFBA926D3A4AE000, 0x3D253935E85BAAC8,
|
|
1574
|
+
0x3FF1A00000000000, 0, 0xBFB8C345D631A000, 0x3D137C294D2F5668,
|
|
1575
|
+
0x3FF1A00000000000, 0, 0xBFB8C345D631A000, 0x3D137C294D2F5668,
|
|
1576
|
+
0x3FF1800000000000, 0, 0xBFB6F0D28AE56000, 0xBD269737C93373DA,
|
|
1577
|
+
0x3FF1600000000000, 0, 0xBFB51B073F062000, 0x3D1F025B61C65E57,
|
|
1578
|
+
0x3FF1600000000000, 0, 0xBFB51B073F062000, 0x3D1F025B61C65E57,
|
|
1579
|
+
0x3FF1400000000000, 0, 0xBFB341D7961BE000, 0x3D2C5EDACCF913DF,
|
|
1580
|
+
0x3FF1400000000000, 0, 0xBFB341D7961BE000, 0x3D2C5EDACCF913DF,
|
|
1581
|
+
0x3FF1200000000000, 0, 0xBFB16536EEA38000, 0x3D147C5E768FA309,
|
|
1582
|
+
0x3FF1000000000000, 0, 0xBFAF0A30C0118000, 0x3D2D599E83368E91,
|
|
1583
|
+
0x3FF1000000000000, 0, 0xBFAF0A30C0118000, 0x3D2D599E83368E91,
|
|
1584
|
+
0x3FF0E00000000000, 0, 0xBFAB42DD71198000, 0x3D1C827AE5D6704C,
|
|
1585
|
+
0x3FF0E00000000000, 0, 0xBFAB42DD71198000, 0x3D1C827AE5D6704C,
|
|
1586
|
+
0x3FF0C00000000000, 0, 0xBFA77458F632C000, 0xBD2CFC4634F2A1EE,
|
|
1587
|
+
0x3FF0C00000000000, 0, 0xBFA77458F632C000, 0xBD2CFC4634F2A1EE,
|
|
1588
|
+
0x3FF0A00000000000, 0, 0xBFA39E87B9FEC000, 0x3CF502B7F526FEAA,
|
|
1589
|
+
0x3FF0A00000000000, 0, 0xBFA39E87B9FEC000, 0x3CF502B7F526FEAA,
|
|
1590
|
+
0x3FF0800000000000, 0, 0xBF9F829B0E780000, 0xBD2980267C7E09E4,
|
|
1591
|
+
0x3FF0800000000000, 0, 0xBF9F829B0E780000, 0xBD2980267C7E09E4,
|
|
1592
|
+
0x3FF0600000000000, 0, 0xBF97B91B07D58000, 0xBD288D5493FAA639,
|
|
1593
|
+
0x3FF0400000000000, 0, 0xBF8FC0A8B0FC0000, 0xBCDF1E7CF6D3A69C,
|
|
1594
|
+
0x3FF0400000000000, 0, 0xBF8FC0A8B0FC0000, 0xBCDF1E7CF6D3A69C,
|
|
1595
|
+
0x3FF0200000000000, 0, 0xBF7FE02A6B100000, 0xBD19E23F0DDA40E4,
|
|
1596
|
+
0x3FF0200000000000, 0, 0xBF7FE02A6B100000, 0xBD19E23F0DDA40E4,
|
|
1597
|
+
0x3FF0000000000000, 0, 0, 0,
|
|
1598
|
+
0x3FF0000000000000, 0, 0, 0,
|
|
1599
|
+
0x3FEFC00000000000, 0, 0x3F80101575890000, 0xBD10C76B999D2BE8,
|
|
1600
|
+
0x3FEF800000000000, 0, 0x3F90205658938000, 0xBD23DC5B06E2F7D2,
|
|
1601
|
+
0x3FEF400000000000, 0, 0x3F98492528C90000, 0xBD2AA0BA325A0C34,
|
|
1602
|
+
0x3FEF000000000000, 0, 0x3FA0415D89E74000, 0x3D0111C05CF1D753,
|
|
1603
|
+
0x3FEEC00000000000, 0, 0x3FA466AED42E0000, 0xBD2C167375BDFD28,
|
|
1604
|
+
0x3FEE800000000000, 0, 0x3FA894AA149FC000, 0xBD197995D05A267D,
|
|
1605
|
+
0x3FEE400000000000, 0, 0x3FACCB73CDDDC000, 0xBD1A68F247D82807,
|
|
1606
|
+
0x3FEE200000000000, 0, 0x3FAEEA31C006C000, 0xBD0E113E4FC93B7B,
|
|
1607
|
+
0x3FEDE00000000000, 0, 0x3FB1973BD1466000, 0xBD25325D560D9E9B,
|
|
1608
|
+
0x3FEDA00000000000, 0, 0x3FB3BDF5A7D1E000, 0x3D2CC85EA5DB4ED7,
|
|
1609
|
+
0x3FED600000000000, 0, 0x3FB5E95A4D97A000, 0xBD2C69063C5D1D1E,
|
|
1610
|
+
0x3FED400000000000, 0, 0x3FB700D30AEAC000, 0x3CEC1E8DA99DED32,
|
|
1611
|
+
0x3FED000000000000, 0, 0x3FB9335E5D594000, 0x3D23115C3ABD47DA,
|
|
1612
|
+
0x3FECC00000000000, 0, 0x3FBB6AC88DAD6000, 0xBD1390802BF768E5,
|
|
1613
|
+
0x3FECA00000000000, 0, 0x3FBC885801BC4000, 0x3D2646D1C65AACD3,
|
|
1614
|
+
0x3FEC600000000000, 0, 0x3FBEC739830A2000, 0xBD2DC068AFE645E0,
|
|
1615
|
+
0x3FEC400000000000, 0, 0x3FBFE89139DBE000, 0xBD2534D64FA10AFD,
|
|
1616
|
+
0x3FEC000000000000, 0, 0x3FC1178E8227E000, 0x3D21EF78CE2D07F2,
|
|
1617
|
+
0x3FEBE00000000000, 0, 0x3FC1AA2B7E23F000, 0x3D2CA78E44389934,
|
|
1618
|
+
0x3FEBA00000000000, 0, 0x3FC2D1610C868000, 0x3D039D6CCB81B4A1,
|
|
1619
|
+
0x3FEB800000000000, 0, 0x3FC365FCB0159000, 0x3CC62FA8234B7289,
|
|
1620
|
+
0x3FEB400000000000, 0, 0x3FC4913D8333B000, 0x3D25837954FDB678,
|
|
1621
|
+
0x3FEB200000000000, 0, 0x3FC527E5E4A1B000, 0x3D2633E8E5697DC7,
|
|
1622
|
+
0x3FEAE00000000000, 0, 0x3FC6574EBE8C1000, 0x3D19CF8B2C3C2E78,
|
|
1623
|
+
0x3FEAC00000000000, 0, 0x3FC6F0128B757000, 0xBD25118DE59C21E1,
|
|
1624
|
+
0x3FEAA00000000000, 0, 0x3FC7898D85445000, 0xBD1C661070914305,
|
|
1625
|
+
0x3FEA600000000000, 0, 0x3FC8BEAFEB390000, 0xBD073D54AAE92CD1,
|
|
1626
|
+
0x3FEA400000000000, 0, 0x3FC95A5ADCF70000, 0x3D07F22858A0FF6F,
|
|
1627
|
+
0x3FEA000000000000, 0, 0x3FCA93ED3C8AE000, 0xBD28724350562169,
|
|
1628
|
+
0x3FE9E00000000000, 0, 0x3FCB31D8575BD000, 0xBD0C358D4EACE1AA,
|
|
1629
|
+
0x3FE9C00000000000, 0, 0x3FCBD087383BE000, 0xBD2D4BC4595412B6,
|
|
1630
|
+
0x3FE9A00000000000, 0, 0x3FCC6FFBC6F01000, 0xBCF1EC72C5962BD2,
|
|
1631
|
+
0x3FE9600000000000, 0, 0x3FCDB13DB0D49000, 0xBD2AFF2AF715B035,
|
|
1632
|
+
0x3FE9400000000000, 0, 0x3FCE530EFFE71000, 0x3CC212276041F430,
|
|
1633
|
+
0x3FE9200000000000, 0, 0x3FCEF5ADE4DD0000, 0xBCCA211565BB8E11,
|
|
1634
|
+
0x3FE9000000000000, 0, 0x3FCF991C6CB3B000, 0x3D1BCBECCA0CDF30,
|
|
1635
|
+
0x3FE8C00000000000, 0, 0x3FD07138604D5800, 0x3CF89CDB16ED4E91,
|
|
1636
|
+
0x3FE8A00000000000, 0, 0x3FD0C42D67616000, 0x3D27188B163CEAE9,
|
|
1637
|
+
0x3FE8800000000000, 0, 0x3FD1178E8227E800, 0xBD2C210E63A5F01C,
|
|
1638
|
+
0x3FE8600000000000, 0, 0x3FD16B5CCBACF800, 0x3D2B9ACDF7A51681,
|
|
1639
|
+
0x3FE8400000000000, 0, 0x3FD1BF99635A6800, 0x3D2CA6ED5147BDB7,
|
|
1640
|
+
0x3FE8200000000000, 0, 0x3FD214456D0EB800, 0x3D0A87DEBA46BAEA,
|
|
1641
|
+
0x3FE7E00000000000, 0, 0x3FD2BEF07CDC9000, 0x3D2A9CFA4A5004F4,
|
|
1642
|
+
0x3FE7C00000000000, 0, 0x3FD314F1E1D36000, 0xBD28E27AD3213CB8,
|
|
1643
|
+
0x3FE7A00000000000, 0, 0x3FD36B6776BE1000, 0x3D116ECDB0F177C8,
|
|
1644
|
+
0x3FE7800000000000, 0, 0x3FD3C25277333000, 0x3D183B54B606BD5C,
|
|
1645
|
+
0x3FE7600000000000, 0, 0x3FD419B423D5E800, 0x3D08E436EC90E09D,
|
|
1646
|
+
0x3FE7400000000000, 0, 0x3FD4718DC271C800, 0xBD2F27CE0967D675,
|
|
1647
|
+
0x3FE7200000000000, 0, 0x3FD4C9E09E173000, 0xBD2E20891B0AD8A4,
|
|
1648
|
+
0x3FE7000000000000, 0, 0x3FD522AE0738A000, 0x3D2EBE708164C759,
|
|
1649
|
+
0x3FE6E00000000000, 0, 0x3FD57BF753C8D000, 0x3D1FADEDEE5D40EF,
|
|
1650
|
+
0x3FE6C00000000000, 0, 0x3FD5D5BDDF596000, 0xBD0A0B2A08A465DC
|
|
1651
|
+
]);
|
|
1652
|
+
|
|
1653
|
+
// Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
|
|
1654
|
+
// the bit representation of a non-zero finite floating-point value.
|
|
1655
|
+
// @ts-ignore: decorator
|
|
1656
|
+
@inline
|
|
1657
|
+
function checkint(iy: u64): i32 {
|
|
1658
|
+
let e = iy >> 52 & 0x7FF;
|
|
1659
|
+
if (e < 0x3FF ) return 0;
|
|
1660
|
+
if (e > 0x3FF + 52) return 2;
|
|
1661
|
+
e = u64(1) << (0x3FF + 52 - e);
|
|
1662
|
+
if (iy & (e - 1)) return 0;
|
|
1663
|
+
if (iy & e ) return 1;
|
|
1664
|
+
return 2;
|
|
1665
|
+
}
|
|
1666
|
+
|
|
1667
|
+
// @ts-ignore: decorator
|
|
1668
|
+
@inline
|
|
1669
|
+
function xflow(sign: u32, y: f64): f64 {
|
|
1670
|
+
return select(-y, y, sign) * y;
|
|
1671
|
+
}
|
|
1672
|
+
|
|
1673
|
+
// @ts-ignore: decorator
|
|
1674
|
+
@inline
|
|
1675
|
+
function uflow(sign: u32): f64 {
|
|
1676
|
+
return xflow(sign, reinterpret<f64>(0x1000000000000000)); // 0x1p-767
|
|
1677
|
+
}
|
|
1678
|
+
|
|
1679
|
+
// @ts-ignore: decorator
|
|
1680
|
+
@inline
|
|
1681
|
+
function oflow(sign: u32): f64 {
|
|
1682
|
+
return xflow(sign, reinterpret<f64>(0x7000000000000000)); // 0x1p769
|
|
1683
|
+
}
|
|
1684
|
+
|
|
1685
|
+
// Returns 1 if input is the bit representation of 0, infinity or nan.
|
|
1686
|
+
// @ts-ignore: decorator
|
|
1687
|
+
@inline
|
|
1688
|
+
function zeroinfnan(u: u64): bool {
|
|
1689
|
+
return (u << 1) - 1 >= 0xFFE0000000000000 - 1;
|
|
1690
|
+
}
|
|
1691
|
+
|
|
1692
|
+
// @ts-ignore: decorator
|
|
1693
|
+
@lazy let log_tail: f64 = 0;
|
|
1694
|
+
|
|
1695
|
+
// Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
|
|
1696
|
+
// additional 15 bits precision. IX is the bit representation of x, but
|
|
1697
|
+
// normalized in the subnormal range using the sign bit for the exponent.
|
|
1698
|
+
// @ts-ignore: decorator
|
|
1699
|
+
@inline
|
|
1700
|
+
function log_inline(ix: u64): f64 {
|
|
1701
|
+
const N = 1 << POW_LOG_TABLE_BITS;
|
|
1702
|
+
const N_MASK = N - 1;
|
|
1703
|
+
|
|
1704
|
+
const
|
|
1705
|
+
Ln2hi = reinterpret<f64>(0x3FE62E42FEFA3800),
|
|
1706
|
+
Ln2lo = reinterpret<f64>(0x3D2EF35793C76730);
|
|
1707
|
+
|
|
1708
|
+
const
|
|
1709
|
+
A0 = reinterpret<f64>(0xBFE0000000000000),
|
|
1710
|
+
A1 = reinterpret<f64>(0xBFE5555555555560),
|
|
1711
|
+
A2 = reinterpret<f64>(0x3FE0000000000006),
|
|
1712
|
+
A3 = reinterpret<f64>(0x3FE999999959554E),
|
|
1713
|
+
A4 = reinterpret<f64>(0xBFE555555529A47A),
|
|
1714
|
+
A5 = reinterpret<f64>(0xBFF2495B9B4845E9),
|
|
1715
|
+
A6 = reinterpret<f64>(0x3FF0002B8B263FC3);
|
|
1716
|
+
|
|
1717
|
+
// x = 2^k z; where z is in range [OFF,2*OFF) and exact.
|
|
1718
|
+
// The range is split into N subintervals.
|
|
1719
|
+
// The ith subinterval contains z and c is near its center.
|
|
1720
|
+
let tmp = ix - 0x3fE6955500000000;
|
|
1721
|
+
let i = usize((tmp >> (52 - POW_LOG_TABLE_BITS)) & N_MASK);
|
|
1722
|
+
let k = <i64>tmp >> 52;
|
|
1723
|
+
let iz = ix - (tmp & u64(0xFFF) << 52);
|
|
1724
|
+
let z = reinterpret<f64>(iz);
|
|
1725
|
+
let kd = <f64>k;
|
|
1726
|
+
|
|
1727
|
+
// log(x) = k*Ln2 + log(c) + log1p(z/c-1).
|
|
1728
|
+
let invc = load<f64>(POW_LOG_DATA_TAB + (i << (2 + alignof<f64>())), 0 << alignof<f64>()); // tab[i].invc
|
|
1729
|
+
let logc = load<f64>(POW_LOG_DATA_TAB + (i << (2 + alignof<f64>())), 2 << alignof<f64>()); // tab[i].logc
|
|
1730
|
+
let logctail = load<f64>(POW_LOG_DATA_TAB + (i << (2 + alignof<f64>())), 3 << alignof<f64>()); // tab[i].logctail
|
|
1731
|
+
|
|
1732
|
+
// Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
|
|
1733
|
+
// |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible.
|
|
1734
|
+
// Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|.
|
|
1735
|
+
let zhi = reinterpret<f64>((iz + u64(0x80000000)) & 0xFFFFFFFF00000000);
|
|
1736
|
+
let zlo = z - zhi;
|
|
1737
|
+
let rhi = zhi * invc - 1.0;
|
|
1738
|
+
let rlo = zlo * invc;
|
|
1739
|
+
let r = rhi + rlo;
|
|
1740
|
+
|
|
1741
|
+
// k * Ln2 + log(c) + r.
|
|
1742
|
+
let t1 = kd * Ln2hi + logc;
|
|
1743
|
+
let t2 = t1 + r;
|
|
1744
|
+
let lo1 = kd * Ln2lo + logctail;
|
|
1745
|
+
let lo2 = t1 - t2 + r;
|
|
1746
|
+
|
|
1747
|
+
// Evaluation is optimized assuming superscalar pipelined execution.
|
|
1748
|
+
let ar = A0 * r; // A[0] = -0.5
|
|
1749
|
+
let ar2 = r * ar;
|
|
1750
|
+
let ar3 = r * ar2;
|
|
1751
|
+
// k * Ln2 + log(c) + r + A[0] * r * r.
|
|
1752
|
+
let arhi = A0 * rhi;
|
|
1753
|
+
let arhi2 = rhi * arhi;
|
|
1754
|
+
let hi = t2 + arhi2;
|
|
1755
|
+
let lo3 = rlo * (ar + arhi);
|
|
1756
|
+
let lo4 = t2 - hi + arhi2;
|
|
1757
|
+
|
|
1758
|
+
// p = log1p(r) - r - A[0] * r * r.
|
|
1759
|
+
let p = ar3 * (A1 + r * A2 + ar2 * (A3 + r * A4 + ar2 * (A5 + r * A6)));
|
|
1760
|
+
let lo = lo1 + lo2 + lo3 + lo4 + p;
|
|
1761
|
+
let y = hi + lo;
|
|
1762
|
+
log_tail = hi - y + lo;
|
|
1763
|
+
|
|
1764
|
+
return y;
|
|
1765
|
+
}
|
|
1766
|
+
|
|
1767
|
+
// @ts-ignore: decorator
|
|
1768
|
+
@inline const SIGN_BIAS = 0x800 << EXP_TABLE_BITS;
|
|
1769
|
+
|
|
1770
|
+
// Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
|
|
1771
|
+
// The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1.
|
|
1772
|
+
// @ts-ignore: decorator
|
|
1773
|
+
@inline
|
|
1774
|
+
function exp_inline(x: f64, xtail: f64, sign_bias: u32): f64 {
|
|
1775
|
+
const N = 1 << EXP_TABLE_BITS;
|
|
1776
|
+
const N_MASK = N - 1;
|
|
1777
|
+
|
|
1778
|
+
const
|
|
1779
|
+
InvLn2N = reinterpret<f64>(0x3FF71547652B82FE) * N, // 0x1.71547652b82fep0
|
|
1780
|
+
NegLn2hiN = reinterpret<f64>(0xBF762E42FEFA0000), // -0x1.62e42fefa0000p-8
|
|
1781
|
+
NegLn2loN = reinterpret<f64>(0xBD0CF79ABC9E3B3A), // -0x1.cf79abc9e3b3ap-47
|
|
1782
|
+
shift = reinterpret<f64>(0x4338000000000000); // 0x1.8p52
|
|
1783
|
+
|
|
1784
|
+
const
|
|
1785
|
+
C2 = reinterpret<f64>(0x3FDFFFFFFFFFFDBD), // __exp_data.poly[0] (0x1.ffffffffffdbdp-2)
|
|
1786
|
+
C3 = reinterpret<f64>(0x3FC555555555543C), // __exp_data.poly[1] (0x1.555555555543cp-3)
|
|
1787
|
+
C4 = reinterpret<f64>(0x3FA55555CF172B91), // __exp_data.poly[2] (0x1.55555cf172b91p-5)
|
|
1788
|
+
C5 = reinterpret<f64>(0x3F81111167A4D017); // __exp_data.poly[3] (0x1.1111167a4d017p-7)
|
|
1789
|
+
|
|
1790
|
+
let abstop: u32;
|
|
1791
|
+
let ki: u64, top: u64, sbits: u64;
|
|
1792
|
+
let idx: usize;
|
|
1793
|
+
// double_t for better performance on targets with FLT_EVAL_METHOD==2.
|
|
1794
|
+
let kd: f64, z: f64, r: f64, r2: f64, scale: f64, tail: f64, tmp: f64;
|
|
1795
|
+
|
|
1796
|
+
let ux = reinterpret<u64>(x);
|
|
1797
|
+
abstop = u32(ux >> 52) & 0x7FF;
|
|
1798
|
+
if (abstop - 0x3C9 >= 0x03F) {
|
|
1799
|
+
if (abstop - 0x3C9 >= 0x80000000) {
|
|
1800
|
+
// Avoid spurious underflow for tiny x.
|
|
1801
|
+
// Note: 0 is common input.
|
|
1802
|
+
return select(-1.0, 1.0, sign_bias);
|
|
1803
|
+
}
|
|
1804
|
+
if (abstop >= 0x409) { // top12(1024.0)
|
|
1805
|
+
// Note: inf and nan are already handled.
|
|
1806
|
+
return <i64>ux < 0
|
|
1807
|
+
? uflow(sign_bias)
|
|
1808
|
+
: oflow(sign_bias);
|
|
1809
|
+
}
|
|
1810
|
+
// Large x is special cased below.
|
|
1811
|
+
abstop = 0;
|
|
1812
|
+
}
|
|
1813
|
+
|
|
1814
|
+
// exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)].
|
|
1815
|
+
// x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N].
|
|
1816
|
+
z = InvLn2N * x;
|
|
1817
|
+
|
|
1818
|
+
// #if TOINT_INTRINSICS
|
|
1819
|
+
// kd = roundtoint(z);
|
|
1820
|
+
// ki = converttoint(z);
|
|
1821
|
+
// #elif EXP_USE_TOINT_NARROW
|
|
1822
|
+
// // z - kd is in [-0.5-2^-16, 0.5] in all rounding modes.
|
|
1823
|
+
// kd = eval_as_double(z + shift);
|
|
1824
|
+
// ki = asuint64(kd) >> 16;
|
|
1825
|
+
// kd = (double_t)(int32_t)ki;
|
|
1826
|
+
// #else
|
|
1827
|
+
// z - kd is in [-1, 1] in non-nearest rounding modes
|
|
1828
|
+
kd = z + shift;
|
|
1829
|
+
ki = reinterpret<u64>(kd);
|
|
1830
|
+
kd -= shift;
|
|
1831
|
+
// #endif
|
|
1832
|
+
r = x + kd * NegLn2hiN + kd * NegLn2loN;
|
|
1833
|
+
// The code assumes 2^-200 < |xtail| < 2^-8/N
|
|
1834
|
+
r += xtail;
|
|
1835
|
+
// 2^(k/N) ~= scale * (1 + tail)
|
|
1836
|
+
idx = usize((ki & N_MASK) << 1);
|
|
1837
|
+
top = (ki + sign_bias) << (52 - EXP_TABLE_BITS);
|
|
1838
|
+
|
|
1839
|
+
tail = reinterpret<f64>(load<u64>(EXP_DATA_TAB + (idx << alignof<u64>())));
|
|
1840
|
+
// This is only a valid scale when -1023*N < k < 1024*N
|
|
1841
|
+
sbits = load<u64>(EXP_DATA_TAB + (idx << alignof<u64>()), 1 << alignof<u64>()) + top;
|
|
1842
|
+
// exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1).
|
|
1843
|
+
// Evaluation is optimized assuming superscalar pipelined execution.
|
|
1844
|
+
r2 = r * r;
|
|
1845
|
+
// Without fma the worst case error is 0.25/N ulp larger.
|
|
1846
|
+
// Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp
|
|
1847
|
+
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
|
|
1848
|
+
if (abstop == 0) return specialcase(tmp, sbits, ki);
|
|
1849
|
+
scale = reinterpret<f64>(sbits);
|
|
1850
|
+
// Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
|
|
1851
|
+
// is no spurious underflow here even without fma.
|
|
1852
|
+
return scale + scale * tmp;
|
|
1853
|
+
}
|
|
1854
|
+
|
|
1855
|
+
// @ts-ignore: decorator
|
|
1856
|
+
@inline
|
|
1857
|
+
export function pow_lut(x: f64, y: f64): f64 {
|
|
1858
|
+
const Ox1p52 = reinterpret<f64>(0x4330000000000000); // 0x1p52
|
|
1859
|
+
|
|
1860
|
+
let sign_bias: u32 = 0;
|
|
1861
|
+
let ix = reinterpret<u64>(x);
|
|
1862
|
+
let iy = reinterpret<u64>(y);
|
|
1863
|
+
let topx = ix >> 52;
|
|
1864
|
+
let topy = iy >> 52;
|
|
1865
|
+
|
|
1866
|
+
if (topx - 0x001 >= 0x7FF - 0x001 || (topy & 0x7FF) - 0x3BE >= 0x43e - 0x3BE) {
|
|
1867
|
+
// Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
|
|
1868
|
+
// and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1.
|
|
1869
|
+
// Special cases: (x < 0x1p-126 or inf or nan) or
|
|
1870
|
+
// (|y| < 0x1p-65 or |y| >= 0x1p63 or nan).
|
|
1871
|
+
if (zeroinfnan(iy)) {
|
|
1872
|
+
if ((iy << 1) == 0) return 1.0;
|
|
1873
|
+
if (ix == 0x3FF0000000000000) return NaN; // original: 1.0
|
|
1874
|
+
if ((ix << 1) > 0xFFE0000000000000 || (iy << 1) > 0xFFE0000000000000) return x + y;
|
|
1875
|
+
if ((ix << 1) == 0x7FE0000000000000) return NaN; // original: 1.0
|
|
1876
|
+
if (((ix << 1) < 0x7FE0000000000000) == !(iy >> 63)) return 0; // |x|<1 && y==inf or |x|>1 && y==-inf.
|
|
1877
|
+
return y * y;
|
|
1878
|
+
}
|
|
1879
|
+
if (zeroinfnan(ix)) {
|
|
1880
|
+
let x2 = x * x;
|
|
1881
|
+
if (i32(ix >> 63) && checkint(iy) == 1) x2 = -x2;
|
|
1882
|
+
return <i64>iy < 0 ? 1 / x2 : x2;
|
|
1883
|
+
}
|
|
1884
|
+
// Here x and y are non-zero finite
|
|
1885
|
+
if (<i64>ix < 0) {
|
|
1886
|
+
// Finite x < 0
|
|
1887
|
+
let yint = checkint(iy);
|
|
1888
|
+
if (yint == 0) return (x - x) / (x - x);
|
|
1889
|
+
if (yint == 1) sign_bias = SIGN_BIAS;
|
|
1890
|
+
ix &= 0x7FFFFFFFFFFFFFFF;
|
|
1891
|
+
topx &= 0x7FF;
|
|
1892
|
+
}
|
|
1893
|
+
if ((topy & 0x7FF) - 0x3BE >= 0x43E - 0x3BE) {
|
|
1894
|
+
// Note: sign_bias == 0 here because y is not odd.
|
|
1895
|
+
if (ix == 0x3FF0000000000000) return 1;
|
|
1896
|
+
if ((topy & 0x7FF) < 0x3BE) return 1; // |y| < 2^-65, x^y ~= 1 + y*log(x).
|
|
1897
|
+
return (ix > 0x3FF0000000000000) == (topy < 0x800) ? Infinity : 0;
|
|
1898
|
+
}
|
|
1899
|
+
if (topx == 0) {
|
|
1900
|
+
// Normalize subnormal x so exponent becomes negative.
|
|
1901
|
+
ix = reinterpret<u64>(x * Ox1p52);
|
|
1902
|
+
ix &= 0x7FFFFFFFFFFFFFFF;
|
|
1903
|
+
ix -= u64(52) << 52;
|
|
1904
|
+
}
|
|
1905
|
+
}
|
|
1906
|
+
|
|
1907
|
+
let hi = log_inline(ix);
|
|
1908
|
+
let lo = log_tail;
|
|
1909
|
+
let ehi: f64, elo: f64;
|
|
1910
|
+
// #if __FP_FAST_FMA
|
|
1911
|
+
// ehi = y * hi;
|
|
1912
|
+
// elo = y * lo + __builtin_fma(y, hi, -ehi);
|
|
1913
|
+
// #else
|
|
1914
|
+
let yhi = reinterpret<f64>(iy & 0xFFFFFFFFF8000000);
|
|
1915
|
+
let ylo = y - yhi;
|
|
1916
|
+
let lhi = reinterpret<f64>(reinterpret<u64>(hi) & 0xFFFFFFFFF8000000);
|
|
1917
|
+
let llo = hi - lhi + lo;
|
|
1918
|
+
ehi = yhi * lhi;
|
|
1919
|
+
elo = ylo * lhi + y * llo; // |elo| < |ehi| * 2^-25.
|
|
1920
|
+
// #endif
|
|
1921
|
+
return exp_inline(ehi, elo, sign_bias);
|
|
1922
|
+
}
|