intervals 0.3.56
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION.txt +1 -0
- data/ext/crlibm/AUTHORS +2 -0
- data/ext/crlibm/COPYING +504 -0
- data/ext/crlibm/ChangeLog +80 -0
- data/ext/crlibm/INSTALL +182 -0
- data/ext/crlibm/Makefile.am +84 -0
- data/ext/crlibm/Makefile.in +530 -0
- data/ext/crlibm/NEWS +0 -0
- data/ext/crlibm/README +31 -0
- data/ext/crlibm/TODO +47 -0
- data/ext/crlibm/VERSION +1 -0
- data/ext/crlibm/aclocal.m4 +989 -0
- data/ext/crlibm/atan-itanium.c +846 -0
- data/ext/crlibm/atan-pentium.c +261 -0
- data/ext/crlibm/atan_accurate.c +244 -0
- data/ext/crlibm/atan_accurate.h +191 -0
- data/ext/crlibm/atan_fast.c +324 -0
- data/ext/crlibm/atan_fast.h +678 -0
- data/ext/crlibm/config.guess +1461 -0
- data/ext/crlibm/config.sub +1566 -0
- data/ext/crlibm/configure +7517 -0
- data/ext/crlibm/configure.ac +364 -0
- data/ext/crlibm/crlibm.h +125 -0
- data/ext/crlibm/crlibm_config.h +149 -0
- data/ext/crlibm/crlibm_config.h.in +148 -0
- data/ext/crlibm/crlibm_private.c +293 -0
- data/ext/crlibm/crlibm_private.h +658 -0
- data/ext/crlibm/csh_fast.c +631 -0
- data/ext/crlibm/csh_fast.h +771 -0
- data/ext/crlibm/double-extended.h +496 -0
- data/ext/crlibm/exp-td.c +962 -0
- data/ext/crlibm/exp-td.h +685 -0
- data/ext/crlibm/exp_accurate.c +197 -0
- data/ext/crlibm/exp_accurate.h +85 -0
- data/ext/crlibm/gappa/log-de-E0-logir0.gappa +106 -0
- data/ext/crlibm/gappa/log-de-E0.gappa +79 -0
- data/ext/crlibm/gappa/log-de.gappa +81 -0
- data/ext/crlibm/gappa/log-td-E0-logir0.gappa +126 -0
- data/ext/crlibm/gappa/log-td-E0.gappa +143 -0
- data/ext/crlibm/gappa/log-td-accurate-E0-logir0.gappa +230 -0
- data/ext/crlibm/gappa/log-td-accurate-E0.gappa +213 -0
- data/ext/crlibm/gappa/log-td-accurate.gappa +217 -0
- data/ext/crlibm/gappa/log-td.gappa +156 -0
- data/ext/crlibm/gappa/trigoSinCosCase3.gappa +204 -0
- data/ext/crlibm/gappa/trigoTanCase2.gappa +73 -0
- data/ext/crlibm/install-sh +269 -0
- data/ext/crlibm/log-de.c +431 -0
- data/ext/crlibm/log-de.h +732 -0
- data/ext/crlibm/log-td.c +852 -0
- data/ext/crlibm/log-td.h +819 -0
- data/ext/crlibm/log10-td.c +906 -0
- data/ext/crlibm/log10-td.h +823 -0
- data/ext/crlibm/log2-td.c +935 -0
- data/ext/crlibm/log2-td.h +821 -0
- data/ext/crlibm/maple/atan.mpl +359 -0
- data/ext/crlibm/maple/common-procedures.mpl +997 -0
- data/ext/crlibm/maple/csh.mpl +446 -0
- data/ext/crlibm/maple/double-extended.mpl +151 -0
- data/ext/crlibm/maple/exp-td.mpl +195 -0
- data/ext/crlibm/maple/log-de.mpl +243 -0
- data/ext/crlibm/maple/log-td.mpl +316 -0
- data/ext/crlibm/maple/log10-td.mpl +345 -0
- data/ext/crlibm/maple/log2-td.mpl +334 -0
- data/ext/crlibm/maple/trigo.mpl +728 -0
- data/ext/crlibm/maple/triple-double.mpl +58 -0
- data/ext/crlibm/missing +198 -0
- data/ext/crlibm/mkinstalldirs +40 -0
- data/ext/crlibm/rem_pio2_accurate.c +219 -0
- data/ext/crlibm/rem_pio2_accurate.h +53 -0
- data/ext/crlibm/scs_lib/AUTHORS +3 -0
- data/ext/crlibm/scs_lib/COPYING +504 -0
- data/ext/crlibm/scs_lib/ChangeLog +16 -0
- data/ext/crlibm/scs_lib/INSTALL +215 -0
- data/ext/crlibm/scs_lib/Makefile.am +18 -0
- data/ext/crlibm/scs_lib/Makefile.in +328 -0
- data/ext/crlibm/scs_lib/NEWS +0 -0
- data/ext/crlibm/scs_lib/README +9 -0
- data/ext/crlibm/scs_lib/TODO +4 -0
- data/ext/crlibm/scs_lib/addition_scs.c +623 -0
- data/ext/crlibm/scs_lib/config.guess +1461 -0
- data/ext/crlibm/scs_lib/config.sub +1566 -0
- data/ext/crlibm/scs_lib/configure +6226 -0
- data/ext/crlibm/scs_lib/division_scs.c +110 -0
- data/ext/crlibm/scs_lib/double2scs.c +174 -0
- data/ext/crlibm/scs_lib/install-sh +269 -0
- data/ext/crlibm/scs_lib/missing +198 -0
- data/ext/crlibm/scs_lib/mkinstalldirs +40 -0
- data/ext/crlibm/scs_lib/multiplication_scs.c +456 -0
- data/ext/crlibm/scs_lib/poly_fct.c +112 -0
- data/ext/crlibm/scs_lib/print_scs.c +73 -0
- data/ext/crlibm/scs_lib/rand_scs.c +63 -0
- data/ext/crlibm/scs_lib/scs.h +353 -0
- data/ext/crlibm/scs_lib/scs2double.c +391 -0
- data/ext/crlibm/scs_lib/scs2mpf.c +58 -0
- data/ext/crlibm/scs_lib/scs2mpfr.c +61 -0
- data/ext/crlibm/scs_lib/scs_private.c +23 -0
- data/ext/crlibm/scs_lib/scs_private.h +133 -0
- data/ext/crlibm/scs_lib/tests/tbx_timing.h +102 -0
- data/ext/crlibm/scs_lib/wrapper_scs.h +486 -0
- data/ext/crlibm/scs_lib/zero_scs.c +52 -0
- data/ext/crlibm/stamp-h.in +1 -0
- data/ext/crlibm/tests/Makefile.am +43 -0
- data/ext/crlibm/tests/Makefile.in +396 -0
- data/ext/crlibm/tests/blind_test.c +148 -0
- data/ext/crlibm/tests/generate_test_vectors.c +258 -0
- data/ext/crlibm/tests/soak_test.c +334 -0
- data/ext/crlibm/tests/test_common.c +627 -0
- data/ext/crlibm/tests/test_common.h +28 -0
- data/ext/crlibm/tests/test_perf.c +570 -0
- data/ext/crlibm/tests/test_val.c +249 -0
- data/ext/crlibm/trigo_accurate.c +500 -0
- data/ext/crlibm/trigo_accurate.h +331 -0
- data/ext/crlibm/trigo_fast.c +1219 -0
- data/ext/crlibm/trigo_fast.h +639 -0
- data/ext/crlibm/triple-double.h +878 -0
- data/ext/extconf.rb +31 -0
- data/ext/fpu.c +107 -0
- data/ext/jamis-mod.rb +591 -0
- data/lib/fpu.rb +287 -0
- data/lib/interval.rb +1170 -0
- data/lib/intervals.rb +212 -0
- data/lib/struct_float.rb +133 -0
- data/test/data_atan.txt +360 -0
- data/test/data_cos.txt +346 -0
- data/test/data_cosh.txt +3322 -0
- data/test/data_exp.txt +3322 -0
- data/test/data_log.txt +141 -0
- data/test/data_sin.txt +140 -0
- data/test/data_sinh.txt +3322 -0
- data/test/data_tan.txt +342 -0
- metadata +186 -0
@@ -0,0 +1,935 @@
|
|
1
|
+
/*
|
2
|
+
* This function computes log2, correctly rounded,
|
3
|
+
* using experimental techniques based on triple-double arithmetic
|
4
|
+
|
5
|
+
THIS IS EXPERIMENTAL SOFTWARE
|
6
|
+
|
7
|
+
*
|
8
|
+
* Author : Christoph Lauter
|
9
|
+
* christoph.lauter at ens-lyon.fr
|
10
|
+
*
|
11
|
+
|
12
|
+
To have it replace the crlibm log2, do:
|
13
|
+
|
14
|
+
gcc -DHAVE_CONFIG_H -I. -fPIC -O2 -c log2-td.c; mv log2-td.o log2_accurate.o; make
|
15
|
+
|
16
|
+
|
17
|
+
*/
|
18
|
+
|
19
|
+
|
20
|
+
#include <stdio.h>
|
21
|
+
#include <stdlib.h>
|
22
|
+
#include "crlibm.h"
|
23
|
+
#include "crlibm_private.h"
|
24
|
+
#include "triple-double.h"
|
25
|
+
#include "log2-td.h"
|
26
|
+
|
27
|
+
#define AVOID_FMA 0
|
28
|
+
|
29
|
+
|
30
|
+
void log2_td_accurate(double *logb2h, double *logb2m, double *logb2l, int E, double ed, int index, double zh, double zl, double logih, double logim) {
|
31
|
+
double highPoly, t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l, t8h, t8l, t9h, t9l, t10h, t10l, t11h, t11l;
|
32
|
+
double t12h, t12l, t13h, t13l, t14h, t14l, zSquareh, zSquarem, zSquarel, zCubeh, zCubem, zCubel, higherPolyMultZh, higherPolyMultZm;
|
33
|
+
double higherPolyMultZl, zSquareHalfh, zSquareHalfm, zSquareHalfl, polyWithSquareh, polyWithSquarem, polyWithSquarel;
|
34
|
+
double polyh, polym, polyl, logil, logyh, logym, logyl, loghover, logmover, loglover, log2edhover, log2edmover, log2edlover;
|
35
|
+
double log2edh, log2edm, log2edl, logb2hover, logb2mover, logb2lover;
|
36
|
+
|
37
|
+
|
38
|
+
#if EVAL_PERF
|
39
|
+
crlibm_second_step_taken++;
|
40
|
+
#endif
|
41
|
+
|
42
|
+
|
43
|
+
/* Accurate phase:
|
44
|
+
|
45
|
+
Argument reduction is already done.
|
46
|
+
We must return logh, logm and logl representing the intermediate result in 118 bits precision.
|
47
|
+
|
48
|
+
We use a 14 degree polynomial, computing the first 3 (the first is 0) coefficients in triple double,
|
49
|
+
calculating the next 7 coefficients in double double arithmetics and the last in double.
|
50
|
+
|
51
|
+
We must account for zl starting with the monome of degree 4 (7^3 + 53 - 7 >> 118); so
|
52
|
+
double double calculations won't account for it.
|
53
|
+
|
54
|
+
*/
|
55
|
+
|
56
|
+
/* Start of the horner scheme */
|
57
|
+
|
58
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
59
|
+
highPoly = FMA(FMA(FMA(FMA(accPolyC14,zh,accPolyC13),zh,accPolyC12),zh,accPolyC11),zh,accPolyC10);
|
60
|
+
#else
|
61
|
+
highPoly = accPolyC10 + zh * (accPolyC11 + zh * (accPolyC12 + zh * (accPolyC13 + zh * accPolyC14)));
|
62
|
+
#endif
|
63
|
+
|
64
|
+
/* We want to write
|
65
|
+
|
66
|
+
accPolyC3 + zh * (accPoly4 + zh * (accPoly5 + zh * (accPoly6 + zh * (accPoly7 + zh * (accPoly8 + zh * (accPoly9 + zh * highPoly))))));
|
67
|
+
( t14 t13 t12 t11 t10 t9 t8 t7 t6 t5 t4 t3 t2 t1 )
|
68
|
+
|
69
|
+
with all additions and multiplications in double double arithmetics
|
70
|
+
but we will produce intermediate results labelled t1h/t1l thru t14h/t14l
|
71
|
+
*/
|
72
|
+
|
73
|
+
Mul12(&t1h, &t1l, zh, highPoly);
|
74
|
+
Add22(&t2h, &t2l, accPolyC9h, accPolyC9l, t1h, t1l);
|
75
|
+
Mul22(&t3h, &t3l, zh, zl, t2h, t2l);
|
76
|
+
Add22(&t4h, &t4l, accPolyC8h, accPolyC8l, t3h, t3l);
|
77
|
+
Mul22(&t5h, &t5l, zh, zl, t4h, t4l);
|
78
|
+
Add22(&t6h, &t6l, accPolyC7h, accPolyC7l, t5h, t5l);
|
79
|
+
Mul22(&t7h, &t7l, zh, zl, t6h, t6l);
|
80
|
+
Add22(&t8h, &t8l, accPolyC6h, accPolyC6l, t7h, t7l);
|
81
|
+
Mul22(&t9h, &t9l, zh, zl, t8h, t8l);
|
82
|
+
Add22(&t10h, &t10l, accPolyC5h, accPolyC5l, t9h, t9l);
|
83
|
+
Mul22(&t11h, &t11l, zh, zl, t10h, t10l);
|
84
|
+
Add22(&t12h, &t12l, accPolyC4h, accPolyC4l, t11h, t11l);
|
85
|
+
Mul22(&t13h, &t13l, zh, zl, t12h, t12l);
|
86
|
+
Add22(&t14h, &t14l, accPolyC3h, accPolyC3l, t13h, t13l);
|
87
|
+
|
88
|
+
/* We must now prepare (zh + zl)^2 and (zh + zl)^3 as triple doubles */
|
89
|
+
|
90
|
+
Mul23(&zSquareh, &zSquarem, &zSquarel, zh, zl, zh, zl);
|
91
|
+
Mul233(&zCubeh, &zCubem, &zCubel, zh, zl, zSquareh, zSquarem, zSquarel);
|
92
|
+
|
93
|
+
/* We can now multiplicate the middle and higher polynomial by z^3 */
|
94
|
+
|
95
|
+
Mul233(&higherPolyMultZh, &higherPolyMultZm, &higherPolyMultZl, t14h, t14l, zCubeh, zCubem, zCubel);
|
96
|
+
|
97
|
+
/* Multiply now z^2 by -1/2 (exact op) and add to middle and higher polynomial */
|
98
|
+
|
99
|
+
zSquareHalfh = zSquareh * -0.5;
|
100
|
+
zSquareHalfm = zSquarem * -0.5;
|
101
|
+
zSquareHalfl = zSquarel * -0.5;
|
102
|
+
|
103
|
+
Add33(&polyWithSquareh, &polyWithSquarem, &polyWithSquarel,
|
104
|
+
zSquareHalfh, zSquareHalfm, zSquareHalfl,
|
105
|
+
higherPolyMultZh, higherPolyMultZm, higherPolyMultZl);
|
106
|
+
|
107
|
+
/* Add now zh and zl to obtain the polynomial evaluation result */
|
108
|
+
|
109
|
+
Add233(&polyh, &polym, &polyl, zh, zl, polyWithSquareh, polyWithSquarem, polyWithSquarel);
|
110
|
+
|
111
|
+
/* Reconstruct now log(y) = log(1 + z) - log(ri) by adding logih, logim, logil
|
112
|
+
logil has not been read to the time, do this first
|
113
|
+
*/
|
114
|
+
|
115
|
+
logil = argredtable[index].logil;
|
116
|
+
|
117
|
+
Add33(&logyh, &logym, &logyl, logih, logim, logil, polyh, polym, polyl);
|
118
|
+
|
119
|
+
/* Multiply log2 with E, i.e. log2h, log2m, log2l by ed
|
120
|
+
ed is always less than 2^(12) and log2h and log2m are stored with at least 12 trailing zeros
|
121
|
+
So multiplying naively is correct (up to 134 bits at least)
|
122
|
+
|
123
|
+
The final result is thus obtained by adding log2 * E to log(y)
|
124
|
+
*/
|
125
|
+
|
126
|
+
log2edhover = log2h * ed;
|
127
|
+
log2edmover = log2m * ed;
|
128
|
+
log2edlover = log2l * ed;
|
129
|
+
|
130
|
+
/* It may be necessary to renormalize the tabulated value (multiplied by ed) before adding
|
131
|
+
the to the log(y)-result
|
132
|
+
|
133
|
+
If needed, uncomment the following Renormalize3-Statement and comment out the copies
|
134
|
+
following it.
|
135
|
+
*/
|
136
|
+
|
137
|
+
/* Renormalize3(&log2edh, &log2edm, &log2edl, log2edhover, log2edmover, log2edlover); */
|
138
|
+
|
139
|
+
log2edh = log2edhover;
|
140
|
+
log2edm = log2edmover;
|
141
|
+
log2edl = log2edlover;
|
142
|
+
|
143
|
+
Add33(&loghover, &logmover, &loglover, log2edh, log2edm, log2edl, logyh, logym, logyl);
|
144
|
+
|
145
|
+
|
146
|
+
/* Change logarithm base from natural base to base 2 by multiplying */
|
147
|
+
|
148
|
+
Mul233(&logb2hover, &logb2mover, &logb2lover, log2invh, log2invl, loghover, logmover, loglover);
|
149
|
+
|
150
|
+
|
151
|
+
/* Since we can not guarantee in each addition and multiplication procedure that
|
152
|
+
the results are not overlapping, we must renormalize the result before handing
|
153
|
+
it over to the final rounding
|
154
|
+
*/
|
155
|
+
|
156
|
+
Renormalize3(logb2h,logb2m,logb2l,logb2hover,logb2mover,logb2lover);
|
157
|
+
|
158
|
+
}
|
159
|
+
|
160
|
+
|
161
|
+
|
162
|
+
/*************************************************************
|
163
|
+
*************************************************************
|
164
|
+
* ROUNDED TO NEAREST *
|
165
|
+
*************************************************************
|
166
|
+
*************************************************************/
|
167
|
+
double log2_rn(double x){
|
168
|
+
db_number xdb;
|
169
|
+
double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
|
170
|
+
double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
|
171
|
+
double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
|
172
|
+
double logb2h, logb2m, logb2l;
|
173
|
+
int E, index;
|
174
|
+
|
175
|
+
E=0;
|
176
|
+
xdb.d=x;
|
177
|
+
|
178
|
+
/* Filter cases */
|
179
|
+
if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
|
180
|
+
if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
|
181
|
+
return -1.0/0.0;
|
182
|
+
} /* log(+/-0) = -Inf */
|
183
|
+
if (xdb.i[HI] < 0){
|
184
|
+
return (x-x)/0; /* log(-x) = Nan */
|
185
|
+
}
|
186
|
+
/* Subnormal number */
|
187
|
+
E = -52;
|
188
|
+
xdb.d *= ((db_number) ((double) two52)).d; /* make x a normal number */
|
189
|
+
}
|
190
|
+
|
191
|
+
if (xdb.i[HI] >= 0x7ff00000){
|
192
|
+
return x+x; /* Inf or Nan */
|
193
|
+
}
|
194
|
+
|
195
|
+
|
196
|
+
/* Extract exponent and mantissa
|
197
|
+
Do range reduction,
|
198
|
+
yielding to E holding the exponent and
|
199
|
+
y the mantissa between sqrt(2)/2 and sqrt(2)
|
200
|
+
*/
|
201
|
+
E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
|
202
|
+
index = (xdb.i[HI] & 0x000fffff);
|
203
|
+
xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
|
204
|
+
index = (index + (1<<(20-L-1))) >> (20-L);
|
205
|
+
|
206
|
+
/* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
|
207
|
+
if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
|
208
|
+
xdb.i[HI] -= 0x00100000;
|
209
|
+
E++;
|
210
|
+
}
|
211
|
+
y = xdb.d;
|
212
|
+
index = index & INDEXMASK;
|
213
|
+
/* Cast integer E into double ed for multiplication later */
|
214
|
+
ed = (double) E;
|
215
|
+
|
216
|
+
/*
|
217
|
+
Read tables:
|
218
|
+
Read one float for ri
|
219
|
+
Read the first two doubles for -log(r_i) (out of three)
|
220
|
+
|
221
|
+
Organization of the table:
|
222
|
+
|
223
|
+
one struct entry per index, the struct entry containing
|
224
|
+
r, logih, logim and logil in this order
|
225
|
+
*/
|
226
|
+
|
227
|
+
|
228
|
+
ri = argredtable[index].ri;
|
229
|
+
/*
|
230
|
+
Actually we don't need the logarithm entries now
|
231
|
+
Move the following two lines to the eventual reconstruction
|
232
|
+
As long as we don't have any if in the following code, we can overlap
|
233
|
+
memory access with calculations
|
234
|
+
*/
|
235
|
+
logih = argredtable[index].logih;
|
236
|
+
logim = argredtable[index].logim;
|
237
|
+
|
238
|
+
/* Do range reduction:
|
239
|
+
|
240
|
+
zh + zl = y * ri - 1.0 correctly
|
241
|
+
|
242
|
+
Correctness is assured by use of Mul12 and Add12
|
243
|
+
even if we don't force ri to have its' LSBs set to zero
|
244
|
+
|
245
|
+
Discard zl for higher monome degrees
|
246
|
+
*/
|
247
|
+
|
248
|
+
Mul12(&yrih, &yril, y, ri);
|
249
|
+
th = yrih - 1.0;
|
250
|
+
Add12Cond(zh, zl, th, yril);
|
251
|
+
|
252
|
+
/*
|
253
|
+
Polynomial evaluation
|
254
|
+
|
255
|
+
Use a 7 degree polynomial
|
256
|
+
Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
|
257
|
+
Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
|
258
|
+
using an ad hoc method
|
259
|
+
|
260
|
+
*/
|
261
|
+
|
262
|
+
|
263
|
+
|
264
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
265
|
+
polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
|
266
|
+
#else
|
267
|
+
polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
|
268
|
+
#endif
|
269
|
+
|
270
|
+
Mul12(&zhSquareh, &zhSquarel, zh, zh);
|
271
|
+
polyUpper = polyHorner * (zh * zhSquareh);
|
272
|
+
zhSquareHalfh = zhSquareh * -0.5;
|
273
|
+
zhSquareHalfl = zhSquarel * -0.5;
|
274
|
+
Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
|
275
|
+
Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
|
276
|
+
Add22(&ph, &pl, t2h, t2l, t1h, t1l);
|
277
|
+
|
278
|
+
/* Reconstruction
|
279
|
+
|
280
|
+
Read logih and logim in the tables (already done)
|
281
|
+
|
282
|
+
Compute log(x) = E * log(2) + log(1+z) - log(ri)
|
283
|
+
i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
|
284
|
+
|
285
|
+
Carry out everything in double double precision
|
286
|
+
|
287
|
+
*/
|
288
|
+
|
289
|
+
/*
|
290
|
+
We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
|
291
|
+
Multiplication of ed (double E) and log2h is thus correct
|
292
|
+
The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
|
293
|
+
is enough for the accurate phase
|
294
|
+
The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
|
295
|
+
Nevertheless the storage with trailing zeros implies an overlap of the tabulated
|
296
|
+
triple double values. We have to take it into account for the accurate phase
|
297
|
+
basic procedures for addition and multiplication
|
298
|
+
The condition on the next Add12 is verified as log2m is smaller than log2h
|
299
|
+
and both are scaled by ed
|
300
|
+
*/
|
301
|
+
|
302
|
+
Add12(log2edh, log2edl, log2h * ed, log2m * ed);
|
303
|
+
|
304
|
+
/* Add logih and logim to ph and pl
|
305
|
+
|
306
|
+
We must use conditioned Add22 as logih can move over ph
|
307
|
+
*/
|
308
|
+
|
309
|
+
Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
|
310
|
+
|
311
|
+
/* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
|
312
|
+
|
313
|
+
Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
|
314
|
+
|
315
|
+
|
316
|
+
|
317
|
+
/* Change logarithm base from natural base to base 2 by multiplying */
|
318
|
+
|
319
|
+
Mul22(&logb2h, &logb2m, log2invh, log2invl, logh, logm);
|
320
|
+
|
321
|
+
|
322
|
+
/* Rounding test and eventual return or call to the accurate function */
|
323
|
+
|
324
|
+
if(E==0)
|
325
|
+
roundcst = ROUNDCST1;
|
326
|
+
else
|
327
|
+
roundcst = ROUNDCST2;
|
328
|
+
|
329
|
+
|
330
|
+
if(logb2h == (logb2h + (logb2m * roundcst)))
|
331
|
+
return logb2h;
|
332
|
+
else
|
333
|
+
{
|
334
|
+
|
335
|
+
#if DEBUG
|
336
|
+
printf("Going for Accurate Phase for x=%1.50e\n",x);
|
337
|
+
#endif
|
338
|
+
|
339
|
+
log2_td_accurate(&logb2h, &logb2m, &logb2l, E, ed, index, zh, zl, logih, logim);
|
340
|
+
|
341
|
+
ReturnRoundToNearest3(logb2h, logb2m, logb2l);
|
342
|
+
|
343
|
+
} /* Accurate phase launched */
|
344
|
+
}
|
345
|
+
|
346
|
+
|
347
|
+
/*************************************************************
|
348
|
+
*************************************************************
|
349
|
+
* ROUNDED UPWARDS *
|
350
|
+
*************************************************************
|
351
|
+
*************************************************************/
|
352
|
+
double log2_ru(double x) {
|
353
|
+
db_number xdb;
|
354
|
+
double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
|
355
|
+
double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
|
356
|
+
double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
|
357
|
+
double logb2h, logb2m, logb2l;
|
358
|
+
int E, index;
|
359
|
+
|
360
|
+
|
361
|
+
E=0;
|
362
|
+
xdb.d=x;
|
363
|
+
|
364
|
+
/* Filter cases */
|
365
|
+
if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
|
366
|
+
if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
|
367
|
+
return -1.0/0.0;
|
368
|
+
} /* log(+/-0) = -Inf */
|
369
|
+
if (xdb.i[HI] < 0){
|
370
|
+
return (x-x)/0; /* log(-x) = Nan */
|
371
|
+
}
|
372
|
+
/* Subnormal number */
|
373
|
+
E = -52;
|
374
|
+
xdb.d *= ((db_number) ((double) two52)).d; /* make x a normal number */
|
375
|
+
}
|
376
|
+
|
377
|
+
if (xdb.i[HI] >= 0x7ff00000){
|
378
|
+
return x+x; /* Inf or Nan */
|
379
|
+
}
|
380
|
+
|
381
|
+
|
382
|
+
/* Extract exponent and mantissa
|
383
|
+
Do range reduction,
|
384
|
+
yielding to E holding the exponent and
|
385
|
+
y the mantissa between sqrt(2)/2 and sqrt(2)
|
386
|
+
*/
|
387
|
+
E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
|
388
|
+
index = (xdb.i[HI] & 0x000fffff);
|
389
|
+
|
390
|
+
|
391
|
+
/* Test now if the argument is an exact power of 2
|
392
|
+
i.e. if the mantissa is exactly 1 (0x0..0 with the implicit bit)
|
393
|
+
This test is necessary for filtering out the cases where the final
|
394
|
+
rounding test cannot distinguish between an exact algebraic
|
395
|
+
number and a hard case to round
|
396
|
+
*/
|
397
|
+
|
398
|
+
if ((index | xdb.i[LO]) == 0) {
|
399
|
+
/* Handle the "trivial" case for log2:
|
400
|
+
The argument is an exact power of 2, return thus
|
401
|
+
just the exponant of the number
|
402
|
+
*/
|
403
|
+
|
404
|
+
return (double) E;
|
405
|
+
|
406
|
+
}
|
407
|
+
|
408
|
+
xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
|
409
|
+
index = (index + (1<<(20-L-1))) >> (20-L);
|
410
|
+
|
411
|
+
/* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
|
412
|
+
if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
|
413
|
+
xdb.i[HI] -= 0x00100000;
|
414
|
+
E++;
|
415
|
+
}
|
416
|
+
y = xdb.d;
|
417
|
+
index = index & INDEXMASK;
|
418
|
+
/* Cast integer E into double ed for multiplication later */
|
419
|
+
ed = (double) E;
|
420
|
+
|
421
|
+
/*
|
422
|
+
Read tables:
|
423
|
+
Read one float for ri
|
424
|
+
Read the first two doubles for -log(r_i) (out of three)
|
425
|
+
|
426
|
+
Organization of the table:
|
427
|
+
|
428
|
+
one struct entry per index, the struct entry containing
|
429
|
+
r, logih, logim and logil in this order
|
430
|
+
*/
|
431
|
+
|
432
|
+
|
433
|
+
ri = argredtable[index].ri;
|
434
|
+
/*
|
435
|
+
Actually we don't need the logarithm entries now
|
436
|
+
Move the following two lines to the eventual reconstruction
|
437
|
+
As long as we don't have any if in the following code, we can overlap
|
438
|
+
memory access with calculations
|
439
|
+
*/
|
440
|
+
logih = argredtable[index].logih;
|
441
|
+
logim = argredtable[index].logim;
|
442
|
+
|
443
|
+
/* Do range reduction:
|
444
|
+
|
445
|
+
zh + zl = y * ri - 1.0 correctly
|
446
|
+
|
447
|
+
Correctness is assured by use of Mul12 and Add12
|
448
|
+
even if we don't force ri to have its' LSBs set to zero
|
449
|
+
|
450
|
+
Discard zl for higher monome degrees
|
451
|
+
*/
|
452
|
+
|
453
|
+
Mul12(&yrih, &yril, y, ri);
|
454
|
+
th = yrih - 1.0;
|
455
|
+
Add12Cond(zh, zl, th, yril);
|
456
|
+
|
457
|
+
/*
|
458
|
+
Polynomial evaluation
|
459
|
+
|
460
|
+
Use a 7 degree polynomial
|
461
|
+
Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
|
462
|
+
Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
|
463
|
+
using an ad hoc method
|
464
|
+
|
465
|
+
*/
|
466
|
+
|
467
|
+
|
468
|
+
|
469
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
470
|
+
polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
|
471
|
+
#else
|
472
|
+
polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
|
473
|
+
#endif
|
474
|
+
|
475
|
+
Mul12(&zhSquareh, &zhSquarel, zh, zh);
|
476
|
+
polyUpper = polyHorner * (zh * zhSquareh);
|
477
|
+
zhSquareHalfh = zhSquareh * -0.5;
|
478
|
+
zhSquareHalfl = zhSquarel * -0.5;
|
479
|
+
Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
|
480
|
+
Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
|
481
|
+
Add22(&ph, &pl, t2h, t2l, t1h, t1l);
|
482
|
+
|
483
|
+
/* Reconstruction
|
484
|
+
|
485
|
+
Read logih and logim in the tables (already done)
|
486
|
+
|
487
|
+
Compute log(x) = E * log(2) + log(1+z) - log(ri)
|
488
|
+
i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
|
489
|
+
|
490
|
+
Carry out everything in double double precision
|
491
|
+
|
492
|
+
*/
|
493
|
+
|
494
|
+
/*
|
495
|
+
We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
|
496
|
+
Multiplication of ed (double E) and log2h is thus correct
|
497
|
+
The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
|
498
|
+
is enough for the accurate phase
|
499
|
+
The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
|
500
|
+
Nevertheless the storage with trailing zeros implies an overlap of the tabulated
|
501
|
+
triple double values. We have to take it into account for the accurate phase
|
502
|
+
basic procedures for addition and multiplication
|
503
|
+
The condition on the next Add12 is verified as log2m is smaller than log2h
|
504
|
+
and both are scaled by ed
|
505
|
+
*/
|
506
|
+
|
507
|
+
Add12(log2edh, log2edl, log2h * ed, log2m * ed);
|
508
|
+
|
509
|
+
/* Add logih and logim to ph and pl
|
510
|
+
|
511
|
+
We must use conditioned Add22 as logih can move over ph
|
512
|
+
*/
|
513
|
+
|
514
|
+
Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
|
515
|
+
|
516
|
+
/* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
|
517
|
+
|
518
|
+
Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
|
519
|
+
|
520
|
+
/* Change logarithm base from natural base to base 2 by multiplying */
|
521
|
+
|
522
|
+
Mul22(&logb2h, &logb2m, log2invh, log2invl, logh, logm);
|
523
|
+
|
524
|
+
/* Rounding test and eventual return or call to the accurate function */
|
525
|
+
|
526
|
+
if(E==0)
|
527
|
+
roundcst = RDROUNDCST1;
|
528
|
+
else
|
529
|
+
roundcst = RDROUNDCST2;
|
530
|
+
|
531
|
+
TEST_AND_RETURN_RU(logb2h, logb2m, roundcst);
|
532
|
+
|
533
|
+
#if DEBUG
|
534
|
+
printf("Going for Accurate Phase for x=%1.50e\n",x);
|
535
|
+
#endif
|
536
|
+
|
537
|
+
log2_td_accurate(&logb2h, &logb2m, &logb2l, E, ed, index, zh, zl, logih, logim);
|
538
|
+
|
539
|
+
ReturnRoundUpwards3(logb2h, logb2m, logb2l);
|
540
|
+
|
541
|
+
}
|
542
|
+
|
543
|
+
|
544
|
+
/*************************************************************
|
545
|
+
*************************************************************
|
546
|
+
* ROUNDED DOWNWARDS *
|
547
|
+
*************************************************************
|
548
|
+
*************************************************************/
|
549
|
+
double log2_rd(double x) {
|
550
|
+
db_number xdb;
|
551
|
+
double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
|
552
|
+
double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
|
553
|
+
double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
|
554
|
+
double logb2h, logb2m, logb2l;
|
555
|
+
int E, index;
|
556
|
+
|
557
|
+
|
558
|
+
E=0;
|
559
|
+
xdb.d=x;
|
560
|
+
|
561
|
+
/* Filter cases */
|
562
|
+
if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
|
563
|
+
if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
|
564
|
+
return -1.0/0.0;
|
565
|
+
} /* log(+/-0) = -Inf */
|
566
|
+
if (xdb.i[HI] < 0){
|
567
|
+
return (x-x)/0; /* log(-x) = Nan */
|
568
|
+
}
|
569
|
+
/* Subnormal number */
|
570
|
+
E = -52;
|
571
|
+
xdb.d *= ((db_number) ((double) two52)).d; /* make x a normal number */
|
572
|
+
}
|
573
|
+
|
574
|
+
if (xdb.i[HI] >= 0x7ff00000){
|
575
|
+
return x+x; /* Inf or Nan */
|
576
|
+
}
|
577
|
+
|
578
|
+
|
579
|
+
/* Extract exponent and mantissa
|
580
|
+
Do range reduction,
|
581
|
+
yielding to E holding the exponent and
|
582
|
+
y the mantissa between sqrt(2)/2 and sqrt(2)
|
583
|
+
*/
|
584
|
+
E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
|
585
|
+
index = (xdb.i[HI] & 0x000fffff);
|
586
|
+
|
587
|
+
|
588
|
+
/* Test now if the argument is an exact power of 2
|
589
|
+
i.e. if the mantissa is exactly 1 (0x0..0 with the implicit bit)
|
590
|
+
This test is necessary for filtering out the cases where the final
|
591
|
+
rounding test cannot distinguish between an exact algebraic
|
592
|
+
number and a hard case to round
|
593
|
+
*/
|
594
|
+
|
595
|
+
if ((index | xdb.i[LO]) == 0) {
|
596
|
+
/* Handle the "trivial" case for log2:
|
597
|
+
The argument is an exact power of 2, return thus
|
598
|
+
just the exponant of the number
|
599
|
+
*/
|
600
|
+
|
601
|
+
return (double) E;
|
602
|
+
|
603
|
+
}
|
604
|
+
|
605
|
+
xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
|
606
|
+
index = (index + (1<<(20-L-1))) >> (20-L);
|
607
|
+
|
608
|
+
/* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
|
609
|
+
if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
|
610
|
+
xdb.i[HI] -= 0x00100000;
|
611
|
+
E++;
|
612
|
+
}
|
613
|
+
y = xdb.d;
|
614
|
+
index = index & INDEXMASK;
|
615
|
+
/* Cast integer E into double ed for multiplication later */
|
616
|
+
ed = (double) E;
|
617
|
+
|
618
|
+
/*
|
619
|
+
Read tables:
|
620
|
+
Read one float for ri
|
621
|
+
Read the first two doubles for -log(r_i) (out of three)
|
622
|
+
|
623
|
+
Organization of the table:
|
624
|
+
|
625
|
+
one struct entry per index, the struct entry containing
|
626
|
+
r, logih, logim and logil in this order
|
627
|
+
*/
|
628
|
+
|
629
|
+
|
630
|
+
ri = argredtable[index].ri;
|
631
|
+
/*
|
632
|
+
Actually we don't need the logarithm entries now
|
633
|
+
Move the following two lines to the eventual reconstruction
|
634
|
+
As long as we don't have any if in the following code, we can overlap
|
635
|
+
memory access with calculations
|
636
|
+
*/
|
637
|
+
logih = argredtable[index].logih;
|
638
|
+
logim = argredtable[index].logim;
|
639
|
+
|
640
|
+
/* Do range reduction:
|
641
|
+
|
642
|
+
zh + zl = y * ri - 1.0 correctly
|
643
|
+
|
644
|
+
Correctness is assured by use of Mul12 and Add12
|
645
|
+
even if we don't force ri to have its' LSBs set to zero
|
646
|
+
|
647
|
+
Discard zl for higher monome degrees
|
648
|
+
*/
|
649
|
+
|
650
|
+
Mul12(&yrih, &yril, y, ri);
|
651
|
+
th = yrih - 1.0;
|
652
|
+
Add12Cond(zh, zl, th, yril);
|
653
|
+
|
654
|
+
/*
|
655
|
+
Polynomial evaluation
|
656
|
+
|
657
|
+
Use a 7 degree polynomial
|
658
|
+
Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
|
659
|
+
Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
|
660
|
+
using an ad hoc method
|
661
|
+
|
662
|
+
*/
|
663
|
+
|
664
|
+
|
665
|
+
|
666
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
667
|
+
polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
|
668
|
+
#else
|
669
|
+
polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
|
670
|
+
#endif
|
671
|
+
|
672
|
+
Mul12(&zhSquareh, &zhSquarel, zh, zh);
|
673
|
+
polyUpper = polyHorner * (zh * zhSquareh);
|
674
|
+
zhSquareHalfh = zhSquareh * -0.5;
|
675
|
+
zhSquareHalfl = zhSquarel * -0.5;
|
676
|
+
Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
|
677
|
+
Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
|
678
|
+
Add22(&ph, &pl, t2h, t2l, t1h, t1l);
|
679
|
+
|
680
|
+
/* Reconstruction
|
681
|
+
|
682
|
+
Read logih and logim in the tables (already done)
|
683
|
+
|
684
|
+
Compute log(x) = E * log(2) + log(1+z) - log(ri)
|
685
|
+
i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
|
686
|
+
|
687
|
+
Carry out everything in double double precision
|
688
|
+
|
689
|
+
*/
|
690
|
+
|
691
|
+
/*
|
692
|
+
We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
|
693
|
+
Multiplication of ed (double E) and log2h is thus correct
|
694
|
+
The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
|
695
|
+
is enough for the accurate phase
|
696
|
+
The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
|
697
|
+
Nevertheless the storage with trailing zeros implies an overlap of the tabulated
|
698
|
+
triple double values. We have to take it into account for the accurate phase
|
699
|
+
basic procedures for addition and multiplication
|
700
|
+
The condition on the next Add12 is verified as log2m is smaller than log2h
|
701
|
+
and both are scaled by ed
|
702
|
+
*/
|
703
|
+
|
704
|
+
Add12(log2edh, log2edl, log2h * ed, log2m * ed);
|
705
|
+
|
706
|
+
/* Add logih and logim to ph and pl
|
707
|
+
|
708
|
+
We must use conditioned Add22 as logih can move over ph
|
709
|
+
*/
|
710
|
+
|
711
|
+
Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
|
712
|
+
|
713
|
+
/* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
|
714
|
+
|
715
|
+
Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
|
716
|
+
|
717
|
+
/* Change logarithm base from natural base to base 2 by multiplying */
|
718
|
+
|
719
|
+
Mul22(&logb2h, &logb2m, log2invh, log2invl, logh, logm);
|
720
|
+
|
721
|
+
/* Rounding test and eventual return or call to the accurate function */
|
722
|
+
|
723
|
+
if(E==0)
|
724
|
+
roundcst = RDROUNDCST1;
|
725
|
+
else
|
726
|
+
roundcst = RDROUNDCST2;
|
727
|
+
|
728
|
+
TEST_AND_RETURN_RD(logb2h, logb2m, roundcst);
|
729
|
+
|
730
|
+
#if DEBUG
|
731
|
+
printf("Going for Accurate Phase for x=%1.50e\n",x);
|
732
|
+
#endif
|
733
|
+
|
734
|
+
log2_td_accurate(&logb2h, &logb2m, &logb2l, E, ed, index, zh, zl, logih, logim);
|
735
|
+
|
736
|
+
ReturnRoundDownwards3(logb2h, logb2m, logb2l);
|
737
|
+
}
|
738
|
+
|
739
|
+
/*************************************************************
|
740
|
+
*************************************************************
|
741
|
+
* ROUNDED TOWARDS ZERO *
|
742
|
+
*************************************************************
|
743
|
+
*************************************************************/
|
744
|
+
double log2_rz(double x) {
|
745
|
+
db_number xdb;
|
746
|
+
double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
|
747
|
+
double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
|
748
|
+
double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
|
749
|
+
double logb2h, logb2m, logb2l;
|
750
|
+
int E, index;
|
751
|
+
|
752
|
+
|
753
|
+
E=0;
|
754
|
+
xdb.d=x;
|
755
|
+
|
756
|
+
/* Filter cases */
|
757
|
+
if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
|
758
|
+
if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
|
759
|
+
return -1.0/0.0;
|
760
|
+
} /* log(+/-0) = -Inf */
|
761
|
+
if (xdb.i[HI] < 0){
|
762
|
+
return (x-x)/0; /* log(-x) = Nan */
|
763
|
+
}
|
764
|
+
/* Subnormal number */
|
765
|
+
E = -52;
|
766
|
+
xdb.d *= ((db_number) ((double) two52)).d; /* make x a normal number */
|
767
|
+
}
|
768
|
+
|
769
|
+
if (xdb.i[HI] >= 0x7ff00000){
|
770
|
+
return x+x; /* Inf or Nan */
|
771
|
+
}
|
772
|
+
|
773
|
+
|
774
|
+
/* Extract exponent and mantissa
|
775
|
+
Do range reduction,
|
776
|
+
yielding to E holding the exponent and
|
777
|
+
y the mantissa between sqrt(2)/2 and sqrt(2)
|
778
|
+
*/
|
779
|
+
E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
|
780
|
+
index = (xdb.i[HI] & 0x000fffff);
|
781
|
+
|
782
|
+
|
783
|
+
/* Test now if the argument is an exact power of 2
|
784
|
+
i.e. if the mantissa is exactly 1 (0x0..0 with the implicit bit)
|
785
|
+
This test is necessary for filtering out the cases where the final
|
786
|
+
rounding test cannot distinguish between an exact algebraic
|
787
|
+
number and a hard case to round
|
788
|
+
*/
|
789
|
+
|
790
|
+
if ((index | xdb.i[LO]) == 0) {
|
791
|
+
/* Handle the "trivial" case for log2:
|
792
|
+
The argument is an exact power of 2, return thus
|
793
|
+
just the exponant of the number
|
794
|
+
*/
|
795
|
+
|
796
|
+
return (double) E;
|
797
|
+
|
798
|
+
}
|
799
|
+
|
800
|
+
xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
|
801
|
+
index = (index + (1<<(20-L-1))) >> (20-L);
|
802
|
+
|
803
|
+
/* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
|
804
|
+
if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
|
805
|
+
xdb.i[HI] -= 0x00100000;
|
806
|
+
E++;
|
807
|
+
}
|
808
|
+
y = xdb.d;
|
809
|
+
index = index & INDEXMASK;
|
810
|
+
/* Cast integer E into double ed for multiplication later */
|
811
|
+
ed = (double) E;
|
812
|
+
|
813
|
+
/*
|
814
|
+
Read tables:
|
815
|
+
Read one float for ri
|
816
|
+
Read the first two doubles for -log(r_i) (out of three)
|
817
|
+
|
818
|
+
Organization of the table:
|
819
|
+
|
820
|
+
one struct entry per index, the struct entry containing
|
821
|
+
r, logih, logim and logil in this order
|
822
|
+
*/
|
823
|
+
|
824
|
+
|
825
|
+
ri = argredtable[index].ri;
|
826
|
+
/*
|
827
|
+
Actually we don't need the logarithm entries now
|
828
|
+
Move the following two lines to the eventual reconstruction
|
829
|
+
As long as we don't have any if in the following code, we can overlap
|
830
|
+
memory access with calculations
|
831
|
+
*/
|
832
|
+
logih = argredtable[index].logih;
|
833
|
+
logim = argredtable[index].logim;
|
834
|
+
|
835
|
+
/* Do range reduction:
|
836
|
+
|
837
|
+
zh + zl = y * ri - 1.0 correctly
|
838
|
+
|
839
|
+
Correctness is assured by use of Mul12 and Add12
|
840
|
+
even if we don't force ri to have its' LSBs set to zero
|
841
|
+
|
842
|
+
Discard zl for higher monome degrees
|
843
|
+
*/
|
844
|
+
|
845
|
+
Mul12(&yrih, &yril, y, ri);
|
846
|
+
th = yrih - 1.0;
|
847
|
+
Add12Cond(zh, zl, th, yril);
|
848
|
+
|
849
|
+
/*
|
850
|
+
Polynomial evaluation
|
851
|
+
|
852
|
+
Use a 7 degree polynomial
|
853
|
+
Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
|
854
|
+
Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
|
855
|
+
using an ad hoc method
|
856
|
+
|
857
|
+
*/
|
858
|
+
|
859
|
+
|
860
|
+
|
861
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
862
|
+
polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
|
863
|
+
#else
|
864
|
+
polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
|
865
|
+
#endif
|
866
|
+
|
867
|
+
Mul12(&zhSquareh, &zhSquarel, zh, zh);
|
868
|
+
polyUpper = polyHorner * (zh * zhSquareh);
|
869
|
+
zhSquareHalfh = zhSquareh * -0.5;
|
870
|
+
zhSquareHalfl = zhSquarel * -0.5;
|
871
|
+
Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
|
872
|
+
Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
|
873
|
+
Add22(&ph, &pl, t2h, t2l, t1h, t1l);
|
874
|
+
|
875
|
+
/* Reconstruction
|
876
|
+
|
877
|
+
Read logih and logim in the tables (already done)
|
878
|
+
|
879
|
+
Compute log(x) = E * log(2) + log(1+z) - log(ri)
|
880
|
+
i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
|
881
|
+
|
882
|
+
Carry out everything in double double precision
|
883
|
+
|
884
|
+
*/
|
885
|
+
|
886
|
+
/*
|
887
|
+
We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
|
888
|
+
Multiplication of ed (double E) and log2h is thus correct
|
889
|
+
The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
|
890
|
+
is enough for the accurate phase
|
891
|
+
The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
|
892
|
+
Nevertheless the storage with trailing zeros implies an overlap of the tabulated
|
893
|
+
triple double values. We have to take it into account for the accurate phase
|
894
|
+
basic procedures for addition and multiplication
|
895
|
+
The condition on the next Add12 is verified as log2m is smaller than log2h
|
896
|
+
and both are scaled by ed
|
897
|
+
*/
|
898
|
+
|
899
|
+
Add12(log2edh, log2edl, log2h * ed, log2m * ed);
|
900
|
+
|
901
|
+
/* Add logih and logim to ph and pl
|
902
|
+
|
903
|
+
We must use conditioned Add22 as logih can move over ph
|
904
|
+
*/
|
905
|
+
|
906
|
+
Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
|
907
|
+
|
908
|
+
/* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
|
909
|
+
|
910
|
+
Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
|
911
|
+
|
912
|
+
/* Change logarithm base from natural base to base 2 by multiplying */
|
913
|
+
|
914
|
+
Mul22(&logb2h, &logb2m, log2invh, log2invl, logh, logm);
|
915
|
+
|
916
|
+
/* Rounding test and eventual return or call to the accurate function */
|
917
|
+
|
918
|
+
if(E==0)
|
919
|
+
roundcst = RDROUNDCST1;
|
920
|
+
else
|
921
|
+
roundcst = RDROUNDCST2;
|
922
|
+
|
923
|
+
TEST_AND_RETURN_RZ(logb2h, logb2m, roundcst);
|
924
|
+
|
925
|
+
#if DEBUG
|
926
|
+
printf("Going for Accurate Phase for x=%1.50e\n",x);
|
927
|
+
#endif
|
928
|
+
|
929
|
+
log2_td_accurate(&logb2h, &logb2m, &logb2l, E, ed, index, zh, zl, logih, logim);
|
930
|
+
|
931
|
+
ReturnRoundTowardsZero3(logb2h, logb2m, logb2l);
|
932
|
+
}
|
933
|
+
|
934
|
+
|
935
|
+
|