crmf 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -0
- data/crmf.gemspec +102 -1
- data/ext/crlibm-1.0beta5/AUTHORS +2 -0
- data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
- data/ext/crlibm-1.0beta5/COPYING +340 -0
- data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
- data/ext/crlibm-1.0beta5/ChangeLog +125 -0
- data/ext/crlibm-1.0beta5/Makefile.am +134 -0
- data/ext/crlibm-1.0beta5/NEWS +0 -0
- data/ext/crlibm-1.0beta5/README +31 -0
- data/ext/crlibm-1.0beta5/README.DEV +23 -0
- data/ext/crlibm-1.0beta5/README.md +5 -0
- data/ext/crlibm-1.0beta5/TODO +66 -0
- data/ext/crlibm-1.0beta5/VERSION +1 -0
- data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
- data/ext/crlibm-1.0beta5/acos-td.h +629 -0
- data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
- data/ext/crlibm-1.0beta5/asin-td.h +620 -0
- data/ext/crlibm-1.0beta5/asincos.c +4488 -0
- data/ext/crlibm-1.0beta5/asincos.h +575 -0
- data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
- data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
- data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
- data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
- data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
- data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
- data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
- data/ext/crlibm-1.0beta5/configure.ac +419 -0
- data/ext/crlibm-1.0beta5/crlibm.h +204 -0
- data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
- data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
- data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
- data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
- data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
- data/ext/crlibm-1.0beta5/double-extended.h +496 -0
- data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
- data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
- data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
- data/ext/crlibm-1.0beta5/exp-td.h +685 -0
- data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
- data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
- data/ext/crlibm-1.0beta5/expm1.c +2515 -0
- data/ext/crlibm-1.0beta5/expm1.h +715 -0
- data/ext/crlibm-1.0beta5/interval.h +238 -0
- data/ext/crlibm-1.0beta5/log-de.c +480 -0
- data/ext/crlibm-1.0beta5/log-de.h +747 -0
- data/ext/crlibm-1.0beta5/log-de2.c +280 -0
- data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
- data/ext/crlibm-1.0beta5/log-td.c +1158 -0
- data/ext/crlibm-1.0beta5/log-td.h +819 -0
- data/ext/crlibm-1.0beta5/log.c +2244 -0
- data/ext/crlibm-1.0beta5/log.h +1592 -0
- data/ext/crlibm-1.0beta5/log10-td.c +906 -0
- data/ext/crlibm-1.0beta5/log10-td.h +823 -0
- data/ext/crlibm-1.0beta5/log1p.c +1295 -0
- data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
- data/ext/crlibm-1.0beta5/log2-td.h +821 -0
- data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
- data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
- data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
- data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
- data/ext/crlibm-1.0beta5/log_fast.c +360 -0
- data/ext/crlibm-1.0beta5/log_fast.h +440 -0
- data/ext/crlibm-1.0beta5/pow.c +1396 -0
- data/ext/crlibm-1.0beta5/pow.h +3101 -0
- data/ext/crlibm-1.0beta5/prepare +20 -0
- data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
- data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
- data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
- data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
- data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
- data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
- data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
- data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
- data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
- data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
- data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
- data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
- data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
- data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
- data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
- data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
- data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
- data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
- data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
- data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
- data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
- data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
- data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
- data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
- data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
- data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
- data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
- data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
- data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
- data/ext/crlibm-1.0beta5/trigpi.h +556 -0
- data/ext/crlibm-1.0beta5/triple-double.c +57 -0
- data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
- data/ext/crmf/crmf.c +16 -16
- data/ext/crmf/extconf.rb +12 -8
- data/lib/crmf/version.rb +1 -1
- data/tests/perf.rb +100 -219
- metadata +104 -3
- data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,1295 @@
|
|
1
|
+
/*
|
2
|
+
* Correctly rounded log1p(x) = log(1 + x)
|
3
|
+
*
|
4
|
+
* Author : Christoph Lauter (ENS Lyon)
|
5
|
+
*
|
6
|
+
* This file is part of the crlibm library developed by the Arenaire
|
7
|
+
* project at Ecole Normale Superieure de Lyon
|
8
|
+
*
|
9
|
+
* This program is free software; you can redistribute it and/or modify
|
10
|
+
* it under the terms of the GNU Lesser General Public License as published by
|
11
|
+
* the Free Software Foundation; either version 2 of the License, or
|
12
|
+
* (at your option) any later version.
|
13
|
+
*
|
14
|
+
* This program is distributed in the hope that it will be useful,
|
15
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
+
* GNU General Public License for more details.
|
18
|
+
*
|
19
|
+
* You should have received a copy of the GNU Lesser General Public License
|
20
|
+
* along with this program; if not, write to the Free Software
|
21
|
+
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
22
|
+
*/
|
23
|
+
|
24
|
+
|
25
|
+
#include <stdio.h>
|
26
|
+
#include <stdlib.h>
|
27
|
+
#include "crlibm.h"
|
28
|
+
#include "crlibm_private.h"
|
29
|
+
#include "triple-double.h"
|
30
|
+
#include "log-td.h"
|
31
|
+
|
32
|
+
/* NOTE(review): the FMA guards in this file test `!defined(AVOID_FMA)`, i.e.
   whether the macro exists, not its value. Defining it here -- even as 0 --
   therefore selects the non-FMA Horner evaluations. */
#define AVOID_FMA 0
|
33
|
+
|
34
|
+
|
35
|
+
|
36
|
+
void log1p_td_accurate(double *logh, double *logm, double *logl, double ed, int index,
|
37
|
+
double zh, double zm, double zl, double logih, double logim) {
|
38
|
+
double highPoly, t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l, t8h, t8l, t9h, t9l, t10h, t10l, t11h, t11l;
|
39
|
+
double t12h, t12l, t13h, t13l, t14h, t14l, zSquareh, zSquarem, zSquarel, zCubeh, zCubem, zCubel, higherPolyMultZh, higherPolyMultZm;
|
40
|
+
double higherPolyMultZl, zSquareHalfh, zSquareHalfm, zSquareHalfl, polyWithSquareh, polyWithSquarem, polyWithSquarel;
|
41
|
+
double polyh, polym, polyl, logil, logyh, logym, logyl, loghover, logmover, loglover, log2edhover, log2edmover, log2edlover;
|
42
|
+
double log2edh, log2edm, log2edl;
|
43
|
+
|
44
|
+
|
45
|
+
#if EVAL_PERF
|
46
|
+
crlibm_second_step_taken++;
|
47
|
+
#endif
|
48
|
+
|
49
|
+
|
50
|
+
/* Accurate phase:
|
51
|
+
|
52
|
+
Argument reduction is already done.
|
53
|
+
We must return logh, logm and logl representing the intermediate result in 118 bits precision.
|
54
|
+
|
55
|
+
We use a 14 degree polynomial, computing the first 3 (the first is 0) coefficients in triple double,
|
56
|
+
calculating the next 7 coefficients in double double arithmetics and the last in double.
|
57
|
+
|
58
|
+
*/
|
59
|
+
|
60
|
+
/* Start of the horner scheme */
|
61
|
+
|
62
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
63
|
+
highPoly = FMA(FMA(FMA(FMA(accPolyC14,zh,accPolyC13),zh,accPolyC12),zh,accPolyC11),zh,accPolyC10);
|
64
|
+
#else
|
65
|
+
highPoly = accPolyC10 + zh * (accPolyC11 + zh * (accPolyC12 + zh * (accPolyC13 + zh * accPolyC14)));
|
66
|
+
#endif
|
67
|
+
|
68
|
+
/* We want to write
|
69
|
+
|
70
|
+
accPolyC3 + zh * (accPoly4 + zh * (accPoly5 + zh * (accPoly6 + zh * (accPoly7 + zh * (accPoly8 + zh * (accPoly9 + zh * highPoly))))));
|
71
|
+
( t14 t13 t12 t11 t10 t9 t8 t7 t6 t5 t4 t3 t2 t1 )
|
72
|
+
|
73
|
+
with all additions and multiplications in double double arithmetics
|
74
|
+
but we will produce intermediate results labelled t1h/t1l thru t14h/t14l
|
75
|
+
*/
|
76
|
+
|
77
|
+
Mul12(&t1h, &t1l, zh, highPoly);
|
78
|
+
Add22(&t2h, &t2l, accPolyC9h, accPolyC9l, t1h, t1l);
|
79
|
+
Mul22(&t3h, &t3l, zh, zm, t2h, t2l);
|
80
|
+
Add22(&t4h, &t4l, accPolyC8h, accPolyC8l, t3h, t3l);
|
81
|
+
Mul22(&t5h, &t5l, zh, zm, t4h, t4l);
|
82
|
+
Add22(&t6h, &t6l, accPolyC7h, accPolyC7l, t5h, t5l);
|
83
|
+
Mul22(&t7h, &t7l, zh, zm, t6h, t6l);
|
84
|
+
Add22(&t8h, &t8l, accPolyC6h, accPolyC6l, t7h, t7l);
|
85
|
+
Mul22(&t9h, &t9l, zh, zm, t8h, t8l);
|
86
|
+
Add22(&t10h, &t10l, accPolyC5h, accPolyC5l, t9h, t9l);
|
87
|
+
Mul22(&t11h, &t11l, zh, zm, t10h, t10l);
|
88
|
+
Add22(&t12h, &t12l, accPolyC4h, accPolyC4l, t11h, t11l);
|
89
|
+
Mul22(&t13h, &t13l, zh, zm, t12h, t12l);
|
90
|
+
Add22(&t14h, &t14l, accPolyC3h, accPolyC3l, t13h, t13l);
|
91
|
+
|
92
|
+
/* We must now prepare (zh + zm)^2 and (zh + zm)^3 as triple doubles */
|
93
|
+
|
94
|
+
Mul33(&zSquareh, &zSquarem, &zSquarel, zh, zm, zl, zh, zm, zl);
|
95
|
+
Mul33(&zCubeh, &zCubem, &zCubel, zh, zm, zl, zSquareh, zSquarem, zSquarel);
|
96
|
+
|
97
|
+
/* We can now multiplicate the middle and higher polynomial by z^3 */
|
98
|
+
|
99
|
+
Mul233(&higherPolyMultZh, &higherPolyMultZm, &higherPolyMultZl, t14h, t14l, zCubeh, zCubem, zCubel);
|
100
|
+
|
101
|
+
/* Multiply now z^2 by -1/2 (exact op) and add to middle and higher polynomial */
|
102
|
+
|
103
|
+
zSquareHalfh = zSquareh * -0.5;
|
104
|
+
zSquareHalfm = zSquarem * -0.5;
|
105
|
+
zSquareHalfl = zSquarel * -0.5;
|
106
|
+
|
107
|
+
Add33(&polyWithSquareh, &polyWithSquarem, &polyWithSquarel,
|
108
|
+
zSquareHalfh, zSquareHalfm, zSquareHalfl,
|
109
|
+
higherPolyMultZh, higherPolyMultZm, higherPolyMultZl);
|
110
|
+
|
111
|
+
/* Add now zh and zm to obtain the polynomial evaluation result */
|
112
|
+
|
113
|
+
Add33(&polyh, &polym, &polyl, zh, zm, zl, polyWithSquareh, polyWithSquarem, polyWithSquarel);
|
114
|
+
|
115
|
+
/* Reconstruct now log(y) = log(1 + z) - log(ri) by adding logih, logim, logil
|
116
|
+
logil has not been read to the time, do this first
|
117
|
+
*/
|
118
|
+
|
119
|
+
logil = argredtable[index].logil;
|
120
|
+
|
121
|
+
Add33(&logyh, &logym, &logyl, logih, logim, logil, polyh, polym, polyl);
|
122
|
+
|
123
|
+
/* Multiply log2 with E, i.e. log2h, log2m, log2l by ed
|
124
|
+
ed is always less than 2^(12) and log2h and log2m are stored with at least 12 trailing zeros
|
125
|
+
So multiplying naively is correct (up to 134 bits at least)
|
126
|
+
|
127
|
+
The final result is thus obtained by adding log2 * E to log(y)
|
128
|
+
*/
|
129
|
+
|
130
|
+
log2edhover = log2h * ed;
|
131
|
+
log2edmover = log2m * ed;
|
132
|
+
log2edlover = log2l * ed;
|
133
|
+
|
134
|
+
/* It may be necessary to renormalize the tabulated value (multiplied by ed) before adding
|
135
|
+
the to the log(y)-result
|
136
|
+
|
137
|
+
If needed, uncomment the following Renormalize3-Statement and comment out the copies
|
138
|
+
following it.
|
139
|
+
*/
|
140
|
+
|
141
|
+
/* Renormalize3(&log2edh, &log2edm, &log2edl, log2edhover, log2edmover, log2edlover); */
|
142
|
+
|
143
|
+
log2edh = log2edhover;
|
144
|
+
log2edm = log2edmover;
|
145
|
+
log2edl = log2edlover;
|
146
|
+
|
147
|
+
Add33(&loghover, &logmover, &loglover, log2edh, log2edm, log2edl, logyh, logym, logyl);
|
148
|
+
|
149
|
+
/* Since we can not guarantee in each addition and multiplication procedure that
|
150
|
+
the results are not overlapping, we must renormalize the result before handing
|
151
|
+
it over to the final rounding
|
152
|
+
*/
|
153
|
+
|
154
|
+
Renormalize3(logh,logm,logl,loghover,logmover,loglover);
|
155
|
+
|
156
|
+
}
|
157
|
+
|
158
|
+
|
159
|
+
|
160
|
+
/*************************************************************
|
161
|
+
*************************************************************
|
162
|
+
* ROUNDED TO NEAREST *
|
163
|
+
*************************************************************
|
164
|
+
*************************************************************/
|
165
|
+
double log1p_rn(double x){
|
166
|
+
db_number xdb, shdb, scaledb;
|
167
|
+
double yh, yl, ed, ri, logih, logim, yhrih, yhril, ylri, t1, t2, t3, t4, t5, t6, zh, zm, zl;
|
168
|
+
double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
|
169
|
+
double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, logl, roundcst;
|
170
|
+
double sh, sl;
|
171
|
+
int E, index;
|
172
|
+
|
173
|
+
|
174
|
+
xdb.d=x;
|
175
|
+
|
176
|
+
/* Filter cases */
|
177
|
+
if ((xdb.i[HI] & 0x7fffffff) < 0x3c900000) {
|
178
|
+
/* We are less than 2^(-54) and return simply an adjusted x
|
179
|
+
This captures also the algebraic case x = 0
|
180
|
+
*/
|
181
|
+
return x;
|
182
|
+
}
|
183
|
+
|
184
|
+
if (((xdb.i[HI] & 0x80000000) != 0) && ((xdb.i[HI] & 0x7fffffff) >= 0x3ff00000)) {
|
185
|
+
/* We are less or equal than -1 (-inf and NaN, too),
|
186
|
+
we return -inf for -1 and NaN otherwise
|
187
|
+
*/
|
188
|
+
if (x == -1.0) return x/0.0;
|
189
|
+
|
190
|
+
|
191
|
+
return (x-x)/0.0;
|
192
|
+
}
|
193
|
+
|
194
|
+
if ((xdb.i[HI] & 0x7ff00000) == 0x7ff00000) {
|
195
|
+
/* We are +inf or NaN
|
196
|
+
If +inf, we return +inf (x+x)
|
197
|
+
If NaN, we return NaN (x+x)
|
198
|
+
*/
|
199
|
+
return x+x;
|
200
|
+
}
|
201
|
+
|
202
|
+
/* Test if |x| < 2^(-8)
|
203
|
+
|
204
|
+
If yes, short-circuit the range reduction
|
205
|
+
|
206
|
+
*/
|
207
|
+
|
208
|
+
if ((xdb.i[HI] & 0x7fffffff) < 0x3f700000) {
|
209
|
+
/* Use the polynomial p(zh + zl) approximating log(1+zh+zl) directly
|
210
|
+
Set E and index to values that read 0.0 in the accurate phase.
|
211
|
+
*/
|
212
|
+
logih = 0.0;
|
213
|
+
logim = 0.0;
|
214
|
+
index = 0;
|
215
|
+
ed = 0.0;
|
216
|
+
index = 0;
|
217
|
+
zh = x;
|
218
|
+
zm = 0.0;
|
219
|
+
zl = 0.0;
|
220
|
+
} else {
|
221
|
+
/* If we are here, |x| >= 2^(-8) and we must perform range reduction */
|
222
|
+
|
223
|
+
/* Compute first exactly
|
224
|
+
|
225
|
+
sh + sl = 1 + x
|
226
|
+
|
227
|
+
x can move over 1, so use a conditional Add12
|
228
|
+
*/
|
229
|
+
|
230
|
+
Add12Cond(sh,sl,1.0,x);
|
231
|
+
|
232
|
+
/* Transform higher order double to integer */
|
233
|
+
|
234
|
+
shdb.d = sh;
|
235
|
+
|
236
|
+
/* Extract exponent and mantissa
|
237
|
+
Do range reduction,
|
238
|
+
yielding to E holding the exponent and
|
239
|
+
y the mantissa between sqrt(2)/2 and sqrt(2)
|
240
|
+
*/
|
241
|
+
E = 0;
|
242
|
+
E += (shdb.i[HI]>>20)-1023; /* extract the exponent */
|
243
|
+
index = (shdb.i[HI] & 0x000fffff);
|
244
|
+
shdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
|
245
|
+
index = (index + (1<<(20-L-1))) >> (20-L);
|
246
|
+
|
247
|
+
/* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
|
248
|
+
if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
|
249
|
+
shdb.i[HI] -= 0x00100000;
|
250
|
+
E++;
|
251
|
+
}
|
252
|
+
|
253
|
+
|
254
|
+
/* Transform shdb to yh */
|
255
|
+
yh = shdb.d;
|
256
|
+
|
257
|
+
|
258
|
+
/* Compute the index to the table */
|
259
|
+
index = index & INDEXMASK;
|
260
|
+
|
261
|
+
/* Cast integer E into double ed for multiplication later */
|
262
|
+
ed = (double) E;
|
263
|
+
|
264
|
+
/*
|
265
|
+
Read tables:
|
266
|
+
Read one float for ri
|
267
|
+
Read the first two doubles for -log(r_i) (out of three)
|
268
|
+
|
269
|
+
Organization of the table:
|
270
|
+
|
271
|
+
one struct entry per index, the struct entry containing
|
272
|
+
r, logih, logim and logil in this order
|
273
|
+
*/
|
274
|
+
|
275
|
+
|
276
|
+
ri = argredtable[index].ri;
|
277
|
+
/*
|
278
|
+
Actually we don't need the logarithm entries now
|
279
|
+
Move the following two lines to the eventual reconstruction
|
280
|
+
As long as we don't have any if in the following code, we can overlap
|
281
|
+
memory access with calculations
|
282
|
+
*/
|
283
|
+
logih = argredtable[index].logih;
|
284
|
+
logim = argredtable[index].logim;
|
285
|
+
|
286
|
+
/* Test if we have a simple range reduction or a complicated one
|
287
|
+
|
288
|
+
Simple range reduction for x < 0: x + 1 is exact, sl = 0 exactly
|
289
|
+
Simple range reduction for x > 2^(125) (sh > 2^(125)): x + 1 is not exact but its error less than 2^(-125)
|
290
|
+
|
291
|
+
Complicated range reduction: other cases
|
292
|
+
|
293
|
+
*/
|
294
|
+
|
295
|
+
|
296
|
+
if ((sl == 0.0) || (E > 125)) {
|
297
|
+
/* Simple range reduction */
|
298
|
+
|
299
|
+
Mul12(&yhrih, &yhril, yh, ri);
|
300
|
+
t1 = yhrih - 1.0;
|
301
|
+
Add12Cond(zh, zm, t1, yhril);
|
302
|
+
zl = 0.0;
|
303
|
+
|
304
|
+
} else {
|
305
|
+
/* Complicated range reduction; E <= 125 */
|
306
|
+
|
307
|
+
|
308
|
+
/* Scale sl accordingly to sh, from which the exponent was extracted
|
309
|
+
|
310
|
+
We form first 2^(-E) and multiply sl with this value; this gives yl.
|
311
|
+
*/
|
312
|
+
|
313
|
+
scaledb.i[HI] = (-E + 1023) << 20;
|
314
|
+
scaledb.i[LO] = 0;
|
315
|
+
|
316
|
+
yl = sl * scaledb.d;
|
317
|
+
|
318
|
+
|
319
|
+
/* Do complicated range reduction:
|
320
|
+
|
321
|
+
zh + zm + zl = (yh + yl) * ri - 1.0
|
322
|
+
|
323
|
+
|
324
|
+
We use zh + zm in the quick phase and zh + zm + zl in the accurate phase
|
325
|
+
|
326
|
+
The multiplication yl * ri is exact because yl contains at most 9 bits and
|
327
|
+
ri contains at most 24 bits.
|
328
|
+
|
329
|
+
The substraction yhrih - 1.0 is exact as per Sterbenz' lemma.
|
330
|
+
|
331
|
+
*/
|
332
|
+
|
333
|
+
Mul12(&yhrih,&yhril,yh,ri);
|
334
|
+
ylri = yl * ri;
|
335
|
+
|
336
|
+
t1 = yhrih - 1.0;
|
337
|
+
|
338
|
+
/* The unnormalized triple-double t1 + yhril + ylri is equal to (yh + yl) * ri - 1.0
|
339
|
+
As t1 can move over yhril and yhri can move over ylri, we normalize first these
|
340
|
+
values pairwise with Add12Conds. Then we renormalize the pairs by a
|
341
|
+
"inverted" (A.E.) Renormalize3.
|
342
|
+
*/
|
343
|
+
|
344
|
+
Add12Cond(t2,t3,yhril,ylri);
|
345
|
+
Add12Cond(t4,t5,t1,t2);
|
346
|
+
|
347
|
+
Add12Cond(t6,zl,t3,t5);
|
348
|
+
Add12Cond(zh,zm,t4,t6);
|
349
|
+
|
350
|
+
}
|
351
|
+
}
|
352
|
+
|
353
|
+
|
354
|
+
/*
|
355
|
+
Polynomial evaluation
|
356
|
+
|
357
|
+
Use a 7 degree polynomial
|
358
|
+
Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
|
359
|
+
Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zm
|
360
|
+
using an ad hoc method
|
361
|
+
|
362
|
+
*/
|
363
|
+
|
364
|
+
|
365
|
+
|
366
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
367
|
+
polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
|
368
|
+
#else
|
369
|
+
polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
|
370
|
+
#endif
|
371
|
+
|
372
|
+
Mul12(&zhSquareh, &zhSquarel, zh, zh);
|
373
|
+
polyUpper = polyHorner * (zh * zhSquareh);
|
374
|
+
zhSquareHalfh = zhSquareh * -0.5;
|
375
|
+
zhSquareHalfl = zhSquarel * -0.5;
|
376
|
+
Add12(t1h, t1l, polyUpper, -1 * (zh * zm));
|
377
|
+
Add22(&t2h, &t2l, zh, zm, zhSquareHalfh, zhSquareHalfl);
|
378
|
+
Add22(&ph, &pl, t2h, t2l, t1h, t1l);
|
379
|
+
|
380
|
+
/* Reconstruction
|
381
|
+
|
382
|
+
Read logih and logim in the tables (already done)
|
383
|
+
|
384
|
+
Compute log(x) = E * log(2) + log(1+z) - log(ri)
|
385
|
+
i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
|
386
|
+
|
387
|
+
Carry out everything in double double precision
|
388
|
+
|
389
|
+
*/
|
390
|
+
|
391
|
+
/*
|
392
|
+
We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
|
393
|
+
Multiplication of ed (double E) and log2h is thus correct
|
394
|
+
The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
|
395
|
+
is enough for the accurate phase
|
396
|
+
The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
|
397
|
+
Nevertheless the storage with trailing zeros implies an overlap of the tabulated
|
398
|
+
triple double values. We have to take it into account for the accurate phase
|
399
|
+
basic procedures for addition and multiplication
|
400
|
+
The condition on the next Add12 is verified as log2m is smaller than log2h
|
401
|
+
and both are scaled by ed
|
402
|
+
*/
|
403
|
+
|
404
|
+
Add12(log2edh, log2edl, log2h * ed, log2m * ed);
|
405
|
+
|
406
|
+
/* Add logih and logim to ph and pl
|
407
|
+
|
408
|
+
We must use conditioned Add22 as logih can move over ph
|
409
|
+
*/
|
410
|
+
|
411
|
+
Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
|
412
|
+
|
413
|
+
/* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
|
414
|
+
|
415
|
+
Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
|
416
|
+
|
417
|
+
/* Rounding test and eventual return or call to the accurate function */
|
418
|
+
|
419
|
+
if(E==0)
|
420
|
+
roundcst = ROUNDCST1;
|
421
|
+
else
|
422
|
+
roundcst = ROUNDCST2;
|
423
|
+
|
424
|
+
|
425
|
+
if(logh == (logh + (logm * roundcst)))
|
426
|
+
return logh;
|
427
|
+
else
|
428
|
+
{
|
429
|
+
|
430
|
+
#if DEBUG
|
431
|
+
printf("Going for Accurate Phase for x=%1.50e\n",x);
|
432
|
+
#endif
|
433
|
+
|
434
|
+
log1p_td_accurate(&logh, &logm, &logl, ed, index, zh, zm, zl, logih, logim);
|
435
|
+
|
436
|
+
ReturnRoundToNearest3(logh, logm, logl);
|
437
|
+
|
438
|
+
} /* Accurate phase launched */
|
439
|
+
}
|
440
|
+
|
441
|
+
|
442
|
+
|
443
|
+
|
444
|
+
double log1p_ru(double x) {
|
445
|
+
db_number xdb, shdb, scaledb;
|
446
|
+
double yh, yl, ed, ri, logih, logim, yhrih, yhril, ylri, t1, t2, t3, t4, t5, t6, zh, zm, zl;
|
447
|
+
double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
|
448
|
+
double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, logl, roundcst;
|
449
|
+
double sh, sl;
|
450
|
+
int E, index;
|
451
|
+
|
452
|
+
|
453
|
+
xdb.d=x;
|
454
|
+
|
455
|
+
/* Filter cases */
|
456
|
+
if ((xdb.i[HI] & 0x7fffffff) < 0x3c900000) {
|
457
|
+
/* We are less than 2^(-54) and return simply an adjusted x
|
458
|
+
|
459
|
+
If x = 0, the result is algebraic and equal to 0.
|
460
|
+
|
461
|
+
The series for log(1 + x) = x - 1/2 * x^2 + ... is alternated
|
462
|
+
and converges in this interval.
|
463
|
+
The truncation rest -1/2 * x^2 + 1/3 * x^3 - ... is
|
464
|
+
always negative, so log(1 + x) is always less than x but less than
|
465
|
+
1 ulp of x away.
|
466
|
+
We round up, so we return x.
|
467
|
+
|
468
|
+
*/
|
469
|
+
return x;
|
470
|
+
}
|
471
|
+
|
472
|
+
if (((xdb.i[HI] & 0x80000000) != 0) && ((xdb.i[HI] & 0x7fffffff) >= 0x3ff00000)) {
|
473
|
+
/* We are less or equal than -1 (-inf and NaN, too),
|
474
|
+
we return -inf for -1 and NaN otherwise
|
475
|
+
*/
|
476
|
+
if (x == -1.0) return x/0.0;
|
477
|
+
|
478
|
+
|
479
|
+
return (x-x)/0.0;
|
480
|
+
}
|
481
|
+
|
482
|
+
if ((xdb.i[HI] & 0x7ff00000) == 0x7ff00000) {
|
483
|
+
/* We are +inf or NaN
|
484
|
+
If +inf, we return +inf (x+x)
|
485
|
+
If NaN, we return NaN (x+x)
|
486
|
+
*/
|
487
|
+
return x+x;
|
488
|
+
}
|
489
|
+
|
490
|
+
/* Test if |x| < 2^(-8)
|
491
|
+
|
492
|
+
If yes, short-circuit the range reduction
|
493
|
+
|
494
|
+
*/
|
495
|
+
|
496
|
+
if ((xdb.i[HI] & 0x7fffffff) < 0x3f700000) {
|
497
|
+
/* Use the polynomial p(zh + zl) approximating log(1+zh+zl) directly
|
498
|
+
Set E and index to values that read 0.0 in the accurate phase.
|
499
|
+
*/
|
500
|
+
logih = 0.0;
|
501
|
+
logim = 0.0;
|
502
|
+
index = 0;
|
503
|
+
ed = 0.0;
|
504
|
+
index = 0;
|
505
|
+
zh = x;
|
506
|
+
zm = 0.0;
|
507
|
+
zl = 0.0;
|
508
|
+
} else {
|
509
|
+
/* If we are here, |x| >= 2^(-8) and we must perform range reduction */
|
510
|
+
|
511
|
+
/* Compute first exactly
|
512
|
+
|
513
|
+
sh + sl = 1 + x
|
514
|
+
|
515
|
+
x can move over 1, so use a conditional Add12
|
516
|
+
*/
|
517
|
+
|
518
|
+
Add12Cond(sh,sl,1.0,x);
|
519
|
+
|
520
|
+
/* Transform higher order double to integer */
|
521
|
+
|
522
|
+
shdb.d = sh;
|
523
|
+
|
524
|
+
/* Extract exponent and mantissa
|
525
|
+
Do range reduction,
|
526
|
+
yielding to E holding the exponent and
|
527
|
+
y the mantissa between sqrt(2)/2 and sqrt(2)
|
528
|
+
*/
|
529
|
+
E = 0;
|
530
|
+
E += (shdb.i[HI]>>20)-1023; /* extract the exponent */
|
531
|
+
index = (shdb.i[HI] & 0x000fffff);
|
532
|
+
shdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
|
533
|
+
index = (index + (1<<(20-L-1))) >> (20-L);
|
534
|
+
|
535
|
+
/* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
|
536
|
+
if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
|
537
|
+
shdb.i[HI] -= 0x00100000;
|
538
|
+
E++;
|
539
|
+
}
|
540
|
+
|
541
|
+
|
542
|
+
/* Transform shdb to yh */
|
543
|
+
yh = shdb.d;
|
544
|
+
|
545
|
+
|
546
|
+
/* Compute the index to the table */
|
547
|
+
index = index & INDEXMASK;
|
548
|
+
|
549
|
+
/* Cast integer E into double ed for multiplication later */
|
550
|
+
ed = (double) E;
|
551
|
+
|
552
|
+
/*
|
553
|
+
Read tables:
|
554
|
+
Read one float for ri
|
555
|
+
Read the first two doubles for -log(r_i) (out of three)
|
556
|
+
|
557
|
+
Organization of the table:
|
558
|
+
|
559
|
+
one struct entry per index, the struct entry containing
|
560
|
+
r, logih, logim and logil in this order
|
561
|
+
*/
|
562
|
+
|
563
|
+
|
564
|
+
ri = argredtable[index].ri;
|
565
|
+
/*
|
566
|
+
Actually we don't need the logarithm entries now
|
567
|
+
Move the following two lines to the eventual reconstruction
|
568
|
+
As long as we don't have any if in the following code, we can overlap
|
569
|
+
memory access with calculations
|
570
|
+
*/
|
571
|
+
logih = argredtable[index].logih;
|
572
|
+
logim = argredtable[index].logim;
|
573
|
+
|
574
|
+
/* Test if we have a simple range reduction or a complicated one
|
575
|
+
|
576
|
+
Simple range reduction for x < 0: x + 1 is exact, sl = 0 exactly
|
577
|
+
Simple range reduction for x > 2^(125) (sh > 2^(125)): x + 1 is not exact but its error less than 2^(-125)
|
578
|
+
|
579
|
+
Complicated range reduction: other cases
|
580
|
+
|
581
|
+
*/
|
582
|
+
|
583
|
+
|
584
|
+
if ((sl == 0.0) || (E > 125)) {
|
585
|
+
/* Simple range reduction */
|
586
|
+
|
587
|
+
Mul12(&yhrih, &yhril, yh, ri);
|
588
|
+
t1 = yhrih - 1.0;
|
589
|
+
Add12Cond(zh, zm, t1, yhril);
|
590
|
+
zl = 0.0;
|
591
|
+
|
592
|
+
} else {
|
593
|
+
/* Complicated range reduction; E <= 125 */
|
594
|
+
|
595
|
+
|
596
|
+
/* Scale sl accordingly to sh, from which the exponent was extracted
|
597
|
+
|
598
|
+
We form first 2^(-E) and multiply sl with this value; this gives yl.
|
599
|
+
*/
|
600
|
+
|
601
|
+
scaledb.i[HI] = (-E + 1023) << 20;
|
602
|
+
scaledb.i[LO] = 0;
|
603
|
+
|
604
|
+
yl = sl * scaledb.d;
|
605
|
+
|
606
|
+
|
607
|
+
/* Do complicated range reduction:
|
608
|
+
|
609
|
+
zh + zm + zl = (yh + yl) * ri - 1.0
|
610
|
+
|
611
|
+
|
612
|
+
We use zh + zm in the quick phase and zh + zm + zl in the accurate phase
|
613
|
+
|
614
|
+
The multiplication yl * ri is exact because yl contains at most 9 bits and
|
615
|
+
ri contains at most 24 bits.
|
616
|
+
|
617
|
+
The substraction yhrih - 1.0 is exact as per Sterbenz' lemma.
|
618
|
+
|
619
|
+
*/
|
620
|
+
|
621
|
+
Mul12(&yhrih,&yhril,yh,ri);
|
622
|
+
ylri = yl * ri;
|
623
|
+
|
624
|
+
t1 = yhrih - 1.0;
|
625
|
+
|
626
|
+
/* The unnormalized triple-double t1 + yhril + ylri is equal to (yh + yl) * ri - 1.0
|
627
|
+
As t1 can move over yhril and yhri can move over ylri, we normalize first these
|
628
|
+
values pairwise with Add12Conds. Then we renormalize the pairs by a
|
629
|
+
"inverted" (A.E.) Renormalize3.
|
630
|
+
*/
|
631
|
+
|
632
|
+
Add12Cond(t2,t3,yhril,ylri);
|
633
|
+
Add12Cond(t4,t5,t1,t2);
|
634
|
+
|
635
|
+
Add12Cond(t6,zl,t3,t5);
|
636
|
+
Add12Cond(zh,zm,t4,t6);
|
637
|
+
|
638
|
+
}
|
639
|
+
}
|
640
|
+
|
641
|
+
|
642
|
+
/*
|
643
|
+
Polynomial evaluation
|
644
|
+
|
645
|
+
Use a 7 degree polynomial
|
646
|
+
Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
|
647
|
+
Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zm
|
648
|
+
using an ad hoc method
|
649
|
+
|
650
|
+
*/
|
651
|
+
|
652
|
+
|
653
|
+
|
654
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
655
|
+
polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
|
656
|
+
#else
|
657
|
+
polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
|
658
|
+
#endif
|
659
|
+
|
660
|
+
Mul12(&zhSquareh, &zhSquarel, zh, zh);
|
661
|
+
polyUpper = polyHorner * (zh * zhSquareh);
|
662
|
+
zhSquareHalfh = zhSquareh * -0.5;
|
663
|
+
zhSquareHalfl = zhSquarel * -0.5;
|
664
|
+
Add12(t1h, t1l, polyUpper, -1 * (zh * zm));
|
665
|
+
Add22(&t2h, &t2l, zh, zm, zhSquareHalfh, zhSquareHalfl);
|
666
|
+
Add22(&ph, &pl, t2h, t2l, t1h, t1l);
|
667
|
+
|
668
|
+
/* Reconstruction
|
669
|
+
|
670
|
+
Read logih and logim in the tables (already done)
|
671
|
+
|
672
|
+
Compute log(x) = E * log(2) + log(1+z) - log(ri)
|
673
|
+
i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
|
674
|
+
|
675
|
+
Carry out everything in double double precision
|
676
|
+
|
677
|
+
*/
|
678
|
+
|
679
|
+
/*
|
680
|
+
We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
|
681
|
+
Multiplication of ed (double E) and log2h is thus correct
|
682
|
+
The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
|
683
|
+
is enough for the accurate phase
|
684
|
+
The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
|
685
|
+
Nevertheless the storage with trailing zeros implies an overlap of the tabulated
|
686
|
+
triple double values. We have to take it into account for the accurate phase
|
687
|
+
basic procedures for addition and multiplication
|
688
|
+
The condition on the next Add12 is verified as log2m is smaller than log2h
|
689
|
+
and both are scaled by ed
|
690
|
+
*/
|
691
|
+
|
692
|
+
Add12(log2edh, log2edl, log2h * ed, log2m * ed);
|
693
|
+
|
694
|
+
/* Add logih and logim to ph and pl
|
695
|
+
|
696
|
+
We must use conditioned Add22 as logih can move over ph
|
697
|
+
*/
|
698
|
+
|
699
|
+
Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
|
700
|
+
|
701
|
+
/* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
|
702
|
+
|
703
|
+
Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
|
704
|
+
|
705
|
+
/* Rounding test and eventual return or call to the accurate function */
|
706
|
+
|
707
|
+
if(E==0)
|
708
|
+
roundcst = RDROUNDCST1;
|
709
|
+
else
|
710
|
+
roundcst = RDROUNDCST2;
|
711
|
+
|
712
|
+
TEST_AND_RETURN_RU(logh, logm, roundcst);
|
713
|
+
|
714
|
+
#if DEBUG
|
715
|
+
printf("Going for Accurate Phase for x=%1.50e\n",x);
|
716
|
+
#endif
|
717
|
+
|
718
|
+
log1p_td_accurate(&logh, &logm, &logl, ed, index, zh, zm, zl, logih, logim);
|
719
|
+
|
720
|
+
ReturnRoundUpwards3(logh, logm, logl);
|
721
|
+
}
|
722
|
+
|
723
|
+
double log1p_rd(double x) {
|
724
|
+
db_number xdb, shdb, scaledb;
|
725
|
+
double yh, yl, ed, ri, logih, logim, yhrih, yhril, ylri, t1, t2, t3, t4, t5, t6, zh, zm, zl;
|
726
|
+
double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
|
727
|
+
double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, logl, roundcst;
|
728
|
+
double sh, sl;
|
729
|
+
int E, index;
|
730
|
+
|
731
|
+
|
732
|
+
xdb.d=x;
|
733
|
+
|
734
|
+
/* Filter cases */
|
735
|
+
if ((xdb.i[HI] & 0x7fffffff) < 0x3c900000) {
|
736
|
+
/* We are less than 2^(-54) and return simply an adjusted x
|
737
|
+
|
738
|
+
If x = 0, the result is algebraic and equal to 0.
|
739
|
+
|
740
|
+
The series for log(1 + x) = x - 1/2 * x^2 + ... is alternated
|
741
|
+
and converges in this interval.
|
742
|
+
The truncation rest -1/2 * x^2 + 1/3 * x^3 - ... is
|
743
|
+
always negative, so log(1 + x) is always less than x but less than
|
744
|
+
1 ulp of x away.
|
745
|
+
We round down, so we return x - 1ulp;
|
746
|
+
|
747
|
+
*/
|
748
|
+
|
749
|
+
if (x == 0.0) return x;
|
750
|
+
|
751
|
+
if (x > 0) {
|
752
|
+
xdb.l--;
|
753
|
+
} else {
|
754
|
+
xdb.l++;
|
755
|
+
}
|
756
|
+
return xdb.d;
|
757
|
+
}
|
758
|
+
|
759
|
+
if (((xdb.i[HI] & 0x80000000) != 0) && ((xdb.i[HI] & 0x7fffffff) >= 0x3ff00000)) {
|
760
|
+
/* We are less or equal than -1 (-inf and NaN, too),
|
761
|
+
we return -inf for -1 and NaN otherwise
|
762
|
+
*/
|
763
|
+
if (x == -1.0) return x/0.0;
|
764
|
+
|
765
|
+
|
766
|
+
return (x-x)/0.0;
|
767
|
+
}
|
768
|
+
|
769
|
+
if ((xdb.i[HI] & 0x7ff00000) == 0x7ff00000) {
|
770
|
+
/* We are +inf or NaN
|
771
|
+
If +inf, we return +inf (x+x)
|
772
|
+
If NaN, we return NaN (x+x)
|
773
|
+
*/
|
774
|
+
return x+x;
|
775
|
+
}
|
776
|
+
|
777
|
+
/* Test if |x| < 2^(-8)
|
778
|
+
|
779
|
+
If yes, short-circuit the range reduction
|
780
|
+
|
781
|
+
*/
|
782
|
+
|
783
|
+
if ((xdb.i[HI] & 0x7fffffff) < 0x3f700000) {
|
784
|
+
/* Use the polynomial p(zh + zl) approximating log(1+zh+zl) directly
|
785
|
+
Set E and index to values that read 0.0 in the accurate phase.
|
786
|
+
*/
|
787
|
+
logih = 0.0;
|
788
|
+
logim = 0.0;
|
789
|
+
index = 0;
|
790
|
+
ed = 0.0;
|
791
|
+
index = 0;
|
792
|
+
zh = x;
|
793
|
+
zm = 0.0;
|
794
|
+
zl = 0.0;
|
795
|
+
} else {
|
796
|
+
/* If we are here, |x| >= 2^(-8) and we must perform range reduction */
|
797
|
+
|
798
|
+
/* Compute first exactly
|
799
|
+
|
800
|
+
sh + sl = 1 + x
|
801
|
+
|
802
|
+
x can move over 1, so use a conditional Add12
|
803
|
+
*/
|
804
|
+
|
805
|
+
Add12Cond(sh,sl,1.0,x);
|
806
|
+
|
807
|
+
/* Transform higher order double to integer */
|
808
|
+
|
809
|
+
shdb.d = sh;
|
810
|
+
|
811
|
+
/* Extract exponent and mantissa
|
812
|
+
Do range reduction,
|
813
|
+
yielding to E holding the exponent and
|
814
|
+
y the mantissa between sqrt(2)/2 and sqrt(2)
|
815
|
+
*/
|
816
|
+
E = 0;
|
817
|
+
E += (shdb.i[HI]>>20)-1023; /* extract the exponent */
|
818
|
+
index = (shdb.i[HI] & 0x000fffff);
|
819
|
+
shdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
|
820
|
+
index = (index + (1<<(20-L-1))) >> (20-L);
|
821
|
+
|
822
|
+
/* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
|
823
|
+
if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
|
824
|
+
shdb.i[HI] -= 0x00100000;
|
825
|
+
E++;
|
826
|
+
}
|
827
|
+
|
828
|
+
|
829
|
+
/* Transform shdb to yh */
|
830
|
+
yh = shdb.d;
|
831
|
+
|
832
|
+
|
833
|
+
/* Compute the index to the table */
|
834
|
+
index = index & INDEXMASK;
|
835
|
+
|
836
|
+
/* Cast integer E into double ed for multiplication later */
|
837
|
+
ed = (double) E;
|
838
|
+
|
839
|
+
/*
|
840
|
+
Read tables:
|
841
|
+
Read one float for ri
|
842
|
+
Read the first two doubles for -log(r_i) (out of three)
|
843
|
+
|
844
|
+
Organization of the table:
|
845
|
+
|
846
|
+
one struct entry per index, the struct entry containing
|
847
|
+
r, logih, logim and logil in this order
|
848
|
+
*/
|
849
|
+
|
850
|
+
|
851
|
+
ri = argredtable[index].ri;
|
852
|
+
/*
|
853
|
+
Actually we don't need the logarithm entries now
|
854
|
+
Move the following two lines to the eventual reconstruction
|
855
|
+
As long as we don't have any if in the following code, we can overlap
|
856
|
+
memory access with calculations
|
857
|
+
*/
|
858
|
+
logih = argredtable[index].logih;
|
859
|
+
logim = argredtable[index].logim;
|
860
|
+
|
861
|
+
/* Test if we have a simple range reduction or a complicated one
|
862
|
+
|
863
|
+
Simple range reduction for x < 0: x + 1 is exact, sl = 0 exactly
|
864
|
+
Simple range reduction for x > 2^(125) (sh > 2^(125)): x + 1 is not exact but its error less than 2^(-125)
|
865
|
+
|
866
|
+
Complicated range reduction: other cases
|
867
|
+
|
868
|
+
*/
|
869
|
+
|
870
|
+
|
871
|
+
if ((sl == 0.0) || (E > 125)) {
|
872
|
+
/* Simple range reduction */
|
873
|
+
|
874
|
+
Mul12(&yhrih, &yhril, yh, ri);
|
875
|
+
t1 = yhrih - 1.0;
|
876
|
+
Add12Cond(zh, zm, t1, yhril);
|
877
|
+
zl = 0.0;
|
878
|
+
|
879
|
+
} else {
|
880
|
+
/* Complicated range reduction; E <= 125 */
|
881
|
+
|
882
|
+
|
883
|
+
/* Scale sl accordingly to sh, from which the exponent was extracted
|
884
|
+
|
885
|
+
We form first 2^(-E) and multiply sl with this value; this gives yl.
|
886
|
+
*/
|
887
|
+
|
888
|
+
scaledb.i[HI] = (-E + 1023) << 20;
|
889
|
+
scaledb.i[LO] = 0;
|
890
|
+
|
891
|
+
yl = sl * scaledb.d;
|
892
|
+
|
893
|
+
|
894
|
+
/* Do complicated range reduction:
|
895
|
+
|
896
|
+
zh + zm + zl = (yh + yl) * ri - 1.0
|
897
|
+
|
898
|
+
|
899
|
+
We use zh + zm in the quick phase and zh + zm + zl in the accurate phase
|
900
|
+
|
901
|
+
The multiplication yl * ri is exact because yl contains at most 9 bits and
|
902
|
+
ri contains at most 24 bits.
|
903
|
+
|
904
|
+
The substraction yhrih - 1.0 is exact as per Sterbenz' lemma.
|
905
|
+
|
906
|
+
*/
|
907
|
+
|
908
|
+
Mul12(&yhrih,&yhril,yh,ri);
|
909
|
+
ylri = yl * ri;
|
910
|
+
|
911
|
+
t1 = yhrih - 1.0;
|
912
|
+
|
913
|
+
/* The unnormalized triple-double t1 + yhril + ylri is equal to (yh + yl) * ri - 1.0
|
914
|
+
As t1 can move over yhril and yhri can move over ylri, we normalize first these
|
915
|
+
values pairwise with Add12Conds. Then we renormalize the pairs by a
|
916
|
+
"inverted" (A.E.) Renormalize3.
|
917
|
+
*/
|
918
|
+
|
919
|
+
Add12Cond(t2,t3,yhril,ylri);
|
920
|
+
Add12Cond(t4,t5,t1,t2);
|
921
|
+
|
922
|
+
Add12Cond(t6,zl,t3,t5);
|
923
|
+
Add12Cond(zh,zm,t4,t6);
|
924
|
+
|
925
|
+
}
|
926
|
+
}
|
927
|
+
|
928
|
+
|
929
|
+
/*
|
930
|
+
Polynomial evaluation
|
931
|
+
|
932
|
+
Use a 7 degree polynomial
|
933
|
+
Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
|
934
|
+
Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zm
|
935
|
+
using an ad hoc method
|
936
|
+
|
937
|
+
*/
|
938
|
+
|
939
|
+
|
940
|
+
|
941
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
942
|
+
polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
|
943
|
+
#else
|
944
|
+
polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
|
945
|
+
#endif
|
946
|
+
|
947
|
+
Mul12(&zhSquareh, &zhSquarel, zh, zh);
|
948
|
+
polyUpper = polyHorner * (zh * zhSquareh);
|
949
|
+
zhSquareHalfh = zhSquareh * -0.5;
|
950
|
+
zhSquareHalfl = zhSquarel * -0.5;
|
951
|
+
Add12(t1h, t1l, polyUpper, -1 * (zh * zm));
|
952
|
+
Add22(&t2h, &t2l, zh, zm, zhSquareHalfh, zhSquareHalfl);
|
953
|
+
Add22(&ph, &pl, t2h, t2l, t1h, t1l);
|
954
|
+
|
955
|
+
/* Reconstruction
|
956
|
+
|
957
|
+
Read logih and logim in the tables (already done)
|
958
|
+
|
959
|
+
Compute log(x) = E * log(2) + log(1+z) - log(ri)
|
960
|
+
i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
|
961
|
+
|
962
|
+
Carry out everything in double double precision
|
963
|
+
|
964
|
+
*/
|
965
|
+
|
966
|
+
/*
|
967
|
+
We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
|
968
|
+
Multiplication of ed (double E) and log2h is thus correct
|
969
|
+
The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
|
970
|
+
is enough for the accurate phase
|
971
|
+
The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
|
972
|
+
Nevertheless the storage with trailing zeros implies an overlap of the tabulated
|
973
|
+
triple double values. We have to take it into account for the accurate phase
|
974
|
+
basic procedures for addition and multiplication
|
975
|
+
The condition on the next Add12 is verified as log2m is smaller than log2h
|
976
|
+
and both are scaled by ed
|
977
|
+
*/
|
978
|
+
|
979
|
+
Add12(log2edh, log2edl, log2h * ed, log2m * ed);
|
980
|
+
|
981
|
+
/* Add logih and logim to ph and pl
|
982
|
+
|
983
|
+
We must use conditioned Add22 as logih can move over ph
|
984
|
+
*/
|
985
|
+
|
986
|
+
Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
|
987
|
+
|
988
|
+
/* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
|
989
|
+
|
990
|
+
Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
|
991
|
+
|
992
|
+
/* Rounding test and eventual return or call to the accurate function */
|
993
|
+
|
994
|
+
if(E==0)
|
995
|
+
roundcst = RDROUNDCST1;
|
996
|
+
else
|
997
|
+
roundcst = RDROUNDCST2;
|
998
|
+
|
999
|
+
TEST_AND_RETURN_RD(logh, logm, roundcst);
|
1000
|
+
|
1001
|
+
#if DEBUG
|
1002
|
+
printf("Going for Accurate Phase for x=%1.50e\n",x);
|
1003
|
+
#endif
|
1004
|
+
|
1005
|
+
log1p_td_accurate(&logh, &logm, &logl, ed, index, zh, zm, zl, logih, logim);
|
1006
|
+
|
1007
|
+
ReturnRoundDownwards3(logh, logm, logl);
|
1008
|
+
}
|
1009
|
+
|
1010
|
+
double log1p_rz(double x) {
|
1011
|
+
db_number xdb, shdb, scaledb;
|
1012
|
+
double yh, yl, ed, ri, logih, logim, yhrih, yhril, ylri, t1, t2, t3, t4, t5, t6, zh, zm, zl;
|
1013
|
+
double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
|
1014
|
+
double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, logl, roundcst;
|
1015
|
+
double sh, sl;
|
1016
|
+
int E, index;
|
1017
|
+
|
1018
|
+
|
1019
|
+
xdb.d=x;
|
1020
|
+
|
1021
|
+
/* Filter cases */
|
1022
|
+
if ((xdb.i[HI] & 0x7fffffff) < 0x3c900000) {
|
1023
|
+
/* We are less than 2^(-54) and return simply an adjusted x
|
1024
|
+
|
1025
|
+
If x = 0, the result is algebraic and equal to 0.
|
1026
|
+
|
1027
|
+
The series for log(1 + x) = x - 1/2 * x^2 + ... is alternated
|
1028
|
+
and converges in this interval.
|
1029
|
+
The truncation rest -1/2 * x^2 + 1/3 * x^3 - ... is
|
1030
|
+
always negative, so log(1 + x) is always less than x but less than
|
1031
|
+
1 ulp of x away.
|
1032
|
+
For x < 0, we have log(1 + x) < 0, so we round up and return x;
|
1033
|
+
For x > 0, we round down and return x - 1ulp
|
1034
|
+
|
1035
|
+
*/
|
1036
|
+
if (x > 0) {
|
1037
|
+
xdb.l--;
|
1038
|
+
return xdb.d;
|
1039
|
+
}
|
1040
|
+
|
1041
|
+
/* Algebraic case x == 0.0 and round up */
|
1042
|
+
|
1043
|
+
return x;
|
1044
|
+
}
|
1045
|
+
|
1046
|
+
if (((xdb.i[HI] & 0x80000000) != 0) && ((xdb.i[HI] & 0x7fffffff) >= 0x3ff00000)) {
|
1047
|
+
/* We are less or equal than -1 (-inf and NaN, too),
|
1048
|
+
we return -inf for -1 and NaN otherwise
|
1049
|
+
*/
|
1050
|
+
if (x == -1.0) return x/0.0;
|
1051
|
+
|
1052
|
+
|
1053
|
+
return (x-x)/0.0;
|
1054
|
+
}
|
1055
|
+
|
1056
|
+
if ((xdb.i[HI] & 0x7ff00000) == 0x7ff00000) {
|
1057
|
+
/* We are +inf or NaN
|
1058
|
+
If +inf, we return +inf (x+x)
|
1059
|
+
If NaN, we return NaN (x+x)
|
1060
|
+
*/
|
1061
|
+
return x+x;
|
1062
|
+
}
|
1063
|
+
|
1064
|
+
/* Test if |x| < 2^(-8)
|
1065
|
+
|
1066
|
+
If yes, short-circuit the range reduction
|
1067
|
+
|
1068
|
+
*/
|
1069
|
+
|
1070
|
+
if ((xdb.i[HI] & 0x7fffffff) < 0x3f700000) {
|
1071
|
+
/* Use the polynomial p(zh + zl) approximating log(1+zh+zl) directly
|
1072
|
+
Set E and index to values that read 0.0 in the accurate phase.
|
1073
|
+
*/
|
1074
|
+
logih = 0.0;
|
1075
|
+
logim = 0.0;
|
1076
|
+
index = 0;
|
1077
|
+
ed = 0.0;
|
1078
|
+
index = 0;
|
1079
|
+
zh = x;
|
1080
|
+
zm = 0.0;
|
1081
|
+
zl = 0.0;
|
1082
|
+
} else {
|
1083
|
+
/* If we are here, |x| >= 2^(-8) and we must perform range reduction */
|
1084
|
+
|
1085
|
+
/* Compute first exactly
|
1086
|
+
|
1087
|
+
sh + sl = 1 + x
|
1088
|
+
|
1089
|
+
x can move over 1, so use a conditional Add12
|
1090
|
+
*/
|
1091
|
+
|
1092
|
+
Add12Cond(sh,sl,1.0,x);
|
1093
|
+
|
1094
|
+
/* Transform higher order double to integer */
|
1095
|
+
|
1096
|
+
shdb.d = sh;
|
1097
|
+
|
1098
|
+
/* Extract exponent and mantissa
|
1099
|
+
Do range reduction,
|
1100
|
+
yielding to E holding the exponent and
|
1101
|
+
y the mantissa between sqrt(2)/2 and sqrt(2)
|
1102
|
+
*/
|
1103
|
+
E = 0;
|
1104
|
+
E += (shdb.i[HI]>>20)-1023; /* extract the exponent */
|
1105
|
+
index = (shdb.i[HI] & 0x000fffff);
|
1106
|
+
shdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
|
1107
|
+
index = (index + (1<<(20-L-1))) >> (20-L);
|
1108
|
+
|
1109
|
+
/* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
|
1110
|
+
if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
|
1111
|
+
shdb.i[HI] -= 0x00100000;
|
1112
|
+
E++;
|
1113
|
+
}
|
1114
|
+
|
1115
|
+
|
1116
|
+
/* Transform shdb to yh */
|
1117
|
+
yh = shdb.d;
|
1118
|
+
|
1119
|
+
|
1120
|
+
/* Compute the index to the table */
|
1121
|
+
index = index & INDEXMASK;
|
1122
|
+
|
1123
|
+
/* Cast integer E into double ed for multiplication later */
|
1124
|
+
ed = (double) E;
|
1125
|
+
|
1126
|
+
/*
|
1127
|
+
Read tables:
|
1128
|
+
Read one float for ri
|
1129
|
+
Read the first two doubles for -log(r_i) (out of three)
|
1130
|
+
|
1131
|
+
Organization of the table:
|
1132
|
+
|
1133
|
+
one struct entry per index, the struct entry containing
|
1134
|
+
r, logih, logim and logil in this order
|
1135
|
+
*/
|
1136
|
+
|
1137
|
+
|
1138
|
+
ri = argredtable[index].ri;
|
1139
|
+
/*
|
1140
|
+
Actually we don't need the logarithm entries now
|
1141
|
+
Move the following two lines to the eventual reconstruction
|
1142
|
+
As long as we don't have any if in the following code, we can overlap
|
1143
|
+
memory access with calculations
|
1144
|
+
*/
|
1145
|
+
logih = argredtable[index].logih;
|
1146
|
+
logim = argredtable[index].logim;
|
1147
|
+
|
1148
|
+
/* Test if we have a simple range reduction or a complicated one
|
1149
|
+
|
1150
|
+
Simple range reduction for x < 0: x + 1 is exact, sl = 0 exactly
|
1151
|
+
Simple range reduction for x > 2^(125) (sh > 2^(125)): x + 1 is not exact but its error less than 2^(-125)
|
1152
|
+
|
1153
|
+
Complicated range reduction: other cases
|
1154
|
+
|
1155
|
+
*/
|
1156
|
+
|
1157
|
+
|
1158
|
+
if ((sl == 0.0) || (E > 125)) {
|
1159
|
+
/* Simple range reduction */
|
1160
|
+
|
1161
|
+
Mul12(&yhrih, &yhril, yh, ri);
|
1162
|
+
t1 = yhrih - 1.0;
|
1163
|
+
Add12Cond(zh, zm, t1, yhril);
|
1164
|
+
zl = 0.0;
|
1165
|
+
|
1166
|
+
} else {
|
1167
|
+
/* Complicated range reduction; E <= 125 */
|
1168
|
+
|
1169
|
+
|
1170
|
+
/* Scale sl accordingly to sh, from which the exponent was extracted
|
1171
|
+
|
1172
|
+
We form first 2^(-E) and multiply sl with this value; this gives yl.
|
1173
|
+
*/
|
1174
|
+
|
1175
|
+
scaledb.i[HI] = (-E + 1023) << 20;
|
1176
|
+
scaledb.i[LO] = 0;
|
1177
|
+
|
1178
|
+
yl = sl * scaledb.d;
|
1179
|
+
|
1180
|
+
|
1181
|
+
/* Do complicated range reduction:
|
1182
|
+
|
1183
|
+
zh + zm + zl = (yh + yl) * ri - 1.0
|
1184
|
+
|
1185
|
+
|
1186
|
+
We use zh + zm in the quick phase and zh + zm + zl in the accurate phase
|
1187
|
+
|
1188
|
+
The multiplication yl * ri is exact because yl contains at most 9 bits and
|
1189
|
+
ri contains at most 24 bits.
|
1190
|
+
|
1191
|
+
The substraction yhrih - 1.0 is exact as per Sterbenz' lemma.
|
1192
|
+
|
1193
|
+
*/
|
1194
|
+
|
1195
|
+
Mul12(&yhrih,&yhril,yh,ri);
|
1196
|
+
ylri = yl * ri;
|
1197
|
+
|
1198
|
+
t1 = yhrih - 1.0;
|
1199
|
+
|
1200
|
+
/* The unnormalized triple-double t1 + yhril + ylri is equal to (yh + yl) * ri - 1.0
|
1201
|
+
As t1 can move over yhril and yhri can move over ylri, we normalize first these
|
1202
|
+
values pairwise with Add12Conds. Then we renormalize the pairs by a
|
1203
|
+
"inverted" (A.E.) Renormalize3.
|
1204
|
+
*/
|
1205
|
+
|
1206
|
+
Add12Cond(t2,t3,yhril,ylri);
|
1207
|
+
Add12Cond(t4,t5,t1,t2);
|
1208
|
+
|
1209
|
+
Add12Cond(t6,zl,t3,t5);
|
1210
|
+
Add12Cond(zh,zm,t4,t6);
|
1211
|
+
|
1212
|
+
}
|
1213
|
+
}
|
1214
|
+
|
1215
|
+
|
1216
|
+
/*
|
1217
|
+
Polynomial evaluation
|
1218
|
+
|
1219
|
+
Use a 7 degree polynomial
|
1220
|
+
Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
|
1221
|
+
Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zm
|
1222
|
+
using an ad hoc method
|
1223
|
+
|
1224
|
+
*/
|
1225
|
+
|
1226
|
+
|
1227
|
+
|
1228
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
1229
|
+
polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
|
1230
|
+
#else
|
1231
|
+
polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
|
1232
|
+
#endif
|
1233
|
+
|
1234
|
+
Mul12(&zhSquareh, &zhSquarel, zh, zh);
|
1235
|
+
polyUpper = polyHorner * (zh * zhSquareh);
|
1236
|
+
zhSquareHalfh = zhSquareh * -0.5;
|
1237
|
+
zhSquareHalfl = zhSquarel * -0.5;
|
1238
|
+
Add12(t1h, t1l, polyUpper, -1 * (zh * zm));
|
1239
|
+
Add22(&t2h, &t2l, zh, zm, zhSquareHalfh, zhSquareHalfl);
|
1240
|
+
Add22(&ph, &pl, t2h, t2l, t1h, t1l);
|
1241
|
+
|
1242
|
+
/* Reconstruction
|
1243
|
+
|
1244
|
+
Read logih and logim in the tables (already done)
|
1245
|
+
|
1246
|
+
Compute log(x) = E * log(2) + log(1+z) - log(ri)
|
1247
|
+
i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
|
1248
|
+
|
1249
|
+
Carry out everything in double double precision
|
1250
|
+
|
1251
|
+
*/
|
1252
|
+
|
1253
|
+
/*
|
1254
|
+
We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
|
1255
|
+
Multiplication of ed (double E) and log2h is thus correct
|
1256
|
+
The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
|
1257
|
+
is enough for the accurate phase
|
1258
|
+
The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
|
1259
|
+
Nevertheless the storage with trailing zeros implies an overlap of the tabulated
|
1260
|
+
triple double values. We have to take it into account for the accurate phase
|
1261
|
+
basic procedures for addition and multiplication
|
1262
|
+
The condition on the next Add12 is verified as log2m is smaller than log2h
|
1263
|
+
and both are scaled by ed
|
1264
|
+
*/
|
1265
|
+
|
1266
|
+
Add12(log2edh, log2edl, log2h * ed, log2m * ed);
|
1267
|
+
|
1268
|
+
/* Add logih and logim to ph and pl
|
1269
|
+
|
1270
|
+
We must use conditioned Add22 as logih can move over ph
|
1271
|
+
*/
|
1272
|
+
|
1273
|
+
Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
|
1274
|
+
|
1275
|
+
/* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
|
1276
|
+
|
1277
|
+
Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
|
1278
|
+
|
1279
|
+
/* Rounding test and eventual return or call to the accurate function */
|
1280
|
+
|
1281
|
+
if(E==0)
|
1282
|
+
roundcst = RDROUNDCST1;
|
1283
|
+
else
|
1284
|
+
roundcst = RDROUNDCST2;
|
1285
|
+
|
1286
|
+
TEST_AND_RETURN_RZ(logh, logm, roundcst);
|
1287
|
+
|
1288
|
+
#if DEBUG
|
1289
|
+
printf("Going for Accurate Phase for x=%1.50e\n",x);
|
1290
|
+
#endif
|
1291
|
+
|
1292
|
+
log1p_td_accurate(&logh, &logm, &logl, ed, index, zh, zm, zl, logih, logim);
|
1293
|
+
|
1294
|
+
ReturnRoundTowardsZero3(logh, logm, logl);
|
1295
|
+
}
|