crmf 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -0
- data/crmf.gemspec +105 -3
- data/ext/crlibm-1.0beta5/AUTHORS +2 -0
- data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
- data/ext/crlibm-1.0beta5/COPYING +340 -0
- data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
- data/ext/crlibm-1.0beta5/ChangeLog +125 -0
- data/ext/crlibm-1.0beta5/Makefile.am +134 -0
- data/ext/crlibm-1.0beta5/NEWS +0 -0
- data/ext/crlibm-1.0beta5/README +31 -0
- data/ext/crlibm-1.0beta5/README.DEV +23 -0
- data/ext/crlibm-1.0beta5/README.md +5 -0
- data/ext/crlibm-1.0beta5/TODO +66 -0
- data/ext/crlibm-1.0beta5/VERSION +1 -0
- data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
- data/ext/crlibm-1.0beta5/acos-td.h +629 -0
- data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
- data/ext/crlibm-1.0beta5/asin-td.h +620 -0
- data/ext/crlibm-1.0beta5/asincos.c +4488 -0
- data/ext/crlibm-1.0beta5/asincos.h +575 -0
- data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
- data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
- data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
- data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
- data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
- data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
- data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
- data/ext/crlibm-1.0beta5/configure.ac +419 -0
- data/ext/crlibm-1.0beta5/crlibm.h +204 -0
- data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
- data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
- data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
- data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
- data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
- data/ext/crlibm-1.0beta5/double-extended.h +496 -0
- data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
- data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
- data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
- data/ext/crlibm-1.0beta5/exp-td.h +685 -0
- data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
- data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
- data/ext/crlibm-1.0beta5/expm1.c +2515 -0
- data/ext/crlibm-1.0beta5/expm1.h +715 -0
- data/ext/crlibm-1.0beta5/interval.h +238 -0
- data/ext/crlibm-1.0beta5/log-de.c +480 -0
- data/ext/crlibm-1.0beta5/log-de.h +747 -0
- data/ext/crlibm-1.0beta5/log-de2.c +280 -0
- data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
- data/ext/crlibm-1.0beta5/log-td.c +1158 -0
- data/ext/crlibm-1.0beta5/log-td.h +819 -0
- data/ext/crlibm-1.0beta5/log.c +2244 -0
- data/ext/crlibm-1.0beta5/log.h +1592 -0
- data/ext/crlibm-1.0beta5/log10-td.c +906 -0
- data/ext/crlibm-1.0beta5/log10-td.h +823 -0
- data/ext/crlibm-1.0beta5/log1p.c +1295 -0
- data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
- data/ext/crlibm-1.0beta5/log2-td.h +821 -0
- data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
- data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
- data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
- data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
- data/ext/crlibm-1.0beta5/log_fast.c +360 -0
- data/ext/crlibm-1.0beta5/log_fast.h +440 -0
- data/ext/crlibm-1.0beta5/pow.c +1396 -0
- data/ext/crlibm-1.0beta5/pow.h +3101 -0
- data/ext/crlibm-1.0beta5/prepare +20 -0
- data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
- data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
- data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
- data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
- data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
- data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
- data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
- data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
- data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
- data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
- data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
- data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
- data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
- data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
- data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
- data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
- data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
- data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
- data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
- data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
- data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
- data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
- data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
- data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
- data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
- data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
- data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
- data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
- data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
- data/ext/crlibm-1.0beta5/trigpi.h +556 -0
- data/ext/crlibm-1.0beta5/triple-double.c +57 -0
- data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
- data/ext/crmf/crmf.c +117 -20
- data/ext/crmf/extconf.rb +12 -8
- data/lib/crmf/version.rb +1 -1
- data/tests/perf.rb +100 -219
- metadata +108 -10
- data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,480 @@
|
|
1
|
+
/*
 * This function computes log, correctly rounded,
 * using double-extended arithmetic.
 *
 * THIS IS EXPERIMENTAL SOFTWARE
 *
 * In particular, it changes rounding modes all the time without warning
 * or restoring them.
 *
 * Author : Florent de Dinechin
 * Florent.de.Dinechin at ens-lyon.fr
 *
 * This function compiles on both IA32 and IA64 architectures. On IA64,
 * it needs icc 8.1 or higher, with the following flags (which should be
 * set up by autoconf):
 *
 *   icc -DHAVE_CONFIG_H -Qoption,cpp,--extended_float_types \
 *       -IPF_fp_speculationsafe -c log-de.c;\
 *   mv log-de.o log-td.o; make
 */
|
26
|
+
|
27
|
+
|
28
|
+
#include <stdio.h>
|
29
|
+
#include <stdlib.h>
|
30
|
+
#include "crlibm.h"
|
31
|
+
#include "crlibm_private.h"
|
32
|
+
#include "double-extended.h"
|
33
|
+
#include "log-de.h"
|
34
|
+
|
35
|
+
|
36
|
+
/*
 * Accurate (second) evaluation step shared by log_rn, log_rd and log_ru.
 *
 * Evaluates a polynomial in z by Horner's scheme:
 *   - the high-degree part (coefficients c14h down to c8h) uses plain
 *     double_ext operations;
 *   - the low-degree part (coefficients c7 down to c1, then the table entry
 *     argredtable[index].logirh/logirl as constant term) uses the two-word
 *     (high, low) operators Mul12_ext / Add22_ext / FMA22_ext.
 * Finally E times the pair (log2H, log2L) is added.
 *
 * prh, prl : outputs, high and low words of the result
 * z        : reduced argument computed by the caller
 * E        : exponent computed by the caller
 * index    : index into argredtable
 *
 * NOTE(review): on X86/AMD64 the coefficient names c1h..c14h and c1l..c7l
 * are not declared here, so they must come from log-de.h; on other targets
 * they are locals, presumably filled in by PREFETCH_POLY_ACCURATE -- confirm
 * against log-de.h. Do not rename them.
 */
static void log_accurate(double_ext* prh, double_ext* prl, double_ext z, int E, int index) {

  double_ext eh,el, t13, t12, t11, t10, t9, t8,
    p7h,p7l, t7h,t7l, t6h,t6l, t5h,t5l, t4h,t4l,
    t3h,t3l, t2h,t2l, t1h,t1l, t0h,t0l;
  /* Many temporaries because single assignment form is nicer for Gappa */

#if !(defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64))
  double_ext c1h,c2h,c3h,c4h,c5h,c6h,c7h,c8h,c9h,c10h,c11h,c12h,c13h,c14h,c15h;
  double_ext c1l,c2l,c3l,c4l,c5l,c6l,c7l,c8l;
#endif


#if EVAL_PERF
  /* Statistics: count how often the accurate step is reached. */
  crlibm_second_step_taken++;
#endif

  /* TODO check the conditions for the double-double ops */


  PREFETCH_POLY_ACCURATE;

  /* High-degree coefficients: one double_ext word each. */
  t13 = c13h + z*c14h;
  t12 = c12h + z*t13;
  t11 = c11h + z*t12;
  t10 = c10h + z*t11;
  t9  = c9h  + z*t10;
  t8  = c8h  + z*t9;

  /* From degree 7 downwards the accumulator is a (high, low) pair. */
#if 1 /* This is faster on PIII. Your mileage may vary */
  Mul12_ext(&p7h, &p7l,  z, t8);
  Add22_ext(&t7h, &t7l,  p7h,p7l,  c7h,c7l);
#else
  FMA22_ext(&t7h, &t7l,  z,0,  t8,0,  c7h,c7l);
#endif
  FMA22_ext(&t6h, &t6l,  z,0,  t7h,t7l,  c6h,c6l);
  FMA22_ext(&t5h, &t5l,  z,0,  t6h,t6l,  c5h,c5l);
  FMA22_ext(&t4h, &t4l,  z,0,  t5h,t5l,  c4h,c4l);
  FMA22_ext(&t3h, &t3l,  z,0,  t4h,t4l,  c3h,c3l);
  FMA22_ext(&t2h, &t2l,  z,0,  t3h,t3l,  c2h,c2l);
  FMA22_ext(&t1h, &t1l,  z,0,  t2h,t2l,  c1h,c1l);
  /* Constant term: the table value associated with this index. */
  FMA22_ext(&t0h, &t0l,  z,0,  t1h,t1l,  argredtable[index].logirh, argredtable[index].logirl);

  /* Add the exponent contribution E * (log2H + log2L). */
  Mul22_ext(&eh, &el,   log2H,log2L, E, 0);
  Add22_ext(prh, prl,   eh,el,  t0h,t0l);
}
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
double log_rn(double x) {
|
88
|
+
double_ext logirh, r, y, z, th, tl, logde;
|
89
|
+
#if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
|
90
|
+
db_number xdb;
|
91
|
+
int E, index, index0, roundtestmask;
|
92
|
+
#else /* assuming Itanium here */
|
93
|
+
int64_t E, i;
|
94
|
+
uint64_t index, roundtestmask;
|
95
|
+
double c2,c3,c4,c5,c6,c7;
|
96
|
+
#endif
|
97
|
+
|
98
|
+
|
99
|
+
#if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
|
100
|
+
xdb.d=x;
|
101
|
+
|
102
|
+
index0 = (xdb.i[HI] & 0x000fffff);
|
103
|
+
index = (index0 + (1<<(20-L-1))) >> (20-L);
|
104
|
+
E = (xdb.i[HI]>>20)-1023; /* extract the exponent */
|
105
|
+
|
106
|
+
/* Filter cases */
|
107
|
+
if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
|
108
|
+
if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0) return -1.0/0.0; /* log(+/-0) = -Inf */
|
109
|
+
if (xdb.i[HI] < 0) return (x-x)/0; /* log(-x) = Nan */
|
110
|
+
/* Else subnormal number */
|
111
|
+
xdb.d *= two64; /* make x a normal number */
|
112
|
+
E = -64 + (xdb.i[HI]>>20)-1023; /* extract the exponent */
|
113
|
+
index0 = (xdb.i[HI] & 0x000fffff);
|
114
|
+
index = (index0 + (1<<(20-L-1))) >> (20-L);
|
115
|
+
}
|
116
|
+
if (xdb.i[HI] >= 0x7ff00000) return x+x; /* Inf or Nan */
|
117
|
+
|
118
|
+
DOUBLE_EXTENDED_MODE; /* This one should be overlapped with following integer computation */
|
119
|
+
|
120
|
+
/* Extract exponent and mantissa */
|
121
|
+
xdb.i[HI] = index0 | 0x3ff00000; /* do exponent = 0 */
|
122
|
+
/* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
|
123
|
+
if (index >= MAXINDEX){ /* corresponds to y>sqrt(2)*/
|
124
|
+
xdb.i[HI] -= 0x00100000;
|
125
|
+
index = index & INDEXMASK;
|
126
|
+
E++;
|
127
|
+
}
|
128
|
+
y = xdb.d;
|
129
|
+
|
130
|
+
#else /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
|
131
|
+
/* Here come the code specific to Itanium processor */
|
132
|
+
E=0;
|
133
|
+
PREFETCH_POLY_QUICK; /* defined in log-de.h */
|
134
|
+
y=x;
|
135
|
+
i = _Asm_getf(2/*_FR_D*/, y); /* Cast y to a 64-bit integer */
|
136
|
+
|
137
|
+
/* Filter special cases */
|
138
|
+
if (i<(int64_t)ULL(0010000000000000)){ /* equivalent to : x < 2^(-1022) */
|
139
|
+
if ((i & ULL(7fffffffffffffff))==0) return -1.0/0.0; /* log(+/-0) = -Inf */
|
140
|
+
if (i<0) return (x-x)/0; /* log(-x) = Nan */
|
141
|
+
/* Else subnormal number */
|
142
|
+
y *= two64; /* make x a normal number */
|
143
|
+
E = -64;
|
144
|
+
i = _Asm_getf(2/*_FR_D*/, y); /* and update i */
|
145
|
+
}
|
146
|
+
if (i >= ULL(7ff0000000000000)) return x+x; /* Inf or Nan */
|
147
|
+
|
148
|
+
/* Extract exponent and mantissa */
|
149
|
+
E += (i>>52)-1023;
|
150
|
+
i = i & ULL(000fffffffffffff); /* keep only mantissa */
|
151
|
+
index = (i + (ULL(1)<<(52-L-1))) >> (52-L);
|
152
|
+
/* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
|
153
|
+
if (index >= MAXINDEX){ /* corresponds to y>sqrt(2)*/
|
154
|
+
y = _Asm_setf(2/*_FR_D*/, (i | ULL(3ff0000000000000)) - ULL(0010000000000000) ); /* exponent = -1 */
|
155
|
+
index = index & INDEXMASK;
|
156
|
+
E++;
|
157
|
+
}
|
158
|
+
else
|
159
|
+
y = _Asm_setf(2/*_FR_D*/, i | ULL(3ff0000000000000) ); /* exponent = 0*/
|
160
|
+
#endif /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
|
161
|
+
|
162
|
+
|
163
|
+
|
164
|
+
/* All the previous argument reduction was exact */
|
165
|
+
/* now y holds 1+f, and E is the exponent */
|
166
|
+
|
167
|
+
r = (double_ext) (argredtable[index].r); /* approx to 1/y.d */
|
168
|
+
logirh = argredtable[index].logirh;
|
169
|
+
z = y*r - 1. ; /* even without an FMA, all exact */
|
170
|
+
|
171
|
+
#if 0
|
172
|
+
if(E==0)
|
173
|
+
roundtestmask=ACCURATE_TO_61_BITS;
|
174
|
+
else
|
175
|
+
roundtestmask=ACCURATE_TO_61_BITS;
|
176
|
+
#else
|
177
|
+
roundtestmask=ACCURATE_TO_62_BITS;
|
178
|
+
#endif
|
179
|
+
|
180
|
+
#ifdef ESTRIN
|
181
|
+
/* Estrin polynomial evaluation */
|
182
|
+
double_ext z2,z4, p01, p23, p45, p67, p03, p47,p07;
|
183
|
+
|
184
|
+
z2 = z*z; p67 = c6 + z*c7; p45 = c4 + z*c5; p23 = c2 + z*c3; p01 = logirh + z;
|
185
|
+
z4 = z2*z2; p47 = p45 + z2*p67; p03 = p01 + z2*p23;
|
186
|
+
p07 = p03 + z4*p47;
|
187
|
+
logde = p07 + E*log2H;
|
188
|
+
#endif
|
189
|
+
|
190
|
+
#ifdef PATERSON
|
191
|
+
double_ext z4,z2,t0,t1,t2,t3,t4,t5,t6,t7,t8;
|
192
|
+
|
193
|
+
z2 = z * z; t1 = z + ps_alpha; t2 = z + ps_beta; t3 = c3 * z + c2; t4 = z + logirh;
|
194
|
+
z4 = z2 * z2; t5 = z2 + ps_c; t6 = t3 * z2 + t4;
|
195
|
+
t7 = t5 * t1 + t2; t0 = z4 * c7; t8 = t7 * t0 + t6;
|
196
|
+
logde = t8 + E*log2H;
|
197
|
+
#endif
|
198
|
+
|
199
|
+
#if 0 /* to time the first step only */
|
200
|
+
BACK_TO_DOUBLE_MODE; return (double)t;
|
201
|
+
#endif
|
202
|
+
|
203
|
+
|
204
|
+
/* To test the second step only, comment out the following line */
|
205
|
+
DE_TEST_AND_RETURN_RN(logde, roundtestmask);
|
206
|
+
|
207
|
+
|
208
|
+
log_accurate(&th, &tl, z, E, index);
|
209
|
+
|
210
|
+
BACK_TO_DOUBLE_MODE;
|
211
|
+
|
212
|
+
return (double) (th+tl); /* The exact sum of these double-extended is rounded to the nearest */
|
213
|
+
}
|
214
|
+
|
215
|
+
|
216
|
+
|
217
|
+
|
218
|
+
|
219
|
+
|
220
|
+
|
221
|
+
|
222
|
+
|
223
|
+
|
224
|
+
double log_rd(double x) {
|
225
|
+
double_ext logirh, r, y, z, th, tl, logde;
|
226
|
+
#if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
|
227
|
+
db_number xdb;
|
228
|
+
int E, index, roundtestmask;
|
229
|
+
#else
|
230
|
+
int64_t E, i;
|
231
|
+
uint64_t index, roundtestmask;
|
232
|
+
double_ext c1,c2,c3,c4,c5,c6,c7;
|
233
|
+
#endif
|
234
|
+
|
235
|
+
E=0;
|
236
|
+
|
237
|
+
#if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
|
238
|
+
xdb.d=x;
|
239
|
+
|
240
|
+
/* Filter cases */
|
241
|
+
if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
|
242
|
+
if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0) return -1.0/0.0; /* log(+/-0) = -Inf */
|
243
|
+
if (xdb.i[HI] < 0) return (x-x)/0; /* log(-x) = Nan */
|
244
|
+
/* Else subnormal number */
|
245
|
+
E = -64;
|
246
|
+
xdb.d *= two64; /* make x a normal number */
|
247
|
+
}
|
248
|
+
if (xdb.i[HI] >= 0x7ff00000) return x+x; /* Inf or Nan */
|
249
|
+
|
250
|
+
DOUBLE_EXTENDED_MODE; /* This one should be overlapped with following integer computation */
|
251
|
+
|
252
|
+
/* Extract exponent and mantissa */
|
253
|
+
E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
|
254
|
+
index = (xdb.i[HI] & 0x000fffff);
|
255
|
+
xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
|
256
|
+
index = (index + (1<<(20-L-1))) >> (20-L);
|
257
|
+
/* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
|
258
|
+
if (index >= MAXINDEX){ /* corresponds to y>sqrt(2)*/
|
259
|
+
xdb.i[HI] -= 0x00100000;
|
260
|
+
E++;
|
261
|
+
}
|
262
|
+
y = xdb.d;
|
263
|
+
|
264
|
+
#else /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
|
265
|
+
/* Here come the code specific to Itanium processor */
|
266
|
+
PREFETCH_POLY_QUICK; /* defined in log-de.h */
|
267
|
+
y=x;
|
268
|
+
i = _Asm_getf(2/*_FR_D*/, y); /* Cast y to a 64-bit integer */
|
269
|
+
|
270
|
+
/* Filter special cases */
|
271
|
+
if (i<(int64_t)ULL(0010000000000000)){ /* equivalent to : x < 2^(-1022) */
|
272
|
+
if ((i & ULL(7fffffffffffffff))==0) return -1.0/0.0; /* log(+/-0) = -Inf */
|
273
|
+
if (i<0) return (x-x)/0; /* log(-x) = Nan */
|
274
|
+
/* Else subnormal number */
|
275
|
+
y *= two64; /* make x a normal number */
|
276
|
+
E = -64;
|
277
|
+
i = _Asm_getf(2/*_FR_D*/, y); /* and update i */
|
278
|
+
}
|
279
|
+
if (i >= ULL(7ff0000000000000)) return x+x; /* Inf or Nan */
|
280
|
+
|
281
|
+
/* Extract exponent and mantissa */
|
282
|
+
E += (i>>52)-1023;
|
283
|
+
i = i & ULL(000fffffffffffff); /* keep only mantissa */
|
284
|
+
index = (i + (ULL(1)<<(52-L-1))) >> (52-L);
|
285
|
+
/* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
|
286
|
+
if (index >= MAXINDEX){ /* corresponds to y>sqrt(2)*/
|
287
|
+
y = _Asm_setf(2/*_FR_D*/, (i | ULL(3ff0000000000000)) - ULL(0010000000000000) ); /* exponent = -1 */
|
288
|
+
E++;
|
289
|
+
}
|
290
|
+
else
|
291
|
+
y = _Asm_setf(2/*_FR_D*/, i | ULL(3ff0000000000000) ); /* exponent = 0*/
|
292
|
+
#endif /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
|
293
|
+
|
294
|
+
/* All the previous argument reduction was exact */
|
295
|
+
/* now y holds 1+f, and E is the exponent */
|
296
|
+
index = index & INDEXMASK;
|
297
|
+
|
298
|
+
logirh = argredtable[index].logirh;
|
299
|
+
r = (double_ext) (argredtable[index].r); /* approx to 1/y.d */
|
300
|
+
z = y*r - 1. ; /* even without an FMA, all exact */
|
301
|
+
|
302
|
+
if(E==0)
|
303
|
+
roundtestmask=ACCURATE_TO_61_BITS;
|
304
|
+
else
|
305
|
+
roundtestmask=ACCURATE_TO_62_BITS;
|
306
|
+
|
307
|
+
#ifdef ESTRIN
|
308
|
+
/* Estrin polynomial evaluation */
|
309
|
+
double_ext z2,z4, p01, p23, p45, p67, p03, p47,p07;
|
310
|
+
|
311
|
+
z2 = z*z; p67 = c6 + z*c7; p45 = c4 + z*c5; p23 = c2 + z*c3; p01 = logirh + z;
|
312
|
+
z4 = z2*z2; p47 = p45 + z2*p67; p03 = p01 + z2*p23;
|
313
|
+
p07 = p03 + z4*p47;
|
314
|
+
logde = p07 + E*log2H;
|
315
|
+
#endif
|
316
|
+
|
317
|
+
#ifdef PATERSON
|
318
|
+
double_ext z4,z2,t0,t1,t2,t3,t4,t5,t6,t7,t8;
|
319
|
+
|
320
|
+
z2 = z * z; t1 = z + ps_alpha; t2 = z + ps_beta; t3 = c3 * z + c2; t4 = z + logirh;
|
321
|
+
z4 = z2 * z2; t5 = z2 + ps_c; t6 = t3 * z2 + t4;
|
322
|
+
|
323
|
+
t7 = t5 * t1 + t2; t0 = z4 * c7; t8 = t7 * t0 + t6;
|
324
|
+
|
325
|
+
logde = t8 + E*log2H;
|
326
|
+
#endif
|
327
|
+
|
328
|
+
#if 0 /* to time the first step only */
|
329
|
+
BACK_TO_DOUBLE_MODE; return (double)t;
|
330
|
+
#endif
|
331
|
+
|
332
|
+
|
333
|
+
/* To test the second step only, comment out the following line */
|
334
|
+
DE_TEST_AND_RETURN_RD(logde, roundtestmask);
|
335
|
+
|
336
|
+
log_accurate(&th, &tl, z, E, index);
|
337
|
+
|
338
|
+
RETURN_SUM_ROUNDED_DOWN(th, tl);
|
339
|
+
|
340
|
+
}
|
341
|
+
|
342
|
+
|
343
|
+
|
344
|
+
|
345
|
+
|
346
|
+
|
347
|
+
|
348
|
+
|
349
|
+
|
350
|
+
|
351
|
+
double log_ru(double x) {
|
352
|
+
double_ext logirh, r, y, z, th, tl, logde;
|
353
|
+
#if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
|
354
|
+
db_number xdb;
|
355
|
+
int E, index, roundtestmask;
|
356
|
+
#else
|
357
|
+
int64_t E, i;
|
358
|
+
uint64_t index, roundtestmask;
|
359
|
+
double_ext c1,c2,c3,c4,c5,c6,c7;
|
360
|
+
#endif
|
361
|
+
|
362
|
+
E=0;
|
363
|
+
|
364
|
+
#if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
|
365
|
+
xdb.d=x;
|
366
|
+
|
367
|
+
/* Filter cases */
|
368
|
+
if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
|
369
|
+
if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0) return -1.0/0.0; /* log(+/-0) = -Inf */
|
370
|
+
if (xdb.i[HI] < 0) return (x-x)/0; /* log(-x) = Nan */
|
371
|
+
/* Else subnormal number */
|
372
|
+
E = -64;
|
373
|
+
xdb.d *= two64; /* make x a normal number */
|
374
|
+
}
|
375
|
+
if (xdb.i[HI] >= 0x7ff00000) return x+x; /* Inf or Nan */
|
376
|
+
|
377
|
+
DOUBLE_EXTENDED_MODE; /* This one should be overlapped with following integer computation */
|
378
|
+
|
379
|
+
/* Extract exponent and mantissa */
|
380
|
+
E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
|
381
|
+
index = (xdb.i[HI] & 0x000fffff);
|
382
|
+
xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
|
383
|
+
index = (index + (1<<(20-L-1))) >> (20-L);
|
384
|
+
|
385
|
+
/* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
|
386
|
+
if (index >= MAXINDEX){ /* corresponds to y>sqrt(2)*/
|
387
|
+
index = index & INDEXMASK;
|
388
|
+
xdb.i[HI] -= 0x00100000;
|
389
|
+
E++;
|
390
|
+
}
|
391
|
+
y = xdb.d;
|
392
|
+
|
393
|
+
#else /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
|
394
|
+
/* Here come the code specific to Itanium processor */
|
395
|
+
PREFETCH_POLY_QUICK; /* defined in log-de.h */
|
396
|
+
y=x;
|
397
|
+
i = _Asm_getf(2/*_FR_D*/, y); /* Cast y to a 64-bit integer */
|
398
|
+
|
399
|
+
/* Filter special cases */
|
400
|
+
if (i<(int64_t)ULL(0010000000000000)){ /* equivalent to : x < 2^(-1022) */
|
401
|
+
if ((i & ULL(7fffffffffffffff))==0) return -1.0/0.0; /* log(+/-0) = -Inf */
|
402
|
+
if (i<0) return (x-x)/0; /* log(-x) = Nan */
|
403
|
+
/* Else subnormal number */
|
404
|
+
y *= two64; /* make x a normal number */
|
405
|
+
E = -64;
|
406
|
+
i = _Asm_getf(2/*_FR_D*/, y); /* and update i */
|
407
|
+
}
|
408
|
+
if (i >= ULL(7ff0000000000000)) return x+x; /* Inf or Nan */
|
409
|
+
|
410
|
+
/* Extract exponent and mantissa */
|
411
|
+
E += (i>>52)-1023;
|
412
|
+
i = i & ULL(000fffffffffffff); /* keep only mantissa */
|
413
|
+
index = (i + (ULL(1)<<(52-L-1))) >> (52-L);
|
414
|
+
/* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
|
415
|
+
if (index >= MAXINDEX){ /* corresponds to y>sqrt(2)*/
|
416
|
+
y = _Asm_setf(2/*_FR_D*/, (i | ULL(3ff0000000000000)) - ULL(0010000000000000) ); /* exponent = -1 */
|
417
|
+
index = index & INDEXMASK;
|
418
|
+
E++;
|
419
|
+
}
|
420
|
+
else
|
421
|
+
y = _Asm_setf(2/*_FR_D*/, i | ULL(3ff0000000000000) ); /* exponent = 0*/
|
422
|
+
#endif /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
|
423
|
+
|
424
|
+
/* All the previous argument reduction was exact */
|
425
|
+
/* now y holds 1+f, and E is the exponent */
|
426
|
+
|
427
|
+
logirh = argredtable[index].logirh;
|
428
|
+
r = (double_ext) (argredtable[index].r); /* approx to 1/y.d */
|
429
|
+
z = y*r - 1. ; /* even without an FMA, all exact */
|
430
|
+
|
431
|
+
if(E==0)
|
432
|
+
roundtestmask=ACCURATE_TO_61_BITS;
|
433
|
+
else
|
434
|
+
roundtestmask=ACCURATE_TO_62_BITS;
|
435
|
+
|
436
|
+
#ifdef ESTRIN
|
437
|
+
/* Estrin polynomial evaluation */
|
438
|
+
double_ext z2,z4, p01, p23, p45, p67, p03, p47,p07;
|
439
|
+
|
440
|
+
z2 = z*z; p67 = c6 + z*c7; p45 = c4 + z*c5; p23 = c2 + z*c3; p01 = logirh + z;
|
441
|
+
z4 = z2*z2; p47 = p45 + z2*p67; p03 = p01 + z2*p23;
|
442
|
+
p07 = p03 + z4*p47;
|
443
|
+
logde = p07 + E*log2H;
|
444
|
+
#endif
|
445
|
+
|
446
|
+
#ifdef PATERSON
|
447
|
+
double_ext z4,z2,t0,t1,t2,t3,t4,t5,t6,t7,t8;
|
448
|
+
|
449
|
+
z2 = z * z; t1 = z + ps_alpha; t2 = z + ps_beta; t3 = c3 * z + c2; t4 = z + logirh;
|
450
|
+
z4 = z2 * z2; t5 = z2 + ps_c; t6 = t3 * z2 + t4;
|
451
|
+
|
452
|
+
t7 = t5 * t1 + t2; t0 = z4 * c7; t8 = t7 * t0 + t6;
|
453
|
+
|
454
|
+
logde = t8 + E*log2H;
|
455
|
+
#endif
|
456
|
+
|
457
|
+
|
458
|
+
#if 0 /* to time the first step only */
|
459
|
+
BACK_TO_DOUBLE_MODE; return (double)t;
|
460
|
+
#endif
|
461
|
+
|
462
|
+
|
463
|
+
/* To test the second step only, comment out the following line */
|
464
|
+
DE_TEST_AND_RETURN_RU(logde, roundtestmask);
|
465
|
+
|
466
|
+
log_accurate(&th, &tl, z, E, index);
|
467
|
+
|
468
|
+
RETURN_SUM_ROUNDED_UP(th, tl);
|
469
|
+
|
470
|
+
}
|
471
|
+
|
472
|
+
|
473
|
+
/*
 * Logarithm of x, rounded towards zero.
 *
 * Dispatches on the position of x relative to 1.0:
 *   x > 1.0   -> log_rd(x)  (rounding down)
 *   otherwise -> log_ru(x)  (rounding up)
 * The "otherwise" branch also receives x == 1.0 and NaN, because the
 * comparison x > 1.0 is false for both.
 */
double log_rz(double x) {
  if (x>1.0)
    return log_rd(x);
  else
    return log_ru(x);
}
|
479
|
+
|
480
|
+
|