crmf 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -0
- data/crmf.gemspec +102 -1
- data/ext/crlibm-1.0beta5/AUTHORS +2 -0
- data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
- data/ext/crlibm-1.0beta5/COPYING +340 -0
- data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
- data/ext/crlibm-1.0beta5/ChangeLog +125 -0
- data/ext/crlibm-1.0beta5/Makefile.am +134 -0
- data/ext/crlibm-1.0beta5/NEWS +0 -0
- data/ext/crlibm-1.0beta5/README +31 -0
- data/ext/crlibm-1.0beta5/README.DEV +23 -0
- data/ext/crlibm-1.0beta5/README.md +5 -0
- data/ext/crlibm-1.0beta5/TODO +66 -0
- data/ext/crlibm-1.0beta5/VERSION +1 -0
- data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
- data/ext/crlibm-1.0beta5/acos-td.h +629 -0
- data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
- data/ext/crlibm-1.0beta5/asin-td.h +620 -0
- data/ext/crlibm-1.0beta5/asincos.c +4488 -0
- data/ext/crlibm-1.0beta5/asincos.h +575 -0
- data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
- data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
- data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
- data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
- data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
- data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
- data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
- data/ext/crlibm-1.0beta5/configure.ac +419 -0
- data/ext/crlibm-1.0beta5/crlibm.h +204 -0
- data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
- data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
- data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
- data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
- data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
- data/ext/crlibm-1.0beta5/double-extended.h +496 -0
- data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
- data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
- data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
- data/ext/crlibm-1.0beta5/exp-td.h +685 -0
- data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
- data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
- data/ext/crlibm-1.0beta5/expm1.c +2515 -0
- data/ext/crlibm-1.0beta5/expm1.h +715 -0
- data/ext/crlibm-1.0beta5/interval.h +238 -0
- data/ext/crlibm-1.0beta5/log-de.c +480 -0
- data/ext/crlibm-1.0beta5/log-de.h +747 -0
- data/ext/crlibm-1.0beta5/log-de2.c +280 -0
- data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
- data/ext/crlibm-1.0beta5/log-td.c +1158 -0
- data/ext/crlibm-1.0beta5/log-td.h +819 -0
- data/ext/crlibm-1.0beta5/log.c +2244 -0
- data/ext/crlibm-1.0beta5/log.h +1592 -0
- data/ext/crlibm-1.0beta5/log10-td.c +906 -0
- data/ext/crlibm-1.0beta5/log10-td.h +823 -0
- data/ext/crlibm-1.0beta5/log1p.c +1295 -0
- data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
- data/ext/crlibm-1.0beta5/log2-td.h +821 -0
- data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
- data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
- data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
- data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
- data/ext/crlibm-1.0beta5/log_fast.c +360 -0
- data/ext/crlibm-1.0beta5/log_fast.h +440 -0
- data/ext/crlibm-1.0beta5/pow.c +1396 -0
- data/ext/crlibm-1.0beta5/pow.h +3101 -0
- data/ext/crlibm-1.0beta5/prepare +20 -0
- data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
- data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
- data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
- data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
- data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
- data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
- data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
- data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
- data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
- data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
- data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
- data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
- data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
- data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
- data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
- data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
- data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
- data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
- data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
- data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
- data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
- data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
- data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
- data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
- data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
- data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
- data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
- data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
- data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
- data/ext/crlibm-1.0beta5/trigpi.h +556 -0
- data/ext/crlibm-1.0beta5/triple-double.c +57 -0
- data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
- data/ext/crmf/crmf.c +16 -16
- data/ext/crmf/extconf.rb +12 -8
- data/lib/crmf/version.rb +1 -1
- data/tests/perf.rb +100 -219
- metadata +104 -3
- data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,280 @@
|
|
1
|
+
/*
|
2
|
+
 * This function computes log, correctly rounded,
|
3
|
+
using experimental techniques based on double-extended arithmetic
|
4
|
+
|
5
|
+
THIS IS EXPERIMENTAL SOFTWARE
|
6
|
+
|
7
|
+
In particular it changes rounding modes all the time without warning
|
8
|
+
nor restoring.
|
9
|
+
|
10
|
+
*
|
11
|
+
* Author : Florent de Dinechin
|
12
|
+
* Florent.de.Dinechin at ens-lyon.fr
|
13
|
+
*
|
14
|
+
|
15
|
+
To have it replace the crlibm log, do:
|
16
|
+
|
17
|
+
on pentium,
|
18
|
+
gcc -DHAVE_CONFIG_H -I. -fPIC -O2 -c log-de2.c; mv log-de2.o log_fast.o; make
|
19
|
+
|
20
|
+
on itanium,
|
21
|
+
icc -I/users/fdedinex/local/IA64/include -mcpu=itanium2\
|
22
|
+
-Qoption,cpp,--extended_float_types -IPF_fp_speculationsafe -c log-de2.c;\
|
23
|
+
mv log-de2.o log_fast.o; make
|
24
|
+
|
25
|
+
|
26
|
+
*/
|
27
|
+
|
28
|
+
|
29
|
+
#include <stdio.h>
|
30
|
+
#include <stdlib.h>
|
31
|
+
#include "crlibm.h"
|
32
|
+
#include "crlibm_private.h"
|
33
|
+
#include "double-extended.h"
|
34
|
+
#include "log-de2.h"
|
35
|
+
|
36
|
+
|
37
|
+
double log_rn(double x) {
|
38
|
+
double wi;
|
39
|
+
#if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
|
40
|
+
db_number y;
|
41
|
+
int E, i, index, roundtestmask;
|
42
|
+
long double r, logirh, logirl, z, z2, z4, t,evenp,oddp, th, tl, eh,el,p1,p2,p3;
|
43
|
+
#else
|
44
|
+
long int E, i;
|
45
|
+
unsigned long int index, roundtestmask;
|
46
|
+
__fpreg xe, ye, r, logirh, logirl, z, z2, z4, t,evenp,oddp, th, tl, eh,el, c1,c2,c3,c4,c5,c6,p1,p2,p3;
|
47
|
+
#endif
|
48
|
+
|
49
|
+
|
50
|
+
E=0;
|
51
|
+
|
52
|
+
|
53
|
+
#if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
|
54
|
+
/* Pentium has few registers, so load coefficients directly from memory */
|
55
|
+
#define c6 c[0]
|
56
|
+
#define c5 c[1]
|
57
|
+
#define c4 c[2]
|
58
|
+
#define c3 c[3]
|
59
|
+
#define c2 c[4]
|
60
|
+
#define c1 c[5]
|
61
|
+
/* Special case hadling on a x86 */
|
62
|
+
y.d=x;
|
63
|
+
|
64
|
+
/* Filter special cases */
|
65
|
+
if (y.i[HI] < 0x00100000){ /* x < 2^(-1022) */
|
66
|
+
|
67
|
+
if (((y.i[HI] & 0x7fffffff)|y.i[LO])==0){
|
68
|
+
return -1.0/0.0;
|
69
|
+
} /* log(+/-0) = -Inf */
|
70
|
+
if (y.i[HI] < 0){
|
71
|
+
return (x-x)/0; /* log(-x) = Nan */
|
72
|
+
}
|
73
|
+
/* Subnormal number */
|
74
|
+
E = -64;
|
75
|
+
y.d *= two64; /* make x a normal number */
|
76
|
+
}
|
77
|
+
|
78
|
+
if (y.i[HI] >= 0x7ff00000){
|
79
|
+
return x+x; /* Inf or Nan */
|
80
|
+
}
|
81
|
+
|
82
|
+
DOUBLE_EXTENDED_MODE; /* This one should be overlapped with integer computation */
|
83
|
+
|
84
|
+
#define X_NEAR_1 (y.i[HI]>MINYFAST) && (y.i[HI]<MAXYFAST)
|
85
|
+
|
86
|
+
|
87
|
+
#else
|
88
|
+
/* prefetch coefficients */
|
89
|
+
c6=c[0]; c5=c[1]; c4=c[2]; c3=c[3]; c2=c[4]; c1=c[5];
|
90
|
+
/* Special case handling on the Itanium */
|
91
|
+
xe=x;
|
92
|
+
i = _Asm_getf(2/*_FR_D*/, xe);
|
93
|
+
|
94
|
+
/* Filter special cases */
|
95
|
+
//if (__builtin_expect( (i<0x0010000000000000ULL), (1+1==3))){ /* x < 2^(-1022) */
|
96
|
+
if (i<0x0010000000000000LL){ /* x < 2^(-1022) */
|
97
|
+
if ((i & 0x7fffffffffffffffULL)==0){
|
98
|
+
return -1.0/0.0;
|
99
|
+
} /* log(+/-0) = -Inf */
|
100
|
+
if (i<0){
|
101
|
+
return (x-x)/0; /* log(-x) = Nan */
|
102
|
+
}
|
103
|
+
/* Subnormal number */
|
104
|
+
xe *= two64; /* make x a normal number */
|
105
|
+
E = -64;
|
106
|
+
i = _Asm_getf(2/*_FR_D*/, xe); /* and update i */
|
107
|
+
}
|
108
|
+
|
109
|
+
//if (__builtin_expect( (i >= 0x7ff0000000000000ULL), (1+1==3) )){
|
110
|
+
if (i >= 0x7ff0000000000000ULL){
|
111
|
+
return x+x; /* Inf or Nan */
|
112
|
+
}
|
113
|
+
#define X_NEAR_1 __builtin_expect((i>(((uint64_t) MINYFAST)<<32)) && (i<(((uint64_t) MAXYFAST)<<32)), (1+1==3))
|
114
|
+
|
115
|
+
|
116
|
+
#endif
|
117
|
+
|
118
|
+
|
119
|
+
if(X_NEAR_1) {
|
120
|
+
roundtestmask=0x7fc;
|
121
|
+
z = x - 1 ; /* Sterbenz exact */
|
122
|
+
z2 = z*z;
|
123
|
+
evenp = c6; /* c6 */
|
124
|
+
oddp = c5; /* c5 */
|
125
|
+
evenp = c4 + z2 * evenp; /* c4 */
|
126
|
+
oddp = c3 + z2 * oddp; /* c3 */
|
127
|
+
evenp = c2 + z2 * evenp; /* c2 */
|
128
|
+
|
129
|
+
t = c1 + (z * evenp + z2 * oddp);
|
130
|
+
t = z*t;
|
131
|
+
|
132
|
+
//printf("z= %1.20e, t=%1.20e \n ", (double)z, (double)t);
|
133
|
+
|
134
|
+
}
|
135
|
+
|
136
|
+
else {
|
137
|
+
|
138
|
+
#if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
|
139
|
+
/* Extract exponent and mantissa */
|
140
|
+
E += (y.i[HI]>>20)-1023; /* extract the exponent */
|
141
|
+
index = (y.i[HI] & 0x000fffff);
|
142
|
+
y.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
|
143
|
+
index = index >> (20-L);
|
144
|
+
/* now y.d holds 1+f, and E is the exponent */
|
145
|
+
|
146
|
+
logirh = argredtable[index].h;
|
147
|
+
r = (long double) (argredtable[index].r); /* approx to 1/y.d */
|
148
|
+
z = r*(long double)y.d - 1. ; /* even without an FMA, all exact */
|
149
|
+
|
150
|
+
if(E>12 || E<-12) { /* faster and less accurate polynomial evaluation */
|
151
|
+
roundtestmask=0x7fe;
|
152
|
+
p1 = logirh + z*c1; p2 = c2 + z*c3; p3 = c4 + z*c5; z2 = z*z;
|
153
|
+
p1 = p1 + z2*p2; p3 = p3 + z2*c6; z4=z2*z2;
|
154
|
+
t = p1 + z4*p3;
|
155
|
+
t = t + E*ln2h;
|
156
|
+
}
|
157
|
+
else {
|
158
|
+
roundtestmask=0x7f0;
|
159
|
+
p1 = c5 + z*c6; z2 = z*z;
|
160
|
+
p2 = c3 + z*c4; p3 = c1+z*c2;
|
161
|
+
|
162
|
+
t = p2 + z2*p1;
|
163
|
+
t = p3 + z2*t;
|
164
|
+
t = logirh + z*t;
|
165
|
+
t = t + E*ln2h;
|
166
|
+
}
|
167
|
+
|
168
|
+
#if 0
|
169
|
+
if(E>12 || E<-12)
|
170
|
+
roundtestmask=0x7fe;
|
171
|
+
else
|
172
|
+
roundtestmask=0x7f0;
|
173
|
+
|
174
|
+
p1 = c5 + z*c6; z2 = z*z;
|
175
|
+
p2 = c3 + z*c4; p3 = c1+z*c2;
|
176
|
+
|
177
|
+
t = p2 + z2*p1;
|
178
|
+
t = p3 + z2*t;
|
179
|
+
t = logirh + z*t;
|
180
|
+
#endif
|
181
|
+
|
182
|
+
#else /* Itanium here*/
|
183
|
+
/* Extract exponent and mantissa */
|
184
|
+
E += (i>>52)-1023;
|
185
|
+
//printf("\nE = %llx\n", E);
|
186
|
+
i = i & 0x000fffffffffffffULL; /* keep only mantissa */
|
187
|
+
ye = _Asm_setf(2/*_FR_D*/, i | 0x3ff0000000000000ULL ); /* exponent = 0*/
|
188
|
+
index = i >> (52-L);
|
189
|
+
//printf("\nindex= %lld\n", index);
|
190
|
+
//printf("\n ye = %1.20Le\n",(long double)ye);
|
191
|
+
/* now ye holds 1+f, and E is the exponent */
|
192
|
+
|
193
|
+
logirh = argredtable[index].h;
|
194
|
+
|
195
|
+
_Asm_frcpa(&r, 1.0L, ye, 1/*_SF1*/);
|
196
|
+
z = r*ye - 1. ; /* even without an FMA, all exact */
|
197
|
+
|
198
|
+
if(E>12 || E<-12) { /* faster and less accurate polynomial evaluation */
|
199
|
+
roundtestmask=0x7fe;
|
200
|
+
p1 = logirh + z*c1; p2 = c2 + z*c3; p3 = c4 + z*c5; z2 = z*z;
|
201
|
+
p1 = p1 + z2*p2; p3 = p3 + z2*c6; z4=z2*z2;
|
202
|
+
t = p1 + z4*p3;
|
203
|
+
t = t + E*ln2h;
|
204
|
+
}
|
205
|
+
else {
|
206
|
+
roundtestmask=0x7f0;
|
207
|
+
p1 = c5 + z*c6; z2 = z*z;
|
208
|
+
p2 = c3 + z*c4; p3 = c1+z*c2;
|
209
|
+
|
210
|
+
t = p2 + z2*p1;
|
211
|
+
t = p3 + z2*t;
|
212
|
+
t = logirh + z*t;
|
213
|
+
t = t + E*ln2h;
|
214
|
+
}
|
215
|
+
|
216
|
+
|
217
|
+
#endif
|
218
|
+
|
219
|
+
//printf(" x=%1.20Le\n r=%1.20Le\n z=%1.20Le\n logirh=%1.20Le\n ",(long double)xe, (long double)r,(long double)z, (long double)logirh);
|
220
|
+
/* Polynomial evaluation, unrolled to go through Gappa */
|
221
|
+
|
222
|
+
//printf("t=%1.20e \n ", (double)t);
|
223
|
+
|
224
|
+
|
225
|
+
}
|
226
|
+
|
227
|
+
|
228
|
+
|
229
|
+
|
230
|
+
|
231
|
+
#if 0 /* to time the first step only */
|
232
|
+
BACK_TO_DOUBLE_MODE; return (double)t;
|
233
|
+
#endif
|
234
|
+
|
235
|
+
|
236
|
+
/* To test the second step only, comment out the following line */
|
237
|
+
DE_TEST_AND_RETURN_RN(t, roundtestmask);
|
238
|
+
|
239
|
+
|
240
|
+
/* Accurate phase */
|
241
|
+
#if EVAL_PERF
|
242
|
+
crlibm_second_step_taken++;
|
243
|
+
#endif
|
244
|
+
|
245
|
+
t = c13h;
|
246
|
+
t = c12h + z*t;
|
247
|
+
t = c11h + z*t;
|
248
|
+
t = c10h + z*t;
|
249
|
+
t = c9h + z*t;
|
250
|
+
t = c8h + z*t;
|
251
|
+
|
252
|
+
//printf("\n t = %1.20Le\n", (long double)t);
|
253
|
+
|
254
|
+
Mul12_ext(&th, &tl, z, t);
|
255
|
+
Add22_ext(&th, &tl, c7h,c7l, th,tl);
|
256
|
+
FMA22_ext(&th, &tl, z,0, th,tl, c6h,c6l);
|
257
|
+
FMA22_ext(&th, &tl, z,0, th,tl, c5h,c5l);
|
258
|
+
FMA22_ext(&th, &tl, z,0, th,tl, c4h,c4l);
|
259
|
+
FMA22_ext(&th, &tl, z,0, th,tl, c3h,c3l);
|
260
|
+
FMA22_ext(&th, &tl, z,0, th,tl, c2h,c2l);
|
261
|
+
FMA22_ext(&th, &tl, z,0, th,tl, c1h,c1l);
|
262
|
+
|
263
|
+
if((X_NEAR_1)) {
|
264
|
+
Mul22_ext(&th, &tl, z,0, th,tl);
|
265
|
+
}
|
266
|
+
else{
|
267
|
+
FMA22_ext(&th, &tl, z,0, th,tl, logirh, argredtable[index].l);
|
268
|
+
|
269
|
+
/* the following is not a FMA22 (Eln2 > th+tl) */
|
270
|
+
Mul22_ext(&eh, &el, ln2h,ln2l, E, 0);
|
271
|
+
Add22_ext(&th, &tl, eh,el, th,tl);
|
272
|
+
}
|
273
|
+
BACK_TO_DOUBLE_MODE;
|
274
|
+
return (double) (th+tl); /* The exact sum of these double-extended is rounded to the nearest */
|
275
|
+
}
|
276
|
+
|
277
|
+
|
278
|
+
/*
 * log_ru: unimplemented stub in this experimental file.
 * Returns its argument unchanged; no logarithm is computed.
 * NOTE(review): by its name this is presumably meant to be the
 * round-up entry point -- confirm against crlibm.h.
 * The stray ';' that followed the body (an empty file-scope declaration,
 * rejected by -pedantic before C23) has been dropped.
 */
double log_ru(double x) {
  return x;
}
|
279
|
+
/*
 * log_rd: unimplemented stub in this experimental file.
 * Returns its argument unchanged; no logarithm is computed.
 * NOTE(review): by its name this is presumably meant to be the
 * round-down entry point -- confirm against crlibm.h.
 * The stray ';' that followed the body (an empty file-scope declaration,
 * rejected by -pedantic before C23) has been dropped.
 */
double log_rd(double x) {
  return x;
}
|
280
|
+
/*
 * log_rz: unimplemented stub in this experimental file.
 * Returns its argument unchanged; no logarithm is computed.
 * NOTE(review): by its name this is presumably meant to be the
 * round-toward-zero entry point -- confirm against crlibm.h.
 * The stray ';' that followed the body (an empty file-scope declaration,
 * rejected by -pedantic before C23) has been dropped.
 */
double log_rz(double x) {
  return x;
}
|