crmf 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +12 -0
  3. data/crmf.gemspec +105 -3
  4. data/ext/crlibm-1.0beta5/AUTHORS +2 -0
  5. data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
  6. data/ext/crlibm-1.0beta5/COPYING +340 -0
  7. data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
  8. data/ext/crlibm-1.0beta5/ChangeLog +125 -0
  9. data/ext/crlibm-1.0beta5/Makefile.am +134 -0
  10. data/ext/crlibm-1.0beta5/NEWS +0 -0
  11. data/ext/crlibm-1.0beta5/README +31 -0
  12. data/ext/crlibm-1.0beta5/README.DEV +23 -0
  13. data/ext/crlibm-1.0beta5/README.md +5 -0
  14. data/ext/crlibm-1.0beta5/TODO +66 -0
  15. data/ext/crlibm-1.0beta5/VERSION +1 -0
  16. data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
  17. data/ext/crlibm-1.0beta5/acos-td.h +629 -0
  18. data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
  19. data/ext/crlibm-1.0beta5/asin-td.h +620 -0
  20. data/ext/crlibm-1.0beta5/asincos.c +4488 -0
  21. data/ext/crlibm-1.0beta5/asincos.h +575 -0
  22. data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
  23. data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
  24. data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
  25. data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
  26. data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
  27. data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
  28. data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
  29. data/ext/crlibm-1.0beta5/configure.ac +419 -0
  30. data/ext/crlibm-1.0beta5/crlibm.h +204 -0
  31. data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
  32. data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
  33. data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
  34. data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
  35. data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
  36. data/ext/crlibm-1.0beta5/double-extended.h +496 -0
  37. data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
  38. data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
  39. data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
  40. data/ext/crlibm-1.0beta5/exp-td.h +685 -0
  41. data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
  42. data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
  43. data/ext/crlibm-1.0beta5/expm1.c +2515 -0
  44. data/ext/crlibm-1.0beta5/expm1.h +715 -0
  45. data/ext/crlibm-1.0beta5/interval.h +238 -0
  46. data/ext/crlibm-1.0beta5/log-de.c +480 -0
  47. data/ext/crlibm-1.0beta5/log-de.h +747 -0
  48. data/ext/crlibm-1.0beta5/log-de2.c +280 -0
  49. data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
  50. data/ext/crlibm-1.0beta5/log-td.c +1158 -0
  51. data/ext/crlibm-1.0beta5/log-td.h +819 -0
  52. data/ext/crlibm-1.0beta5/log.c +2244 -0
  53. data/ext/crlibm-1.0beta5/log.h +1592 -0
  54. data/ext/crlibm-1.0beta5/log10-td.c +906 -0
  55. data/ext/crlibm-1.0beta5/log10-td.h +823 -0
  56. data/ext/crlibm-1.0beta5/log1p.c +1295 -0
  57. data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
  58. data/ext/crlibm-1.0beta5/log2-td.h +821 -0
  59. data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
  60. data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
  61. data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
  62. data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
  63. data/ext/crlibm-1.0beta5/log_fast.c +360 -0
  64. data/ext/crlibm-1.0beta5/log_fast.h +440 -0
  65. data/ext/crlibm-1.0beta5/pow.c +1396 -0
  66. data/ext/crlibm-1.0beta5/pow.h +3101 -0
  67. data/ext/crlibm-1.0beta5/prepare +20 -0
  68. data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
  69. data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
  70. data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
  71. data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
  72. data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
  73. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
  74. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
  75. data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
  76. data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
  77. data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
  78. data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
  79. data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
  80. data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
  81. data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
  82. data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
  83. data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
  84. data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
  85. data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
  86. data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
  87. data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
  88. data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
  89. data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
  90. data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
  91. data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
  92. data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
  93. data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
  94. data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
  95. data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
  96. data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
  97. data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
  98. data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
  99. data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
  100. data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
  101. data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
  102. data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
  103. data/ext/crlibm-1.0beta5/trigpi.h +556 -0
  104. data/ext/crlibm-1.0beta5/triple-double.c +57 -0
  105. data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
  106. data/ext/crmf/crmf.c +117 -20
  107. data/ext/crmf/extconf.rb +12 -8
  108. data/lib/crmf/version.rb +1 -1
  109. data/tests/perf.rb +100 -219
  110. metadata +108 -10
  111. data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,480 @@
1
+ /*
2
+ *this function computes log, correctly rounded,
3
+ using double-extended arithmetic
4
+
5
+ THIS IS EXPERIMENTAL SOFTWARE
6
+
7
+ In particular it changes rounding modes all the time without warning
8
+ nor restoring.
9
+
10
+ *
11
+ * Author : Florent de Dinechin
12
+ * Florent.de.Dinechin at ens-lyon.fr
13
+ *
14
+
15
+
16
+ This function compiles both on IA32 and IA64 architectures. On IA64,
17
+ it needs icc 8.1 or higher, with the following flags (which should be
18
+ set up by the autoconf).
19
+
20
+ icc -DHAVE_CONFIG_H -Qoption,cpp,--extended_float_types \
21
+ -IPF_fp_speculationsafe -c log-de.c;\
22
+ mv log-de.o log-td.o; make
23
+
24
+
25
+ */
26
+
27
+
28
+ #include <stdio.h>
29
+ #include <stdlib.h>
30
+ #include "crlibm.h"
31
+ #include "crlibm_private.h"
32
+ #include "double-extended.h"
33
+ #include "log-de.h"
34
+
35
+ /* log_accurate: second, more accurate evaluation step, called by log_rn/log_rd/log_ru when their quick step has not already returned. */
36
+ static void log_accurate(double_ext* prh, double_ext* prl, double_ext z, int E, int index) {
37
+ /* z, E and index are the reduced argument, the exponent and the argredtable index computed by the caller; the result is stored as a pair in *prh and *prl. */
38
+ double_ext eh,el, t13, t12, t11, t10, t9, t8,
39
+ p7h,p7l, t7h,t7l, t6h,t6l, t5h,t5l, t4h,t4l,
40
+ t3h,t3l, t2h,t2l, t1h,t1l, t0h,t0l;
41
+ /* Many temporary because single assignment form is nicer for Gappa */
42
+
43
+ #if !(defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64))
44
+ double_ext c1h,c2h,c3h,c4h,c5h,c6h,c7h,c8h,c9h,c10h,c11h,c12h,c13h,c14h,c15h;
45
+ double_ext c1l,c2l,c3l,c4l,c5l,c6l,c7l,c8l;
46
+ #endif
47
+ /* NOTE(review): on X86/AMD64 the c*h / c*l names are not declared here, so they presumably come from log-de.h - confirm. */
48
+
49
+ #if EVAL_PERF
50
+ crlibm_second_step_taken++;
51
+ #endif
52
+
53
+ /* TODO check the conditions for the double-double ops */
54
+
55
+ /* Horner scheme in z: c14h down to c8h with ordinary double_ext operations, then c7 down to c1 as (high, low) pairs, ending with the table pair argredtable[index].logirh/logirl. */
56
+ PREFETCH_POLY_ACCURATE;
57
+ t13 = c13h + z*c14h;
58
+ t12 = c12h + z*t13;
59
+ t11 = c11h + z*t12;
60
+ t10 = c10h + z*t11;
61
+ t9 = c9h + z*t10;
62
+ t8 = c8h + z*t9;
63
+ #if 1 /* This is faster on PIII. Your mileage may vary */
64
+ Mul12_ext(&p7h, &p7l, z, t8);
65
+ Add22_ext(&t7h, &t7l, p7h,p7l, c7h,c7l);
66
+ #else
67
+ FMA22_ext(&t7h, &t7l, z,0, t8,0, c7h,c7l);
68
+ #endif
69
+ FMA22_ext(&t6h, &t6l, z,0, t7h,t7l, c6h,c6l);
70
+ FMA22_ext(&t5h, &t5l, z,0, t6h,t6l, c5h,c5l);
71
+ FMA22_ext(&t4h, &t4l, z,0, t5h,t5l, c4h,c4l);
72
+ FMA22_ext(&t3h, &t3l, z,0, t4h,t4l, c3h,c3l);
73
+ FMA22_ext(&t2h, &t2l, z,0, t3h,t3l, c2h,c2l);
74
+ FMA22_ext(&t1h, &t1l, z,0, t2h,t2l, c1h,c1l);
75
+ FMA22_ext(&t0h, &t0l, z,0, t1h,t1l, argredtable[index].logirh, argredtable[index].logirl);
76
+ /* Exponent term: (eh, el) is built from (log2H, log2L) and (E, 0), then combined with (t0h, t0l) into the output pair. */
77
+ Mul22_ext(&eh, &el, log2H,log2L, E, 0);
78
+ Add22_ext(prh, prl, eh,el, t0h,t0l);
79
+ }
80
+
81
+
82
+
83
+
84
+
85
+
86
+ /* log_rn: logarithm of x as a double; per the comment on the final return the accurate result is rounded to nearest. Zero, negative and Inf/NaN inputs return early; otherwise a quick evaluation is tested by DE_TEST_AND_RETURN_RN and log_accurate() is the fallback. */
87
+ double log_rn(double x) {
88
+ double_ext logirh, r, y, z, th, tl, logde;
89
+ #if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
90
+ db_number xdb;
91
+ int E, index, index0, roundtestmask;
92
+ #else /* assuming Itanium here */
93
+ int64_t E, i;
94
+ uint64_t index, roundtestmask;
95
+ double c2,c3,c4,c5,c6,c7;
96
+ #endif
97
+
98
+
99
+ #if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
100
+ xdb.d=x;
101
+ /* index0 = low 20 bits of the high word of x; index = index0 rounded to L bits, later used to address argredtable. */
102
+ index0 = (xdb.i[HI] & 0x000fffff);
103
+ index = (index0 + (1<<(20-L-1))) >> (20-L);
104
+ E = (xdb.i[HI]>>20)-1023; /* extract the exponent */
105
+
106
+ /* Filter cases */
107
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
108
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0) return -1.0/0.0; /* log(+/-0) = -Inf */
109
+ if (xdb.i[HI] < 0) return (x-x)/0; /* log(-x) = Nan */
110
+ /* Else subnormal number */
111
+ xdb.d *= two64; /* make x a normal number */
112
+ E = -64 + (xdb.i[HI]>>20)-1023; /* extract the exponent */
113
+ index0 = (xdb.i[HI] & 0x000fffff);
114
+ index = (index0 + (1<<(20-L-1))) >> (20-L);
115
+ }
116
+ if (xdb.i[HI] >= 0x7ff00000) return x+x; /* Inf or Nan */
117
+ /* Every return above happens before DOUBLE_EXTENDED_MODE is entered. */
118
+ DOUBLE_EXTENDED_MODE; /* This one should be overlapped with following integer computation */
119
+
120
+ /* Extract exponent and mantissa */
121
+ xdb.i[HI] = index0 | 0x3ff00000; /* do exponent = 0 */
122
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
123
+ if (index >= MAXINDEX){ /* corresponds to y>sqrt(2)*/
124
+ xdb.i[HI] -= 0x00100000;
125
+ index = index & INDEXMASK;
126
+ E++;
127
+ }
128
+ y = xdb.d;
129
+
130
+ #else /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
131
+ /* Here comes the code specific to the Itanium processor */
132
+ E=0;
133
+ PREFETCH_POLY_QUICK; /* defined in log-de.h */
134
+ y=x;
135
+ i = _Asm_getf(2/*_FR_D*/, y); /* Cast y to a 64-bit integer */
136
+
137
+ /* Filter special cases */
138
+ if (i<(int64_t)ULL(0010000000000000)){ /* equivalent to : x < 2^(-1022) */
139
+ if ((i & ULL(7fffffffffffffff))==0) return -1.0/0.0; /* log(+/-0) = -Inf */
140
+ if (i<0) return (x-x)/0; /* log(-x) = Nan */
141
+ /* Else subnormal number */
142
+ y *= two64; /* make x a normal number */
143
+ E = -64;
144
+ i = _Asm_getf(2/*_FR_D*/, y); /* and update i */
145
+ }
146
+ if (i >= ULL(7ff0000000000000)) return x+x; /* Inf or Nan */
147
+
148
+ /* Extract exponent and mantissa */
149
+ E += (i>>52)-1023;
150
+ i = i & ULL(000fffffffffffff); /* keep only mantissa */
151
+ index = (i + (ULL(1)<<(52-L-1))) >> (52-L);
152
+ /* reduce such that sqrt(2)/2 < y < sqrt(2) */
153
+ if (index >= MAXINDEX){ /* corresponds to y>sqrt(2)*/
154
+ y = _Asm_setf(2/*_FR_D*/, (i | ULL(3ff0000000000000)) - ULL(0010000000000000) ); /* exponent = -1 */
155
+ index = index & INDEXMASK;
156
+ E++;
157
+ }
158
+ else
159
+ y = _Asm_setf(2/*_FR_D*/, i | ULL(3ff0000000000000) ); /* exponent = 0*/
160
+ #endif /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
161
+
162
+
163
+
164
+ /* All the previous argument reduction was exact */
165
+ /* now y holds 1+f, and E is the exponent */
166
+
167
+ r = (double_ext) (argredtable[index].r); /* approx to 1/y.d */
168
+ logirh = argredtable[index].logirh;
169
+ z = y*r - 1. ; /* even without an FMA, all exact */
170
+ /* The #if 0 variant below is compiled out, so roundtestmask is always ACCURATE_TO_62_BITS in this function. */
171
+ #if 0
172
+ if(E==0)
173
+ roundtestmask=ACCURATE_TO_61_BITS;
174
+ else
175
+ roundtestmask=ACCURATE_TO_61_BITS;
176
+ #else
177
+ roundtestmask=ACCURATE_TO_62_BITS;
178
+ #endif
179
+ /* Quick step: logde is assigned only inside the ESTRIN or PATERSON sections; presumably exactly one of them is defined by the build or log-de.h - TODO confirm. */
180
+ #ifdef ESTRIN
181
+ /* Estrin polynomial evaluation */
182
+ double_ext z2,z4, p01, p23, p45, p67, p03, p47,p07;
183
+
184
+ z2 = z*z; p67 = c6 + z*c7; p45 = c4 + z*c5; p23 = c2 + z*c3; p01 = logirh + z;
185
+ z4 = z2*z2; p47 = p45 + z2*p67; p03 = p01 + z2*p23;
186
+ p07 = p03 + z4*p47;
187
+ logde = p07 + E*log2H;
188
+ #endif
189
+
190
+ #ifdef PATERSON
191
+ double_ext z4,z2,t0,t1,t2,t3,t4,t5,t6,t7,t8;
192
+
193
+ z2 = z * z; t1 = z + ps_alpha; t2 = z + ps_beta; t3 = c3 * z + c2; t4 = z + logirh;
194
+ z4 = z2 * z2; t5 = z2 + ps_c; t6 = t3 * z2 + t4;
195
+ t7 = t5 * t1 + t2; t0 = z4 * c7; t8 = t7 * t0 + t6;
196
+ logde = t8 + E*log2H;
197
+ #endif
198
+ /* NOTE(review): the disabled block below refers to t, which is not declared in this function. */
199
+ #if 0 /* to time the first step only */
200
+ BACK_TO_DOUBLE_MODE; return (double)t;
201
+ #endif
202
+
203
+
204
+ /* To test the second step only, comment out the following line */
205
+ DE_TEST_AND_RETURN_RN(logde, roundtestmask);
206
+
207
+ /* Reached only if the macro above did not return - presumably when the rounding test on logde fails; fall back to the accurate step. */
208
+ log_accurate(&th, &tl, z, E, index);
209
+
210
+ BACK_TO_DOUBLE_MODE;
211
+
212
+ return (double) (th+tl); /* The exact sum of these double-extended is rounded to the nearest */
213
+ }
214
+
215
+
216
+
217
+
218
+
219
+
220
+
221
+
222
+
223
+ /* log_rd: logarithm of x as a double, presumably rounded toward -infinity: the quick step is tested with DE_TEST_AND_RETURN_RD and the accurate result is returned through RETURN_SUM_ROUNDED_DOWN. */
224
+ double log_rd(double x) {
225
+ double_ext logirh, r, y, z, th, tl, logde;
226
+ #if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
227
+ db_number xdb;
228
+ int E, index, roundtestmask;
229
+ #else
230
+ int64_t E, i;
231
+ uint64_t index, roundtestmask;
232
+ double_ext c1,c2,c3,c4,c5,c6,c7;
233
+ #endif
234
+ /* E starts at 0 so that the subnormal branch can preset -64 before the exponent is added with += below. */
235
+ E=0;
236
+
237
+ #if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
238
+ xdb.d=x;
239
+
240
+ /* Filter cases */
241
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
242
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0) return -1.0/0.0; /* log(+/-0) = -Inf */
243
+ if (xdb.i[HI] < 0) return (x-x)/0; /* log(-x) = Nan */
244
+ /* Else subnormal number */
245
+ E = -64;
246
+ xdb.d *= two64; /* make x a normal number */
247
+ }
248
+ if (xdb.i[HI] >= 0x7ff00000) return x+x; /* Inf or Nan */
249
+ /* Every return above happens before DOUBLE_EXTENDED_MODE is entered. */
250
+ DOUBLE_EXTENDED_MODE; /* This one should be overlapped with following integer computation */
251
+
252
+ /* Extract exponent and mantissa */
253
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
254
+ index = (xdb.i[HI] & 0x000fffff);
255
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
256
+ index = (index + (1<<(20-L-1))) >> (20-L);
257
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
258
+ if (index >= MAXINDEX){ /* corresponds to y>sqrt(2)*/
259
+ xdb.i[HI] -= 0x00100000;
260
+ E++;
261
+ }
262
+ y = xdb.d;
263
+
264
+ #else /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
265
+ /* Here comes the code specific to the Itanium processor */
266
+ PREFETCH_POLY_QUICK; /* defined in log-de.h */
267
+ y=x;
268
+ i = _Asm_getf(2/*_FR_D*/, y); /* Cast y to a 64-bit integer */
269
+
270
+ /* Filter special cases */
271
+ if (i<(int64_t)ULL(0010000000000000)){ /* equivalent to : x < 2^(-1022) */
272
+ if ((i & ULL(7fffffffffffffff))==0) return -1.0/0.0; /* log(+/-0) = -Inf */
273
+ if (i<0) return (x-x)/0; /* log(-x) = Nan */
274
+ /* Else subnormal number */
275
+ y *= two64; /* make x a normal number */
276
+ E = -64;
277
+ i = _Asm_getf(2/*_FR_D*/, y); /* and update i */
278
+ }
279
+ if (i >= ULL(7ff0000000000000)) return x+x; /* Inf or Nan */
280
+
281
+ /* Extract exponent and mantissa */
282
+ E += (i>>52)-1023;
283
+ i = i & ULL(000fffffffffffff); /* keep only mantissa */
284
+ index = (i + (ULL(1)<<(52-L-1))) >> (52-L);
285
+ /* reduce such that sqrt(2)/2 < y < sqrt(2) */
286
+ if (index >= MAXINDEX){ /* corresponds to y>sqrt(2)*/
287
+ y = _Asm_setf(2/*_FR_D*/, (i | ULL(3ff0000000000000)) - ULL(0010000000000000) ); /* exponent = -1 */
288
+ E++;
289
+ }
290
+ else
291
+ y = _Asm_setf(2/*_FR_D*/, i | ULL(3ff0000000000000) ); /* exponent = 0*/
292
+ #endif /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
293
+
294
+ /* All the previous argument reduction was exact */
295
+ /* now y holds 1+f, and E is the exponent */
296
+ index = index & INDEXMASK;
297
+ /* The mask above is applied on every path, after the MAXINDEX test has used the unmasked index. */
298
+ logirh = argredtable[index].logirh;
299
+ r = (double_ext) (argredtable[index].r); /* approx to 1/y.d */
300
+ z = y*r - 1. ; /* even without an FMA, all exact */
301
+
302
+ if(E==0)
303
+ roundtestmask=ACCURATE_TO_61_BITS;
304
+ else
305
+ roundtestmask=ACCURATE_TO_62_BITS;
306
+ /* Quick step: logde is assigned only inside the ESTRIN or PATERSON sections; presumably exactly one of them is defined by the build or log-de.h - TODO confirm. */
307
+ #ifdef ESTRIN
308
+ /* Estrin polynomial evaluation */
309
+ double_ext z2,z4, p01, p23, p45, p67, p03, p47,p07;
310
+
311
+ z2 = z*z; p67 = c6 + z*c7; p45 = c4 + z*c5; p23 = c2 + z*c3; p01 = logirh + z;
312
+ z4 = z2*z2; p47 = p45 + z2*p67; p03 = p01 + z2*p23;
313
+ p07 = p03 + z4*p47;
314
+ logde = p07 + E*log2H;
315
+ #endif
316
+
317
+ #ifdef PATERSON
318
+ double_ext z4,z2,t0,t1,t2,t3,t4,t5,t6,t7,t8;
319
+
320
+ z2 = z * z; t1 = z + ps_alpha; t2 = z + ps_beta; t3 = c3 * z + c2; t4 = z + logirh;
321
+ z4 = z2 * z2; t5 = z2 + ps_c; t6 = t3 * z2 + t4;
322
+
323
+ t7 = t5 * t1 + t2; t0 = z4 * c7; t8 = t7 * t0 + t6;
324
+
325
+ logde = t8 + E*log2H;
326
+ #endif
327
+ /* NOTE(review): the disabled block below refers to t, which is not declared in this function. */
328
+ #if 0 /* to time the first step only */
329
+ BACK_TO_DOUBLE_MODE; return (double)t;
330
+ #endif
331
+
332
+
333
+ /* To test the second step only, comment out the following line */
334
+ DE_TEST_AND_RETURN_RD(logde, roundtestmask);
335
+ /* Reached only if the macro above did not return - presumably when the rounding test on logde fails; fall back to the accurate step. */
336
+ log_accurate(&th, &tl, z, E, index);
337
+
338
+ RETURN_SUM_ROUNDED_DOWN(th, tl);
339
+ /* NOTE(review): no explicit return statement follows; RETURN_SUM_ROUNDED_DOWN presumably returns from the function - confirm. */
340
+ }
341
+
342
+
343
+
344
+
345
+
346
+
347
+
348
+
349
+
350
+ /* log_ru: logarithm of x as a double, presumably rounded toward +infinity: the quick step is tested with DE_TEST_AND_RETURN_RU and the accurate result is returned through RETURN_SUM_ROUNDED_UP. */
351
+ double log_ru(double x) {
352
+ double_ext logirh, r, y, z, th, tl, logde;
353
+ #if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
354
+ db_number xdb;
355
+ int E, index, roundtestmask;
356
+ #else
357
+ int64_t E, i;
358
+ uint64_t index, roundtestmask;
359
+ double_ext c1,c2,c3,c4,c5,c6,c7;
360
+ #endif
361
+ /* E starts at 0 so that the subnormal branch can preset -64 before the exponent is added with += below. */
362
+ E=0;
363
+
364
+ #if defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64)
365
+ xdb.d=x;
366
+
367
+ /* Filter cases */
368
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
369
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0) return -1.0/0.0; /* log(+/-0) = -Inf */
370
+ if (xdb.i[HI] < 0) return (x-x)/0; /* log(-x) = Nan */
371
+ /* Else subnormal number */
372
+ E = -64;
373
+ xdb.d *= two64; /* make x a normal number */
374
+ }
375
+ if (xdb.i[HI] >= 0x7ff00000) return x+x; /* Inf or Nan */
376
+ /* Every return above happens before DOUBLE_EXTENDED_MODE is entered. */
377
+ DOUBLE_EXTENDED_MODE; /* This one should be overlapped with following integer computation */
378
+
379
+ /* Extract exponent and mantissa */
380
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
381
+ index = (xdb.i[HI] & 0x000fffff);
382
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
383
+ index = (index + (1<<(20-L-1))) >> (20-L);
384
+
385
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
386
+ if (index >= MAXINDEX){ /* corresponds to y>sqrt(2)*/
387
+ index = index & INDEXMASK;
388
+ xdb.i[HI] -= 0x00100000;
389
+ E++;
390
+ }
391
+ y = xdb.d;
392
+
393
+ #else /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
394
+ /* Here comes the code specific to the Itanium processor */
395
+ PREFETCH_POLY_QUICK; /* defined in log-de.h */
396
+ y=x;
397
+ i = _Asm_getf(2/*_FR_D*/, y); /* Cast y to a 64-bit integer */
398
+
399
+ /* Filter special cases */
400
+ if (i<(int64_t)ULL(0010000000000000)){ /* equivalent to : x < 2^(-1022) */
401
+ if ((i & ULL(7fffffffffffffff))==0) return -1.0/0.0; /* log(+/-0) = -Inf */
402
+ if (i<0) return (x-x)/0; /* log(-x) = Nan */
403
+ /* Else subnormal number */
404
+ y *= two64; /* make x a normal number */
405
+ E = -64;
406
+ i = _Asm_getf(2/*_FR_D*/, y); /* and update i */
407
+ }
408
+ if (i >= ULL(7ff0000000000000)) return x+x; /* Inf or Nan */
409
+
410
+ /* Extract exponent and mantissa */
411
+ E += (i>>52)-1023;
412
+ i = i & ULL(000fffffffffffff); /* keep only mantissa */
413
+ index = (i + (ULL(1)<<(52-L-1))) >> (52-L);
414
+ /* reduce such that sqrt(2)/2 < y < sqrt(2) */
415
+ if (index >= MAXINDEX){ /* corresponds to y>sqrt(2)*/
416
+ y = _Asm_setf(2/*_FR_D*/, (i | ULL(3ff0000000000000)) - ULL(0010000000000000) ); /* exponent = -1 */
417
+ index = index & INDEXMASK;
418
+ E++;
419
+ }
420
+ else
421
+ y = _Asm_setf(2/*_FR_D*/, i | ULL(3ff0000000000000) ); /* exponent = 0*/
422
+ #endif /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
423
+
424
+ /* All the previous argument reduction was exact */
425
+ /* now y holds 1+f, and E is the exponent */
426
+ /* In this function index is masked with INDEXMASK only inside the index >= MAXINDEX branches above. */
427
+ logirh = argredtable[index].logirh;
428
+ r = (double_ext) (argredtable[index].r); /* approx to 1/y.d */
429
+ z = y*r - 1. ; /* even without an FMA, all exact */
430
+
431
+ if(E==0)
432
+ roundtestmask=ACCURATE_TO_61_BITS;
433
+ else
434
+ roundtestmask=ACCURATE_TO_62_BITS;
435
+ /* Quick step: logde is assigned only inside the ESTRIN or PATERSON sections; presumably exactly one of them is defined by the build or log-de.h - TODO confirm. */
436
+ #ifdef ESTRIN
437
+ /* Estrin polynomial evaluation */
438
+ double_ext z2,z4, p01, p23, p45, p67, p03, p47,p07;
439
+
440
+ z2 = z*z; p67 = c6 + z*c7; p45 = c4 + z*c5; p23 = c2 + z*c3; p01 = logirh + z;
441
+ z4 = z2*z2; p47 = p45 + z2*p67; p03 = p01 + z2*p23;
442
+ p07 = p03 + z4*p47;
443
+ logde = p07 + E*log2H;
444
+ #endif
445
+
446
+ #ifdef PATERSON
447
+ double_ext z4,z2,t0,t1,t2,t3,t4,t5,t6,t7,t8;
448
+
449
+ z2 = z * z; t1 = z + ps_alpha; t2 = z + ps_beta; t3 = c3 * z + c2; t4 = z + logirh;
450
+ z4 = z2 * z2; t5 = z2 + ps_c; t6 = t3 * z2 + t4;
451
+
452
+ t7 = t5 * t1 + t2; t0 = z4 * c7; t8 = t7 * t0 + t6;
453
+
454
+ logde = t8 + E*log2H;
455
+ #endif
456
+
457
+ /* NOTE(review): the disabled block below refers to t, which is not declared in this function. */
458
+ #if 0 /* to time the first step only */
459
+ BACK_TO_DOUBLE_MODE; return (double)t;
460
+ #endif
461
+
462
+
463
+ /* To test the second step only, comment out the following line */
464
+ DE_TEST_AND_RETURN_RU(logde, roundtestmask);
465
+ /* Reached only if the macro above did not return - presumably when the rounding test on logde fails; fall back to the accurate step. */
466
+ log_accurate(&th, &tl, z, E, index);
467
+
468
+ RETURN_SUM_ROUNDED_UP(th, tl);
469
+ /* NOTE(review): no explicit return statement follows; RETURN_SUM_ROUNDED_UP presumably returns from the function - confirm. */
470
+ }
471
+
472
+ /* log_rz: returns log_rd(x) when x > 1.0 and log_ru(x) otherwise, i.e. the directed rounding that points toward zero given the sign of log(x). */
473
+ double log_rz(double x) {
474
+ /* Guard clause: only x > 1.0 takes the log_rd route. */
475
+ if (x>1.0) return log_rd(x);
476
+ /* Every input for which the comparison above is false goes to log_ru. */
477
+ return log_ru(x);
478
+ }
479
+
480
+