crmf 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +12 -0
  3. data/crmf.gemspec +105 -3
  4. data/ext/crlibm-1.0beta5/AUTHORS +2 -0
  5. data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
  6. data/ext/crlibm-1.0beta5/COPYING +340 -0
  7. data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
  8. data/ext/crlibm-1.0beta5/ChangeLog +125 -0
  9. data/ext/crlibm-1.0beta5/Makefile.am +134 -0
  10. data/ext/crlibm-1.0beta5/NEWS +0 -0
  11. data/ext/crlibm-1.0beta5/README +31 -0
  12. data/ext/crlibm-1.0beta5/README.DEV +23 -0
  13. data/ext/crlibm-1.0beta5/README.md +5 -0
  14. data/ext/crlibm-1.0beta5/TODO +66 -0
  15. data/ext/crlibm-1.0beta5/VERSION +1 -0
  16. data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
  17. data/ext/crlibm-1.0beta5/acos-td.h +629 -0
  18. data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
  19. data/ext/crlibm-1.0beta5/asin-td.h +620 -0
  20. data/ext/crlibm-1.0beta5/asincos.c +4488 -0
  21. data/ext/crlibm-1.0beta5/asincos.h +575 -0
  22. data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
  23. data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
  24. data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
  25. data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
  26. data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
  27. data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
  28. data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
  29. data/ext/crlibm-1.0beta5/configure.ac +419 -0
  30. data/ext/crlibm-1.0beta5/crlibm.h +204 -0
  31. data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
  32. data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
  33. data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
  34. data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
  35. data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
  36. data/ext/crlibm-1.0beta5/double-extended.h +496 -0
  37. data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
  38. data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
  39. data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
  40. data/ext/crlibm-1.0beta5/exp-td.h +685 -0
  41. data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
  42. data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
  43. data/ext/crlibm-1.0beta5/expm1.c +2515 -0
  44. data/ext/crlibm-1.0beta5/expm1.h +715 -0
  45. data/ext/crlibm-1.0beta5/interval.h +238 -0
  46. data/ext/crlibm-1.0beta5/log-de.c +480 -0
  47. data/ext/crlibm-1.0beta5/log-de.h +747 -0
  48. data/ext/crlibm-1.0beta5/log-de2.c +280 -0
  49. data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
  50. data/ext/crlibm-1.0beta5/log-td.c +1158 -0
  51. data/ext/crlibm-1.0beta5/log-td.h +819 -0
  52. data/ext/crlibm-1.0beta5/log.c +2244 -0
  53. data/ext/crlibm-1.0beta5/log.h +1592 -0
  54. data/ext/crlibm-1.0beta5/log10-td.c +906 -0
  55. data/ext/crlibm-1.0beta5/log10-td.h +823 -0
  56. data/ext/crlibm-1.0beta5/log1p.c +1295 -0
  57. data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
  58. data/ext/crlibm-1.0beta5/log2-td.h +821 -0
  59. data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
  60. data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
  61. data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
  62. data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
  63. data/ext/crlibm-1.0beta5/log_fast.c +360 -0
  64. data/ext/crlibm-1.0beta5/log_fast.h +440 -0
  65. data/ext/crlibm-1.0beta5/pow.c +1396 -0
  66. data/ext/crlibm-1.0beta5/pow.h +3101 -0
  67. data/ext/crlibm-1.0beta5/prepare +20 -0
  68. data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
  69. data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
  70. data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
  71. data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
  72. data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
  73. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
  74. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
  75. data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
  76. data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
  77. data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
  78. data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
  79. data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
  80. data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
  81. data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
  82. data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
  83. data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
  84. data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
  85. data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
  86. data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
  87. data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
  88. data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
  89. data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
  90. data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
  91. data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
  92. data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
  93. data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
  94. data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
  95. data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
  96. data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
  97. data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
  98. data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
  99. data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
  100. data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
  101. data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
  102. data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
  103. data/ext/crlibm-1.0beta5/trigpi.h +556 -0
  104. data/ext/crlibm-1.0beta5/triple-double.c +57 -0
  105. data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
  106. data/ext/crmf/crmf.c +117 -20
  107. data/ext/crmf/extconf.rb +12 -8
  108. data/lib/crmf/version.rb +1 -1
  109. data/tests/perf.rb +100 -219
  110. metadata +108 -10
  111. data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,906 @@
1
+ /*
2
+ * This function computes log10, correctly rounded,
3
+ * using experimental techniques based on triple double arithmetics
4
+
5
+ THIS IS EXPERIMENTAL SOFTWARE
6
+
7
+ *
8
+ * Author : Christoph Lauter
9
+ * christoph.lauter at ens-lyon.fr
10
+ *
11
+
12
+ To have it replace the crlibm log10, do:
13
+
14
+ gcc -DHAVE_CONFIG_H -I. -fPIC -O2 -c log10-td.c; mv log10-td.o log10_accurate.o; make
15
+
16
+
17
+ **********************************************************
18
+ *
19
+ * NOTE: THIS FUNCTION USES SOME BASIC SEQUENCES AND
20
+ * FURTHER FINAL ROUNDING SEQUENCES WITH SPECIAL CASE
21
+ * TESTS NO FORMAL PROOF IS AVAILABLE FOR IN THE MOMENT
22
+ *
23
+ **********************************************************
24
+
25
+ */
26
+
27
+
28
+ #include <stdio.h>
29
+ #include <stdlib.h>
30
+ #include "crlibm.h"
31
+ #include "crlibm_private.h"
32
+ #include "triple-double.h"
33
+ #include "log10-td.h"
34
+
35
+ #define AVOID_FMA 0
36
+
37
+
38
+ void log10_td_accurate(double *logb10h, double *logb10m, double *logb10l, int E, double ed, int index, double zh, double zl, double logih, double logim) {
39
+ double highPoly, t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l, t8h, t8l, t9h, t9l, t10h, t10l, t11h, t11l;
40
+ double t12h, t12l, t13h, t13l, t14h, t14l, zSquareh, zSquarem, zSquarel, zCubeh, zCubem, zCubel, higherPolyMultZh, higherPolyMultZm;
41
+ double higherPolyMultZl, zSquareHalfh, zSquareHalfm, zSquareHalfl, polyWithSquareh, polyWithSquarem, polyWithSquarel;
42
+ double polyh, polym, polyl, logil, logyh, logym, logyl, loghover, logmover, loglover, log2edhover, log2edmover, log2edlover;
43
+ double log2edh, log2edm, log2edl, logb10hover, logb10mover, logb10lover;
44
+ double logyhnorm, logymnorm, logylnorm;
45
+
46
+
47
+
48
+
49
+
50
+ #if EVAL_PERF
51
+ crlibm_second_step_taken++;
52
+ #endif
53
+
54
+
55
+ /* Accurate phase:
56
+
57
+ Argument reduction is already done.
58
+ We must return logh, logm and logl representing the intermediate result in 118 bits precision.
59
+
60
+ We use a 14 degree polynomial, computing the first 3 (the first is 0) coefficients in triple double,
61
+ calculating the next 7 coefficients in double double arithmetics and the last in double.
62
+
63
+ We must account for zl starting with the monome of degree 4 (7^3 + 53 - 7 >> 118); so
64
+ double double calculations won't account for it.
65
+
66
+ */
67
+
68
+ /* Start of the horner scheme */
69
+
70
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
71
+ highPoly = FMA(FMA(FMA(FMA(accPolyC14,zh,accPolyC13),zh,accPolyC12),zh,accPolyC11),zh,accPolyC10);
72
+ #else
73
+ highPoly = accPolyC10 + zh * (accPolyC11 + zh * (accPolyC12 + zh * (accPolyC13 + zh * accPolyC14)));
74
+ #endif
75
+
76
+ /* We want to write
77
+
78
+ accPolyC3 + zh * (accPoly4 + zh * (accPoly5 + zh * (accPoly6 + zh * (accPoly7 + zh * (accPoly8 + zh * (accPoly9 + zh * highPoly))))));
79
+ ( t14 t13 t12 t11 t10 t9 t8 t7 t6 t5 t4 t3 t2 t1 )
80
+
81
+ with all additions and multiplications in double double arithmetics
82
+ but we will produce intermediate results labelled t1h/t1l thru t14h/t14l
83
+ */
84
+
85
+ Mul12(&t1h, &t1l, zh, highPoly);
86
+ Add22(&t2h, &t2l, accPolyC9h, accPolyC9l, t1h, t1l);
87
+ Mul22(&t3h, &t3l, zh, zl, t2h, t2l);
88
+ Add22(&t4h, &t4l, accPolyC8h, accPolyC8l, t3h, t3l);
89
+ Mul22(&t5h, &t5l, zh, zl, t4h, t4l);
90
+ Add22(&t6h, &t6l, accPolyC7h, accPolyC7l, t5h, t5l);
91
+ Mul22(&t7h, &t7l, zh, zl, t6h, t6l);
92
+ Add22(&t8h, &t8l, accPolyC6h, accPolyC6l, t7h, t7l);
93
+ Mul22(&t9h, &t9l, zh, zl, t8h, t8l);
94
+ Add22(&t10h, &t10l, accPolyC5h, accPolyC5l, t9h, t9l);
95
+ Mul22(&t11h, &t11l, zh, zl, t10h, t10l);
96
+ Add22(&t12h, &t12l, accPolyC4h, accPolyC4l, t11h, t11l);
97
+ Mul22(&t13h, &t13l, zh, zl, t12h, t12l);
98
+ Add22(&t14h, &t14l, accPolyC3h, accPolyC3l, t13h, t13l);
99
+
100
+ /* We must now prepare (zh + zl)^2 and (zh + zl)^3 as triple doubles */
101
+
102
+ Mul23(&zSquareh, &zSquarem, &zSquarel, zh, zl, zh, zl);
103
+ Mul233(&zCubeh, &zCubem, &zCubel, zh, zl, zSquareh, zSquarem, zSquarel);
104
+
105
+ /* We can now multiplicate the middle and higher polynomial by z^3 */
106
+
107
+ Mul233(&higherPolyMultZh, &higherPolyMultZm, &higherPolyMultZl, t14h, t14l, zCubeh, zCubem, zCubel);
108
+
109
+ /* Multiply now z^2 by -1/2 (exact op) and add to middle and higher polynomial */
110
+
111
+ zSquareHalfh = zSquareh * -0.5;
112
+ zSquareHalfm = zSquarem * -0.5;
113
+ zSquareHalfl = zSquarel * -0.5;
114
+
115
+ Add33(&polyWithSquareh, &polyWithSquarem, &polyWithSquarel,
116
+ zSquareHalfh, zSquareHalfm, zSquareHalfl,
117
+ higherPolyMultZh, higherPolyMultZm, higherPolyMultZl);
118
+
119
+ /* Add now zh and zl to obtain the polynomial evaluation result */
120
+
121
+ Add233(&polyh, &polym, &polyl, zh, zl, polyWithSquareh, polyWithSquarem, polyWithSquarel);
122
+
123
+ /* Reconstruct now log(y) = log(1 + z) - log(ri) by adding logih, logim, logil
124
+ logil has not been read to the time, do this first
125
+ */
126
+
127
+ logil = argredtable[index].logil;
128
+
129
+ Add33(&logyh, &logym, &logyl, logih, logim, logil, polyh, polym, polyl);
130
+
131
+
132
+ /* Renormalize logyh, logym and logyl to a non-overlapping triple-double for winning some
133
+ accuracy in the final ln(x) result before multiplying with log10inv
134
+
135
+ THIS MAY NOT BE NECESSARY NOR SUFFICIENT
136
+
137
+ */
138
+
139
+ Renormalize3(&logyhnorm, &logymnorm, &logylnorm, logyh, logym, logyl);
140
+
141
+
142
+ /* Multiply log2 with E, i.e. log2h, log2m, log2l by ed
143
+ ed is always less than 2^(12) and log2h and log2m are stored with at least 12 trailing zeros
144
+ So multiplying naively is correct (up to 134 bits at least)
145
+
146
+ The final result is thus obtained by adding log2 * E to log(y)
147
+ */
148
+
149
+ log2edhover = log2h * ed;
150
+ log2edmover = log2m * ed;
151
+ log2edlover = log2l * ed;
152
+
153
+ /* It may be necessary to renormalize the tabulated value (multiplied by ed) before adding
154
+ the to the log(y)-result
155
+
156
+ If needed, uncomment the following Renormalize3-Statement and comment out the copies
157
+ following it.
158
+ */
159
+
160
+ /* Renormalize3(&log2edh, &log2edm, &log2edl, log2edhover, log2edmover, log2edlover); */
161
+
162
+ log2edh = log2edhover;
163
+ log2edm = log2edmover;
164
+ log2edl = log2edlover;
165
+
166
+ Add33(&loghover, &logmover, &loglover, log2edh, log2edm, log2edl, logyhnorm, logymnorm, logylnorm);
167
+
168
+
169
+ /* Change logarithm base from natural base to base 10 by multiplying */
170
+
171
+ Mul33(&logb10hover, &logb10mover, &logb10lover, log10invh, log10invm, log10invl, loghover, logmover, loglover);
172
+
173
+
174
+ /* Since we can not guarantee in each addition and multiplication procedure that
175
+ the results are not overlapping, we must renormalize the result before handing
176
+ it over to the final rounding
177
+ */
178
+
179
+ Renormalize3(logb10h,logb10m,logb10l,logb10hover,logb10mover,logb10lover);
180
+ }
181
+
182
+
183
+
184
+ /*************************************************************
185
+ *************************************************************
186
+ * ROUNDED TO NEAREST *
187
+ *************************************************************
188
+ *************************************************************/
189
+ double log10_rn(double x){
190
+ db_number xdb;
191
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
192
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
193
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
194
+ double logb10h, logb10m, logb10l;
195
+ int E, index;
196
+
197
+ E=0;
198
+ xdb.d=x;
199
+
200
+ /* Filter cases */
201
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
202
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
203
+ return -1.0/0.0;
204
+ } /* log(+/-0) = -Inf */
205
+ if (xdb.i[HI] < 0){
206
+ return (x-x)/0; /* log(-x) = Nan */
207
+ }
208
+ /* Subnormal number */
209
+ E = -52;
210
+ xdb.d *= two52; /* make x a normal number */
211
+ }
212
+
213
+ if (xdb.i[HI] >= 0x7ff00000){
214
+ return x+x; /* Inf or Nan */
215
+ }
216
+
217
+
218
+ /* Extract exponent and mantissa
219
+ Do range reduction,
220
+ yielding to E holding the exponent and
221
+ y the mantissa between sqrt(2)/2 and sqrt(2)
222
+ */
223
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
224
+ index = (xdb.i[HI] & 0x000fffff);
225
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
226
+ index = (index + (1<<(20-L-1))) >> (20-L);
227
+
228
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
229
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
230
+ xdb.i[HI] -= 0x00100000;
231
+ E++;
232
+ }
233
+ y = xdb.d;
234
+ index = index & INDEXMASK;
235
+ /* Cast integer E into double ed for multiplication later */
236
+ ed = (double) E;
237
+
238
+ /*
239
+ Read tables:
240
+ Read one float for ri
241
+ Read the first two doubles for -log(r_i) (out of three)
242
+
243
+ Organization of the table:
244
+
245
+ one struct entry per index, the struct entry containing
246
+ r, logih, logim and logil in this order
247
+ */
248
+
249
+
250
+ ri = argredtable[index].ri;
251
+ /*
252
+ Actually we don't need the logarithm entries now
253
+ Move the following two lines to the eventual reconstruction
254
+ As long as we don't have any if in the following code, we can overlap
255
+ memory access with calculations
256
+ */
257
+ logih = argredtable[index].logih;
258
+ logim = argredtable[index].logim;
259
+
260
+ /* Do range reduction:
261
+
262
+ zh + zl = y * ri - 1.0 correctly
263
+
264
+ Correctness is assured by use of Mul12 and Add12
265
+ even if we don't force ri to have its' LSBs set to zero
266
+
267
+ Discard zl for higher monome degrees
268
+ */
269
+
270
+ Mul12(&yrih, &yril, y, ri);
271
+ th = yrih - 1.0;
272
+ Add12Cond(zh, zl, th, yril);
273
+
274
+ /*
275
+ Polynomial evaluation
276
+
277
+ Use a 7 degree polynomial
278
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
279
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
280
+ using an ad hoc method
281
+
282
+ */
283
+
284
+
285
+
286
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
287
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
288
+ #else
289
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
290
+ #endif
291
+
292
+ Mul12(&zhSquareh, &zhSquarel, zh, zh);
293
+ polyUpper = polyHorner * (zh * zhSquareh);
294
+ zhSquareHalfh = zhSquareh * -0.5;
295
+ zhSquareHalfl = zhSquarel * -0.5;
296
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
297
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
298
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l);
299
+
300
+ /* Reconstruction
301
+
302
+ Read logih and logim in the tables (already done)
303
+
304
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
305
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
306
+
307
+ Carry out everything in double double precision
308
+
309
+ */
310
+
311
+ /*
312
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
313
+ Multiplication of ed (double E) and log2h is thus correct
314
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
315
+ is enough for the accurate phase
316
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
317
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
318
+ triple double values. We have to take it into account for the accurate phase
319
+ basic procedures for addition and multiplication
320
+ The condition on the next Add12 is verified as log2m is smaller than log2h
321
+ and both are scaled by ed
322
+ */
323
+
324
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
325
+
326
+ /* Add logih and logim to ph and pl
327
+
328
+ We must use conditioned Add22 as logih can move over ph
329
+ */
330
+
331
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
332
+
333
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
334
+
335
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
336
+
337
+
338
+
339
+ /* Change logarithm base from natural base to base 10 by multiplying */
340
+
341
+ Mul22(&logb10h, &logb10m, log10invh, log10invm, logh, logm);
342
+
343
+
344
+ /* Rounding test and eventual return or call to the accurate function */
345
+
346
+ if(E==0)
347
+ roundcst = ROUNDCST1;
348
+ else
349
+ roundcst = ROUNDCST2;
350
+
351
+
352
+ if(logb10h == (logb10h + (logb10m * roundcst)))
353
+ return logb10h;
354
+ else
355
+ {
356
+
357
+ #if DEBUG
358
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
359
+ #endif
360
+
361
+ log10_td_accurate(&logb10h, &logb10m, &logb10l, E, ed, index, zh, zl, logih, logim);
362
+
363
+ ReturnRoundToNearest3(logb10h, logb10m, logb10l);
364
+
365
+ } /* Accurate phase launched */
366
+ }
367
+
368
+
369
+ /*************************************************************
370
+ *************************************************************
371
+ * ROUNDED UPWARDS *
372
+ *************************************************************
373
+ *************************************************************/
374
+ double log10_ru(double x) {
375
+ db_number xdb;
376
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
377
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
378
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
379
+ double logb10h, logb10m, logb10l;
380
+ int E, index;
381
+
382
+ E=0;
383
+ xdb.d=x;
384
+
385
+ /* Filter cases */
386
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
387
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
388
+ return -1.0/0.0;
389
+ } /* log(+/-0) = -Inf */
390
+ if (xdb.i[HI] < 0){
391
+ return (x-x)/0; /* log(-x) = Nan */
392
+ }
393
+ /* Subnormal number */
394
+ E = -52;
395
+ xdb.d *= two52; /* make x a normal number */
396
+ }
397
+
398
+ if (xdb.i[HI] >= 0x7ff00000){
399
+ return x+x; /* Inf or Nan */
400
+ }
401
+
402
+
403
+ /* Extract exponent and mantissa
404
+ Do range reduction,
405
+ yielding to E holding the exponent and
406
+ y the mantissa between sqrt(2)/2 and sqrt(2)
407
+ */
408
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
409
+ index = (xdb.i[HI] & 0x000fffff);
410
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
411
+ index = (index + (1<<(20-L-1))) >> (20-L);
412
+
413
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
414
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
415
+ xdb.i[HI] -= 0x00100000;
416
+ E++;
417
+ }
418
+ y = xdb.d;
419
+ index = index & INDEXMASK;
420
+ /* Cast integer E into double ed for multiplication later */
421
+ ed = (double) E;
422
+
423
+ /*
424
+ Read tables:
425
+ Read one float for ri
426
+ Read the first two doubles for -log(r_i) (out of three)
427
+
428
+ Organization of the table:
429
+
430
+ one struct entry per index, the struct entry containing
431
+ r, logih, logim and logil in this order
432
+ */
433
+
434
+
435
+ ri = argredtable[index].ri;
436
+ /*
437
+ Actually we don't need the logarithm entries now
438
+ Move the following two lines to the eventual reconstruction
439
+ As long as we don't have any if in the following code, we can overlap
440
+ memory access with calculations
441
+ */
442
+ logih = argredtable[index].logih;
443
+ logim = argredtable[index].logim;
444
+
445
+ /* Do range reduction:
446
+
447
+ zh + zl = y * ri - 1.0 correctly
448
+
449
+ Correctness is assured by use of Mul12 and Add12
450
+ even if we don't force ri to have its' LSBs set to zero
451
+
452
+ Discard zl for higher monome degrees
453
+ */
454
+
455
+ Mul12(&yrih, &yril, y, ri);
456
+ th = yrih - 1.0;
457
+ Add12Cond(zh, zl, th, yril);
458
+
459
+ /*
460
+ Polynomial evaluation
461
+
462
+ Use a 7 degree polynomial
463
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
464
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
465
+ using an ad hoc method
466
+
467
+ */
468
+
469
+
470
+
471
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
472
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
473
+ #else
474
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
475
+ #endif
476
+
477
+ Mul12(&zhSquareh, &zhSquarel, zh, zh);
478
+ polyUpper = polyHorner * (zh * zhSquareh);
479
+ zhSquareHalfh = zhSquareh * -0.5;
480
+ zhSquareHalfl = zhSquarel * -0.5;
481
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
482
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
483
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l);
484
+
485
+ /* Reconstruction
486
+
487
+ Read logih and logim in the tables (already done)
488
+
489
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
490
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
491
+
492
+ Carry out everything in double double precision
493
+
494
+ */
495
+
496
+ /*
497
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
498
+ Multiplication of ed (double E) and log2h is thus correct
499
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
500
+ is enough for the accurate phase
501
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
502
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
503
+ triple double values. We have to take it into account for the accurate phase
504
+ basic procedures for addition and multiplication
505
+ The condition on the next Add12 is verified as log2m is smaller than log2h
506
+ and both are scaled by ed
507
+ */
508
+
509
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
510
+
511
+ /* Add logih and logim to ph and pl
512
+
513
+ We must use conditioned Add22 as logih can move over ph
514
+ */
515
+
516
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
517
+
518
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
519
+
520
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
521
+
522
+
523
+
524
+ /* Change logarithm base from natural base to base 10 by multiplying */
525
+
526
+ Mul22(&logb10h, &logb10m, log10invh, log10invm, logh, logm);
527
+
528
+
529
+ /* Rounding test and eventual return or call to the accurate function */
530
+
531
+ if(E==0)
532
+ roundcst = RDROUNDCST1;
533
+ else
534
+ roundcst = RDROUNDCST2;
535
+
536
+ TEST_AND_RETURN_RU(logb10h, logb10m, roundcst);
537
+
538
+ #if DEBUG
539
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
540
+ #endif
541
+
542
+ log10_td_accurate(&logb10h, &logb10m, &logb10l, E, ed, index, zh, zl, logih, logim);
543
+
544
+ ReturnRoundUpwards3Unfiltered(logb10h, logb10m, logb10l, WORSTCASEACCURACY);
545
+
546
+ }
547
+
548
+
549
+ /*************************************************************
550
+ *************************************************************
551
+ * ROUNDED DOWNWARDS *
552
+ *************************************************************
553
+ *************************************************************/
554
+ double log10_rd(double x) {
555
+ db_number xdb;
556
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
557
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
558
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
559
+ double logb10h, logb10m, logb10l;
560
+ int E, index;
561
+
562
+ E=0;
563
+ xdb.d=x;
564
+
565
+ /* Filter cases */
566
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
567
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
568
+ return -1.0/0.0;
569
+ } /* log(+/-0) = -Inf */
570
+ if (xdb.i[HI] < 0){
571
+ return (x-x)/0; /* log(-x) = Nan */
572
+ }
573
+ /* Subnormal number */
574
+ E = -52;
575
+ xdb.d *= two52; /* make x a normal number */
576
+ }
577
+
578
+ if (xdb.i[HI] >= 0x7ff00000){
579
+ return x+x; /* Inf or Nan */
580
+ }
581
+
582
+
583
+ /* Extract exponent and mantissa
584
+ Do range reduction,
585
+ yielding to E holding the exponent and
586
+ y the mantissa between sqrt(2)/2 and sqrt(2)
587
+ */
588
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
589
+ index = (xdb.i[HI] & 0x000fffff);
590
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
591
+ index = (index + (1<<(20-L-1))) >> (20-L);
592
+
593
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
594
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
595
+ xdb.i[HI] -= 0x00100000;
596
+ E++;
597
+ }
598
+ y = xdb.d;
599
+ index = index & INDEXMASK;
600
+ /* Cast integer E into double ed for multiplication later */
601
+ ed = (double) E;
602
+
603
+ /*
604
+ Read tables:
605
+ Read one float for ri
606
+ Read the first two doubles for -log(r_i) (out of three)
607
+
608
+ Organization of the table:
609
+
610
+ one struct entry per index, the struct entry containing
611
+ r, logih, logim and logil in this order
612
+ */
613
+
614
+
615
+ ri = argredtable[index].ri;
616
+ /*
617
+ Actually we don't need the logarithm entries now
618
+ Move the following two lines to the eventual reconstruction
619
+ As long as we don't have any if in the following code, we can overlap
620
+ memory access with calculations
621
+ */
622
+ logih = argredtable[index].logih;
623
+ logim = argredtable[index].logim;
624
+
625
+ /* Do range reduction:
626
+
627
+ zh + zl = y * ri - 1.0 correctly
628
+
629
+ Correctness is assured by use of Mul12 and Add12
630
+ even if we don't force ri to have its' LSBs set to zero
631
+
632
+ Discard zl for higher monome degrees
633
+ */
634
+
635
+ Mul12(&yrih, &yril, y, ri);
636
+ th = yrih - 1.0;
637
+ Add12Cond(zh, zl, th, yril);
638
+
639
+ /*
640
+ Polynomial evaluation
641
+
642
+ Use a 7 degree polynomial
643
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
644
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
645
+ using an ad hoc method
646
+
647
+ */
648
+
649
+
650
+
651
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
652
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
653
+ #else
654
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
655
+ #endif
656
+
657
+ Mul12(&zhSquareh, &zhSquarel, zh, zh);
658
+ polyUpper = polyHorner * (zh * zhSquareh);
659
+ zhSquareHalfh = zhSquareh * -0.5;
660
+ zhSquareHalfl = zhSquarel * -0.5;
661
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
662
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
663
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l);
664
+
665
+ /* Reconstruction
666
+
667
+ Read logih and logim in the tables (already done)
668
+
669
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
670
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
671
+
672
+ Carry out everything in double double precision
673
+
674
+ */
675
+
676
+ /*
677
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
678
+ Multiplication of ed (double E) and log2h is thus correct
679
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
680
+ is enough for the accurate phase
681
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
682
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
683
+ triple double values. We have to take it into account for the accurate phase
684
+ basic procedures for addition and multiplication
685
+ The condition on the next Add12 is verified as log2m is smaller than log2h
686
+ and both are scaled by ed
687
+ */
688
+
689
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
690
+
691
+ /* Add logih and logim to ph and pl
692
+
693
+ We must use conditioned Add22 as logih can move over ph
694
+ */
695
+
696
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
697
+
698
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
699
+
700
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
701
+
702
+
703
+
704
+ /* Change logarithm base from natural base to base 10 by multiplying */
705
+
706
+ Mul22(&logb10h, &logb10m, log10invh, log10invm, logh, logm);
707
+
708
+
709
+ /* Rounding test and eventual return or call to the accurate function */
710
+
711
+ if(E==0)
712
+ roundcst = RDROUNDCST1;
713
+ else
714
+ roundcst = RDROUNDCST2;
715
+
716
+ TEST_AND_RETURN_RD(logb10h, logb10m, roundcst);
717
+
718
+ #if DEBUG
719
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
720
+ #endif
721
+
722
+ log10_td_accurate(&logb10h, &logb10m, &logb10l, E, ed, index, zh, zl, logih, logim);
723
+
724
+ ReturnRoundDownwards3Unfiltered(logb10h, logb10m, logb10l, WORSTCASEACCURACY);
725
+ }
726
+
727
+ /*************************************************************
728
+ *************************************************************
729
+ * ROUNDED TOWARDS ZERO *
730
+ *************************************************************
731
+ *************************************************************/
732
+ double log10_rz(double x) {
733
+ db_number xdb;
734
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
735
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
736
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
737
+ double logb10h, logb10m, logb10l;
738
+ int E, index;
739
+
740
+ E=0;
741
+ xdb.d=x;
742
+
743
+ /* Filter cases */
744
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
745
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
746
+ return -1.0/0.0;
747
+ } /* log(+/-0) = -Inf */
748
+ if (xdb.i[HI] < 0){
749
+ return (x-x)/0; /* log(-x) = Nan */
750
+ }
751
+ /* Subnormal number */
752
+ E = -52;
753
+ xdb.d *= two52; /* make x a normal number */
754
+ }
755
+
756
+ if (xdb.i[HI] >= 0x7ff00000){
757
+ return x+x; /* Inf or Nan */
758
+ }
759
+
760
+
761
+ /* Extract exponent and mantissa
762
+ Do range reduction,
763
+ yielding to E holding the exponent and
764
+ y the mantissa between sqrt(2)/2 and sqrt(2)
765
+ */
766
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
767
+ index = (xdb.i[HI] & 0x000fffff);
768
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
769
+ index = (index + (1<<(20-L-1))) >> (20-L);
770
+
771
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
772
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
773
+ xdb.i[HI] -= 0x00100000;
774
+ E++;
775
+ }
776
+ y = xdb.d;
777
+ index = index & INDEXMASK;
778
+ /* Cast integer E into double ed for multiplication later */
779
+ ed = (double) E;
780
+
781
+ /*
782
+ Read tables:
783
+ Read one float for ri
784
+ Read the first two doubles for -log(r_i) (out of three)
785
+
786
+ Organization of the table:
787
+
788
+ one struct entry per index, the struct entry containing
789
+ r, logih, logim and logil in this order
790
+ */
791
+
792
+
793
+ ri = argredtable[index].ri;
794
+ /*
795
+ Actually we don't need the logarithm entries now
796
+ Move the following two lines to the eventual reconstruction
797
+ As long as we don't have any if in the following code, we can overlap
798
+ memory access with calculations
799
+ */
800
+ logih = argredtable[index].logih;
801
+ logim = argredtable[index].logim;
802
+
803
+ /* Do range reduction:
804
+
805
+ zh + zl = y * ri - 1.0 correctly
806
+
807
+ Correctness is assured by use of Mul12 and Add12
808
+ even if we don't force ri to have its' LSBs set to zero
809
+
810
+ Discard zl for higher monome degrees
811
+ */
812
+
813
+ Mul12(&yrih, &yril, y, ri);
814
+ th = yrih - 1.0;
815
+ Add12Cond(zh, zl, th, yril);
816
+
817
+ /*
818
+ Polynomial evaluation
819
+
820
+ Use a 7 degree polynomial
821
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
822
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
823
+ using an ad hoc method
824
+
825
+ */
826
+
827
+
828
+
829
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
830
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
831
+ #else
832
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
833
+ #endif
834
+
835
+ Mul12(&zhSquareh, &zhSquarel, zh, zh);
836
+ polyUpper = polyHorner * (zh * zhSquareh);
837
+ zhSquareHalfh = zhSquareh * -0.5;
838
+ zhSquareHalfl = zhSquarel * -0.5;
839
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
840
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
841
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l);
842
+
843
+ /* Reconstruction
844
+
845
+ Read logih and logim in the tables (already done)
846
+
847
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
848
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
849
+
850
+ Carry out everything in double double precision
851
+
852
+ */
853
+
854
+ /*
855
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
856
+ Multiplication of ed (double E) and log2h is thus correct
857
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
858
+ is enough for the accurate phase
859
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
860
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
861
+ triple double values. We have to take it into account for the accurate phase
862
+ basic procedures for addition and multiplication
863
+ The condition on the next Add12 is verified as log2m is smaller than log2h
864
+ and both are scaled by ed
865
+ */
866
+
867
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
868
+
869
+ /* Add logih and logim to ph and pl
870
+
871
+ We must use conditioned Add22 as logih can move over ph
872
+ */
873
+
874
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
875
+
876
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
877
+
878
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
879
+
880
+
881
+
882
+ /* Change logarithm base from natural base to base 10 by multiplying */
883
+
884
+ Mul22(&logb10h, &logb10m, log10invh, log10invm, logh, logm);
885
+
886
+
887
+ /* Rounding test and eventual return or call to the accurate function */
888
+
889
+ if(E==0)
890
+ roundcst = RDROUNDCST1;
891
+ else
892
+ roundcst = RDROUNDCST2;
893
+
894
+ TEST_AND_RETURN_RZ(logb10h, logb10m, roundcst);
895
+
896
+ #if DEBUG
897
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
898
+ #endif
899
+
900
+ log10_td_accurate(&logb10h, &logb10m, &logb10l, E, ed, index, zh, zl, logih, logim);
901
+
902
+ ReturnRoundTowardsZero3Unfiltered(logb10h, logb10m, logb10l, WORSTCASEACCURACY);
903
+ }
904
+
905
+
906
+