intervals 0.3.56

Sign up to get free protection for your applications and to get access to all the features.
Files changed (131) hide show
  1. data/VERSION.txt +1 -0
  2. data/ext/crlibm/AUTHORS +2 -0
  3. data/ext/crlibm/COPYING +504 -0
  4. data/ext/crlibm/ChangeLog +80 -0
  5. data/ext/crlibm/INSTALL +182 -0
  6. data/ext/crlibm/Makefile.am +84 -0
  7. data/ext/crlibm/Makefile.in +530 -0
  8. data/ext/crlibm/NEWS +0 -0
  9. data/ext/crlibm/README +31 -0
  10. data/ext/crlibm/TODO +47 -0
  11. data/ext/crlibm/VERSION +1 -0
  12. data/ext/crlibm/aclocal.m4 +989 -0
  13. data/ext/crlibm/atan-itanium.c +846 -0
  14. data/ext/crlibm/atan-pentium.c +261 -0
  15. data/ext/crlibm/atan_accurate.c +244 -0
  16. data/ext/crlibm/atan_accurate.h +191 -0
  17. data/ext/crlibm/atan_fast.c +324 -0
  18. data/ext/crlibm/atan_fast.h +678 -0
  19. data/ext/crlibm/config.guess +1461 -0
  20. data/ext/crlibm/config.sub +1566 -0
  21. data/ext/crlibm/configure +7517 -0
  22. data/ext/crlibm/configure.ac +364 -0
  23. data/ext/crlibm/crlibm.h +125 -0
  24. data/ext/crlibm/crlibm_config.h +149 -0
  25. data/ext/crlibm/crlibm_config.h.in +148 -0
  26. data/ext/crlibm/crlibm_private.c +293 -0
  27. data/ext/crlibm/crlibm_private.h +658 -0
  28. data/ext/crlibm/csh_fast.c +631 -0
  29. data/ext/crlibm/csh_fast.h +771 -0
  30. data/ext/crlibm/double-extended.h +496 -0
  31. data/ext/crlibm/exp-td.c +962 -0
  32. data/ext/crlibm/exp-td.h +685 -0
  33. data/ext/crlibm/exp_accurate.c +197 -0
  34. data/ext/crlibm/exp_accurate.h +85 -0
  35. data/ext/crlibm/gappa/log-de-E0-logir0.gappa +106 -0
  36. data/ext/crlibm/gappa/log-de-E0.gappa +79 -0
  37. data/ext/crlibm/gappa/log-de.gappa +81 -0
  38. data/ext/crlibm/gappa/log-td-E0-logir0.gappa +126 -0
  39. data/ext/crlibm/gappa/log-td-E0.gappa +143 -0
  40. data/ext/crlibm/gappa/log-td-accurate-E0-logir0.gappa +230 -0
  41. data/ext/crlibm/gappa/log-td-accurate-E0.gappa +213 -0
  42. data/ext/crlibm/gappa/log-td-accurate.gappa +217 -0
  43. data/ext/crlibm/gappa/log-td.gappa +156 -0
  44. data/ext/crlibm/gappa/trigoSinCosCase3.gappa +204 -0
  45. data/ext/crlibm/gappa/trigoTanCase2.gappa +73 -0
  46. data/ext/crlibm/install-sh +269 -0
  47. data/ext/crlibm/log-de.c +431 -0
  48. data/ext/crlibm/log-de.h +732 -0
  49. data/ext/crlibm/log-td.c +852 -0
  50. data/ext/crlibm/log-td.h +819 -0
  51. data/ext/crlibm/log10-td.c +906 -0
  52. data/ext/crlibm/log10-td.h +823 -0
  53. data/ext/crlibm/log2-td.c +935 -0
  54. data/ext/crlibm/log2-td.h +821 -0
  55. data/ext/crlibm/maple/atan.mpl +359 -0
  56. data/ext/crlibm/maple/common-procedures.mpl +997 -0
  57. data/ext/crlibm/maple/csh.mpl +446 -0
  58. data/ext/crlibm/maple/double-extended.mpl +151 -0
  59. data/ext/crlibm/maple/exp-td.mpl +195 -0
  60. data/ext/crlibm/maple/log-de.mpl +243 -0
  61. data/ext/crlibm/maple/log-td.mpl +316 -0
  62. data/ext/crlibm/maple/log10-td.mpl +345 -0
  63. data/ext/crlibm/maple/log2-td.mpl +334 -0
  64. data/ext/crlibm/maple/trigo.mpl +728 -0
  65. data/ext/crlibm/maple/triple-double.mpl +58 -0
  66. data/ext/crlibm/missing +198 -0
  67. data/ext/crlibm/mkinstalldirs +40 -0
  68. data/ext/crlibm/rem_pio2_accurate.c +219 -0
  69. data/ext/crlibm/rem_pio2_accurate.h +53 -0
  70. data/ext/crlibm/scs_lib/AUTHORS +3 -0
  71. data/ext/crlibm/scs_lib/COPYING +504 -0
  72. data/ext/crlibm/scs_lib/ChangeLog +16 -0
  73. data/ext/crlibm/scs_lib/INSTALL +215 -0
  74. data/ext/crlibm/scs_lib/Makefile.am +18 -0
  75. data/ext/crlibm/scs_lib/Makefile.in +328 -0
  76. data/ext/crlibm/scs_lib/NEWS +0 -0
  77. data/ext/crlibm/scs_lib/README +9 -0
  78. data/ext/crlibm/scs_lib/TODO +4 -0
  79. data/ext/crlibm/scs_lib/addition_scs.c +623 -0
  80. data/ext/crlibm/scs_lib/config.guess +1461 -0
  81. data/ext/crlibm/scs_lib/config.sub +1566 -0
  82. data/ext/crlibm/scs_lib/configure +6226 -0
  83. data/ext/crlibm/scs_lib/division_scs.c +110 -0
  84. data/ext/crlibm/scs_lib/double2scs.c +174 -0
  85. data/ext/crlibm/scs_lib/install-sh +269 -0
  86. data/ext/crlibm/scs_lib/missing +198 -0
  87. data/ext/crlibm/scs_lib/mkinstalldirs +40 -0
  88. data/ext/crlibm/scs_lib/multiplication_scs.c +456 -0
  89. data/ext/crlibm/scs_lib/poly_fct.c +112 -0
  90. data/ext/crlibm/scs_lib/print_scs.c +73 -0
  91. data/ext/crlibm/scs_lib/rand_scs.c +63 -0
  92. data/ext/crlibm/scs_lib/scs.h +353 -0
  93. data/ext/crlibm/scs_lib/scs2double.c +391 -0
  94. data/ext/crlibm/scs_lib/scs2mpf.c +58 -0
  95. data/ext/crlibm/scs_lib/scs2mpfr.c +61 -0
  96. data/ext/crlibm/scs_lib/scs_private.c +23 -0
  97. data/ext/crlibm/scs_lib/scs_private.h +133 -0
  98. data/ext/crlibm/scs_lib/tests/tbx_timing.h +102 -0
  99. data/ext/crlibm/scs_lib/wrapper_scs.h +486 -0
  100. data/ext/crlibm/scs_lib/zero_scs.c +52 -0
  101. data/ext/crlibm/stamp-h.in +1 -0
  102. data/ext/crlibm/tests/Makefile.am +43 -0
  103. data/ext/crlibm/tests/Makefile.in +396 -0
  104. data/ext/crlibm/tests/blind_test.c +148 -0
  105. data/ext/crlibm/tests/generate_test_vectors.c +258 -0
  106. data/ext/crlibm/tests/soak_test.c +334 -0
  107. data/ext/crlibm/tests/test_common.c +627 -0
  108. data/ext/crlibm/tests/test_common.h +28 -0
  109. data/ext/crlibm/tests/test_perf.c +570 -0
  110. data/ext/crlibm/tests/test_val.c +249 -0
  111. data/ext/crlibm/trigo_accurate.c +500 -0
  112. data/ext/crlibm/trigo_accurate.h +331 -0
  113. data/ext/crlibm/trigo_fast.c +1219 -0
  114. data/ext/crlibm/trigo_fast.h +639 -0
  115. data/ext/crlibm/triple-double.h +878 -0
  116. data/ext/extconf.rb +31 -0
  117. data/ext/fpu.c +107 -0
  118. data/ext/jamis-mod.rb +591 -0
  119. data/lib/fpu.rb +287 -0
  120. data/lib/interval.rb +1170 -0
  121. data/lib/intervals.rb +212 -0
  122. data/lib/struct_float.rb +133 -0
  123. data/test/data_atan.txt +360 -0
  124. data/test/data_cos.txt +346 -0
  125. data/test/data_cosh.txt +3322 -0
  126. data/test/data_exp.txt +3322 -0
  127. data/test/data_log.txt +141 -0
  128. data/test/data_sin.txt +140 -0
  129. data/test/data_sinh.txt +3322 -0
  130. data/test/data_tan.txt +342 -0
  131. metadata +186 -0
@@ -0,0 +1,935 @@
1
+ /*
2
+ * This function computes log2, correctly rounded,
3
+ * using experimental techniques based on triple-double arithmetic
4
+
5
+ THIS IS EXPERIMENTAL SOFTWARE
6
+
7
+ *
8
+ * Author : Christoph Lauter
9
+ * christoph.lauter at ens-lyon.fr
10
+ *
11
+
12
+ To have it replace the crlibm log2, do:
13
+
14
+ gcc -DHAVE_CONFIG_H -I. -fPIC -O2 -c log2-td.c; mv log2-td.o log2_accurate.o; make
15
+
16
+
17
+ */
18
+
19
+
20
+ #include <stdio.h>
21
+ #include <stdlib.h>
22
+ #include "crlibm.h"
23
+ #include "crlibm_private.h"
24
+ #include "triple-double.h"
25
+ #include "log2-td.h"
26
+
27
+ #define AVOID_FMA 0
28
+
29
+
30
+ void log2_td_accurate(double *logb2h, double *logb2m, double *logb2l, int E, double ed, int index, double zh, double zl, double logih, double logim) {
31
+ double highPoly, t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l, t8h, t8l, t9h, t9l, t10h, t10l, t11h, t11l;
32
+ double t12h, t12l, t13h, t13l, t14h, t14l, zSquareh, zSquarem, zSquarel, zCubeh, zCubem, zCubel, higherPolyMultZh, higherPolyMultZm;
33
+ double higherPolyMultZl, zSquareHalfh, zSquareHalfm, zSquareHalfl, polyWithSquareh, polyWithSquarem, polyWithSquarel;
34
+ double polyh, polym, polyl, logil, logyh, logym, logyl, loghover, logmover, loglover, log2edhover, log2edmover, log2edlover;
35
+ double log2edh, log2edm, log2edl, logb2hover, logb2mover, logb2lover;
36
+
37
+
38
+ #if EVAL_PERF
39
+ crlibm_second_step_taken++;
40
+ #endif
41
+
42
+
43
+ /* Accurate phase:
44
+
45
+ Argument reduction is already done.
46
+ We must return logh, logm and logl representing the intermediate result in 118 bits precision.
47
+
48
+ We use a 14 degree polynomial, computing the first 3 (the first is 0) coefficients in triple double,
49
+ calculating the next 7 coefficients in double double arithmetics and the last in double.
50
+
51
+ We must account for zl starting with the monome of degree 4 (7^3 + 53 - 7 >> 118); so
52
+ double double calculations won't account for it.
53
+
54
+ */
55
+
56
+ /* Start of the horner scheme */
57
+
58
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
59
+ highPoly = FMA(FMA(FMA(FMA(accPolyC14,zh,accPolyC13),zh,accPolyC12),zh,accPolyC11),zh,accPolyC10);
60
+ #else
61
+ highPoly = accPolyC10 + zh * (accPolyC11 + zh * (accPolyC12 + zh * (accPolyC13 + zh * accPolyC14)));
62
+ #endif
63
+
64
+ /* We want to write
65
+
66
+ accPolyC3 + zh * (accPoly4 + zh * (accPoly5 + zh * (accPoly6 + zh * (accPoly7 + zh * (accPoly8 + zh * (accPoly9 + zh * highPoly))))));
67
+ ( t14 t13 t12 t11 t10 t9 t8 t7 t6 t5 t4 t3 t2 t1 )
68
+
69
+ with all additions and multiplications in double double arithmetics
70
+ but we will produce intermediate results labelled t1h/t1l thru t14h/t14l
71
+ */
72
+
73
+ Mul12(&t1h, &t1l, zh, highPoly);
74
+ Add22(&t2h, &t2l, accPolyC9h, accPolyC9l, t1h, t1l);
75
+ Mul22(&t3h, &t3l, zh, zl, t2h, t2l);
76
+ Add22(&t4h, &t4l, accPolyC8h, accPolyC8l, t3h, t3l);
77
+ Mul22(&t5h, &t5l, zh, zl, t4h, t4l);
78
+ Add22(&t6h, &t6l, accPolyC7h, accPolyC7l, t5h, t5l);
79
+ Mul22(&t7h, &t7l, zh, zl, t6h, t6l);
80
+ Add22(&t8h, &t8l, accPolyC6h, accPolyC6l, t7h, t7l);
81
+ Mul22(&t9h, &t9l, zh, zl, t8h, t8l);
82
+ Add22(&t10h, &t10l, accPolyC5h, accPolyC5l, t9h, t9l);
83
+ Mul22(&t11h, &t11l, zh, zl, t10h, t10l);
84
+ Add22(&t12h, &t12l, accPolyC4h, accPolyC4l, t11h, t11l);
85
+ Mul22(&t13h, &t13l, zh, zl, t12h, t12l);
86
+ Add22(&t14h, &t14l, accPolyC3h, accPolyC3l, t13h, t13l);
87
+
88
+ /* We must now prepare (zh + zl)^2 and (zh + zl)^3 as triple doubles */
89
+
90
+ Mul23(&zSquareh, &zSquarem, &zSquarel, zh, zl, zh, zl);
91
+ Mul233(&zCubeh, &zCubem, &zCubel, zh, zl, zSquareh, zSquarem, zSquarel);
92
+
93
+ /* We can now multiplicate the middle and higher polynomial by z^3 */
94
+
95
+ Mul233(&higherPolyMultZh, &higherPolyMultZm, &higherPolyMultZl, t14h, t14l, zCubeh, zCubem, zCubel);
96
+
97
+ /* Multiply now z^2 by -1/2 (exact op) and add to middle and higher polynomial */
98
+
99
+ zSquareHalfh = zSquareh * -0.5;
100
+ zSquareHalfm = zSquarem * -0.5;
101
+ zSquareHalfl = zSquarel * -0.5;
102
+
103
+ Add33(&polyWithSquareh, &polyWithSquarem, &polyWithSquarel,
104
+ zSquareHalfh, zSquareHalfm, zSquareHalfl,
105
+ higherPolyMultZh, higherPolyMultZm, higherPolyMultZl);
106
+
107
+ /* Add now zh and zl to obtain the polynomial evaluation result */
108
+
109
+ Add233(&polyh, &polym, &polyl, zh, zl, polyWithSquareh, polyWithSquarem, polyWithSquarel);
110
+
111
+ /* Reconstruct now log(y) = log(1 + z) - log(ri) by adding logih, logim, logil
112
+ logil has not been read to the time, do this first
113
+ */
114
+
115
+ logil = argredtable[index].logil;
116
+
117
+ Add33(&logyh, &logym, &logyl, logih, logim, logil, polyh, polym, polyl);
118
+
119
+ /* Multiply log2 with E, i.e. log2h, log2m, log2l by ed
120
+ ed is always less than 2^(12) and log2h and log2m are stored with at least 12 trailing zeros
121
+ So multiplying naively is correct (up to 134 bits at least)
122
+
123
+ The final result is thus obtained by adding log2 * E to log(y)
124
+ */
125
+
126
+ log2edhover = log2h * ed;
127
+ log2edmover = log2m * ed;
128
+ log2edlover = log2l * ed;
129
+
130
+ /* It may be necessary to renormalize the tabulated value (multiplied by ed) before adding
131
+ the to the log(y)-result
132
+
133
+ If needed, uncomment the following Renormalize3-Statement and comment out the copies
134
+ following it.
135
+ */
136
+
137
+ /* Renormalize3(&log2edh, &log2edm, &log2edl, log2edhover, log2edmover, log2edlover); */
138
+
139
+ log2edh = log2edhover;
140
+ log2edm = log2edmover;
141
+ log2edl = log2edlover;
142
+
143
+ Add33(&loghover, &logmover, &loglover, log2edh, log2edm, log2edl, logyh, logym, logyl);
144
+
145
+
146
+ /* Change logarithm base from natural base to base 2 by multiplying */
147
+
148
+ Mul233(&logb2hover, &logb2mover, &logb2lover, log2invh, log2invl, loghover, logmover, loglover);
149
+
150
+
151
+ /* Since we can not guarantee in each addition and multiplication procedure that
152
+ the results are not overlapping, we must renormalize the result before handing
153
+ it over to the final rounding
154
+ */
155
+
156
+ Renormalize3(logb2h,logb2m,logb2l,logb2hover,logb2mover,logb2lover);
157
+
158
+ }
159
+
160
+
161
+
162
+ /*************************************************************
163
+ *************************************************************
164
+ * ROUNDED TO NEAREST *
165
+ *************************************************************
166
+ *************************************************************/
167
+ double log2_rn(double x){
168
+ db_number xdb;
169
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
170
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
171
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
172
+ double logb2h, logb2m, logb2l;
173
+ int E, index;
174
+
175
+ E=0;
176
+ xdb.d=x;
177
+
178
+ /* Filter cases */
179
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
180
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
181
+ return -1.0/0.0;
182
+ } /* log(+/-0) = -Inf */
183
+ if (xdb.i[HI] < 0){
184
+ return (x-x)/0; /* log(-x) = Nan */
185
+ }
186
+ /* Subnormal number */
187
+ E = -52;
188
+ xdb.d *= ((db_number) ((double) two52)).d; /* make x a normal number */
189
+ }
190
+
191
+ if (xdb.i[HI] >= 0x7ff00000){
192
+ return x+x; /* Inf or Nan */
193
+ }
194
+
195
+
196
+ /* Extract exponent and mantissa
197
+ Do range reduction,
198
+ yielding to E holding the exponent and
199
+ y the mantissa between sqrt(2)/2 and sqrt(2)
200
+ */
201
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
202
+ index = (xdb.i[HI] & 0x000fffff);
203
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
204
+ index = (index + (1<<(20-L-1))) >> (20-L);
205
+
206
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
207
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
208
+ xdb.i[HI] -= 0x00100000;
209
+ E++;
210
+ }
211
+ y = xdb.d;
212
+ index = index & INDEXMASK;
213
+ /* Cast integer E into double ed for multiplication later */
214
+ ed = (double) E;
215
+
216
+ /*
217
+ Read tables:
218
+ Read one float for ri
219
+ Read the first two doubles for -log(r_i) (out of three)
220
+
221
+ Organization of the table:
222
+
223
+ one struct entry per index, the struct entry containing
224
+ r, logih, logim and logil in this order
225
+ */
226
+
227
+
228
+ ri = argredtable[index].ri;
229
+ /*
230
+ Actually we don't need the logarithm entries now
231
+ Move the following two lines to the eventual reconstruction
232
+ As long as we don't have any if in the following code, we can overlap
233
+ memory access with calculations
234
+ */
235
+ logih = argredtable[index].logih;
236
+ logim = argredtable[index].logim;
237
+
238
+ /* Do range reduction:
239
+
240
+ zh + zl = y * ri - 1.0 correctly
241
+
242
+ Correctness is assured by use of Mul12 and Add12
243
+ even if we don't force ri to have its' LSBs set to zero
244
+
245
+ Discard zl for higher monome degrees
246
+ */
247
+
248
+ Mul12(&yrih, &yril, y, ri);
249
+ th = yrih - 1.0;
250
+ Add12Cond(zh, zl, th, yril);
251
+
252
+ /*
253
+ Polynomial evaluation
254
+
255
+ Use a 7 degree polynomial
256
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
257
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
258
+ using an ad hoc method
259
+
260
+ */
261
+
262
+
263
+
264
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
265
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
266
+ #else
267
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
268
+ #endif
269
+
270
+ Mul12(&zhSquareh, &zhSquarel, zh, zh);
271
+ polyUpper = polyHorner * (zh * zhSquareh);
272
+ zhSquareHalfh = zhSquareh * -0.5;
273
+ zhSquareHalfl = zhSquarel * -0.5;
274
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
275
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
276
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l);
277
+
278
+ /* Reconstruction
279
+
280
+ Read logih and logim in the tables (already done)
281
+
282
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
283
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
284
+
285
+ Carry out everything in double double precision
286
+
287
+ */
288
+
289
+ /*
290
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
291
+ Multiplication of ed (double E) and log2h is thus correct
292
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
293
+ is enough for the accurate phase
294
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
295
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
296
+ triple double values. We have to take it into account for the accurate phase
297
+ basic procedures for addition and multiplication
298
+ The condition on the next Add12 is verified as log2m is smaller than log2h
299
+ and both are scaled by ed
300
+ */
301
+
302
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
303
+
304
+ /* Add logih and logim to ph and pl
305
+
306
+ We must use conditioned Add22 as logih can move over ph
307
+ */
308
+
309
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
310
+
311
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
312
+
313
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
314
+
315
+
316
+
317
+ /* Change logarithm base from natural base to base 2 by multiplying */
318
+
319
+ Mul22(&logb2h, &logb2m, log2invh, log2invl, logh, logm);
320
+
321
+
322
+ /* Rounding test and eventual return or call to the accurate function */
323
+
324
+ if(E==0)
325
+ roundcst = ROUNDCST1;
326
+ else
327
+ roundcst = ROUNDCST2;
328
+
329
+
330
+ if(logb2h == (logb2h + (logb2m * roundcst)))
331
+ return logb2h;
332
+ else
333
+ {
334
+
335
+ #if DEBUG
336
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
337
+ #endif
338
+
339
+ log2_td_accurate(&logb2h, &logb2m, &logb2l, E, ed, index, zh, zl, logih, logim);
340
+
341
+ ReturnRoundToNearest3(logb2h, logb2m, logb2l);
342
+
343
+ } /* Accurate phase launched */
344
+ }
345
+
346
+
347
+ /*************************************************************
348
+ *************************************************************
349
+ * ROUNDED UPWARDS *
350
+ *************************************************************
351
+ *************************************************************/
352
+ double log2_ru(double x) {
353
+ db_number xdb;
354
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
355
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
356
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
357
+ double logb2h, logb2m, logb2l;
358
+ int E, index;
359
+
360
+
361
+ E=0;
362
+ xdb.d=x;
363
+
364
+ /* Filter cases */
365
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
366
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
367
+ return -1.0/0.0;
368
+ } /* log(+/-0) = -Inf */
369
+ if (xdb.i[HI] < 0){
370
+ return (x-x)/0; /* log(-x) = Nan */
371
+ }
372
+ /* Subnormal number */
373
+ E = -52;
374
+ xdb.d *= ((db_number) ((double) two52)).d; /* make x a normal number */
375
+ }
376
+
377
+ if (xdb.i[HI] >= 0x7ff00000){
378
+ return x+x; /* Inf or Nan */
379
+ }
380
+
381
+
382
+ /* Extract exponent and mantissa
383
+ Do range reduction,
384
+ yielding to E holding the exponent and
385
+ y the mantissa between sqrt(2)/2 and sqrt(2)
386
+ */
387
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
388
+ index = (xdb.i[HI] & 0x000fffff);
389
+
390
+
391
+ /* Test now if the argument is an exact power of 2
392
+ i.e. if the mantissa is exactly 1 (0x0..0 with the implicit bit)
393
+ This test is necessary for filtering out the cases where the final
394
+ rounding test cannot distinguish between an exact algebraic
395
+ number and a hard case to round
396
+ */
397
+
398
+ if ((index | xdb.i[LO]) == 0) {
399
+ /* Handle the "trivial" case for log2:
400
+ The argument is an exact power of 2, return thus
401
+ just the exponant of the number
402
+ */
403
+
404
+ return (double) E;
405
+
406
+ }
407
+
408
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
409
+ index = (index + (1<<(20-L-1))) >> (20-L);
410
+
411
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
412
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
413
+ xdb.i[HI] -= 0x00100000;
414
+ E++;
415
+ }
416
+ y = xdb.d;
417
+ index = index & INDEXMASK;
418
+ /* Cast integer E into double ed for multiplication later */
419
+ ed = (double) E;
420
+
421
+ /*
422
+ Read tables:
423
+ Read one float for ri
424
+ Read the first two doubles for -log(r_i) (out of three)
425
+
426
+ Organization of the table:
427
+
428
+ one struct entry per index, the struct entry containing
429
+ r, logih, logim and logil in this order
430
+ */
431
+
432
+
433
+ ri = argredtable[index].ri;
434
+ /*
435
+ Actually we don't need the logarithm entries now
436
+ Move the following two lines to the eventual reconstruction
437
+ As long as we don't have any if in the following code, we can overlap
438
+ memory access with calculations
439
+ */
440
+ logih = argredtable[index].logih;
441
+ logim = argredtable[index].logim;
442
+
443
+ /* Do range reduction:
444
+
445
+ zh + zl = y * ri - 1.0 correctly
446
+
447
+ Correctness is assured by use of Mul12 and Add12
448
+ even if we don't force ri to have its' LSBs set to zero
449
+
450
+ Discard zl for higher monome degrees
451
+ */
452
+
453
+ Mul12(&yrih, &yril, y, ri);
454
+ th = yrih - 1.0;
455
+ Add12Cond(zh, zl, th, yril);
456
+
457
+ /*
458
+ Polynomial evaluation
459
+
460
+ Use a 7 degree polynomial
461
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
462
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
463
+ using an ad hoc method
464
+
465
+ */
466
+
467
+
468
+
469
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
470
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
471
+ #else
472
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
473
+ #endif
474
+
475
+ Mul12(&zhSquareh, &zhSquarel, zh, zh);
476
+ polyUpper = polyHorner * (zh * zhSquareh);
477
+ zhSquareHalfh = zhSquareh * -0.5;
478
+ zhSquareHalfl = zhSquarel * -0.5;
479
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
480
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
481
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l);
482
+
483
+ /* Reconstruction
484
+
485
+ Read logih and logim in the tables (already done)
486
+
487
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
488
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
489
+
490
+ Carry out everything in double double precision
491
+
492
+ */
493
+
494
+ /*
495
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
496
+ Multiplication of ed (double E) and log2h is thus correct
497
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
498
+ is enough for the accurate phase
499
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
500
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
501
+ triple double values. We have to take it into account for the accurate phase
502
+ basic procedures for addition and multiplication
503
+ The condition on the next Add12 is verified as log2m is smaller than log2h
504
+ and both are scaled by ed
505
+ */
506
+
507
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
508
+
509
+ /* Add logih and logim to ph and pl
510
+
511
+ We must use conditioned Add22 as logih can move over ph
512
+ */
513
+
514
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
515
+
516
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
517
+
518
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
519
+
520
+ /* Change logarithm base from natural base to base 2 by multiplying */
521
+
522
+ Mul22(&logb2h, &logb2m, log2invh, log2invl, logh, logm);
523
+
524
+ /* Rounding test and eventual return or call to the accurate function */
525
+
526
+ if(E==0)
527
+ roundcst = RDROUNDCST1;
528
+ else
529
+ roundcst = RDROUNDCST2;
530
+
531
+ TEST_AND_RETURN_RU(logb2h, logb2m, roundcst);
532
+
533
+ #if DEBUG
534
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
535
+ #endif
536
+
537
+ log2_td_accurate(&logb2h, &logb2m, &logb2l, E, ed, index, zh, zl, logih, logim);
538
+
539
+ ReturnRoundUpwards3(logb2h, logb2m, logb2l);
540
+
541
+ }
542
+
543
+
544
+ /*************************************************************
545
+ *************************************************************
546
+ * ROUNDED DOWNWARDS *
547
+ *************************************************************
548
+ *************************************************************/
549
+ double log2_rd(double x) {
550
+ db_number xdb;
551
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
552
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
553
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
554
+ double logb2h, logb2m, logb2l;
555
+ int E, index;
556
+
557
+
558
+ E=0;
559
+ xdb.d=x;
560
+
561
+ /* Filter cases */
562
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
563
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
564
+ return -1.0/0.0;
565
+ } /* log(+/-0) = -Inf */
566
+ if (xdb.i[HI] < 0){
567
+ return (x-x)/0; /* log(-x) = Nan */
568
+ }
569
+ /* Subnormal number */
570
+ E = -52;
571
+ xdb.d *= ((db_number) ((double) two52)).d; /* make x a normal number */
572
+ }
573
+
574
+ if (xdb.i[HI] >= 0x7ff00000){
575
+ return x+x; /* Inf or Nan */
576
+ }
577
+
578
+
579
+ /* Extract exponent and mantissa
580
+ Do range reduction,
581
+ yielding to E holding the exponent and
582
+ y the mantissa between sqrt(2)/2 and sqrt(2)
583
+ */
584
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
585
+ index = (xdb.i[HI] & 0x000fffff);
586
+
587
+
588
+ /* Test now if the argument is an exact power of 2
589
+ i.e. if the mantissa is exactly 1 (0x0..0 with the implicit bit)
590
+ This test is necessary for filtering out the cases where the final
591
+ rounding test cannot distinguish between an exact algebraic
592
+ number and a hard case to round
593
+ */
594
+
595
+ if ((index | xdb.i[LO]) == 0) {
596
+ /* Handle the "trivial" case for log2:
597
+ The argument is an exact power of 2, return thus
598
+ just the exponant of the number
599
+ */
600
+
601
+ return (double) E;
602
+
603
+ }
604
+
605
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
606
+ index = (index + (1<<(20-L-1))) >> (20-L);
607
+
608
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
609
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
610
+ xdb.i[HI] -= 0x00100000;
611
+ E++;
612
+ }
613
+ y = xdb.d;
614
+ index = index & INDEXMASK;
615
+ /* Cast integer E into double ed for multiplication later */
616
+ ed = (double) E;
617
+
618
+ /*
619
+ Read tables:
620
+ Read one float for ri
621
+ Read the first two doubles for -log(r_i) (out of three)
622
+
623
+ Organization of the table:
624
+
625
+ one struct entry per index, the struct entry containing
626
+ r, logih, logim and logil in this order
627
+ */
628
+
629
+
630
+ ri = argredtable[index].ri;
631
+ /*
632
+ Actually we don't need the logarithm entries now
633
+ Move the following two lines to the eventual reconstruction
634
+ As long as we don't have any if in the following code, we can overlap
635
+ memory access with calculations
636
+ */
637
+ logih = argredtable[index].logih;
638
+ logim = argredtable[index].logim;
639
+
640
+ /* Do range reduction:
641
+
642
+ zh + zl = y * ri - 1.0 correctly
643
+
644
+ Correctness is assured by use of Mul12 and Add12
645
+ even if we don't force ri to have its' LSBs set to zero
646
+
647
+ Discard zl for higher monome degrees
648
+ */
649
+
650
+ Mul12(&yrih, &yril, y, ri);
651
+ th = yrih - 1.0;
652
+ Add12Cond(zh, zl, th, yril);
653
+
654
+ /*
655
+ Polynomial evaluation
656
+
657
+ Use a 7 degree polynomial
658
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
659
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
660
+ using an ad hoc method
661
+
662
+ */
663
+
664
+
665
+
666
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
667
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
668
+ #else
669
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
670
+ #endif
671
+
672
+ Mul12(&zhSquareh, &zhSquarel, zh, zh);
673
+ polyUpper = polyHorner * (zh * zhSquareh);
674
+ zhSquareHalfh = zhSquareh * -0.5;
675
+ zhSquareHalfl = zhSquarel * -0.5;
676
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
677
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
678
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l);
679
+
680
+ /* Reconstruction
681
+
682
+ Read logih and logim in the tables (already done)
683
+
684
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
685
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
686
+
687
+ Carry out everything in double double precision
688
+
689
+ */
690
+
691
+ /*
692
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
693
+ Multiplication of ed (double E) and log2h is thus correct
694
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
695
+ is enough for the accurate phase
696
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
697
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
698
+ triple double values. We have to take it into account for the accurate phase
699
+ basic procedures for addition and multiplication
700
+ The condition on the next Add12 is verified as log2m is smaller than log2h
701
+ and both are scaled by ed
702
+ */
703
+
704
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
705
+
706
+ /* Add logih and logim to ph and pl
707
+
708
+ We must use conditioned Add22 as logih can move over ph
709
+ */
710
+
711
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
712
+
713
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
714
+
715
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
716
+
717
+ /* Change logarithm base from natural base to base 2 by multiplying */
718
+
719
+ Mul22(&logb2h, &logb2m, log2invh, log2invl, logh, logm);
720
+
721
+ /* Rounding test and eventual return or call to the accurate function */
722
+
723
+ if(E==0)
724
+ roundcst = RDROUNDCST1;
725
+ else
726
+ roundcst = RDROUNDCST2;
727
+
728
+ TEST_AND_RETURN_RD(logb2h, logb2m, roundcst);
729
+
730
+ #if DEBUG
731
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
732
+ #endif
733
+
734
+ log2_td_accurate(&logb2h, &logb2m, &logb2l, E, ed, index, zh, zl, logih, logim);
735
+
736
+ ReturnRoundDownwards3(logb2h, logb2m, logb2l);
737
+ }
738
+
739
+ /*************************************************************
740
+ *************************************************************
741
+ * ROUNDED TOWARDS ZERO *
742
+ *************************************************************
+ * log2_rz(x): logarithm in base 2 of the double x; the result
+ * is handed to the towards-zero rounding helpers
+ * (TEST_AND_RETURN_RZ, then ReturnRoundTowardsZero3).
+ *
+ * Early exits visible in the body:
+ *   x = +/-0                      -> -1.0/0.0
+ *   high word of x negative       -> (x-x)/0
+ *   high word >= 0x7ff00000       -> x+x   (Inf or NaN)
+ *   mantissa bits all zero        -> (double) E, the exponent
+ * Remaining inputs below 2^(-1022) (subnormals) are first
+ * multiplied by two52, with E preset to -52 to compensate.
+ *
+ * Otherwise: table-driven range reduction (argredtable),
+ * polynomial in c3..c7, reconstruction in double-double,
+ * multiplication by log2invh + log2invl, rounding test, and
+ * log2_td_accurate when the rounding test does not return.
743
+ *************************************************************/
744
+ double log2_rz(double x) {
745
+ db_number xdb;
746
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
747
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
748
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
749
+ double logb2h, logb2m, logb2l;
750
+ int E, index;
751
+
752
+
753
+ E=0; /* exponent accumulator; preset to -52 below when a subnormal input is rescaled */
754
+ xdb.d=x; /* xdb.i[HI] / xdb.i[LO] are then used to inspect the bits of x (db_number is presumably a union - confirm) */
755
+
756
+ /* Filter cases */
757
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022); the inner test i[HI] < 0 shows this branch is also meant to catch negative x (signed high word) */
758
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
759
+ return -1.0/0.0;
760
+ } /* log(+/-0) = -Inf */
761
+ if (xdb.i[HI] < 0){
762
+ return (x-x)/0; /* log(-x) = NaN */
763
+ }
764
+ /* Subnormal number */
765
+ E = -52;
766
+ xdb.d *= ((db_number) ((double) two52)).d; /* make x a normal number. NOTE(review): casting to the union type db_number is a compiler extension (GNU C), not ISO C - confirm it is acceptable for all targeted compilers */
767
+ }
768
+
769
+ if (xdb.i[HI] >= 0x7ff00000){
770
+ return x+x; /* Inf or NaN */
771
+ }
772
+
773
+
774
+ /* Extract exponent and mantissa
775
+ Do range reduction,
776
+ yielding E holding the exponent and
777
+ y the mantissa between sqrt(2)/2 and sqrt(2)
778
+ */
779
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
780
+ index = (xdb.i[HI] & 0x000fffff); /* low 20 bits of the high word */
781
+
782
+
783
+ /* Test now if the argument is an exact power of 2
784
+ i.e. if the mantissa is exactly 1 (0x0..0 with the implicit bit)
785
+ This test is necessary for filtering out the cases where the final
786
+ rounding test cannot distinguish between an exact algebraic
787
+ number and a hard case to round
788
+ */
789
+
790
+ if ((index | xdb.i[LO]) == 0) {
791
+ /* Handle the "trivial" case for log2:
792
+ The argument is an exact power of 2, thus return
793
+ just the exponent of the number
794
+ */
795
+
796
+ return (double) E;
797
+
798
+ }
799
+
800
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
801
+ index = (index + (1<<(20-L-1))) >> (20-L); /* drop the low 20-L bits of the field, rounding to nearest */
802
+
803
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
804
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
805
+ xdb.i[HI] -= 0x00100000;
806
+ E++;
807
+ }
808
+ y = xdb.d;
809
+ index = index & INDEXMASK; /* from here on index is only used to address argredtable and as an argument to log2_td_accurate */
810
+ /* Cast integer E into double ed for multiplication later */
811
+ ed = (double) E;
812
+
813
+ /*
814
+ Read tables:
815
+ Read one float for ri
816
+ Read the first two doubles for -log(r_i) (out of three)
817
+
818
+ Organization of the table:
819
+
820
+ one struct entry per index, the struct entry containing
821
+ r, logih, logim and logil in this order
822
+ */
823
+
824
+
825
+ ri = argredtable[index].ri;
826
+ /*
827
+ The logarithm entries are not needed before the reconstruction step;
828
+ the following two loads could be moved there.
829
+ They are issued here because, as long as there is no "if" in the
830
+ following code, the memory access can overlap with the calculations
831
+ */
832
+ logih = argredtable[index].logih;
833
+ logim = argredtable[index].logim;
834
+
835
+ /* Do range reduction:
836
+
837
+ zh + zl = y * ri - 1.0 correctly
838
+
839
+ Correctness is assured by use of Mul12 and Add12
840
+ even if we don't force ri to have its LSBs set to zero
841
+
842
+ Discard zl for higher monome degrees
843
+ */
844
+
845
+ Mul12(&yrih, &yril, y, ri);
846
+ th = yrih - 1.0;
847
+ Add12Cond(zh, zl, th, yril);
848
+
849
+ /*
850
+ Polynomial evaluation
851
+
852
+ Use a degree-7 polynomial
853
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
854
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
855
+ using an ad hoc method
856
+
857
+ */
858
+
859
+
860
+
861
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
862
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
863
+ #else
864
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
865
+ #endif
866
+ /* Assemble ph + pl: polyUpper = polyHorner * zh^3 (degrees 3..7); t1 = polyUpper - zh*zl; t2 = (zh + zl) - zh^2/2; p = t2 + t1 */
867
+ Mul12(&zhSquareh, &zhSquarel, zh, zh);
868
+ polyUpper = polyHorner * (zh * zhSquareh);
869
+ zhSquareHalfh = zhSquareh * -0.5;
870
+ zhSquareHalfl = zhSquarel * -0.5;
871
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
872
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
873
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l);
874
+
875
+ /* Reconstruction
876
+
877
+ Read logih and logim in the tables (already done)
878
+
879
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
880
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
881
+
882
+ Carry out everything in double double precision
883
+
884
+ */
885
+
886
+ /*
887
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
888
+ Multiplication of ed (double E) and log2h is thus correct
889
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
890
+ is enough for the accurate phase
891
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
892
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
893
+ triple double values. We have to take it into account for the accurate phase
894
+ basic procedures for addition and multiplication
895
+ The condition on the next Add12 is verified as log2m is smaller than log2h
896
+ and both are scaled by ed
897
+ */
898
+
899
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
900
+
901
+ /* Add logih and logim to ph and pl
902
+
903
+ We must use conditioned Add22 as logih can move over ph
904
+ */
905
+
906
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
907
+
908
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
909
+
910
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
911
+
912
+ /* Change logarithm base from natural base to base 2 by multiplying */
913
+
914
+ Mul22(&logb2h, &logb2m, log2invh, log2invl, logh, logm);
915
+
916
+ /* Rounding test and eventual return or call to the accurate function */
917
+ /* Select the rounding-test constant: RDROUNDCST1 when E is 0, RDROUNDCST2 otherwise. NOTE(review): the RD-prefixed constants are used although this is the towards-zero variant; presumably they are shared by the directed rounding modes - confirm against their definition */
918
+ if(E==0)
919
+ roundcst = RDROUNDCST1;
920
+ else
921
+ roundcst = RDROUNDCST2;
922
+ /* TEST_AND_RETURN_RZ presumably returns from log2_rz when the quick-phase result can be rounded safely; only otherwise is the code below reached - confirm in its definition */
923
+ TEST_AND_RETURN_RZ(logb2h, logb2m, roundcst);
924
+
925
+ #if DEBUG
926
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
927
+ #endif
928
+ /* Accurate phase: logb2h, logb2m and logb2l are passed by address and presumably overwritten with a three-double result */
929
+ log2_td_accurate(&logb2h, &logb2m, &logb2l, E, ed, index, zh, zl, logih, logim);
930
+ /* No explicit return statement follows: ReturnRoundTowardsZero3 is presumably what returns the rounded value - confirm in its definition */
931
+ ReturnRoundTowardsZero3(logb2h, logb2m, logb2l);
932
+ }
933
+
934
+
935
+