intervals 0.3.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. data/VERSION.txt +1 -0
  2. data/ext/crlibm/AUTHORS +2 -0
  3. data/ext/crlibm/COPYING +504 -0
  4. data/ext/crlibm/ChangeLog +80 -0
  5. data/ext/crlibm/INSTALL +182 -0
  6. data/ext/crlibm/Makefile.am +84 -0
  7. data/ext/crlibm/Makefile.in +530 -0
  8. data/ext/crlibm/NEWS +0 -0
  9. data/ext/crlibm/README +31 -0
  10. data/ext/crlibm/TODO +47 -0
  11. data/ext/crlibm/VERSION +1 -0
  12. data/ext/crlibm/aclocal.m4 +989 -0
  13. data/ext/crlibm/atan-itanium.c +846 -0
  14. data/ext/crlibm/atan-pentium.c +261 -0
  15. data/ext/crlibm/atan_accurate.c +244 -0
  16. data/ext/crlibm/atan_accurate.h +191 -0
  17. data/ext/crlibm/atan_fast.c +324 -0
  18. data/ext/crlibm/atan_fast.h +678 -0
  19. data/ext/crlibm/config.guess +1461 -0
  20. data/ext/crlibm/config.sub +1566 -0
  21. data/ext/crlibm/configure +7517 -0
  22. data/ext/crlibm/configure.ac +364 -0
  23. data/ext/crlibm/crlibm.h +125 -0
  24. data/ext/crlibm/crlibm_config.h +149 -0
  25. data/ext/crlibm/crlibm_config.h.in +148 -0
  26. data/ext/crlibm/crlibm_private.c +293 -0
  27. data/ext/crlibm/crlibm_private.h +658 -0
  28. data/ext/crlibm/csh_fast.c +631 -0
  29. data/ext/crlibm/csh_fast.h +771 -0
  30. data/ext/crlibm/double-extended.h +496 -0
  31. data/ext/crlibm/exp-td.c +962 -0
  32. data/ext/crlibm/exp-td.h +685 -0
  33. data/ext/crlibm/exp_accurate.c +197 -0
  34. data/ext/crlibm/exp_accurate.h +85 -0
  35. data/ext/crlibm/gappa/log-de-E0-logir0.gappa +106 -0
  36. data/ext/crlibm/gappa/log-de-E0.gappa +79 -0
  37. data/ext/crlibm/gappa/log-de.gappa +81 -0
  38. data/ext/crlibm/gappa/log-td-E0-logir0.gappa +126 -0
  39. data/ext/crlibm/gappa/log-td-E0.gappa +143 -0
  40. data/ext/crlibm/gappa/log-td-accurate-E0-logir0.gappa +230 -0
  41. data/ext/crlibm/gappa/log-td-accurate-E0.gappa +213 -0
  42. data/ext/crlibm/gappa/log-td-accurate.gappa +217 -0
  43. data/ext/crlibm/gappa/log-td.gappa +156 -0
  44. data/ext/crlibm/gappa/trigoSinCosCase3.gappa +204 -0
  45. data/ext/crlibm/gappa/trigoTanCase2.gappa +73 -0
  46. data/ext/crlibm/install-sh +269 -0
  47. data/ext/crlibm/log-de.c +431 -0
  48. data/ext/crlibm/log-de.h +732 -0
  49. data/ext/crlibm/log-td.c +852 -0
  50. data/ext/crlibm/log-td.h +819 -0
  51. data/ext/crlibm/log10-td.c +906 -0
  52. data/ext/crlibm/log10-td.h +823 -0
  53. data/ext/crlibm/log2-td.c +935 -0
  54. data/ext/crlibm/log2-td.h +821 -0
  55. data/ext/crlibm/maple/atan.mpl +359 -0
  56. data/ext/crlibm/maple/common-procedures.mpl +997 -0
  57. data/ext/crlibm/maple/csh.mpl +446 -0
  58. data/ext/crlibm/maple/double-extended.mpl +151 -0
  59. data/ext/crlibm/maple/exp-td.mpl +195 -0
  60. data/ext/crlibm/maple/log-de.mpl +243 -0
  61. data/ext/crlibm/maple/log-td.mpl +316 -0
  62. data/ext/crlibm/maple/log10-td.mpl +345 -0
  63. data/ext/crlibm/maple/log2-td.mpl +334 -0
  64. data/ext/crlibm/maple/trigo.mpl +728 -0
  65. data/ext/crlibm/maple/triple-double.mpl +58 -0
  66. data/ext/crlibm/missing +198 -0
  67. data/ext/crlibm/mkinstalldirs +40 -0
  68. data/ext/crlibm/rem_pio2_accurate.c +219 -0
  69. data/ext/crlibm/rem_pio2_accurate.h +53 -0
  70. data/ext/crlibm/scs_lib/AUTHORS +3 -0
  71. data/ext/crlibm/scs_lib/COPYING +504 -0
  72. data/ext/crlibm/scs_lib/ChangeLog +16 -0
  73. data/ext/crlibm/scs_lib/INSTALL +215 -0
  74. data/ext/crlibm/scs_lib/Makefile.am +18 -0
  75. data/ext/crlibm/scs_lib/Makefile.in +328 -0
  76. data/ext/crlibm/scs_lib/NEWS +0 -0
  77. data/ext/crlibm/scs_lib/README +9 -0
  78. data/ext/crlibm/scs_lib/TODO +4 -0
  79. data/ext/crlibm/scs_lib/addition_scs.c +623 -0
  80. data/ext/crlibm/scs_lib/config.guess +1461 -0
  81. data/ext/crlibm/scs_lib/config.sub +1566 -0
  82. data/ext/crlibm/scs_lib/configure +6226 -0
  83. data/ext/crlibm/scs_lib/division_scs.c +110 -0
  84. data/ext/crlibm/scs_lib/double2scs.c +174 -0
  85. data/ext/crlibm/scs_lib/install-sh +269 -0
  86. data/ext/crlibm/scs_lib/missing +198 -0
  87. data/ext/crlibm/scs_lib/mkinstalldirs +40 -0
  88. data/ext/crlibm/scs_lib/multiplication_scs.c +456 -0
  89. data/ext/crlibm/scs_lib/poly_fct.c +112 -0
  90. data/ext/crlibm/scs_lib/print_scs.c +73 -0
  91. data/ext/crlibm/scs_lib/rand_scs.c +63 -0
  92. data/ext/crlibm/scs_lib/scs.h +353 -0
  93. data/ext/crlibm/scs_lib/scs2double.c +391 -0
  94. data/ext/crlibm/scs_lib/scs2mpf.c +58 -0
  95. data/ext/crlibm/scs_lib/scs2mpfr.c +61 -0
  96. data/ext/crlibm/scs_lib/scs_private.c +23 -0
  97. data/ext/crlibm/scs_lib/scs_private.h +133 -0
  98. data/ext/crlibm/scs_lib/tests/tbx_timing.h +102 -0
  99. data/ext/crlibm/scs_lib/wrapper_scs.h +486 -0
  100. data/ext/crlibm/scs_lib/zero_scs.c +52 -0
  101. data/ext/crlibm/stamp-h.in +1 -0
  102. data/ext/crlibm/tests/Makefile.am +43 -0
  103. data/ext/crlibm/tests/Makefile.in +396 -0
  104. data/ext/crlibm/tests/blind_test.c +148 -0
  105. data/ext/crlibm/tests/generate_test_vectors.c +258 -0
  106. data/ext/crlibm/tests/soak_test.c +334 -0
  107. data/ext/crlibm/tests/test_common.c +627 -0
  108. data/ext/crlibm/tests/test_common.h +28 -0
  109. data/ext/crlibm/tests/test_perf.c +570 -0
  110. data/ext/crlibm/tests/test_val.c +249 -0
  111. data/ext/crlibm/trigo_accurate.c +500 -0
  112. data/ext/crlibm/trigo_accurate.h +331 -0
  113. data/ext/crlibm/trigo_fast.c +1219 -0
  114. data/ext/crlibm/trigo_fast.h +639 -0
  115. data/ext/crlibm/triple-double.h +878 -0
  116. data/ext/extconf.rb +31 -0
  117. data/ext/fpu.c +107 -0
  118. data/ext/jamis-mod.rb +591 -0
  119. data/lib/fpu.rb +287 -0
  120. data/lib/interval.rb +1170 -0
  121. data/lib/intervals.rb +212 -0
  122. data/lib/struct_float.rb +133 -0
  123. data/test/data_atan.txt +360 -0
  124. data/test/data_cos.txt +346 -0
  125. data/test/data_cosh.txt +3322 -0
  126. data/test/data_exp.txt +3322 -0
  127. data/test/data_log.txt +141 -0
  128. data/test/data_sin.txt +140 -0
  129. data/test/data_sinh.txt +3322 -0
  130. data/test/data_tan.txt +342 -0
  131. metadata +186 -0
@@ -0,0 +1,935 @@
1
+ /*
2
+ * This function computes log2, correctly rounded,
3
+ * using experimental techniques based on triple-double arithmetic
4
+
5
+ THIS IS EXPERIMENTAL SOFTWARE
6
+
7
+ *
8
+ * Author : Christoph Lauter
9
+ * christoph.lauter at ens-lyon.fr
10
+ *
11
+
12
+ To have it replace the crlibm log2, do:
13
+
14
+ gcc -DHAVE_CONFIG_H -I. -fPIC -O2 -c log2-td.c; mv log2-td.o log2_accurate.o; make
15
+
16
+
17
+ */
18
+
19
+
20
+ #include <stdio.h>
21
+ #include <stdlib.h>
22
+ #include "crlibm.h"
23
+ #include "crlibm_private.h"
24
+ #include "triple-double.h"
25
+ #include "log2-td.h"
26
+
27
+ #define AVOID_FMA 0
28
+
29
+
30
+ void log2_td_accurate(double *logb2h, double *logb2m, double *logb2l, int E, double ed, int index, double zh, double zl, double logih, double logim) {
31
+ double highPoly, t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l, t8h, t8l, t9h, t9l, t10h, t10l, t11h, t11l;
32
+ double t12h, t12l, t13h, t13l, t14h, t14l, zSquareh, zSquarem, zSquarel, zCubeh, zCubem, zCubel, higherPolyMultZh, higherPolyMultZm;
33
+ double higherPolyMultZl, zSquareHalfh, zSquareHalfm, zSquareHalfl, polyWithSquareh, polyWithSquarem, polyWithSquarel;
34
+ double polyh, polym, polyl, logil, logyh, logym, logyl, loghover, logmover, loglover, log2edhover, log2edmover, log2edlover;
35
+ double log2edh, log2edm, log2edl, logb2hover, logb2mover, logb2lover;
36
+
37
+
38
+ #if EVAL_PERF
39
+ crlibm_second_step_taken++;
40
+ #endif
41
+
42
+
43
+ /* Accurate phase:
44
+
45
+ Argument reduction is already done.
46
+ We must return logh, logm and logl representing the intermediate result in 118 bits precision.
47
+
48
+ We use a 14 degree polynomial, computing the first 3 (the first is 0) coefficients in triple double,
49
+ calculating the next 7 coefficients (degrees 3 to 9) in double-double arithmetic and the last 5 (degrees 10 to 14) in double.
50
+
51
+ We must account for zl starting with the monome of degree 4 (7^3 + 53 - 7 >> 118); so
52
+ double double calculations won't account for it.
53
+
54
+ */
55
+
56
+ /* Start of the horner scheme */
57
+
58
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
59
+ highPoly = FMA(FMA(FMA(FMA(accPolyC14,zh,accPolyC13),zh,accPolyC12),zh,accPolyC11),zh,accPolyC10);
60
+ #else
61
+ highPoly = accPolyC10 + zh * (accPolyC11 + zh * (accPolyC12 + zh * (accPolyC13 + zh * accPolyC14)));
62
+ #endif
63
+
64
+ /* We want to write
65
+
66
+ accPolyC3 + zh * (accPolyC4 + zh * (accPolyC5 + zh * (accPolyC6 + zh * (accPolyC7 + zh * (accPolyC8 + zh * (accPolyC9 + zh * highPoly))))));
67
+ ( t14 t13 t12 t11 t10 t9 t8 t7 t6 t5 t4 t3 t2 t1 )
68
+
69
+ with all additions and multiplications in double double arithmetics
70
+ but we will produce intermediate results labelled t1h/t1l thru t14h/t14l
71
+ */
72
+
73
+ Mul12(&t1h, &t1l, zh, highPoly);
74
+ Add22(&t2h, &t2l, accPolyC9h, accPolyC9l, t1h, t1l);
75
+ Mul22(&t3h, &t3l, zh, zl, t2h, t2l);
76
+ Add22(&t4h, &t4l, accPolyC8h, accPolyC8l, t3h, t3l);
77
+ Mul22(&t5h, &t5l, zh, zl, t4h, t4l);
78
+ Add22(&t6h, &t6l, accPolyC7h, accPolyC7l, t5h, t5l);
79
+ Mul22(&t7h, &t7l, zh, zl, t6h, t6l);
80
+ Add22(&t8h, &t8l, accPolyC6h, accPolyC6l, t7h, t7l);
81
+ Mul22(&t9h, &t9l, zh, zl, t8h, t8l);
82
+ Add22(&t10h, &t10l, accPolyC5h, accPolyC5l, t9h, t9l);
83
+ Mul22(&t11h, &t11l, zh, zl, t10h, t10l);
84
+ Add22(&t12h, &t12l, accPolyC4h, accPolyC4l, t11h, t11l);
85
+ Mul22(&t13h, &t13l, zh, zl, t12h, t12l);
86
+ Add22(&t14h, &t14l, accPolyC3h, accPolyC3l, t13h, t13l);
87
+
88
+ /* We must now prepare (zh + zl)^2 and (zh + zl)^3 as triple doubles */
89
+
90
+ Mul23(&zSquareh, &zSquarem, &zSquarel, zh, zl, zh, zl);
91
+ Mul233(&zCubeh, &zCubem, &zCubel, zh, zl, zSquareh, zSquarem, zSquarel);
92
+
93
+ /* We can now multiply the middle and higher polynomial by z^3 */
94
+
95
+ Mul233(&higherPolyMultZh, &higherPolyMultZm, &higherPolyMultZl, t14h, t14l, zCubeh, zCubem, zCubel);
96
+
97
+ /* Multiply now z^2 by -1/2 (exact op) and add to middle and higher polynomial */
98
+
99
+ zSquareHalfh = zSquareh * -0.5;
100
+ zSquareHalfm = zSquarem * -0.5;
101
+ zSquareHalfl = zSquarel * -0.5;
102
+
103
+ Add33(&polyWithSquareh, &polyWithSquarem, &polyWithSquarel,
104
+ zSquareHalfh, zSquareHalfm, zSquareHalfl,
105
+ higherPolyMultZh, higherPolyMultZm, higherPolyMultZl);
106
+
107
+ /* Add now zh and zl to obtain the polynomial evaluation result */
108
+
109
+ Add233(&polyh, &polym, &polyl, zh, zl, polyWithSquareh, polyWithSquarem, polyWithSquarel);
110
+
111
+ /* Reconstruct now log(y) = log(1 + z) - log(ri) by adding logih, logim, logil
112
+ logil has not been read yet, so do this first
113
+ */
114
+
115
+ logil = argredtable[index].logil;
116
+
117
+ Add33(&logyh, &logym, &logyl, logih, logim, logil, polyh, polym, polyl);
118
+
119
+ /* Multiply log2 with E, i.e. log2h, log2m, log2l by ed
120
+ ed is always less than 2^(12) and log2h and log2m are stored with at least 12 trailing zeros
121
+ So multiplying naively is correct (up to 134 bits at least)
122
+
123
+ The final result is thus obtained by adding log2 * E to log(y)
124
+ */
125
+
126
+ log2edhover = log2h * ed;
127
+ log2edmover = log2m * ed;
128
+ log2edlover = log2l * ed;
129
+
130
+ /* It may be necessary to renormalize the tabulated value (multiplied by ed) before adding
131
+ it to the log(y) result
132
+
133
+ If needed, uncomment the following Renormalize3-Statement and comment out the copies
134
+ following it.
135
+ */
136
+
137
+ /* Renormalize3(&log2edh, &log2edm, &log2edl, log2edhover, log2edmover, log2edlover); */
138
+
139
+ log2edh = log2edhover;
140
+ log2edm = log2edmover;
141
+ log2edl = log2edlover;
142
+
143
+ Add33(&loghover, &logmover, &loglover, log2edh, log2edm, log2edl, logyh, logym, logyl);
144
+
145
+
146
+ /* Change logarithm base from natural base to base 2 by multiplying */
147
+
148
+ Mul233(&logb2hover, &logb2mover, &logb2lover, log2invh, log2invl, loghover, logmover, loglover);
149
+
150
+
151
+ /* Since we can not guarantee in each addition and multiplication procedure that
152
+ the results are not overlapping, we must renormalize the result before handing
153
+ it over to the final rounding
154
+ */
155
+
156
+ Renormalize3(logb2h,logb2m,logb2l,logb2hover,logb2mover,logb2lover);
157
+
158
+ }
159
+
160
+
161
+
162
+ /*************************************************************
163
+ *************************************************************
164
+ * ROUNDED TO NEAREST *
165
+ *************************************************************
166
+ *************************************************************/
167
+ double log2_rn(double x){
168
+ db_number xdb;
169
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
170
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
171
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
172
+ double logb2h, logb2m, logb2l;
173
+ int E, index;
174
+
175
+ E=0;
176
+ xdb.d=x;
177
+
178
+ /* Filter cases */
179
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
180
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
181
+ return -1.0/0.0;
182
+ } /* log(+/-0) = -Inf */
183
+ if (xdb.i[HI] < 0){
184
+ return (x-x)/0; /* log(-x) = Nan */
185
+ }
186
+ /* Subnormal number */
187
+ E = -52;
188
+ xdb.d *= ((db_number) ((double) two52)).d; /* make x a normal number */
189
+ }
190
+
191
+ if (xdb.i[HI] >= 0x7ff00000){
192
+ return x+x; /* Inf or Nan */
193
+ }
194
+
195
+
196
+ /* Extract exponent and mantissa
197
+ Do range reduction,
198
+ yielding E holding the exponent and
199
+ y the mantissa between sqrt(2)/2 and sqrt(2)
200
+ */
201
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
202
+ index = (xdb.i[HI] & 0x000fffff);
203
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
204
+ index = (index + (1<<(20-L-1))) >> (20-L);
205
+
206
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
207
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
208
+ xdb.i[HI] -= 0x00100000;
209
+ E++;
210
+ }
211
+ y = xdb.d;
212
+ index = index & INDEXMASK;
213
+ /* Cast integer E into double ed for multiplication later */
214
+ ed = (double) E;
215
+
216
+ /*
217
+ Read tables:
218
+ Read one float for ri
219
+ Read the first two doubles for -log(r_i) (out of three)
220
+
221
+ Organization of the table:
222
+
223
+ one struct entry per index, the struct entry containing
224
+ r, logih, logim and logil in this order
225
+ */
226
+
227
+
228
+ ri = argredtable[index].ri;
229
+ /*
230
+ Actually we don't need the logarithm entries now
231
+ Move the following two lines to the eventual reconstruction
232
+ As long as we don't have any if in the following code, we can overlap
233
+ memory access with calculations
234
+ */
235
+ logih = argredtable[index].logih;
236
+ logim = argredtable[index].logim;
237
+
238
+ /* Do range reduction:
239
+
240
+ zh + zl = y * ri - 1.0 correctly
241
+
242
+ Correctness is assured by use of Mul12 and Add12
243
+ even if we don't force ri to have its LSBs set to zero
244
+
245
+ Discard zl for higher monome degrees
246
+ */
247
+
248
+ Mul12(&yrih, &yril, y, ri);
249
+ th = yrih - 1.0;
250
+ Add12Cond(zh, zl, th, yril);
251
+
252
+ /*
253
+ Polynomial evaluation
254
+
255
+ Use a 7 degree polynomial
256
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
257
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
258
+ using an ad hoc method
259
+
260
+ */
261
+
262
+
263
+
264
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
265
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
266
+ #else
267
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
268
+ #endif
269
+
270
+ Mul12(&zhSquareh, &zhSquarel, zh, zh);
271
+ polyUpper = polyHorner * (zh * zhSquareh);
272
+ zhSquareHalfh = zhSquareh * -0.5;
273
+ zhSquareHalfl = zhSquarel * -0.5;
274
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
275
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
276
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l);
277
+
278
+ /* Reconstruction
279
+
280
+ Read logih and logim in the tables (already done)
281
+
282
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
283
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
284
+
285
+ Carry out everything in double double precision
286
+
287
+ */
288
+
289
+ /*
290
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
291
+ Multiplication of ed (double E) and log2h is thus correct
292
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
293
+ is enough for the accurate phase
294
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
295
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
296
+ triple double values. We have to take it into account for the accurate phase
297
+ basic procedures for addition and multiplication
298
+ The condition on the next Add12 is verified as log2m is smaller than log2h
299
+ and both are scaled by ed
300
+ */
301
+
302
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
303
+
304
+ /* Add logih and logim to ph and pl
305
+
306
+ We must use conditioned Add22 as logih can move over ph
307
+ */
308
+
309
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
310
+
311
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
312
+
313
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
314
+
315
+
316
+
317
+ /* Change logarithm base from natural base to base 2 by multiplying */
318
+
319
+ Mul22(&logb2h, &logb2m, log2invh, log2invl, logh, logm);
320
+
321
+
322
+ /* Rounding test and eventual return or call to the accurate function */
323
+
324
+ if(E==0)
325
+ roundcst = ROUNDCST1;
326
+ else
327
+ roundcst = ROUNDCST2;
328
+
329
+
330
+ if(logb2h == (logb2h + (logb2m * roundcst)))
331
+ return logb2h;
332
+ else
333
+ {
334
+
335
+ #if DEBUG
336
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
337
+ #endif
338
+
339
+ log2_td_accurate(&logb2h, &logb2m, &logb2l, E, ed, index, zh, zl, logih, logim);
340
+
341
+ ReturnRoundToNearest3(logb2h, logb2m, logb2l);
342
+
343
+ } /* Accurate phase launched */
344
+ }
345
+
346
+
347
+ /*************************************************************
348
+ *************************************************************
349
+ * ROUNDED UPWARDS *
350
+ *************************************************************
351
+ *************************************************************/
352
+ double log2_ru(double x) {
353
+ db_number xdb;
354
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
355
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
356
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
357
+ double logb2h, logb2m, logb2l;
358
+ int E, index;
359
+
360
+
361
+ E=0;
362
+ xdb.d=x;
363
+
364
+ /* Filter cases */
365
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
366
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
367
+ return -1.0/0.0;
368
+ } /* log(+/-0) = -Inf */
369
+ if (xdb.i[HI] < 0){
370
+ return (x-x)/0; /* log(-x) = Nan */
371
+ }
372
+ /* Subnormal number */
373
+ E = -52;
374
+ xdb.d *= ((db_number) ((double) two52)).d; /* make x a normal number */
375
+ }
376
+
377
+ if (xdb.i[HI] >= 0x7ff00000){
378
+ return x+x; /* Inf or Nan */
379
+ }
380
+
381
+
382
+ /* Extract exponent and mantissa
383
+ Do range reduction,
384
+ yielding E holding the exponent and
385
+ y the mantissa between sqrt(2)/2 and sqrt(2)
386
+ */
387
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
388
+ index = (xdb.i[HI] & 0x000fffff);
389
+
390
+
391
+ /* Test now if the argument is an exact power of 2
392
+ i.e. if the mantissa is exactly 1 (0x0..0 with the implicit bit)
393
+ This test is necessary for filtering out the cases where the final
394
+ rounding test cannot distinguish between an exact algebraic
395
+ number and a hard case to round
396
+ */
397
+
398
+ if ((index | xdb.i[LO]) == 0) {
399
+ /* Handle the "trivial" case for log2:
400
+ The argument is an exact power of 2, return thus
401
+ just the exponent of the number
402
+ */
403
+
404
+ return (double) E;
405
+
406
+ }
407
+
408
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
409
+ index = (index + (1<<(20-L-1))) >> (20-L);
410
+
411
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
412
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
413
+ xdb.i[HI] -= 0x00100000;
414
+ E++;
415
+ }
416
+ y = xdb.d;
417
+ index = index & INDEXMASK;
418
+ /* Cast integer E into double ed for multiplication later */
419
+ ed = (double) E;
420
+
421
+ /*
422
+ Read tables:
423
+ Read one float for ri
424
+ Read the first two doubles for -log(r_i) (out of three)
425
+
426
+ Organization of the table:
427
+
428
+ one struct entry per index, the struct entry containing
429
+ r, logih, logim and logil in this order
430
+ */
431
+
432
+
433
+ ri = argredtable[index].ri;
434
+ /*
435
+ Actually we don't need the logarithm entries now
436
+ Move the following two lines to the eventual reconstruction
437
+ As long as we don't have any if in the following code, we can overlap
438
+ memory access with calculations
439
+ */
440
+ logih = argredtable[index].logih;
441
+ logim = argredtable[index].logim;
442
+
443
+ /* Do range reduction:
444
+
445
+ zh + zl = y * ri - 1.0 correctly
446
+
447
+ Correctness is assured by use of Mul12 and Add12
448
+ even if we don't force ri to have its LSBs set to zero
449
+
450
+ Discard zl for higher monome degrees
451
+ */
452
+
453
+ Mul12(&yrih, &yril, y, ri);
454
+ th = yrih - 1.0;
455
+ Add12Cond(zh, zl, th, yril);
456
+
457
+ /*
458
+ Polynomial evaluation
459
+
460
+ Use a 7 degree polynomial
461
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
462
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
463
+ using an ad hoc method
464
+
465
+ */
466
+
467
+
468
+
469
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
470
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
471
+ #else
472
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
473
+ #endif
474
+
475
+ Mul12(&zhSquareh, &zhSquarel, zh, zh);
476
+ polyUpper = polyHorner * (zh * zhSquareh);
477
+ zhSquareHalfh = zhSquareh * -0.5;
478
+ zhSquareHalfl = zhSquarel * -0.5;
479
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
480
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
481
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l);
482
+
483
+ /* Reconstruction
484
+
485
+ Read logih and logim in the tables (already done)
486
+
487
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
488
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
489
+
490
+ Carry out everything in double double precision
491
+
492
+ */
493
+
494
+ /*
495
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
496
+ Multiplication of ed (double E) and log2h is thus correct
497
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
498
+ is enough for the accurate phase
499
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
500
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
501
+ triple double values. We have to take it into account for the accurate phase
502
+ basic procedures for addition and multiplication
503
+ The condition on the next Add12 is verified as log2m is smaller than log2h
504
+ and both are scaled by ed
505
+ */
506
+
507
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
508
+
509
+ /* Add logih and logim to ph and pl
510
+
511
+ We must use conditioned Add22 as logih can move over ph
512
+ */
513
+
514
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
515
+
516
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
517
+
518
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
519
+
520
+ /* Change logarithm base from natural base to base 2 by multiplying */
521
+
522
+ Mul22(&logb2h, &logb2m, log2invh, log2invl, logh, logm);
523
+
524
+ /* Rounding test and eventual return or call to the accurate function */
525
+
526
+ if(E==0)
527
+ roundcst = RDROUNDCST1;
528
+ else
529
+ roundcst = RDROUNDCST2;
530
+
531
+ TEST_AND_RETURN_RU(logb2h, logb2m, roundcst);
532
+
533
+ #if DEBUG
534
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
535
+ #endif
536
+
537
+ log2_td_accurate(&logb2h, &logb2m, &logb2l, E, ed, index, zh, zl, logih, logim);
538
+
539
+ ReturnRoundUpwards3(logb2h, logb2m, logb2l);
540
+
541
+ }
542
+
543
+
544
+ /*************************************************************
545
+ *************************************************************
546
+ * ROUNDED DOWNWARDS *
547
+ *************************************************************
548
+ *************************************************************/
549
+ double log2_rd(double x) {
550
+ db_number xdb;
551
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
552
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
553
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
554
+ double logb2h, logb2m, logb2l;
555
+ int E, index;
556
+
557
+
558
+ E=0;
559
+ xdb.d=x;
560
+
561
+ /* Filter cases */
562
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
563
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
564
+ return -1.0/0.0;
565
+ } /* log(+/-0) = -Inf */
566
+ if (xdb.i[HI] < 0){
567
+ return (x-x)/0; /* log(-x) = Nan */
568
+ }
569
+ /* Subnormal number */
570
+ E = -52;
571
+ xdb.d *= ((db_number) ((double) two52)).d; /* make x a normal number */
572
+ }
573
+
574
+ if (xdb.i[HI] >= 0x7ff00000){
575
+ return x+x; /* Inf or Nan */
576
+ }
577
+
578
+
579
+ /* Extract exponent and mantissa
580
+ Do range reduction,
581
+ yielding E holding the exponent and
582
+ y the mantissa between sqrt(2)/2 and sqrt(2)
583
+ */
584
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
585
+ index = (xdb.i[HI] & 0x000fffff);
586
+
587
+
588
+ /* Test now if the argument is an exact power of 2
589
+ i.e. if the mantissa is exactly 1 (0x0..0 with the implicit bit)
590
+ This test is necessary for filtering out the cases where the final
591
+ rounding test cannot distinguish between an exact algebraic
592
+ number and a hard case to round
593
+ */
594
+
595
+ if ((index | xdb.i[LO]) == 0) {
596
+ /* Handle the "trivial" case for log2:
597
+ The argument is an exact power of 2, return thus
598
+ just the exponent of the number
599
+ */
600
+
601
+ return (double) E;
602
+
603
+ }
604
+
605
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
606
+ index = (index + (1<<(20-L-1))) >> (20-L);
607
+
608
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
609
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
610
+ xdb.i[HI] -= 0x00100000;
611
+ E++;
612
+ }
613
+ y = xdb.d;
614
+ index = index & INDEXMASK;
615
+ /* Cast integer E into double ed for multiplication later */
616
+ ed = (double) E;
617
+
618
+ /*
619
+ Read tables:
620
+ Read one float for ri
621
+ Read the first two doubles for -log(r_i) (out of three)
622
+
623
+ Organization of the table:
624
+
625
+ one struct entry per index, the struct entry containing
626
+ r, logih, logim and logil in this order
627
+ */
628
+
629
+
630
+ ri = argredtable[index].ri;
631
+ /*
632
+ Actually we don't need the logarithm entries now
633
+ Move the following two lines to the eventual reconstruction
634
+ As long as we don't have any if in the following code, we can overlap
635
+ memory access with calculations
636
+ */
637
+ logih = argredtable[index].logih;
638
+ logim = argredtable[index].logim;
639
+
640
+ /* Do range reduction:
641
+
642
+ zh + zl = y * ri - 1.0 correctly
643
+
644
+ Correctness is assured by use of Mul12 and Add12
645
+ even if we don't force ri to have its LSBs set to zero
646
+
647
+ Discard zl for higher monome degrees
648
+ */
649
+
650
+ Mul12(&yrih, &yril, y, ri);
651
+ th = yrih - 1.0;
652
+ Add12Cond(zh, zl, th, yril);
653
+
654
+ /*
655
+ Polynomial evaluation
656
+
657
+ Use a 7 degree polynomial
658
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
659
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
660
+ using an ad hoc method
661
+
662
+ */
663
+
664
+
665
+
666
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
667
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
668
+ #else
669
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
670
+ #endif
671
+
672
+ Mul12(&zhSquareh, &zhSquarel, zh, zh);
673
+ polyUpper = polyHorner * (zh * zhSquareh);
674
+ zhSquareHalfh = zhSquareh * -0.5;
675
+ zhSquareHalfl = zhSquarel * -0.5;
676
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
677
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
678
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l);
679
+
680
+ /* Reconstruction
681
+
682
+ Read logih and logim in the tables (already done)
683
+
684
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
685
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
686
+
687
+ Carry out everything in double double precision
688
+
689
+ */
690
+
691
+ /*
692
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
693
+ Multiplication of ed (double E) and log2h is thus correct
694
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
695
+ is enough for the accurate phase
696
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
697
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
698
+ triple double values. We have to take it into account for the accurate phase
699
+ basic procedures for addition and multiplication
700
+ The condition on the next Add12 is verified as log2m is smaller than log2h
701
+ and both are scaled by ed
702
+ */
703
+
704
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
705
+
706
+ /* Add logih and logim to ph and pl
707
+
708
+ We must use conditioned Add22 as logih can move over ph
709
+ */
710
+
711
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
712
+
713
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
714
+
715
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
716
+
717
+ /* Change logarithm base from natural base to base 2 by multiplying */
718
+
719
+ Mul22(&logb2h, &logb2m, log2invh, log2invl, logh, logm);
720
+
721
+ /* Rounding test and eventual return or call to the accurate function */
722
+
723
+ if(E==0)
724
+ roundcst = RDROUNDCST1;
725
+ else
726
+ roundcst = RDROUNDCST2;
727
+
728
+ TEST_AND_RETURN_RD(logb2h, logb2m, roundcst);
729
+
730
+ #if DEBUG
731
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
732
+ #endif
733
+
734
+ log2_td_accurate(&logb2h, &logb2m, &logb2l, E, ed, index, zh, zl, logih, logim);
735
+
736
+ ReturnRoundDownwards3(logb2h, logb2m, logb2l);
737
+ }
738
+
739
+ /*************************************************************
740
+ *************************************************************
741
+ * ROUNDED TOWARDS ZERO *
742
+ *************************************************************
743
+ *************************************************************/
744
+ double log2_rz(double x) { /* Base-2 logarithm of x rounded towards zero: quick double-double phase first, then log2_td_accurate() when the rounding test does not return. */
745
+ db_number xdb;
746
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
747
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
748
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
749
+ double logb2h, logb2m, logb2l;
750
+ int E, index;
751
+
752
+
753
+ E=0; /* exponent accumulator; preset to -52 below when x is subnormal */
754
+ xdb.d=x;
755
+
756
+ /* Filter special cases: zeros, negative arguments and subnormals here, Inf/NaN just below */
757
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022); NOTE(review): negative x presumably also lands here because i[HI] looks signed (see the "< 0" test below) - confirm against db_number */
758
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
759
+ return -1.0/0.0;
760
+ } /* log(+/-0) = -Inf */
761
+ if (xdb.i[HI] < 0){
762
+ return (x-x)/0; /* logarithm of a negative number = NaN */
763
+ }
764
+ /* Subnormal number */
765
+ E = -52; /* compensates for the scaling on the next line, assuming two52 is 2^52 - TODO confirm */
766
+ xdb.d *= ((db_number) ((double) two52)).d; /* make x a normal number */
767
+ }
768
+
769
+ if (xdb.i[HI] >= 0x7ff00000){
770
+ return x+x; /* Inf or NaN */
771
+ }
772
+
773
+
774
+ /* Extract exponent and mantissa
775
+ Do range reduction,
776
+ yielding E holding the exponent and
777
+ y the mantissa between sqrt(2)/2 and sqrt(2)
778
+ */
779
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent and remove the bias of 1023 */
780
+ index = (xdb.i[HI] & 0x000fffff); /* the 20 mantissa bits held in the high word */
781
+
782
+
783
+ /* Test now if the argument is an exact power of 2
784
+ i.e. if the mantissa is exactly 1 (0x0..0 with the implicit bit)
785
+ This test is necessary for filtering out the cases where the final
786
+ rounding test cannot distinguish between an exact algebraic
787
+ number and a hard case to round
788
+ */
789
+
790
+ if ((index | xdb.i[LO]) == 0) {
791
+ /* Handle the "trivial" case for log2:
792
+ The argument is an exact power of 2, thus return
793
+ just the exponent of the number
794
+ */
795
+
796
+ return (double) E;
797
+
798
+ }
799
+
800
+ xdb.i[HI] = index | 0x3ff00000; /* set the exponent to 0 (biased value 0x3ff) */
801
+ index = (index + (1<<(20-L-1))) >> (20-L); /* round the 20 mantissa bits to the nearest L-bit table index */
802
+
803
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
804
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
805
+ xdb.i[HI] -= 0x00100000; /* halve xdb.d by decrementing its exponent field ... */
806
+ E++; /* ... and compensate in E */
807
+ }
808
+ y = xdb.d;
809
+ index = index & INDEXMASK;
810
+ /* Cast integer E into double ed for multiplication later */
811
+ ed = (double) E;
812
+
813
+ /*
814
+ Read tables:
815
+ Read one float for ri
816
+ Read the first two doubles for -log(r_i) (out of three)
817
+
818
+ Organization of the table:
819
+
820
+ one struct entry per index, the struct entry containing
821
+ r, logih, logim and logil in this order
822
+ */
823
+
824
+
825
+ ri = argredtable[index].ri;
826
+ /*
827
+ Actually we don't need the logarithm entries now
828
+ Move the following two lines to the eventual reconstruction
829
+ As long as we don't have any if in the following code, we can overlap
830
+ memory access with calculations
831
+ */
832
+ logih = argredtable[index].logih;
833
+ logim = argredtable[index].logim;
834
+
835
+ /* Do range reduction:
836
+
837
+ zh + zl = y * ri - 1.0 correctly
838
+
839
+ Correctness is assured by use of Mul12 and Add12
840
+ even if we don't force ri to have its LSBs set to zero
841
+
842
+ Discard zl for higher monome degrees
843
+ */
844
+
845
+ Mul12(&yrih, &yril, y, ri);
846
+ th = yrih - 1.0;
847
+ Add12Cond(zh, zl, th, yril);
848
+
849
+ /*
850
+ Polynomial evaluation
851
+
852
+ Use a 7 degree polynomial
853
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
854
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
855
+ using an ad hoc method
856
+
857
+ */
858
+
859
+
860
+
861
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
862
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
863
+ #else
864
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
865
+ #endif
866
+
867
+ Mul12(&zhSquareh, &zhSquarel, zh, zh); /* zh^2 as the pair zhSquareh + zhSquarel */
868
+ polyUpper = polyHorner * (zh * zhSquareh); /* zh^3 * (c3 + c4*zh + ... + c7*zh^4): the degree 3..7 terms */
869
+ zhSquareHalfh = zhSquareh * -0.5; /* -zh^2/2, high part */
870
+ zhSquareHalfl = zhSquarel * -0.5; /* -zh^2/2, low part */
871
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl)); /* -zh*zl is the cross term of -(zh+zl)^2/2 */
872
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl); /* (zh + zl) - zh^2/2 */
873
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l); /* ph + pl: the polynomial value used as log(1+z) below */
874
+
875
+ /* Reconstruction
876
+
877
+ Read logih and logim in the tables (already done)
878
+
879
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
880
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
881
+
882
+ Carry out everything in double double precision
883
+
884
+ */
885
+
886
+ /*
887
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
888
+ Multiplication of ed (double E) and log2h is thus correct
889
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
890
+ is enough for the accurate phase
891
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
892
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
893
+ triple double values. We have to take it into account for the accurate phase
894
+ basic procedures for addition and multiplication
895
+ The condition on the next Add12 is verified as log2m is smaller than log2h
896
+ and both are scaled by ed
897
+ */
898
+
899
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
900
+
901
+ /* Add logih and logim to ph and pl
902
+
903
+ We must use conditioned Add22 as logih can move over ph
904
+ */
905
+
906
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
907
+
908
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
909
+
910
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
911
+
912
+ /* Change logarithm base from natural base to base 2 by multiplying */
913
+
914
+ Mul22(&logb2h, &logb2m, log2invh, log2invl, logh, logm);
915
+
916
+ /* Rounding test and possible return, or call to the accurate function */
917
+
918
+ if(E==0) /* NOTE(review): the RD-named constants are used for this RZ test; presumably they are shared by all directed rounding modes - confirm */
919
+ roundcst = RDROUNDCST1;
920
+ else
921
+ roundcst = RDROUNDCST2;
922
+
923
+ TEST_AND_RETURN_RZ(logb2h, logb2m, roundcst);
924
+
925
+ #if DEBUG
926
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
927
+ #endif
928
+
929
+ log2_td_accurate(&logb2h, &logb2m, &logb2l, E, ed, index, zh, zl, logih, logim);
930
+
931
+ ReturnRoundTowardsZero3(logb2h, logb2m, logb2l);
932
+ }
933
+
934
+
935
+