intervals 0.3.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. data/VERSION.txt +1 -0
  2. data/ext/crlibm/AUTHORS +2 -0
  3. data/ext/crlibm/COPYING +504 -0
  4. data/ext/crlibm/ChangeLog +80 -0
  5. data/ext/crlibm/INSTALL +182 -0
  6. data/ext/crlibm/Makefile.am +84 -0
  7. data/ext/crlibm/Makefile.in +530 -0
  8. data/ext/crlibm/NEWS +0 -0
  9. data/ext/crlibm/README +31 -0
  10. data/ext/crlibm/TODO +47 -0
  11. data/ext/crlibm/VERSION +1 -0
  12. data/ext/crlibm/aclocal.m4 +989 -0
  13. data/ext/crlibm/atan-itanium.c +846 -0
  14. data/ext/crlibm/atan-pentium.c +261 -0
  15. data/ext/crlibm/atan_accurate.c +244 -0
  16. data/ext/crlibm/atan_accurate.h +191 -0
  17. data/ext/crlibm/atan_fast.c +324 -0
  18. data/ext/crlibm/atan_fast.h +678 -0
  19. data/ext/crlibm/config.guess +1461 -0
  20. data/ext/crlibm/config.sub +1566 -0
  21. data/ext/crlibm/configure +7517 -0
  22. data/ext/crlibm/configure.ac +364 -0
  23. data/ext/crlibm/crlibm.h +125 -0
  24. data/ext/crlibm/crlibm_config.h +149 -0
  25. data/ext/crlibm/crlibm_config.h.in +148 -0
  26. data/ext/crlibm/crlibm_private.c +293 -0
  27. data/ext/crlibm/crlibm_private.h +658 -0
  28. data/ext/crlibm/csh_fast.c +631 -0
  29. data/ext/crlibm/csh_fast.h +771 -0
  30. data/ext/crlibm/double-extended.h +496 -0
  31. data/ext/crlibm/exp-td.c +962 -0
  32. data/ext/crlibm/exp-td.h +685 -0
  33. data/ext/crlibm/exp_accurate.c +197 -0
  34. data/ext/crlibm/exp_accurate.h +85 -0
  35. data/ext/crlibm/gappa/log-de-E0-logir0.gappa +106 -0
  36. data/ext/crlibm/gappa/log-de-E0.gappa +79 -0
  37. data/ext/crlibm/gappa/log-de.gappa +81 -0
  38. data/ext/crlibm/gappa/log-td-E0-logir0.gappa +126 -0
  39. data/ext/crlibm/gappa/log-td-E0.gappa +143 -0
  40. data/ext/crlibm/gappa/log-td-accurate-E0-logir0.gappa +230 -0
  41. data/ext/crlibm/gappa/log-td-accurate-E0.gappa +213 -0
  42. data/ext/crlibm/gappa/log-td-accurate.gappa +217 -0
  43. data/ext/crlibm/gappa/log-td.gappa +156 -0
  44. data/ext/crlibm/gappa/trigoSinCosCase3.gappa +204 -0
  45. data/ext/crlibm/gappa/trigoTanCase2.gappa +73 -0
  46. data/ext/crlibm/install-sh +269 -0
  47. data/ext/crlibm/log-de.c +431 -0
  48. data/ext/crlibm/log-de.h +732 -0
  49. data/ext/crlibm/log-td.c +852 -0
  50. data/ext/crlibm/log-td.h +819 -0
  51. data/ext/crlibm/log10-td.c +906 -0
  52. data/ext/crlibm/log10-td.h +823 -0
  53. data/ext/crlibm/log2-td.c +935 -0
  54. data/ext/crlibm/log2-td.h +821 -0
  55. data/ext/crlibm/maple/atan.mpl +359 -0
  56. data/ext/crlibm/maple/common-procedures.mpl +997 -0
  57. data/ext/crlibm/maple/csh.mpl +446 -0
  58. data/ext/crlibm/maple/double-extended.mpl +151 -0
  59. data/ext/crlibm/maple/exp-td.mpl +195 -0
  60. data/ext/crlibm/maple/log-de.mpl +243 -0
  61. data/ext/crlibm/maple/log-td.mpl +316 -0
  62. data/ext/crlibm/maple/log10-td.mpl +345 -0
  63. data/ext/crlibm/maple/log2-td.mpl +334 -0
  64. data/ext/crlibm/maple/trigo.mpl +728 -0
  65. data/ext/crlibm/maple/triple-double.mpl +58 -0
  66. data/ext/crlibm/missing +198 -0
  67. data/ext/crlibm/mkinstalldirs +40 -0
  68. data/ext/crlibm/rem_pio2_accurate.c +219 -0
  69. data/ext/crlibm/rem_pio2_accurate.h +53 -0
  70. data/ext/crlibm/scs_lib/AUTHORS +3 -0
  71. data/ext/crlibm/scs_lib/COPYING +504 -0
  72. data/ext/crlibm/scs_lib/ChangeLog +16 -0
  73. data/ext/crlibm/scs_lib/INSTALL +215 -0
  74. data/ext/crlibm/scs_lib/Makefile.am +18 -0
  75. data/ext/crlibm/scs_lib/Makefile.in +328 -0
  76. data/ext/crlibm/scs_lib/NEWS +0 -0
  77. data/ext/crlibm/scs_lib/README +9 -0
  78. data/ext/crlibm/scs_lib/TODO +4 -0
  79. data/ext/crlibm/scs_lib/addition_scs.c +623 -0
  80. data/ext/crlibm/scs_lib/config.guess +1461 -0
  81. data/ext/crlibm/scs_lib/config.sub +1566 -0
  82. data/ext/crlibm/scs_lib/configure +6226 -0
  83. data/ext/crlibm/scs_lib/division_scs.c +110 -0
  84. data/ext/crlibm/scs_lib/double2scs.c +174 -0
  85. data/ext/crlibm/scs_lib/install-sh +269 -0
  86. data/ext/crlibm/scs_lib/missing +198 -0
  87. data/ext/crlibm/scs_lib/mkinstalldirs +40 -0
  88. data/ext/crlibm/scs_lib/multiplication_scs.c +456 -0
  89. data/ext/crlibm/scs_lib/poly_fct.c +112 -0
  90. data/ext/crlibm/scs_lib/print_scs.c +73 -0
  91. data/ext/crlibm/scs_lib/rand_scs.c +63 -0
  92. data/ext/crlibm/scs_lib/scs.h +353 -0
  93. data/ext/crlibm/scs_lib/scs2double.c +391 -0
  94. data/ext/crlibm/scs_lib/scs2mpf.c +58 -0
  95. data/ext/crlibm/scs_lib/scs2mpfr.c +61 -0
  96. data/ext/crlibm/scs_lib/scs_private.c +23 -0
  97. data/ext/crlibm/scs_lib/scs_private.h +133 -0
  98. data/ext/crlibm/scs_lib/tests/tbx_timing.h +102 -0
  99. data/ext/crlibm/scs_lib/wrapper_scs.h +486 -0
  100. data/ext/crlibm/scs_lib/zero_scs.c +52 -0
  101. data/ext/crlibm/stamp-h.in +1 -0
  102. data/ext/crlibm/tests/Makefile.am +43 -0
  103. data/ext/crlibm/tests/Makefile.in +396 -0
  104. data/ext/crlibm/tests/blind_test.c +148 -0
  105. data/ext/crlibm/tests/generate_test_vectors.c +258 -0
  106. data/ext/crlibm/tests/soak_test.c +334 -0
  107. data/ext/crlibm/tests/test_common.c +627 -0
  108. data/ext/crlibm/tests/test_common.h +28 -0
  109. data/ext/crlibm/tests/test_perf.c +570 -0
  110. data/ext/crlibm/tests/test_val.c +249 -0
  111. data/ext/crlibm/trigo_accurate.c +500 -0
  112. data/ext/crlibm/trigo_accurate.h +331 -0
  113. data/ext/crlibm/trigo_fast.c +1219 -0
  114. data/ext/crlibm/trigo_fast.h +639 -0
  115. data/ext/crlibm/triple-double.h +878 -0
  116. data/ext/extconf.rb +31 -0
  117. data/ext/fpu.c +107 -0
  118. data/ext/jamis-mod.rb +591 -0
  119. data/lib/fpu.rb +287 -0
  120. data/lib/interval.rb +1170 -0
  121. data/lib/intervals.rb +212 -0
  122. data/lib/struct_float.rb +133 -0
  123. data/test/data_atan.txt +360 -0
  124. data/test/data_cos.txt +346 -0
  125. data/test/data_cosh.txt +3322 -0
  126. data/test/data_exp.txt +3322 -0
  127. data/test/data_log.txt +141 -0
  128. data/test/data_sin.txt +140 -0
  129. data/test/data_sinh.txt +3322 -0
  130. data/test/data_tan.txt +342 -0
  131. metadata +186 -0
@@ -0,0 +1,852 @@
1
+ /*
2
+ * This function computes log, correctly rounded,
3
+ * using experimental techniques based on triple-double arithmetic
4
+
5
+ THIS IS EXPERIMENTAL SOFTWARE
6
+
7
+ *
8
+ * Author : Christoph Lauter
9
+ * christoph.lauter at ens-lyon.fr
10
+ *
11
+
12
+ To have it replace the crlibm log, do:
13
+
14
+ gcc -DHAVE_CONFIG_H -I. -fPIC -O2 -c log-td.c; mv log-td.o log_fast.o; make
15
+
16
+ */
17
+
18
+
19
+ #include <stdio.h>
20
+ #include <stdlib.h>
21
+ #include "crlibm.h"
22
+ #include "crlibm_private.h"
23
+ #include "triple-double.h"
24
+ #include "log-td.h"
25
+
26
+ #define AVOID_FMA 0
27
+
28
+
29
+
30
+ void log_td_accurate(double *logh, double *logm, double *logl, int E, double ed, int index, double zh, double zl, double logih, double logim) {
31
+ double highPoly, t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l, t8h, t8l, t9h, t9l, t10h, t10l, t11h, t11l;
32
+ double t12h, t12l, t13h, t13l, t14h, t14l, zSquareh, zSquarem, zSquarel, zCubeh, zCubem, zCubel, higherPolyMultZh, higherPolyMultZm;
33
+ double higherPolyMultZl, zSquareHalfh, zSquareHalfm, zSquareHalfl, polyWithSquareh, polyWithSquarem, polyWithSquarel;
34
+ double polyh, polym, polyl, logil, logyh, logym, logyl, loghover, logmover, loglover, log2edhover, log2edmover, log2edlover;
35
+ double log2edh, log2edm, log2edl;
36
+
37
+
38
+ #if EVAL_PERF
39
+ crlibm_second_step_taken++;
40
+ #endif
41
+
42
+
43
+ /* Accurate phase:
44
+
45
+ Argument reduction is already done.
46
+ We must return logh, logm and logl representing the intermediate result in 118 bits precision.
47
+
48
+ We use a 14 degree polynomial, computing the first 3 (the first is 0) coefficients in triple double,
49
+ calculating the next 7 coefficients in double double arithmetics and the last in double.
50
+
51
+ We must account for zl starting with the monome of degree 4 (7^3 + 53 - 7 >> 118); so
52
+ double double calculations won't account for it.
53
+
54
+ */
55
+
56
+ /* Start of the horner scheme */
57
+
58
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
59
+ highPoly = FMA(FMA(FMA(FMA(accPolyC14,zh,accPolyC13),zh,accPolyC12),zh,accPolyC11),zh,accPolyC10);
60
+ #else
61
+ highPoly = accPolyC10 + zh * (accPolyC11 + zh * (accPolyC12 + zh * (accPolyC13 + zh * accPolyC14)));
62
+ #endif
63
+
64
+ /* We want to write
65
+
66
+ accPolyC3 + zh * (accPoly4 + zh * (accPoly5 + zh * (accPoly6 + zh * (accPoly7 + zh * (accPoly8 + zh * (accPoly9 + zh * highPoly))))));
67
+ ( t14 t13 t12 t11 t10 t9 t8 t7 t6 t5 t4 t3 t2 t1 )
68
+
69
+ with all additions and multiplications in double double arithmetics
70
+ but we will produce intermediate results labelled t1h/t1l thru t14h/t14l
71
+ */
72
+
73
+ Mul12(&t1h, &t1l, zh, highPoly);
74
+ Add22(&t2h, &t2l, accPolyC9h, accPolyC9l, t1h, t1l);
75
+ Mul22(&t3h, &t3l, zh, zl, t2h, t2l);
76
+ Add22(&t4h, &t4l, accPolyC8h, accPolyC8l, t3h, t3l);
77
+ Mul22(&t5h, &t5l, zh, zl, t4h, t4l);
78
+ Add22(&t6h, &t6l, accPolyC7h, accPolyC7l, t5h, t5l);
79
+ Mul22(&t7h, &t7l, zh, zl, t6h, t6l);
80
+ Add22(&t8h, &t8l, accPolyC6h, accPolyC6l, t7h, t7l);
81
+ Mul22(&t9h, &t9l, zh, zl, t8h, t8l);
82
+ Add22(&t10h, &t10l, accPolyC5h, accPolyC5l, t9h, t9l);
83
+ Mul22(&t11h, &t11l, zh, zl, t10h, t10l);
84
+ Add22(&t12h, &t12l, accPolyC4h, accPolyC4l, t11h, t11l);
85
+ Mul22(&t13h, &t13l, zh, zl, t12h, t12l);
86
+ Add22(&t14h, &t14l, accPolyC3h, accPolyC3l, t13h, t13l);
87
+
88
+ /* We must now prepare (zh + zl)^2 and (zh + zl)^3 as triple doubles */
89
+
90
+ Mul23(&zSquareh, &zSquarem, &zSquarel, zh, zl, zh, zl);
91
+ Mul233(&zCubeh, &zCubem, &zCubel, zh, zl, zSquareh, zSquarem, zSquarel);
92
+
93
+ /* We can now multiplicate the middle and higher polynomial by z^3 */
94
+
95
+ Mul233(&higherPolyMultZh, &higherPolyMultZm, &higherPolyMultZl, t14h, t14l, zCubeh, zCubem, zCubel);
96
+
97
+ /* Multiply now z^2 by -1/2 (exact op) and add to middle and higher polynomial */
98
+
99
+ zSquareHalfh = zSquareh * -0.5;
100
+ zSquareHalfm = zSquarem * -0.5;
101
+ zSquareHalfl = zSquarel * -0.5;
102
+
103
+ Add33(&polyWithSquareh, &polyWithSquarem, &polyWithSquarel,
104
+ zSquareHalfh, zSquareHalfm, zSquareHalfl,
105
+ higherPolyMultZh, higherPolyMultZm, higherPolyMultZl);
106
+
107
+ /* Add now zh and zl to obtain the polynomial evaluation result */
108
+
109
+ Add233(&polyh, &polym, &polyl, zh, zl, polyWithSquareh, polyWithSquarem, polyWithSquarel);
110
+
111
+ /* Reconstruct now log(y) = log(1 + z) - log(ri) by adding logih, logim, logil
112
+ logil has not been read to the time, do this first
113
+ */
114
+
115
+ logil = argredtable[index].logil;
116
+
117
+ Add33(&logyh, &logym, &logyl, logih, logim, logil, polyh, polym, polyl);
118
+
119
+ /* Multiply log2 with E, i.e. log2h, log2m, log2l by ed
120
+ ed is always less than 2^(12) and log2h and log2m are stored with at least 12 trailing zeros
121
+ So multiplying naively is correct (up to 134 bits at least)
122
+
123
+ The final result is thus obtained by adding log2 * E to log(y)
124
+ */
125
+
126
+ log2edhover = log2h * ed;
127
+ log2edmover = log2m * ed;
128
+ log2edlover = log2l * ed;
129
+
130
+ /* It may be necessary to renormalize the tabulated value (multiplied by ed) before adding
131
+ the to the log(y)-result
132
+
133
+ If needed, uncomment the following Renormalize3-Statement and comment out the copies
134
+ following it.
135
+ */
136
+
137
+ /* Renormalize3(&log2edh, &log2edm, &log2edl, log2edhover, log2edmover, log2edlover); */
138
+
139
+ log2edh = log2edhover;
140
+ log2edm = log2edmover;
141
+ log2edl = log2edlover;
142
+
143
+ Add33(&loghover, &logmover, &loglover, log2edh, log2edm, log2edl, logyh, logym, logyl);
144
+
145
+ /* Since we can not guarantee in each addition and multiplication procedure that
146
+ the results are not overlapping, we must renormalize the result before handing
147
+ it over to the final rounding
148
+ */
149
+
150
+ Renormalize3(logh,logm,logl,loghover,logmover,loglover);
151
+
152
+ }
153
+
154
+
155
+
156
+ /*************************************************************
157
+ *************************************************************
158
+ * ROUNDED TO NEAREST *
159
+ *************************************************************
160
+ *************************************************************/
161
+ double log_rn(double x){
162
+ db_number xdb;
163
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
164
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
165
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, logl, roundcst;
166
+ int E, index;
167
+
168
+ E=0;
169
+ xdb.d=x;
170
+
171
+ /* Filter cases */
172
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
173
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
174
+ return -1.0/0.0;
175
+ } /* log(+/-0) = -Inf */
176
+ if (xdb.i[HI] < 0){
177
+ return (x-x)/0; /* log(-x) = Nan */
178
+ }
179
+ /* Subnormal number */
180
+ E = -52;
181
+ xdb.d *= ((db_number) ((double) two52)).d; /* make x a normal number */
182
+ }
183
+
184
+ if (xdb.i[HI] >= 0x7ff00000){
185
+ return x+x; /* Inf or Nan */
186
+ }
187
+
188
+
189
+ /* Extract exponent and mantissa
190
+ Do range reduction,
191
+ yielding to E holding the exponent and
192
+ y the mantissa between sqrt(2)/2 and sqrt(2)
193
+ */
194
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
195
+ index = (xdb.i[HI] & 0x000fffff);
196
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
197
+ index = (index + (1<<(20-L-1))) >> (20-L);
198
+
199
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
200
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
201
+ xdb.i[HI] -= 0x00100000;
202
+ E++;
203
+ }
204
+ y = xdb.d;
205
+ index = index & INDEXMASK;
206
+ /* Cast integer E into double ed for multiplication later */
207
+ ed = (double) E;
208
+
209
+ /*
210
+ Read tables:
211
+ Read one float for ri
212
+ Read the first two doubles for -log(r_i) (out of three)
213
+
214
+ Organization of the table:
215
+
216
+ one struct entry per index, the struct entry containing
217
+ r, logih, logim and logil in this order
218
+ */
219
+
220
+
221
+ ri = argredtable[index].ri;
222
+ /*
223
+ Actually we don't need the logarithm entries now
224
+ Move the following two lines to the eventual reconstruction
225
+ As long as we don't have any if in the following code, we can overlap
226
+ memory access with calculations
227
+ */
228
+ logih = argredtable[index].logih;
229
+ logim = argredtable[index].logim;
230
+
231
+ /* Do range reduction:
232
+
233
+ zh + zl = y * ri - 1.0 correctly
234
+
235
+ Correctness is assured by use of Mul12 and Add12
236
+ even if we don't force ri to have its' LSBs set to zero
237
+
238
+ Discard zl for higher monome degrees
239
+ */
240
+
241
+ Mul12(&yrih, &yril, y, ri);
242
+ th = yrih - 1.0;
243
+ Add12Cond(zh, zl, th, yril);
244
+
245
+ /*
246
+ Polynomial evaluation
247
+
248
+ Use a 7 degree polynomial
249
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
250
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
251
+ using an ad hoc method
252
+
253
+ */
254
+
255
+
256
+
257
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
258
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
259
+ #else
260
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
261
+ #endif
262
+
263
+ Mul12(&zhSquareh, &zhSquarel, zh, zh);
264
+ polyUpper = polyHorner * (zh * zhSquareh);
265
+ zhSquareHalfh = zhSquareh * -0.5;
266
+ zhSquareHalfl = zhSquarel * -0.5;
267
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
268
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
269
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l);
270
+
271
+ /* Reconstruction
272
+
273
+ Read logih and logim in the tables (already done)
274
+
275
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
276
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
277
+
278
+ Carry out everything in double double precision
279
+
280
+ */
281
+
282
+ /*
283
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
284
+ Multiplication of ed (double E) and log2h is thus correct
285
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
286
+ is enough for the accurate phase
287
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
288
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
289
+ triple double values. We have to take it into account for the accurate phase
290
+ basic procedures for addition and multiplication
291
+ The condition on the next Add12 is verified as log2m is smaller than log2h
292
+ and both are scaled by ed
293
+ */
294
+
295
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
296
+
297
+ /* Add logih and logim to ph and pl
298
+
299
+ We must use conditioned Add22 as logih can move over ph
300
+ */
301
+
302
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
303
+
304
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
305
+
306
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
307
+
308
+ /* Rounding test and eventual return or call to the accurate function */
309
+
310
+ if(E==0)
311
+ roundcst = ROUNDCST1;
312
+ else
313
+ roundcst = ROUNDCST2;
314
+
315
+
316
+ if(logh == (logh + (logm * roundcst)))
317
+ return logh;
318
+ else
319
+ {
320
+
321
+ #if DEBUG
322
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
323
+ #endif
324
+
325
+ log_td_accurate(&logh, &logm, &logl, E, ed, index, zh, zl, logih, logim);
326
+
327
+ ReturnRoundToNearest3(logh, logm, logl);
328
+
329
+ } /* Accurate phase launched */
330
+ }
331
+
332
+
333
+ /*************************************************************
334
+ *************************************************************
335
+ * ROUNDED UPWARDS *
336
+ *************************************************************
337
+ *************************************************************/
338
+ double log_ru(double x) {
339
+ db_number xdb;
340
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
341
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
342
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, logl, roundcst;
343
+ int E, index;
344
+
345
+ if (x == 1.0) return 0.0; /* This the only case in which the image under log of a double is a double. */
346
+
347
+ E=0;
348
+ xdb.d=x;
349
+
350
+ /* Filter cases */
351
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
352
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
353
+ return -1.0/0.0;
354
+ } /* log(+/-0) = -Inf */
355
+ if (xdb.i[HI] < 0){
356
+ return (x-x)/0; /* log(-x) = Nan */
357
+ }
358
+ /* Subnormal number */
359
+ E = -52;
360
+ xdb.d *= ((db_number) ((double) two52)).d; /* make x a normal number */
361
+ }
362
+
363
+ if (xdb.i[HI] >= 0x7ff00000){
364
+ return x+x; /* Inf or Nan */
365
+ }
366
+
367
+
368
+ /* Extract exponent and mantissa
369
+ Do range reduction,
370
+ yielding to E holding the exponent and
371
+ y the mantissa between sqrt(2)/2 and sqrt(2)
372
+ */
373
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
374
+ index = (xdb.i[HI] & 0x000fffff);
375
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
376
+ index = (index + (1<<(20-L-1))) >> (20-L);
377
+
378
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
379
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
380
+ xdb.i[HI] -= 0x00100000;
381
+ E++;
382
+ }
383
+ y = xdb.d;
384
+ index = index & INDEXMASK;
385
+ /* Cast integer E into double ed for multiplication later */
386
+ ed = (double) E;
387
+
388
+ /*
389
+ Read tables:
390
+ Read one float for ri
391
+ Read the first two doubles for -log(r_i) (out of three)
392
+
393
+ Organization of the table:
394
+
395
+ one struct entry per index, the struct entry containing
396
+ r, logih, logim and logil in this order
397
+ */
398
+
399
+
400
+ ri = argredtable[index].ri;
401
+ /*
402
+ Actually we don't need the logarithm entries now
403
+ Move the following two lines to the eventual reconstruction
404
+ As long as we don't have any if in the following code, we can overlap
405
+ memory access with calculations
406
+ */
407
+ logih = argredtable[index].logih;
408
+ logim = argredtable[index].logim;
409
+
410
+ /* Do range reduction:
411
+
412
+ zh + zl = y * ri - 1.0 correctly
413
+
414
+ Correctness is assured by use of Mul12 and Add12
415
+ even if we don't force ri to have its' LSBs set to zero
416
+
417
+ Discard zl for higher monome degrees
418
+ */
419
+
420
+ Mul12(&yrih, &yril, y, ri);
421
+ th = yrih - 1.0;
422
+ Add12Cond(zh, zl, th, yril);
423
+
424
+ /*
425
+ Polynomial evaluation
426
+
427
+ Use a 7 degree polynomial
428
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
429
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
430
+ using an ad hoc method
431
+
432
+ */
433
+
434
+
435
+
436
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
437
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
438
+ #else
439
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
440
+ #endif
441
+
442
+ Mul12(&zhSquareh, &zhSquarel, zh, zh);
443
+ polyUpper = polyHorner * (zh * zhSquareh);
444
+ zhSquareHalfh = zhSquareh * -0.5;
445
+ zhSquareHalfl = zhSquarel * -0.5;
446
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
447
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
448
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l);
449
+
450
+ /* Reconstruction
451
+
452
+ Read logih and logim in the tables (already done)
453
+
454
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
455
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
456
+
457
+ Carry out everything in double double precision
458
+
459
+ */
460
+
461
+ /*
462
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
463
+ Multiplication of ed (double E) and log2h is thus correct
464
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
465
+ is enough for the accurate phase
466
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
467
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
468
+ triple double values. We have to take it into account for the accurate phase
469
+ basic procedures for addition and multiplication
470
+ The condition on the next Add12 is verified as log2m is smaller than log2h
471
+ and both are scaled by ed
472
+ */
473
+
474
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
475
+
476
+ /* Add logih and logim to ph and pl
477
+
478
+ We must use conditioned Add22 as logih can move over ph
479
+ */
480
+
481
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
482
+
483
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
484
+
485
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
486
+
487
+ /* Rounding test and eventual return or call to the accurate function */
488
+
489
+ if(E==0)
490
+ roundcst = RDROUNDCST1;
491
+ else
492
+ roundcst = RDROUNDCST2;
493
+
494
+ TEST_AND_RETURN_RU(logh, logm, roundcst);
495
+
496
+ #if DEBUG
497
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
498
+ #endif
499
+
500
+ log_td_accurate(&logh, &logm, &logl, E, ed, index, zh, zl, logih, logim);
501
+
502
+ ReturnRoundUpwards3(logh, logm, logl);
503
+
504
+ }
505
+
506
+
507
+ /*************************************************************
508
+ *************************************************************
509
+ * ROUNDED DOWNWARDS *
510
+ *************************************************************
511
+ *************************************************************/
512
+ double log_rd(double x) {
513
+ db_number xdb;
514
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
515
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
516
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, logl, roundcst;
517
+ int E, index;
518
+
519
+ if (x == 1.0) return 0.0; /* This the only case in which the image under log of a double is a double. */
520
+
521
+ E=0;
522
+ xdb.d=x;
523
+
524
+ /* Filter cases */
525
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
526
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
527
+ return -1.0/0.0;
528
+ } /* log(+/-0) = -Inf */
529
+ if (xdb.i[HI] < 0){
530
+ return (x-x)/0; /* log(-x) = Nan */
531
+ }
532
+ /* Subnormal number */
533
+ E = -52;
534
+ xdb.d *= ((db_number) ((double) two52)).d; /* make x a normal number */
535
+ }
536
+
537
+ if (xdb.i[HI] >= 0x7ff00000){
538
+ return x+x; /* Inf or Nan */
539
+ }
540
+
541
+
542
+ /* Extract exponent and mantissa
543
+ Do range reduction,
544
+ yielding to E holding the exponent and
545
+ y the mantissa between sqrt(2)/2 and sqrt(2)
546
+ */
547
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
548
+ index = (xdb.i[HI] & 0x000fffff);
549
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
550
+ index = (index + (1<<(20-L-1))) >> (20-L);
551
+
552
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
553
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
554
+ xdb.i[HI] -= 0x00100000;
555
+ E++;
556
+ }
557
+ y = xdb.d;
558
+ index = index & INDEXMASK;
559
+ /* Cast integer E into double ed for multiplication later */
560
+ ed = (double) E;
561
+
562
+ /*
563
+ Read tables:
564
+ Read one float for ri
565
+ Read the first two doubles for -log(r_i) (out of three)
566
+
567
+ Organization of the table:
568
+
569
+ one struct entry per index, the struct entry containing
570
+ r, logih, logim and logil in this order
571
+ */
572
+
573
+
574
+ ri = argredtable[index].ri;
575
+ /*
576
+ Actually we don't need the logarithm entries now
577
+ Move the following two lines to the eventual reconstruction
578
+ As long as we don't have any if in the following code, we can overlap
579
+ memory access with calculations
580
+ */
581
+ logih = argredtable[index].logih;
582
+ logim = argredtable[index].logim;
583
+
584
+ /* Do range reduction:
585
+
586
+ zh + zl = y * ri - 1.0 correctly
587
+
588
+ Correctness is assured by use of Mul12 and Add12
589
+ even if we don't force ri to have its' LSBs set to zero
590
+
591
+ Discard zl for higher monome degrees
592
+ */
593
+
594
+ Mul12(&yrih, &yril, y, ri);
595
+ th = yrih - 1.0;
596
+ Add12Cond(zh, zl, th, yril);
597
+
598
+ /*
599
+ Polynomial evaluation
600
+
601
+ Use a 7 degree polynomial
602
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
603
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
604
+ using an ad hoc method
605
+
606
+ */
607
+
608
+
609
+
610
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
611
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
612
+ #else
613
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
614
+ #endif
615
+
616
+ Mul12(&zhSquareh, &zhSquarel, zh, zh);
617
+ polyUpper = polyHorner * (zh * zhSquareh);
618
+ zhSquareHalfh = zhSquareh * -0.5;
619
+ zhSquareHalfl = zhSquarel * -0.5;
620
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
621
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
622
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l);
623
+
624
+ /* Reconstruction
625
+
626
+ Read logih and logim in the tables (already done)
627
+
628
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
629
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
630
+
631
+ Carry out everything in double double precision
632
+
633
+ */
634
+
635
+ /*
636
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
637
+ Multiplication of ed (double E) and log2h is thus correct
638
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
639
+ is enough for the accurate phase
640
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
641
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
642
+ triple double values. We have to take it into account for the accurate phase
643
+ basic procedures for addition and multiplication
644
+ The condition on the next Add12 is verified as log2m is smaller than log2h
645
+ and both are scaled by ed
646
+ */
647
+
648
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
649
+
650
+ /* Add logih and logim to ph and pl
651
+
652
+ We must use conditioned Add22 as logih can move over ph
653
+ */
654
+
655
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
656
+
657
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
658
+
659
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
660
+
661
+ /* Rounding test and eventual return or call to the accurate function */
662
+
663
+ if(E==0)
664
+ roundcst = RDROUNDCST1;
665
+ else
666
+ roundcst = RDROUNDCST2;
667
+
668
+ TEST_AND_RETURN_RD(logh, logm, roundcst);
669
+
670
+ #if DEBUG
671
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
672
+ #endif
673
+
674
+ log_td_accurate(&logh, &logm, &logl, E, ed, index, zh, zl, logih, logim);
675
+
676
+ ReturnRoundDownwards3(logh, logm, logl);
677
+ }
678
+
679
+ /*************************************************************
680
+ *************************************************************
681
+ * ROUNDED TOWARDS ZERO *
682
+ *************************************************************
683
+ *************************************************************/
684
+ double log_rz(double x) {
685
+ db_number xdb;
686
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
687
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
688
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, logl, roundcst;
689
+ int E, index;
690
+
691
+ if (x == 1.0) return 0.0; /* This the only case in which the image under log of a double is a double. */
692
+
693
+ E=0;
694
+ xdb.d=x;
695
+
696
+ /* Filter cases */
697
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
698
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
699
+ return -1.0/0.0;
700
+ } /* log(+/-0) = -Inf */
701
+ if (xdb.i[HI] < 0){
702
+ return (x-x)/0; /* log(-x) = Nan */
703
+ }
704
+ /* Subnormal number */
705
+ E = -52;
706
+ xdb.d *= ((db_number) ((double) two52)).d; /* make x a normal number */
707
+ }
708
+
709
+ if (xdb.i[HI] >= 0x7ff00000){
710
+ return x+x; /* Inf or Nan */
711
+ }
712
+
713
+
714
+ /* Extract exponent and mantissa
715
+ Do range reduction,
716
+ yielding to E holding the exponent and
717
+ y the mantissa between sqrt(2)/2 and sqrt(2)
718
+ */
719
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
720
+ index = (xdb.i[HI] & 0x000fffff);
721
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
722
+ index = (index + (1<<(20-L-1))) >> (20-L);
723
+
724
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
725
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
726
+ xdb.i[HI] -= 0x00100000;
727
+ E++;
728
+ }
729
+ y = xdb.d;
730
+ index = index & INDEXMASK;
731
+ /* Cast integer E into double ed for multiplication later */
732
+ ed = (double) E;
733
+
734
+ /*
735
+ Read tables:
736
+ Read one float for ri
737
+ Read the first two doubles for -log(r_i) (out of three)
738
+
739
+ Organization of the table:
740
+
741
+ one struct entry per index, the struct entry containing
742
+ r, logih, logim and logil in this order
743
+ */
744
+
745
+
746
+ ri = argredtable[index].ri;
747
+ /*
748
+ Actually we don't need the logarithm entries now
749
+ Move the following two lines to the eventual reconstruction
750
+ As long as we don't have any if in the following code, we can overlap
751
+ memory access with calculations
752
+ */
753
+ logih = argredtable[index].logih;
754
+ logim = argredtable[index].logim;
755
+
756
+ /* Do range reduction:
757
+
758
+ zh + zl = y * ri - 1.0 correctly
759
+
760
+ Correctness is assured by use of Mul12 and Add12
761
+ even if we don't force ri to have its' LSBs set to zero
762
+
763
+ Discard zl for higher monome degrees
764
+ */
765
+
766
+ Mul12(&yrih, &yril, y, ri);
767
+ th = yrih - 1.0;
768
+ Add12Cond(zh, zl, th, yril);
769
+
770
+ /*
771
+ Polynomial evaluation
772
+
773
+ Use a 7 degree polynomial
774
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
775
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
776
+ using an ad hoc method
777
+
778
+ */
779
+
780
+
781
+
782
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
783
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
784
+ #else
785
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
786
+ #endif
787
+
788
+ Mul12(&zhSquareh, &zhSquarel, zh, zh);
789
+ polyUpper = polyHorner * (zh * zhSquareh);
790
+ zhSquareHalfh = zhSquareh * -0.5;
791
+ zhSquareHalfl = zhSquarel * -0.5;
792
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
793
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
794
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l);
795
+
796
+ /* Reconstruction
797
+
798
+ Read logih and logim in the tables (already done)
799
+
800
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
801
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
802
+
803
+ Carry out everything in double double precision
804
+
805
+ */
806
+
807
+ /*
808
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
809
+ Multiplication of ed (double E) and log2h is thus correct
810
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
811
+ is enough for the accurate phase
812
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
813
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
814
+ triple double values. We have to take it into account for the accurate phase
815
+ basic procedures for addition and multiplication
816
+ The condition on the next Add12 is verified as log2m is smaller than log2h
817
+ and both are scaled by ed
818
+ */
819
+
820
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
821
+
822
+ /* Add logih and logim to ph and pl
823
+
824
+ We must use conditioned Add22 as logih can move over ph
825
+ */
826
+
827
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
828
+
829
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
830
+
831
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
832
+
833
+ /* Rounding test and eventual return or call to the accurate function */
834
+
835
+ if(E==0)
836
+ roundcst = RDROUNDCST1;
837
+ else
838
+ roundcst = RDROUNDCST2;
839
+
840
+ TEST_AND_RETURN_RZ(logh, logm, roundcst);
841
+
842
+ #if DEBUG
843
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
844
+ #endif
845
+
846
+ log_td_accurate(&logh, &logm, &logl, E, ed, index, zh, zl, logih, logim);
847
+
848
+ ReturnRoundTowardsZero3(logh, logm, logl);
849
+ }
850
+
851
+
852
+