crmf 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +12 -0
  3. data/crmf.gemspec +105 -3
  4. data/ext/crlibm-1.0beta5/AUTHORS +2 -0
  5. data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
  6. data/ext/crlibm-1.0beta5/COPYING +340 -0
  7. data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
  8. data/ext/crlibm-1.0beta5/ChangeLog +125 -0
  9. data/ext/crlibm-1.0beta5/Makefile.am +134 -0
  10. data/ext/crlibm-1.0beta5/NEWS +0 -0
  11. data/ext/crlibm-1.0beta5/README +31 -0
  12. data/ext/crlibm-1.0beta5/README.DEV +23 -0
  13. data/ext/crlibm-1.0beta5/README.md +5 -0
  14. data/ext/crlibm-1.0beta5/TODO +66 -0
  15. data/ext/crlibm-1.0beta5/VERSION +1 -0
  16. data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
  17. data/ext/crlibm-1.0beta5/acos-td.h +629 -0
  18. data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
  19. data/ext/crlibm-1.0beta5/asin-td.h +620 -0
  20. data/ext/crlibm-1.0beta5/asincos.c +4488 -0
  21. data/ext/crlibm-1.0beta5/asincos.h +575 -0
  22. data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
  23. data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
  24. data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
  25. data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
  26. data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
  27. data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
  28. data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
  29. data/ext/crlibm-1.0beta5/configure.ac +419 -0
  30. data/ext/crlibm-1.0beta5/crlibm.h +204 -0
  31. data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
  32. data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
  33. data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
  34. data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
  35. data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
  36. data/ext/crlibm-1.0beta5/double-extended.h +496 -0
  37. data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
  38. data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
  39. data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
  40. data/ext/crlibm-1.0beta5/exp-td.h +685 -0
  41. data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
  42. data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
  43. data/ext/crlibm-1.0beta5/expm1.c +2515 -0
  44. data/ext/crlibm-1.0beta5/expm1.h +715 -0
  45. data/ext/crlibm-1.0beta5/interval.h +238 -0
  46. data/ext/crlibm-1.0beta5/log-de.c +480 -0
  47. data/ext/crlibm-1.0beta5/log-de.h +747 -0
  48. data/ext/crlibm-1.0beta5/log-de2.c +280 -0
  49. data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
  50. data/ext/crlibm-1.0beta5/log-td.c +1158 -0
  51. data/ext/crlibm-1.0beta5/log-td.h +819 -0
  52. data/ext/crlibm-1.0beta5/log.c +2244 -0
  53. data/ext/crlibm-1.0beta5/log.h +1592 -0
  54. data/ext/crlibm-1.0beta5/log10-td.c +906 -0
  55. data/ext/crlibm-1.0beta5/log10-td.h +823 -0
  56. data/ext/crlibm-1.0beta5/log1p.c +1295 -0
  57. data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
  58. data/ext/crlibm-1.0beta5/log2-td.h +821 -0
  59. data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
  60. data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
  61. data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
  62. data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
  63. data/ext/crlibm-1.0beta5/log_fast.c +360 -0
  64. data/ext/crlibm-1.0beta5/log_fast.h +440 -0
  65. data/ext/crlibm-1.0beta5/pow.c +1396 -0
  66. data/ext/crlibm-1.0beta5/pow.h +3101 -0
  67. data/ext/crlibm-1.0beta5/prepare +20 -0
  68. data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
  69. data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
  70. data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
  71. data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
  72. data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
  73. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
  74. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
  75. data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
  76. data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
  77. data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
  78. data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
  79. data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
  80. data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
  81. data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
  82. data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
  83. data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
  84. data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
  85. data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
  86. data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
  87. data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
  88. data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
  89. data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
  90. data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
  91. data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
  92. data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
  93. data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
  94. data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
  95. data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
  96. data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
  97. data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
  98. data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
  99. data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
  100. data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
  101. data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
  102. data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
  103. data/ext/crlibm-1.0beta5/trigpi.h +556 -0
  104. data/ext/crlibm-1.0beta5/triple-double.c +57 -0
  105. data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
  106. data/ext/crmf/crmf.c +117 -20
  107. data/ext/crmf/extconf.rb +12 -8
  108. data/lib/crmf/version.rb +1 -1
  109. data/tests/perf.rb +100 -219
  110. metadata +108 -10
  111. data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,1521 @@
1
+ /*
2
+ * This function computes log2, correctly rounded,
3
+ * using experimental techniques based on triple-double arithmetic
4
+
5
+ THIS IS EXPERIMENTAL SOFTWARE
6
+
7
+ *
8
+ * Author : Christoph Lauter
9
+ * christoph.lauter at ens-lyon.fr
10
+ *
11
+
12
+ To have it replace the crlibm log2, do:
13
+
14
+ gcc -DHAVE_CONFIG_H -I. -fPIC -O2 -c log2-td.c; mv log2-td.o log2_accurate.o; make
15
+
16
+
17
+ */
18
+
19
+
20
+ #include <stdio.h>
21
+ #include <stdlib.h>
22
+ #include "crlibm.h"
23
+ #include "crlibm_private.h"
24
+ #include "triple-double.h"
25
+ #include "log2-td.h"
26
+ #ifdef BUILD_INTERVAL_FUNCTIONS
27
+ #include "interval.h"
28
+ #endif
29
+
30
+
31
+ #define AVOID_FMA 0 /* NOTE(review): the Horner-scheme guards in this file test !defined(AVOID_FMA), not its value, so defining it here (even as 0) selects the non-FMA branches. Confirm whether that is intended before changing it. */
32
+
33
+
34
+ void log2_td_accurate(double *logb2h, double *logb2m, double *logb2l, int E, double ed, int index, double zh, double zl, double logih, double logim) {
35
+ double highPoly, t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l, t8h, t8l, t9h, t9l, t10h, t10l, t11h, t11l;
36
+ double t12h, t12l, t13h, t13l, t14h, t14l, zSquareh, zSquarem, zSquarel, zCubeh, zCubem, zCubel, higherPolyMultZh, higherPolyMultZm;
37
+ double higherPolyMultZl, zSquareHalfh, zSquareHalfm, zSquareHalfl, polyWithSquareh, polyWithSquarem, polyWithSquarel;
38
+ double polyh, polym, polyl, logil, logyh, logym, logyl, loghover, logmover, loglover, log2edhover, log2edmover, log2edlover;
39
+ double log2edh, log2edm, log2edl, logb2hover, logb2mover, logb2lover;
40
+
41
+
42
+ #if EVAL_PERF
43
+ crlibm_second_step_taken++;
44
+ #endif
45
+
46
+
47
+ /* Accurate phase of log2, evaluated in triple-double arithmetic.
48
+ Inputs: zh + zl is the reduced argument z, logih and logim are the two leading words of the table value for this index (the third word, logil, is read below from argredtable[index]), and ed is the exponent as a double. The parameter E is not referenced in this body.
49
+ Argument reduction is already done.
50
+ We must store through logb2h, logb2m and logb2l a renormalized triple-double representing the intermediate result in 118 bits precision.
51
+
52
+ We use a 14 degree polynomial, computing the first 3 (the first is 0) coefficients in triple double,
53
+ calculating the next 7 coefficients (accPolyC3 to accPolyC9) in double-double arithmetic and the remaining ones (accPolyC10 to accPolyC14) in double.
54
+
55
+ We must account for zl starting with the monomial of degree 4 (7^3 + 53 - 7 >> 118); so
56
+ double double calculations won't account for it.
57
+
58
+ */
59
+
60
+ /* Start of the Horner scheme. NOTE(review): AVOID_FMA is #defined at the top of this file, so !defined(AVOID_FMA) is false and the #else branch is the one that gets compiled. */
61
+
62
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
63
+ highPoly = FMA(FMA(FMA(FMA(accPolyC14,zh,accPolyC13),zh,accPolyC12),zh,accPolyC11),zh,accPolyC10);
64
+ #else
65
+ highPoly = accPolyC10 + zh * (accPolyC11 + zh * (accPolyC12 + zh * (accPolyC13 + zh * accPolyC14)));
66
+ #endif
67
+
68
+ /* We want to write
69
+
70
+ accPolyC3 + zh * (accPoly4 + zh * (accPoly5 + zh * (accPoly6 + zh * (accPoly7 + zh * (accPoly8 + zh * (accPoly9 + zh * highPoly))))));
71
+ ( t14 t13 t12 t11 t10 t9 t8 t7 t6 t5 t4 t3 t2 t1 )
72
+
73
+ with all additions and multiplications in double-double arithmetic,
74
+ but we will produce intermediate results labelled t1h/t1l through t14h/t14l
75
+ */
76
+
77
+ Mul12(&t1h, &t1l, zh, highPoly);
78
+ Add22(&t2h, &t2l, accPolyC9h, accPolyC9l, t1h, t1l);
79
+ Mul22(&t3h, &t3l, zh, zl, t2h, t2l);
80
+ Add22(&t4h, &t4l, accPolyC8h, accPolyC8l, t3h, t3l);
81
+ Mul22(&t5h, &t5l, zh, zl, t4h, t4l);
82
+ Add22(&t6h, &t6l, accPolyC7h, accPolyC7l, t5h, t5l);
83
+ Mul22(&t7h, &t7l, zh, zl, t6h, t6l);
84
+ Add22(&t8h, &t8l, accPolyC6h, accPolyC6l, t7h, t7l);
85
+ Mul22(&t9h, &t9l, zh, zl, t8h, t8l);
86
+ Add22(&t10h, &t10l, accPolyC5h, accPolyC5l, t9h, t9l);
87
+ Mul22(&t11h, &t11l, zh, zl, t10h, t10l);
88
+ Add22(&t12h, &t12l, accPolyC4h, accPolyC4l, t11h, t11l);
89
+ Mul22(&t13h, &t13l, zh, zl, t12h, t12l);
90
+ Add22(&t14h, &t14l, accPolyC3h, accPolyC3l, t13h, t13l);
91
+
92
+ /* We must now prepare (zh + zl)^2 and (zh + zl)^3 as triple doubles */
93
+
94
+ Mul23(&zSquareh, &zSquarem, &zSquarel, zh, zl, zh, zl);
95
+ Mul233(&zCubeh, &zCubem, &zCubel, zh, zl, zSquareh, zSquarem, zSquarel);
96
+
97
+ /* We can now multiply the middle and higher polynomial (t14h + t14l) by z^3 */
98
+
99
+ Mul233(&higherPolyMultZh, &higherPolyMultZm, &higherPolyMultZl, t14h, t14l, zCubeh, zCubem, zCubel);
100
+
101
+ /* Multiply now z^2 by -1/2 (exact op) and add to middle and higher polynomial */
102
+
103
+ zSquareHalfh = zSquareh * -0.5;
104
+ zSquareHalfm = zSquarem * -0.5;
105
+ zSquareHalfl = zSquarel * -0.5;
106
+
107
+ Add33(&polyWithSquareh, &polyWithSquarem, &polyWithSquarel,
108
+ zSquareHalfh, zSquareHalfm, zSquareHalfl,
109
+ higherPolyMultZh, higherPolyMultZm, higherPolyMultZl);
110
+
111
+ /* Add now zh and zl to obtain the polynomial evaluation result */
112
+
113
+ Add233(&polyh, &polym, &polyl, zh, zl, polyWithSquareh, polyWithSquarem, polyWithSquarel);
114
+
115
+ /* Reconstruct now log(y) = log(1 + z) - log(ri) by adding logih, logim, logil
116
+ logil has not been read from the table yet, so load it first
117
+ */
118
+
119
+ logil = argredtable[index].logil;
120
+
121
+ Add33(&logyh, &logym, &logyl, logih, logim, logil, polyh, polym, polyl);
122
+
123
+ /* Multiply log2 with E, i.e. log2h, log2m, log2l by ed
124
+ ed is always less than 2^(12) and log2h and log2m are stored with at least 12 trailing zeros
125
+ So multiplying naively is correct (up to 134 bits at least)
126
+
127
+ The final result is thus obtained by adding log2 * E to log(y)
128
+ */
129
+
130
+ log2edhover = log2h * ed;
131
+ log2edmover = log2m * ed;
132
+ log2edlover = log2l * ed;
133
+
134
+ /* It may be necessary to renormalize the tabulated value (multiplied by ed) before adding
135
+ it to the log(y) result
136
+
137
+ If needed, uncomment the following Renormalize3-Statement and comment out the copies
138
+ following it.
139
+ */
140
+
141
+ /* Renormalize3(&log2edh, &log2edm, &log2edl, log2edhover, log2edmover, log2edlover); */
142
+
143
+ log2edh = log2edhover;
144
+ log2edm = log2edmover;
145
+ log2edl = log2edlover;
146
+
147
+ Add33(&loghover, &logmover, &loglover, log2edh, log2edm, log2edl, logyh, logym, logyl);
148
+
149
+
150
+ /* Change logarithm base from natural base to base 2 by multiplying with log2invh + log2invl */
151
+
152
+ Mul233(&logb2hover, &logb2mover, &logb2lover, log2invh, log2invl, loghover, logmover, loglover);
153
+
154
+
155
+ /* Since we can not guarantee in each addition and multiplication procedure that
156
+ the results are not overlapping, we must renormalize the result before handing
157
+ it over to the final rounding
158
+ */
159
+
160
+ Renormalize3(logb2h,logb2m,logb2l,logb2hover,logb2mover,logb2lover);
161
+
162
+ }
163
+
164
+
165
+
166
+ /*************************************************************
167
+ *************************************************************
168
+ * ROUNDED TO NEAREST *
169
+ *************************************************************
170
+ *************************************************************/
171
+ double log2_rn(double x){
172
+ db_number xdb;
173
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
174
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
175
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
176
+ double logb2h, logb2m, logb2l;
177
+ int E, index;
178
+
179
+ E=0;
180
+ xdb.d=x;
181
+
182
+ /* Filter cases */
183
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
184
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
185
+ return -1.0/0.0;
186
+ } /* log(+/-0) = -Inf */
187
+ if (xdb.i[HI] < 0){
188
+ return (x-x)/0; /* log(-x) = Nan */
189
+ }
190
+ /* Subnormal number */
191
+ E = -52;
192
+ xdb.d *= two52; /* make x a normal number */
193
+ }
194
+
195
+ if (xdb.i[HI] >= 0x7ff00000){
196
+ return x+x; /* Inf or Nan */
197
+ }
198
+
199
+
200
+ /* Extract exponent and mantissa
201
+ Do range reduction,
202
+ yielding to E holding the exponent and
203
+ y the mantissa between sqrt(2)/2 and sqrt(2)
204
+ */
205
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
206
+ index = (xdb.i[HI] & 0x000fffff);
207
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
208
+ index = (index + (1<<(20-L-1))) >> (20-L);
209
+
210
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
211
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
212
+ xdb.i[HI] -= 0x00100000;
213
+ E++;
214
+ }
215
+ y = xdb.d;
216
+ index = index & INDEXMASK;
217
+ /* Cast integer E into double ed for multiplication later */
218
+ ed = (double) E;
219
+
220
+ /*
221
+ Read tables:
222
+ Read one float for ri
223
+ Read the first two doubles for -log(r_i) (out of three)
224
+
225
+ Organization of the table:
226
+
227
+ one struct entry per index, the struct entry containing
228
+ r, logih, logim and logil in this order
229
+ */
230
+
231
+
232
+ ri = argredtable[index].ri;
233
+ /*
234
+ Actually we don't need the logarithm entries now
235
+ Move the following two lines to the eventual reconstruction
236
+ As long as we don't have any if in the following code, we can overlap
237
+ memory access with calculations
238
+ */
239
+ logih = argredtable[index].logih;
240
+ logim = argredtable[index].logim;
241
+
242
+ /* Do range reduction:
243
+
244
+ zh + zl = y * ri - 1.0 correctly
245
+
246
+ Correctness is assured by use of Mul12 and Add12
247
+ even if we don't force ri to have its' LSBs set to zero
248
+
249
+ Discard zl for higher monome degrees
250
+ */
251
+
252
+ Mul12(&yrih, &yril, y, ri);
253
+ th = yrih - 1.0;
254
+ Add12Cond(zh, zl, th, yril);
255
+
256
+ /*
257
+ Polynomial evaluation
258
+
259
+ Use a 7 degree polynomial
260
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
261
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
262
+ using an ad hoc method
263
+
264
+ */
265
+
266
+
267
+
268
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
269
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
270
+ #else
271
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
272
+ #endif
273
+
274
+ Mul12(&zhSquareh, &zhSquarel, zh, zh);
275
+ polyUpper = polyHorner * (zh * zhSquareh);
276
+ zhSquareHalfh = zhSquareh * -0.5;
277
+ zhSquareHalfl = zhSquarel * -0.5;
278
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
279
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
280
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l);
281
+
282
+ /* Reconstruction
283
+
284
+ Read logih and logim in the tables (already done)
285
+
286
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
287
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
288
+
289
+ Carry out everything in double double precision
290
+
291
+ */
292
+
293
+ /*
294
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
295
+ Multiplication of ed (double E) and log2h is thus correct
296
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
297
+ is enough for the accurate phase
298
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
299
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
300
+ triple double values. We have to take it into account for the accurate phase
301
+ basic procedures for addition and multiplication
302
+ The condition on the next Add12 is verified as log2m is smaller than log2h
303
+ and both are scaled by ed
304
+ */
305
+
306
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
307
+
308
+ /* Add logih and logim to ph and pl
309
+
310
+ We must use conditioned Add22 as logih can move over ph
311
+ */
312
+
313
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
314
+
315
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
316
+
317
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
318
+
319
+
320
+
321
+ /* Change logarithm base from natural base to base 2 by multiplying */
322
+
323
+ Mul22(&logb2h, &logb2m, log2invh, log2invl, logh, logm);
324
+
325
+
326
+ /* Rounding test and eventual return or call to the accurate function */
327
+
328
+ if(E==0)
329
+ roundcst = ROUNDCST1;
330
+ else
331
+ roundcst = ROUNDCST2;
332
+
333
+
334
+ if(logb2h == (logb2h + (logb2m * roundcst)))
335
+ return logb2h;
336
+ else
337
+ {
338
+
339
+ #if DEBUG
340
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
341
+ #endif
342
+
343
+ log2_td_accurate(&logb2h, &logb2m, &logb2l, E, ed, index, zh, zl, logih, logim);
344
+
345
+ ReturnRoundToNearest3(logb2h, logb2m, logb2l);
346
+
347
+ } /* Accurate phase launched */
348
+ }
349
+
350
+
351
+ /*************************************************************
352
+ *************************************************************
353
+ * ROUNDED UPWARDS *
354
+ *************************************************************
355
+ *************************************************************/
356
+ double log2_ru(double x) {
357
+ db_number xdb;
358
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
359
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
360
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
361
+ double logb2h, logb2m, logb2l;
362
+ int E, index;
363
+
364
+
365
+ E=0;
366
+ xdb.d=x;
367
+
368
+ /* Filter cases */
369
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
370
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
371
+ return -1.0/0.0;
372
+ } /* log(+/-0) = -Inf */
373
+ if (xdb.i[HI] < 0){
374
+ return (x-x)/0; /* log(-x) = Nan */
375
+ }
376
+ /* Subnormal number */
377
+ E = -52;
378
+ xdb.d *= two52; /* make x a normal number */
379
+ }
380
+
381
+ if (xdb.i[HI] >= 0x7ff00000){
382
+ return x+x; /* Inf or Nan */
383
+ }
384
+
385
+
386
+ /* Extract exponent and mantissa
387
+ Do range reduction,
388
+ yielding to E holding the exponent and
389
+ y the mantissa between sqrt(2)/2 and sqrt(2)
390
+ */
391
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
392
+ index = (xdb.i[HI] & 0x000fffff);
393
+
394
+
395
+ /* Test now if the argument is an exact power of 2
396
+ i.e. if the mantissa is exactly 1 (0x0..0 with the implicit bit)
397
+ This test is necessary for filtering out the cases where the final
398
+ rounding test cannot distinguish between an exact algebraic
399
+ number and a hard case to round
400
+ */
401
+
402
+ if ((index | xdb.i[LO]) == 0) {
403
+ /* Handle the "trivial" case for log2:
404
+ The argument is an exact power of 2, return thus
405
+ just the exponant of the number
406
+ */
407
+
408
+ return (double) E;
409
+
410
+ }
411
+
412
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
413
+ index = (index + (1<<(20-L-1))) >> (20-L);
414
+
415
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
416
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
417
+ xdb.i[HI] -= 0x00100000;
418
+ E++;
419
+ }
420
+ y = xdb.d;
421
+ index = index & INDEXMASK;
422
+ /* Cast integer E into double ed for multiplication later */
423
+ ed = (double) E;
424
+
425
+ /*
426
+ Read tables:
427
+ Read one float for ri
428
+ Read the first two doubles for -log(r_i) (out of three)
429
+
430
+ Organization of the table:
431
+
432
+ one struct entry per index, the struct entry containing
433
+ r, logih, logim and logil in this order
434
+ */
435
+
436
+
437
+ ri = argredtable[index].ri;
438
+ /*
439
+ Actually we don't need the logarithm entries now
440
+ Move the following two lines to the eventual reconstruction
441
+ As long as we don't have any if in the following code, we can overlap
442
+ memory access with calculations
443
+ */
444
+ logih = argredtable[index].logih;
445
+ logim = argredtable[index].logim;
446
+
447
+ /* Do range reduction:
448
+
449
+ zh + zl = y * ri - 1.0 correctly
450
+
451
+ Correctness is assured by use of Mul12 and Add12
452
+ even if we don't force ri to have its' LSBs set to zero
453
+
454
+ Discard zl for higher monome degrees
455
+ */
456
+
457
+ Mul12(&yrih, &yril, y, ri);
458
+ th = yrih - 1.0;
459
+ Add12Cond(zh, zl, th, yril);
460
+
461
+ /*
462
+ Polynomial evaluation
463
+
464
+ Use a 7 degree polynomial
465
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
466
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
467
+ using an ad hoc method
468
+
469
+ */
470
+
471
+
472
+
473
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
474
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
475
+ #else
476
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
477
+ #endif
478
+
479
+ Mul12(&zhSquareh, &zhSquarel, zh, zh);
480
+ polyUpper = polyHorner * (zh * zhSquareh);
481
+ zhSquareHalfh = zhSquareh * -0.5;
482
+ zhSquareHalfl = zhSquarel * -0.5;
483
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
484
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
485
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l);
486
+
487
+ /* Reconstruction
488
+
489
+ Read logih and logim in the tables (already done)
490
+
491
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
492
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
493
+
494
+ Carry out everything in double double precision
495
+
496
+ */
497
+
498
+ /*
499
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
500
+ Multiplication of ed (double E) and log2h is thus correct
501
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
502
+ is enough for the accurate phase
503
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
504
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
505
+ triple double values. We have to take it into account for the accurate phase
506
+ basic procedures for addition and multiplication
507
+ The condition on the next Add12 is verified as log2m is smaller than log2h
508
+ and both are scaled by ed
509
+ */
510
+
511
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
512
+
513
+ /* Add logih and logim to ph and pl
514
+
515
+ We must use conditioned Add22 as logih can move over ph
516
+ */
517
+
518
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
519
+
520
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
521
+
522
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
523
+
524
+ /* Change logarithm base from natural base to base 2 by multiplying */
525
+
526
+ Mul22(&logb2h, &logb2m, log2invh, log2invl, logh, logm);
527
+
528
+ /* Rounding test and eventual return or call to the accurate function */
529
+
530
+ if(E==0)
531
+ roundcst = RDROUNDCST1;
532
+ else
533
+ roundcst = RDROUNDCST2;
534
+
535
+ TEST_AND_RETURN_RU(logb2h, logb2m, roundcst);
536
+
537
+ #if DEBUG
538
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
539
+ #endif
540
+
541
+ log2_td_accurate(&logb2h, &logb2m, &logb2l, E, ed, index, zh, zl, logih, logim);
542
+
543
+ ReturnRoundUpwards3(logb2h, logb2m, logb2l);
544
+
545
+ }
546
+
547
+
548
+ /*************************************************************
549
+ *************************************************************
550
+ * ROUNDED DOWNWARDS *
551
+ *************************************************************
552
+ *************************************************************/
553
+ double log2_rd(double x) {
554
+ db_number xdb;
555
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
556
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
557
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
558
+ double logb2h, logb2m, logb2l;
559
+ int E, index;
560
+
561
+
562
+ E=0;
563
+ xdb.d=x;
564
+
565
+ /* Filter cases */
566
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
567
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
568
+ return -1.0/0.0;
569
+ } /* log(+/-0) = -Inf */
570
+ if (xdb.i[HI] < 0){
571
+ return (x-x)/0; /* log(-x) = Nan */
572
+ }
573
+ /* Subnormal number */
574
+ E = -52;
575
+ xdb.d *= two52; /* make x a normal number */
576
+ }
577
+
578
+ if (xdb.i[HI] >= 0x7ff00000){
579
+ return x+x; /* Inf or Nan */
580
+ }
581
+
582
+
583
+ /* Extract exponent and mantissa
584
+ Do range reduction,
585
+ yielding to E holding the exponent and
586
+ y the mantissa between sqrt(2)/2 and sqrt(2)
587
+ */
588
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
589
+ index = (xdb.i[HI] & 0x000fffff);
590
+
591
+
592
+ /* Test now if the argument is an exact power of 2
593
+ i.e. if the mantissa is exactly 1 (0x0..0 with the implicit bit)
594
+ This test is necessary for filtering out the cases where the final
595
+ rounding test cannot distinguish between an exact algebraic
596
+ number and a hard case to round
597
+ */
598
+
599
+ if ((index | xdb.i[LO]) == 0) {
600
+ /* Handle the "trivial" case for log2:
601
+ The argument is an exact power of 2, return thus
602
+ just the exponant of the number
603
+ */
604
+
605
+ return (double) E;
606
+
607
+ }
608
+
609
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
610
+ index = (index + (1<<(20-L-1))) >> (20-L);
611
+
612
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
613
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
614
+ xdb.i[HI] -= 0x00100000;
615
+ E++;
616
+ }
617
+ y = xdb.d;
618
+ index = index & INDEXMASK;
619
+ /* Cast integer E into double ed for multiplication later */
620
+ ed = (double) E;
621
+
622
+ /*
623
+ Read tables:
624
+ Read one float for ri
625
+ Read the first two doubles for -log(r_i) (out of three)
626
+
627
+ Organization of the table:
628
+
629
+ one struct entry per index, the struct entry containing
630
+ r, logih, logim and logil in this order
631
+ */
632
+
633
+
634
+ ri = argredtable[index].ri;
635
+ /*
636
+ Actually we don't need the logarithm entries now
637
+ Move the following two lines to the eventual reconstruction
638
+ As long as we don't have any if in the following code, we can overlap
639
+ memory access with calculations
640
+ */
641
+ logih = argredtable[index].logih;
642
+ logim = argredtable[index].logim;
643
+
644
+ /* Do range reduction:
645
+
646
+ zh + zl = y * ri - 1.0 correctly
647
+
648
+ Correctness is assured by use of Mul12 and Add12
649
+ even if we don't force ri to have its' LSBs set to zero
650
+
651
+ Discard zl for higher monome degrees
652
+ */
653
+
654
+ Mul12(&yrih, &yril, y, ri);
655
+ th = yrih - 1.0;
656
+ Add12Cond(zh, zl, th, yril);
657
+
658
+ /*
659
+ Polynomial evaluation
660
+
661
+ Use a 7 degree polynomial
662
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
663
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
664
+ using an ad hoc method
665
+
666
+ */
667
+
668
+
669
+
670
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
671
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
672
+ #else
673
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
674
+ #endif
675
+
676
+ Mul12(&zhSquareh, &zhSquarel, zh, zh);
677
+ polyUpper = polyHorner * (zh * zhSquareh);
678
+ zhSquareHalfh = zhSquareh * -0.5;
679
+ zhSquareHalfl = zhSquarel * -0.5;
680
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
681
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
682
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l);
683
+
684
+ /* Reconstruction
685
+
686
+ Read logih and logim in the tables (already done)
687
+
688
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
689
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
690
+
691
+ Carry out everything in double double precision
692
+
693
+ */
694
+
695
+ /*
696
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
697
+ Multiplication of ed (double E) and log2h is thus correct
698
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
699
+ is enough for the accurate phase
700
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
701
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
702
+ triple double values. We have to take it into account for the accurate phase
703
+ basic procedures for addition and multiplication
704
+ The condition on the next Add12 is verified as log2m is smaller than log2h
705
+ and both are scaled by ed
706
+ */
707
+
708
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
709
+
710
+ /* Add logih and logim to ph and pl
711
+
712
+ We must use conditioned Add22 as logih can move over ph
713
+ */
714
+
715
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
716
+
717
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
718
+
719
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
720
+
721
+ /* Change logarithm base from natural base to base 2 by multiplying */
722
+
723
+ Mul22(&logb2h, &logb2m, log2invh, log2invl, logh, logm);
724
+
725
+ /* Rounding test and eventual return or call to the accurate function */
726
+
727
+ if(E==0)
728
+ roundcst = RDROUNDCST1;
729
+ else
730
+ roundcst = RDROUNDCST2;
731
+
732
+ TEST_AND_RETURN_RD(logb2h, logb2m, roundcst);
733
+
734
+ #if DEBUG
735
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
736
+ #endif
737
+
738
+ log2_td_accurate(&logb2h, &logb2m, &logb2l, E, ed, index, zh, zl, logih, logim);
739
+
740
+ ReturnRoundDownwards3(logb2h, logb2m, logb2l);
741
+ }
742
+
743
+ /*************************************************************
744
+ *************************************************************
745
+ * ROUNDED TOWARDS ZERO *
746
+ *************************************************************
747
+ *************************************************************/
748
+ double log2_rz(double x) {
749
+ db_number xdb;
750
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
751
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
752
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, roundcst;
753
+ double logb2h, logb2m, logb2l;
754
+ int E, index;
755
+
756
+
757
+ E=0;
758
+ xdb.d=x;
759
+
760
+ /* Filter cases */
761
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
762
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
763
+ return -1.0/0.0;
764
+ } /* log(+/-0) = -Inf */
765
+ if (xdb.i[HI] < 0){
766
+ return (x-x)/0; /* log(-x) = Nan */
767
+ }
768
+ /* Subnormal number */
769
+ E = -52;
770
+ xdb.d *= two52; /* make x a normal number */
771
+ }
772
+
773
+ if (xdb.i[HI] >= 0x7ff00000){
774
+ return x+x; /* Inf or Nan */
775
+ }
776
+
777
+
778
+ /* Extract exponent and mantissa
779
+ Do range reduction,
780
+ yielding to E holding the exponent and
781
+ y the mantissa between sqrt(2)/2 and sqrt(2)
782
+ */
783
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
784
+ index = (xdb.i[HI] & 0x000fffff);
785
+
786
+
787
+ /* Test now if the argument is an exact power of 2
788
+ i.e. if the mantissa is exactly 1 (0x0..0 with the implicit bit)
789
+ This test is necessary for filtering out the cases where the final
790
+ rounding test cannot distinguish between an exact algebraic
791
+ number and a hard case to round
792
+ */
793
+
794
+ if ((index | xdb.i[LO]) == 0) {
795
+ /* Handle the "trivial" case for log2:
796
+ The argument is an exact power of 2, return thus
797
+ just the exponant of the number
798
+ */
799
+
800
+ return (double) E;
801
+
802
+ }
803
+
804
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
805
+ index = (index + (1<<(20-L-1))) >> (20-L);
806
+
807
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
808
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
809
+ xdb.i[HI] -= 0x00100000;
810
+ E++;
811
+ }
812
+ y = xdb.d;
813
+ index = index & INDEXMASK;
814
+ /* Cast integer E into double ed for multiplication later */
815
+ ed = (double) E;
816
+
817
+ /*
818
+ Read tables:
819
+ Read one float for ri
820
+ Read the first two doubles for -log(r_i) (out of three)
821
+
822
+ Organization of the table:
823
+
824
+ one struct entry per index, the struct entry containing
825
+ r, logih, logim and logil in this order
826
+ */
827
+
828
+
829
+ ri = argredtable[index].ri;
830
+ /*
831
+ Actually we don't need the logarithm entries now
832
+ Move the following two lines to the eventual reconstruction
833
+ As long as we don't have any if in the following code, we can overlap
834
+ memory access with calculations
835
+ */
836
+ logih = argredtable[index].logih;
837
+ logim = argredtable[index].logim;
838
+
839
+ /* Do range reduction:
840
+
841
+ zh + zl = y * ri - 1.0 correctly
842
+
843
+ Correctness is assured by use of Mul12 and Add12
844
+ even if we don't force ri to have its' LSBs set to zero
845
+
846
+ Discard zl for higher monome degrees
847
+ */
848
+
849
+ Mul12(&yrih, &yril, y, ri);
850
+ th = yrih - 1.0;
851
+ Add12Cond(zh, zl, th, yril);
852
+
853
+ /*
854
+ Polynomial evaluation
855
+
856
+ Use a 7 degree polynomial
857
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
858
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
859
+ using an ad hoc method
860
+
861
+ */
862
+
863
+
864
+
865
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
866
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
867
+ #else
868
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
869
+ #endif
870
+
871
+ Mul12(&zhSquareh, &zhSquarel, zh, zh);
872
+ polyUpper = polyHorner * (zh * zhSquareh);
873
+ zhSquareHalfh = zhSquareh * -0.5;
874
+ zhSquareHalfl = zhSquarel * -0.5;
875
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
876
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
877
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l);
878
+
879
+ /* Reconstruction
880
+
881
+ Read logih and logim in the tables (already done)
882
+
883
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
884
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
885
+
886
+ Carry out everything in double double precision
887
+
888
+ */
889
+
890
+ /*
891
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
892
+ Multiplication of ed (double E) and log2h is thus correct
893
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
894
+ is enough for the accurate phase
895
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
896
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
897
+ triple double values. We have to take it into account for the accurate phase
898
+ basic procedures for addition and multiplication
899
+ The condition on the next Add12 is verified as log2m is smaller than log2h
900
+ and both are scaled by ed
901
+ */
902
+
903
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
904
+
905
+ /* Add logih and logim to ph and pl
906
+
907
+ We must use conditioned Add22 as logih can move over ph
908
+ */
909
+
910
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
911
+
912
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
913
+
914
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
915
+
916
+ /* Change logarithm base from natural base to base 2 by multiplying */
917
+
918
+ Mul22(&logb2h, &logb2m, log2invh, log2invl, logh, logm);
919
+
920
+ /* Rounding test and eventual return or call to the accurate function */
921
+
922
+ if(E==0)
923
+ roundcst = RDROUNDCST1;
924
+ else
925
+ roundcst = RDROUNDCST2;
926
+
927
+ TEST_AND_RETURN_RZ(logb2h, logb2m, roundcst);
928
+
929
+ #if DEBUG
930
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
931
+ #endif
932
+
933
+ log2_td_accurate(&logb2h, &logb2m, &logb2l, E, ed, index, zh, zl, logih, logim);
934
+
935
+ ReturnRoundTowardsZero3(logb2h, logb2m, logb2l);
936
+ }
937
+
938
+ #ifdef BUILD_INTERVAL_FUNCTIONS
939
+ interval j_log2(interval x) {
940
+ interval res;
941
+ double res_inf, res_sup, restemp_inf, restemp_sup;
942
+ int infDone, supDone;
943
+ int roundable;
944
+ db_number xdb_inf;
945
+ double y_inf, ed_inf, ri_inf, logih_inf, logim_inf, yrih_inf, yril_inf, th_inf, zh_inf, zl_inf;
946
+ double polyHorner_inf, zhSquareh_inf, zhSquarel_inf, polyUpper_inf, zhSquareHalfh_inf, zhSquareHalfl_inf;
947
+ double t1h_inf, t1l_inf, t2h_inf, t2l_inf, ph_inf, pl_inf, log2edh_inf, log2edl_inf, logTabPolyh_inf, logTabPolyl_inf, logh_inf, logm_inf, roundcst_inf;
948
+ double logb2h_inf, logb2m_inf, logb2l_inf;
949
+ int E_inf, index_inf;
950
+
951
+ db_number xdb_sup;
952
+ double y_sup, ed_sup, ri_sup, logih_sup, logim_sup, yrih_sup, yril_sup, th_sup, zh_sup, zl_sup;
953
+ double polyHorner_sup, zhSquareh_sup, zhSquarel_sup, polyUpper_sup, zhSquareHalfh_sup, zhSquareHalfl_sup;
954
+ double t1h_sup, t1l_sup, t2h_sup, t2l_sup, ph_sup, pl_sup, log2edh_sup, log2edl_sup, logTabPolyh_sup, logTabPolyl_sup, logh_sup, logm_sup, roundcst_sup;
955
+ double logb2h_sup, logb2m_sup, logb2l_sup;
956
+ int E_sup, index_sup;
957
+
958
+ double x_inf, x_sup;
959
+ x_inf=LOW(x);
960
+ x_sup=UP(x);
961
+ infDone=0; supDone=0;
962
+
963
+ E_inf=0;
964
+ E_sup=0;
965
+ xdb_inf.d=x_inf;
966
+ xdb_sup.d=x_sup;
967
+
968
+ /* Filter cases */
969
+ if (xdb_inf.i[HI] < 0x00100000){ /* x < 2^(-1022) */
970
+ if (((xdb_inf.i[HI] & 0x7fffffff)|xdb_inf.i[LO])==0){
971
+ infDone=1;
972
+ restemp_inf = -1.0/0.0;
973
+ } /* log(+/-0) = -Inf */
974
+ if ((xdb_inf.i[HI] < 0) && (infDone==0)){
975
+ infDone=1;
976
+ restemp_inf = (x_inf-x_inf)/0; /* log(-x) = Nan */
977
+ }
978
+ /* Subnormal number */
979
+ E_inf = -52;
980
+ xdb_inf.d *= two52; /* make x a normal number */
981
+ }
982
+
983
+ if ((xdb_inf.i[HI] >= 0x7ff00000) && (infDone==0)){
984
+ infDone=1;
985
+ restemp_inf = x_inf+x_inf; /* Inf or Nan */
986
+ }
987
+
988
+ /* Filter cases */
989
+ if (xdb_sup.i[HI] < 0x00100000){ /* x < 2^(-1022) */
990
+ if (((xdb_sup.i[HI] & 0x7fffffff)|xdb_sup.i[LO])==0){
991
+ supDone=1;
992
+ restemp_sup = -1.0/0.0;
993
+ } /* log(+/-0) = -Inf */
994
+ if ((xdb_sup.i[HI] < 0) && (supDone==0)){
995
+ supDone=1;
996
+ restemp_sup = (x_sup-x_sup)/0; /* log(-x) = Nan */
997
+ }
998
+ /* Subnormal number */
999
+ E_sup = -52;
1000
+ xdb_sup.d *= two52; /* make x a normal number */
1001
+ }
1002
+
1003
+ if ((xdb_sup.i[HI] >= 0x7ff00000) && (supDone==0)){
1004
+ supDone=1;
1005
+ restemp_sup = x_sup+x_sup; /* Inf or Nan */
1006
+ }
1007
+
1008
+
1009
+ /* Extract exponent and mantissa
1010
+ Do range reduction,
1011
+ yielding to E holding the exponent and
1012
+ y the mantissa between sqrt(2)/2 and sqrt(2)
1013
+ */
1014
+ E_inf += (xdb_inf.i[HI]>>20)-1023; /* extract the exponent */
1015
+ E_sup += (xdb_sup.i[HI]>>20)-1023; /* extract the exponent */
1016
+ index_inf = (xdb_inf.i[HI] & 0x000fffff);
1017
+ index_sup = (xdb_sup.i[HI] & 0x000fffff);
1018
+
1019
+ /* Test now if the argument is an exact power of 2
1020
+ i.e. if the mantissa is exactly 1 (0x0..0 with the implicit bit)
1021
+ This test is necessary for filtering out the cases where the final
1022
+ rounding test cannot distinguish between an exact algebraic
1023
+ number and a hard case to round
1024
+ */
1025
+
1026
+ if (((index_inf | xdb_inf.i[LO]) == 0) && (infDone==0)) {
1027
+ /* Handle the "trivial" case for log2:
1028
+ The argument is an exact power of 2, return thus
1029
+ just the exponant of the number
1030
+ */
1031
+
1032
+ supDone=1;
1033
+ restemp_inf = (double) E_inf;
1034
+
1035
+ }
1036
+ if (((index_sup | xdb_sup.i[LO]) == 0) && (supDone==0)) {
1037
+ /* Handle the "trivial" case for log2:
1038
+ The argument is an exact power of 2, return thus
1039
+ just the exponant of the number
1040
+ */
1041
+
1042
+ supDone=1;
1043
+ restemp_sup = (double) E_sup;
1044
+
1045
+ }
1046
+
1047
+ if((infDone==1) && (supDone==1))
1048
+ {
1049
+ ASSIGN_LOW(res,restemp_inf);
1050
+ ASSIGN_UP(res,restemp_sup);
1051
+ return res;
1052
+ }
1053
+ if((infDone==0) && (supDone==0))
1054
+ {
1055
+ xdb_inf.i[HI] = index_inf | 0x3ff00000; /* do exponent = 0 */
1056
+ xdb_sup.i[HI] = index_sup | 0x3ff00000; /* do exponent = 0 */
1057
+ index_inf = (index_inf + (1<<(20-L-1))) >> (20-L);
1058
+ index_sup = (index_sup + (1<<(20-L-1))) >> (20-L);
1059
+
1060
+
1061
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
1062
+ if (index_inf >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
1063
+ xdb_inf.i[HI] -= 0x00100000;
1064
+ E_inf++;
1065
+ }
1066
+ if (index_sup >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
1067
+ xdb_sup.i[HI] -= 0x00100000;
1068
+ E_sup++;
1069
+ }
1070
+ y_inf = xdb_inf.d;
1071
+ y_sup = xdb_sup.d;
1072
+ index_inf = index_inf & INDEXMASK;
1073
+ index_sup = index_sup & INDEXMASK;
1074
+ /* Cast integer E into double ed for multiplication later */
1075
+ ed_inf = (double) E_inf;
1076
+ ed_sup = (double) E_sup;
1077
+ /*
1078
+ Read tables:
1079
+ Read one float for ri
1080
+ Read the first two doubles for -log(r_i) (out of three)
1081
+
1082
+ Organization of the table:
1083
+
1084
+ one struct entry per index, the struct entry containing
1085
+ r, logih, logim and logil in this order
1086
+ */
1087
+
1088
+ ri_inf = argredtable[index_inf].ri;
1089
+ ri_sup = argredtable[index_sup].ri;
1090
+
1091
+ /*
1092
+ Actually we don't need the logarithm entries now
1093
+ Move the following two lines to the eventual reconstruction
1094
+ As long as we don't have any if in the following code, we can overlap
1095
+ memory access with calculations
1096
+ */
1097
+ logih_inf = argredtable[index_inf].logih;
1098
+ logih_sup = argredtable[index_sup].logih;
1099
+ logim_inf = argredtable[index_inf].logim;
1100
+ logim_sup = argredtable[index_sup].logim;
1101
+
1102
+ /* Do range reduction:
1103
+
1104
+ zh + zl = y * ri - 1.0 correctly
1105
+
1106
+ Correctness is assured by use of Mul12 and Add12
1107
+ even if we don't force ri to have its' LSBs set to zero
1108
+
1109
+ Discard zl for higher monome degrees
1110
+ */
1111
+
1112
+ Mul12(&yrih_inf, &yril_inf, y_inf, ri_inf);
1113
+ Mul12(&yrih_sup, &yril_sup, y_sup, ri_sup);
1114
+ th_inf = yrih_inf - 1.0;
1115
+ th_sup = yrih_sup - 1.0;
1116
+ Add12Cond(zh_inf, zl_inf, th_inf, yril_inf);
1117
+ Add12Cond(zh_sup, zl_sup, th_sup, yril_sup);
1118
+
1119
+
1120
+ /*
1121
+ Polynomial evaluation
1122
+
1123
+ Use a 7 degree polynomial
1124
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
1125
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
1126
+ using an ad hoc method
1127
+
1128
+ */
1129
+
1130
+
1131
+
1132
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1133
+ polyHorner_inf = FMA(FMA(FMA(FMA(c7_inf,zh_inf,c6),zh_inf,c5),zh_inf,c4),zh_inf,c3);
1134
+ polyHorner_sup = FMA(FMA(FMA(FMA(c7,zh_sup,c6),zh_sup,c5),zh_sup,c4),zh_sup,c3);
1135
+ #else
1136
+ polyHorner_inf = c3 + zh_inf * (c4 + zh_inf * (c5 + zh_inf * (c6 + zh_inf * c7)));
1137
+ polyHorner_sup = c3 + zh_sup * (c4 + zh_sup * (c5 + zh_sup * (c6 + zh_sup * c7)));
1138
+ #endif
1139
+
1140
+ Mul12(&zhSquareh_inf, &zhSquarel_inf, zh_inf, zh_inf);
1141
+ Mul12(&zhSquareh_sup, &zhSquarel_sup, zh_sup, zh_sup);
1142
+ polyUpper_inf = polyHorner_inf * (zh_inf * zhSquareh_inf);
1143
+ polyUpper_sup = polyHorner_sup * (zh_sup * zhSquareh_sup);
1144
+ zhSquareHalfh_inf = zhSquareh_inf * -0.5;
1145
+ zhSquareHalfh_sup = zhSquareh_sup * -0.5;
1146
+ zhSquareHalfl_inf = zhSquarel_inf * -0.5;
1147
+ zhSquareHalfl_sup = zhSquarel_sup * -0.5;
1148
+ Add12(t1h_inf, t1l_inf, polyUpper_inf, -1 * (zh_inf * zl_inf));
1149
+ Add12(t1h_sup, t1l_sup, polyUpper_sup, -1 * (zh_sup * zl_sup));
1150
+ Add22(&t2h_inf, &t2l_inf, zh_inf, zl_inf, zhSquareHalfh_inf, zhSquareHalfl_inf);
1151
+ Add22(&t2h_sup, &t2l_sup, zh_sup, zl_sup, zhSquareHalfh_sup, zhSquareHalfl_sup);
1152
+ Add22(&ph_inf, &pl_inf, t2h_inf, t2l_inf, t1h_inf, t1l_inf);
1153
+ Add22(&ph_sup, &pl_sup, t2h_sup, t2l_sup, t1h_sup, t1l_sup);
1154
+
1155
+
1156
+ /* Reconstruction
1157
+
1158
+ Read logih and logim in the tables (already done)
1159
+
1160
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
1161
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
1162
+
1163
+ Carry out everything in double double precision
1164
+
1165
+ */
1166
+
1167
+ /*
1168
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
1169
+ Multiplication of ed (double E) and log2h is thus correct
1170
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
1171
+ is enough for the accurate phase
1172
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
1173
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
1174
+ triple double values. We have to take it into account for the accurate phase
1175
+ basic procedures for addition and multiplication
1176
+ The condition on the next Add12 is verified as log2m is smaller than log2h
1177
+ and both are scaled by ed
1178
+ */
1179
+
1180
+ Add12(log2edh_inf, log2edl_inf, log2h * ed_inf, log2m * ed_inf);
1181
+ Add12(log2edh_sup, log2edl_sup, log2h * ed_sup, log2m * ed_sup);
1182
+
1183
+ /* Add logih and logim to ph and pl
1184
+
1185
+ We must use conditioned Add22 as logih can move over ph
1186
+ */
1187
+
1188
+ Add22Cond(&logTabPolyh_inf, &logTabPolyl_inf, logih_inf, logim_inf, ph_inf, pl_inf);
1189
+ Add22Cond(&logTabPolyh_sup, &logTabPolyl_sup, logih_sup, logim_sup, ph_sup, pl_sup);
1190
+
1191
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
1192
+
1193
+ Add22Cond(&logh_inf, &logm_inf, log2edh_inf, log2edl_inf, logTabPolyh_inf, logTabPolyl_inf);
1194
+ Add22Cond(&logh_sup, &logm_sup, log2edh_sup, log2edl_sup, logTabPolyh_sup, logTabPolyl_sup);
1195
+
1196
+ /* Change logarithm base from natural base to base 2 by multiplying */
1197
+
1198
+ Mul22(&logb2h_inf, &logb2m_inf, log2invh, log2invl, logh_inf, logm_inf);
1199
+ Mul22(&logb2h_sup, &logb2m_sup, log2invh, log2invl, logh_sup, logm_sup);
1200
+
1201
+ /* Rounding test and eventual return or call to the accurate function */
1202
+
1203
+ if(E_inf==0)
1204
+ roundcst_inf = RDROUNDCST1;
1205
+ else
1206
+ roundcst_inf = RDROUNDCST2;
1207
+
1208
+ if(E_sup==0)
1209
+ roundcst_sup = RDROUNDCST1;
1210
+ else
1211
+ roundcst_sup = RDROUNDCST2;
1212
+
1213
+ TEST_AND_COPY_RDRU_LOG2(roundable, restemp_inf,logb2h_inf, logb2m_inf, restemp_sup, logb2h_sup, logb2m_sup, roundcst_inf, roundcst_sup);
1214
+
1215
+
1216
+ #if DEBUG
1217
+ printf("Going for Accurate Phase for x=%1.50e\n",x_inf);
1218
+ #endif
1219
+
1220
+ if((roundable==2) || (roundable==0))
1221
+ {
1222
+ log2_td_accurate(&logb2h_inf, &logb2m_inf, &logb2l_inf, E_inf, ed_inf, index_inf, zh_inf, zl_inf, logih_inf, logim_inf);
1223
+ RoundDownwards3(&restemp_inf,logb2h_inf, logb2m_inf, logb2l_inf);
1224
+ }
1225
+ if((roundable==1) || (roundable==0))
1226
+ {
1227
+ log2_td_accurate(&logb2h_sup, &logb2m_sup, &logb2l_sup, E_sup, ed_sup, index_sup, zh_sup, zl_sup, logih_sup, logim_sup);
1228
+ RoundUpwards3(&restemp_sup,logb2h_sup, logb2m_sup, logb2l_sup);
1229
+ }
1230
+ ASSIGN_LOW(res,restemp_inf);
1231
+ ASSIGN_UP(res,restemp_sup);
1232
+ return res;
1233
+ }
1234
+ if((infDone==0))
1235
+ {
1236
+ xdb_inf.i[HI] = index_inf | 0x3ff00000; /* do exponent = 0 */
1237
+ index_inf = (index_inf + (1<<(20-L-1))) >> (20-L);
1238
+
1239
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
1240
+ if (index_inf >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
1241
+ xdb_inf.i[HI] -= 0x00100000;
1242
+ E_inf++;
1243
+ }
1244
+ y_inf = xdb_inf.d;
1245
+ index_inf = index_inf & INDEXMASK;
1246
+ /* Cast integer E into double ed for multiplication later */
1247
+ ed_inf = (double) E_inf;
1248
+ /*
1249
+ Read tables:
1250
+ Read one float for ri
1251
+ Read the first two doubles for -log(r_i) (out of three)
1252
+ Organization of the table:
1253
+ one struct entry per index, the struct entry containing
1254
+ r, logih, logim and logil in this order
1255
+ */
1256
+
1257
+ ri_inf = argredtable[index_inf].ri;
1258
+
1259
+ /*
1260
+ Actually we don't need the logarithm entries now
1261
+ Move the following two lines to the eventual reconstruction
1262
+ As long as we don't have any if in the following code, we can overlap
1263
+ memory access with calculations
1264
+ */
1265
+ logih_inf = argredtable[index_inf].logih;
1266
+ logim_inf = argredtable[index_inf].logim;
1267
+
1268
+ /* Do range reduction:
1269
+
1270
+ zh + zl = y * ri - 1.0 correctly
1271
+
1272
+ Correctness is assured by use of Mul12 and Add12
1273
+ even if we don't force ri to have its' LSBs set to zero
1274
+
1275
+ Discard zl for higher monome degrees
1276
+ */
1277
+
1278
+ Mul12(&yrih_inf, &yril_inf, y_inf, ri_inf);
1279
+ th_inf = yrih_inf - 1.0;
1280
+ Add12Cond(zh_inf, zl_inf, th_inf, yril_inf);
1281
+
1282
+
1283
+ /*
1284
+ Polynomial evaluation
1285
+
1286
+ Use a 7 degree polynomial
1287
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
1288
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
1289
+ using an ad hoc method
1290
+
1291
+ */
1292
+
1293
+
1294
+
1295
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1296
+ polyHorner_inf = FMA(FMA(FMA(FMA(c7_inf,zh_inf,c6),zh_inf,c5),zh_inf,c4),zh_inf,c3);
1297
+ #else
1298
+ polyHorner_inf = c3 + zh_inf * (c4 + zh_inf * (c5 + zh_inf * (c6 + zh_inf * c7)));
1299
+ #endif
1300
+
1301
+ Mul12(&zhSquareh_inf, &zhSquarel_inf, zh_inf, zh_inf);
1302
+ polyUpper_inf = polyHorner_inf * (zh_inf * zhSquareh_inf);
1303
+ zhSquareHalfh_inf = zhSquareh_inf * -0.5;
1304
+ zhSquareHalfl_inf = zhSquarel_inf * -0.5;
1305
+ Add12(t1h_inf, t1l_inf, polyUpper_inf, -1 * (zh_inf * zl_inf));
1306
+ Add22(&t2h_inf, &t2l_inf, zh_inf, zl_inf, zhSquareHalfh_inf, zhSquareHalfl_inf);
1307
+ Add22(&ph_inf, &pl_inf, t2h_inf, t2l_inf, t1h_inf, t1l_inf);
1308
+
1309
+
1310
+ /* Reconstruction
1311
+
1312
+ Read logih and logim in the tables (already done)
1313
+
1314
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
1315
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
1316
+
1317
+ Carry out everything in double double precision
1318
+
1319
+ */
1320
+
1321
+ /*
1322
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
1323
+ Multiplication of ed (double E) and log2h is thus correct
1324
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
1325
+ is enough for the accurate phase
1326
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
1327
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
1328
+ triple double values. We have to take it into account for the accurate phase
1329
+ basic procedures for addition and multiplication
1330
+ The condition on the next Add12 is verified as log2m is smaller than log2h
1331
+ and both are scaled by ed
1332
+ */
1333
+
1334
+ Add12(log2edh_inf, log2edl_inf, log2h * ed_inf, log2m * ed_inf);
1335
+
1336
+ /* Add logih and logim to ph and pl
1337
+
1338
+ We must use conditioned Add22 as logih can move over ph
1339
+ */
1340
+
1341
+ Add22Cond(&logTabPolyh_inf, &logTabPolyl_inf, logih_inf, logim_inf, ph_inf, pl_inf);
1342
+
1343
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
1344
+
1345
+ Add22Cond(&logh_inf, &logm_inf, log2edh_inf, log2edl_inf, logTabPolyh_inf, logTabPolyl_inf);
1346
+
1347
+ /* Change logarithm base from natural base to base 2 by multiplying */
1348
+
1349
+ Mul22(&logb2h_inf, &logb2m_inf, log2invh, log2invl, logh_inf, logm_inf);
1350
+
1351
+ /* Rounding test and eventual return or call to the accurate function */
1352
+
1353
+ if(E_inf==0)
1354
+ roundcst_inf = RDROUNDCST1;
1355
+ else
1356
+ roundcst_inf = RDROUNDCST2;
1357
+
1358
+
1359
+ TEST_AND_COPY_RD(roundable, restemp_inf,logb2h_inf, logb2m_inf, roundcst_inf);
1360
+
1361
+
1362
+ #if DEBUG
1363
+ printf("Going for Accurate Phase for x=%1.50e\n",x_inf);
1364
+ #endif
1365
+
1366
+ if((roundable==0))
1367
+ {
1368
+ log2_td_accurate(&logb2h_inf, &logb2m_inf, &logb2l_inf, E_inf, ed_inf, index_inf, zh_inf, zl_inf, logih_inf, logim_inf);
1369
+ RoundDownwards3(&restemp_inf,logb2h_inf, logb2m_inf, logb2l_inf);
1370
+ }
1371
+ ASSIGN_LOW(res,restemp_inf);
1372
+ ASSIGN_UP(res,restemp_sup);
1373
+ return res;
1374
+ }
1375
+ if((supDone==0))
1376
+ {
1377
+ xdb_sup.i[HI] = index_sup | 0x3ff00000; /* do exponent = 0 */
1378
+ index_sup = (index_sup + (1<<(20-L-1))) >> (20-L);
1379
+
1380
+
1381
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
1382
+ if (index_sup >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
1383
+ xdb_sup.i[HI] -= 0x00100000;
1384
+ E_sup++;
1385
+ }
1386
+ y_sup = xdb_sup.d;
1387
+ index_sup = index_sup & INDEXMASK;
1388
+ /* Cast integer E into double ed for multiplication later */
1389
+ ed_sup = (double) E_sup;
1390
+ /*
1391
+ Read tables:
1392
+ Read one float for ri
1393
+ Read the first two doubles for -log(r_i) (out of three)
1394
+
1395
+ Organization of the table:
1396
+
1397
+ one struct entry per index, the struct entry containing
1398
+ r, logih, logim and logil in this order
1399
+ */
1400
+
1401
+ ri_sup = argredtable[index_sup].ri;
1402
+
1403
+ /*
1404
+ Actually we don't need the logarithm entries now
1405
+ Move the following two lines to the eventual reconstruction
1406
+ As long as we don't have any if in the following code, we can overlap
1407
+ memory access with calculations
1408
+ */
1409
+ logih_sup = argredtable[index_sup].logih;
1410
+ logim_sup = argredtable[index_sup].logim;
1411
+
1412
+ /* Do range reduction:
1413
+
1414
+ zh + zl = y * ri - 1.0 correctly
1415
+
1416
+ Correctness is assured by use of Mul12 and Add12
1417
+ even if we don't force ri to have its' LSBs set to zero
1418
+
1419
+ Discard zl for higher monome degrees
1420
+ */
1421
+
1422
+ Mul12(&yrih_sup, &yril_sup, y_sup, ri_sup);
1423
+ th_sup = yrih_sup - 1.0;
1424
+ Add12Cond(zh_sup, zl_sup, th_sup, yril_sup);
1425
+
1426
+
1427
+ /*
1428
+ Polynomial evaluation
1429
+
1430
+ Use a 7 degree polynomial
1431
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
1432
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
1433
+ using an ad hoc method
1434
+
1435
+ */
1436
+
1437
+
1438
+
1439
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1440
+ polyHorner_sup = FMA(FMA(FMA(FMA(c7,zh_sup,c6),zh_sup,c5),zh_sup,c4),zh_sup,c3);
1441
+ #else
1442
+ polyHorner_sup = c3 + zh_sup * (c4 + zh_sup * (c5 + zh_sup * (c6 + zh_sup * c7)));
1443
+ #endif
1444
+
1445
+ Mul12(&zhSquareh_sup, &zhSquarel_sup, zh_sup, zh_sup);
1446
+ polyUpper_sup = polyHorner_sup * (zh_sup * zhSquareh_sup);
1447
+ zhSquareHalfh_sup = zhSquareh_sup * -0.5;
1448
+ zhSquareHalfl_sup = zhSquarel_sup * -0.5;
1449
+ Add12(t1h_sup, t1l_sup, polyUpper_sup, -1 * (zh_sup * zl_sup));
1450
+ Add22(&t2h_sup, &t2l_sup, zh_sup, zl_sup, zhSquareHalfh_sup, zhSquareHalfl_sup);
1451
+ Add22(&ph_sup, &pl_sup, t2h_sup, t2l_sup, t1h_sup, t1l_sup);
1452
+
1453
+
1454
+ /* Reconstruction
1455
+
1456
+ Read logih and logim in the tables (already done)
1457
+
1458
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
1459
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
1460
+
1461
+ Carry out everything in double double precision
1462
+
1463
+ */
1464
+
1465
+ /*
1466
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
1467
+ Multiplication of ed (double E) and log2h is thus correct
1468
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
1469
+ is enough for the accurate phase
1470
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
1471
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
1472
+ triple double values. We have to take it into account for the accurate phase
1473
+ basic procedures for addition and multiplication
1474
+ The condition on the next Add12 is verified as log2m is smaller than log2h
1475
+ and both are scaled by ed
1476
+ */
1477
+
1478
+ Add12(log2edh_sup, log2edl_sup, log2h * ed_sup, log2m * ed_sup);
1479
+
1480
+ /* Add logih and logim to ph and pl
1481
+
1482
+ We must use conditioned Add22 as logih can move over ph
1483
+ */
1484
+
1485
+ Add22Cond(&logTabPolyh_sup, &logTabPolyl_sup, logih_sup, logim_sup, ph_sup, pl_sup);
1486
+
1487
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
1488
+
1489
+ Add22Cond(&logh_sup, &logm_sup, log2edh_sup, log2edl_sup, logTabPolyh_sup, logTabPolyl_sup);
1490
+
1491
+ /* Change logarithm base from natural base to base 2 by multiplying */
1492
+
1493
+ Mul22(&logb2h_sup, &logb2m_sup, log2invh, log2invl, logh_sup, logm_sup);
1494
+
1495
+ /* Rounding test and eventual return or call to the accurate function */
1496
+
1497
+ if(E_sup==0)
1498
+ roundcst_sup = RDROUNDCST1;
1499
+ else
1500
+ roundcst_sup = RDROUNDCST2;
1501
+
1502
+ TEST_AND_COPY_RU(roundable, restemp_sup,logb2h_sup, logb2m_sup, roundcst_sup);
1503
+
1504
+
1505
+ #if DEBUG
1506
+ printf("Going for Accurate Phase for x=%1.50e\n",x_inf);
1507
+ #endif
1508
+
1509
+ if((roundable==0))
1510
+ {
1511
+ log2_td_accurate(&logb2h_sup, &logb2m_sup, &logb2l_sup, E_sup, ed_sup, index_sup, zh_sup, zl_sup, logih_sup, logim_sup);
1512
+ RoundUpwards3(&restemp_sup,logb2h_sup, logb2m_sup, logb2l_sup);
1513
+ }
1514
+ ASSIGN_LOW(res,restemp_inf);
1515
+ ASSIGN_UP(res,restemp_sup);
1516
+ return res;
1517
+ }
1518
+
1519
+ }
1520
+ #endif
1521
+