intervals 0.3.56

Sign up to get free protection for your applications and to get access to all the features.
Files changed (131) hide show
  1. data/VERSION.txt +1 -0
  2. data/ext/crlibm/AUTHORS +2 -0
  3. data/ext/crlibm/COPYING +504 -0
  4. data/ext/crlibm/ChangeLog +80 -0
  5. data/ext/crlibm/INSTALL +182 -0
  6. data/ext/crlibm/Makefile.am +84 -0
  7. data/ext/crlibm/Makefile.in +530 -0
  8. data/ext/crlibm/NEWS +0 -0
  9. data/ext/crlibm/README +31 -0
  10. data/ext/crlibm/TODO +47 -0
  11. data/ext/crlibm/VERSION +1 -0
  12. data/ext/crlibm/aclocal.m4 +989 -0
  13. data/ext/crlibm/atan-itanium.c +846 -0
  14. data/ext/crlibm/atan-pentium.c +261 -0
  15. data/ext/crlibm/atan_accurate.c +244 -0
  16. data/ext/crlibm/atan_accurate.h +191 -0
  17. data/ext/crlibm/atan_fast.c +324 -0
  18. data/ext/crlibm/atan_fast.h +678 -0
  19. data/ext/crlibm/config.guess +1461 -0
  20. data/ext/crlibm/config.sub +1566 -0
  21. data/ext/crlibm/configure +7517 -0
  22. data/ext/crlibm/configure.ac +364 -0
  23. data/ext/crlibm/crlibm.h +125 -0
  24. data/ext/crlibm/crlibm_config.h +149 -0
  25. data/ext/crlibm/crlibm_config.h.in +148 -0
  26. data/ext/crlibm/crlibm_private.c +293 -0
  27. data/ext/crlibm/crlibm_private.h +658 -0
  28. data/ext/crlibm/csh_fast.c +631 -0
  29. data/ext/crlibm/csh_fast.h +771 -0
  30. data/ext/crlibm/double-extended.h +496 -0
  31. data/ext/crlibm/exp-td.c +962 -0
  32. data/ext/crlibm/exp-td.h +685 -0
  33. data/ext/crlibm/exp_accurate.c +197 -0
  34. data/ext/crlibm/exp_accurate.h +85 -0
  35. data/ext/crlibm/gappa/log-de-E0-logir0.gappa +106 -0
  36. data/ext/crlibm/gappa/log-de-E0.gappa +79 -0
  37. data/ext/crlibm/gappa/log-de.gappa +81 -0
  38. data/ext/crlibm/gappa/log-td-E0-logir0.gappa +126 -0
  39. data/ext/crlibm/gappa/log-td-E0.gappa +143 -0
  40. data/ext/crlibm/gappa/log-td-accurate-E0-logir0.gappa +230 -0
  41. data/ext/crlibm/gappa/log-td-accurate-E0.gappa +213 -0
  42. data/ext/crlibm/gappa/log-td-accurate.gappa +217 -0
  43. data/ext/crlibm/gappa/log-td.gappa +156 -0
  44. data/ext/crlibm/gappa/trigoSinCosCase3.gappa +204 -0
  45. data/ext/crlibm/gappa/trigoTanCase2.gappa +73 -0
  46. data/ext/crlibm/install-sh +269 -0
  47. data/ext/crlibm/log-de.c +431 -0
  48. data/ext/crlibm/log-de.h +732 -0
  49. data/ext/crlibm/log-td.c +852 -0
  50. data/ext/crlibm/log-td.h +819 -0
  51. data/ext/crlibm/log10-td.c +906 -0
  52. data/ext/crlibm/log10-td.h +823 -0
  53. data/ext/crlibm/log2-td.c +935 -0
  54. data/ext/crlibm/log2-td.h +821 -0
  55. data/ext/crlibm/maple/atan.mpl +359 -0
  56. data/ext/crlibm/maple/common-procedures.mpl +997 -0
  57. data/ext/crlibm/maple/csh.mpl +446 -0
  58. data/ext/crlibm/maple/double-extended.mpl +151 -0
  59. data/ext/crlibm/maple/exp-td.mpl +195 -0
  60. data/ext/crlibm/maple/log-de.mpl +243 -0
  61. data/ext/crlibm/maple/log-td.mpl +316 -0
  62. data/ext/crlibm/maple/log10-td.mpl +345 -0
  63. data/ext/crlibm/maple/log2-td.mpl +334 -0
  64. data/ext/crlibm/maple/trigo.mpl +728 -0
  65. data/ext/crlibm/maple/triple-double.mpl +58 -0
  66. data/ext/crlibm/missing +198 -0
  67. data/ext/crlibm/mkinstalldirs +40 -0
  68. data/ext/crlibm/rem_pio2_accurate.c +219 -0
  69. data/ext/crlibm/rem_pio2_accurate.h +53 -0
  70. data/ext/crlibm/scs_lib/AUTHORS +3 -0
  71. data/ext/crlibm/scs_lib/COPYING +504 -0
  72. data/ext/crlibm/scs_lib/ChangeLog +16 -0
  73. data/ext/crlibm/scs_lib/INSTALL +215 -0
  74. data/ext/crlibm/scs_lib/Makefile.am +18 -0
  75. data/ext/crlibm/scs_lib/Makefile.in +328 -0
  76. data/ext/crlibm/scs_lib/NEWS +0 -0
  77. data/ext/crlibm/scs_lib/README +9 -0
  78. data/ext/crlibm/scs_lib/TODO +4 -0
  79. data/ext/crlibm/scs_lib/addition_scs.c +623 -0
  80. data/ext/crlibm/scs_lib/config.guess +1461 -0
  81. data/ext/crlibm/scs_lib/config.sub +1566 -0
  82. data/ext/crlibm/scs_lib/configure +6226 -0
  83. data/ext/crlibm/scs_lib/division_scs.c +110 -0
  84. data/ext/crlibm/scs_lib/double2scs.c +174 -0
  85. data/ext/crlibm/scs_lib/install-sh +269 -0
  86. data/ext/crlibm/scs_lib/missing +198 -0
  87. data/ext/crlibm/scs_lib/mkinstalldirs +40 -0
  88. data/ext/crlibm/scs_lib/multiplication_scs.c +456 -0
  89. data/ext/crlibm/scs_lib/poly_fct.c +112 -0
  90. data/ext/crlibm/scs_lib/print_scs.c +73 -0
  91. data/ext/crlibm/scs_lib/rand_scs.c +63 -0
  92. data/ext/crlibm/scs_lib/scs.h +353 -0
  93. data/ext/crlibm/scs_lib/scs2double.c +391 -0
  94. data/ext/crlibm/scs_lib/scs2mpf.c +58 -0
  95. data/ext/crlibm/scs_lib/scs2mpfr.c +61 -0
  96. data/ext/crlibm/scs_lib/scs_private.c +23 -0
  97. data/ext/crlibm/scs_lib/scs_private.h +133 -0
  98. data/ext/crlibm/scs_lib/tests/tbx_timing.h +102 -0
  99. data/ext/crlibm/scs_lib/wrapper_scs.h +486 -0
  100. data/ext/crlibm/scs_lib/zero_scs.c +52 -0
  101. data/ext/crlibm/stamp-h.in +1 -0
  102. data/ext/crlibm/tests/Makefile.am +43 -0
  103. data/ext/crlibm/tests/Makefile.in +396 -0
  104. data/ext/crlibm/tests/blind_test.c +148 -0
  105. data/ext/crlibm/tests/generate_test_vectors.c +258 -0
  106. data/ext/crlibm/tests/soak_test.c +334 -0
  107. data/ext/crlibm/tests/test_common.c +627 -0
  108. data/ext/crlibm/tests/test_common.h +28 -0
  109. data/ext/crlibm/tests/test_perf.c +570 -0
  110. data/ext/crlibm/tests/test_val.c +249 -0
  111. data/ext/crlibm/trigo_accurate.c +500 -0
  112. data/ext/crlibm/trigo_accurate.h +331 -0
  113. data/ext/crlibm/trigo_fast.c +1219 -0
  114. data/ext/crlibm/trigo_fast.h +639 -0
  115. data/ext/crlibm/triple-double.h +878 -0
  116. data/ext/extconf.rb +31 -0
  117. data/ext/fpu.c +107 -0
  118. data/ext/jamis-mod.rb +591 -0
  119. data/lib/fpu.rb +287 -0
  120. data/lib/interval.rb +1170 -0
  121. data/lib/intervals.rb +212 -0
  122. data/lib/struct_float.rb +133 -0
  123. data/test/data_atan.txt +360 -0
  124. data/test/data_cos.txt +346 -0
  125. data/test/data_cosh.txt +3322 -0
  126. data/test/data_exp.txt +3322 -0
  127. data/test/data_log.txt +141 -0
  128. data/test/data_sin.txt +140 -0
  129. data/test/data_sinh.txt +3322 -0
  130. data/test/data_tan.txt +342 -0
  131. metadata +186 -0
@@ -0,0 +1,962 @@
1
+ /*
2
+ * This function computes exp, correctly rounded,
3
+ * using experimental techniques based on triple double arithmetics
4
+
5
+ THIS IS EXPERIMENTAL SOFTWARE
6
+
7
+ *
8
+ * Author : Christoph Lauter
9
+ * christoph.lauter at ens-lyon.fr
10
+ *
11
+
12
+ To have it replace the crlibm exp, do:
13
+
14
+ gcc -DHAVE_CONFIG_H -I. -fPIC -O2 -c exp-td.c; mv exp-td.o exp_fast.o; make
15
+
16
+ */
17
+
18
+
19
+ #include <stdio.h>
20
+ #include <stdlib.h>
21
+ #include "crlibm.h"
22
+ #include "crlibm_private.h"
23
+ #include "triple-double.h"
24
+ #include "exp-td.h"
25
+
26
+ #define AVOID_FMA 0
27
+ #define EVAL_PERF 1
28
+
29
+
30
+
31
+
32
+
33
+
34
+
35
+ void exp_td_accurate(double *polyTblh, double *polyTblm, double *polyTbll,
36
+ double rh, double rm, double rl,
37
+ double tbl1h, double tbl1m, double tbl1l,
38
+ double tbl2h, double tbl2m, double tbl2l) {
39
+ double highPoly, highPolyMulth, highPolyMultm, highPolyMultl;
40
+ double rhSquareh, rhSquarel, rhSquareHalfh, rhSquareHalfl;
41
+ double rhCubeh, rhCubem, rhCubel;
42
+ double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5, t6;
43
+ double lowPolyh, lowPolym, lowPolyl;
44
+ double ph, pm, pl, phnorm, pmnorm, rmlMultPh, rmlMultPl;
45
+ double qh, ql, fullPolyh, fullPolym, fullPolyl;
46
+ double polyWithTbl1h, polyWithTbl1m, polyWithTbl1l;
47
+ double polyAddOneh,polyAddOnem,polyAddOnel;
48
+ double polyWithTablesh, polyWithTablesm, polyWithTablesl;
49
+
50
+
51
+ #if EVAL_PERF
52
+ crlibm_second_step_taken++;
53
+ #endif
54
+
55
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
56
+ highPoly = FMA(FMA(accPolyC7,rh,accPolyC6),rh,accPolyC5);
57
+ #else
58
+ highPoly = accPolyC5 + rh * (accPolyC6 + rh * accPolyC7);
59
+ #endif
60
+
61
+ Mul12(&t1h,&t1l,rh,highPoly);
62
+ Add22(&t2h,&t2l,accPolyC4h,accPolyC4l,t1h,t1l);
63
+ Mul22(&t3h,&t3l,rh,0,t2h,t2l);
64
+ Add22(&t4h,&t4l,accPolyC3h,accPolyC3l,t3h,t3l);
65
+
66
+ Mul12(&rhSquareh,&rhSquarel,rh,rh);
67
+ Mul23(&rhCubeh,&rhCubem,&rhCubel,rh,0,rhSquareh,rhSquarel);
68
+
69
+ rhSquareHalfh = 0.5 * rhSquareh;
70
+ rhSquareHalfl = 0.5 * rhSquarel;
71
+
72
+ Renormalize3(&lowPolyh,&lowPolym,&lowPolyl,rh,rhSquareHalfh,rhSquareHalfl);
73
+
74
+ Mul233(&highPolyMulth,&highPolyMultm,&highPolyMultl,t4h,t4l,rhCubeh,rhCubem,rhCubel);
75
+
76
+ Add33(&ph,&pm,&pl,lowPolyh,lowPolym,lowPolyl,highPolyMulth,highPolyMultm,highPolyMultl);
77
+
78
+ Add12(phnorm,pmnorm,ph,pm);
79
+ Mul22(&rmlMultPh,&rmlMultPl,rm,rl,phnorm,pmnorm);
80
+ Add22(&qh,&ql,rm,rl,rmlMultPh,rmlMultPl);
81
+
82
+ Add233Cond(&fullPolyh,&fullPolym,&fullPolyl,qh,ql,ph,pm,pl);
83
+ Add12(polyAddOneh,t5,1,fullPolyh);
84
+ Add12Cond(polyAddOnem,t6,t5,fullPolym);
85
+ polyAddOnel = t6 + fullPolyl;
86
+ Mul33(&polyWithTbl1h,&polyWithTbl1m,&polyWithTbl1l,tbl1h,tbl1m,tbl1l,polyAddOneh,polyAddOnem,polyAddOnel);
87
+ Mul33(&polyWithTablesh,&polyWithTablesm,&polyWithTablesl,
88
+ tbl2h,tbl2m,tbl2l,
89
+ polyWithTbl1h,polyWithTbl1m,polyWithTbl1l);
90
+
91
+ Renormalize3(polyTblh,polyTblm,polyTbll,polyWithTablesh,polyWithTablesm,polyWithTablesl);
92
+ }
93
+
94
+
95
+
96
+ /*************************************************************
97
+ *************************************************************
98
+ * ROUNDED TO NEAREST *
99
+ *************************************************************
100
+ *************************************************************/
101
+ double exp_rn(double x){
102
+ double rh, rm, rl, tbl1h, tbl1m, tbl1l;
103
+ double tbl2h, tbl2m, tbl2l;
104
+ double xMultLog2InvMult2L, shiftedXMult, kd;
105
+ double msLog2Div2LMultKh, msLog2Div2LMultKm, msLog2Div2LMultKl;
106
+ double t1, t2, t3, t4, polyTblh, polyTblm, polyTbll;
107
+ db_number shiftedXMultdb, twoPowerMdb, xdb, t4db, t4db2, polyTblhdb, resdb;
108
+ int k, M, index1, index2, xIntHi, mightBeDenorm;
109
+ double t5, t6, t7, t8, t9, t10, t11, t12, t13;
110
+ double rhSquare, rhSquareHalf, rhC3, rhFour, monomialCube;
111
+ double highPoly, highPolyWithSquare, monomialFour;
112
+ double tablesh, tablesl;
113
+ double s1, s2, s3, s4, s5;
114
+ double res;
115
+
116
+ /* Argument reduction and filtering for special cases */
117
+
118
+ /* Compute k as a double and as an int */
119
+ xdb.d = x;
120
+ xMultLog2InvMult2L = x * log2InvMult2L;
121
+ shiftedXMult = xMultLog2InvMult2L + shiftConst;
122
+ kd = shiftedXMult - shiftConst;
123
+ shiftedXMultdb.d = shiftedXMult;
124
+
125
+ /* Special cases tests */
126
+ xIntHi = xdb.i[HI];
127
+ mightBeDenorm = 0;
128
+ /* Test if argument is a denormal or zero */
129
+ if ((xIntHi & 0x7ff00000) == 0) {
130
+ /* We are in the RN case, return 1.0 in all cases */
131
+ return 1.0;
132
+ }
133
+
134
+ /* Test if argument is greater than approx. 709 in magnitude */
135
+ if ((xIntHi & 0x7fffffff) >= OVRUDRFLWSMPLBOUND) {
136
+ /* If we are here, the result might be overflowed, underflowed, inf, or NaN */
137
+
138
+ /* Test if +/- Inf or NaN */
139
+ if ((xIntHi & 0x7fffffff) >= 0x7ff00000) {
140
+ /* Either NaN or Inf in this case since exponent is maximal */
141
+
142
+ /* Test if NaN: mantissa is not 0 */
143
+ if (((xIntHi & 0x000fffff) | xdb.i[LO]) != 0) {
144
+ /* x = NaN, return NaN */
145
+ return x + x;
146
+ } else {
147
+ /* +/- Inf */
148
+
149
+ /* Test sign */
150
+ if ((xIntHi & 0x80000000)==0)
151
+ /* x = +Inf, return +Inf */
152
+ return x;
153
+ else
154
+ /* x = -Inf, return 0 */
155
+ return 0;
156
+ } /* End which in NaN, Inf */
157
+ } /* End NaN or Inf ? */
158
+
159
+ /* If we are here, we might be overflowed, denormalized or underflowed in the result
160
+ but there is no special case (NaN, Inf) left */
161
+
162
+ /* Test if actually overflowed */
163
+ if (x > OVRFLWBOUND) {
164
+ /* We are actually overflowed in the result */
165
+ return LARGEST * LARGEST;
166
+ }
167
+
168
+ /* Test if surely underflowed */
169
+ if (x <= UNDERFLWBOUND) {
170
+ /* We are actually sure to be underflowed and not denormalized any more
171
+ So we return 0 and raise the inexact flag */
172
+ return SMALLEST * SMALLEST;
173
+ }
174
+
175
+ /* Test if possibly denormalized */
176
+ if (x <= DENORMBOUND) {
177
+ /* We know now that we are not sure to be normalized in the result
178
+ We just set an internal flag for a further test
179
+ */
180
+ mightBeDenorm = 1;
181
+ }
182
+ } /* End might be a special case */
183
+
184
+ /* If we are here, we are sure to be neither +/- Inf nor NaN nor overflowed nor denormalized in the argument
185
+ but we might be denormalized in the result
186
+
187
+ We continue the argument reduction for the quick phase and table reads for both phases
188
+ */
189
+
190
+ Mul12(&s1,&s2,msLog2Div2Lh,kd);
191
+ s3 = kd * msLog2Div2Lm;
192
+ s4 = s2 + s3;
193
+ s5 = x + s1;
194
+ Add12Cond(rh,rm,s5,s4);
195
+
196
+ k = shiftedXMultdb.i[LO];
197
+ M = k >> L;
198
+ index1 = k & INDEXMASK1;
199
+ index2 = (k & INDEXMASK2) >> LHALF;
200
+
201
+ /* Table reads */
202
+ tbl1h = twoPowerIndex1[index1].hi;
203
+ tbl1m = twoPowerIndex1[index1].mi;
204
+ tbl2h = twoPowerIndex2[index2].hi;
205
+ tbl2m = twoPowerIndex2[index2].mi;
206
+
207
+ /* Test now if it is sure to launch the quick phase because no denormalized result is possible */
208
+ if (mightBeDenorm == 1) {
209
+ /* The result might be denormalized, we launch the accurate phase in all cases */
210
+
211
+ /* Rest of argument reduction for accurate phase */
212
+
213
+ Mul133(&msLog2Div2LMultKh,&msLog2Div2LMultKm,&msLog2Div2LMultKl,kd,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
214
+ t1 = x + msLog2Div2LMultKh;
215
+ Add12Cond(rh,t2,t1,msLog2Div2LMultKm);
216
+ Add12Cond(rm,rl,t2,msLog2Div2LMultKl);
217
+
218
+ /* Table reads for accurate phase */
219
+ tbl1l = twoPowerIndex1[index1].lo;
220
+ tbl2l = twoPowerIndex2[index2].lo;
221
+
222
+ /* Call accurate phase */
223
+ exp_td_accurate(&polyTblh, &polyTblm, &polyTbll, rh, rm, rl, tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l);
224
+
225
+ /* Final rounding and multiplication with 2^M
226
+
227
+ We first multiply the highest significant byte by 2^M in two steps
228
+ and adjust it then depending on the lower significant parts.
229
+
230
+ We cannot multiply directly by 2^M since M is less than -1022.
231
+ We first multiply by 2^(-1000) and then by 2^(M+1000).
232
+
233
+ */
234
+
235
+ t3 = polyTblh * twoPowerM1000;
236
+
237
+ /* Form now twoPowerM with adjusted M */
238
+ twoPowerMdb.i[LO] = 0;
239
+ twoPowerMdb.i[HI] = (M + 2023) << 20;
240
+
241
+
242
+ /* Multiply with the rest of M, the result will be denormalized */
243
+ t4 = t3 * twoPowerMdb.d;
244
+
245
+ /* For x86, force the compiler to pass through memory for having the right rounding */
246
+
247
+ t4db.d = t4; /* Do not #if-ify this line, we need the copy */
248
+ #if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
249
+ t4db2.i[HI] = t4db.i[HI];
250
+ t4db2.i[LO] = t4db.i[LO];
251
+ t4 = t4db2.d;
252
+ #endif
253
+
254
+ /* Remultiply by 2^(-M) for manipulating the rounding error and the lower significant parts */
255
+ M *= -1;
256
+ twoPowerMdb.i[LO] = 0;
257
+ twoPowerMdb.i[HI] = (M + 23) << 20;
258
+ t5 = t4 * twoPowerMdb.d;
259
+ t6 = t5 * twoPower1000;
260
+ t7 = polyTblh - t6;
261
+
262
+ /* The rounding decision is made at 1 ulp of a denormal, i.e. at 2^(-1075)
263
+ We construct this number and by comparing with it we get to know
264
+ whether we are in a difficult rounding case or not. If not we just return
265
+ the known result. Otherwise we continue with further tests.
266
+ */
267
+
268
+ twoPowerMdb.i[LO] = 0;
269
+ twoPowerMdb.i[HI] = (M - 52) << 20;
270
+
271
+ if (ABS(t7) != twoPowerMdb.d) return t4;
272
+
273
+ /* If we are here, we are in a difficult rounding case */
274
+
275
+ /* We have to adjust the result iff the sign of the error on
276
+ rounding 2^M * polyTblh (which must be an ulp of a denormal)
277
+ and polyTblm +arith polyTbll is the same which means that
278
+ the error made was greater than an ulp of an denormal.
279
+ */
280
+
281
+ polyTblm = polyTblm + polyTbll;
282
+
283
+ if (t7 > 0.0) {
284
+ if (polyTblm > 0.0) {
285
+ t4db.l++;
286
+ return t4db.d;
287
+ } else return t4;
288
+ } else {
289
+ if (polyTblm < 0.0) {
290
+ t4db.l--;
291
+ return t4db.d;
292
+ } else return t4;
293
+ }
294
+ } /* End accurate phase launched as there might be a denormalized result */
295
+
296
+ /* No more underflow nor denormal is possible. There may be the case where
297
+ M is 1024 and the value 2^M is to be multiplied may be less than 1
298
+ So the final result will be normalized and representable by the multiplication must be
299
+ made in 2 steps
300
+ */
301
+
302
+ /* Quick phase starts here */
303
+
304
+ rhSquare = rh * rh;
305
+ rhC3 = c3 * rh;
306
+
307
+ rhSquareHalf = 0.5 * rhSquare;
308
+ monomialCube = rhC3 * rhSquare;
309
+ rhFour = rhSquare * rhSquare;
310
+
311
+ monomialFour = c4 * rhFour;
312
+
313
+ highPoly = monomialCube + monomialFour;
314
+
315
+ highPolyWithSquare = rhSquareHalf + highPoly;
316
+
317
+ Mul22(&tablesh,&tablesl,tbl1h,tbl1m,tbl2h,tbl2m);
318
+
319
+ t8 = rm + highPolyWithSquare;
320
+ t9 = rh + t8;
321
+
322
+ t10 = tablesh * t9;
323
+
324
+ Add12(t11,t12,tablesh,t10);
325
+ t13 = t12 + tablesl;
326
+ Add12(polyTblh,polyTblm,t11,t13);
327
+
328
+ /* Rounding test
329
+ Since we know that the result of the final multiplication with 2^M
330
+ will always be representable, we can do the rounding test on the
331
+ factors and multiply only the final result.
332
+ We implement the multiplication in integer computations to overcome
333
+ the problem of the non-representability of 2^1024 if M = 1024
334
+ */
335
+
336
+ if(polyTblh == (polyTblh + (polyTblm * ROUNDCST))) {
337
+ polyTblhdb.d = polyTblh;
338
+ polyTblhdb.i[HI] += M << 20;
339
+ return polyTblhdb.d;
340
+ } else
341
+ {
342
+ /* Rest of argument reduction for accurate phase */
343
+
344
+ Mul133(&msLog2Div2LMultKh,&msLog2Div2LMultKm,&msLog2Div2LMultKl,kd,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
345
+ t1 = x + msLog2Div2LMultKh;
346
+ Add12Cond(rh,t2,t1,msLog2Div2LMultKm);
347
+ Add12Cond(rm,rl,t2,msLog2Div2LMultKl);
348
+
349
+ /* Table reads for accurate phase */
350
+ tbl1l = twoPowerIndex1[index1].lo;
351
+ tbl2l = twoPowerIndex2[index2].lo;
352
+
353
+ /* Call accurate phase */
354
+ exp_td_accurate(&polyTblh, &polyTblm, &polyTbll, rh, rm, rl, tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l);
355
+
356
+ /* Since the final multiplication is exact, we can do the final rounding before multiplying
357
+ We overcome this way also the cases where the final result is not underflowed whereas the
358
+ lower parts of the intermediate final result are.
359
+ */
360
+
361
+ RoundToNearest3(&res,polyTblh,polyTblm,polyTbll);
362
+
363
+ /* Final multiplication with 2^M
364
+ We implement the multiplication in integer computations to overcome
365
+ the problem of the non-representability of 2^1024 if M = 1024
366
+ */
367
+
368
+ resdb.d = res;
369
+ resdb.i[HI] += M << 20;
370
+ return resdb.d;
371
+ } /* Accurate phase launched after rounding test*/
372
+ }
373
+
374
+
375
+ /*************************************************************
376
+ *************************************************************
377
+ * ROUNDED UPWARDS *
378
+ *************************************************************
379
+ *************************************************************/
380
+ double exp_ru(double x) {
381
+ double rh, rm, rl, tbl1h, tbl1m, tbl1l;
382
+ double tbl2h, tbl2m, tbl2l;
383
+ double xMultLog2InvMult2L, shiftedXMult, kd;
384
+ double msLog2Div2LMultKh, msLog2Div2LMultKm, msLog2Div2LMultKl;
385
+ double t1, t2, t3, t4, polyTblh, polyTblm, polyTbll;
386
+ db_number shiftedXMultdb, twoPowerMdb, xdb, t4db, t4db2, resdb;
387
+ int k, M, index1, index2, xIntHi, mightBeDenorm, roundable;
388
+ double t5, t6, t7, t8, t9, t10, t11, t12, t13;
389
+ double rhSquare, rhSquareHalf, rhC3, rhFour, monomialCube;
390
+ double highPoly, highPolyWithSquare, monomialFour;
391
+ double tablesh, tablesl;
392
+ double s1, s2, s3, s4, s5;
393
+ double res;
394
+
395
+ /* Argument reduction and filtering for special cases */
396
+
397
+ /* Compute k as a double and as an int */
398
+ xdb.d = x;
399
+ xMultLog2InvMult2L = x * log2InvMult2L;
400
+ shiftedXMult = xMultLog2InvMult2L + shiftConst;
401
+ kd = shiftedXMult - shiftConst;
402
+ shiftedXMultdb.d = shiftedXMult;
403
+
404
+ /* Special cases tests */
405
+ xIntHi = xdb.i[HI];
406
+ mightBeDenorm = 0;
407
+ /* Test if argument is a denormal or zero */
408
+ if ((xIntHi & 0x7ff00000) == 0) {
409
+ /* If the argument is exactly zero, we just return 1.0
410
+ which is the mathematical image of the function
411
+ */
412
+ if (x == 0.0) return 1.0;
413
+
414
+ /* If the argument is a negative denormal, we
415
+ must return 1.0 and raise the inexact flag.
416
+ */
417
+
418
+ if (x < 0.0) return 1.0 + SMALLEST;
419
+
420
+ /* Otherwise, we return 1.0 + 1ulp since
421
+ exp(greatest denorm) < 1.0 + 1ulp
422
+ We must do the addition dynamically for
423
+ raising the inexact flag.
424
+ */
425
+
426
+ return 1.0 + twoM52;
427
+ }
428
+
429
+ /* Test if argument is greater than approx. 709 in magnitude */
430
+ if ((xIntHi & 0x7fffffff) >= OVRUDRFLWSMPLBOUND) {
431
+ /* If we are here, the result might be overflowed, underflowed, inf, or NaN */
432
+
433
+ /* Test if +/- Inf or NaN */
434
+ if ((xIntHi & 0x7fffffff) >= 0x7ff00000) {
435
+ /* Either NaN or Inf in this case since exponent is maximal */
436
+
437
+ /* Test if NaN: mantissa is not 0 */
438
+ if (((xIntHi & 0x000fffff) | xdb.i[LO]) != 0) {
439
+ /* x = NaN, return NaN */
440
+ return x + x;
441
+ } else {
442
+ /* +/- Inf */
443
+
444
+ /* Test sign */
445
+ if ((xIntHi & 0x80000000)==0)
446
+ /* x = +Inf, return +Inf */
447
+ return x;
448
+ else
449
+ /* x = -Inf, return 0 (even in RU!) */
450
+ return 0;
451
+ } /* End which in NaN, Inf */
452
+ } /* End NaN or Inf ? */
453
+
454
+ /* If we are here, we might be overflowed, denormalized or underflowed in the result
455
+ but there is no special case (NaN, Inf) left */
456
+
457
+ /* Test if actually overflowed */
458
+ if (x > OVRFLWBOUND) {
459
+ /* We are actually overflowed in the result */
460
+ return LARGEST * LARGEST;
461
+ }
462
+
463
+ /* Test if surely underflowed */
464
+ if (x <= UNDERFLWBOUND) {
465
+ /* We are actually sure to be underflowed and not denormalized any more
466
+ (at least where computing makes sense); since we are in the round
467
+ upwards case, we return the smallest denormal possible.
468
+ */
469
+ return SMALLEST;
470
+ }
471
+
472
+ /* Test if possibly denormalized */
473
+ if (x <= DENORMBOUND) {
474
+ /* We know now that we are not sure to be normalized in the result
475
+ We just set an internal flag for a further test
476
+ */
477
+ mightBeDenorm = 1;
478
+ }
479
+ } /* End might be a special case */
480
+
481
+ /* If we are here, we are sure to be neither +/- Inf nor NaN nor overflowed nor denormalized in the argument
482
+ but we might be denormalized in the result
483
+
484
+ We continue the argument reduction for the quick phase and table reads for both phases
485
+ */
486
+
487
+ Mul12(&s1,&s2,msLog2Div2Lh,kd);
488
+ s3 = kd * msLog2Div2Lm;
489
+ s4 = s2 + s3;
490
+ s5 = x + s1;
491
+ Add12Cond(rh,rm,s5,s4);
492
+
493
+ k = shiftedXMultdb.i[LO];
494
+ M = k >> L;
495
+ index1 = k & INDEXMASK1;
496
+ index2 = (k & INDEXMASK2) >> LHALF;
497
+
498
+ /* Table reads */
499
+ tbl1h = twoPowerIndex1[index1].hi;
500
+ tbl1m = twoPowerIndex1[index1].mi;
501
+ tbl2h = twoPowerIndex2[index2].hi;
502
+ tbl2m = twoPowerIndex2[index2].mi;
503
+
504
+ /* Test now if it is sure to launch the quick phase because no denormalized result is possible */
505
+ if (mightBeDenorm == 1) {
506
+ /* The result might be denormalized, we launch the accurate phase in all cases */
507
+
508
+ /* Rest of argument reduction for accurate phase */
509
+
510
+ Mul133(&msLog2Div2LMultKh,&msLog2Div2LMultKm,&msLog2Div2LMultKl,kd,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
511
+ t1 = x + msLog2Div2LMultKh;
512
+ Add12Cond(rh,t2,t1,msLog2Div2LMultKm);
513
+ Add12Cond(rm,rl,t2,msLog2Div2LMultKl);
514
+
515
+ /* Table reads for accurate phase */
516
+ tbl1l = twoPowerIndex1[index1].lo;
517
+ tbl2l = twoPowerIndex2[index2].lo;
518
+
519
+ /* Call accurate phase */
520
+ exp_td_accurate(&polyTblh, &polyTblm, &polyTbll, rh, rm, rl, tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l);
521
+
522
+ /* Final rounding and multiplication with 2^M
523
+
524
+ We first multiply the highest significant byte by 2^M in two steps
525
+ and adjust it then depending on the lower significant parts.
526
+
527
+ We cannot multiply directly by 2^M since M is less than -1022.
528
+ We first multiply by 2^(-1000) and then by 2^(M+1000).
529
+
530
+ */
531
+
532
+ t3 = polyTblh * twoPowerM1000;
533
+
534
+ /* Form now twoPowerM with adjusted M */
535
+ twoPowerMdb.i[LO] = 0;
536
+ twoPowerMdb.i[HI] = (M + 2023) << 20;
537
+
538
+
539
+ /* Multiply with the rest of M, the result will be denormalized */
540
+ t4 = t3 * twoPowerMdb.d;
541
+
542
+ /* For x86, force the compiler to pass through memory for having the right rounding */
543
+
544
+ t4db.d = t4; /* Do not #if-ify this line, we need the copy */
545
+ #if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
546
+ t4db2.i[HI] = t4db.i[HI];
547
+ t4db2.i[LO] = t4db.i[LO];
548
+ t4 = t4db2.d;
549
+ #endif
550
+
551
+
552
+ /* Remultiply by 2^(-M) for manipulating the rounding error and the lower significant parts */
553
+ M *= -1;
554
+ twoPowerMdb.i[LO] = 0;
555
+ twoPowerMdb.i[HI] = (M + 23) << 20;
556
+ t5 = t4 * twoPowerMdb.d;
557
+ t6 = t5 * twoPower1000;
558
+ t7 = polyTblh - t6;
559
+
560
+ /* The rounding can be decided using the sign of the arithmetical sum of the
561
+ round-to-nearest-error (i.e. t7) and the lower part(s) of the final result.
562
+ We add first the lower parts and add the result to the error in t7. We have to
563
+ keep in mind that everything is scaled by 2^(-M).
564
+ t8 can never be exactly 0 since we filter out the cases where the image of the
565
+ function is algebraic and the implementation is exacter than the TMD worst case.
566
+ */
567
+
568
+ polyTblm = polyTblm + polyTbll;
569
+ t8 = t7 + polyTblm;
570
+
571
+ /* Since we are rounding upwards, the round-to-nearest-rounding result in t4 is
572
+ equal to the final result if the rounding error (i.e. the error plus the lower parts)
573
+ is negative, i.e. if the rounding-to-nearest was upwards.
574
+ */
575
+
576
+ if (t8 < 0.0) return t4;
577
+
578
+ /* If we are here, we must adjust the final result by +1ulp
579
+ Relying on the fact that the exponential is always positive, we can simplify this
580
+ adjustment
581
+ */
582
+
583
+ t4db.l++;
584
+ return t4db.d;
585
+ } /* End accurate phase launched as there might be a denormalized result */
586
+
587
+ /* No more underflow nor denormal is possible. There may be the case where
588
+ M is 1024 and the value 2^M is to be multiplied may be less than 1
589
+ So the final result will be normalized and representable by the multiplication must be
590
+ made in 2 steps
591
+ */
592
+
593
+ /* Quick phase starts here */
594
+
595
+ rhSquare = rh * rh;
596
+ rhC3 = c3 * rh;
597
+
598
+ rhSquareHalf = 0.5 * rhSquare;
599
+ monomialCube = rhC3 * rhSquare;
600
+ rhFour = rhSquare * rhSquare;
601
+
602
+ monomialFour = c4 * rhFour;
603
+
604
+ highPoly = monomialCube + monomialFour;
605
+
606
+ highPolyWithSquare = rhSquareHalf + highPoly;
607
+
608
+ Mul22(&tablesh,&tablesl,tbl1h,tbl1m,tbl2h,tbl2m);
609
+
610
+ t8 = rm + highPolyWithSquare;
611
+ t9 = rh + t8;
612
+
613
+ t10 = tablesh * t9;
614
+
615
+ Add12(t11,t12,tablesh,t10);
616
+ t13 = t12 + tablesl;
617
+ Add12(polyTblh,polyTblm,t11,t13);
618
+
619
+ /* Rounding test
620
+ Since we know that the result of the final multiplication with 2^M
621
+ will always be representable, we can do the rounding test on the
622
+ factors and multiply only the final result.
623
+ We implement the multiplication in integer computations to overcome
624
+ the problem of the non-representability of 2^1024 if M = 1024
625
+ */
626
+
627
+ TEST_AND_COPY_RU(roundable,res,polyTblh,polyTblm,RDROUNDCST);
628
+
629
+ if (roundable) {
630
+ resdb.d = res;
631
+ resdb.i[HI] += M << 20;
632
+ return resdb.d;
633
+ } else
634
+ {
635
+ /* Rest of argument reduction for accurate phase */
636
+
637
+ Mul133(&msLog2Div2LMultKh,&msLog2Div2LMultKm,&msLog2Div2LMultKl,kd,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
638
+ t1 = x + msLog2Div2LMultKh;
639
+ Add12Cond(rh,t2,t1,msLog2Div2LMultKm);
640
+ Add12Cond(rm,rl,t2,msLog2Div2LMultKl);
641
+
642
+ /* Table reads for accurate phase */
643
+ tbl1l = twoPowerIndex1[index1].lo;
644
+ tbl2l = twoPowerIndex2[index2].lo;
645
+
646
+ /* Call accurate phase */
647
+ exp_td_accurate(&polyTblh, &polyTblm, &polyTbll, rh, rm, rl, tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l);
648
+
649
+ /* Since the final multiplication is exact, we can do the final rounding before multiplying
650
+ We overcome this way also the cases where the final result is not underflowed whereas the
651
+ lower parts of the intermediate final result are.
652
+ */
653
+
654
+ RoundUpwards3(&res,polyTblh,polyTblm,polyTbll);
655
+
656
+ /* Final multiplication with 2^M
657
+ We implement the multiplication in integer computations to overcome
658
+ the problem of the non-representability of 2^1024 if M = 1024
659
+ */
660
+
661
+ resdb.d = res;
662
+ resdb.i[HI] += M << 20;
663
+ return resdb.d;
664
+ } /* Accurate phase launched after rounding test*/
665
+ }
666
+
667
+
668
+ /*************************************************************
669
+ *************************************************************
670
+ * ROUNDED DOWNWARDS *
671
+ *************************************************************
672
+ *************************************************************/
673
+ double exp_rd(double x) {
674
+ double rh, rm, rl, tbl1h, tbl1m, tbl1l;
675
+ double tbl2h, tbl2m, tbl2l;
676
+ double xMultLog2InvMult2L, shiftedXMult, kd;
677
+ double msLog2Div2LMultKh, msLog2Div2LMultKm, msLog2Div2LMultKl;
678
+ double t1, t2, t3, t4, polyTblh, polyTblm, polyTbll;
679
+ db_number shiftedXMultdb, twoPowerMdb, xdb, t4db, t4db2, resdb;
680
+ int k, M, index1, index2, xIntHi, mightBeDenorm, roundable;
681
+ double t5, t6, t7, t8, t9, t10, t11, t12, t13;
682
+ double rhSquare, rhSquareHalf, rhC3, rhFour, monomialCube;
683
+ double highPoly, highPolyWithSquare, monomialFour;
684
+ double tablesh, tablesl;
685
+ double s1, s2, s3, s4, s5;
686
+ double res;
687
+
688
+ /* Argument reduction and filtering for special cases */
689
+
690
+ /* Compute k as a double and as an int */
691
+ xdb.d = x;
692
+ xMultLog2InvMult2L = x * log2InvMult2L;
693
+ shiftedXMult = xMultLog2InvMult2L + shiftConst;
694
+ kd = shiftedXMult - shiftConst;
695
+ shiftedXMultdb.d = shiftedXMult;
696
+
697
+ /* Special cases tests */
698
+ xIntHi = xdb.i[HI];
699
+ mightBeDenorm = 0;
700
+ /* Test if argument is a denormal or zero */
701
+ if ((xIntHi & 0x7ff00000) == 0) {
702
+ /* If the argument is exactly zero, we just return 1.0
703
+ which is the mathematical image of the function
704
+ */
705
+ if (x == 0.0) return 1.0;
706
+
707
+ /* If the argument is a positive denormal, we
708
+ must return 1.0 and raise the inexact flag.
709
+ */
710
+
711
+ if (x > 0.0) return 1.0 + SMALLEST;
712
+
713
+ /* Otherwise, we return 1.0 - 1ulp since
714
+ exp(-greatest denorm) > 1.0 - 1ulp
715
+ We must do the addition dynamically for
716
+ raising the inexact flag.
717
+ */
718
+
719
+ return 1.0 + mTwoM53;
720
+
721
+ }
722
+
723
+ /* Test if argument is greater than approx. 709 in magnitude */
724
+ if ((xIntHi & 0x7fffffff) >= OVRUDRFLWSMPLBOUND) {
725
+ /* If we are here, the result might be overflowed, underflowed, inf, or NaN */
726
+
727
+ /* Test if +/- Inf or NaN */
728
+ if ((xIntHi & 0x7fffffff) >= 0x7ff00000) {
729
+ /* Either NaN or Inf in this case since exponent is maximal */
730
+
731
+ /* Test if NaN: mantissa is not 0 */
732
+ if (((xIntHi & 0x000fffff) | xdb.i[LO]) != 0) {
733
+ /* x = NaN, return NaN */
734
+ return x + x;
735
+ } else {
736
+ /* +/- Inf */
737
+
738
+ /* Test sign */
739
+ if ((xIntHi & 0x80000000)==0)
740
+ /* x = +Inf, return +Inf */
741
+ return x;
742
+ else
743
+ /* x = -Inf, return 0 */
744
+ return 0;
745
+ } /* End which in NaN, Inf */
746
+ } /* End NaN or Inf ? */
747
+
748
+ /* If we are here, we might be overflowed, denormalized or underflowed in the result
749
+ but there is no special case (NaN, Inf) left */
750
+
751
+ /* Test if actually overflowed */
752
+ if (x > OVRFLWBOUND) {
753
+ /* We would be overflowed but as we are rounding downwards
754
+ the nearest number lesser than the exact result is the greatest
755
+ normal. In any case, we must raise the inexact flag.
756
+ */
757
+ return LARGEST * (1.0 + SMALLEST);
758
+ }
759
+
760
+ /* Test if surely underflowed */
761
+ if (x <= UNDERFLWBOUND) {
762
+ /* We are actually sure to be underflowed and not denormalized any more
763
+ (at least where computing makes sense); since we are in the round
764
+ downwards case, we return 0, obtained as an underflowing product.
765
+ */
766
+ return SMALLEST * SMALLEST;
767
+ }
768
+
769
+ /* Test if possibly denormalized */
770
+ if (x <= DENORMBOUND) {
771
+ /* We know now that we are not sure to be normalized in the result
772
+ We just set an internal flag for a further test
773
+ */
774
+ mightBeDenorm = 1;
775
+ }
776
+ } /* End might be a special case */
777
+
778
+ /* If we are here, we are sure to be neither +/- Inf nor NaN nor overflowed nor denormalized in the argument
779
+ but we might be denormalized in the result
780
+
781
+ We continue the argument reduction for the quick phase and table reads for both phases
782
+ */
783
+
784
+ Mul12(&s1,&s2,msLog2Div2Lh,kd);
785
+ s3 = kd * msLog2Div2Lm;
786
+ s4 = s2 + s3;
787
+ s5 = x + s1;
788
+ Add12Cond(rh,rm,s5,s4);
789
+
790
+ k = shiftedXMultdb.i[LO];
791
+ M = k >> L;
792
+ index1 = k & INDEXMASK1;
793
+ index2 = (k & INDEXMASK2) >> LHALF;
794
+
795
+ /* Table reads */
796
+ tbl1h = twoPowerIndex1[index1].hi;
797
+ tbl1m = twoPowerIndex1[index1].mi;
798
+ tbl2h = twoPowerIndex2[index2].hi;
799
+ tbl2m = twoPowerIndex2[index2].mi;
800
+
801
+ /* Test now if it is sure to launch the quick phase because no denormalized result is possible */
802
+ if (mightBeDenorm == 1) {
803
+ /* The result might be denormalized, we launch the accurate phase in all cases */
804
+
805
+ /* Rest of argument reduction for accurate phase */
806
+
807
+ Mul133(&msLog2Div2LMultKh,&msLog2Div2LMultKm,&msLog2Div2LMultKl,kd,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
808
+ t1 = x + msLog2Div2LMultKh;
809
+ Add12Cond(rh,t2,t1,msLog2Div2LMultKm);
810
+ Add12Cond(rm,rl,t2,msLog2Div2LMultKl);
811
+
812
+ /* Table reads for accurate phase */
813
+ tbl1l = twoPowerIndex1[index1].lo;
814
+ tbl2l = twoPowerIndex2[index2].lo;
815
+
816
+ /* Call accurate phase */
817
+ exp_td_accurate(&polyTblh, &polyTblm, &polyTbll, rh, rm, rl, tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l);
818
+
819
+ /* Final rounding and multiplication with 2^M
820
+
821
+ We first multiply the most significant part by 2^M in two steps
822
+ and adjust it then depending on the lower significant parts.
823
+
824
+ We cannot multiply directly by 2^M since M is less than -1022.
825
+ We first multiply by 2^(-1000) and then by 2^(M+1000).
826
+
827
+ */
828
+
829
+ t3 = polyTblh * twoPowerM1000;
830
+
831
+ /* Form now twoPowerM with adjusted M */
832
+ twoPowerMdb.i[LO] = 0;
833
+ twoPowerMdb.i[HI] = (M + 2023) << 20;
834
+
835
+
836
+ /* Multiply with the rest of M, the result will be denormalized */
837
+ t4 = t3 * twoPowerMdb.d;
838
+
839
+ /* For x86, force the compiler to pass through memory for having the right rounding */
840
+
841
+ t4db.d = t4; /* Do not #if-ify this line, we need the copy */
842
+ #if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
843
+ t4db2.i[HI] = t4db.i[HI];
844
+ t4db2.i[LO] = t4db.i[LO];
845
+ t4 = t4db2.d;
846
+ #endif
847
+
848
+ /* Remultiply by 2^(-M) for manipulating the rounding error and the lower significant parts */
849
+ M *= -1;
850
+ twoPowerMdb.i[LO] = 0;
851
+ twoPowerMdb.i[HI] = (M + 23) << 20;
852
+ t5 = t4 * twoPowerMdb.d;
853
+ t6 = t5 * twoPower1000;
854
+ t7 = polyTblh - t6;
855
+
856
+ /* The rounding can be decided using the sign of the arithmetical sum of the
857
+ round-to-nearest-error (i.e. t7) and the lower part(s) of the final result.
858
+ We add first the lower parts and add the result to the error in t7. We have to
859
+ keep in mind that everything is scaled by 2^(-M).
860
+ t8 can never be exactly 0 since we filter out the cases where the image of the
861
+ function is algebraic and the implementation is more accurate than the TMD worst case.
862
+ */
863
+
864
+ polyTblm = polyTblm + polyTbll;
865
+ t8 = t7 + polyTblm;
866
+
867
+ /* Since we are rounding downwards, the round-to-nearest-rounding result in t4 is
868
+ equal to the final result if the rounding error (i.e. the error plus the lower parts)
869
+ is positive, i.e. if the rounding-to-nearest was downwards.
870
+ */
871
+
872
+ if (t8 > 0.0) return t4;
873
+
874
+ /* If we are here, we must adjust the final result by -1ulp
875
+ Relying on the fact that the exponential is always positive, we can simplify this
876
+ adjustment
877
+ */
878
+
879
+ t4db.l--;
880
+ return t4db.d;
881
+ } /* End accurate phase launched as there might be a denormalized result */
882
+
883
+ /* No more underflow nor denormal is possible. There may be the case where
884
+ M is 1024 and the value to be multiplied by 2^M is less than 1.
885
+ So the final result will be normalized and representable, but the multiplication must be
886
+ made in 2 steps
887
+ */
888
+
889
+ /* Quick phase starts here */
890
+
891
+ rhSquare = rh * rh;
892
+ rhC3 = c3 * rh;
893
+
894
+ rhSquareHalf = 0.5 * rhSquare;
895
+ monomialCube = rhC3 * rhSquare;
896
+ rhFour = rhSquare * rhSquare;
897
+
898
+ monomialFour = c4 * rhFour;
899
+
900
+ highPoly = monomialCube + monomialFour;
901
+
902
+ highPolyWithSquare = rhSquareHalf + highPoly;
903
+
904
+ Mul22(&tablesh,&tablesl,tbl1h,tbl1m,tbl2h,tbl2m);
905
+
906
+ t8 = rm + highPolyWithSquare;
907
+ t9 = rh + t8;
908
+
909
+ t10 = tablesh * t9;
910
+
911
+ Add12(t11,t12,tablesh,t10);
912
+ t13 = t12 + tablesl;
913
+ Add12(polyTblh,polyTblm,t11,t13);
914
+
915
+ /* Rounding test
916
+ Since we know that the result of the final multiplication with 2^M
917
+ will always be representable, we can do the rounding test on the
918
+ factors and multiply only the final result.
919
+ We implement the multiplication in integer computations to overcome
920
+ the problem of the non-representability of 2^1024 if M = 1024
921
+ */
922
+
923
+ TEST_AND_COPY_RD(roundable,res,polyTblh,polyTblm,RDROUNDCST);
924
+
925
+ if (roundable) {
926
+ resdb.d = res;
927
+ resdb.i[HI] += M << 20;
928
+ return resdb.d;
929
+ } else {
930
+ /* Rest of argument reduction for accurate phase */
931
+
932
+ Mul133(&msLog2Div2LMultKh,&msLog2Div2LMultKm,&msLog2Div2LMultKl,kd,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
933
+ t1 = x + msLog2Div2LMultKh;
934
+ Add12Cond(rh,t2,t1,msLog2Div2LMultKm);
935
+ Add12Cond(rm,rl,t2,msLog2Div2LMultKl);
936
+
937
+ /* Table reads for accurate phase */
938
+ tbl1l = twoPowerIndex1[index1].lo;
939
+ tbl2l = twoPowerIndex2[index2].lo;
940
+
941
+ /* Call accurate phase */
942
+ exp_td_accurate(&polyTblh, &polyTblm, &polyTbll, rh, rm, rl, tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l);
943
+
944
+ /* Since the final multiplication is exact, we can do the final rounding before multiplying
945
+ We overcome this way also the cases where the final result is not underflowed whereas the
946
+ lower parts of the intermediate final result are.
947
+ */
948
+
949
+ RoundDownwards3(&res,polyTblh,polyTblm,polyTbll);
950
+
951
+ /* Final multiplication with 2^M
952
+ We implement the multiplication in integer computations to overcome
953
+ the problem of the non-representability of 2^1024 if M = 1024
954
+ */
955
+
956
+ resdb.d = res;
957
+ resdb.i[HI] += M << 20;
958
+ return resdb.d;
959
+ } /* Accurate phase launched after rounding test*/
960
+ }
961
+
962
+