crmf 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +12 -0
  3. data/crmf.gemspec +102 -1
  4. data/ext/crlibm-1.0beta5/AUTHORS +2 -0
  5. data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
  6. data/ext/crlibm-1.0beta5/COPYING +340 -0
  7. data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
  8. data/ext/crlibm-1.0beta5/ChangeLog +125 -0
  9. data/ext/crlibm-1.0beta5/Makefile.am +134 -0
  10. data/ext/crlibm-1.0beta5/NEWS +0 -0
  11. data/ext/crlibm-1.0beta5/README +31 -0
  12. data/ext/crlibm-1.0beta5/README.DEV +23 -0
  13. data/ext/crlibm-1.0beta5/README.md +5 -0
  14. data/ext/crlibm-1.0beta5/TODO +66 -0
  15. data/ext/crlibm-1.0beta5/VERSION +1 -0
  16. data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
  17. data/ext/crlibm-1.0beta5/acos-td.h +629 -0
  18. data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
  19. data/ext/crlibm-1.0beta5/asin-td.h +620 -0
  20. data/ext/crlibm-1.0beta5/asincos.c +4488 -0
  21. data/ext/crlibm-1.0beta5/asincos.h +575 -0
  22. data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
  23. data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
  24. data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
  25. data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
  26. data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
  27. data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
  28. data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
  29. data/ext/crlibm-1.0beta5/configure.ac +419 -0
  30. data/ext/crlibm-1.0beta5/crlibm.h +204 -0
  31. data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
  32. data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
  33. data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
  34. data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
  35. data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
  36. data/ext/crlibm-1.0beta5/double-extended.h +496 -0
  37. data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
  38. data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
  39. data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
  40. data/ext/crlibm-1.0beta5/exp-td.h +685 -0
  41. data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
  42. data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
  43. data/ext/crlibm-1.0beta5/expm1.c +2515 -0
  44. data/ext/crlibm-1.0beta5/expm1.h +715 -0
  45. data/ext/crlibm-1.0beta5/interval.h +238 -0
  46. data/ext/crlibm-1.0beta5/log-de.c +480 -0
  47. data/ext/crlibm-1.0beta5/log-de.h +747 -0
  48. data/ext/crlibm-1.0beta5/log-de2.c +280 -0
  49. data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
  50. data/ext/crlibm-1.0beta5/log-td.c +1158 -0
  51. data/ext/crlibm-1.0beta5/log-td.h +819 -0
  52. data/ext/crlibm-1.0beta5/log.c +2244 -0
  53. data/ext/crlibm-1.0beta5/log.h +1592 -0
  54. data/ext/crlibm-1.0beta5/log10-td.c +906 -0
  55. data/ext/crlibm-1.0beta5/log10-td.h +823 -0
  56. data/ext/crlibm-1.0beta5/log1p.c +1295 -0
  57. data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
  58. data/ext/crlibm-1.0beta5/log2-td.h +821 -0
  59. data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
  60. data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
  61. data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
  62. data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
  63. data/ext/crlibm-1.0beta5/log_fast.c +360 -0
  64. data/ext/crlibm-1.0beta5/log_fast.h +440 -0
  65. data/ext/crlibm-1.0beta5/pow.c +1396 -0
  66. data/ext/crlibm-1.0beta5/pow.h +3101 -0
  67. data/ext/crlibm-1.0beta5/prepare +20 -0
  68. data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
  69. data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
  70. data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
  71. data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
  72. data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
  73. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
  74. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
  75. data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
  76. data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
  77. data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
  78. data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
  79. data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
  80. data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
  81. data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
  82. data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
  83. data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
  84. data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
  85. data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
  86. data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
  87. data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
  88. data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
  89. data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
  90. data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
  91. data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
  92. data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
  93. data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
  94. data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
  95. data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
  96. data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
  97. data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
  98. data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
  99. data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
  100. data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
  101. data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
  102. data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
  103. data/ext/crlibm-1.0beta5/trigpi.h +556 -0
  104. data/ext/crlibm-1.0beta5/triple-double.c +57 -0
  105. data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
  106. data/ext/crmf/crmf.c +16 -16
  107. data/ext/crmf/extconf.rb +12 -8
  108. data/lib/crmf/version.rb +1 -1
  109. data/tests/perf.rb +100 -219
  110. metadata +104 -3
  111. data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,1363 @@
1
+ /*
2
+ * This function computes exp, correctly rounded,
3
+ * using experimental techniques based on triple-double arithmetic
4
+
5
+ THIS IS EXPERIMENTAL SOFTWARE
6
+
7
+ *
8
+ * Author : Christoph Lauter
9
+ * christoph.lauter at ens-lyon.fr
10
+ *
11
+
12
+ To have it replace the crlibm exp, do:
13
+
14
+ gcc -DHAVE_CONFIG_H -I. -fPIC -O2 -c exp-td.c; mv exp-td.o exp_fast.o; make
15
+
16
+ */
17
+
18
+
19
+ #include <stdio.h>
20
+ #include <stdlib.h>
21
+ #include "crlibm.h"
22
+ #include "crlibm_private.h"
23
+ #include "triple-double.h"
24
+ #include "exp-td.h"
25
+ #ifdef BUILD_INTERVAL_FUNCTIONS
26
+ #include "interval.h"
27
+ #endif
28
+
29
+ #define AVOID_FMA 0 /* NOTE(review): exp_td_accurate tests !defined(AVOID_FMA), so defining this macro at all (even as 0) deselects the FMA code path */
31
+ #define EVAL_PERF 1 /* non-zero: exp_td_accurate increments crlibm_second_step_taken on every call */
31
+
32
+
33
+
34
+
35
+
36
+
37
+
38
+ void exp_td_accurate(double *polyTblh, double *polyTblm, double *polyTbll,
39
+ double rh, double rm, double rl,
40
+ double tbl1h, double tbl1m, double tbl1l,
41
+ double tbl2h, double tbl2m, double tbl2l) { /* Accurate phase, called by exp_rn and exp_ru in this file.
+  *
+  * Inputs:
+  *   rh, rm, rl          three-piece reduced argument produced by the callers'
+  *                       "rest of argument reduction for accurate phase"
+  *   tbl1h, tbl1m, tbl1l the .hi/.mi/.lo fields the callers read from twoPowerIndex1
+  *   tbl2h, tbl2m, tbl2l the .hi/.mi/.lo fields the callers read from twoPowerIndex2
+  * Outputs:
+  *   *polyTblh, *polyTblm, *polyTbll are written by the final Renormalize3 call.
+  *
+  * Data flow, as written below:
+  *   p        ~ rh + rh^2/2 + rh^3 * (C3 + rh*(C4 + rh*(C5 + rh*(C6 + rh*C7))))
+  *   q        ~ (rm + rl) + (rm + rl) * p
+  *   fullPoly ~ q + p
+  *   result   ~ tbl2 * (tbl1 * (1 + fullPoly))
+  *
+  * NOTE(review): the MulNN / AddNN / Renormalize3 helpers and the accPolyC*
+  * constants are defined outside this section; the "~" annotations describe
+  * the data flow suggested by their names and argument lists and should be
+  * confirmed against their definitions.
+  */
42
+ double highPoly, highPolyMulth, highPolyMultm, highPolyMultl;
43
+ double rhSquareh, rhSquarel, rhSquareHalfh, rhSquareHalfl;
44
+ double rhCubeh, rhCubem, rhCubel;
45
+ double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5, t6;
46
+ double lowPolyh, lowPolym, lowPolyl;
47
+ double ph, pm, pl, phnorm, pmnorm, rmlMultPh, rmlMultPl;
48
+ double qh, ql, fullPolyh, fullPolym, fullPolyl;
49
+ double polyWithTbl1h, polyWithTbl1m, polyWithTbl1l;
50
+ double polyAddOneh,polyAddOnem,polyAddOnel;
51
+ double polyWithTablesh, polyWithTablesm, polyWithTablesl;
52
+
53
+
54
+ #if EVAL_PERF
55
+ crlibm_second_step_taken++; /* counter declared outside this block; bumped once per call of this function */
56
+ #endif
57
+
58
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA) /* AVOID_FMA is #defined (as 0) earlier in this file, so this branch is never selected here */
59
+ highPoly = FMA(FMA(accPolyC7,rh,accPolyC6),rh,accPolyC5);
60
+ #else
61
+ highPoly = accPolyC5 + rh * (accPolyC6 + rh * accPolyC7); /* highest-degree terms, evaluated in plain double */
62
+ #endif
63
+
64
+ Mul12(&t1h,&t1l,rh,highPoly); /* t1 ~ rh * highPoly */
65
+ Add22(&t2h,&t2l,accPolyC4h,accPolyC4l,t1h,t1l); /* t2 ~ accPolyC4 + t1 */
66
+ Mul22(&t3h,&t3l,rh,0,t2h,t2l); /* t3 ~ rh * t2 (rh passed as the pair (rh, 0)) */
67
+ Add22(&t4h,&t4l,accPolyC3h,accPolyC3l,t3h,t3l); /* t4 ~ accPolyC3 + t3 */
68
+
69
+ Mul12(&rhSquareh,&rhSquarel,rh,rh); /* rhSquare ~ rh * rh */
70
+ Mul23(&rhCubeh,&rhCubem,&rhCubel,rh,0,rhSquareh,rhSquarel); /* rhCube ~ rh * rhSquare */
71
+
72
+ rhSquareHalfh = 0.5 * rhSquareh; /* both pieces of rhSquare are halved */
73
+ rhSquareHalfl = 0.5 * rhSquarel;
74
+
75
+ Renormalize3(&lowPolyh,&lowPolym,&lowPolyl,rh,rhSquareHalfh,rhSquareHalfl); /* lowPoly ~ rh + rhSquare/2 */
76
+
77
+ Mul233(&highPolyMulth,&highPolyMultm,&highPolyMultl,t4h,t4l,rhCubeh,rhCubem,rhCubel); /* highPolyMult ~ t4 * rhCube */
78
+
79
+ Add33(&ph,&pm,&pl,lowPolyh,lowPolym,lowPolyl,highPolyMulth,highPolyMultm,highPolyMultl); /* p ~ lowPoly + highPolyMult */
80
+
81
+ Add12(phnorm,pmnorm,ph,pm); /* (phnorm, pmnorm) ~ ph + pm */
82
+ Mul22(&rmlMultPh,&rmlMultPl,rm,rl,phnorm,pmnorm); /* rmlMultP ~ (rm, rl) * (phnorm, pmnorm) */
83
+ Add22(&qh,&ql,rm,rl,rmlMultPh,rmlMultPl); /* q ~ (rm, rl) + rmlMultP */
84
+
85
+ Add233Cond(&fullPolyh,&fullPolym,&fullPolyl,qh,ql,ph,pm,pl); /* fullPoly ~ q + p */
86
+
87
+ Add12(polyAddOneh,t5,1,fullPolyh); /* next three statements: polyAddOne ~ 1 + fullPoly, built piece by piece */
88
+ Add12Cond(polyAddOnem,t6,t5,fullPolym);
89
+ polyAddOnel = t6 + fullPolyl;
90
+ Mul33(&polyWithTbl1h,&polyWithTbl1m,&polyWithTbl1l,tbl1h,tbl1m,tbl1l,polyAddOneh,polyAddOnem,polyAddOnel); /* polyWithTbl1 ~ tbl1 * polyAddOne */
91
+ Mul33(&polyWithTablesh,&polyWithTablesm,&polyWithTablesl,
92
+ tbl2h,tbl2m,tbl2l,
93
+ polyWithTbl1h,polyWithTbl1m,polyWithTbl1l); /* polyWithTables ~ tbl2 * polyWithTbl1 */
94
+
95
+ Renormalize3(polyTblh,polyTblm,polyTbll,polyWithTablesh,polyWithTablesm,polyWithTablesl); /* result is stored through the three output pointers */
96
+
97
+ }
98
+
99
+
100
+
101
+ /*************************************************************
102
+ *************************************************************
103
+ * ROUNDED TO NEAREST *
+ *
+ * exp_rn(x): exponential of x for the round-to-nearest case.
+ *
+ * Values returned directly by the special-case filter:
+ *   zero or denormal x      -> 1.0
+ *   NaN                     -> x + x
+ *   +Inf                    -> x
+ *   -Inf                    -> 0
+ *   x >  OVRFLWBOUND        -> LARGEST * LARGEST
+ *   x <= UNDERFLWBOUND      -> SMALLEST * SMALLEST
+ *
+ * Otherwise the quick phase is evaluated and its result is returned when
+ * the rounding test against ROUNDCST succeeds; if the test fails,
+ * exp_td_accurate is called and its result rounded with RoundToNearest3.
+ * When x <= DENORMBOUND (result possibly denormal) the quick phase is
+ * skipped and exp_td_accurate is always used, followed by a dedicated
+ * rounding sequence.
104
+ *************************************************************
105
+ *************************************************************/
106
+ double exp_rn(double x){
107
+ double rh, rm, rl, tbl1h, tbl1m, tbl1l;
108
+ double tbl2h, tbl2m, tbl2l;
109
+ double xMultLog2InvMult2L, shiftedXMult, kd;
110
+ double msLog2Div2LMultKh, msLog2Div2LMultKm, msLog2Div2LMultKl;
111
+ double t1, t2, t3, t4, polyTblh, polyTblm, polyTbll;
112
+ db_number shiftedXMultdb, twoPowerMdb, xdb, t4db, t4db2, polyTblhdb, resdb;
113
+ int k, M, index1, index2, xIntHi, mightBeDenorm;
114
+ double t5, t6, t7, t8, t9, t10, t11, t12, t13;
115
+ double rhSquare, rhSquareHalf, rhC3, rhFour, monomialCube;
116
+ double highPoly, highPolyWithSquare, monomialFour;
117
+ double tablesh, tablesl;
118
+ double s1, s2, s3, s4, s5; /* only referenced inside the disabled #if 0 reduction below */
119
+ double res;
120
+
121
+ /* Argument reduction and filtering for special cases */
122
+
123
+ /* Compute k as a double (kd) and keep the shifted value so that k can be read as an int later */
124
+ xdb.d = x;
125
+ xMultLog2InvMult2L = x * log2InvMult2L;
126
+ shiftedXMult = xMultLog2InvMult2L + shiftConst;
127
+ kd = shiftedXMult - shiftConst;
128
+ shiftedXMultdb.d = shiftedXMult;
129
+
130
+ /* Special cases tests */
131
+ xIntHi = xdb.i[HI];
132
+ mightBeDenorm = 0;
133
+ /* Test if argument is a denormal or zero */
134
+ if ((xIntHi & 0x7ff00000) == 0) {
135
+ /* We are in the RN case, return 1.0 in all cases */
136
+ return 1.0;
137
+ }
138
+
139
+ /* Test if argument is greater than approx. 709 in magnitude */
140
+ if ((xIntHi & 0x7fffffff) >= OVRUDRFLWSMPLBOUND) {
141
+ /* If we are here, the result might be overflowed, underflowed, inf, or NaN */
142
+
143
+ /* Test if +/- Inf or NaN */
144
+ if ((xIntHi & 0x7fffffff) >= 0x7ff00000) {
145
+ /* Either NaN or Inf in this case since exponent is maximal */
146
+
147
+ /* Test if NaN: mantissa is not 0 */
148
+ if (((xIntHi & 0x000fffff) | xdb.i[LO]) != 0) {
149
+ /* x = NaN, return NaN */
150
+ return x + x;
151
+ } else {
152
+ /* +/- Inf */
153
+
154
+ /* Test sign */
155
+ if ((xIntHi & 0x80000000)==0)
156
+ /* x = +Inf, return +Inf */
157
+ return x;
158
+ else
159
+ /* x = -Inf, return 0 */
160
+ return 0;
161
+ } /* End which in NaN, Inf */
162
+ } /* End NaN or Inf ? */
163
+
164
+ /* If we are here, we might be overflowed, denormalized or underflowed in the result
165
+ but there is no special case (NaN, Inf) left */
166
+
167
+ /* Test if actually overflowed */
168
+ if (x > OVRFLWBOUND) {
169
+ /* We are actually overflowed in the result */
170
+ return LARGEST * LARGEST;
171
+ }
172
+
173
+ /* Test if surely underflowed */
174
+ if (x <= UNDERFLWBOUND) {
175
+ /* We are actually sure to be underflowed and not denormalized any more
176
+ So we return 0 and raise the inexact flag */
177
+ return SMALLEST * SMALLEST;
178
+ }
179
+
180
+ /* Test if possibly denormalized */
181
+ if (x <= DENORMBOUND) {
182
+ /* We know now that we are not sure to be normalized in the result
183
+ We just set an internal flag for a further test
184
+ */
185
+ mightBeDenorm = 1;
186
+ }
187
+ } /* End might be a special case */
188
+
189
+ /* If we are here, we are sure to be neither +/- Inf nor NaN nor overflowed nor denormalized in the argument
190
+ but we might be denormalized in the result
191
+
192
+ We continue the argument reduction for the quick phase and table reads for both phases
193
+ */
194
+
195
+ #if 0 /* disabled alternative argument reduction */
196
+ Mul12(&s1,&s2,msLog2Div2Lh,kd);
197
+ s3 = kd * msLog2Div2Lm;
198
+ s4 = s2 + s3;
199
+ s5 = x + s1;
200
+ Add12Cond(rh,rm,s5,s4);
201
+ #else
202
+
203
+ /* Cody and Waite like, accurate to 2^-84 */
204
+ double Log2h= 0xb.17217f8p-16 ;
205
+ double Log2l= -0x2.e308654361c4cp-48 ;
206
+ Add12Cond(rh,rm, x-kd*Log2h, -kd*Log2l); /* quick-phase reduced argument, kept as the pair (rh, rm) */
207
+
208
+ #endif
209
+
210
+
211
+ k = shiftedXMultdb.i[LO]; /* integer k is read from the low word of the shifted product */
212
+ M = k >> L; /* M is applied to the exponent field of the result at the very end */
213
+ index1 = k & INDEXMASK1; /* index into twoPowerIndex1 */
214
+ index2 = (k & INDEXMASK2) >> LHALF; /* index into twoPowerIndex2 */
215
+
216
+ /* Table reads */
217
+ tbl1h = twoPowerIndex1[index1].hi;
218
+ tbl1m = twoPowerIndex1[index1].mi;
219
+ tbl2h = twoPowerIndex2[index2].hi;
220
+ tbl2m = twoPowerIndex2[index2].mi;
221
+
222
+ /* Test now whether it is safe to launch the quick phase, i.e. whether no denormalized result is possible */
223
+ if (mightBeDenorm == 1) {
224
+ /* The result might be denormalized, we launch the accurate phase in all cases */
225
+
226
+ /* Rest of argument reduction for accurate phase */
227
+
228
+ Mul133(&msLog2Div2LMultKh,&msLog2Div2LMultKm,&msLog2Div2LMultKl,kd,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
229
+ t1 = x + msLog2Div2LMultKh;
230
+ Add12Cond(rh,t2,t1,msLog2Div2LMultKm);
231
+ Add12Cond(rm,rl,t2,msLog2Div2LMultKl);
232
+
233
+ /* Table reads for accurate phase */
234
+ tbl1l = twoPowerIndex1[index1].lo;
235
+ tbl2l = twoPowerIndex2[index2].lo;
236
+
237
+ /* Call accurate phase */
238
+ exp_td_accurate(&polyTblh, &polyTblm, &polyTbll, rh, rm, rl, tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l);
239
+
240
+ /* Final rounding and multiplication with 2^M
241
+
242
+ We first multiply the most significant part (polyTblh) by 2^M in two steps
243
+ and adjust it then depending on the lower significant parts.
244
+
245
+ We cannot multiply directly by 2^M since M is less than -1022.
246
+ We first multiply by 2^(-1000) and then by 2^(M+1000).
247
+
248
+ */
249
+
250
+ t3 = polyTblh * twoPowerM1000;
251
+
252
+ /* Form now twoPowerM with adjusted M */
253
+ twoPowerMdb.i[LO] = 0;
254
+ twoPowerMdb.i[HI] = (M + 2023) << 20; /* exponent field (M + 1000) + 1023, i.e. the double 2^(M+1000) */
255
+
256
+
257
+ /* Multiply with the rest of M, the result will be denormalized */
258
+ t4 = t3 * twoPowerMdb.d;
259
+
260
+ /* For x86, force the compiler to pass through memory for having the right rounding */
261
+
262
+ t4db.d = t4; /* Do not #if-ify this line, we need the copy */
263
+ #if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
264
+ t4db2.i[HI] = t4db.i[HI];
265
+ t4db2.i[LO] = t4db.i[LO];
266
+ t4 = t4db2.d;
267
+ #endif
268
+
269
+ /* Remultiply by 2^(-M) for manipulating the rounding error and the lower significant parts */
270
+ M *= -1; /* from here on M holds the negated exponent */
271
+ twoPowerMdb.i[LO] = 0;
272
+ twoPowerMdb.i[HI] = (M + 23) << 20; /* exponent field (M - 1000) + 1023, i.e. 2^(M-1000) with M already negated */
273
+ t5 = t4 * twoPowerMdb.d;
274
+ t6 = t5 * twoPower1000;
275
+ t7 = polyTblh - t6; /* error of the rounding performed when forming t4, scaled back by 2^M (negated M) */
276
+
277
+ /* The rounding decision is made at 1/2 ulp of a denormal, i.e. at 2^(-1075)
278
+ We construct this number and by comparing with it we get to know
279
+ whether we are in a difficult rounding case or not. If not we just return
280
+ the known result. Otherwise we continue with further tests.
281
+ */
282
+
283
+ twoPowerMdb.i[LO] = 0;
284
+ twoPowerMdb.i[HI] = (M - 52) << 20; /* exponent field (M - 1075) + 1023: 2^(-1075) in the same scaling as t7 */
285
+
286
+ if (ABS(t7) != twoPowerMdb.d) return t4;
287
+
288
+ /* If we are here, we are in a difficult rounding case */
289
+
290
+ /* We have to adjust the result iff the sign of the error on
291
+ rounding 2^M * polyTblh (which must be an ulp of a denormal)
292
+ and polyTblm +arith polyTbll is the same which means that
293
+ the error made was greater than an ulp of a denormal.
294
+ */
295
+
296
+ polyTblm = polyTblm + polyTbll;
297
+
298
+ if (t7 > 0.0) {
299
+ if (polyTblm > 0.0) {
300
+ t4db.l++;
301
+ return t4db.d;
302
+ } else return t4;
303
+ } else {
304
+ if (polyTblm < 0.0) {
305
+ t4db.l--;
306
+ return t4db.d;
307
+ } else return t4;
308
+ }
309
+ } /* End accurate phase launched as there might be a denormalized result */
310
+
311
+ /* No more underflow nor denormal is possible. There may be the case where
312
+ M is 1024 and the value that 2^M is to be multiplied by is less than 1,
313
+ so the final result will be normalized and representable, but the multiplication must be
314
+ made in 2 steps
315
+ */
316
+
317
+ /* Quick phase starts here */
318
+
319
+ rhSquare = rh * rh;
320
+ rhC3 = c3 * rh;
321
+
322
+ rhSquareHalf = 0.5 * rhSquare;
323
+ monomialCube = rhC3 * rhSquare;
324
+ rhFour = rhSquare * rhSquare;
325
+
326
+ monomialFour = c4 * rhFour;
327
+
328
+ highPoly = monomialCube + monomialFour;
329
+
330
+ highPolyWithSquare = rhSquareHalf + highPoly;
331
+
332
+ Mul22(&tablesh,&tablesl,tbl1h,tbl1m,tbl2h,tbl2m);
333
+
334
+ t8 = rm + highPolyWithSquare;
335
+ t9 = rh + t8;
336
+
337
+ t10 = tablesh * t9;
338
+
339
+ Add12(t11,t12,tablesh,t10);
340
+ t13 = t12 + tablesl;
341
+ Add12(polyTblh,polyTblm,t11,t13);
342
+
343
+ /* Rounding test
344
+ Since we know that the result of the final multiplication with 2^M
345
+ will always be representable, we can do the rounding test on the
346
+ factors and multiply only the final result.
347
+ We implement the multiplication in integer computations to overcome
348
+ the problem of the non-representability of 2^1024 if M = 1024
349
+ */
350
+
351
+ if(polyTblh == (polyTblh + (polyTblm * ROUNDCST))) {
352
+ polyTblhdb.d = polyTblh;
353
+ polyTblhdb.i[HI] += M << 20; /* NOTE(review): if M is negative here (presumably for negative x) this left-shifts a negative int, which ISO C leaves undefined; M * (1 << 20) would express the same exponent adjustment */
354
+ return polyTblhdb.d;
355
+ } else
356
+ {
357
+ /* Rest of argument reduction for accurate phase */
358
+
359
+ Mul133(&msLog2Div2LMultKh,&msLog2Div2LMultKm,&msLog2Div2LMultKl,kd,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
360
+ t1 = x + msLog2Div2LMultKh;
361
+ Add12Cond(rh,t2,t1,msLog2Div2LMultKm);
362
+ Add12Cond(rm,rl,t2,msLog2Div2LMultKl);
363
+
364
+ /* Table reads for accurate phase */
365
+ tbl1l = twoPowerIndex1[index1].lo;
366
+ tbl2l = twoPowerIndex2[index2].lo;
367
+
368
+ /* Call accurate phase */
369
+ exp_td_accurate(&polyTblh, &polyTblm, &polyTbll, rh, rm, rl, tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l);
370
+
371
+ /* Since the final multiplication is exact, we can do the final rounding before multiplying
372
+ We overcome this way also the cases where the final result is not underflowed whereas the
373
+ lower parts of the intermediate final result are.
374
+ */
375
+
376
+ RoundToNearest3(&res,polyTblh,polyTblm,polyTbll);
377
+
378
+ /* Final multiplication with 2^M
379
+ We implement the multiplication in integer computations to overcome
380
+ the problem of the non-representability of 2^1024 if M = 1024
381
+ */
382
+
383
+ resdb.d = res;
384
+ resdb.i[HI] += M << 20; /* NOTE(review): same left shift of a possibly negative M as in the quick-phase return above */
385
+ return resdb.d;
386
+ } /* Accurate phase launched after rounding test*/
387
+ }
388
+
389
+
390
+ /*************************************************************
391
+ *************************************************************
392
+ * ROUNDED UPWARDS *
+ *
+ * exp_ru(x): exponential of x for the round-upwards case.
+ *
+ * Values returned directly by the special-case filter:
+ *   x == 0.0                -> 1.0
+ *   negative denormal x     -> 1.0 + SMALLEST
+ *   positive denormal x     -> 1.0 + twoM52
+ *   NaN                     -> x + x
+ *   +Inf                    -> x
+ *   -Inf                    -> 0
+ *   x >  OVRFLWBOUND        -> LARGEST * LARGEST
+ *   x <= UNDERFLWBOUND      -> SMALLEST
+ *
+ * Otherwise the quick phase is evaluated and its result is returned when
+ * TEST_AND_COPY_RU reports it as roundable; if not, exp_td_accurate is
+ * called and its result rounded with RoundUpwards3. When x <= DENORMBOUND
+ * (result possibly denormal) the quick phase is skipped and
+ * exp_td_accurate is always used, followed by a dedicated rounding sequence.
393
+ *************************************************************
394
+ *************************************************************/
395
+ double exp_ru(double x) {
396
+ double rh, rm, rl, tbl1h, tbl1m, tbl1l;
397
+ double tbl2h, tbl2m, tbl2l;
398
+ double xMultLog2InvMult2L, shiftedXMult, kd;
399
+ double msLog2Div2LMultKh, msLog2Div2LMultKm, msLog2Div2LMultKl;
400
+ double t1, t2, t3, t4, polyTblh, polyTblm, polyTbll;
401
+ db_number shiftedXMultdb, twoPowerMdb, xdb, t4db, t4db2, resdb;
402
+ int k, M, index1, index2, xIntHi, mightBeDenorm, roundable;
403
+ double t5, t6, t7, t8, t9, t10, t11, t12, t13;
404
+ double rhSquare, rhSquareHalf, rhC3, rhFour, monomialCube;
405
+ double highPoly, highPolyWithSquare, monomialFour;
406
+ double tablesh, tablesl;
407
+ double s1, s2, s3, s4, s5; /* only referenced inside the disabled #if 0 reduction below */
408
+ double res;
409
+
410
+ /* Argument reduction and filtering for special cases */
411
+
412
+ /* Compute k as a double (kd) and keep the shifted value so that k can be read as an int later */
413
+ xdb.d = x;
414
+ xMultLog2InvMult2L = x * log2InvMult2L;
415
+ shiftedXMult = xMultLog2InvMult2L + shiftConst;
416
+ kd = shiftedXMult - shiftConst;
417
+ shiftedXMultdb.d = shiftedXMult;
418
+
419
+ /* Special cases tests */
420
+ xIntHi = xdb.i[HI];
421
+ mightBeDenorm = 0;
422
+ /* Test if argument is a denormal or zero */
423
+ if ((xIntHi & 0x7ff00000) == 0) {
424
+ /* If the argument is exactly zero, we just return 1.0
425
+ which is the mathematical image of the function
426
+ */
427
+ if (x == 0.0) return 1.0;
428
+
429
+ /* If the argument is a negative denormal, we
430
+ must return 1.0 and raise the inexact flag.
431
+ */
432
+
433
+ if (x < 0.0) return 1.0 + SMALLEST;
434
+
435
+ /* Otherwise, we return 1.0 + 1ulp since
436
+ exp(greatest denorm) < 1.0 + 1ulp
437
+ We must do the addition dynamically for
438
+ raising the inexact flag.
439
+ */
440
+
441
+ return 1.0 + twoM52;
442
+ }
443
+
444
+ /* Test if argument is greater than approx. 709 in magnitude */
445
+ if ((xIntHi & 0x7fffffff) >= OVRUDRFLWSMPLBOUND) {
446
+ /* If we are here, the result might be overflowed, underflowed, inf, or NaN */
447
+
448
+ /* Test if +/- Inf or NaN */
449
+ if ((xIntHi & 0x7fffffff) >= 0x7ff00000) {
450
+ /* Either NaN or Inf in this case since exponent is maximal */
451
+
452
+ /* Test if NaN: mantissa is not 0 */
453
+ if (((xIntHi & 0x000fffff) | xdb.i[LO]) != 0) {
454
+ /* x = NaN, return NaN */
455
+ return x + x;
456
+ } else {
457
+ /* +/- Inf */
458
+
459
+ /* Test sign */
460
+ if ((xIntHi & 0x80000000)==0)
461
+ /* x = +Inf, return +Inf */
462
+ return x;
463
+ else
464
+ /* x = -Inf, return 0 (even in RU!) */
465
+ return 0;
466
+ } /* End which in NaN, Inf */
467
+ } /* End NaN or Inf ? */
468
+
469
+ /* If we are here, we might be overflowed, denormalized or underflowed in the result
470
+ but there is no special case (NaN, Inf) left */
471
+
472
+ /* Test if actually overflowed */
473
+ if (x > OVRFLWBOUND) {
474
+ /* We are actually overflowed in the result */
475
+ return LARGEST * LARGEST;
476
+ }
477
+
478
+ /* Test if surely underflowed */
479
+ if (x <= UNDERFLWBOUND) {
480
+ /* We are actually sure to be underflowed and not denormalized any more
481
+ (at least where computing makes sense); since we are in the round
482
+ upwards case, we return the smallest denormal possible.
483
+ */
484
+ return SMALLEST;
485
+ }
486
+
487
+ /* Test if possibly denormalized */
488
+ if (x <= DENORMBOUND) {
489
+ /* We know now that we are not sure to be normalized in the result
490
+ We just set an internal flag for a further test
491
+ */
492
+ mightBeDenorm = 1;
493
+ }
494
+ } /* End might be a special case */
495
+
496
+ /* If we are here, we are sure to be neither +/- Inf nor NaN nor overflowed nor denormalized in the argument
497
+ but we might be denormalized in the result
498
+
499
+ We continue the argument reduction for the quick phase and table reads for both phases
500
+ */
501
+
502
+ #if 0 /* disabled alternative argument reduction */
503
+ Mul12(&s1,&s2,msLog2Div2Lh,kd);
504
+ s3 = kd * msLog2Div2Lm;
505
+ s4 = s2 + s3;
506
+ s5 = x + s1;
507
+ Add12Cond(rh,rm,s5,s4);
508
+ #else
509
+
510
+ /* Cody and Waite like, accurate to 2^-84 */
511
+ double Log2h= 0xb.17217f8p-16 ;
512
+ double Log2l= -0x2.e308654361c4cp-48 ;
513
+ Add12Cond(rh,rm, x-kd*Log2h, -kd*Log2l); /* quick-phase reduced argument, kept as the pair (rh, rm) */
514
+
515
+ #endif
516
+
517
+ k = shiftedXMultdb.i[LO]; /* integer k is read from the low word of the shifted product */
518
+ M = k >> L; /* M is applied to the exponent field of the result at the very end */
519
+ index1 = k & INDEXMASK1; /* index into twoPowerIndex1 */
520
+ index2 = (k & INDEXMASK2) >> LHALF; /* index into twoPowerIndex2 */
521
+
522
+ /* Table reads */
523
+ tbl1h = twoPowerIndex1[index1].hi;
524
+ tbl1m = twoPowerIndex1[index1].mi;
525
+ tbl2h = twoPowerIndex2[index2].hi;
526
+ tbl2m = twoPowerIndex2[index2].mi;
527
+
528
+ /* Test now whether it is safe to launch the quick phase, i.e. whether no denormalized result is possible */
529
+ if (mightBeDenorm == 1) {
530
+ /* The result might be denormalized, we launch the accurate phase in all cases */
531
+
532
+ /* Rest of argument reduction for accurate phase */
533
+
534
+ Mul133(&msLog2Div2LMultKh,&msLog2Div2LMultKm,&msLog2Div2LMultKl,kd,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
535
+ t1 = x + msLog2Div2LMultKh;
536
+ Add12Cond(rh,t2,t1,msLog2Div2LMultKm);
537
+ Add12Cond(rm,rl,t2,msLog2Div2LMultKl);
538
+
539
+ /* Table reads for accurate phase */
540
+ tbl1l = twoPowerIndex1[index1].lo;
541
+ tbl2l = twoPowerIndex2[index2].lo;
542
+
543
+ /* Call accurate phase */
544
+ exp_td_accurate(&polyTblh, &polyTblm, &polyTbll, rh, rm, rl, tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l);
545
+
546
+ /* Final rounding and multiplication with 2^M
547
+
548
+ We first multiply the most significant part (polyTblh) by 2^M in two steps
549
+ and adjust it then depending on the lower significant parts.
550
+
551
+ We cannot multiply directly by 2^M since M is less than -1022.
552
+ We first multiply by 2^(-1000) and then by 2^(M+1000).
553
+
554
+ */
555
+
556
+ t3 = polyTblh * twoPowerM1000;
557
+
558
+ /* Form now twoPowerM with adjusted M */
559
+ twoPowerMdb.i[LO] = 0;
560
+ twoPowerMdb.i[HI] = (M + 2023) << 20; /* exponent field (M + 1000) + 1023, i.e. the double 2^(M+1000) */
561
+
562
+
563
+ /* Multiply with the rest of M, the result will be denormalized */
564
+ t4 = t3 * twoPowerMdb.d;
565
+
566
+ /* For x86, force the compiler to pass through memory for having the right rounding */
567
+
568
+ t4db.d = t4; /* Do not #if-ify this line, we need the copy */
569
+ #if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
570
+ t4db2.i[HI] = t4db.i[HI];
571
+ t4db2.i[LO] = t4db.i[LO];
572
+ t4 = t4db2.d;
573
+ #endif
574
+
575
+
576
+ /* Remultiply by 2^(-M) for manipulating the rounding error and the lower significant parts */
577
+ M *= -1; /* from here on M holds the negated exponent */
578
+ twoPowerMdb.i[LO] = 0;
579
+ twoPowerMdb.i[HI] = (M + 23) << 20; /* exponent field (M - 1000) + 1023, i.e. 2^(M-1000) with M already negated */
580
+ t5 = t4 * twoPowerMdb.d;
581
+ t6 = t5 * twoPower1000;
582
+ t7 = polyTblh - t6; /* error of the rounding performed when forming t4, scaled back by 2^M (negated M) */
583
+
584
+ /* The rounding can be decided using the sign of the arithmetical sum of the
585
+ round-to-nearest-error (i.e. t7) and the lower part(s) of the final result.
586
+ We add first the lower parts and add the result to the error in t7. We have to
587
+ keep in mind that everything is scaled by 2^(-M).
588
+ t8 can never be exactly 0 since we filter out the cases where the image of the
589
+ function is algebraic and the implementation is more accurate than the TMD worst case.
590
+ */
591
+
592
+ polyTblm = polyTblm + polyTbll;
593
+ t8 = t7 + polyTblm;
594
+
595
+ /* Since we are rounding upwards, the round-to-nearest-rounding result in t4 is
596
+ equal to the final result if the rounding error (i.e. the error plus the lower parts)
597
+ is negative, i.e. if the rounding-to-nearest was upwards.
598
+ */
599
+
600
+ if (t8 < 0.0) return t4;
601
+
602
+ /* If we are here, we must adjust the final result by +1ulp
603
+ Relying on the fact that the exponential is always positive, we can simplify this
604
+ adjustment
605
+ */
606
+
607
+ t4db.l++;
608
+ return t4db.d;
609
+ } /* End accurate phase launched as there might be a denormalized result */
610
+
611
+ /* No more underflow nor denormal is possible. There may be the case where
612
+ M is 1024 and the value that 2^M is to be multiplied by is less than 1,
613
+ so the final result will be normalized and representable, but the multiplication must be
614
+ made in 2 steps
615
+ */
616
+
617
+ /* Quick phase starts here */
618
+
619
+ rhSquare = rh * rh;
620
+ rhC3 = c3 * rh;
621
+
622
+ rhSquareHalf = 0.5 * rhSquare;
623
+ monomialCube = rhC3 * rhSquare;
624
+ rhFour = rhSquare * rhSquare;
625
+
626
+ monomialFour = c4 * rhFour;
627
+
628
+ highPoly = monomialCube + monomialFour;
629
+
630
+ highPolyWithSquare = rhSquareHalf + highPoly;
631
+
632
+ Mul22(&tablesh,&tablesl,tbl1h,tbl1m,tbl2h,tbl2m);
633
+
634
+ t8 = rm + highPolyWithSquare;
635
+ t9 = rh + t8;
636
+
637
+ t10 = tablesh * t9;
638
+
639
+ Add12(t11,t12,tablesh,t10);
640
+ t13 = t12 + tablesl;
641
+ Add12(polyTblh,polyTblm,t11,t13);
642
+
643
+ /* Rounding test
644
+ Since we know that the result of the final multiplication with 2^M
645
+ will always be representable, we can do the rounding test on the
646
+ factors and multiply only the final result.
647
+ We implement the multiplication in integer computations to overcome
648
+ the problem of the non-representability of 2^1024 if M = 1024
649
+ */
650
+
651
+ TEST_AND_COPY_RU(roundable,res,polyTblh,polyTblm,RDROUNDCST); /* NOTE(review): the constant is named RDROUNDCST although this is the RU path; presumably one constant serves both directed modes - confirm in exp-td.h */
652
+
653
+ if (roundable) {
654
+ resdb.d = res;
655
+ resdb.i[HI] += M << 20; /* NOTE(review): if M is negative here (presumably for negative x) this left-shifts a negative int, which ISO C leaves undefined; M * (1 << 20) would express the same exponent adjustment */
656
+ return resdb.d;
657
+ } else
658
+ {
659
+ /* Rest of argument reduction for accurate phase */
660
+
661
+ Mul133(&msLog2Div2LMultKh,&msLog2Div2LMultKm,&msLog2Div2LMultKl,kd,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
662
+ t1 = x + msLog2Div2LMultKh;
663
+ Add12Cond(rh,t2,t1,msLog2Div2LMultKm);
664
+ Add12Cond(rm,rl,t2,msLog2Div2LMultKl);
665
+
666
+ /* Table reads for accurate phase */
667
+ tbl1l = twoPowerIndex1[index1].lo;
668
+ tbl2l = twoPowerIndex2[index2].lo;
669
+
670
+ /* Call accurate phase */
671
+ exp_td_accurate(&polyTblh, &polyTblm, &polyTbll, rh, rm, rl, tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l);
672
+
673
+ /* Since the final multiplication is exact, we can do the final rounding before multiplying
674
+ We overcome this way also the cases where the final result is not underflowed whereas the
675
+ lower parts of the intermediate final result are.
676
+ */
677
+
678
+ RoundUpwards3(&res,polyTblh,polyTblm,polyTbll);
679
+
680
+ /* Final multiplication with 2^M
681
+ We implement the multiplication in integer computations to overcome
682
+ the problem of the non-representability of 2^1024 if M = 1024
683
+ */
684
+
685
+ resdb.d = res;
686
+ resdb.i[HI] += M << 20; /* NOTE(review): same left shift of a possibly negative M as in the quick-phase return above */
687
+ return resdb.d;
688
+ } /* Accurate phase launched after rounding test*/
689
+ }
690
+
691
+
692
+ /*************************************************************
693
+ *************************************************************
694
+ * ROUNDED DOWNWARDS *
695
+ *************************************************************
696
+ *************************************************************/
697
+ double exp_rd(double x) { /* exp(x) with the result rounded downwards: special cases are filtered first, then a quick phase with a rounding test runs, and exp_td_accurate() is called when that test fails or when the result might be denormalized */
698
+ double rh, rm, rl, tbl1h, tbl1m, tbl1l;
699
+ double tbl2h, tbl2m, tbl2l;
700
+ double xMultLog2InvMult2L, shiftedXMult, kd;
701
+ double msLog2Div2LMultKh, msLog2Div2LMultKm, msLog2Div2LMultKl;
702
+ double t1, t2, t3, t4, polyTblh, polyTblm, polyTbll;
703
+ db_number shiftedXMultdb, twoPowerMdb, xdb, t4db, t4db2, resdb;
704
+ int k, M, index1, index2, xIntHi, mightBeDenorm, roundable;
705
+ double t5, t6, t7, t8, t9, t10, t11, t12, t13;
706
+ double rhSquare, rhSquareHalf, rhC3, rhFour, monomialCube;
707
+ double highPoly, highPolyWithSquare, monomialFour;
708
+ double tablesh, tablesl;
709
+ double s1, s2, s3, s4, s5; /* referenced only inside the disabled "#if 0" reduction below */
710
+ double res;
711
+
712
+ /* Argument reduction and filtering for special cases */
713
+
714
+ /* Compute k as a double and as an int */
715
+ xdb.d = x;
716
+ xMultLog2InvMult2L = x * log2InvMult2L;
717
+ shiftedXMult = xMultLog2InvMult2L + shiftConst;
718
+ kd = shiftedXMult - shiftConst;
719
+ shiftedXMultdb.d = shiftedXMult;
720
+
721
+ /* Special cases tests */
722
+ xIntHi = xdb.i[HI];
723
+ mightBeDenorm = 0;
724
+ /* Test if argument is a denormal or zero */
725
+ if ((xIntHi & 0x7ff00000) == 0) {
726
+ /* If the argument is exactly zero, we just return 1.0
727
+ which is the mathematical image of the function
728
+ */
729
+ if (x == 0.0) return 1.0;
730
+
731
+ /* If the argument is a positive denormal, we
732
+ must return 1.0 and raise the inexact flag.
733
+ */
734
+
735
+ if (x > 0.0) return 1.0 + SMALLEST;
736
+
737
+ /* Otherwise, we return 1.0 - 1ulp since
738
+ exp(-greatest denorm) > 1.0 - 1ulp
739
+ We must do the addition dynamically for
740
+ raising the inexact flag.
741
+ */
742
+
743
+ return 1.0 + mTwoM53;
744
+
745
+ }
746
+
747
+ /* Test if argument is greater than approx. 709 in magnitude */
748
+ if ((xIntHi & 0x7fffffff) >= OVRUDRFLWSMPLBOUND) {
749
+ /* If we are here, the result might be overflowed, underflowed, inf, or NaN */
750
+
751
+ /* Test if +/- Inf or NaN */
752
+ if ((xIntHi & 0x7fffffff) >= 0x7ff00000) {
753
+ /* Either NaN or Inf in this case since exponent is maximal */
754
+
755
+ /* Test if NaN: mantissa is not 0 */
756
+ if (((xIntHi & 0x000fffff) | xdb.i[LO]) != 0) {
757
+ /* x = NaN, return NaN */
758
+ return x + x;
759
+ } else {
760
+ /* +/- Inf */
761
+
762
+ /* Test sign */
763
+ if ((xIntHi & 0x80000000)==0)
764
+ /* x = +Inf, return +Inf */
765
+ return x;
766
+ else
767
+ /* x = -Inf, return 0 */
768
+ return 0;
769
+ } /* End which in NaN, Inf */
770
+ } /* End NaN or Inf ? */
771
+
772
+ /* If we are here, we might be overflowed, denormalized or underflowed in the result
773
+ but there is no special case (NaN, Inf) left */
774
+
775
+ /* Test if actually overflowed */
776
+ if (x > OVRFLWBOUND) {
777
+ /* We would be overflowed but as we are rounding downwards
778
+ the nearest number lesser than the exact result is the greatest
779
+ normal. In any case, we must raise the inexact flag.
780
+ */
781
+ return LARGEST * (1.0 + SMALLEST);
782
+ }
783
+
784
+ /* Test if surely underflowed */
785
+ if (x <= UNDERFLWBOUND) {
786
+ /* We are actually sure to be underflowed and not denormalized any more
787
+ (at least where computing makes sense); since we are rounding downwards,
788
+ the product below is presumably meant to underflow to zero while raising the flags.
789
+ NOTE(review): the earlier wording here ("round upwards case ... smallest denormal") did not match this code and looked copied from the round-up variant; confirm against the definition of SMALLEST. */
790
+ return SMALLEST * SMALLEST;
791
+ }
792
+
793
+ /* Test if possibly denormalized */
794
+ if (x <= DENORMBOUND) {
795
+ /* We know now that we are not sure to be normalized in the result
796
+ We just set an internal flag for a further test
797
+ */
798
+ mightBeDenorm = 1;
799
+ }
800
+ } /* End might be a special case */
801
+
802
+ /* If we are here, we are sure to be neither +/- Inf nor NaN nor overflowed nor denormalized in the argument
803
+ but we might be denormalized in the result
804
+
805
+ We continue the argument reduction for the quick phase and table reads for both phases
806
+ */
807
+
808
+
809
+ #if 0
810
+ Mul12(&s1,&s2,msLog2Div2Lh,kd);
811
+ s3 = kd * msLog2Div2Lm;
812
+ s4 = s2 + s3;
813
+ s5 = x + s1;
814
+ Add12Cond(rh,rm,s5,s4);
815
+ #else
816
+
817
+ /* Cody and Waite like, accurate to 2^-84 */
818
+ double Log2h= 0xb.17217f8p-16 ;
819
+ double Log2l= -0x2.e308654361c4cp-48 ;
820
+ Add12Cond(rh,rm, x-kd*Log2h, -kd*Log2l);
821
+
822
+ #endif
823
+
824
+ k = shiftedXMultdb.i[LO];
825
+ M = k >> L;
826
+ index1 = k & INDEXMASK1;
827
+ index2 = (k & INDEXMASK2) >> LHALF;
828
+
829
+ /* Table reads */
830
+ tbl1h = twoPowerIndex1[index1].hi;
831
+ tbl1m = twoPowerIndex1[index1].mi;
832
+ tbl2h = twoPowerIndex2[index2].hi;
833
+ tbl2m = twoPowerIndex2[index2].mi;
834
+
835
+ /* Test now if it is sure to launch the quick phase because no denormalized result is possible */
836
+ if (mightBeDenorm == 1) {
837
+ /* The result might be denormalized, we launch the accurate phase in all cases */
838
+
839
+ /* Rest of argument reduction for accurate phase */
840
+
841
+ Mul133(&msLog2Div2LMultKh,&msLog2Div2LMultKm,&msLog2Div2LMultKl,kd,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
842
+ t1 = x + msLog2Div2LMultKh;
843
+ Add12Cond(rh,t2,t1,msLog2Div2LMultKm);
844
+ Add12Cond(rm,rl,t2,msLog2Div2LMultKl);
845
+
846
+ /* Table reads for accurate phase */
847
+ tbl1l = twoPowerIndex1[index1].lo;
848
+ tbl2l = twoPowerIndex2[index2].lo;
849
+
850
+ /* Call accurate phase */
851
+ exp_td_accurate(&polyTblh, &polyTblm, &polyTbll, rh, rm, rl, tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l);
852
+
853
+ /* Final rounding and multiplication with 2^M
854
+
855
+ We first multiply the most significant part (polyTblh) by 2^M in two steps
856
+ and adjust it then depending on the lower significant parts.
857
+
858
+ We cannot multiply directly by 2^M since M is less than -1022.
859
+ We first multiply by 2^(-1000) and then by 2^(M+1000).
860
+
861
+ */
862
+
863
+ t3 = polyTblh * twoPowerM1000;
864
+
865
+ /* Form now twoPowerM with adjusted M */
866
+ twoPowerMdb.i[LO] = 0;
867
+ twoPowerMdb.i[HI] = (M + 2023) << 20;
868
+
869
+
870
+ /* Multiply with the rest of M, the result will be denormalized */
871
+ t4 = t3 * twoPowerMdb.d;
872
+
873
+ /* For x86, force the compiler to pass through memory for having the right rounding */
874
+
875
+ t4db.d = t4; /* Do not #if-ify this line, we need the copy */
876
+ #if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
877
+ t4db2.i[HI] = t4db.i[HI];
878
+ t4db2.i[LO] = t4db.i[LO];
879
+ t4 = t4db2.d;
880
+ #endif
881
+
882
+ /* Remultiply by 2^(-M) for manipulating the rounding error and the lower significant parts */
883
+ M *= -1;
884
+ twoPowerMdb.i[LO] = 0;
885
+ twoPowerMdb.i[HI] = (M + 23) << 20;
886
+ t5 = t4 * twoPowerMdb.d;
887
+ t6 = t5 * twoPower1000;
888
+ t7 = polyTblh - t6;
889
+
890
+ /* The rounding can be decided using the sign of the arithmetical sum of the
891
+ round-to-nearest-error (i.e. t7) and the lower part(s) of the final result.
892
+ We add first the lower parts and add the result to the error in t7. We have to
893
+ keep in mind that everything is scaled by 2^(-M).
894
+ t8 can never be exactly 0 since we filter out the cases where the image of the
895
+ function is algebraic and the implementation is more exact than the TMD worst case.
896
+ */
897
+
898
+ polyTblm = polyTblm + polyTbll;
899
+ t8 = t7 + polyTblm;
900
+
901
+ /* Since we are rounding downwards, the round-to-nearest-rounding result in t4 is
902
+ equal to the final result if the rounding error (i.e. the error plus the lower parts)
903
+ is positive, i.e. if the rounding-to-nearest was downwards.
904
+ */
905
+
906
+ if (t8 > 0.0) return t4;
907
+
908
+ /* If we are here, we must adjust the final result by -1ulp (the code decrements the
909
+ integer view of t4; the earlier "+1ulp" wording did not match it).
910
+ Relying on the fact that the exponential is always positive, we can simplify this
911
+ adjustment to a plain decrement */
912
+
913
+ t4db.l--;
914
+ return t4db.d;
915
+ } /* End accurate phase launched as there might be a denormalized result */
916
+
917
+ /* No more underflow nor denormal is possible. There may be the case where
918
+ M is 1024 and the value that 2^M is to be multiplied with is less than 1.
919
+ So the final result will be normalized and representable, but the multiplication must be
920
+ made in 2 steps
921
+ */
922
+
923
+ /* Quick phase starts here */
924
+
925
+ rhSquare = rh * rh;
926
+ rhC3 = c3 * rh;
927
+
928
+ rhSquareHalf = 0.5 * rhSquare;
929
+ monomialCube = rhC3 * rhSquare;
930
+ rhFour = rhSquare * rhSquare;
931
+
932
+ monomialFour = c4 * rhFour;
933
+
934
+ highPoly = monomialCube + monomialFour;
935
+
936
+ highPolyWithSquare = rhSquareHalf + highPoly;
937
+
938
+ Mul22(&tablesh,&tablesl,tbl1h,tbl1m,tbl2h,tbl2m);
939
+
940
+ t8 = rm + highPolyWithSquare;
941
+ t9 = rh + t8;
942
+
943
+ t10 = tablesh * t9;
944
+
945
+ Add12(t11,t12,tablesh,t10);
946
+ t13 = t12 + tablesl;
947
+ Add12(polyTblh,polyTblm,t11,t13);
948
+
949
+ /* Rounding test
950
+ Since we know that the result of the final multiplication with 2^M
951
+ will always be representable, we can do the rounding test on the
952
+ factors and multiply only the final result.
953
+ We implement the multiplication in integer computations to overcome
954
+ the problem of the non-representability of 2^1024 if M = 1024
955
+ */
956
+
957
+ TEST_AND_COPY_RD(roundable,res,polyTblh,polyTblm,RDROUNDCST);
958
+
959
+ if (roundable) {
960
+ resdb.d = res;
961
+ resdb.i[HI] += M << 20; /* scales by 2^M by adding M at bit 20 of the high word; NOTE(review): if M is negative this left-shifts a negative int, which ISO C leaves undefined - confirm for the supported compilers */
962
+ return resdb.d;
963
+ } else {
964
+ /* Rest of argument reduction for accurate phase */
965
+
966
+ Mul133(&msLog2Div2LMultKh,&msLog2Div2LMultKm,&msLog2Div2LMultKl,kd,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
967
+ t1 = x + msLog2Div2LMultKh;
968
+ Add12Cond(rh,t2,t1,msLog2Div2LMultKm);
969
+ Add12Cond(rm,rl,t2,msLog2Div2LMultKl);
970
+
971
+ /* Table reads for accurate phase */
972
+ tbl1l = twoPowerIndex1[index1].lo;
973
+ tbl2l = twoPowerIndex2[index2].lo;
974
+
975
+ /* Call accurate phase */
976
+ exp_td_accurate(&polyTblh, &polyTblm, &polyTbll, rh, rm, rl, tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l);
977
+
978
+ /* Since the final multiplication is exact, we can do the final rounding before multiplying
979
+ We overcome this way also the cases where the final result is not underflowed whereas the
980
+ lower parts of the intermediate final result are.
981
+ */
982
+
983
+ RoundDownwards3(&res,polyTblh,polyTblm,polyTbll);
984
+
985
+ /* Final multiplication with 2^M
986
+ We implement the multiplication in integer computations to overcome
987
+ the problem of the non-representability of 2^1024 if M = 1024
988
+ */
989
+
990
+ resdb.d = res;
991
+ resdb.i[HI] += M << 20; /* same integer scaling by 2^M as in the quick-phase return above */
992
+ return resdb.d;
993
+ } /* Accurate phase launched after rounding test*/
994
+ }
995
+
996
+
997
+ #ifdef BUILD_INTERVAL_FUNCTIONS
998
+ interval j_exp(interval x) /* Interval version of exp: the lower bound is computed from LOW(x) with downward rounding and the upper bound from UP(x) with upward rounding */
999
+ {
1000
+ interval res;
1001
+ double x_inf, x_sup;
1002
+ double rh_sup, rm_sup, rl_sup, tbl1h_sup, tbl1m_sup, tbl1l_sup;
1003
+ double tbl2h_sup, tbl2m_sup, tbl2l_sup;
1004
+ double xMultLog2InvMult2L_sup, shiftedXMult_sup, kd_sup;
1005
+ double msLog2Div2LMultKh_sup, msLog2Div2LMultKm_sup, msLog2Div2LMultKl_sup;
1006
+ double t1_sup, t2_sup, polyTblh_sup, polyTblm_sup, polyTbll_sup;
1007
+ db_number shiftedXMultdb_sup, xdb_sup, resdb_sup;
1008
+ int k_sup, M_sup, index1_sup, index2_sup, xIntHi_sup, mightBeDenorm_sup, roundable;
1009
+ double t8_sup, t9_sup, t10_sup, t11_sup, t12_sup, t13_sup;
1010
+ double rhSquare_sup, rhSquareHalf_sup, rhC3_sup, rhFour_sup, monomialCube_sup;
1011
+ double highPoly_sup, highPolyWithSquare_sup, monomialFour_sup;
1012
+ double tablesh_sup, tablesl_sup;
1013
+ double s1_sup, s2_sup, s3_sup, s4_sup, s5_sup;
1014
+ double res_sup;
1015
+
1016
+ double rh_inf, rm_inf, rl_inf, tbl1h_inf, tbl1m_inf, tbl1l_inf;
1017
+ double tbl2h_inf, tbl2m_inf, tbl2l_inf;
1018
+ double xMultLog2InvMult2L_inf, shiftedXMult_inf, kd_inf;
1019
+ double msLog2Div2LMultKh_inf, msLog2Div2LMultKm_inf, msLog2Div2LMultKl_inf;
1020
+ double t1_inf, t2_inf, polyTblh_inf, polyTblm_inf, polyTbll_inf;
1021
+ db_number shiftedXMultdb_inf, xdb_inf, resdb_inf;
1022
+ int k_inf, M_inf, index1_inf, index2_inf, xIntHi_inf, mightBeDenorm_inf;
1023
+ double t8_inf, t9_inf, t10_inf, t11_inf, t12_inf, t13_inf;
1024
+ double rhSquare_inf, rhSquareHalf_inf, rhC3_inf, rhFour_inf, monomialCube_inf;
1025
+ double highPoly_inf, highPolyWithSquare_inf, monomialFour_inf;
1026
+ double tablesh_inf, tablesl_inf;
1027
+ double s1_inf, s2_inf, s3_inf, s4_inf, s5_inf;
1028
+ double res_inf;
1029
+
1030
+ double res_simple_inf, res_simple_sup; /* never assigned in this function; only read under infDone==1 / supDone==1 */
1031
+ int infDone=0; int supDone=0; /* NOTE(review): neither flag is assigned again in this function, so the (..Done==1) branches below never run and the (..Done==0) branches always run */
1032
+
1033
+ x_inf=LOW(x);
1034
+ x_sup=UP(x);
1035
+
1036
+ /* Argument reduction and filtering for special cases */
1037
+
1038
+ /* Compute k as a double and as an int */
1039
+ xdb_sup.d = x_sup;
1040
+ xdb_inf.d = x_inf;
1041
+ xMultLog2InvMult2L_sup = x_sup * log2InvMult2L;
1042
+ xMultLog2InvMult2L_inf = x_inf * log2InvMult2L;
1043
+ shiftedXMult_sup = xMultLog2InvMult2L_sup + shiftConst;
1044
+ shiftedXMult_inf = xMultLog2InvMult2L_inf + shiftConst;
1045
+ kd_sup = shiftedXMult_sup - shiftConst;
1046
+ kd_inf = shiftedXMult_inf - shiftConst;
1047
+ shiftedXMultdb_sup.d = shiftedXMult_sup;
1048
+ shiftedXMultdb_inf.d = shiftedXMult_inf;
1049
+
1050
+
1051
+ /* Special cases tests */
1052
+ xIntHi_sup = xdb_sup.i[HI];
1053
+ mightBeDenorm_sup = 0;
1054
+
1055
+ /* Special cases tests */
1056
+ xIntHi_inf = xdb_inf.i[HI];
1057
+ mightBeDenorm_inf = 0;
1058
+
1059
+ if ( __builtin_expect( /* slow path when either bound is zero/denormal, Inf/NaN, or beyond the overflow/underflow/denormal-result bounds; several clauses only AND extra tests onto an earlier clause and are therefore redundant */
1060
+ ((xIntHi_sup & 0x7ff00000) == 0)
1061
+ || (((xIntHi_sup & 0x7ff00000) == 0) && (x_sup == 0.0))
1062
+ || (((xIntHi_sup & 0x7ff00000) == 0) && (x_sup < 0.0))
1063
+ || (((xIntHi_sup & 0x7fffffff) >= OVRUDRFLWSMPLBOUND) && ((xIntHi_sup & 0x7fffffff) >= 0x7ff00000))
1064
+ || (((xIntHi_sup & 0x7fffffff) >= OVRUDRFLWSMPLBOUND) && ((xIntHi_sup & 0x7fffffff) >= 0x7ff00000) && (((xIntHi_sup & 0x000fffff) | xdb_sup.i[LO]) != 0))
1065
+ || (((xIntHi_sup & 0x7fffffff) >= OVRUDRFLWSMPLBOUND) && ((xIntHi_sup & 0x7fffffff) >= 0x7ff00000) && ((xIntHi_sup & 0x80000000)==0))
1066
+ || (((xIntHi_sup & 0x7fffffff) >= OVRUDRFLWSMPLBOUND) && (x_sup > OVRFLWBOUND))
1067
+ || (((xIntHi_sup & 0x7fffffff) >= OVRUDRFLWSMPLBOUND) && (x_sup <= UNDERFLWBOUND))
1068
+ || (((xIntHi_sup & 0x7fffffff) >= OVRUDRFLWSMPLBOUND) && (x_sup <= DENORMBOUND))
1069
+ || ((xIntHi_inf & 0x7ff00000) == 0)
1070
+ || (((xIntHi_inf & 0x7ff00000) == 0) && (x_inf == 0.0))
1071
+ || (((xIntHi_inf & 0x7ff00000) == 0) && (x_inf > 0.0))
1072
+ || (((xIntHi_inf & 0x7fffffff) >= OVRUDRFLWSMPLBOUND) && ((xIntHi_inf & 0x7fffffff) >= 0x7ff00000))
1073
+ || (((xIntHi_inf & 0x7fffffff) >= OVRUDRFLWSMPLBOUND) && ((xIntHi_inf & 0x7fffffff) >= 0x7ff00000) && (((xIntHi_inf & 0x000fffff) | xdb_inf.i[LO]) != 0))
1074
+ || (((xIntHi_inf & 0x7fffffff) >= OVRUDRFLWSMPLBOUND) && ((xIntHi_inf & 0x7fffffff) >= 0x7ff00000) && ((xIntHi_inf & 0x80000000)==0))
1075
+ || (((xIntHi_inf & 0x7fffffff) >= OVRUDRFLWSMPLBOUND) && (x_inf > OVRFLWBOUND))
1076
+ || (((xIntHi_inf & 0x7fffffff) >= OVRUDRFLWSMPLBOUND) && (x_inf <= UNDERFLWBOUND))
1077
+ || (((xIntHi_inf & 0x7fffffff) >= OVRUDRFLWSMPLBOUND) && (x_inf <= DENORMBOUND))
1078
+ ,FALSE))
1079
+ { /* delegate both bounds to the scalar directed-rounding functions */
1080
+ ASSIGN_LOW(res,exp_rd(LOW(x)));
1081
+ ASSIGN_UP(res,exp_ru(UP(x)));
1082
+ return res;
1083
+ }
1084
+
1085
+ /* The denormal/zero and other special-case tests were all folded into the condition above */
1086
+ /* If we are here, we are sure to be neither +/- Inf nor NaN nor overflowed nor denormalized in the argument
1087
+ and, since the DENORMBOUND cases were delegated above, mightBeDenorm_inf / mightBeDenorm_sup stay 0
1088
+
1089
+ We continue the argument reduction for the quick phase and table reads for both phases
1090
+ */
1091
+
1092
+ Mul12(&s1_sup,&s2_sup,msLog2Div2Lh,kd_sup);
1093
+ Mul12(&s1_inf,&s2_inf,msLog2Div2Lh,kd_inf);
1094
+ s3_sup = kd_sup * msLog2Div2Lm;
1095
+ s3_inf = kd_inf * msLog2Div2Lm;
1096
+ s4_sup = s2_sup + s3_sup;
1097
+ s4_inf = s2_inf + s3_inf;
1098
+ s5_sup = x_sup + s1_sup;
1099
+ s5_inf = x_inf + s1_inf;
1100
+ Add12Cond(rh_sup,rm_sup,s5_sup,s4_sup);
1101
+ Add12Cond(rh_inf,rm_inf,s5_inf,s4_inf);
1102
+ k_sup = shiftedXMultdb_sup.i[LO];
1103
+ k_inf = shiftedXMultdb_inf.i[LO];
1104
+ M_sup = k_sup >> L;
1105
+ M_inf = k_inf >> L;
1106
+ index1_sup = k_sup & INDEXMASK1;
1107
+ index1_inf = k_inf & INDEXMASK1;
1108
+ index2_sup = (k_sup & INDEXMASK2) >> LHALF;
1109
+ index2_inf = (k_inf & INDEXMASK2) >> LHALF;
1110
+
1111
+ /* Table reads */
1112
+ tbl1h_sup = twoPowerIndex1[index1_sup].hi;
1113
+ tbl1h_inf = twoPowerIndex1[index1_inf].hi;
1114
+ tbl1m_sup = twoPowerIndex1[index1_sup].mi;
1115
+ tbl1m_inf = twoPowerIndex1[index1_inf].mi;
1116
+ tbl2h_sup = twoPowerIndex2[index2_sup].hi;
1117
+ tbl2h_inf = twoPowerIndex2[index2_inf].hi;
1118
+ tbl2m_sup = twoPowerIndex2[index2_sup].mi;
1119
+ tbl2m_inf = twoPowerIndex2[index2_inf].mi;
1120
+
1121
+
1122
+
1123
+
1124
+ /* No more underflow nor denormal is possible. There may be the case where
1125
+ M is 1024 and the value that 2^M is to be multiplied with is less than 1.
1126
+ So the final result will be normalized and representable, but the multiplication must be
1127
+ made in 2 steps
1128
+ */
1129
+
1130
+ /* Quick phase starts here */
1131
+
1132
+ rhSquare_sup = rh_sup * rh_sup;
1133
+ rhSquare_inf = rh_inf * rh_inf;
1134
+ rhC3_sup = c3 * rh_sup;
1135
+ rhC3_inf = c3 * rh_inf;
1136
+ rhSquareHalf_sup = 0.5 * rhSquare_sup;
1137
+ rhSquareHalf_inf = 0.5 * rhSquare_inf;
1138
+ monomialCube_sup = rhC3_sup * rhSquare_sup;
1139
+ monomialCube_inf = rhC3_inf * rhSquare_inf;
1140
+ rhFour_sup = rhSquare_sup * rhSquare_sup;
1141
+ rhFour_inf = rhSquare_inf * rhSquare_inf;
1142
+ monomialFour_sup = c4 * rhFour_sup;
1143
+ monomialFour_inf = c4 * rhFour_inf;
1144
+ highPoly_sup = monomialCube_sup + monomialFour_sup;
1145
+ highPoly_inf = monomialCube_inf + monomialFour_inf;
1146
+ highPolyWithSquare_sup = rhSquareHalf_sup + highPoly_sup;
1147
+ highPolyWithSquare_inf = rhSquareHalf_inf + highPoly_inf;
1148
+ Mul22(&tablesh_sup,&tablesl_sup,tbl1h_sup,tbl1m_sup,tbl2h_sup,tbl2m_sup);
1149
+ Mul22(&tablesh_inf,&tablesl_inf,tbl1h_inf,tbl1m_inf,tbl2h_inf,tbl2m_inf);
1150
+ t8_sup = rm_sup + highPolyWithSquare_sup;
1151
+ t8_inf = rm_inf + highPolyWithSquare_inf;
1152
+ t9_sup = rh_sup + t8_sup;
1153
+ t9_inf = rh_inf + t8_inf;
1154
+ t10_sup = tablesh_sup * t9_sup;
1155
+ t10_inf = tablesh_inf * t9_inf;
1156
+ Add12(t11_sup,t12_sup,tablesh_sup,t10_sup);
1157
+ Add12(t11_inf,t12_inf,tablesh_inf,t10_inf);
1158
+ t13_sup = t12_sup + tablesl_sup;
1159
+ t13_inf = t12_inf + tablesl_inf;
1160
+ Add12(polyTblh_sup,polyTblm_sup,t11_sup,t13_sup);
1161
+ Add12(polyTblh_inf,polyTblm_inf,t11_inf,t13_inf);
1162
+
1163
+ /* Rounding test
1164
+ Since we know that the result of the final multiplication with 2^M
1165
+ will always be representable, we can do the rounding test on the
1166
+ factors and multiply only the final result.
1167
+ We implement the multiplication in integer computations to overcome
1168
+ the problem of the non-representability of 2^1024 if M = 1024
1169
+ */
1170
+
1171
+ if (infDone==1) res_inf=res_simple_inf; /* dead: infDone is always 0 here */
1172
+ if (supDone==1) res_sup=res_simple_sup; /* dead: supDone is always 0 here */
1173
+
1174
+ // TEST_AND_COPY_RDRU_EXP(roundable,infDone,supDone,res_inf,polyTblh_inf,polyTblm_inf,res_sup,polyTblh_sup,polyTblm_sup,RDROUNDCST);
1175
+ db_number yh_inf, yl_inf, u53_inf, yh_sup, yl_sup, u53_sup;
1176
+ int yh_inf_neg, yl_inf_neg, yh_sup_neg, yl_sup_neg;
1177
+ int rd_ok, ru_ok;
1178
+ double save_res_inf=res_inf; /* NOTE(review): res_inf has not been assigned on this path, so this copies an indeterminate value; the copy is only used if infDone is nonzero */
1179
+ double save_res_sup=res_sup; /* NOTE(review): same for res_sup / supDone */
1180
+ yh_inf.d = polyTblh_inf; yl_inf.d = polyTblm_inf;
1181
+ yh_inf_neg = (yh_inf.i[HI] & 0x80000000);
1182
+ yl_inf_neg = (yl_inf.i[HI] & 0x80000000);
1183
+ yh_inf.l = yh_inf.l & 0x7fffffffffffffffLL; /* compute the absolute value*/
1184
+ yl_inf.l = yl_inf.l & 0x7fffffffffffffffLL; /* compute the absolute value*/
1185
+ u53_inf.l = (yh_inf.l & ULL(7ff0000000000000)) + ULL(0010000000000000);
1186
+ yh_sup.d = polyTblh_sup; yl_sup.d = polyTblm_sup;
1187
+ yh_sup_neg = (yh_sup.i[HI] & 0x80000000);
1188
+ yl_sup_neg = (yl_sup.i[HI] & 0x80000000);
1189
+ yh_sup.l = yh_sup.l & 0x7fffffffffffffffLL; /* compute the absolute value*/
1190
+ yl_sup.l = yl_sup.l & 0x7fffffffffffffffLL; /* compute the absolute value*/
1191
+ u53_sup.l = (yh_sup.l & ULL(7ff0000000000000)) + ULL(0010000000000000);
1192
+ roundable = 0; /* encoding set below: 0 = neither bound passed the rounding test, 1 = lower only, 2 = upper only, 3 = both */
1193
+ rd_ok=(yl_inf.d > RDROUNDCST * u53_inf.d); /* quick-phase rounding test for the lower bound */
1194
+ ru_ok=(yl_sup.d > RDROUNDCST * u53_sup.d); /* quick-phase rounding test for the upper bound */
1195
+ if(yl_inf_neg) { /* The case yl==0 is filtered by the above test*/
1196
+ /* return next down */
1197
+ yh_inf.d = polyTblh_inf;
1198
+ if(yh_inf_neg) yh_inf.l++; else yh_inf.l--; /* Beware: fails for zero */
1199
+ res_inf = yh_inf.d;
1200
+ }
1201
+ else {
1202
+ res_inf = polyTblh_inf;
1203
+ }
1204
+ if(!yl_sup_neg) { /* The case yl==0 is filtered by the above test*/
1205
+ /* return next up */
1206
+ yh_sup.d = polyTblh_sup;
1207
+ if(yh_sup_neg) yh_sup.l--; else yh_sup.l++; /* Beware: fails for zero */
1208
+ res_sup = yh_sup.d;
1209
+ }
1210
+ else {
1211
+ res_sup = polyTblh_sup;
1212
+ }
1213
+ if(infDone) res_inf=save_res_inf; /* dead: infDone is always 0 here */
1214
+ if(supDone) res_sup=save_res_sup; /* dead: supDone is always 0 here */
1215
+ if(rd_ok && ru_ok){
1216
+ roundable=3;
1217
+ }
1218
+ else if (rd_ok){
1219
+ roundable=1;
1220
+ }
1221
+ else if (ru_ok){
1222
+ roundable=2;
1223
+ }
1224
+ resdb_inf.d = res_inf;
1225
+ resdb_sup.d = res_sup;
1226
+
1227
+ if (roundable==3) /* both quick-phase results are usable: only the integer scaling by 2^M remains */
1228
+ {
1229
+ if (infDone==0){
1230
+ resdb_inf.i[HI] += M_inf << 20;
1231
+ }
1232
+ ASSIGN_LOW(res,resdb_inf.d);
1233
+ if (supDone==0){
1234
+ resdb_sup.i[HI] += M_sup << 20;
1235
+ }
1236
+ ASSIGN_UP(res,resdb_sup.d);
1237
+ return res;
1238
+ }
1239
+ if(roundable==1) /* lower bound is final; the upper bound needs the accurate phase */
1240
+ {
1241
+ if(infDone==0){
1242
+ resdb_inf.i[HI] += M_inf << 20;
1243
+ }
1244
+ ASSIGN_LOW(res,resdb_inf.d);
1245
+ if(supDone==0){
1246
+ /* Rest of argument reduction for accurate phase */
1247
+ Mul133(&msLog2Div2LMultKh_sup,&msLog2Div2LMultKm_sup,&msLog2Div2LMultKl_sup,kd_sup,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
1248
+ t1_sup = x_sup + msLog2Div2LMultKh_sup;
1249
+ Add12Cond(rh_sup,t2_sup,t1_sup,msLog2Div2LMultKm_sup);
1250
+ Add12Cond(rm_sup,rl_sup,t2_sup,msLog2Div2LMultKl_sup);
1251
+ /* Table reads for accurate phase */
1252
+ tbl1l_sup = twoPowerIndex1[index1_sup].lo;
1253
+ tbl2l_sup = twoPowerIndex2[index2_sup].lo;
1254
+ /* Call accurate phase */
1255
+ exp_td_accurate(&polyTblh_sup, &polyTblm_sup, &polyTbll_sup, rh_sup, rm_sup, rl_sup, tbl1h_sup, tbl1m_sup, tbl1l_sup, tbl2h_sup, tbl2m_sup, tbl2l_sup);
1256
+ /* Since the final multiplication is exact, we can do the final rounding before multiplying
1257
+ We overcome this way also the cases where the final result is not underflowed whereas the
1258
+ lower parts of the intermediate final result are.
1259
+ */
1260
+ RoundUpwards3(&res_sup,polyTblh_sup,polyTblm_sup,polyTbll_sup);
1261
+ /* Final multiplication with 2^M
1262
+ We implement the multiplication in integer computations to overcome
1263
+ the problem of the non-representability of 2^1024 if M = 1024
1264
+ */
1265
+ resdb_sup.d = res_sup;
1266
+ resdb_sup.i[HI] += M_sup << 20;
1267
+ }
1268
+ ASSIGN_UP(res,resdb_sup.d);
1269
+ return res;
1270
+ } /* Accurate phase launched after rounding test*/
1271
+
1272
+ if (roundable==2) { /* upper bound is final; the lower bound needs the accurate phase */
1273
+ if (infDone==0){
1274
+ /* Rest of argument reduction for accurate phase */
1275
+ Mul133(&msLog2Div2LMultKh_inf,&msLog2Div2LMultKm_inf,&msLog2Div2LMultKl_inf,kd_inf,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
1276
+ t1_inf = x_inf + msLog2Div2LMultKh_inf;
1277
+ Add12Cond(rh_inf,t2_inf,t1_inf,msLog2Div2LMultKm_inf);
1278
+ Add12Cond(rm_inf,rl_inf,t2_inf,msLog2Div2LMultKl_inf);
1279
+ /* Table reads for accurate phase */
1280
+ tbl1l_inf = twoPowerIndex1[index1_inf].lo;
1281
+ tbl2l_inf = twoPowerIndex2[index2_inf].lo;
1282
+ /* Call accurate phase */
1283
+ exp_td_accurate(&polyTblh_inf, &polyTblm_inf, &polyTbll_inf, rh_inf, rm_inf, rl_inf, tbl1h_inf, tbl1m_inf, tbl1l_inf, tbl2h_inf, tbl2m_inf, tbl2l_inf);
1284
+ /* Since the final multiplication is exact, we can do the final rounding before multiplying
1285
+ We overcome this way also the cases where the final result is not underflowed whereas the
1286
+ lower parts of the intermediate final result are.
1287
+ */
1288
+
1289
+ RoundDownwards3(&res_inf,polyTblh_inf,polyTblm_inf,polyTbll_inf);
1290
+ /* Final multiplication with 2^M
1291
+ We implement the multiplication in integer computations to overcome
1292
+ the problem of the non-representability of 2^1024 if M = 1024
1293
+ */
1294
+
1295
+ resdb_inf.d = res_inf;
1296
+ resdb_inf.i[HI] += M_inf << 20;
1297
+ }
1298
+ ASSIGN_LOW(res,resdb_inf.d);
1299
+ if(supDone==0){
1300
+ resdb_sup.i[HI] += M_sup << 20;
1301
+ }
1302
+ ASSIGN_UP(res,resdb_sup.d);
1303
+ return res;
1304
+ } /* Accurate phase launched after rounding test*/
1305
+ if(roundable==0) /* neither bound passed the rounding test: both need the accurate phase */
1306
+ {
1307
+ if(supDone==0){
1308
+ /* Rest of argument reduction for accurate phase */
1309
+ Mul133(&msLog2Div2LMultKh_sup,&msLog2Div2LMultKm_sup,&msLog2Div2LMultKl_sup,kd_sup,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
1310
+ t1_sup = x_sup + msLog2Div2LMultKh_sup;
1311
+ Add12Cond(rh_sup,t2_sup,t1_sup,msLog2Div2LMultKm_sup);
1312
+ Add12Cond(rm_sup,rl_sup,t2_sup,msLog2Div2LMultKl_sup);
1313
+ /* Table reads for accurate phase */
1314
+ tbl1l_sup = twoPowerIndex1[index1_sup].lo;
1315
+ tbl2l_sup = twoPowerIndex2[index2_sup].lo;
1316
+ /* Call accurate phase */
1317
+ exp_td_accurate(&polyTblh_sup, &polyTblm_sup, &polyTbll_sup, rh_sup, rm_sup, rl_sup, tbl1h_sup, tbl1m_sup, tbl1l_sup, tbl2h_sup, tbl2m_sup, tbl2l_sup);
1318
+ /* Since the final multiplication is exact, we can do the final rounding before multiplying
1319
+ We overcome this way also the cases where the final result is not underflowed whereas the
1320
+ lower parts of the intermediate final result are.
1321
+ */
1322
+ RoundUpwards3(&res_sup,polyTblh_sup,polyTblm_sup,polyTbll_sup);
1323
+ /* Final multiplication with 2^M
1324
+ We implement the multiplication in integer computations to overcome
1325
+ the problem of the non-representability of 2^1024 if M = 1024
1326
+ */
1327
+ resdb_sup.d = res_sup;
1328
+ resdb_sup.i[HI] += M_sup << 20;
1329
+ }
1330
+ ASSIGN_UP(res,resdb_sup.d);
1331
+ if (infDone==0){
1332
+ /* Rest of argument reduction for accurate phase */
1333
+ Mul133(&msLog2Div2LMultKh_inf,&msLog2Div2LMultKm_inf,&msLog2Div2LMultKl_inf,kd_inf,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
1334
+ t1_inf = x_inf + msLog2Div2LMultKh_inf;
1335
+ Add12Cond(rh_inf,t2_inf,t1_inf,msLog2Div2LMultKm_inf);
1336
+ Add12Cond(rm_inf,rl_inf,t2_inf,msLog2Div2LMultKl_inf);
1337
+ /* Table reads for accurate phase */
1338
+ tbl1l_inf = twoPowerIndex1[index1_inf].lo;
1339
+ tbl2l_inf = twoPowerIndex2[index2_inf].lo;
1340
+ /* Call accurate phase */
1341
+ exp_td_accurate(&polyTblh_inf, &polyTblm_inf, &polyTbll_inf, rh_inf, rm_inf, rl_inf, tbl1h_inf, tbl1m_inf, tbl1l_inf, tbl2h_inf, tbl2m_inf, tbl2l_inf);
1342
+ /* Since the final multiplication is exact, we can do the final rounding before multiplying
1343
+ We overcome this way also the cases where the final result is not underflowed whereas the
1344
+ lower parts of the intermediate final result are.
1345
+ */
1346
+
1347
+ RoundDownwards3(&res_inf,polyTblh_inf,polyTblm_inf,polyTbll_inf);
1348
+ /* Final multiplication with 2^M
1349
+ We implement the multiplication in integer computations to overcome
1350
+ the problem of the non-representability of 2^1024 if M = 1024
1351
+ */
1352
+
1353
+ resdb_inf.d = res_inf;
1354
+ resdb_inf.i[HI] += M_inf << 20;
1355
+ }
1356
+ ASSIGN_LOW(res,resdb_inf.d);
1357
+ return res;
1358
+ } /* Accurate phase launched after rounding test*/
1359
+
1360
+ return res; /* not reached: roundable is always 0..3 and each of those cases returns above */
1361
+ }
1362
+ #endif
1363
+