crmf 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +12 -0
  3. data/crmf.gemspec +102 -1
  4. data/ext/crlibm-1.0beta5/AUTHORS +2 -0
  5. data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
  6. data/ext/crlibm-1.0beta5/COPYING +340 -0
  7. data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
  8. data/ext/crlibm-1.0beta5/ChangeLog +125 -0
  9. data/ext/crlibm-1.0beta5/Makefile.am +134 -0
  10. data/ext/crlibm-1.0beta5/NEWS +0 -0
  11. data/ext/crlibm-1.0beta5/README +31 -0
  12. data/ext/crlibm-1.0beta5/README.DEV +23 -0
  13. data/ext/crlibm-1.0beta5/README.md +5 -0
  14. data/ext/crlibm-1.0beta5/TODO +66 -0
  15. data/ext/crlibm-1.0beta5/VERSION +1 -0
  16. data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
  17. data/ext/crlibm-1.0beta5/acos-td.h +629 -0
  18. data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
  19. data/ext/crlibm-1.0beta5/asin-td.h +620 -0
  20. data/ext/crlibm-1.0beta5/asincos.c +4488 -0
  21. data/ext/crlibm-1.0beta5/asincos.h +575 -0
  22. data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
  23. data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
  24. data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
  25. data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
  26. data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
  27. data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
  28. data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
  29. data/ext/crlibm-1.0beta5/configure.ac +419 -0
  30. data/ext/crlibm-1.0beta5/crlibm.h +204 -0
  31. data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
  32. data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
  33. data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
  34. data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
  35. data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
  36. data/ext/crlibm-1.0beta5/double-extended.h +496 -0
  37. data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
  38. data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
  39. data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
  40. data/ext/crlibm-1.0beta5/exp-td.h +685 -0
  41. data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
  42. data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
  43. data/ext/crlibm-1.0beta5/expm1.c +2515 -0
  44. data/ext/crlibm-1.0beta5/expm1.h +715 -0
  45. data/ext/crlibm-1.0beta5/interval.h +238 -0
  46. data/ext/crlibm-1.0beta5/log-de.c +480 -0
  47. data/ext/crlibm-1.0beta5/log-de.h +747 -0
  48. data/ext/crlibm-1.0beta5/log-de2.c +280 -0
  49. data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
  50. data/ext/crlibm-1.0beta5/log-td.c +1158 -0
  51. data/ext/crlibm-1.0beta5/log-td.h +819 -0
  52. data/ext/crlibm-1.0beta5/log.c +2244 -0
  53. data/ext/crlibm-1.0beta5/log.h +1592 -0
  54. data/ext/crlibm-1.0beta5/log10-td.c +906 -0
  55. data/ext/crlibm-1.0beta5/log10-td.h +823 -0
  56. data/ext/crlibm-1.0beta5/log1p.c +1295 -0
  57. data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
  58. data/ext/crlibm-1.0beta5/log2-td.h +821 -0
  59. data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
  60. data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
  61. data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
  62. data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
  63. data/ext/crlibm-1.0beta5/log_fast.c +360 -0
  64. data/ext/crlibm-1.0beta5/log_fast.h +440 -0
  65. data/ext/crlibm-1.0beta5/pow.c +1396 -0
  66. data/ext/crlibm-1.0beta5/pow.h +3101 -0
  67. data/ext/crlibm-1.0beta5/prepare +20 -0
  68. data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
  69. data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
  70. data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
  71. data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
  72. data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
  73. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
  74. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
  75. data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
  76. data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
  77. data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
  78. data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
  79. data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
  80. data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
  81. data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
  82. data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
  83. data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
  84. data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
  85. data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
  86. data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
  87. data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
  88. data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
  89. data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
  90. data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
  91. data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
  92. data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
  93. data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
  94. data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
  95. data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
  96. data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
  97. data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
  98. data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
  99. data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
  100. data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
  101. data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
  102. data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
  103. data/ext/crlibm-1.0beta5/trigpi.h +556 -0
  104. data/ext/crlibm-1.0beta5/triple-double.c +57 -0
  105. data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
  106. data/ext/crmf/crmf.c +16 -16
  107. data/ext/crmf/extconf.rb +12 -8
  108. data/lib/crmf/version.rb +1 -1
  109. data/tests/perf.rb +100 -219
  110. metadata +104 -3
  111. data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,1158 @@
1
+ /*
2
+ * This function computes log, correctly rounded,
3
+ * using triple double arithmetics
4
+
5
+ *
6
+ * Author : Christoph Lauter
7
+ * christoph.lauter at ens-lyon.fr
8
+ *
9
+
10
+ */
11
+
12
+
13
+ #include <stdio.h>
14
+ #include <stdlib.h>
15
+ #include "crlibm.h"
16
+ #include "crlibm_private.h"
17
+ #include "triple-double.h"
18
+ #include "log-td.h"
19
+ #ifdef BUILD_INTERVAL_FUNCTIONS
20
+ #include "interval.h"
21
+ #endif
22
+
23
+
24
+ #define AVOID_FMA 0
25
+
26
+
27
+
28
+ void log_td_accurate(double *logh, double *logm, double *logl, int E, double ed, int index, double zh, double zl, double logih, double logim) {
29
+ double highPoly, t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l, t8h, t8l, t9h, t9l, t10h, t10l, t11h, t11l;
30
+ double t12h, t12l, t13h, t13l, t14h, t14l, zSquareh, zSquarem, zSquarel, zCubeh, zCubem, zCubel, higherPolyMultZh, higherPolyMultZm;
31
+ double higherPolyMultZl, zSquareHalfh, zSquareHalfm, zSquareHalfl, polyWithSquareh, polyWithSquarem, polyWithSquarel;
32
+ double polyh, polym, polyl, logil, logyh, logym, logyl, loghover, logmover, loglover, log2edhover, log2edmover, log2edlover;
33
+ double log2edh, log2edm, log2edl;
34
+
35
+
36
+ #if EVAL_PERF
37
+ crlibm_second_step_taken++;
38
+ #endif
39
+
40
+
41
+ /* Accurate phase:
42
+
43
+ Argument reduction is already done.
44
+ We must return logh, logm and logl representing the intermediate result in 118 bits precision.
45
+
46
+ We use a 14 degree polynomial, computing the first 3 (the first is 0) coefficients in triple double,
47
+ calculating the next 7 coefficients in double double arithmetics and the last in double.
48
+
49
+ We must account for zl starting with the monome of degree 4 (7^3 + 53 - 7 >> 118); so
50
+ double double calculations won't account for it.
51
+
52
+ */
53
+
54
+ /* Start of the horner scheme */
55
+
56
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
57
+ highPoly = FMA(FMA(FMA(FMA(accPolyC14,zh,accPolyC13),zh,accPolyC12),zh,accPolyC11),zh,accPolyC10);
58
+ #else
59
+ highPoly = accPolyC10 + zh * (accPolyC11 + zh * (accPolyC12 + zh * (accPolyC13 + zh * accPolyC14)));
60
+ #endif
61
+
62
+ /* We want to write
63
+
64
+ accPolyC3 + zh * (accPoly4 + zh * (accPoly5 + zh * (accPoly6 + zh * (accPoly7 + zh * (accPoly8 + zh * (accPoly9 + zh * highPoly))))));
65
+ ( t14 t13 t12 t11 t10 t9 t8 t7 t6 t5 t4 t3 t2 t1 )
66
+
67
+ with all additions and multiplications in double double arithmetics
68
+ but we will produce intermediate results labelled t1h/t1l thru t14h/t14l
69
+ */
70
+
71
+ Mul12(&t1h, &t1l, zh, highPoly);
72
+ Add22(&t2h, &t2l, accPolyC9h, accPolyC9l, t1h, t1l);
73
+ Mul22(&t3h, &t3l, zh, zl, t2h, t2l);
74
+ Add22(&t4h, &t4l, accPolyC8h, accPolyC8l, t3h, t3l);
75
+ Mul22(&t5h, &t5l, zh, zl, t4h, t4l);
76
+ Add22(&t6h, &t6l, accPolyC7h, accPolyC7l, t5h, t5l);
77
+ Mul22(&t7h, &t7l, zh, zl, t6h, t6l);
78
+ Add22(&t8h, &t8l, accPolyC6h, accPolyC6l, t7h, t7l);
79
+ Mul22(&t9h, &t9l, zh, zl, t8h, t8l);
80
+ Add22(&t10h, &t10l, accPolyC5h, accPolyC5l, t9h, t9l);
81
+ Mul22(&t11h, &t11l, zh, zl, t10h, t10l);
82
+ Add22(&t12h, &t12l, accPolyC4h, accPolyC4l, t11h, t11l);
83
+ Mul22(&t13h, &t13l, zh, zl, t12h, t12l);
84
+ Add22(&t14h, &t14l, accPolyC3h, accPolyC3l, t13h, t13l);
85
+
86
+ /* We must now prepare (zh + zl)^2 and (zh + zl)^3 as triple doubles */
87
+
88
+ Mul23(&zSquareh, &zSquarem, &zSquarel, zh, zl, zh, zl);
89
+ Mul233(&zCubeh, &zCubem, &zCubel, zh, zl, zSquareh, zSquarem, zSquarel);
90
+
91
+ /* We can now multiplicate the middle and higher polynomial by z^3 */
92
+
93
+ Mul233(&higherPolyMultZh, &higherPolyMultZm, &higherPolyMultZl, t14h, t14l, zCubeh, zCubem, zCubel);
94
+
95
+ /* Multiply now z^2 by -1/2 (exact op) and add to middle and higher polynomial */
96
+
97
+ zSquareHalfh = zSquareh * -0.5;
98
+ zSquareHalfm = zSquarem * -0.5;
99
+ zSquareHalfl = zSquarel * -0.5;
100
+
101
+ Add33(&polyWithSquareh, &polyWithSquarem, &polyWithSquarel,
102
+ zSquareHalfh, zSquareHalfm, zSquareHalfl,
103
+ higherPolyMultZh, higherPolyMultZm, higherPolyMultZl);
104
+
105
+ /* Add now zh and zl to obtain the polynomial evaluation result */
106
+
107
+ Add233(&polyh, &polym, &polyl, zh, zl, polyWithSquareh, polyWithSquarem, polyWithSquarel);
108
+
109
+ /* Reconstruct now log(y) = log(1 + z) - log(ri) by adding logih, logim, logil
110
+ logil has not been read to the time, do this first
111
+ */
112
+
113
+ logil = argredtable[index].logil;
114
+
115
+ Add33(&logyh, &logym, &logyl, logih, logim, logil, polyh, polym, polyl);
116
+
117
+ /* Multiply log2 with E, i.e. log2h, log2m, log2l by ed
118
+ ed is always less than 2^(12) and log2h and log2m are stored with at least 12 trailing zeros
119
+ So multiplying naively is correct (up to 134 bits at least)
120
+
121
+ The final result is thus obtained by adding log2 * E to log(y)
122
+ */
123
+
124
+ log2edhover = log2h * ed;
125
+ log2edmover = log2m * ed;
126
+ log2edlover = log2l * ed;
127
+
128
+ /* It may be necessary to renormalize the tabulated value (multiplied by ed) before adding
129
+ the to the log(y)-result
130
+
131
+ If needed, uncomment the following Renormalize3-Statement and comment out the copies
132
+ following it.
133
+ */
134
+
135
+ /* Renormalize3(&log2edh, &log2edm, &log2edl, log2edhover, log2edmover, log2edlover); */
136
+
137
+ log2edh = log2edhover;
138
+ log2edm = log2edmover;
139
+ log2edl = log2edlover;
140
+
141
+ Add33(&loghover, &logmover, &loglover, log2edh, log2edm, log2edl, logyh, logym, logyl);
142
+
143
+ /* Since we can not guarantee in each addition and multiplication procedure that
144
+ the results are not overlapping, we must renormalize the result before handing
145
+ it over to the final rounding
146
+ */
147
+
148
+ Renormalize3(logh,logm,logl,loghover,logmover,loglover);
149
+
150
+ }
151
+
152
+
153
+
154
+ /*************************************************************
155
+ *************************************************************
156
+ * ROUNDED TO NEAREST *
157
+ *************************************************************
158
+ *************************************************************/
159
+ double log_rn(double x){
160
+ db_number xdb;
161
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
162
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
163
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, logl, roundcst;
164
+ int E, index;
165
+
166
+ E=0;
167
+ xdb.d=x;
168
+
169
+ /* Filter cases */
170
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
171
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
172
+ return -1.0/0.0;
173
+ } /* log(+/-0) = -Inf */
174
+ if (xdb.i[HI] < 0){
175
+ return (x-x)/0; /* log(-x) = Nan */
176
+ }
177
+ /* Subnormal number */
178
+ E = -52;
179
+ xdb.d *= two52; /* make x a normal number */
180
+ }
181
+
182
+ if (xdb.i[HI] >= 0x7ff00000){
183
+ return x+x; /* Inf or Nan */
184
+ }
185
+
186
+
187
+ /* Extract exponent and mantissa
188
+ Do range reduction,
189
+ yielding to E holding the exponent and
190
+ y the mantissa between sqrt(2)/2 and sqrt(2)
191
+ */
192
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
193
+ index = (xdb.i[HI] & 0x000fffff);
194
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
195
+ index = (index + (1<<(20-L-1))) >> (20-L);
196
+
197
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
198
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
199
+ xdb.i[HI] -= 0x00100000;
200
+ E++;
201
+ }
202
+ y = xdb.d;
203
+ index = index & INDEXMASK;
204
+ /* Cast integer E into double ed for multiplication later */
205
+ ed = (double) E;
206
+
207
+ /*
208
+ Read tables:
209
+ Read one float for ri
210
+ Read the first two doubles for -log(r_i) (out of three)
211
+
212
+ Organization of the table:
213
+
214
+ one struct entry per index, the struct entry containing
215
+ r, logih, logim and logil in this order
216
+ */
217
+
218
+
219
+ ri = argredtable[index].ri;
220
+ /*
221
+ Actually we don't need the logarithm entries now
222
+ Move the following two lines to the eventual reconstruction
223
+ As long as we don't have any if in the following code, we can overlap
224
+ memory access with calculations
225
+ */
226
+ logih = argredtable[index].logih;
227
+ logim = argredtable[index].logim;
228
+
229
+ /* Do range reduction:
230
+
231
+ zh + zl = y * ri - 1.0 correctly
232
+
233
+ Correctness is assured by use of Mul12 and Add12
234
+ even if we don't force ri to have its' LSBs set to zero
235
+
236
+ Discard zl for higher monome degrees
237
+ */
238
+
239
+ Mul12(&yrih, &yril, y, ri);
240
+ th = yrih - 1.0;
241
+ Add12Cond(zh, zl, th, yril);
242
+
243
+ /*
244
+ Polynomial evaluation
245
+
246
+ Use a 7 degree polynomial
247
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
248
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
249
+ using an ad hoc method
250
+
251
+ */
252
+
253
+
254
+
255
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
256
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
257
+ #else
258
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
259
+ #endif
260
+
261
+ Mul12(&zhSquareh, &zhSquarel, zh, zh);
262
+ polyUpper = polyHorner * (zh * zhSquareh);
263
+ zhSquareHalfh = zhSquareh * -0.5;
264
+ zhSquareHalfl = zhSquarel * -0.5;
265
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
266
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
267
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l);
268
+
269
+ /* Reconstruction
270
+
271
+ Read logih and logim in the tables (already done)
272
+
273
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
274
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
275
+
276
+ Carry out everything in double double precision
277
+
278
+ */
279
+
280
+ /*
281
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
282
+ Multiplication of ed (double E) and log2h is thus correct
283
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
284
+ is enough for the accurate phase
285
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
286
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
287
+ triple double values. We have to take it into account for the accurate phase
288
+ basic procedures for addition and multiplication
289
+ The condition on the next Add12 is verified as log2m is smaller than log2h
290
+ and both are scaled by ed
291
+ */
292
+
293
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
294
+
295
+ /* Add logih and logim to ph and pl
296
+
297
+ We must use conditioned Add22 as logih can move over ph
298
+ */
299
+
300
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
301
+
302
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
303
+
304
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
305
+
306
+ /* Rounding test and eventual return or call to the accurate function */
307
+
308
+ if(E==0)
309
+ roundcst = ROUNDCST1;
310
+ else
311
+ roundcst = ROUNDCST2;
312
+
313
+
314
+ if(logh == (logh + (logm * roundcst)))
315
+ return logh;
316
+ else
317
+ {
318
+
319
+ #if DEBUG
320
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
321
+ #endif
322
+
323
+ log_td_accurate(&logh, &logm, &logl, E, ed, index, zh, zl, logih, logim);
324
+
325
+ ReturnRoundToNearest3(logh, logm, logl);
326
+
327
+ } /* Accurate phase launched */
328
+ }
329
+
330
+
331
+ /*************************************************************
332
+ *************************************************************
333
+ * ROUNDED UPWARDS *
334
+ *************************************************************
335
+ *************************************************************/
336
+ double log_ru(double x) {
337
+ db_number xdb;
338
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
339
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
340
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, logl, roundcst;
341
+ int E, index;
342
+
343
+ if (x == 1.0) return 0.0; /* This the only case in which the image under log of a double is a double. */
344
+
345
+ E=0;
346
+ xdb.d=x;
347
+
348
+ /* Filter cases */
349
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
350
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
351
+ return -1.0/0.0;
352
+ } /* log(+/-0) = -Inf */
353
+ if (xdb.i[HI] < 0){
354
+ return (x-x)/0; /* log(-x) = Nan */
355
+ }
356
+ /* Subnormal number */
357
+ E = -52;
358
+ xdb.d *= two52; /* make x a normal number */
359
+ }
360
+
361
+ if (xdb.i[HI] >= 0x7ff00000){
362
+ return x+x; /* Inf or Nan */
363
+ }
364
+
365
+
366
+ /* Extract exponent and mantissa
367
+ Do range reduction,
368
+ yielding to E holding the exponent and
369
+ y the mantissa between sqrt(2)/2 and sqrt(2)
370
+ */
371
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
372
+ index = (xdb.i[HI] & 0x000fffff);
373
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
374
+ index = (index + (1<<(20-L-1))) >> (20-L);
375
+
376
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
377
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
378
+ xdb.i[HI] -= 0x00100000;
379
+ E++;
380
+ }
381
+ y = xdb.d;
382
+ index = index & INDEXMASK;
383
+ /* Cast integer E into double ed for multiplication later */
384
+ ed = (double) E;
385
+
386
+ /*
387
+ Read tables:
388
+ Read one float for ri
389
+ Read the first two doubles for -log(r_i) (out of three)
390
+
391
+ Organization of the table:
392
+
393
+ one struct entry per index, the struct entry containing
394
+ r, logih, logim and logil in this order
395
+ */
396
+
397
+
398
+ ri = argredtable[index].ri;
399
+ /*
400
+ Actually we don't need the logarithm entries now
401
+ Move the following two lines to the eventual reconstruction
402
+ As long as we don't have any if in the following code, we can overlap
403
+ memory access with calculations
404
+ */
405
+ logih = argredtable[index].logih;
406
+ logim = argredtable[index].logim;
407
+
408
+ /* Do range reduction:
409
+
410
+ zh + zl = y * ri - 1.0 correctly
411
+
412
+ Correctness is assured by use of Mul12 and Add12
413
+ even if we don't force ri to have its' LSBs set to zero
414
+
415
+ Discard zl for higher monome degrees
416
+ */
417
+
418
+ Mul12(&yrih, &yril, y, ri);
419
+ th = yrih - 1.0;
420
+ Add12Cond(zh, zl, th, yril);
421
+
422
+ /*
423
+ Polynomial evaluation
424
+
425
+ Use a 7 degree polynomial
426
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
427
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
428
+ using an ad hoc method
429
+
430
+ */
431
+
432
+
433
+
434
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
435
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
436
+ #else
437
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
438
+ #endif
439
+
440
+ Mul12(&zhSquareh, &zhSquarel, zh, zh);
441
+ polyUpper = polyHorner * (zh * zhSquareh);
442
+ zhSquareHalfh = zhSquareh * -0.5;
443
+ zhSquareHalfl = zhSquarel * -0.5;
444
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
445
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
446
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l);
447
+
448
+ /* Reconstruction
449
+
450
+ Read logih and logim in the tables (already done)
451
+
452
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
453
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
454
+
455
+ Carry out everything in double double precision
456
+
457
+ */
458
+
459
+ /*
460
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
461
+ Multiplication of ed (double E) and log2h is thus correct
462
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
463
+ is enough for the accurate phase
464
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
465
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
466
+ triple double values. We have to take it into account for the accurate phase
467
+ basic procedures for addition and multiplication
468
+ The condition on the next Add12 is verified as log2m is smaller than log2h
469
+ and both are scaled by ed
470
+ */
471
+
472
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
473
+
474
+ /* Add logih and logim to ph and pl
475
+
476
+ We must use conditioned Add22 as logih can move over ph
477
+ */
478
+
479
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
480
+
481
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
482
+
483
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
484
+
485
+ /* Rounding test and eventual return or call to the accurate function */
486
+
487
+ if(E==0)
488
+ roundcst = RDROUNDCST1;
489
+ else
490
+ roundcst = RDROUNDCST2;
491
+
492
+ TEST_AND_RETURN_RU(logh, logm, roundcst);
493
+
494
+ #if DEBUG
495
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
496
+ #endif
497
+
498
+ log_td_accurate(&logh, &logm, &logl, E, ed, index, zh, zl, logih, logim);
499
+
500
+ ReturnRoundUpwards3(logh, logm, logl);
501
+
502
+ }
503
+
504
+
505
+ /*************************************************************
506
+ *************************************************************
507
+ * ROUNDED DOWNWARDS *
508
+ *************************************************************
509
+ *************************************************************/
510
+ double log_rd(double x) {
511
+ db_number xdb;
512
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
513
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
514
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, logl, roundcst;
515
+ int E, index;
516
+
517
+ if (x == 1.0) return 0.0; /* This the only case in which the image under log of a double is a double. */
518
+
519
+ E=0;
520
+ xdb.d=x;
521
+
522
+ /* Filter cases */
523
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
524
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
525
+ return -1.0/0.0;
526
+ } /* log(+/-0) = -Inf */
527
+ if (xdb.i[HI] < 0){
528
+ return (x-x)/0; /* log(-x) = Nan */
529
+ }
530
+ /* Subnormal number */
531
+ E = -52;
532
+ xdb.d *= two52; /* make x a normal number */
533
+ }
534
+
535
+ if (xdb.i[HI] >= 0x7ff00000){
536
+ return x+x; /* Inf or Nan */
537
+ }
538
+
539
+
540
+ /* Extract exponent and mantissa
541
+ Do range reduction,
542
+ yielding to E holding the exponent and
543
+ y the mantissa between sqrt(2)/2 and sqrt(2)
544
+ */
545
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
546
+ index = (xdb.i[HI] & 0x000fffff);
547
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
548
+ index = (index + (1<<(20-L-1))) >> (20-L);
549
+
550
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
551
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
552
+ xdb.i[HI] -= 0x00100000;
553
+ E++;
554
+ }
555
+ y = xdb.d;
556
+ index = index & INDEXMASK;
557
+ /* Cast integer E into double ed for multiplication later */
558
+ ed = (double) E;
559
+
560
+ /*
561
+ Read tables:
562
+ Read one float for ri
563
+ Read the first two doubles for -log(r_i) (out of three)
564
+
565
+ Organization of the table:
566
+
567
+ one struct entry per index, the struct entry containing
568
+ r, logih, logim and logil in this order
569
+ */
570
+
571
+
572
+ ri = argredtable[index].ri;
573
+ /*
574
+ Actually we don't need the logarithm entries now
575
+ Move the following two lines to the eventual reconstruction
576
+ As long as we don't have any if in the following code, we can overlap
577
+ memory access with calculations
578
+ */
579
+ logih = argredtable[index].logih;
580
+ logim = argredtable[index].logim;
581
+
582
+ /* Do range reduction:
583
+
584
+ zh + zl = y * ri - 1.0 correctly
585
+
586
+ Correctness is assured by use of Mul12 and Add12
587
+ even if we don't force ri to have its' LSBs set to zero
588
+
589
+ Discard zl for higher monome degrees
590
+ */
591
+
592
+ Mul12(&yrih, &yril, y, ri);
593
+ th = yrih - 1.0;
594
+ Add12Cond(zh, zl, th, yril);
595
+
596
+ /*
597
+ Polynomial evaluation
598
+
599
+ Use a 7 degree polynomial
600
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
601
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
602
+ using an ad hoc method
603
+
604
+ */
605
+
606
+
607
+
608
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
609
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
610
+ #else
611
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
612
+ #endif
613
+
614
+ Mul12(&zhSquareh, &zhSquarel, zh, zh);
615
+ polyUpper = polyHorner * (zh * zhSquareh);
616
+ zhSquareHalfh = zhSquareh * -0.5;
617
+ zhSquareHalfl = zhSquarel * -0.5;
618
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
619
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
620
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l);
621
+
622
+ /* Reconstruction
623
+
624
+ Read logih and logim in the tables (already done)
625
+
626
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
627
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
628
+
629
+ Carry out everything in double double precision
630
+
631
+ */
632
+
633
+ /*
634
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
635
+ Multiplication of ed (double E) and log2h is thus correct
636
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
637
+ is enough for the accurate phase
638
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
639
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
640
+ triple double values. We have to take it into account for the accurate phase
641
+ basic procedures for addition and multiplication
642
+ The condition on the next Add12 is verified as log2m is smaller than log2h
643
+ and both are scaled by ed
644
+ */
645
+
646
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
647
+
648
+ /* Add logih and logim to ph and pl
649
+
650
+ We must use conditioned Add22 as logih can move over ph
651
+ */
652
+
653
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
654
+
655
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
656
+
657
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
658
+
659
+ /* Rounding test and eventual return or call to the accurate function */
660
+
661
+ if(E==0)
662
+ roundcst = RDROUNDCST1;
663
+ else
664
+ roundcst = RDROUNDCST2;
665
+
666
+ TEST_AND_RETURN_RD(logh, logm, roundcst);
667
+
668
+ #if DEBUG
669
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
670
+ #endif
671
+
672
+ log_td_accurate(&logh, &logm, &logl, E, ed, index, zh, zl, logih, logim);
673
+
674
+ ReturnRoundDownwards3(logh, logm, logl);
675
+ }
676
+
677
+ /*************************************************************
678
+ *************************************************************
679
+ * ROUNDED TOWARDS ZERO *
680
+ *************************************************************
681
+ *************************************************************/
682
+ double log_rz(double x) {
683
+ db_number xdb;
684
+ double y, ed, ri, logih, logim, yrih, yril, th, zh, zl;
685
+ double polyHorner, zhSquareh, zhSquarel, polyUpper, zhSquareHalfh, zhSquareHalfl;
686
+ double t1h, t1l, t2h, t2l, ph, pl, log2edh, log2edl, logTabPolyh, logTabPolyl, logh, logm, logl, roundcst;
687
+ int E, index;
688
+
689
+ if (x == 1.0) return 0.0; /* This the only case in which the image under log of a double is a double. */
690
+
691
+ E=0;
692
+ xdb.d=x;
693
+
694
+ /* Filter cases */
695
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
696
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
697
+ return -1.0/0.0;
698
+ } /* log(+/-0) = -Inf */
699
+ if (xdb.i[HI] < 0){
700
+ return (x-x)/0; /* log(-x) = Nan */
701
+ }
702
+ /* Subnormal number */
703
+ E = -52;
704
+ xdb.d *= two52; /* make x a normal number */
705
+ }
706
+
707
+ if (xdb.i[HI] >= 0x7ff00000){
708
+ return x+x; /* Inf or Nan */
709
+ }
710
+
711
+
712
+ /* Extract exponent and mantissa
713
+ Do range reduction,
714
+ yielding to E holding the exponent and
715
+ y the mantissa between sqrt(2)/2 and sqrt(2)
716
+ */
717
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
718
+ index = (xdb.i[HI] & 0x000fffff);
719
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
720
+ index = (index + (1<<(20-L-1))) >> (20-L);
721
+
722
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
723
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
724
+ xdb.i[HI] -= 0x00100000;
725
+ E++;
726
+ }
727
+ y = xdb.d;
728
+ index = index & INDEXMASK;
729
+ /* Cast integer E into double ed for multiplication later */
730
+ ed = (double) E;
731
+
732
+ /*
733
+ Read tables:
734
+ Read one float for ri
735
+ Read the first two doubles for -log(r_i) (out of three)
736
+
737
+ Organization of the table:
738
+
739
+ one struct entry per index, the struct entry containing
740
+ r, logih, logim and logil in this order
741
+ */
742
+
743
+
744
+ ri = argredtable[index].ri;
745
+ /*
746
+ Actually we don't need the logarithm entries now
747
+ Move the following two lines to the eventual reconstruction
748
+ As long as we don't have any if in the following code, we can overlap
749
+ memory access with calculations
750
+ */
751
+ logih = argredtable[index].logih;
752
+ logim = argredtable[index].logim;
753
+
754
+ /* Do range reduction:
755
+
756
+ zh + zl = y * ri - 1.0 correctly
757
+
758
+ Correctness is assured by use of Mul12 and Add12
759
+ even if we don't force ri to have its' LSBs set to zero
760
+
761
+ Discard zl for higher monome degrees
762
+ */
763
+
764
+ Mul12(&yrih, &yril, y, ri);
765
+ th = yrih - 1.0;
766
+ Add12Cond(zh, zl, th, yril);
767
+
768
+ /*
769
+ Polynomial evaluation
770
+
771
+ Use a 7 degree polynomial
772
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
773
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
774
+ using an ad hoc method
775
+
776
+ */
777
+
778
+
779
+
780
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
781
+ polyHorner = FMA(FMA(FMA(FMA(c7,zh,c6),zh,c5),zh,c4),zh,c3);
782
+ #else
783
+ polyHorner = c3 + zh * (c4 + zh * (c5 + zh * (c6 + zh * c7)));
784
+ #endif
785
+
786
+ Mul12(&zhSquareh, &zhSquarel, zh, zh);
787
+ polyUpper = polyHorner * (zh * zhSquareh);
788
+ zhSquareHalfh = zhSquareh * -0.5;
789
+ zhSquareHalfl = zhSquarel * -0.5;
790
+ Add12(t1h, t1l, polyUpper, -1 * (zh * zl));
791
+ Add22(&t2h, &t2l, zh, zl, zhSquareHalfh, zhSquareHalfl);
792
+ Add22(&ph, &pl, t2h, t2l, t1h, t1l);
793
+
794
+ /* Reconstruction
795
+
796
+ Read logih and logim in the tables (already done)
797
+
798
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
799
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
800
+
801
+ Carry out everything in double double precision
802
+
803
+ */
804
+
805
+ /*
806
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
807
+ Multiplication of ed (double E) and log2h is thus correct
808
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
809
+ is enough for the accurate phase
810
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
811
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
812
+ triple double values. We have to take it into account for the accurate phase
813
+ basic procedures for addition and multiplication
814
+ The condition on the next Add12 is verified as log2m is smaller than log2h
815
+ and both are scaled by ed
816
+ */
817
+
818
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
819
+
820
+ /* Add logih and logim to ph and pl
821
+
822
+ We must use conditioned Add22 as logih can move over ph
823
+ */
824
+
825
+ Add22Cond(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
826
+
827
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
828
+
829
+ Add22Cond(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
830
+
831
+ /* Rounding test and eventual return or call to the accurate function */
832
+
833
+ if(E==0)
834
+ roundcst = RDROUNDCST1;
835
+ else
836
+ roundcst = RDROUNDCST2;
837
+
838
+ TEST_AND_RETURN_RZ(logh, logm, roundcst);
839
+
840
+ #if DEBUG
841
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
842
+ #endif
843
+
844
+ log_td_accurate(&logh, &logm, &logl, E, ed, index, zh, zl, logih, logim);
845
+
846
+ ReturnRoundTowardsZero3(logh, logm, logl);
847
+ }
848
+
849
+ #ifdef BUILD_INTERVAL_FUNCTIONS
850
+ interval j_log(interval x)
851
+ {
852
+ interval res;
853
+ int roundable;
854
+ int cs_inf=0; int cs_sup=0;
855
+ double x_inf,x_sup;
856
+ x_inf=LOW(x);
857
+ x_sup=UP(x);
858
+ double res_inf, res_sup, res_simple_inf, res_simple_sup;
859
+
860
+ db_number xdb_sup;
861
+ double y_sup, ed_sup, ri_sup, logih_sup, logim_sup, yrih_sup, yril_sup, th_sup, zh_sup, zl_sup;
862
+ double polyHorner_sup, zhSquareh_sup, zhSquarel_sup, polyUpper_sup, zhSquareHalfh_sup, zhSquareHalfl_sup;
863
+ double t1h_sup, t1l_sup, t2h_sup, t2l_sup, ph_sup, pl_sup, log2edh_sup, log2edl_sup, logTabPolyh_sup, logTabPolyl_sup, logh_sup, logm_sup, logl_sup, roundcst;
864
+ int E_sup, index_sup;
865
+
866
+
867
+ db_number xdb_inf;
868
+ double y_inf, ed_inf, ri_inf, logih_inf, logim_inf, yrih_inf, yril_inf, th_inf, zh_inf, zl_inf;
869
+ double polyHorner_inf, zhSquareh_inf, zhSquarel_inf, polyUpper_inf, zhSquareHalfh_inf, zhSquareHalfl_inf;
870
+ double t1h_inf, t1l_inf, t2h_inf, t2l_inf, ph_inf, pl_inf, log2edh_inf, log2edl_inf, logTabPolyh_inf, logTabPolyl_inf, logh_inf, logm_inf, logl_inf;
871
+ int E_inf, index_inf;
872
+
873
+ E_inf=0;
874
+ xdb_inf.d=x_inf;
875
+ E_sup=0;
876
+ xdb_sup.d=x_sup;
877
+
878
+ if (__builtin_expect(
879
+ (x_inf == 1.0) || (!(x_inf<=x_sup)) || (xdb_sup.i[HI] < 0)
880
+ || (xdb_inf.i[HI] < 0x00100000) || (((xdb_inf.i[HI] & 0x7fffffff)|xdb_inf.i[LO])==0)
881
+ || (xdb_inf.i[HI] < 0)
882
+ || (xdb_inf.i[HI] >= 0x7ff00000)
883
+ || (x_sup == 1.0)
884
+ || (xdb_sup.i[HI] < 0x00100000)
885
+ || (((xdb_sup.i[HI] & 0x7fffffff)|xdb_sup.i[LO])==0)
886
+ || (xdb_sup.i[HI] < 0)
887
+ || (xdb_sup.i[HI] >= 0x7ff00000)
888
+ || ((xdb_inf.d<00) && (xdb_sup.d>0) )
889
+ ,FALSE))
890
+ {
891
+ if (!(x_inf<=x_sup)) RETURN_EMPTY_INTERVAL;
892
+ if (xdb_sup.i[HI] < 0) RETURN_EMPTY_INTERVAL;
893
+ if ((xdb_inf.d<00) && (xdb_sup.d>0) )
894
+ {
895
+ ASSIGN_LOW(res,-1.0/0.0);
896
+ ASSIGN_UP(res,log_ru(UP(x)));
897
+ return res;
898
+ }
899
+ ASSIGN_LOW(res,log_rd(LOW(x)));
900
+ ASSIGN_UP(res,log_ru(UP(x)));
901
+ return res;
902
+ }
903
+ /* Extract exponent and mantissa
904
+ Do range reduction,
905
+ yielding to E holding the exponent and
906
+ y the mantissa between sqrt(2)/2 and sqrt(2)
907
+ */
908
+ E_inf += (xdb_inf.i[HI]>>20)-1023; /* extract the exponent */
909
+ E_sup += (xdb_sup.i[HI]>>20)-1023; /* extract the exponent */
910
+ index_inf = (xdb_inf.i[HI] & 0x000fffff);
911
+ index_sup = (xdb_sup.i[HI] & 0x000fffff);
912
+ xdb_inf.i[HI] = index_inf | 0x3ff00000; /* do exponent = 0 */
913
+ xdb_sup.i[HI] = index_sup | 0x3ff00000; /* do exponent = 0 */
914
+ index_inf = (index_inf + (1<<(20-L-1))) >> (20-L);
915
+ index_sup = (index_sup + (1<<(20-L-1))) >> (20-L);
916
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
917
+ if (index_inf >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
918
+ xdb_inf.i[HI] -= 0x00100000;
919
+ E_inf++;
920
+ }
921
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
922
+ if (index_sup >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
923
+ xdb_sup.i[HI] -= 0x00100000;
924
+ E_sup++;
925
+ }
926
+ y_inf = xdb_inf.d;
927
+ y_sup = xdb_sup.d;
928
+ index_inf = index_inf & INDEXMASK;
929
+ index_sup = index_sup & INDEXMASK;
930
+ /* Cast integer E into double ed for multiplication later */
931
+ ed_inf = (double) E_inf;
932
+ ed_sup = (double) E_sup;
933
+ /*
934
+ Read tables:
935
+ Read one float for ri
936
+ Read the first two doubles for -log(r_i) (out of three)
937
+ Organization of the table:
938
+ one struct entry per index, the struct entry containing
939
+ r, logih, logim and logil in this order
940
+ */
941
+ ri_inf = argredtable[index_inf].ri;
942
+ ri_sup = argredtable[index_sup].ri;
943
+ /*
944
+ Actually we don't need the logarithm entries now
945
+ Move the following two lines to the eventual reconstruction
946
+ As long as we don't have any if in the following code, we can overlap
947
+ memory access with calculations
948
+ */
949
+ logih_inf = argredtable[index_inf].logih;
950
+ logih_sup = argredtable[index_sup].logih;
951
+ logim_inf = argredtable[index_inf].logim;
952
+ logim_sup = argredtable[index_sup].logim;
953
+ /* Do range reduction:
954
+ zh + zl = y * ri - 1.0 correctly
955
+ Correctness is assured by use of Mul12 and Add12
956
+ even if we don't force ri to have its' LSBs set to zero
957
+ Discard zl for higher monome degrees
958
+ */
959
+
960
+ Mul12(&yrih_inf, &yril_inf, y_inf, ri_inf);
961
+ Mul12(&yrih_sup, &yril_sup, y_sup, ri_sup);
962
+ th_inf = yrih_inf - 1.0;
963
+ th_sup = yrih_sup - 1.0;
964
+ /* Do range reduction:
965
+ zh + zl = y * ri - 1.0 correctly
966
+ Correctness is assured by use of Mul12 and Add12
967
+ even if we don't force ri to have its' LSBs set to zero
968
+ Discard zl for higher monome degrees
969
+ */
970
+
971
+
972
+ Add12Cond(zh_inf, zl_inf, th_inf, yril_inf);
973
+ Add12Cond(zh_sup, zl_sup, th_sup, yril_sup);
974
+ /*
975
+ Polynomial evaluation
976
+ Use a 7 degree polynomial
977
+ Evaluate the higher 5 terms in double precision (-7 * 3 = -21) using Horner's scheme
978
+ Evaluate the lower 3 terms (the last is 0) in double double precision accounting also for zl
979
+ using an ad hoc method
980
+ */
981
+
982
+
983
+
984
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
985
+ polyHorner_inf = FMA(FMA(FMA(FMA(c7,zh_inf,c6),zh_inf,c5),zh_inf,c4),zh_inf,c3);
986
+ polyHorner_sup = FMA(FMA(FMA(FMA(c7,zh_sup,c6),zh_sup,c5),zh_sup,c4),zh_sup,c3);
987
+ #else
988
+ polyHorner_inf = c3 + zh_inf * (c4 + zh_inf * (c5 + zh_inf * (c6 + zh_inf * c7)));
989
+ polyHorner_sup = c3 + zh_sup * (c4 + zh_sup * (c5 + zh_sup * (c6 + zh_sup * c7)));
990
+ #endif
991
+
992
+ Mul12(&zhSquareh_inf, &zhSquarel_inf, zh_inf, zh_inf);
993
+ Mul12(&zhSquareh_sup, &zhSquarel_sup, zh_sup, zh_sup);
994
+ polyUpper_inf = polyHorner_inf * (zh_inf * zhSquareh_inf);
995
+ polyUpper_sup = polyHorner_sup * (zh_sup * zhSquareh_sup);
996
+ zhSquareHalfh_inf = zhSquareh_inf * -0.5;
997
+ zhSquareHalfh_sup = zhSquareh_sup * -0.5;
998
+ zhSquareHalfl_inf = zhSquarel_inf * -0.5;
999
+ zhSquareHalfl_sup = zhSquarel_sup * -0.5;
1000
+ Add12(t1h_inf, t1l_inf, polyUpper_inf, -1 * (zh_inf * zl_inf));
1001
+ Add12(t1h_sup, t1l_sup, polyUpper_sup, -1 * (zh_sup * zl_sup));
1002
+ Add22(&t2h_inf, &t2l_inf, zh_inf, zl_inf, zhSquareHalfh_inf, zhSquareHalfl_inf);
1003
+ Add22(&t2h_sup, &t2l_sup, zh_sup, zl_sup, zhSquareHalfh_sup, zhSquareHalfl_sup);
1004
+ Add22(&ph_inf, &pl_inf, t2h_inf, t2l_inf, t1h_inf, t1l_inf);
1005
+ Add22(&ph_sup, &pl_sup, t2h_sup, t2l_sup, t1h_sup, t1l_sup);
1006
+ /* Reconstruction
1007
+
1008
+ Read logih and logim in the tables (already done)
1009
+
1010
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
1011
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
1012
+
1013
+ Carry out everything in double double precision
1014
+
1015
+ */
1016
+
1017
+ /*
1018
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
1019
+ Multiplication of ed (double E) and log2h is thus correct
1020
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
1021
+ is enough for the accurate phase
1022
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
1023
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
1024
+ triple double values. We have to take it into account for the accurate phase
1025
+ basic procedures for addition and multiplication
1026
+ The condition on the next Add12 is verified as log2m is smaller than log2h
1027
+ and both are scaled by ed
1028
+ */
1029
+
1030
+ Add12(log2edh_inf, log2edl_inf, log2h * ed_inf, log2m * ed_inf);
1031
+
1032
+ /*
1033
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
1034
+ Multiplication of ed (double E) and log2h is thus correct
1035
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
1036
+ is enough for the accurate phase
1037
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
1038
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
1039
+ triple double values. We have to take it into account for the accurate phase
1040
+ basic procedures for addition and multiplication
1041
+ The condition on the next Add12 is verified as log2m is smaller than log2h
1042
+ and both are scaled by ed
1043
+ */
1044
+
1045
+ Add12(log2edh_sup, log2edl_sup, log2h * ed_sup, log2m * ed_sup);
1046
+
1047
+
1048
+ /* Add logih and logim to ph and pl
1049
+
1050
+ We must use conditioned Add22 as logih can move over ph
1051
+ */
1052
+
1053
+ Add22Cond(&logTabPolyh_inf, &logTabPolyl_inf, logih_inf, logim_inf, ph_inf, pl_inf);
1054
+
1055
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
1056
+
1057
+ Add22Cond(&logh_inf, &logm_inf, log2edh_inf, log2edl_inf, logTabPolyh_inf, logTabPolyl_inf);
1058
+
1059
+ /* Add logih and logim to ph and pl
1060
+
1061
+ We must use conditioned Add22 as logih can move over ph
1062
+ */
1063
+
1064
+ Add22Cond(&logTabPolyh_sup, &logTabPolyl_sup, logih_sup, logim_sup, ph_sup, pl_sup);
1065
+
1066
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
1067
+
1068
+ Add22Cond(&logh_sup, &logm_sup, log2edh_sup, log2edl_sup, logTabPolyh_sup, logTabPolyl_sup);
1069
+
1070
+
1071
+ /* Rounding test and eventual return or call to the accurate function */
1072
+
1073
+ roundcst = RDROUNDCST1;
1074
+
1075
+ if(cs_inf) { res_inf=res_simple_inf; }
1076
+ if(cs_sup) { res_sup=res_simple_sup; }
1077
+ //TEST_AND_COPY_RDRU_LOG(roundable,res_inf,logh_inf,logm_inf,res_sup,logh_sup,logm_sup,roundcst);
1078
+
1079
+ //#define TEST_AND_COPY_RDRU_LOG(__cond__, __res_inf__, __yh_inf__, __yl_inf__, __res_sup__, __yh_sup__, __yl_sup__, __eps__)
1080
+
1081
+ db_number yh_inf, yl_inf, u53_inf, yh_sup, yl_sup, u53_sup;
1082
+ int yh_inf_neg, yl_inf_neg, yh_sup_neg, yl_sup_neg;
1083
+ int rd_ok, ru_ok;
1084
+ double save_res_inf=res_inf;
1085
+ yh_inf.d = logh_inf; yl_inf.d = logm_inf;
1086
+ yh_inf_neg = (yh_inf.i[HI] & 0x80000000);
1087
+ yl_inf_neg = (yl_inf.i[HI] & 0x80000000);
1088
+ yh_inf.l = yh_inf.l & 0x7fffffffffffffffLL; /* compute the absolute value*/
1089
+ yl_inf.l = yl_inf.l & 0x7fffffffffffffffLL; /* compute the absolute value*/
1090
+ u53_inf.l = (yh_inf.l & ULL(7ff0000000000000)) + ULL(0010000000000000);
1091
+ yh_sup.d = logh_sup; yl_sup.d = logm_sup;
1092
+ yh_sup_neg = (yh_sup.i[HI] & 0x80000000);
1093
+ yl_sup_neg = (yl_sup.i[HI] & 0x80000000);
1094
+ yh_sup.l = yh_sup.l & 0x7fffffffffffffffLL; /* compute the absolute value*/
1095
+ yl_sup.l = yl_sup.l & 0x7fffffffffffffffLL; /* compute the absolute value*/
1096
+ u53_sup.l = (yh_sup.l & ULL(7ff0000000000000)) + ULL(0010000000000000);
1097
+ roundable = 0;
1098
+ rd_ok=(yl_inf.d > roundcst * u53_inf.d);
1099
+ ru_ok=(yl_sup.d > roundcst * u53_sup.d);
1100
+ if(yl_inf_neg) { /* The case yl==0 is filtered by the above test*/
1101
+ /* return next down */
1102
+ yh_inf.d = logh_inf;
1103
+ if(yh_inf_neg) yh_inf.l++; else yh_inf.l--; /* Beware: fails for zero */
1104
+ res_inf = yh_inf.d ;
1105
+ }
1106
+ else {
1107
+ res_inf = logh_inf;
1108
+ }
1109
+ if(!yl_sup_neg) { /* The case yl==0 is filtered by the above test*/
1110
+ /* return next up */
1111
+ yh_sup.d = logh_sup;
1112
+ if(yh_sup_neg) yh_sup.l--; else yh_sup.l++; /* Beware: fails for zero */
1113
+ res_sup = yh_sup.d ;
1114
+ }
1115
+ else {
1116
+ res_sup = logh_sup;
1117
+ }
1118
+ if(save_res_inf==-1.0/0.0) res_inf=-1.0/0.0;
1119
+ if(rd_ok && ru_ok){
1120
+ ASSIGN_LOW(res,res_inf);
1121
+ ASSIGN_UP(res,res_sup);
1122
+ return(res);
1123
+ }
1124
+ else if (rd_ok){
1125
+ roundable=1;
1126
+ }
1127
+ else if (ru_ok){
1128
+ roundable=2;
1129
+ }
1130
+
1131
+
1132
+ #if DEBUG
1133
+ printf("Going for Accurate Phase for x=%1.50e\n",x);
1134
+ #endif
1135
+ if (roundable==1)
1136
+ {
1137
+ log_td_accurate(&logh_sup, &logm_sup, &logl_sup, E_sup, ed_sup, index_sup, zh_sup, zl_sup, logih_sup, logim_sup);
1138
+ RoundUpwards3(&res_sup, logh_sup, logm_sup, logl_sup);
1139
+ }
1140
+
1141
+ if (roundable==2)
1142
+ {
1143
+ log_td_accurate(&logh_inf, &logm_inf, &logl_inf, E_inf, ed_inf, index_inf, zh_inf, zl_inf, logih_inf, logim_inf);
1144
+ RoundDownwards3(&res_inf, logh_inf, logm_inf, logl_inf);
1145
+ }
1146
+ if (roundable==0)
1147
+ {
1148
+ log_td_accurate(&logh_inf, &logm_inf, &logl_inf, E_inf, ed_inf, index_inf, zh_inf, zl_inf, logih_inf, logim_inf);
1149
+ RoundDownwards3(&res_inf, logh_inf, logm_inf, logl_inf);
1150
+ log_td_accurate(&logh_sup, &logm_sup, &logl_sup, E_sup, ed_sup, index_sup, zh_sup, zl_sup, logih_sup, logim_sup);
1151
+ RoundUpwards3(&res_sup, logh_sup, logm_sup, logl_sup);
1152
+ }
1153
+ ASSIGN_LOW(res,res_inf);
1154
+ ASSIGN_UP(res,res_sup);
1155
+ return res;
1156
+ }
1157
+ #endif
1158
+