crmf 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +12 -0
  3. data/crmf.gemspec +105 -3
  4. data/ext/crlibm-1.0beta5/AUTHORS +2 -0
  5. data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
  6. data/ext/crlibm-1.0beta5/COPYING +340 -0
  7. data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
  8. data/ext/crlibm-1.0beta5/ChangeLog +125 -0
  9. data/ext/crlibm-1.0beta5/Makefile.am +134 -0
  10. data/ext/crlibm-1.0beta5/NEWS +0 -0
  11. data/ext/crlibm-1.0beta5/README +31 -0
  12. data/ext/crlibm-1.0beta5/README.DEV +23 -0
  13. data/ext/crlibm-1.0beta5/README.md +5 -0
  14. data/ext/crlibm-1.0beta5/TODO +66 -0
  15. data/ext/crlibm-1.0beta5/VERSION +1 -0
  16. data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
  17. data/ext/crlibm-1.0beta5/acos-td.h +629 -0
  18. data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
  19. data/ext/crlibm-1.0beta5/asin-td.h +620 -0
  20. data/ext/crlibm-1.0beta5/asincos.c +4488 -0
  21. data/ext/crlibm-1.0beta5/asincos.h +575 -0
  22. data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
  23. data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
  24. data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
  25. data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
  26. data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
  27. data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
  28. data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
  29. data/ext/crlibm-1.0beta5/configure.ac +419 -0
  30. data/ext/crlibm-1.0beta5/crlibm.h +204 -0
  31. data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
  32. data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
  33. data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
  34. data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
  35. data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
  36. data/ext/crlibm-1.0beta5/double-extended.h +496 -0
  37. data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
  38. data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
  39. data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
  40. data/ext/crlibm-1.0beta5/exp-td.h +685 -0
  41. data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
  42. data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
  43. data/ext/crlibm-1.0beta5/expm1.c +2515 -0
  44. data/ext/crlibm-1.0beta5/expm1.h +715 -0
  45. data/ext/crlibm-1.0beta5/interval.h +238 -0
  46. data/ext/crlibm-1.0beta5/log-de.c +480 -0
  47. data/ext/crlibm-1.0beta5/log-de.h +747 -0
  48. data/ext/crlibm-1.0beta5/log-de2.c +280 -0
  49. data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
  50. data/ext/crlibm-1.0beta5/log-td.c +1158 -0
  51. data/ext/crlibm-1.0beta5/log-td.h +819 -0
  52. data/ext/crlibm-1.0beta5/log.c +2244 -0
  53. data/ext/crlibm-1.0beta5/log.h +1592 -0
  54. data/ext/crlibm-1.0beta5/log10-td.c +906 -0
  55. data/ext/crlibm-1.0beta5/log10-td.h +823 -0
  56. data/ext/crlibm-1.0beta5/log1p.c +1295 -0
  57. data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
  58. data/ext/crlibm-1.0beta5/log2-td.h +821 -0
  59. data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
  60. data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
  61. data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
  62. data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
  63. data/ext/crlibm-1.0beta5/log_fast.c +360 -0
  64. data/ext/crlibm-1.0beta5/log_fast.h +440 -0
  65. data/ext/crlibm-1.0beta5/pow.c +1396 -0
  66. data/ext/crlibm-1.0beta5/pow.h +3101 -0
  67. data/ext/crlibm-1.0beta5/prepare +20 -0
  68. data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
  69. data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
  70. data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
  71. data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
  72. data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
  73. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
  74. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
  75. data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
  76. data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
  77. data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
  78. data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
  79. data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
  80. data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
  81. data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
  82. data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
  83. data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
  84. data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
  85. data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
  86. data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
  87. data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
  88. data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
  89. data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
  90. data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
  91. data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
  92. data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
  93. data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
  94. data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
  95. data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
  96. data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
  97. data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
  98. data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
  99. data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
  100. data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
  101. data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
  102. data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
  103. data/ext/crlibm-1.0beta5/trigpi.h +556 -0
  104. data/ext/crlibm-1.0beta5/triple-double.c +57 -0
  105. data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
  106. data/ext/crmf/crmf.c +117 -20
  107. data/ext/crmf/extconf.rb +12 -8
  108. data/lib/crmf/version.rb +1 -1
  109. data/tests/perf.rb +100 -219
  110. metadata +108 -10
  111. data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,2244 @@
1
+
2
+
3
+ #include <stdio.h>
4
+ #include <stdlib.h>
5
+ #include "crlibm.h"
6
+ #include "crlibm_private.h"
7
+ #include "triple-double.h"
8
+ #include "log.h"
9
+
10
+
11
+
12
+ void p_accu(double *p_resh, double *p_resm, double *p_resl, double xh, double xm) {
13
+
14
+
15
+
16
+
17
+ double p_t_1_0h;
18
+ double p_t_2_0h;
19
+ double p_t_3_0h;
20
+ double p_t_4_0h;
21
+ double p_t_5_0h;
22
+ double p_t_6_0h;
23
+ double p_t_7_0h;
24
+ double p_t_8_0h;
25
+ double p_t_9_0h, p_t_9_0m;
26
+ double p_t_10_0h, p_t_10_0m;
27
+ double p_t_11_0h, p_t_11_0m;
28
+ double p_t_12_0h, p_t_12_0m;
29
+ double p_t_13_0h, p_t_13_0m;
30
+ double p_t_14_0h, p_t_14_0m;
31
+ double p_t_15_0h, p_t_15_0m;
32
+ double p_t_16_0h, p_t_16_0m, p_t_16_0l;
33
+ double p_t_17_0h, p_t_17_0m, p_t_17_0l;
34
+ double p_t_18_0h, p_t_18_0m, p_t_18_0l;
35
+ double p_t_19_0h, p_t_19_0m, p_t_19_0l;
36
+ double p_t_20_0h, p_t_20_0m, p_t_20_0l;
37
+ double p_t_21_0h, p_t_21_0m, p_t_21_0l;
38
+
39
+
40
+ #if EVAL_PERF
41
+ crlibm_second_step_taken++;
42
+ #endif
43
+
44
+
45
+
46
+ p_t_1_0h = p_coeff_accu_12h;
47
+ p_t_2_0h = p_t_1_0h * xh;
48
+ p_t_3_0h = p_coeff_accu_11h + p_t_2_0h;
49
+ p_t_4_0h = p_t_3_0h * xh;
50
+ p_t_5_0h = p_coeff_accu_10h + p_t_4_0h;
51
+ p_t_6_0h = p_t_5_0h * xh;
52
+ p_t_7_0h = p_coeff_accu_9h + p_t_6_0h;
53
+ p_t_8_0h = p_t_7_0h * xh;
54
+ Add12(p_t_9_0h,p_t_9_0m,p_coeff_accu_8h,p_t_8_0h);
55
+ MulAdd22(&p_t_10_0h,&p_t_10_0m,p_coeff_accu_7h,p_coeff_accu_7m,xh,xm,p_t_9_0h,p_t_9_0m);
56
+ MulAdd22(&p_t_11_0h,&p_t_11_0m,p_coeff_accu_6h,p_coeff_accu_6m,xh,xm,p_t_10_0h,p_t_10_0m);
57
+ MulAdd22(&p_t_12_0h,&p_t_12_0m,p_coeff_accu_5h,p_coeff_accu_5m,xh,xm,p_t_11_0h,p_t_11_0m);
58
+ Mul22(&p_t_13_0h,&p_t_13_0m,p_t_12_0h,p_t_12_0m,xh,xm);
59
+ Add122(&p_t_14_0h,&p_t_14_0m,p_coeff_accu_4h,p_t_13_0h,p_t_13_0m);
60
+ Mul22(&p_t_15_0h,&p_t_15_0m,p_t_14_0h,p_t_14_0m,xh,xm);
61
+ Add23(&p_t_16_0h,&p_t_16_0m,&p_t_16_0l,p_coeff_accu_3h,p_coeff_accu_3m,p_t_15_0h,p_t_15_0m);
62
+ Mul233(&p_t_17_0h,&p_t_17_0m,&p_t_17_0l,xh,xm,p_t_16_0h,p_t_16_0m,p_t_16_0l);
63
+ Add133(&p_t_18_0h,&p_t_18_0m,&p_t_18_0l,p_coeff_accu_2h,p_t_17_0h,p_t_17_0m,p_t_17_0l);
64
+ Mul233(&p_t_19_0h,&p_t_19_0m,&p_t_19_0l,xh,xm,p_t_18_0h,p_t_18_0m,p_t_18_0l);
65
+ Add133(&p_t_20_0h,&p_t_20_0m,&p_t_20_0l,p_coeff_accu_1h,p_t_19_0h,p_t_19_0m,p_t_19_0l);
66
+ Mul233(&p_t_21_0h,&p_t_21_0m,&p_t_21_0l,xh,xm,p_t_20_0h,p_t_20_0m,p_t_20_0l);
67
+ Renormalize3(p_resh,p_resm,p_resl,p_t_21_0h,p_t_21_0m,p_t_21_0l);
68
+
69
+
70
+ }
71
+
72
+
73
+
74
+ /*************************************************************
75
+ *************************************************************
76
+ * ROUNDED TO NEAREST *
77
+ *************************************************************
78
+ *************************************************************/
79
+ double log_rn(double x){
80
+ db_number xdb, yhdb;
81
+ double yh, yl, ed, ri, logih, logim, logil, yrih, yril, th, zh, zl;
82
+ double ph, pl, pm, log2edh, log2edl, log2edm, logTabPolyh, logTabPolyl, logh, logm, logl;
83
+ int E, index;
84
+ double zhSquare, zhCube, zhSquareHalf;
85
+ double p35, p46, p36;
86
+ double pUpper;
87
+ double zhSquareHalfPlusZl;
88
+ double zhFour;
89
+ double logyh, logym, logyl;
90
+ double loghover, logmover, loglover;
91
+
92
+
93
+ E=0;
94
+ xdb.d=x;
95
+
96
+ /* Filter cases */
97
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
98
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
99
+ return -1.0/0.0;
100
+ } /* log(+/-0) = -Inf */
101
+ if (xdb.i[HI] < 0){
102
+ return (x-x)/0; /* log(-x) = Nan */
103
+ }
104
+ /* Subnormal number */
105
+ E = -52;
106
+ xdb.d *= two52; /* make x a normal number */
107
+ }
108
+
109
+ if (xdb.i[HI] >= 0x7ff00000){
110
+ return x+x; /* Inf or Nan */
111
+ }
112
+
113
+
114
+ /* Extract exponent and mantissa
115
+ Do range reduction,
116
+ yielding to E holding the exponent and
117
+ y the mantissa between sqrt(2)/2 and sqrt(2)
118
+ */
119
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
120
+ index = (xdb.i[HI] & 0x000fffff);
121
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
122
+ index = (index + (1<<(20-L-1))) >> (20-L);
123
+
124
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
125
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
126
+ xdb.i[HI] -= 0x00100000;
127
+ E++;
128
+ }
129
+
130
+ yhdb.i[HI] = xdb.i[HI];
131
+ yhdb.i[LO] = 0;
132
+ yh = yhdb.d;
133
+ yl = xdb.d - yh;
134
+
135
+ index = index & INDEXMASK;
136
+ /* Cast integer E into double ed for multiplication later */
137
+ ed = (double) E;
138
+
139
+ /*
140
+ Read tables:
141
+ Read one float for ri
142
+ Read the first two doubles for -log(r_i) (out of three)
143
+
144
+ Organization of the table:
145
+
146
+ one struct entry per index, the struct entry containing
147
+ r, logih, logim and logil in this order
148
+ */
149
+
150
+
151
+ ri = argredtable[index].ri;
152
+ /*
153
+ Actually we don't need the logarithm entries now
154
+ Move the following two lines to the eventual reconstruction
155
+ As long as we don't have any if in the following code, we can overlap
156
+ memory access with calculations
157
+ */
158
+ logih = argredtable[index].logih;
159
+ logim = argredtable[index].logim;
160
+
161
+ /* Do range reduction:
162
+
163
+ zh + zl = y * ri - 1.0 exactly
164
+
165
+ Exactness is assured by use of two part yh + yl and 21 bit ri and Add12
166
+
167
+ Discard zl for higher monome degrees
168
+ */
169
+
170
+ yrih = yh * ri;
171
+ yril = yl * ri;
172
+ th = yrih - 1.0;
173
+ Add12Cond(zh, zl, th, yril);
174
+
175
+ /* Polynomial approximation */
176
+
177
+ zhSquare = zh * zh; /* 1 */
178
+
179
+ p35 = p_coeff_3h + zhSquare * p_coeff_5h; /* 3 */
180
+ p46 = p_coeff_4h + zhSquare * p_coeff_6h; /* 3 */
181
+ zhCube = zhSquare * zh; /* 2 */
182
+ zhSquareHalf = p_coeff_2h * zhSquare; /* 2 */
183
+ zhFour = zhSquare * zhSquare; /* 2 */
184
+
185
+ p36 = zhCube * p35 + zhFour * p46; /* 4 */
186
+ zhSquareHalfPlusZl = zhSquareHalf + zl; /* 3 */
187
+
188
+ pUpper = zhSquareHalfPlusZl + p36; /* 5 */
189
+
190
+ Add12(ph,pl,zh,pUpper); /* 8 */
191
+
192
+ /* Reconstruction
193
+
194
+ Read logih and logim in the tables (already done)
195
+
196
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
197
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
198
+
199
+ Carry out everything in double double precision
200
+
201
+ */
202
+
203
+ /*
204
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
205
+ Multiplication of ed (double E) and log2h is thus exact
206
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
207
+ is enough for the accurate phase
208
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
209
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
210
+ triple double values. We have to take it into account for the accurate phase
211
+ basic procedures for addition and multiplication
212
+ The condition on the next Add12 is verified as log2m is smaller than log2h
213
+ and both are scaled by ed
214
+ */
215
+
216
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
217
+
218
+ /* Add logih and logim to ph and pl */
219
+
220
+ Add22(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
221
+
222
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
223
+
224
+ Add22(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
225
+
226
+ /* Rounding test and possible return or call to the accurate function */
227
+
228
+ if(logh == (logh + (logm * RNROUNDCST)))
229
+ return logh;
230
+ else
231
+ {
232
+
233
+ logil = argredtable[index].logil;
234
+
235
+ p_accu(&ph, &pm, &pl, zh, zl);
236
+
237
+ Add33(&logyh, &logym, &logyl, logih, logim, logil, ph, pm, pl);
238
+
239
+ log2edh = log2h * ed;
240
+ log2edm = log2m * ed;
241
+ log2edl = log2l * ed;
242
+
243
+ Add33(&loghover, &logmover, &loglover, log2edh, log2edm, log2edl, logyh, logym, logyl);
244
+
245
+ Renormalize3(&logh,&logm,&logl,loghover,logmover,loglover);
246
+
247
+ ReturnRoundToNearest3(logh, logm, logl);
248
+
249
+ } /* Accurate phase launched */
250
+ }
251
+
252
+
253
+ /*************************************************************
254
+ *************************************************************
255
+ * ROUNDED UPWARDS *
256
+ *************************************************************
257
+ *************************************************************/
258
+ double log_ru(double x){
259
+ db_number xdb, yhdb;
260
+ double yh, yl, ed, ri, logih, logim, logil, yrih, yril, th, zh, zl;
261
+ double ph, pl, pm, log2edh, log2edl, log2edm, logTabPolyh, logTabPolyl, logh, logm, logl;
262
+ int E, index;
263
+ double zhSquare, zhCube, zhSquareHalf;
264
+ double p35, p46, p36;
265
+ double pUpper;
266
+ double zhSquareHalfPlusZl;
267
+ double zhFour;
268
+ double logyh, logym, logyl;
269
+ double loghover, logmover, loglover;
270
+
271
+ if (x == 1.0) return 0.0; /* This the only case in which the image under log of a double is a double. */
272
+
273
+ E=0;
274
+ xdb.d=x;
275
+
276
+ /* Filter cases */
277
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
278
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
279
+ return -1.0/0.0;
280
+ } /* log(+/-0) = -Inf */
281
+ if (xdb.i[HI] < 0){
282
+ return (x-x)/0; /* log(-x) = Nan */
283
+ }
284
+ /* Subnormal number */
285
+ E = -52;
286
+ xdb.d *= two52; /* make x a normal number */
287
+ }
288
+
289
+ if (xdb.i[HI] >= 0x7ff00000){
290
+ return x+x; /* Inf or Nan */
291
+ }
292
+
293
+
294
+ /* Extract exponent and mantissa
295
+ Do range reduction,
296
+ yielding to E holding the exponent and
297
+ y the mantissa between sqrt(2)/2 and sqrt(2)
298
+ */
299
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
300
+ index = (xdb.i[HI] & 0x000fffff);
301
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
302
+ index = (index + (1<<(20-L-1))) >> (20-L);
303
+
304
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
305
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
306
+ xdb.i[HI] -= 0x00100000;
307
+ E++;
308
+ }
309
+
310
+ yhdb.i[HI] = xdb.i[HI];
311
+ yhdb.i[LO] = 0;
312
+ yh = yhdb.d;
313
+ yl = xdb.d - yh;
314
+
315
+ index = index & INDEXMASK;
316
+ /* Cast integer E into double ed for multiplication later */
317
+ ed = (double) E;
318
+
319
+ /*
320
+ Read tables:
321
+ Read one float for ri
322
+ Read the first two doubles for -log(r_i) (out of three)
323
+
324
+ Organization of the table:
325
+
326
+ one struct entry per index, the struct entry containing
327
+ r, logih, logim and logil in this order
328
+ */
329
+
330
+
331
+ ri = argredtable[index].ri;
332
+ /*
333
+ Actually we don't need the logarithm entries now
334
+ Move the following two lines to the eventual reconstruction
335
+ As long as we don't have any if in the following code, we can overlap
336
+ memory access with calculations
337
+ */
338
+ logih = argredtable[index].logih;
339
+ logim = argredtable[index].logim;
340
+
341
+ /* Do range reduction:
342
+
343
+ zh + zl = y * ri - 1.0 exactly
344
+
345
+ Exactness is assured by use of two part yh + yl and 21 bit ri and Add12
346
+
347
+ Discard zl for higher monome degrees
348
+ */
349
+
350
+ yrih = yh * ri;
351
+ yril = yl * ri;
352
+ th = yrih - 1.0;
353
+ Add12Cond(zh, zl, th, yril);
354
+
355
+ /* Polynomial approximation */
356
+
357
+ zhSquare = zh * zh; /* 1 */
358
+
359
+ p35 = p_coeff_3h + zhSquare * p_coeff_5h; /* 3 */
360
+ p46 = p_coeff_4h + zhSquare * p_coeff_6h; /* 3 */
361
+ zhCube = zhSquare * zh; /* 2 */
362
+ zhSquareHalf = p_coeff_2h * zhSquare; /* 2 */
363
+ zhFour = zhSquare * zhSquare; /* 2 */
364
+
365
+ p36 = zhCube * p35 + zhFour * p46; /* 4 */
366
+ zhSquareHalfPlusZl = zhSquareHalf + zl; /* 3 */
367
+
368
+ pUpper = zhSquareHalfPlusZl + p36; /* 5 */
369
+
370
+ Add12(ph,pl,zh,pUpper); /* 8 */
371
+
372
+ /* Reconstruction
373
+
374
+ Read logih and logim in the tables (already done)
375
+
376
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
377
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
378
+
379
+ Carry out everything in double double precision
380
+
381
+ */
382
+
383
+ /*
384
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
385
+ Multiplication of ed (double E) and log2h is thus exact
386
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
387
+ is enough for the accurate phase
388
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
389
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
390
+ triple double values. We have to take it into account for the accurate phase
391
+ basic procedures for addition and multiplication
392
+ The condition on the next Add12 is verified as log2m is smaller than log2h
393
+ and both are scaled by ed
394
+ */
395
+
396
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
397
+
398
+ /* Add logih and logim to ph and pl */
399
+
400
+ Add22(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
401
+
402
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
403
+
404
+ Add22(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
405
+
406
+ /* Rounding test and possible return or call to the accurate function */
407
+
408
+ TEST_AND_RETURN_RU(logh, logm, RDROUNDCST);
409
+
410
+ {
411
+
412
+ logil = argredtable[index].logil;
413
+
414
+ p_accu(&ph, &pm, &pl, zh, zl);
415
+
416
+ Add33(&logyh, &logym, &logyl, logih, logim, logil, ph, pm, pl);
417
+
418
+ log2edh = log2h * ed;
419
+ log2edm = log2m * ed;
420
+ log2edl = log2l * ed;
421
+
422
+ Add33(&loghover, &logmover, &loglover, log2edh, log2edm, log2edl, logyh, logym, logyl);
423
+
424
+ Renormalize3(&logh,&logm,&logl,loghover,logmover,loglover);
425
+
426
+ ReturnRoundUpwards3(logh, logm, logl);
427
+
428
+ } /* Accurate phase launched */
429
+ }
430
+
431
+
432
+ /*************************************************************
433
+ *************************************************************
434
+ * ROUNDED DOWNWARDS *
435
+ *************************************************************
436
+ *************************************************************/
437
+ double log_rd(double x){
438
+ db_number xdb, yhdb;
439
+ double yh, yl, ed, ri, logih, logim, logil, yrih, yril, th, zh, zl;
440
+ double ph, pl, pm, log2edh, log2edl, log2edm, logTabPolyh, logTabPolyl, logh, logm, logl;
441
+ int E, index;
442
+ double zhSquare, zhCube, zhSquareHalf;
443
+ double p35, p46, p36;
444
+ double pUpper;
445
+ double zhSquareHalfPlusZl;
446
+ double zhFour;
447
+ double logyh, logym, logyl;
448
+ double loghover, logmover, loglover;
449
+
450
+ if (x == 1.0) return 0.0; /* This the only case in which the image under log of a double is a double. */
451
+
452
+ E=0;
453
+ xdb.d=x;
454
+
455
+ /* Filter cases */
456
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
457
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
458
+ return -1.0/0.0;
459
+ } /* log(+/-0) = -Inf */
460
+ if (xdb.i[HI] < 0){
461
+ return (x-x)/0; /* log(-x) = Nan */
462
+ }
463
+ /* Subnormal number */
464
+ E = -52;
465
+ xdb.d *= two52; /* make x a normal number */
466
+ }
467
+
468
+ if (xdb.i[HI] >= 0x7ff00000){
469
+ return x+x; /* Inf or Nan */
470
+ }
471
+
472
+
473
+ /* Extract exponent and mantissa
474
+ Do range reduction,
475
+ yielding to E holding the exponent and
476
+ y the mantissa between sqrt(2)/2 and sqrt(2)
477
+ */
478
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
479
+ index = (xdb.i[HI] & 0x000fffff);
480
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
481
+ index = (index + (1<<(20-L-1))) >> (20-L);
482
+
483
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
484
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
485
+ xdb.i[HI] -= 0x00100000;
486
+ E++;
487
+ }
488
+
489
+ yhdb.i[HI] = xdb.i[HI];
490
+ yhdb.i[LO] = 0;
491
+ yh = yhdb.d;
492
+ yl = xdb.d - yh;
493
+
494
+ index = index & INDEXMASK;
495
+ /* Cast integer E into double ed for multiplication later */
496
+ ed = (double) E;
497
+
498
+ /*
499
+ Read tables:
500
+ Read one float for ri
501
+ Read the first two doubles for -log(r_i) (out of three)
502
+
503
+ Organization of the table:
504
+
505
+ one struct entry per index, the struct entry containing
506
+ r, logih, logim and logil in this order
507
+ */
508
+
509
+
510
+ ri = argredtable[index].ri;
511
+ /*
512
+ Actually we don't need the logarithm entries now
513
+ Move the following two lines to the eventual reconstruction
514
+ As long as we don't have any if in the following code, we can overlap
515
+ memory access with calculations
516
+ */
517
+ logih = argredtable[index].logih;
518
+ logim = argredtable[index].logim;
519
+
520
+ /* Do range reduction:
521
+
522
+ zh + zl = y * ri - 1.0 exactly
523
+
524
+ Exactness is assured by use of two part yh + yl and 21 bit ri and Add12
525
+
526
+ Discard zl for higher monome degrees
527
+ */
528
+
529
+ yrih = yh * ri;
530
+ yril = yl * ri;
531
+ th = yrih - 1.0;
532
+ Add12Cond(zh, zl, th, yril);
533
+
534
+ /* Polynomial approximation */
535
+
536
+ zhSquare = zh * zh; /* 1 */
537
+
538
+ p35 = p_coeff_3h + zhSquare * p_coeff_5h; /* 3 */
539
+ p46 = p_coeff_4h + zhSquare * p_coeff_6h; /* 3 */
540
+ zhCube = zhSquare * zh; /* 2 */
541
+ zhSquareHalf = p_coeff_2h * zhSquare; /* 2 */
542
+ zhFour = zhSquare * zhSquare; /* 2 */
543
+
544
+ p36 = zhCube * p35 + zhFour * p46; /* 4 */
545
+ zhSquareHalfPlusZl = zhSquareHalf + zl; /* 3 */
546
+
547
+ pUpper = zhSquareHalfPlusZl + p36; /* 5 */
548
+
549
+ Add12(ph,pl,zh,pUpper); /* 8 */
550
+
551
+ /* Reconstruction
552
+
553
+ Read logih and logim in the tables (already done)
554
+
555
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
556
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
557
+
558
+ Carry out everything in double double precision
559
+
560
+ */
561
+
562
+ /*
563
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
564
+ Multiplication of ed (double E) and log2h is thus exact
565
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
566
+ is enough for the accurate phase
567
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
568
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
569
+ triple double values. We have to take it into account for the accurate phase
570
+ basic procedures for addition and multiplication
571
+ The condition on the next Add12 is verified as log2m is smaller than log2h
572
+ and both are scaled by ed
573
+ */
574
+
575
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
576
+
577
+ /* Add logih and logim to ph and pl */
578
+
579
+ Add22(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
580
+
581
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
582
+
583
+ Add22(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
584
+
585
+ /* Rounding test and possible return or call to the accurate function */
586
+
587
+ TEST_AND_RETURN_RD(logh, logm, RDROUNDCST);
588
+
589
+ {
590
+
591
+ logil = argredtable[index].logil;
592
+
593
+ p_accu(&ph, &pm, &pl, zh, zl);
594
+
595
+ Add33(&logyh, &logym, &logyl, logih, logim, logil, ph, pm, pl);
596
+
597
+ log2edh = log2h * ed;
598
+ log2edm = log2m * ed;
599
+ log2edl = log2l * ed;
600
+
601
+ Add33(&loghover, &logmover, &loglover, log2edh, log2edm, log2edl, logyh, logym, logyl);
602
+
603
+ Renormalize3(&logh,&logm,&logl,loghover,logmover,loglover);
604
+
605
+ ReturnRoundDownwards3(logh, logm, logl);
606
+
607
+ } /* Accurate phase launched */
608
+ }
609
+
610
+
611
+ /*************************************************************
612
+ *************************************************************
613
+ * ROUNDED TOWARDS ZERO *
614
+ *************************************************************
615
+ *************************************************************/
616
+ double log_rz(double x){
617
+ db_number xdb, yhdb;
618
+ double yh, yl, ed, ri, logih, logim, logil, yrih, yril, th, zh, zl;
619
+ double ph, pl, pm, log2edh, log2edl, log2edm, logTabPolyh, logTabPolyl, logh, logm, logl;
620
+ int E, index;
621
+ double zhSquare, zhCube, zhSquareHalf;
622
+ double p35, p46, p36;
623
+ double pUpper;
624
+ double zhSquareHalfPlusZl;
625
+ double zhFour;
626
+ double logyh, logym, logyl;
627
+ double loghover, logmover, loglover;
628
+
629
+ if (x == 1.0) return 0.0; /* This the only case in which the image under log of a double is a double. */
630
+
631
+ E=0;
632
+ xdb.d=x;
633
+
634
+ /* Filter cases */
635
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
636
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
637
+ return -1.0/0.0;
638
+ } /* log(+/-0) = -Inf */
639
+ if (xdb.i[HI] < 0){
640
+ return (x-x)/0; /* log(-x) = Nan */
641
+ }
642
+ /* Subnormal number */
643
+ E = -52;
644
+ xdb.d *= two52; /* make x a normal number */
645
+ }
646
+
647
+ if (xdb.i[HI] >= 0x7ff00000){
648
+ return x+x; /* Inf or Nan */
649
+ }
650
+
651
+
652
+ /* Extract exponent and mantissa
653
+ Do range reduction,
654
+ yielding to E holding the exponent and
655
+ y the mantissa between sqrt(2)/2 and sqrt(2)
656
+ */
657
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
658
+ index = (xdb.i[HI] & 0x000fffff);
659
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
660
+ index = (index + (1<<(20-L-1))) >> (20-L);
661
+
662
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
663
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2)*/
664
+ xdb.i[HI] -= 0x00100000;
665
+ E++;
666
+ }
667
+
668
+ yhdb.i[HI] = xdb.i[HI];
669
+ yhdb.i[LO] = 0;
670
+ yh = yhdb.d;
671
+ yl = xdb.d - yh;
672
+
673
+ index = index & INDEXMASK;
674
+ /* Cast integer E into double ed for multiplication later */
675
+ ed = (double) E;
676
+
677
+ /*
678
+ Read tables:
679
+ Read one float for ri
680
+ Read the first two doubles for -log(r_i) (out of three)
681
+
682
+ Organization of the table:
683
+
684
+ one struct entry per index, the struct entry containing
685
+ r, logih, logim and logil in this order
686
+ */
687
+
688
+
689
+ ri = argredtable[index].ri;
690
+ /*
691
+ Actually we don't need the logarithm entries now
692
+ Move the following two lines to the eventual reconstruction
693
+ As long as we don't have any if in the following code, we can overlap
694
+ memory access with calculations
695
+ */
696
+ logih = argredtable[index].logih;
697
+ logim = argredtable[index].logim;
698
+
699
+ /* Do range reduction:
700
+
701
+ zh + zl = y * ri - 1.0 exactly
702
+
703
+ Exactness is assured by use of two part yh + yl and 21 bit ri and Add12
704
+
705
+ Discard zl for higher monome degrees
706
+ */
707
+
708
+ yrih = yh * ri;
709
+ yril = yl * ri;
710
+ th = yrih - 1.0;
711
+ Add12Cond(zh, zl, th, yril);
712
+
713
+ /* Polynomial approximation */
714
+
715
+ zhSquare = zh * zh; /* 1 */
716
+
717
+ p35 = p_coeff_3h + zhSquare * p_coeff_5h; /* 3 */
718
+ p46 = p_coeff_4h + zhSquare * p_coeff_6h; /* 3 */
719
+ zhCube = zhSquare * zh; /* 2 */
720
+ zhSquareHalf = p_coeff_2h * zhSquare; /* 2 */
721
+ zhFour = zhSquare * zhSquare; /* 2 */
722
+
723
+ p36 = zhCube * p35 + zhFour * p46; /* 4 */
724
+ zhSquareHalfPlusZl = zhSquareHalf + zl; /* 3 */
725
+
726
+ pUpper = zhSquareHalfPlusZl + p36; /* 5 */
727
+
728
+ Add12(ph,pl,zh,pUpper); /* 8 */
729
+
730
+ /* Reconstruction
731
+
732
+ Read logih and logim in the tables (already done)
733
+
734
+ Compute log(x) = E * log(2) + log(1+z) - log(ri)
735
+ i.e. log(x) = ed * (log2h + log2m) + (ph + pl) + (logih + logim) + delta
736
+
737
+ Carry out everything in double double precision
738
+
739
+ */
740
+
741
+ /*
742
+ We store log2 as log2h + log2m + log2l where log2h and log2m have 12 trailing zeros
743
+ Multiplication of ed (double E) and log2h is thus exact
744
+ The overall accuracy of log2h + log2m + log2l is 53 * 3 - 24 = 135 which
745
+ is enough for the accurate phase
746
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
747
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
748
+ triple double values. We have to take it into account for the accurate phase
749
+ basic procedures for addition and multiplication
750
+ The condition on the next Add12 is verified as log2m is smaller than log2h
751
+ and both are scaled by ed
752
+ */
753
+
754
+ Add12(log2edh, log2edl, log2h * ed, log2m * ed);
755
+
756
+ /* Add logih and logim to ph and pl */
757
+
758
+ Add22(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
759
+
760
+ /* Add log2edh + log2edl to logTabPolyh + logTabPolyl */
761
+
762
+ Add22(&logh, &logm, log2edh, log2edl, logTabPolyh, logTabPolyl);
763
+
764
+ /* Rounding test and possible return or call to the accurate function */
765
+
766
+ TEST_AND_RETURN_RZ(logh, logm, RDROUNDCST);
767
+
768
+ {
769
+
770
+ logil = argredtable[index].logil;
771
+
772
+ p_accu(&ph, &pm, &pl, zh, zl);
773
+
774
+ Add33(&logyh, &logym, &logyl, logih, logim, logil, ph, pm, pl);
775
+
776
+ log2edh = log2h * ed;
777
+ log2edm = log2m * ed;
778
+ log2edl = log2l * ed;
779
+
780
+ Add33(&loghover, &logmover, &loglover, log2edh, log2edm, log2edl, logyh, logym, logyl);
781
+
782
+ Renormalize3(&logh,&logm,&logl,loghover,logmover,loglover);
783
+
784
+ ReturnRoundTowardsZero3(logh, logm, logl);
785
+
786
+ } /* Accurate phase launched */
787
+ }
788
+
789
+ /*************************************************************
790
+ *************************************************************
791
+ * log2_rn(x): log2(x) ROUNDED TO NEAREST *
792
+ *************************************************************
793
+ *************************************************************/
794
+ double log2_rn(double x){
795
+ db_number xdb, yhdb;
796
+ double yh, yl, ed, ri, logih, logim, logil, yrih, yril, th, zh, zl;
797
+ double ph, pl, pm, logTabPolyh, logTabPolyl, logh, logm, logl;
798
+ int E, index;
799
+ double zhSquare, zhCube, zhSquareHalf;
800
+ double p35, p46, p36;
801
+ double pUpper;
802
+ double zhSquareHalfPlusZl;
803
+ double zhFour;
804
+ double logyh, logym, logyl;
805
+ double loghover, logmover, loglover;
806
+ double log2TabPolyh, log2TabPolyl, log2yh, log2ym, log2yl;
807
+
808
+
809
+ E=0;
810
+ xdb.d=x;
811
+
812
+ /* Filter special cases: +/-0, negative x, subnormal x, Inf and NaN */
813
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
814
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
815
+ return -1.0/0.0;
816
+ } /* log(+/-0) = -Inf */
817
+ if (xdb.i[HI] < 0){
818
+ return (x-x)/0; /* log(-x) = NaN */
819
+ }
820
+ /* Subnormal number */
821
+ E = -52;
822
+ xdb.d *= two52; /* make x a normal number */
823
+ }
824
+
825
+ if (xdb.i[HI] >= 0x7ff00000){
826
+ return x+x; /* Inf or NaN */
827
+ }
828
+
829
+
830
+ /* Extract exponent and mantissa.
831
+ Do the range reduction,
832
+ so that E holds the exponent and
833
+ y the mantissa, between sqrt(2)/2 and sqrt(2)
834
+ */
835
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
836
+ index = (xdb.i[HI] & 0x000fffff);
837
+ xdb.i[HI] = index | 0x3ff00000; /* set the exponent to 0 */
838
+ index = (index + (1<<(20-L-1))) >> (20-L); /* round the 20 high mantissa bits to L bits */
839
+
840
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
841
+ if (index >= MAXINDEX){ /* corresponds to xdb > sqrt(2) */
842
+ xdb.i[HI] -= 0x00100000;
843
+ E++;
844
+ }
845
+
846
+ yhdb.i[HI] = xdb.i[HI];
847
+ yhdb.i[LO] = 0; /* yh is y with its low word cleared */
848
+ yh = yhdb.d;
849
+ yl = xdb.d - yh; /* yl holds the remaining low-order part of y */
850
+
851
+ index = index & INDEXMASK;
852
+ /* Cast integer E into double ed for the addition in the reconstruction */
853
+ ed = (double) E;
854
+
855
+ /*
856
+ Read tables:
857
+ Read one float for ri
858
+ Read the first two doubles for -log(r_i) (out of three)
859
+
860
+ Organization of the table:
861
+
862
+ one struct entry per index, the struct entry containing
863
+ r, logih, logim and logil in this order
864
+ */
865
+
866
+
867
+ ri = argredtable[index].ri;
868
+ /*
869
+ Actually we don't need the logarithm entries yet.
870
+ The following two lines could be moved down to the reconstruction step;
871
+ as long as we don't have any if in the following code, we can overlap
872
+ the memory access with the calculations
873
+ */
874
+ logih = argredtable[index].logih;
875
+ logim = argredtable[index].logim;
876
+
877
+ /* Do range reduction:
878
+
879
+ zh + zl = y * ri - 1.0 exactly
880
+
881
+ Exactness is ensured by the two-part yh + yl, the 21 bit ri and Add12Cond
882
+
883
+ Discard zl for the monomials of higher degree
884
+ */
885
+
886
+ yrih = yh * ri;
887
+ yril = yl * ri;
888
+ th = yrih - 1.0;
889
+ Add12Cond(zh, zl, th, yril);
890
+
891
+ /* Polynomial approximation */
892
+
893
+ zhSquare = zh * zh; /* 1 */
894
+
895
+ p35 = p_coeff_3h + zhSquare * p_coeff_5h; /* 3 */
896
+ p46 = p_coeff_4h + zhSquare * p_coeff_6h; /* 3 */
897
+ zhCube = zhSquare * zh; /* 2 */
898
+ zhSquareHalf = p_coeff_2h * zhSquare; /* 2 */
899
+ zhFour = zhSquare * zhSquare; /* 2 */
900
+
901
+ p36 = zhCube * p35 + zhFour * p46; /* 4 */
902
+ zhSquareHalfPlusZl = zhSquareHalf + zl; /* 3 */
903
+
904
+ pUpper = zhSquareHalfPlusZl + p36; /* 5 */
905
+
906
+ Add12(ph,pl,zh,pUpper); /* 8 */
907
+
908
+ /* Reconstruction
909
+
910
+ Read logih and logim in the tables (already done)
911
+
912
+ Compute log2(x) = E + 1/log(2) * (log(1+z) - log(ri))
913
+
914
+
915
+ Carry out everything in double double precision
916
+
917
+ */
918
+
919
+ /* Add logih and logim to ph and pl */
920
+
921
+ Add22(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
922
+
923
+ /* Multiply by 1/log(2) */
924
+
925
+ Mul22(&log2TabPolyh,&log2TabPolyl, RECPRLOG2H, RECPRLOG2L, logTabPolyh, logTabPolyl);
926
+
927
+ /* Add E */
928
+
929
+ Add122(&logh, &logm, ed, log2TabPolyh, log2TabPolyl);
930
+
931
+ /* Rounding test and possible return or call to the accurate function */
932
+
933
+ if(logh == (logh + (logm * RNROUNDCST)))
934
+ return logh;
935
+ else
936
+ {
937
+
938
+ logil = argredtable[index].logil; /* third table word, needed only in the accurate phase */
939
+
940
+ p_accu(&ph, &pm, &pl, zh, zl);
941
+
942
+ Add33(&logyh, &logym, &logyl, logih, logim, logil, ph, pm, pl);
943
+
944
+ Mul233(&log2yh,&log2ym,&log2yl,RECPRLOG2H,RECPRLOG2L,logyh,logym,logyl);
945
+
946
+ Add133(&loghover,&logmover,&loglover,ed,log2yh,log2ym,log2yl);
947
+
948
+ Renormalize3(&logh,&logm,&logl,loghover,logmover,loglover);
949
+
950
+ ReturnRoundToNearest3(logh, logm, logl);
951
+
952
+ } /* Accurate phase launched */
953
+ }
954
+
955
+ /*************************************************************
956
+ *************************************************************
957
+ * log2_ru(x): log2(x) ROUNDED UPWARDS *
958
+ *************************************************************
959
+ *************************************************************/
960
+ double log2_ru(double x){
961
+ db_number xdb, yhdb;
962
+ double yh, yl, ed, ri, logih, logim, logil, yrih, yril, th, zh, zl;
963
+ double ph, pl, pm, logTabPolyh, logTabPolyl, logh, logm, logl;
964
+ int E, index;
965
+ double zhSquare, zhCube, zhSquareHalf;
966
+ double p35, p46, p36;
967
+ double pUpper;
968
+ double zhSquareHalfPlusZl;
969
+ double zhFour;
970
+ double logyh, logym, logyl;
971
+ double loghover, logmover, loglover;
972
+ double log2TabPolyh, log2TabPolyl, log2yh, log2ym, log2yl;
973
+
974
+
975
+ E=0;
976
+ xdb.d=x;
977
+
978
+ /* Filter special cases: +/-0, negative x, subnormal x, Inf and NaN */
979
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
980
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
981
+ return -1.0/0.0;
982
+ } /* log(+/-0) = -Inf */
983
+ if (xdb.i[HI] < 0){
984
+ return (x-x)/0; /* log(-x) = NaN */
985
+ }
986
+ /* Subnormal number */
987
+ E = -52;
988
+ xdb.d *= two52; /* make x a normal number */
989
+ }
990
+
991
+ if (xdb.i[HI] >= 0x7ff00000){
992
+ return x+x; /* Inf or NaN */
993
+ }
994
+
995
+
996
+ /* Extract exponent and mantissa.
997
+ Do the range reduction,
998
+ so that E holds the exponent and
999
+ y the mantissa, between sqrt(2)/2 and sqrt(2)
1000
+ */
1001
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
1002
+ index = (xdb.i[HI] & 0x000fffff);
1003
+
1004
+ /* Test now if the argument is an exact power of 2
1005
+ i.e. if the mantissa is exactly 1 (0x0..0 with the implicit bit)
1006
+ This test is necessary for filtering out the cases where the final
1007
+ rounding test cannot distinguish between an exact algebraic
1008
+ number and a hard case to round
1009
+ */
1010
+
1011
+ if ((index | xdb.i[LO]) == 0) {
1012
+ /* Handle the "trivial" case for log2:
1013
+ The argument is an exact power of 2, thus return
1014
+ just the exponent of the number
1015
+ */
1016
+
1017
+ return (double) E;
1018
+
1019
+ }
1020
+
1021
+ xdb.i[HI] = index | 0x3ff00000; /* set the exponent to 0 */
1022
+ index = (index + (1<<(20-L-1))) >> (20-L); /* round the 20 high mantissa bits to L bits */
1023
+
1024
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
1025
+ if (index >= MAXINDEX){ /* corresponds to xdb > sqrt(2) */
1026
+ xdb.i[HI] -= 0x00100000;
1027
+ E++;
1028
+ }
1029
+
1030
+ yhdb.i[HI] = xdb.i[HI];
1031
+ yhdb.i[LO] = 0; /* yh is y with its low word cleared */
1032
+ yh = yhdb.d;
1033
+ yl = xdb.d - yh; /* yl holds the remaining low-order part of y */
1034
+
1035
+ index = index & INDEXMASK;
1036
+ /* Cast integer E into double ed for the addition in the reconstruction */
1037
+ ed = (double) E;
1038
+
1039
+ /*
1040
+ Read tables:
1041
+ Read one float for ri
1042
+ Read the first two doubles for -log(r_i) (out of three)
1043
+
1044
+ Organization of the table:
1045
+
1046
+ one struct entry per index, the struct entry containing
1047
+ r, logih, logim and logil in this order
1048
+ */
1049
+
1050
+
1051
+ ri = argredtable[index].ri;
1052
+ /*
1053
+ Actually we don't need the logarithm entries yet.
1054
+ The following two lines could be moved down to the reconstruction step;
1055
+ as long as we don't have any if in the following code, we can overlap
1056
+ the memory access with the calculations
1057
+ */
1058
+ logih = argredtable[index].logih;
1059
+ logim = argredtable[index].logim;
1060
+
1061
+ /* Do range reduction:
1062
+
1063
+ zh + zl = y * ri - 1.0 exactly
1064
+
1065
+ Exactness is ensured by the two-part yh + yl, the 21 bit ri and Add12Cond
1066
+
1067
+ Discard zl for the monomials of higher degree
1068
+ */
1069
+
1070
+ yrih = yh * ri;
1071
+ yril = yl * ri;
1072
+ th = yrih - 1.0;
1073
+ Add12Cond(zh, zl, th, yril);
1074
+
1075
+ /* Polynomial approximation */
1076
+
1077
+ zhSquare = zh * zh; /* 1 */
1078
+
1079
+ p35 = p_coeff_3h + zhSquare * p_coeff_5h; /* 3 */
1080
+ p46 = p_coeff_4h + zhSquare * p_coeff_6h; /* 3 */
1081
+ zhCube = zhSquare * zh; /* 2 */
1082
+ zhSquareHalf = p_coeff_2h * zhSquare; /* 2 */
1083
+ zhFour = zhSquare * zhSquare; /* 2 */
1084
+
1085
+ p36 = zhCube * p35 + zhFour * p46; /* 4 */
1086
+ zhSquareHalfPlusZl = zhSquareHalf + zl; /* 3 */
1087
+
1088
+ pUpper = zhSquareHalfPlusZl + p36; /* 5 */
1089
+
1090
+ Add12(ph,pl,zh,pUpper); /* 8 */
1091
+
1092
+ /* Reconstruction
1093
+
1094
+ Read logih and logim in the tables (already done)
1095
+
1096
+ Compute log2(x) = E + 1/log(2) * (log(1+z) - log(ri))
1097
+
1098
+
1099
+ Carry out everything in double double precision
1100
+
1101
+ */
1102
+
1103
+ /* Add logih and logim to ph and pl */
1104
+
1105
+ Add22(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
1106
+
1107
+ /* Multiply by 1/log(2) */
1108
+
1109
+ Mul22(&log2TabPolyh,&log2TabPolyl, RECPRLOG2H, RECPRLOG2L, logTabPolyh, logTabPolyl);
1110
+
1111
+ /* Add E */
1112
+
1113
+ Add122(&logh, &logm, ed, log2TabPolyh, log2TabPolyl);
1114
+
1115
+ /* Rounding test and possible return or call to the accurate function */
1116
+
1117
+ TEST_AND_RETURN_RU(logh, logm, RDROUNDCST);
1118
+ {
1119
+
1120
+ logil = argredtable[index].logil; /* third table word, needed only in the accurate phase */
1121
+
1122
+ p_accu(&ph, &pm, &pl, zh, zl);
1123
+
1124
+ Add33(&logyh, &logym, &logyl, logih, logim, logil, ph, pm, pl);
1125
+
1126
+ Mul233(&log2yh,&log2ym,&log2yl,RECPRLOG2H,RECPRLOG2L,logyh,logym,logyl);
1127
+
1128
+ Add133(&loghover,&logmover,&loglover,ed,log2yh,log2ym,log2yl);
1129
+
1130
+ Renormalize3(&logh,&logm,&logl,loghover,logmover,loglover);
1131
+
1132
+ ReturnRoundUpwards3(logh, logm, logl);
1133
+
1134
+ } /* Accurate phase launched */
1135
+ }
1136
+
1137
+ /*************************************************************
1138
+ *************************************************************
1139
+ * log2_rd(x): log2(x) ROUNDED DOWNWARDS *
1140
+ *************************************************************
1141
+ *************************************************************/
1142
+ double log2_rd(double x){
1143
+ db_number xdb, yhdb;
1144
+ double yh, yl, ed, ri, logih, logim, logil, yrih, yril, th, zh, zl;
1145
+ double ph, pl, pm, logTabPolyh, logTabPolyl, logh, logm, logl;
1146
+ int E, index;
1147
+ double zhSquare, zhCube, zhSquareHalf;
1148
+ double p35, p46, p36;
1149
+ double pUpper;
1150
+ double zhSquareHalfPlusZl;
1151
+ double zhFour;
1152
+ double logyh, logym, logyl;
1153
+ double loghover, logmover, loglover;
1154
+ double log2TabPolyh, log2TabPolyl, log2yh, log2ym, log2yl;
1155
+
1156
+
1157
+ E=0;
1158
+ xdb.d=x;
1159
+
1160
+ /* Filter special cases: +/-0, negative x, subnormal x, Inf and NaN */
1161
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
1162
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
1163
+ return -1.0/0.0;
1164
+ } /* log(+/-0) = -Inf */
1165
+ if (xdb.i[HI] < 0){
1166
+ return (x-x)/0; /* log(-x) = NaN */
1167
+ }
1168
+ /* Subnormal number */
1169
+ E = -52;
1170
+ xdb.d *= two52; /* make x a normal number */
1171
+ }
1172
+
1173
+ if (xdb.i[HI] >= 0x7ff00000){
1174
+ return x+x; /* Inf or NaN */
1175
+ }
1176
+
1177
+
1178
+ /* Extract exponent and mantissa.
1179
+ Do the range reduction,
1180
+ so that E holds the exponent and
1181
+ y the mantissa, between sqrt(2)/2 and sqrt(2)
1182
+ */
1183
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
1184
+ index = (xdb.i[HI] & 0x000fffff);
1185
+
1186
+ /* Test now if the argument is an exact power of 2
1187
+ i.e. if the mantissa is exactly 1 (0x0..0 with the implicit bit)
1188
+ This test is necessary for filtering out the cases where the final
1189
+ rounding test cannot distinguish between an exact algebraic
1190
+ number and a hard case to round
1191
+ */
1192
+
1193
+ if ((index | xdb.i[LO]) == 0) {
1194
+ /* Handle the "trivial" case for log2:
1195
+ The argument is an exact power of 2, thus return
1196
+ just the exponent of the number
1197
+ */
1198
+
1199
+ return (double) E;
1200
+
1201
+ }
1202
+
1203
+ xdb.i[HI] = index | 0x3ff00000; /* set the exponent to 0 */
1204
+ index = (index + (1<<(20-L-1))) >> (20-L); /* round the 20 high mantissa bits to L bits */
1205
+
1206
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
1207
+ if (index >= MAXINDEX){ /* corresponds to xdb > sqrt(2) */
1208
+ xdb.i[HI] -= 0x00100000;
1209
+ E++;
1210
+ }
1211
+
1212
+ yhdb.i[HI] = xdb.i[HI];
1213
+ yhdb.i[LO] = 0; /* yh is y with its low word cleared */
1214
+ yh = yhdb.d;
1215
+ yl = xdb.d - yh; /* yl holds the remaining low-order part of y */
1216
+
1217
+ index = index & INDEXMASK;
1218
+ /* Cast integer E into double ed for the addition in the reconstruction */
1219
+ ed = (double) E;
1220
+
1221
+ /*
1222
+ Read tables:
1223
+ Read one float for ri
1224
+ Read the first two doubles for -log(r_i) (out of three)
1225
+
1226
+ Organization of the table:
1227
+
1228
+ one struct entry per index, the struct entry containing
1229
+ r, logih, logim and logil in this order
1230
+ */
1231
+
1232
+
1233
+ ri = argredtable[index].ri;
1234
+ /*
1235
+ Actually we don't need the logarithm entries yet.
1236
+ The following two lines could be moved down to the reconstruction step;
1237
+ as long as we don't have any if in the following code, we can overlap
1238
+ the memory access with the calculations
1239
+ */
1240
+ logih = argredtable[index].logih;
1241
+ logim = argredtable[index].logim;
1242
+
1243
+ /* Do range reduction:
1244
+
1245
+ zh + zl = y * ri - 1.0 exactly
1246
+
1247
+ Exactness is ensured by the two-part yh + yl, the 21 bit ri and Add12Cond
1248
+
1249
+ Discard zl for the monomials of higher degree
1250
+ */
1251
+
1252
+ yrih = yh * ri;
1253
+ yril = yl * ri;
1254
+ th = yrih - 1.0;
1255
+ Add12Cond(zh, zl, th, yril);
1256
+
1257
+ /* Polynomial approximation */
1258
+
1259
+ zhSquare = zh * zh; /* 1 */
1260
+
1261
+ p35 = p_coeff_3h + zhSquare * p_coeff_5h; /* 3 */
1262
+ p46 = p_coeff_4h + zhSquare * p_coeff_6h; /* 3 */
1263
+ zhCube = zhSquare * zh; /* 2 */
1264
+ zhSquareHalf = p_coeff_2h * zhSquare; /* 2 */
1265
+ zhFour = zhSquare * zhSquare; /* 2 */
1266
+
1267
+ p36 = zhCube * p35 + zhFour * p46; /* 4 */
1268
+ zhSquareHalfPlusZl = zhSquareHalf + zl; /* 3 */
1269
+
1270
+ pUpper = zhSquareHalfPlusZl + p36; /* 5 */
1271
+
1272
+ Add12(ph,pl,zh,pUpper); /* 8 */
1273
+
1274
+ /* Reconstruction
1275
+
1276
+ Read logih and logim in the tables (already done)
1277
+
1278
+ Compute log2(x) = E + 1/log(2) * (log(1+z) - log(ri))
1279
+
1280
+
1281
+ Carry out everything in double double precision
1282
+
1283
+ */
1284
+
1285
+ /* Add logih and logim to ph and pl */
1286
+
1287
+ Add22(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
1288
+
1289
+ /* Multiply by 1/log(2) */
1290
+
1291
+ Mul22(&log2TabPolyh,&log2TabPolyl, RECPRLOG2H, RECPRLOG2L, logTabPolyh, logTabPolyl);
1292
+
1293
+ /* Add E */
1294
+
1295
+ Add122(&logh, &logm, ed, log2TabPolyh, log2TabPolyl);
1296
+
1297
+ /* Rounding test and possible return or call to the accurate function */
1298
+
1299
+ TEST_AND_RETURN_RD(logh, logm, RDROUNDCST);
1300
+ {
1301
+
1302
+ logil = argredtable[index].logil; /* third table word, needed only in the accurate phase */
1303
+
1304
+ p_accu(&ph, &pm, &pl, zh, zl);
1305
+
1306
+ Add33(&logyh, &logym, &logyl, logih, logim, logil, ph, pm, pl);
1307
+
1308
+ Mul233(&log2yh,&log2ym,&log2yl,RECPRLOG2H,RECPRLOG2L,logyh,logym,logyl);
1309
+
1310
+ Add133(&loghover,&logmover,&loglover,ed,log2yh,log2ym,log2yl);
1311
+
1312
+ Renormalize3(&logh,&logm,&logl,loghover,logmover,loglover);
1313
+
1314
+ ReturnRoundDownwards3(logh, logm, logl);
1315
+
1316
+ } /* Accurate phase launched */
1317
+ }
1318
+
1319
+ /*************************************************************
1320
+ *************************************************************
1321
+ * log2_rz(x): log2(x) ROUNDED TOWARDS ZERO *
1322
+ *************************************************************
1323
+ *************************************************************/
1324
+ double log2_rz(double x){
1325
+ db_number xdb, yhdb;
1326
+ double yh, yl, ed, ri, logih, logim, logil, yrih, yril, th, zh, zl;
1327
+ double ph, pl, pm, logTabPolyh, logTabPolyl, logh, logm, logl;
1328
+ int E, index;
1329
+ double zhSquare, zhCube, zhSquareHalf;
1330
+ double p35, p46, p36;
1331
+ double pUpper;
1332
+ double zhSquareHalfPlusZl;
1333
+ double zhFour;
1334
+ double logyh, logym, logyl;
1335
+ double loghover, logmover, loglover;
1336
+ double log2TabPolyh, log2TabPolyl, log2yh, log2ym, log2yl;
1337
+
1338
+
1339
+ E=0;
1340
+ xdb.d=x;
1341
+
1342
+ /* Filter special cases: +/-0, negative x, subnormal x, Inf and NaN */
1343
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
1344
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
1345
+ return -1.0/0.0;
1346
+ } /* log(+/-0) = -Inf */
1347
+ if (xdb.i[HI] < 0){
1348
+ return (x-x)/0; /* log(-x) = NaN */
1349
+ }
1350
+ /* Subnormal number */
1351
+ E = -52;
1352
+ xdb.d *= two52; /* make x a normal number */
1353
+ }
1354
+
1355
+ if (xdb.i[HI] >= 0x7ff00000){
1356
+ return x+x; /* Inf or NaN */
1357
+ }
1358
+
1359
+
1360
+ /* Extract exponent and mantissa.
1361
+ Do the range reduction,
1362
+ so that E holds the exponent and
1363
+ y the mantissa, between sqrt(2)/2 and sqrt(2)
1364
+ */
1365
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
1366
+ index = (xdb.i[HI] & 0x000fffff);
1367
+
1368
+ /* Test now if the argument is an exact power of 2
1369
+ i.e. if the mantissa is exactly 1 (0x0..0 with the implicit bit)
1370
+ This test is necessary for filtering out the cases where the final
1371
+ rounding test cannot distinguish between an exact algebraic
1372
+ number and a hard case to round
1373
+ */
1374
+
1375
+ if ((index | xdb.i[LO]) == 0) {
1376
+ /* Handle the "trivial" case for log2:
1377
+ The argument is an exact power of 2, thus return
1378
+ just the exponent of the number
1379
+ */
1380
+
1381
+ return (double) E;
1382
+
1383
+ }
1384
+
1385
+ xdb.i[HI] = index | 0x3ff00000; /* set the exponent to 0 */
1386
+ index = (index + (1<<(20-L-1))) >> (20-L); /* round the 20 high mantissa bits to L bits */
1387
+
1388
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
1389
+ if (index >= MAXINDEX){ /* corresponds to xdb > sqrt(2) */
1390
+ xdb.i[HI] -= 0x00100000;
1391
+ E++;
1392
+ }
1393
+
1394
+ yhdb.i[HI] = xdb.i[HI];
1395
+ yhdb.i[LO] = 0; /* yh is y with its low word cleared */
1396
+ yh = yhdb.d;
1397
+ yl = xdb.d - yh; /* yl holds the remaining low-order part of y */
1398
+
1399
+ index = index & INDEXMASK;
1400
+ /* Cast integer E into double ed for the addition in the reconstruction */
1401
+ ed = (double) E;
1402
+
1403
+ /*
1404
+ Read tables:
1405
+ Read one float for ri
1406
+ Read the first two doubles for -log(r_i) (out of three)
1407
+
1408
+ Organization of the table:
1409
+
1410
+ one struct entry per index, the struct entry containing
1411
+ r, logih, logim and logil in this order
1412
+ */
1413
+
1414
+
1415
+ ri = argredtable[index].ri;
1416
+ /*
1417
+ Actually we don't need the logarithm entries yet.
1418
+ The following two lines could be moved down to the reconstruction step;
1419
+ as long as we don't have any if in the following code, we can overlap
1420
+ the memory access with the calculations
1421
+ */
1422
+ logih = argredtable[index].logih;
1423
+ logim = argredtable[index].logim;
1424
+
1425
+ /* Do range reduction:
1426
+
1427
+ zh + zl = y * ri - 1.0 exactly
1428
+
1429
+ Exactness is ensured by the two-part yh + yl, the 21 bit ri and Add12Cond
1430
+
1431
+ Discard zl for the monomials of higher degree
1432
+ */
1433
+
1434
+ yrih = yh * ri;
1435
+ yril = yl * ri;
1436
+ th = yrih - 1.0;
1437
+ Add12Cond(zh, zl, th, yril);
1438
+
1439
+ /* Polynomial approximation */
1440
+
1441
+ zhSquare = zh * zh; /* 1 */
1442
+
1443
+ p35 = p_coeff_3h + zhSquare * p_coeff_5h; /* 3 */
1444
+ p46 = p_coeff_4h + zhSquare * p_coeff_6h; /* 3 */
1445
+ zhCube = zhSquare * zh; /* 2 */
1446
+ zhSquareHalf = p_coeff_2h * zhSquare; /* 2 */
1447
+ zhFour = zhSquare * zhSquare; /* 2 */
1448
+
1449
+ p36 = zhCube * p35 + zhFour * p46; /* 4 */
1450
+ zhSquareHalfPlusZl = zhSquareHalf + zl; /* 3 */
1451
+
1452
+ pUpper = zhSquareHalfPlusZl + p36; /* 5 */
1453
+
1454
+ Add12(ph,pl,zh,pUpper); /* 8 */
1455
+
1456
+ /* Reconstruction
1457
+
1458
+ Read logih and logim in the tables (already done)
1459
+
1460
+ Compute log2(x) = E + 1/log(2) * (log(1+z) - log(ri))
1461
+
1462
+
1463
+ Carry out everything in double double precision
1464
+
1465
+ */
1466
+
1467
+ /* Add logih and logim to ph and pl */
1468
+
1469
+ Add22(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
1470
+
1471
+ /* Multiply by 1/log(2) */
1472
+
1473
+ Mul22(&log2TabPolyh,&log2TabPolyl, RECPRLOG2H, RECPRLOG2L, logTabPolyh, logTabPolyl);
1474
+
1475
+ /* Add E */
1476
+
1477
+ Add122(&logh, &logm, ed, log2TabPolyh, log2TabPolyl);
1478
+
1479
+ /* Rounding test and possible return or call to the accurate function */
1480
+
1481
+ TEST_AND_RETURN_RZ(logh, logm, RDROUNDCST);
1482
+ {
1483
+
1484
+ logil = argredtable[index].logil; /* third table word, needed only in the accurate phase */
1485
+
1486
+ p_accu(&ph, &pm, &pl, zh, zl);
1487
+
1488
+ Add33(&logyh, &logym, &logyl, logih, logim, logil, ph, pm, pl);
1489
+
1490
+ Mul233(&log2yh,&log2ym,&log2yl,RECPRLOG2H,RECPRLOG2L,logyh,logym,logyl);
1491
+
1492
+ Add133(&loghover,&logmover,&loglover,ed,log2yh,log2ym,log2yl);
1493
+
1494
+ Renormalize3(&logh,&logm,&logl,loghover,logmover,loglover);
1495
+
1496
+ ReturnRoundTowardsZero3(logh, logm, logl);
1497
+
1498
+ } /* Accurate phase launched */
1499
+ }
1500
+
1501
+ /*************************************************************
1502
+ *************************************************************
1503
+ * log10_rn(x): log10(x) ROUNDED TO NEAREST *
1504
+ *************************************************************
1505
+ *************************************************************/
1506
+ double log10_rn(double x){
1507
+ db_number xdb, yhdb;
1508
+ double yh, yl, ed, ri, logih, logim, logil, yrih, yril, th, zh, zl;
1509
+ double ph, pl, pm, log2edh, log2edl, log2edm, logTabPolyh, logTabPolyl, logh, logm, logl;
1510
+ int E, index;
1511
+ double zhSquare, zhCube, zhSquareHalf;
1512
+ double p35, p46, p36;
1513
+ double pUpper;
1514
+ double zhSquareHalfPlusZl;
1515
+ double zhFour;
1516
+ double logyh, logym, logyl;
1517
+ double loghover, logmover, loglover;
1518
+ double log10TabPolyh, log10TabPolyl;
1519
+ double log10yh, log10ym, log10yl;
1520
+
1521
+
1522
+ E=0;
1523
+ xdb.d=x;
1524
+
1525
+ /* Filter special cases: +/-0, negative x, subnormal x, Inf and NaN */
1526
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
1527
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
1528
+ return -1.0/0.0;
1529
+ } /* log(+/-0) = -Inf */
1530
+ if (xdb.i[HI] < 0){
1531
+ return (x-x)/0; /* log(-x) = NaN */
1532
+ }
1533
+ /* Subnormal number */
1534
+ E = -52;
1535
+ xdb.d *= two52; /* make x a normal number */
1536
+ }
1537
+
1538
+ if (xdb.i[HI] >= 0x7ff00000){
1539
+ return x+x; /* Inf or NaN */
1540
+ }
1541
+
1542
+
1543
+ /* Extract exponent and mantissa.
1544
+ Do the range reduction,
1545
+ so that E holds the exponent and
1546
+ y the mantissa, between sqrt(2)/2 and sqrt(2)
1547
+ */
1548
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
1549
+ index = (xdb.i[HI] & 0x000fffff);
1550
+ xdb.i[HI] = index | 0x3ff00000; /* set the exponent to 0 */
1551
+ index = (index + (1<<(20-L-1))) >> (20-L); /* round the 20 high mantissa bits to L bits */
1552
+
1553
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
1554
+ if (index >= MAXINDEX){ /* corresponds to xdb > sqrt(2) */
1555
+ xdb.i[HI] -= 0x00100000;
1556
+ E++;
1557
+ }
1558
+
1559
+ yhdb.i[HI] = xdb.i[HI];
1560
+ yhdb.i[LO] = 0; /* yh is y with its low word cleared */
1561
+ yh = yhdb.d;
1562
+ yl = xdb.d - yh; /* yl holds the remaining low-order part of y */
1563
+
1564
+ index = index & INDEXMASK;
1565
+ /* Cast integer E into double ed for multiplication later */
1566
+ ed = (double) E;
1567
+
1568
+ /*
1569
+ Read tables:
1570
+ Read one float for ri
1571
+ Read the first two doubles for -log(r_i) (out of three)
1572
+
1573
+ Organization of the table:
1574
+
1575
+ one struct entry per index, the struct entry containing
1576
+ r, logih, logim and logil in this order
1577
+ */
1578
+
1579
+
1580
+ ri = argredtable[index].ri;
1581
+ /*
1582
+ Actually we don't need the logarithm entries yet.
1583
+ The following two lines could be moved down to the reconstruction step;
1584
+ as long as we don't have any if in the following code, we can overlap
1585
+ the memory access with the calculations
1586
+ */
1587
+ logih = argredtable[index].logih;
1588
+ logim = argredtable[index].logim;
1589
+
1590
+ /* Do range reduction:
1591
+
1592
+ zh + zl = y * ri - 1.0 exactly
1593
+
1594
+ Exactness is ensured by the two-part yh + yl, the 21 bit ri and Add12Cond
1595
+
1596
+ Discard zl for the monomials of higher degree
1597
+ */
1598
+
1599
+ yrih = yh * ri;
1600
+ yril = yl * ri;
1601
+ th = yrih - 1.0;
1602
+ Add12Cond(zh, zl, th, yril);
1603
+
1604
+ /* Polynomial approximation */
1605
+
1606
+ zhSquare = zh * zh; /* 1 */
1607
+
1608
+ p35 = p_coeff_3h + zhSquare * p_coeff_5h; /* 3 */
1609
+ p46 = p_coeff_4h + zhSquare * p_coeff_6h; /* 3 */
1610
+ zhCube = zhSquare * zh; /* 2 */
1611
+ zhSquareHalf = p_coeff_2h * zhSquare; /* 2 */
1612
+ zhFour = zhSquare * zhSquare; /* 2 */
1613
+
1614
+ p36 = zhCube * p35 + zhFour * p46; /* 4 */
1615
+ zhSquareHalfPlusZl = zhSquareHalf + zl; /* 3 */
1616
+
1617
+ pUpper = zhSquareHalfPlusZl + p36; /* 5 */
1618
+
1619
+ Add12(ph,pl,zh,pUpper); /* 8 */
1620
+
1621
+ /* Reconstruction
1622
+
1623
+ Read logih and logim in the tables (already done)
1624
+
1625
+ Compute log10(x) = E * log10(2) + 1/log(10) * (log(1+z) - log(ri))
1626
+ i.e. log10(x) = ed * (log210h + log210m) + (RECPRLOG10H + RECPRLOG10M) * ((ph + pl) + (logih + logim)) + delta
1627
+
1628
+ Carry out everything in double double precision
1629
+
1630
+ */
1631
+
1632
+ /*
1633
+ We store log_10(2) as log210h + log210m + log210l where log210h and log210m have 10 trailing zeros
1634
+ Multiplication of ed (double E) with log210h and log210m is thus exact
1635
+ The overall accuracy of log210h + log210m + log210l is 53 * 3 - 24 = 135 which
1636
+ is enough for the accurate phase
1637
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
1638
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
1639
+ triple double values. We have to take it into account for the accurate phase
1640
+ basic procedures for addition and multiplication
1641
+ The condition on the next Add12 is satisfied as log210m is smaller than log210h
1642
+ and both are scaled by ed
1643
+ */
1644
+
1645
+ Add12(log2edh, log2edl, log210h * ed, log210m * ed);
1646
+
1647
+ /* Add logih and logim to ph and pl */
1648
+
1649
+ Add22(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
1650
+
1651
+ /* Multiply by 1/log(10) */
1652
+
1653
+ Mul22(&log10TabPolyh,&log10TabPolyl,RECPRLOG10H,RECPRLOG10M,logTabPolyh,logTabPolyl);
1654
+
1655
+ /* Add log2edh + log2edl to log10TabPolyh + log10TabPolyl */
1656
+
1657
+ Add22(&logh, &logm, log2edh, log2edl, log10TabPolyh, log10TabPolyl);
1658
+
1659
+ /* Rounding test and possible return or call to the accurate function */
1660
+
1661
+ if(logh == (logh + (logm * RNROUNDCST)))
1662
+ return logh;
1663
+ else
1664
+ {
1665
+
1666
+ logil = argredtable[index].logil; /* third table word, needed only in the accurate phase */
1667
+
1668
+ p_accu(&ph, &pm, &pl, zh, zl);
1669
+
1670
+ Add33(&logyh, &logym, &logyl, logih, logim, logil, ph, pm, pl);
1671
+
1672
+ Mul33(&log10yh,&log10ym,&log10yl,RECPRLOG10H,RECPRLOG10M,RECPRLOG10L,logyh,logym,logyl);
1673
+
1674
+ log2edh = log210h * ed;
1675
+ log2edm = log210m * ed;
1676
+ log2edl = log210l * ed;
1677
+
1678
+ Add33(&loghover, &logmover, &loglover, log2edh, log2edm, log2edl, log10yh, log10ym, log10yl);
1679
+
1680
+ Renormalize3(&logh,&logm,&logl,loghover,logmover,loglover);
1681
+
1682
+ ReturnRoundToNearest3(logh, logm, logl);
1683
+
1684
+ } /* Accurate phase launched */
1685
+ }
1686
+
1687
+
1688
+ /*************************************************************
1689
+ *************************************************************
1690
+ * log10_ru(x): log10(x) ROUNDED UPWARDS *
1691
+ *************************************************************
1692
+ *************************************************************/
1693
+ double log10_ru(double x){
1694
+ db_number xdb, yhdb;
1695
+ double yh, yl, ed, ri, logih, logim, logil, yrih, yril, th, zh, zl;
1696
+ double ph, pl, pm, log2edh, log2edl, log2edm, logTabPolyh, logTabPolyl, logh, logm, logl;
1697
+ int E, index;
1698
+ double zhSquare, zhCube, zhSquareHalf;
1699
+ double p35, p46, p36;
1700
+ double pUpper;
1701
+ double zhSquareHalfPlusZl;
1702
+ double zhFour;
1703
+ double logyh, logym, logyl;
1704
+ double loghover, logmover, loglover;
1705
+ double log10TabPolyh, log10TabPolyl;
1706
+ double log10yh, log10ym, log10yl;
1707
+
1708
+
1709
+ E=0;
1710
+ xdb.d=x;
1711
+
1712
+ /* Filter special cases: +/-0, negative x, subnormal x, Inf and NaN */
1713
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022) */
1714
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
1715
+ return -1.0/0.0;
1716
+ } /* log(+/-0) = -Inf */
1717
+ if (xdb.i[HI] < 0){
1718
+ return (x-x)/0; /* log(-x) = NaN */
1719
+ }
1720
+ /* Subnormal number */
1721
+ E = -52;
1722
+ xdb.d *= two52; /* make x a normal number */
1723
+ }
1724
+
1725
+ if (xdb.i[HI] >= 0x7ff00000){
1726
+ return x+x; /* Inf or NaN */
1727
+ }
1728
+
1729
+
1730
+ /* Extract exponent and mantissa.
1731
+ Do the range reduction,
1732
+ so that E holds the exponent and
1733
+ y the mantissa, between sqrt(2)/2 and sqrt(2)
1734
+ */
1735
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
1736
+ index = (xdb.i[HI] & 0x000fffff);
1737
+ xdb.i[HI] = index | 0x3ff00000; /* set the exponent to 0 */
1738
+ index = (index + (1<<(20-L-1))) >> (20-L); /* round the 20 high mantissa bits to L bits */
1739
+
1740
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
1741
+ if (index >= MAXINDEX){ /* corresponds to xdb > sqrt(2) */
1742
+ xdb.i[HI] -= 0x00100000;
1743
+ E++;
1744
+ }
1745
+
1746
+ yhdb.i[HI] = xdb.i[HI];
1747
+ yhdb.i[LO] = 0; /* yh is y with its low word cleared */
1748
+ yh = yhdb.d;
1749
+ yl = xdb.d - yh; /* yl holds the remaining low-order part of y */
1750
+
1751
+ index = index & INDEXMASK;
1752
+ /* Cast integer E into double ed for multiplication later */
1753
+ ed = (double) E;
1754
+
1755
+ /*
1756
+ Read tables:
1757
+ Read one float for ri
1758
+ Read the first two doubles for -log(r_i) (out of three)
1759
+
1760
+ Organization of the table:
1761
+
1762
+ one struct entry per index, the struct entry containing
1763
+ r, logih, logim and logil in this order
1764
+ */
1765
+
1766
+
1767
+ ri = argredtable[index].ri;
1768
+ /*
1769
+ Actually we don't need the logarithm entries yet.
1770
+ The following two lines could be moved down to the reconstruction step;
1771
+ as long as we don't have any if in the following code, we can overlap
1772
+ the memory access with the calculations
1773
+ */
1774
+ logih = argredtable[index].logih;
1775
+ logim = argredtable[index].logim;
1776
+
1777
+ /* Do range reduction:
1778
+
1779
+ zh + zl = y * ri - 1.0 exactly
1780
+
1781
+ Exactness is ensured by the two-part yh + yl, the 21 bit ri and Add12Cond
1782
+
1783
+ Discard zl for the monomials of higher degree
1784
+ */
1785
+
1786
+ yrih = yh * ri;
1787
+ yril = yl * ri;
1788
+ th = yrih - 1.0;
1789
+ Add12Cond(zh, zl, th, yril);
1790
+
1791
+ /* Polynomial approximation */
1792
+
1793
+ zhSquare = zh * zh; /* 1 */
1794
+
1795
+ p35 = p_coeff_3h + zhSquare * p_coeff_5h; /* 3 */
1796
+ p46 = p_coeff_4h + zhSquare * p_coeff_6h; /* 3 */
1797
+ zhCube = zhSquare * zh; /* 2 */
1798
+ zhSquareHalf = p_coeff_2h * zhSquare; /* 2 */
1799
+ zhFour = zhSquare * zhSquare; /* 2 */
1800
+
1801
+ p36 = zhCube * p35 + zhFour * p46; /* 4 */
1802
+ zhSquareHalfPlusZl = zhSquareHalf + zl; /* 3 */
1803
+
1804
+ pUpper = zhSquareHalfPlusZl + p36; /* 5 */
1805
+
1806
+ Add12(ph,pl,zh,pUpper); /* 8 */
1807
+
1808
+ /* Reconstruction
1809
+
1810
+ Read logih and logim in the tables (already done)
1811
+
1812
+ Compute log10(x) = E * log10(2) + 1/log(10) * (log(1+z) - log(ri))
1813
+ i.e. log10(x) = ed * (log210h + log210m) + (RECPRLOG10H + RECPRLOG10M) * ((ph + pl) + (logih + logim)) + delta
1814
+
1815
+ Carry out everything in double double precision
1816
+
1817
+ */
1818
+
1819
+ /*
1820
+ We store log_10(2) as log210h + log210m + log210l where log210h and log210m have 10 trailing zeros
1821
+ Multiplication of ed (double E) with log210h and log210m is thus exact
1822
+ The overall accuracy of log210h + log210m + log210l is 53 * 3 - 24 = 135 which
1823
+ is enough for the accurate phase
1824
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
1825
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
1826
+ triple double values. We have to take it into account for the accurate phase
1827
+ basic procedures for addition and multiplication
1828
+ The condition on the next Add12 is satisfied as log210m is smaller than log210h
1829
+ and both are scaled by ed
1830
+ */
1831
+
1832
+ Add12(log2edh, log2edl, log210h * ed, log210m * ed);
1833
+
1834
+ /* Add logih and logim to ph and pl */
1835
+
1836
+ Add22(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
1837
+
1838
+ /* Multiply by 1/log(10) */
1839
+
1840
+ Mul22(&log10TabPolyh,&log10TabPolyl,RECPRLOG10H,RECPRLOG10M,logTabPolyh,logTabPolyl);
1841
+
1842
+ /* Add log2edh + log2edl to log10TabPolyh + log10TabPolyl */
1843
+
1844
+ Add22(&logh, &logm, log2edh, log2edl, log10TabPolyh, log10TabPolyl);
1845
+
1846
+ /* Rounding test and possible return or call to the accurate function */
1847
+
1848
+
1849
+ TEST_AND_RETURN_RU(logh, logm, RDROUNDCST);
1850
+ {
1851
+
1852
+ logil = argredtable[index].logil; /* third table word, needed only in the accurate phase */
1853
+
1854
+ p_accu(&ph, &pm, &pl, zh, zl);
1855
+
1856
+ Add33(&logyh, &logym, &logyl, logih, logim, logil, ph, pm, pl);
1857
+
1858
+ Mul33(&log10yh,&log10ym,&log10yl,RECPRLOG10H,RECPRLOG10M,RECPRLOG10L,logyh,logym,logyl);
1859
+
1860
+ log2edh = log210h * ed;
1861
+ log2edm = log210m * ed;
1862
+ log2edl = log210l * ed;
1863
+
1864
+ Add33(&loghover, &logmover, &loglover, log2edh, log2edm, log2edl, log10yh, log10ym, log10yl);
1865
+
1866
+ Renormalize3(&logh,&logm,&logl,loghover,logmover,loglover);
1867
+
1868
+ ReturnRoundUpwards3Unfiltered(logh, logm, logl, WORSTCASEACCURACY);
1869
+
1870
+ } /* Accurate phase launched */
1871
+ }
1872
+
1873
+
1874
+
1875
+ /*************************************************************
1876
+ *************************************************************
1877
+ * ROUNDED DOWNWARDS *
1878
+ *************************************************************
1879
+ *************************************************************/
1880
+ double log10_rd(double x){ /* log10(x), "rounded downwards" variant: quick double-double phase first, then a triple-double accurate phase if the rounding test does not return */
1881
+ db_number xdb, yhdb;
1882
+ double yh, yl, ed, ri, logih, logim, logil, yrih, yril, th, zh, zl;
1883
+ double ph, pl, pm, log2edh, log2edl, log2edm, logTabPolyh, logTabPolyl, logh, logm, logl;
1884
+ int E, index;
1885
+ double zhSquare, zhCube, zhSquareHalf;
1886
+ double p35, p46, p36;
1887
+ double pUpper;
1888
+ double zhSquareHalfPlusZl;
1889
+ double zhFour;
1890
+ double logyh, logym, logyl;
1891
+ double loghover, logmover, loglover;
1892
+ double log10TabPolyh, log10TabPolyl;
1893
+ double log10yh, log10ym, log10yl;
1894
+
1895
+
1896
+ E=0;
1897
+ xdb.d=x;
1898
+
1899
+ /* Filter special cases: zero, negative, subnormal, then Inf/NaN */
1900
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022); the zero and negative checks below sit in this branch, so i[HI] is presumably a signed word */
1901
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
1902
+ return -1.0/0.0;
1903
+ } /* log(+/-0) = -Inf */
1904
+ if (xdb.i[HI] < 0){
1905
+ return (x-x)/0; /* log(-x) = NaN */
1906
+ }
1907
+ /* Subnormal number: rescale it and compensate in the exponent */
1908
+ E = -52;
1909
+ xdb.d *= two52; /* make x a normal number (two52 is presumably 2^52, matching E = -52) */
1910
+ }
1911
+
1912
+ if (xdb.i[HI] >= 0x7ff00000){
1913
+ return x+x; /* Inf or NaN */
1914
+ }
1915
+
1916
+
1917
+ /* Extract exponent and mantissa
1918
+ Do range reduction,
1919
+ yielding E holding the exponent and
1920
+ y the mantissa between sqrt(2)/2 and sqrt(2)
1921
+ */
1922
+ E += (xdb.i[HI]>>20)-1023; /* extract the unbiased exponent (added to the subnormal correction, if any) */
1923
+ index = (xdb.i[HI] & 0x000fffff); /* the 20 mantissa bits held in the high word */
1924
+ xdb.i[HI] = index | 0x3ff00000; /* force the biased exponent to 0x3ff, i.e. 1 <= xdb.d < 2 */
1925
+ index = (index + (1<<(20-L-1))) >> (20-L); /* round those 20 bits to the nearest L-bit table index */
1926
+
1927
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
1928
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2) */
1929
+ xdb.i[HI] -= 0x00100000; /* decrement the exponent field: xdb.d is halved */
1930
+ E++;
1931
+ }
1932
+
1933
+ yhdb.i[HI] = xdb.i[HI];
1934
+ yhdb.i[LO] = 0; /* yh keeps only the high word of xdb */
1935
+ yh = yhdb.d;
1936
+ yl = xdb.d - yh; /* remaining low-order part of the mantissa */
1937
+
1938
+ index = index & INDEXMASK; /* NOTE(review): presumably wraps indices >= MAXINDEX back into the table range; confirm against the INDEXMASK definition */
1939
+ /* Cast integer E into double ed for multiplication later */
1940
+ ed = (double) E;
1941
+
1942
+ /*
1943
+ Read tables:
1944
+ Read one float for ri
1945
+ Read the first two doubles for -log(r_i) (out of three)
1946
+
1947
+ Organization of the table:
1948
+
1949
+ one struct entry per index, the struct entry containing
1950
+ ri, logih, logim and logil in this order
1951
+ */
1952
+
1953
+
1954
+ ri = argredtable[index].ri;
1955
+ /*
1956
+ Actually we don't need the logarithm entries now
1957
+ Move the following two lines to the eventual reconstruction
1958
+ As long as we don't have any if in the following code, we can overlap
1959
+ memory access with calculations
1960
+ */
1961
+ logih = argredtable[index].logih;
1962
+ logim = argredtable[index].logim;
1963
+
1964
+ /* Do range reduction:
1965
+
1966
+ zh + zl = y * ri - 1.0 exactly
1967
+
1968
+ Exactness is assured by use of the two-part yh + yl, the 21-bit ri and Add12
1969
+
1970
+ Discard zl for higher monomial degrees
1971
+ */
1972
+
1973
+ yrih = yh * ri;
1974
+ yril = yl * ri;
1975
+ th = yrih - 1.0;
1976
+ Add12Cond(zh, zl, th, yril);
1977
+
1978
+ /* Polynomial approximation: terms of degree >= 2 use zh only, zl enters once at low order */
1979
+
1980
+ zhSquare = zh * zh; /* 1 */
1981
+
1982
+ p35 = p_coeff_3h + zhSquare * p_coeff_5h; /* 3 */
1983
+ p46 = p_coeff_4h + zhSquare * p_coeff_6h; /* 3 */
1984
+ zhCube = zhSquare * zh; /* 2 */
1985
+ zhSquareHalf = p_coeff_2h * zhSquare; /* 2 */
1986
+ zhFour = zhSquare * zhSquare; /* 2 */
1987
+
1988
+ p36 = zhCube * p35 + zhFour * p46; /* 4 */
1989
+ zhSquareHalfPlusZl = zhSquareHalf + zl; /* 3 */
1990
+
1991
+ pUpper = zhSquareHalfPlusZl + p36; /* 5 */
1992
+
1993
+ Add12(ph,pl,zh,pUpper); /* 8 */
1994
+
1995
+ /* Reconstruction
1996
+
1997
+ Read logih and logim in the tables (already done)
1998
+
1999
+ Compute log10(x) = E * log10(2) + (log(1+z) - log(ri)) / log(10)
2000
+ i.e. log10(x) = ed * (log210h + log210m) + RECPRLOG10 * ((ph + pl) + (logih + logim)) + delta
2001
+
2002
+ Carry out everything in double double precision
2003
+
2004
+ */
2005
+
2006
+ /*
2007
+ We store log_10(2) as log210h + log210m + log210l where log210h and log210m have 10 trailing zeros
2008
+ Multiplication of ed (double E) and log210h and m is thus exact
2009
+ The overall accuracy of log210h + log210m + log210l is 53 * 3 - 24 = 135 which
2010
+ is enough for the accurate phase
2011
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
2012
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
2013
+ triple double values. We have to take it into account for the accurate phase
2014
+ basic procedures for addition and multiplication
2015
+ The condition on the next Add12 is verified as log210m is smaller than log210h
2016
+ and both are scaled by ed
2017
+ */
2018
+
2019
+ Add12(log2edh, log2edl, log210h * ed, log210m * ed);
2020
+
2021
+ /* Add logih and logim to ph and pl */
2022
+
2023
+ Add22(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
2024
+
2025
+ /* Multiply by 1/log(10) */
2026
+
2027
+ Mul22(&log10TabPolyh,&log10TabPolyl,RECPRLOG10H,RECPRLOG10M,logTabPolyh,logTabPolyl);
2028
+
2029
+ /* Add log2edh + log2edl to log10TabPolyh + log10TabPolyl */
2030
+
2031
+ Add22(&logh, &logm, log2edh, log2edl, log10TabPolyh, log10TabPolyl);
2032
+
2033
+ /* Rounding test and possible return (inside the macro); otherwise continue with the accurate phase below */
2034
+
2035
+
2036
+ TEST_AND_RETURN_RD(logh, logm, RDROUNDCST);
2037
+ {
2038
+
2039
+ logil = argredtable[index].logil; /* third (lowest) part of the tabulated -log(r_i) */
2040
+
2041
+ p_accu(&ph, &pm, &pl, zh, zl);
2042
+
2043
+ Add33(&logyh, &logym, &logyl, logih, logim, logil, ph, pm, pl);
2044
+
2045
+ Mul33(&log10yh,&log10ym,&log10yl,RECPRLOG10H,RECPRLOG10M,RECPRLOG10L,logyh,logym,logyl);
2046
+
2047
+ log2edh = log210h * ed; /* ed * log10(2), this time using all three parts */
2048
+ log2edm = log210m * ed;
2049
+ log2edl = log210l * ed;
2050
+
2051
+ Add33(&loghover, &logmover, &loglover, log2edh, log2edm, log2edl, log10yh, log10ym, log10yl);
2052
+
2053
+ Renormalize3(&logh,&logm,&logl,loghover,logmover,loglover);
2054
+
2055
+ ReturnRoundDownwards3Unfiltered(logh, logm, logl, WORSTCASEACCURACY); /* NOTE(review): macro defined elsewhere; by its name it returns the downward-rounded triple-double result */
2056
+
2057
+ } /* Accurate phase launched */
2058
+ }
2059
+
2060
+
2061
+ /*************************************************************
2062
+ *************************************************************
2063
+ * ROUNDED TOWARDS ZERO *
2064
+ *************************************************************
2065
+ *************************************************************/
2066
+ double log10_rz(double x){ /* log10(x), "rounded towards zero" variant: quick double-double phase first, then a triple-double accurate phase if the rounding test does not return */
2067
+ db_number xdb, yhdb;
2068
+ double yh, yl, ed, ri, logih, logim, logil, yrih, yril, th, zh, zl;
2069
+ double ph, pl, pm, log2edh, log2edl, log2edm, logTabPolyh, logTabPolyl, logh, logm, logl;
2070
+ int E, index;
2071
+ double zhSquare, zhCube, zhSquareHalf;
2072
+ double p35, p46, p36;
2073
+ double pUpper;
2074
+ double zhSquareHalfPlusZl;
2075
+ double zhFour;
2076
+ double logyh, logym, logyl;
2077
+ double loghover, logmover, loglover;
2078
+ double log10TabPolyh, log10TabPolyl;
2079
+ double log10yh, log10ym, log10yl;
2080
+
2081
+
2082
+ E=0;
2083
+ xdb.d=x;
2084
+
2085
+ /* Filter special cases: zero, negative, subnormal, then Inf/NaN */
2086
+ if (xdb.i[HI] < 0x00100000){ /* x < 2^(-1022); the zero and negative checks below sit in this branch, so i[HI] is presumably a signed word */
2087
+ if (((xdb.i[HI] & 0x7fffffff)|xdb.i[LO])==0){
2088
+ return -1.0/0.0;
2089
+ } /* log(+/-0) = -Inf */
2090
+ if (xdb.i[HI] < 0){
2091
+ return (x-x)/0; /* log(-x) = NaN */
2092
+ }
2093
+ /* Subnormal number: rescale it and compensate in the exponent */
2094
+ E = -52;
2095
+ xdb.d *= two52; /* make x a normal number (two52 is presumably 2^52, matching E = -52) */
2096
+ }
2097
+
2098
+ if (xdb.i[HI] >= 0x7ff00000){
2099
+ return x+x; /* Inf or NaN */
2100
+ }
2101
+
2102
+
2103
+ /* Extract exponent and mantissa
2104
+ Do range reduction,
2105
+ yielding E holding the exponent and
2106
+ y the mantissa between sqrt(2)/2 and sqrt(2)
2107
+ */
2108
+ E += (xdb.i[HI]>>20)-1023; /* extract the unbiased exponent (added to the subnormal correction, if any) */
2109
+ index = (xdb.i[HI] & 0x000fffff); /* the 20 mantissa bits held in the high word */
2110
+ xdb.i[HI] = index | 0x3ff00000; /* force the biased exponent to 0x3ff, i.e. 1 <= xdb.d < 2 */
2111
+ index = (index + (1<<(20-L-1))) >> (20-L); /* round those 20 bits to the nearest L-bit table index */
2112
+
2113
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
2114
+ if (index >= MAXINDEX){ /* corresponds to xdb>sqrt(2) */
2115
+ xdb.i[HI] -= 0x00100000; /* decrement the exponent field: xdb.d is halved */
2116
+ E++;
2117
+ }
2118
+
2119
+ yhdb.i[HI] = xdb.i[HI];
2120
+ yhdb.i[LO] = 0; /* yh keeps only the high word of xdb */
2121
+ yh = yhdb.d;
2122
+ yl = xdb.d - yh; /* remaining low-order part of the mantissa */
2123
+
2124
+ index = index & INDEXMASK; /* NOTE(review): presumably wraps indices >= MAXINDEX back into the table range; confirm against the INDEXMASK definition */
2125
+ /* Cast integer E into double ed for multiplication later */
2126
+ ed = (double) E;
2127
+
2128
+ /*
2129
+ Read tables:
2130
+ Read one float for ri
2131
+ Read the first two doubles for -log(r_i) (out of three)
2132
+
2133
+ Organization of the table:
2134
+
2135
+ one struct entry per index, the struct entry containing
2136
+ ri, logih, logim and logil in this order
2137
+ */
2138
+
2139
+
2140
+ ri = argredtable[index].ri;
2141
+ /*
2142
+ Actually we don't need the logarithm entries now
2143
+ Move the following two lines to the eventual reconstruction
2144
+ As long as we don't have any if in the following code, we can overlap
2145
+ memory access with calculations
2146
+ */
2147
+ logih = argredtable[index].logih;
2148
+ logim = argredtable[index].logim;
2149
+
2150
+ /* Do range reduction:
2151
+
2152
+ zh + zl = y * ri - 1.0 exactly
2153
+
2154
+ Exactness is assured by use of the two-part yh + yl, the 21-bit ri and Add12
2155
+
2156
+ Discard zl for higher monomial degrees
2157
+ */
2158
+
2159
+ yrih = yh * ri;
2160
+ yril = yl * ri;
2161
+ th = yrih - 1.0;
2162
+ Add12Cond(zh, zl, th, yril);
2163
+
2164
+ /* Polynomial approximation: terms of degree >= 2 use zh only, zl enters once at low order */
2165
+
2166
+ zhSquare = zh * zh; /* 1 */
2167
+
2168
+ p35 = p_coeff_3h + zhSquare * p_coeff_5h; /* 3 */
2169
+ p46 = p_coeff_4h + zhSquare * p_coeff_6h; /* 3 */
2170
+ zhCube = zhSquare * zh; /* 2 */
2171
+ zhSquareHalf = p_coeff_2h * zhSquare; /* 2 */
2172
+ zhFour = zhSquare * zhSquare; /* 2 */
2173
+
2174
+ p36 = zhCube * p35 + zhFour * p46; /* 4 */
2175
+ zhSquareHalfPlusZl = zhSquareHalf + zl; /* 3 */
2176
+
2177
+ pUpper = zhSquareHalfPlusZl + p36; /* 5 */
2178
+
2179
+ Add12(ph,pl,zh,pUpper); /* 8 */
2180
+
2181
+ /* Reconstruction
2182
+
2183
+ Read logih and logim in the tables (already done)
2184
+
2185
+ Compute log10(x) = E * log10(2) + (log(1+z) - log(ri)) / log(10)
2186
+ i.e. log10(x) = ed * (log210h + log210m) + RECPRLOG10 * ((ph + pl) + (logih + logim)) + delta
2187
+
2188
+ Carry out everything in double double precision
2189
+
2190
+ */
2191
+
2192
+ /*
2193
+ We store log_10(2) as log210h + log210m + log210l where log210h and log210m have 10 trailing zeros
2194
+ Multiplication of ed (double E) and log210h and m is thus exact
2195
+ The overall accuracy of log210h + log210m + log210l is 53 * 3 - 24 = 135 which
2196
+ is enough for the accurate phase
2197
+ The accuracy suffices also for the quick phase: 53 * 2 - 24 = 82
2198
+ Nevertheless the storage with trailing zeros implies an overlap of the tabulated
2199
+ triple double values. We have to take it into account for the accurate phase
2200
+ basic procedures for addition and multiplication
2201
+ The condition on the next Add12 is verified as log210m is smaller than log210h
2202
+ and both are scaled by ed
2203
+ */
2204
+
2205
+ Add12(log2edh, log2edl, log210h * ed, log210m * ed);
2206
+
2207
+ /* Add logih and logim to ph and pl */
2208
+
2209
+ Add22(&logTabPolyh, &logTabPolyl, logih, logim, ph, pl);
2210
+
2211
+ /* Multiply by 1/log(10) */
2212
+
2213
+ Mul22(&log10TabPolyh,&log10TabPolyl,RECPRLOG10H,RECPRLOG10M,logTabPolyh,logTabPolyl);
2214
+
2215
+ /* Add log2edh + log2edl to log10TabPolyh + log10TabPolyl */
2216
+
2217
+ Add22(&logh, &logm, log2edh, log2edl, log10TabPolyh, log10TabPolyl);
2218
+
2219
+ /* Rounding test and possible return (inside the macro); otherwise continue with the accurate phase below */
2220
+
2221
+
2222
+ TEST_AND_RETURN_RZ(logh, logm, RDROUNDCST);
2223
+ {
2224
+
2225
+ logil = argredtable[index].logil; /* third (lowest) part of the tabulated -log(r_i) */
2226
+
2227
+ p_accu(&ph, &pm, &pl, zh, zl);
2228
+
2229
+ Add33(&logyh, &logym, &logyl, logih, logim, logil, ph, pm, pl);
2230
+
2231
+ Mul33(&log10yh,&log10ym,&log10yl,RECPRLOG10H,RECPRLOG10M,RECPRLOG10L,logyh,logym,logyl);
2232
+
2233
+ log2edh = log210h * ed; /* ed * log10(2), this time using all three parts */
2234
+ log2edm = log210m * ed;
2235
+ log2edl = log210l * ed;
2236
+
2237
+ Add33(&loghover, &logmover, &loglover, log2edh, log2edm, log2edl, log10yh, log10ym, log10yl);
2238
+
2239
+ Renormalize3(&logh,&logm,&logl,loghover,logmover,loglover);
2240
+
2241
+ ReturnRoundTowardsZero3Unfiltered(logh, logm, logl, WORSTCASEACCURACY); /* NOTE(review): macro defined elsewhere; by its name it returns the result rounded towards zero */
2242
+
2243
+ } /* Accurate phase launched */
2244
+ }