crmf 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +12 -0
  3. data/crmf.gemspec +105 -3
  4. data/ext/crlibm-1.0beta5/AUTHORS +2 -0
  5. data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
  6. data/ext/crlibm-1.0beta5/COPYING +340 -0
  7. data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
  8. data/ext/crlibm-1.0beta5/ChangeLog +125 -0
  9. data/ext/crlibm-1.0beta5/Makefile.am +134 -0
  10. data/ext/crlibm-1.0beta5/NEWS +0 -0
  11. data/ext/crlibm-1.0beta5/README +31 -0
  12. data/ext/crlibm-1.0beta5/README.DEV +23 -0
  13. data/ext/crlibm-1.0beta5/README.md +5 -0
  14. data/ext/crlibm-1.0beta5/TODO +66 -0
  15. data/ext/crlibm-1.0beta5/VERSION +1 -0
  16. data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
  17. data/ext/crlibm-1.0beta5/acos-td.h +629 -0
  18. data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
  19. data/ext/crlibm-1.0beta5/asin-td.h +620 -0
  20. data/ext/crlibm-1.0beta5/asincos.c +4488 -0
  21. data/ext/crlibm-1.0beta5/asincos.h +575 -0
  22. data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
  23. data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
  24. data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
  25. data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
  26. data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
  27. data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
  28. data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
  29. data/ext/crlibm-1.0beta5/configure.ac +419 -0
  30. data/ext/crlibm-1.0beta5/crlibm.h +204 -0
  31. data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
  32. data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
  33. data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
  34. data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
  35. data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
  36. data/ext/crlibm-1.0beta5/double-extended.h +496 -0
  37. data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
  38. data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
  39. data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
  40. data/ext/crlibm-1.0beta5/exp-td.h +685 -0
  41. data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
  42. data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
  43. data/ext/crlibm-1.0beta5/expm1.c +2515 -0
  44. data/ext/crlibm-1.0beta5/expm1.h +715 -0
  45. data/ext/crlibm-1.0beta5/interval.h +238 -0
  46. data/ext/crlibm-1.0beta5/log-de.c +480 -0
  47. data/ext/crlibm-1.0beta5/log-de.h +747 -0
  48. data/ext/crlibm-1.0beta5/log-de2.c +280 -0
  49. data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
  50. data/ext/crlibm-1.0beta5/log-td.c +1158 -0
  51. data/ext/crlibm-1.0beta5/log-td.h +819 -0
  52. data/ext/crlibm-1.0beta5/log.c +2244 -0
  53. data/ext/crlibm-1.0beta5/log.h +1592 -0
  54. data/ext/crlibm-1.0beta5/log10-td.c +906 -0
  55. data/ext/crlibm-1.0beta5/log10-td.h +823 -0
  56. data/ext/crlibm-1.0beta5/log1p.c +1295 -0
  57. data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
  58. data/ext/crlibm-1.0beta5/log2-td.h +821 -0
  59. data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
  60. data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
  61. data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
  62. data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
  63. data/ext/crlibm-1.0beta5/log_fast.c +360 -0
  64. data/ext/crlibm-1.0beta5/log_fast.h +440 -0
  65. data/ext/crlibm-1.0beta5/pow.c +1396 -0
  66. data/ext/crlibm-1.0beta5/pow.h +3101 -0
  67. data/ext/crlibm-1.0beta5/prepare +20 -0
  68. data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
  69. data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
  70. data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
  71. data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
  72. data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
  73. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
  74. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
  75. data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
  76. data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
  77. data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
  78. data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
  79. data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
  80. data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
  81. data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
  82. data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
  83. data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
  84. data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
  85. data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
  86. data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
  87. data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
  88. data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
  89. data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
  90. data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
  91. data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
  92. data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
  93. data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
  94. data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
  95. data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
  96. data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
  97. data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
  98. data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
  99. data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
  100. data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
  101. data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
  102. data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
  103. data/ext/crlibm-1.0beta5/trigpi.h +556 -0
  104. data/ext/crlibm-1.0beta5/triple-double.c +57 -0
  105. data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
  106. data/ext/crmf/crmf.c +117 -20
  107. data/ext/crmf/extconf.rb +12 -8
  108. data/lib/crmf/version.rb +1 -1
  109. data/tests/perf.rb +100 -219
  110. metadata +108 -10
  111. data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,1396 @@
1
+
2
+ #include <stdio.h>
3
+ #include "crlibm.h"
4
+ #include "crlibm_private.h"
5
+ #include "triple-double.h"
6
+ #include "pow.h"
7
+
8
+
9
+ /* Some macros for specific operations in power */
10
+
11
+ #define USE_BUILTINS 0
12
+
13
+ /* decompose
14
+
15
+ Decomposes a positive double precision
16
+ variable x (normal or subnormal) such that
17
+
18
+ 2^(resE) * resm = x
19
+
20
+ where resm = 2 * k + 1 for integer k
21
+
22
+ resE is an integer variable pointer
23
+ resm is a double precision variable pointer
24
+ x is a double precision variable.
25
+
26
+ */
27
+
28
+ #if USE_BUILTINS
29
+ #define decompose(resm,resE,x) \
30
+ { \
31
+ int __decompose_ex, __decompose_d; \
32
+ int64_t __decompose_temp; \
33
+ db_number __decompose_xdb; \
34
+ \
35
+ __decompose_xdb.d = (x); \
36
+ __decompose_ex = 0; \
37
+ if (!(__decompose_xdb.i[HI] & 0xfff00000)) { \
38
+ __decompose_xdb.d = (x) * 0.18014398509481984e17; \
39
+ __decompose_ex = -54; \
40
+ } \
41
+ __decompose_ex += (__decompose_xdb.i[HI] >> 20) - 1023; \
42
+ __decompose_xdb.i[HI] &= 0x000fffff; \
43
+ __decompose_temp = __decompose_xdb.l | 0x0010000000000000llu; \
44
+ __decompose_d = __builtin_ctz(__decompose_temp); \
45
+ __decompose_xdb.i[HI] |= (1075 - __decompose_d) << 20; \
46
+ *(resE) = __decompose_d - 52 + __decompose_ex; \
47
+ *(resm) = __decompose_xdb.d; \
48
+ }
49
+ #else
50
+ #define decompose(resm,resE,x) \
51
+ { \
52
+ int __decompose_ex, __decompose_d; \
53
+ int64_t __decompose_temp; \
54
+ db_number __decompose_xdb, __decompose_tempdb; \
55
+ \
56
+ __decompose_xdb.d = (x); \
57
+ __decompose_ex = 0; \
58
+ if (!(__decompose_xdb.i[HI] & 0xfff00000)) { \
59
+ __decompose_xdb.d = (x) * 0.18014398509481984e17; \
60
+ __decompose_ex = -54; \
61
+ } \
62
+ __decompose_ex += (__decompose_xdb.i[HI] >> 20) - 1023; \
63
+ __decompose_xdb.i[HI] &= 0x000fffff; \
64
+ __decompose_temp = __decompose_xdb.l | 0x0010000000000000llu; \
65
+ __decompose_temp = (~__decompose_temp & (__decompose_temp - 1)) + 1; \
66
+ __decompose_tempdb.d = (double) __decompose_temp; \
67
+ __decompose_d = (__decompose_tempdb.i[HI] >> 20) - 1023; \
68
+ __decompose_xdb.i[HI] |= (1075 - __decompose_d) << 20; \
69
+ *(resE) = __decompose_d - 52 + __decompose_ex; \
70
+ *(resm) = __decompose_xdb.d; \
71
+ }
72
+ #endif
73
+
74
+
75
+ /* isOddInteger
76
+
77
+ Determines if a given double precision number x is an odd integer
78
+
79
+ */
80
+
81
+ #define isOddInteger(x) (ABS(((ABS(x) + 0.9007199254740992e16) - 0.9007199254740992e16) - ABS(x)) == 1.0)
82
+
83
+ /* isInteger
84
+
85
+ Determines if a given double precision number x is integer
86
+
87
+ */
88
+
89
+ #define isInteger(x) ((ABS(x) >= 0.4503599627370496e16) || (((ABS(x) + 0.4503599627370496e16) - 0.4503599627370496e16) == ABS(x)))
90
+
91
+
92
+ /* log2_130
93
+
94
+ Approximates
95
+
96
+ resh + resm + resl = (ed + log2(1 + (xh + xm)) + log2(r[index])) * (1 + eps)
97
+
98
+ where ||eps|| <= 2^(-130)
99
+
100
+
101
+ */
102
+ static inline void log2_130(double *resh, double *resm, double *resl,
103
+ int index, double ed, double xh, double xm) {
104
+
105
+ double p_t_1_0h;
106
+ double p_t_2_0h;
107
+ double p_t_3_0h;
108
+ double p_t_4_0h;
109
+ double p_t_5_0h;
110
+ double p_t_6_0h;
111
+ double p_t_7_0h;
112
+ double p_t_8_0h;
113
+ double p_t_9_0h, p_t_9_0m;
114
+ double p_t_10_0h, p_t_10_0m;
115
+ double p_t_11_0h, p_t_11_0m;
116
+ double p_t_12_0h, p_t_12_0m;
117
+ double p_t_13_0h, p_t_13_0m;
118
+ double p_t_14_0h, p_t_14_0m;
119
+ double p_t_15_0h, p_t_15_0m;
120
+ double p_t_16_0h, p_t_16_0m, p_t_16_0l;
121
+ double p_t_17_0h, p_t_17_0m, p_t_17_0l;
122
+ double p_t_18_0h, p_t_18_0m, p_t_18_0l;
123
+ double p_t_19_0h, p_t_19_0m, p_t_19_0l;
124
+ double p_t_20_0h, p_t_20_0m, p_t_20_0l;
125
+ double p_t_21_0h, p_t_21_0m, p_t_21_0l;
126
+ double p_t_21_1h, p_t_21_1m, p_t_21_1l;
127
+ double p_t_22_0h, p_t_22_0m, p_t_22_0l;
128
+ double p_t_23_0h, p_t_23_0m, p_t_23_0l;
129
+ double p_resh, p_resm, p_resl;
130
+ double log2yh, log2ym, log2yl;
131
+ double log2xh, log2xm, log2xl;
132
+ double logih, logim, logil;
133
+
134
+
135
+ p_t_1_0h = log2_130_p_coeff_13h;
136
+ p_t_2_0h = p_t_1_0h * xh;
137
+ p_t_3_0h = log2_130_p_coeff_12h + p_t_2_0h;
138
+ p_t_4_0h = p_t_3_0h * xh;
139
+ p_t_5_0h = log2_130_p_coeff_11h + p_t_4_0h;
140
+ p_t_6_0h = p_t_5_0h * xh;
141
+ p_t_7_0h = log2_130_p_coeff_10h + p_t_6_0h;
142
+ p_t_8_0h = p_t_7_0h * xh;
143
+ Add12(p_t_9_0h,p_t_9_0m,log2_130_p_coeff_9h,p_t_8_0h);
144
+ Mul22(&p_t_10_0h,&p_t_10_0m,p_t_9_0h,p_t_9_0m,xh,xm);
145
+ Add122(&p_t_11_0h,&p_t_11_0m,log2_130_p_coeff_8h,p_t_10_0h,p_t_10_0m);
146
+ MulAdd22(&p_t_12_0h,&p_t_12_0m,log2_130_p_coeff_7h,log2_130_p_coeff_7m,xh,xm,p_t_11_0h,p_t_11_0m);
147
+ MulAdd22(&p_t_13_0h,&p_t_13_0m,log2_130_p_coeff_6h,log2_130_p_coeff_6m,xh,xm,p_t_12_0h,p_t_12_0m);
148
+ MulAdd22(&p_t_14_0h,&p_t_14_0m,log2_130_p_coeff_5h,log2_130_p_coeff_5m,xh,xm,p_t_13_0h,p_t_13_0m);
149
+ Mul22(&p_t_15_0h,&p_t_15_0m,p_t_14_0h,p_t_14_0m,xh,xm);
150
+ Add23(&p_t_16_0h,&p_t_16_0m,&p_t_16_0l,log2_130_p_coeff_4h,log2_130_p_coeff_4m,p_t_15_0h,p_t_15_0m);
151
+ Mul233(&p_t_17_0h,&p_t_17_0m,&p_t_17_0l,xh,xm,p_t_16_0h,p_t_16_0m,p_t_16_0l);
152
+ Add233(&p_t_18_0h,&p_t_18_0m,&p_t_18_0l,log2_130_p_coeff_3h,log2_130_p_coeff_3m,p_t_17_0h,p_t_17_0m,p_t_17_0l);
153
+ Mul233(&p_t_19_0h,&p_t_19_0m,&p_t_19_0l,xh,xm,p_t_18_0h,p_t_18_0m,p_t_18_0l);
154
+ Add33(&p_t_20_0h,&p_t_20_0m,&p_t_20_0l,log2_130_p_coeff_2h,log2_130_p_coeff_2m,log2_130_p_coeff_2l,p_t_19_0h,p_t_19_0m,p_t_19_0l);
155
+ Mul233(&p_t_21_0h,&p_t_21_0m,&p_t_21_0l,xh,xm,p_t_20_0h,p_t_20_0m,p_t_20_0l);
156
+ Renormalize3(&p_t_21_1h,&p_t_21_1m,&p_t_21_1l,p_t_21_0h,p_t_21_0m,p_t_21_0l);
157
+ Add33(&p_t_22_0h,&p_t_22_0m,&p_t_22_0l,log2_130_p_coeff_1h,log2_130_p_coeff_1m,log2_130_p_coeff_1l,p_t_21_1h,p_t_21_1m,p_t_21_1l);
158
+ Mul233(&p_t_23_0h,&p_t_23_0m,&p_t_23_0l,xh,xm,p_t_22_0h,p_t_22_0m,p_t_22_0l);
159
+ p_resh = p_t_23_0h;
160
+ p_resm = p_t_23_0m;
161
+ p_resl = p_t_23_0l;
162
+
163
+ logih = argredtable[index].logih;
164
+ logim = argredtable[index].logim;
165
+ logil = argredtable[index].logil;
166
+
167
+ Add33(&log2yh,&log2ym,&log2yl,logih,logim,logil,p_resh,p_resm,p_resl);
168
+ Add133(&log2xh,&log2xm,&log2xl,ed,log2yh,log2ym,log2yl);
169
+
170
+ Renormalize3(resh,resm,resl,log2xh,log2xm,log2xl);
171
+
172
+ }
173
+
174
+ /* exp2_120
175
+
176
+ Approximates
177
+
178
+ 2^H * (resh + resm + resl) = 2^(xh + xm + xl) * (1 + eps)
179
+
180
+ where ||eps|| <= 2^(-119.5)
181
+
182
+ */
183
+ static inline void exp2_120(int *H, double *resh, double *resm, double *resl,
184
+ double xh, double xm, double xl) {
185
+ double xhMult2L, rhMult2L, r;
186
+ int k, index1, index2;
187
+ db_number shiftedxhMult2Ldb;
188
+ double rh, rm, rl;
189
+ double t1h, t1m, t1l, t2, t3;
190
+ double p_t_1_0h;
191
+ double p_t_2_0h;
192
+ double p_t_3_0h;
193
+ double p_t_4_0h;
194
+ double p_t_5_0h, p_t_5_0m;
195
+ double p_t_6_0h, p_t_6_0m;
196
+ double p_t_7_0h, p_t_7_0m;
197
+ double p_t_8_0h, p_t_8_0m;
198
+ double p_t_9_0h, p_t_9_0m, p_t_9_0l;
199
+ double p_t_10_0h, p_t_10_0m, p_t_10_0l;
200
+ double p_t_11_0h, p_t_11_0m, p_t_11_0l;
201
+ double p_resh, p_resm, p_resl;
202
+ double tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l;
203
+ double tablesh, tablesm, tablesl;
204
+ double exp2h, exp2m, exp2l;
205
+
206
+ /* Argument reduction
207
+
208
+ Produce exactly
209
+
210
+ 2^H * 2^(i1/2^8) * 2^(i2/2^13) * 2^(rh + rm + rl)
211
+
212
+ */
213
+
214
+ xhMult2L = xh * two13;
215
+ shiftedxhMult2Ldb.d = shiftConst + xhMult2L;
216
+ rhMult2L = xhMult2L - (shiftedxhMult2Ldb.d - shiftConst);
217
+ r = rhMult2L * twoM13;
218
+ k = shiftedxhMult2Ldb.i[LO];
219
+ *H = k >> 13;
220
+ index1 = k & INDEXMASK1;
221
+ index2 = (k & INDEXMASK2) >> 5;
222
+ Add12Cond(t1h, t2, r, xm);
223
+ Add12Cond(t1m, t1l, t2, xl);
224
+ Add12(rh, t3, t1h, t1m);
225
+ Add12(rm, rl, t3, t1l);
226
+
227
+ /* Polynomial approximation of 2^(rh + rm + rl) */
228
+
229
+ p_t_1_0h = exp2_120_p_coeff_6h;
230
+ p_t_2_0h = p_t_1_0h * rh;
231
+ p_t_3_0h = exp2_120_p_coeff_5h + p_t_2_0h;
232
+ p_t_4_0h = p_t_3_0h * rh;
233
+ Add12(p_t_5_0h,p_t_5_0m,exp2_120_p_coeff_4h,p_t_4_0h);
234
+ MulAdd22(&p_t_6_0h,&p_t_6_0m,exp2_120_p_coeff_3h,exp2_120_p_coeff_3m,rh,rm,p_t_5_0h,p_t_5_0m);
235
+ MulAdd22(&p_t_7_0h,&p_t_7_0m,exp2_120_p_coeff_2h,exp2_120_p_coeff_2m,rh,rm,p_t_6_0h,p_t_6_0m);
236
+ Mul22(&p_t_8_0h,&p_t_8_0m,p_t_7_0h,p_t_7_0m,rh,rm);
237
+ Add23(&p_t_9_0h,&p_t_9_0m,&p_t_9_0l,exp2_120_p_coeff_1h,exp2_120_p_coeff_1m,p_t_8_0h,p_t_8_0m);
238
+ Mul33(&p_t_10_0h,&p_t_10_0m,&p_t_10_0l,rh,rm,rl,p_t_9_0h,p_t_9_0m,p_t_9_0l);
239
+ Add133(&p_t_11_0h,&p_t_11_0m,&p_t_11_0l,exp2_120_p_coeff_0h,p_t_10_0h,p_t_10_0m,p_t_10_0l);
240
+ Renormalize3(&p_resh,&p_resm,&p_resl,p_t_11_0h,p_t_11_0m,p_t_11_0l);
241
+
242
+ /* Table access */
243
+
244
+ tbl1h = twoPowerIndex1[index1].hi;
245
+ tbl1m = twoPowerIndex1[index1].mi;
246
+ tbl1l = twoPowerIndex1[index1].lo;
247
+ tbl2h = twoPowerIndex2[index2].hi;
248
+ tbl2m = twoPowerIndex2[index2].mi;
249
+ tbl2l = twoPowerIndex2[index2].lo;
250
+
251
+ /* Reconstruction */
252
+
253
+ Mul33(&tablesh,&tablesm,&tablesl,tbl1h,tbl1m,tbl1l,tbl2h,tbl2m,tbl2l);
254
+ Mul33(&exp2h,&exp2m,&exp2l,tablesh,tablesm,tablesl,p_resh,p_resm,p_resl);
255
+
256
+ Renormalize3(resh,resm,resl,exp2h,exp2m,exp2l);
257
+
258
+ }
259
+
260
+
261
+
262
+ /* pow_120
263
+
264
+ Approximates
265
+
266
+ 2^H * (resh + resm + resl) = 2^(y * (ed + log2(1 + (zh + zm)) + log2(r[index]))) * (1 + eps)
267
+
268
+ where ||eps|| <= 2^(-118.5) if -1075 <= y * (ed + log2(1 + (zh + zm)) + log2(r[index])) <= 1024
269
+
270
+ Approximates further (ed + log2(1 + (zh + zm)) + log2(r[index))) by log2xh
271
+ where
272
+
273
+ log2xh = (ed + log2(1 + (zh + zm)) + log2(r[index))) * (1 + eps2)
274
+
275
+ where |eps2| <= 2^(-52)
276
+ and log2xh is exact if 2^ed * ((1 + (zh + zm)) * r[index]) is an integer power of 2.
277
+
278
+ */
279
+ void pow_120(int *H, double *resh, double *resm, double *resl, double *log2xh,
280
+ double y, int index, double ed, double zh, double zm) {
281
+ double ylog2xh, ylog2xm, ylog2xl;
282
+ double log2xm, log2xl;
283
+
284
+ /* Compute log2(x) */
285
+ log2_130(log2xh,&log2xm,&log2xl,index,ed,zh,zm);
286
+
287
+ /* Compute y * log2(x) */
288
+ Mul133(&ylog2xh,&ylog2xm,&ylog2xl,y,*log2xh,log2xm,log2xl);
289
+
290
+ /* Compute 2^(y * log2(x)) */
291
+ exp2_120(H,resh,resm,resl,ylog2xh,ylog2xm,ylog2xl);
292
+
293
+ }
294
+
295
+ /* pow_round_and_check_rn
296
+
297
+ Checks whether
298
+
299
+ 2^H * (powh + powm + powl)
300
+
301
+ which is an approximate to x^y with a relative error or less than 2^(-118.5),
302
+ can be rounded correctly to double precision in round-to-nearest-ties-to-even
303
+ mode or whether the Table Maker's Dilemma occurs or the case is exact.
304
+
305
+ Returns 1 if rounding is possible and affects pow with the rounding
306
+ Returns 0 if rounding is not possible or if the case is exact
307
+
308
+ If the returned value is 0, it affects
309
+
310
+ G, kh, kl
311
+
312
+ such that
313
+
314
+ 2^G * (kh + kl)
315
+
316
+ is an approximate to x^y with an relative error of 2^(-117)
317
+
318
+ and such that rounding
319
+
320
+ 2^G * kh
321
+
322
+ to double precision is an exact operation.
323
+
324
+ */
325
+
326
+ static inline int pow_round_and_check_rn(double *pow,
327
+ int H, double powh, double powm, double powl,
328
+ int *G, double *kh, double *kl) {
329
+ double th, tm, tl;
330
+ int K, K1, K2;
331
+ db_number twodb, two2db;
332
+ double twoH1074powh, twoH1074powm, shiftedpowh, delta;
333
+ double scaledth;
334
+ double t1m, t1l;
335
+
336
+ /* We start by bringing H and powh + powm + powl
337
+ to a form such that
338
+
339
+ 1 <= powh + powm + powl < 2
340
+ */
341
+ if ((powh < 1.0) ||
342
+ ((powh == 1.0) && (powm < 0.0))) {
343
+ powh *= 2.0;
344
+ powm *= 2.0;
345
+ powl *= 2.0;
346
+ H--;
347
+ }
348
+ if ((powh > 2.0) ||
349
+ ((powh == 2.0) && (powm >= 0.0))) {
350
+ powh *= 0.5;
351
+ powm *= 0.5;
352
+ powl *= 0.5;
353
+ H++;
354
+ }
355
+
356
+ /* Check now whether we have normal or subnormal rounding
357
+
358
+ The rounding is subnormal iff H <= -1023
359
+
360
+ In both cases, we bring H, powh + powm + powl to a form
361
+
362
+ 2^K * (th + tm + tl) = 2^H * (powh + powm + powm) * (1 + eps)
363
+
364
+ where
365
+
366
+ (i) |eps| <= 2^(-118 - 53) = 2^(-171)
367
+ (ii) 2^(-K) * ulp(2^K * th) = 1
368
+ (iii) the rounding of 2^K * th to double precision is exact
369
+ (or produces +inf)
370
+
371
+ */
372
+ if (H <= -1023) {
373
+ /* Subnormal rounding
374
+
375
+ In this case, we can neglect powl
376
+ because the rounding bit of the RN rounding
377
+ is in powh
378
+
379
+ */
380
+ twodb.i[HI] = (H + (1074 + 1023)) << 20;
381
+ twodb.i[LO] = 0;
382
+
383
+ twoH1074powh = twodb.d * powh;
384
+ twoH1074powm = twodb.d * powm;
385
+
386
+ shiftedpowh = two52 + twoH1074powh;
387
+ th = shiftedpowh - two52;
388
+ delta = twoH1074powh - th;
389
+
390
+ Add12Cond(tm,tl,delta,twoH1074powm);
391
+
392
+ K = -1074;
393
+ } else {
394
+ /* Normal rounding
395
+
396
+ In this case, we have exactly:
397
+
398
+ 2^K * (th + tm + tl) = 2^H * (powh + powm + powm)
399
+
400
+ */
401
+ th = powh * two52;
402
+ tm = powm * two52;
403
+ tl = powl * two52;
404
+ K = H - 52;
405
+ }
406
+
407
+ /* Return results for exactness case test */
408
+ *G = K;
409
+ *kh = th;
410
+ *kl = tm;
411
+
412
+ /* Compute now
413
+
414
+ delta = ABS(0.5 - ABS(tm + tl)) * (1 + eps)
415
+
416
+ where |eps| <= 2^(-53)
417
+
418
+ The addition 0.5 + (-ABS(tm)) is exact by Sterbenz' lemma
419
+
420
+ */
421
+ if (tm > 0.0) {
422
+ t1m = - tm;
423
+ t1l = - tl;
424
+ } else {
425
+ t1m = tm;
426
+ t1l = tl;
427
+ }
428
+ delta = ABS((0.5 + t1m) - t1l);
429
+
430
+ /* We cannot decide the rounding or have an exact case
431
+ iff
432
+
433
+ delta <= 2^(-118) * th
434
+
435
+ We can see this in the following drawing:
436
+
437
+ result = round(
438
+
439
+ +-----------------+------+----------...----+------------
440
+ 2^K * | th | +/-1 | 0 | delta )
441
+ +-----------------+------+----------...----+------------
442
+ | <------------- 118 bits -------------> |
443
+
444
+ */
445
+
446
+ scaledth = th * PRECISEROUNDCST;
447
+
448
+ if (delta > scaledth) {
449
+ /* We can round exactly to nearest
450
+
451
+ We must correct th to the rounding of
452
+ th + tm + tl iff tm is greater in
453
+ absolute value than 0.5
454
+ or if tm is equal to 0.5 in absolute value and
455
+ the sign of tm and tl
456
+ is equal:
457
+
458
+ | |
459
+ ...------|--------|--------|------...
460
+ |------->--> |
461
+ ^ tm tl ^
462
+ th round(th + tm + tl)
463
+
464
+ If tm is negative, we must correct th by decreasing it
465
+ otherwise we must correct th by increasing it.
466
+
467
+ */
468
+ if (ABS(tm) >= 0.5) {
469
+ if (ABS(tm) == 0.5) {
470
+ if (tm < 0.0) {
471
+ if (tl < 0.0) {
472
+ /* The same sign of tm and tl, tm is negative */
473
+ th -= 1.0;
474
+ }
475
+ } else {
476
+ if (tl > 0.0) {
477
+ /* The same sign of tm and tl, tm is positive */
478
+ th += 1.0;
479
+ }
480
+ }
481
+ } else {
482
+ /* tm > 0.5 */
483
+ if (tm < 0.0) {
484
+ th -= 1.0;
485
+ } else {
486
+ th += 1.0;
487
+ }
488
+ }
489
+ }
490
+
491
+ /* Perform now the multiplication 2^K * th
492
+
493
+ Note that we must be able to produce
494
+
495
+ (i) 0 if we have total underflow
496
+ (ii) a subnormal result
497
+ (iii) a normal result
498
+ (iv) +inf if we have overflow in a TMD case
499
+
500
+ We produce K1 + K2 = K with K1 = floor(K/2)
501
+ and multiply in two steps.
502
+
503
+ */
504
+ K1 = K >> 1;
505
+ K2 = K - K1;
506
+
507
+ twodb.i[HI] = (K1 + 1023) << 20;
508
+ twodb.i[LO] = 0;
509
+ two2db.i[HI] = (K2 + 1023) << 20;
510
+ two2db.i[LO] = 0;
511
+
512
+ *pow = two2db.d * (twodb.d * th);
513
+
514
+ return 1;
515
+ }
516
+ /* Otherwise we return 0 because we cannot round */
517
+
518
+ return 0;
519
+ }
520
+
521
+
522
+ /* pow_exact_case
523
+
524
+ Checks whether x^y is an exact or half-ulp case for rounding
525
+ into double precision.
526
+
527
+ This means the procedure checks whether x^y can be written on
528
+ not more than 54 bits.
529
+
530
+ The procedure uses 2^G * (kh + kl) supposing the following properties:
531
+
532
+ * kh + kl holds on at most 54 bits
533
+ * 2^G * kh is representable in double precision (even in the subnormal range)
534
+ * 2^G * (kh + kl) approximates x^y with an error eps less than 2^(-117), i.e.
535
+
536
+ 2^G * (kh + kl) = x^y * (1 + eps) where |eps| <= 2^(-117)
537
+
538
+ * log2xh approximates log2(x) with an accuracy equivalent to at least 52 bits
539
+ In particular log2xh is exact if x is an integer power of 2
540
+
541
+ Returns 1 if the case is exact or half-ulp
542
+ Returns 0 otherwise
543
+
544
+ If returning 1, affects pow with 2^G * (kh + kl) rounded to double precision
545
+
546
+ */
547
+ int pow_exact_case(double *pow,
548
+ double x, double y,
549
+ int G, double kh, double kl, double log2xh) {
550
+ db_number xdb, ydb, tempdb, shiftedEydb, temp2db;
551
+ int E, F, G1, G2;
552
+ double m, n, yh, yl, Eyh, Eyl, ed, Ey;
553
+ double delta;
554
+ double nearestEy;
555
+ double value;
556
+
557
+ /* For testing whether x^y is an exact or half-ulp case,
558
+ we have two main cases:
559
+
560
+ (i) x is an integer power of 2
561
+ (ii) x is not an integer power of 2
562
+
563
+ We start by testing if x is an integer power of 2.
564
+
565
+ */
566
+
567
+ xdb.d = x;
568
+ if ((xdb.i[HI] & 0xfff00000) == 0) {
569
+ /* x is subnormal, scale by 2^52 */
570
+ xdb.d *= 0.4503599627370496e16;
571
+ }
572
+ if (((xdb.i[HI] & 0x000fffff) | xdb.i[LO]) == 0) {
573
+ /* x is an integer power of 2
574
+
575
+ x^y is exact or midpoint iff log2(x) * y is integer
576
+
577
+ Since we know that x is an integer power of 2,
578
+ log2xh is equal to this integer. Since the exponent
579
+ of the double precision number is bounded by the
580
+ exponent range, we know that log2xh is integer and
581
+ bounded by 2^11. It is therefore possible to
582
+ split y into two parts of 21 and 32 bits, to perform
583
+ the multiplication componentwise. Since the result
584
+ is bounded by 2^11, it suffices to compute the
585
+ nearest integer to the higher word product, and to
586
+ compare the rounding difference to the low word product.
587
+ This splitting is faster than the usual Dekker splitting.
588
+
589
+ */
590
+ ydb.d = y;
591
+ ydb.i[LO] = 0;
592
+ yh = ydb.d;
593
+ yl = y - yh;
594
+ Eyh = log2xh * yh;
595
+ Eyl = log2xh * yl;
596
+ delta = ((0.6755399441055744e16 + Eyh) - 0.6755399441055744e16) - Eyh; /* addition rounds, subtractions exact */
597
+
598
+ if (delta != Eyl) return 0;
599
+
600
+ } else {
601
+
602
+ /* x is not an integer power of 2
603
+
604
+ We have clearly an inexact case if y is negative
605
+ or if y is greater than 35
606
+
607
+ */
608
+
609
+ if ((y < 0.0) || (y > 35.0)) return 0;
610
+
611
+ /* Decompose now y into
612
+
613
+ y = 2^F * n
614
+
615
+ Checking F and n, we can then already decide
616
+ some cases using the fact that the worst-case
617
+ accuracy for x^n, n in [|0;35|], is less (in bits)
618
+ than the accuracy of the approximation of x^y we have
619
+ already in 2^G * (kh + kl).
620
+
621
+ */
622
+ decompose(&n,&F,y);
623
+
624
+ if ((n > 35.0) || (F < -5)) return 0;
625
+
626
+ if (F < 0) {
627
+ /* Here, -5 <= F <= -1, 3 <= n <= 35, n an odd integer
628
+
629
+ We decompose x into 2^E * m where m is an odd integer
630
+
631
+ Let H, sh and sl such that 2^H * (sh + sl) = 2^G * (kh + kl) and
632
+ sh + sl is an odd integer.
633
+
634
+ If we have E * 2^F * n = H, we can apply the worst case argument
635
+ because we know the worst case for
636
+
637
+ m^(2^F * n) with m odd integer, -5 <= F <= -1, 3 <= n <= 35
638
+
639
+ when rounding to 53 bits in both rounding modes.
640
+
641
+ E is bounded in magnitude by 2^11. 2^F * n is equal to y by
642
+ construction. Since n <= 35, y contains at most 6 significant bits.
643
+ The arithmetical multiplication E * y is therefore exact and less
644
+ than or equal to 2^17.
645
+
646
+ We check first whether E * y is an integer. If this is the case,
647
+ we compute sh + sl = 2^(G - E * y) * (kh + kl). Finally,
648
+ we check whether sh + sl is an odd integer.
649
+
650
+ */
651
+
652
+ decompose(&m,&E,x);
653
+
654
+ ed = (double) E;
655
+
656
+ Ey = ed * y; /* Exact */
657
+
658
+
659
+ /* Check whether Ey is an integer using the simple shift technique
660
+ The addition rounds, the substraction is exact by Sterbenz' lemma.
661
+ If Ey is an integer, the low order word of shiftedEydb is equal to this
662
+ integer.
663
+ */
664
+ shiftedEydb.d = 0.6755399441055744e16 + Ey;
665
+ nearestEy = shiftedEydb.d - 0.6755399441055744e16;
666
+
667
+ if (nearestEy != Ey) return 0;
668
+
669
+ /* Here E * y is integer.
670
+ Produce now 2^(G - E * y).
671
+ */
672
+ tempdb.i[HI] = (((G - shiftedEydb.i[LO]) + 1023) << 20);
673
+ tempdb.i[LO] = 0;
674
+
675
+ /* Check now if sh + sl = tempdb.d * (kh + kl) is an odd
676
+ integer.
677
+
678
+ Since kh and kl are not overlapped, we have two cases:
679
+
680
+ (i) kl is equal to 0, in which case tempdb.d * kh must be an odd integer
681
+ (ii) kl is not equal to 0, in which case tempdb.d * kl must be an odd integer
682
+
683
+ */
684
+ if (kl == 0.0) value = kh; else value = kl;
685
+
686
+ value *= tempdb.d; /* Exact because multiplication by positive integer power of 2 */
687
+
688
+ if (!isOddInteger(value)) return 0;
689
+
690
+ /* Here the case is exact by the worst-case argument */
691
+ }
692
+
693
+ /* Here, we have either F >= 0 or an exact case
694
+
695
+ If F >= 0, we also have an exact case because
696
+ 2^F * n = y <= 35, y therefore integer and because
697
+ we can apply the worst case argument.
698
+
699
+ */
700
+ }
701
+
702
+ /*
703
+ Here, the case is exact, affect pow with 2^G * (kh + kl) rounded to
704
+ nearest in double precision
705
+
706
+ Since kh + kl holds on at most 54 bits, 2^G * kh produces
707
+ never any rounding error and kh and kl are not overlapping,
708
+ 2^G * kh is equal to the rounding if
709
+ (i) kl is equal to 0
710
+ (ii) kl is not equal to 0 and the mantissa of 2^G * kh is even
711
+
712
+ If in condition (ii), kl is not equal to 0 and the mantissa of
713
+ 2^G * kh is not even, we correct it depending on the sign of kl.
714
+
715
+ Remark that G can be such that 2^G is no longer a normal.
716
+ Produce therefore 2^(floor(G/2)) and 2^(G - floor(G/2)) and
717
+ multiply in two steps.
718
+
719
+ */
720
+
721
+ G1 = G >> 1;
722
+ G2 = G - G1;
723
+
724
+ tempdb.i[HI] = (G1 + 1023) << 20;
725
+ tempdb.i[LO] = 0;
726
+ temp2db.i[HI] = (G2 + 1023) << 20;
727
+ temp2db.i[LO] = 0;
728
+
729
+ tempdb.d *= (kh * temp2db.d);
730
+
731
+ if ((kl != 0.0) && ((tempdb.i[LO] & 1) != 0)) {
732
+ /* We must correct the rounding to the rounding to nearest ties to even */
733
+ if (kl > 0.0) {
734
+ tempdb.l++;
735
+ } else {
736
+ tempdb.l++;
737
+ }
738
+ }
739
+ *pow = tempdb.d;
740
+
741
+ return 1;
742
+ }
743
+
744
+
745
+ double pow_exact_rn(double x, double y, double sign,
746
+ int index, double ed, double zh, double zm) {
747
+ int H, G;
748
+ double powh, powm, powl;
749
+ double pow;
750
+ double kh, kl;
751
+ double log2xh;
752
+
753
+ pow_120(&H, &powh, &powm, &powl, &log2xh, y, index, ed, zh, zm);
754
+
755
+ if (pow_round_and_check_rn(&pow,H,powh,powm,powl,&G,&kh,&kl))
756
+ return sign * pow;
757
+
758
+ if (pow_exact_case(&pow,x,y,G,kh,kl,log2xh))
759
+ return sign * pow;
760
+
761
+ // printf("Could not decide the rounding to nearest of power.\n");
762
+
763
+ return -5.0;
764
+ }
765
+
766
+
767
+ double pow_rn(double x, double y) {
768
+ db_number xdb, ydb, yhdb, shiftedylog2xhMult2Ldb, powdb, twodb;
769
+ double sign;
770
+ int E, index;
771
+ double log2FastApprox, ed, ylog2xFast, f;
772
+ double yh, yl, ri, logih, logim, yrih, yril, th, zh, zm;
773
+ double p_t_1_0h;
774
+ double p_t_2_0h;
775
+ double p_t_3_0h;
776
+ double p_t_4_0h;
777
+ double p_t_5_0h;
778
+ double p_t_9_0h;
779
+ double p_t_10_0h;
780
+ double p_t_11_0h, p_t_11_0m;
781
+ double p_t_12_0h, p_t_12_0m;
782
+ double log2zh, log2zm;
783
+ double log2yh, log2ym;
784
+ double log2xh, log2xm;
785
+ double ylog2xh, ylog2xm;
786
+ double rh, r;
787
+ int k, index1, index2, H;
788
+ double tbl1, tbl2h, tbl2m;
789
+ double ph;
790
+ double powh, powm;
791
+ double twoH1074powh, twoH1074powm;
792
+ double shiftedpowh, nearestintpowh, delta, deltaint;
793
+ double rest;
794
+ double lowerTerms;
795
+ double temp1;
796
+ double zhSq, zhFour, p35, p46, p36, p7;
797
+ double xSq;
798
+
799
+ /* Fast rejection of special cases */
800
+ xdb.d = x;
801
+ ydb.d = y;
802
+
803
+ if (((((xdb.i[HI] >> 20) + 1) & 0x3ff) <= 1) ||
804
+ ((((ydb.i[HI] >> 20) + 1) & 0x3ff) <= 1)) {
805
+
806
+ /* Handle special cases before handling NaNs and Infinities */
807
+ if (x == 1.0) return 1.0;
808
+ if (y == 0.0) return 1.0;
809
+ if (y == 1.0) return x;
810
+ if (y == 2.0) return x * x; /* Remark: may yield uncorrect rounding on x86 for subnormal results */
811
+ if (y == -1.0) return 1 / x;
812
+
813
+ if ((x == 0.0) && ((ydb.i[HI] & 0x7ff00000) != 0x7ff00000)) {
814
+ /* x = +/-0 and y is neither NaN nor Infinity
815
+
816
+ We have four cases
817
+ (i) y < 0:
818
+ (a) y odd integer: return +/- Inf and raise divide-by-zero
819
+ (b) y not odd integer: return + Ind and raise divide-by-zero
820
+ (ii) (a) y odd integer: return +/- 0
821
+ (b) y not odd integer: return +0
822
+
823
+ Note that y = 0.0 has already been filtered out.
824
+ */
825
+ if (y < 0.0) {
826
+ if (isOddInteger(y))
827
+ return 1/x;
828
+ else
829
+ return 1/(x * x);
830
+ } else {
831
+ if (isOddInteger(y))
832
+ return x;
833
+ else
834
+ return x * x;
835
+ }
836
+ }
837
+
838
+ /* Handle NaNs and Infinities
839
+
840
+ Note: the cases (x,y) = (1,NaN) and (x,y) = (NaN,0) have already been handled.
841
+
842
+ */
843
+ if ((ydb.i[HI] & 0x7ff00000) == 0x7ff00000) {
844
+ /* Here y is NaN or Inf */
845
+ if (((ydb.i[HI] & 0x000fffff) | ydb.i[LO]) != 0) {
846
+ /* Here y is NaN, we return NaN */
847
+ return y;
848
+ }
849
+ /* Here y is +/- Inf
850
+
851
+ There are three main cases:
852
+ (i) x = -1: return 1
853
+ (ii) abs(x) > 1:
854
+ (a) y = +Inf: return +Inf
855
+ (b) y = -Inf: return +0
856
+ (iii) abs(x) < 1:
857
+ (a) y = +Inf: return +0
858
+ (b) y = -Inf: return +Inf
859
+
860
+ Note: the case x = 1 has already been filtered out
861
+ */
862
+ if (x == -1.0)
863
+ return 1.0;
864
+
865
+ /* Here x != 1, x != -1 */
866
+ if ((ABS(x) > 1.0) ^ ((ydb.i[HI] & 0x80000000) == 0)) {
867
+ /* abs(x) > 1 and y = -Inf or abs(x) < 1 and y = +Inf */
868
+ return 0.0;
869
+ } else {
870
+ /* abs(x) > 1 and y = +Inf or abs(x) < 1 and y = -Inf */
871
+ return ABS(y);
872
+ }
873
+ }
874
+
875
+ /* Here y is neither Inf nor NaN */
876
+
877
+ if ((xdb.i[HI] & 0x7ff00000) == 0x7ff00000) {
878
+ /* Here x is NaN or Inf */
879
+ if (((xdb.i[HI] & 0x000fffff) | xdb.i[LO]) != 0) {
880
+ /* Here x is NaN, we return NaN */
881
+ return x;
882
+ }
883
+ /* Here x is +/- Inf
884
+
885
+ There are two main cases:
886
+
887
+ (i) x is +Inf
888
+ (ii) x is -Inf
889
+ */
890
+ if ((xdb.i[HI] & 0x80000000) == 0) {
891
+ /* x is +Inf
892
+
893
+ (a) y > 0: return +Inf
894
+ (b) y < 0: return +0
895
+
896
+ Note: y = 0 has already been filtered out
897
+ */
898
+ if (y > 0.0) {
899
+ return x;
900
+ } else {
901
+ return 0.0;
902
+ }
903
+ } else {
904
+ /* x is -Inf
905
+
906
+ There are four cases:
907
+
908
+ (a) y > 0:
909
+ (*) y is an odd integer: return -Inf
910
+ (**) y is not an odd integer: return +Inf
911
+ (b) y < 0:
912
+ (*) y is an odd integer: return -0
913
+ (**) y is not an odd integer: return +0
914
+
915
+ Note: y = 0 has already been filtered out
916
+ */
917
+ if (y > 0.0) {
918
+ if (isOddInteger(y)) {
919
+ return x;
920
+ } else {
921
+ return -x;
922
+ }
923
+ } else {
924
+ if (isOddInteger(y)) {
925
+ return -0.0;
926
+ } else {
927
+ return 0.0;
928
+ }
929
+ }
930
+ }
931
+ }
932
+ }
933
+ /* Here both x and y are finite numbers */
934
+
935
+ /* Test now whether we have the case
936
+
937
+ (-x)^y = (-1)^y * x^y
938
+
939
+ where x is positive
940
+
941
+ */
942
+
943
+ sign = 1.0;
944
+ if (x < 0.0) {
945
+ /* x is negative
946
+ x^y is defined only if y is integer
947
+ */
948
+ if (!isInteger(y)) {
949
+ /* y is not integer
950
+
951
+ return NaN and raise invalid exception
952
+ */
953
+ return 0.0/0.0;
954
+ }
955
+ /* Here y is integer
956
+
957
+ Remove the sign of x and put (-1)^y in sign
958
+
959
+ */
960
+ x = -x; xdb.i[HI] &= 0x7fffffff;
961
+ if (isOddInteger(y)) {
962
+ sign = -sign;
963
+ }
964
+ }
965
+
966
+ /* Here x is strictly positive and finite */
967
+
968
+ /* Test now if abs(y) is in a range giving finite, non-trivial results x^y
969
+
970
+ We have
971
+
972
+ x^y = 2^(y * log2(x)) = 2^(y * log2(2^E * m)) = 2^(y * (E + log2(1 + f)))
973
+
974
+ We have overflow iff x^y >= 2^(1024), i.e. y * (E + log2(1 + f)) >= 1024
975
+ We have underflow (flush to zero) iff x^y <= 2^(-1076), i.e. y * (E + log2(1 + f)) <= - 1076
976
+ We round to 1.0 iff abs(y * (E + log2(1 + f))) <= 2^(-54)
977
+
978
+ We approximate log2(1 + f) as
979
+
980
+ log2(1 + f) = c * f * alpha(f)
981
+
982
+ where c is a constant and 0.853 < alpha(f) < 1.21
983
+
984
+ So we have surely
985
+ (i) overflow or underflow if abs(y * (E + c * f)) >= ceil(1076/0.853) = 1261
986
+ (ii) trivial rounding to 1.0 if abs(y * (E + c * f)) <= 2^(-55) <= 2^(-54)/1.21
987
+
988
+ */
989
+
990
+ /* We start the computation of the logarithm of x */
991
+
992
+ E = 0;
993
+ if ((xdb.i[HI] & 0xfff00000) == 0) {
994
+ /* x is subnormal, make it normal */
995
+ xdb.d *= two52;
996
+ E = -52;
997
+ }
998
+
999
+ E += (xdb.i[HI]>>20)-1023; /* extract the exponent */
1000
+ index = (xdb.i[HI] & 0x000fffff);
1001
+ xdb.i[HI] = index | 0x3ff00000; /* do exponent = 0 */
1002
+ index = (index + (1<<(20-L-1))) >> (20-L);
1003
+
1004
+ /* reduce such that sqrt(2)/2 < xdb.d < sqrt(2) */
1005
+ if (index >= MAXINDEX) { /* corresponds to xdb>sqrt(2)*/
1006
+ xdb.i[HI] -= 0x00100000;
1007
+ E++;
1008
+ }
1009
+
1010
+ ed = (double) E;
1011
+
1012
+ /* Continue with the exact range reduction of the logarithm of x */
1013
+
1014
+ yhdb.i[HI] = xdb.i[HI];
1015
+ yhdb.i[LO] = 0;
1016
+ yh = yhdb.d;
1017
+ yl = xdb.d - yh;
1018
+
1019
+
1020
+ /* Special handling for y = 3 or y = 4 and x on not more than 21 bits (without subnormals) */
1021
+ if ((yl == 0) && ((y == 3.0) || (y == 4.0)) && (E > -255)) {
1022
+ if (y == 3.0)
1023
+ return sign * (x * (x * x));
1024
+ else {
1025
+ xSq = x * x;
1026
+ return sign * (xSq * xSq);
1027
+ }
1028
+ }
1029
+
1030
+ index = index & INDEXMASK;
1031
+
1032
+ /* Now we have
1033
+
1034
+ log2(x) = E + log2(xdb.d)
1035
+
1036
+ with sqrt(2)/2 < xdb.d < sqrt(2)
1037
+
1038
+ Compute now f such that 1 + f = xdb.d and
1039
+ approximate
1040
+
1041
+ log2(1 + f) = logFastCoeff * f
1042
+
1043
+ */
1044
+
1045
+ f = xdb.d - 1.0;
1046
+ log2FastApprox = ed + logFastCoeff * f;
1047
+ ylog2xFast = y * log2FastApprox;
1048
+
1049
+ if (ABS(ylog2xFast) >= 1261.0) {
1050
+ if (ylog2xFast > 0.0) {
1051
+ /* y * log2(x) is positive, i.e. we overflow */
1052
+ return (sign * LARGEST) * LARGEST;
1053
+ } else {
1054
+ /* y * log2(x) is negative, i.e. we underflow */
1055
+ return (sign * SMALLEST) * SMALLEST;
1056
+ }
1057
+ }
1058
+
1059
+ if (ABS(ylog2xFast) <= twoM55) {
1060
+ /* abs(y * log2(x)) <= 2^(-55),
1061
+ we return 1.0 and set the inexact flag
1062
+ */
1063
+ return sign * (1.0 + SMALLEST);
1064
+ }
1065
+
1066
+ /* Now, we may still overflow or underflow but on some inputs only */
1067
+
1068
+ /*
1069
+ Read tables:
1070
+ Read one float for ri
1071
+ Read the first two doubles for -log(r_i) (out of three)
1072
+
1073
+ Organization of the table:
1074
+
1075
+ one struct entry per index, the struct entry containing
1076
+ r, logih, logim and logil in this order
1077
+ */
1078
+
1079
+ ri = argredtable[index].ri;
1080
+ /*
1081
+ Actually we don't need the logarithm entries now
1082
+ Move the following two lines to the eventual reconstruction
1083
+ As long as we don't have any if in the following code, we can overlap
1084
+ memory access with calculations
1085
+ */
1086
+ logih = argredtable[index].logih;
1087
+ logim = argredtable[index].logim;
1088
+
1089
+ /* Do range reduction:
1090
+
1091
+ zh + zm = y * ri - 1.0 correctly
1092
+
1093
+ Correctness is assured by use of two part yh + yl and 21 bit ri and Add12
1094
+
1095
+ Discard zl for higher monome degrees
1096
+ */
1097
+
1098
+ yrih = yh * ri;
1099
+ yril = yl * ri;
1100
+ th = yrih - 1.0;
1101
+ Add12Cond(zh, zm, th, yril);
1102
+
1103
+ /* Polynomial approximation of log2(1 + (zh + zm)) */
1104
+
1105
+ zhSq = zh * zh;
1106
+
1107
+ p35 = log2_70_p_coeff_3h + zhSq * log2_70_p_coeff_5h;
1108
+ p46 = log2_70_p_coeff_4h + zhSq * log2_70_p_coeff_6h;
1109
+ zhFour = zhSq * zhSq;
1110
+
1111
+ p36 = p35 + zh * p46;
1112
+ p7 = log2_70_p_coeff_7h * zhFour;
1113
+
1114
+ p_t_9_0h = p36 + p7;
1115
+
1116
+ p_t_10_0h = p_t_9_0h * zh;
1117
+ Add212(&p_t_11_0h,&p_t_11_0m,log2_70_p_coeff_2h,log2_70_p_coeff_2m,p_t_10_0h);
1118
+ MulAdd22(&p_t_12_0h,&p_t_12_0m,log2_70_p_coeff_1h,log2_70_p_coeff_1m,zh,zm,p_t_11_0h,p_t_11_0m);
1119
+ Mul22(&log2zh,&log2zm,p_t_12_0h,p_t_12_0m,zh,zm);
1120
+
1121
+ /* Reconstruction */
1122
+
1123
+ Add122(&log2yh,&log2ym,ed,logih,logim);
1124
+ Add22(&log2xh,&log2xm,log2yh,log2ym,log2zh,log2zm);
1125
+
1126
+ /* Produce ylog2xh + ylog2xm approximating y * log2(x)
1127
+
1128
+ Note: neither y nor y * log2(x) may not be subnormal and non-zero
1129
+ because of the fast overflow/underflow/trivial rounding test.
1130
+
1131
+ */
1132
+
1133
+ Mul12(&ylog2xh,&temp1,y,log2xh);
1134
+ ylog2xm = temp1 + y * log2xm;
1135
+
1136
+ /* Approximate now 2^(y * log2(x))
1137
+
1138
+ We have
1139
+
1140
+ 2^(ylog2xh + ylog2xm)
1141
+ = 2^ylog2xh * 2^ylog2xm
1142
+ = 2^H * 2^(ylog2xh - H) * 2^ylog2xm
1143
+ = 2^H * 2^(i2/2^8) * 2^(i1/2^13) * 2^(ylog2xh - H - i2/2^8 - i1/2^13) * 2^ylog2xm
1144
+ = 2^H * tbl2[i2] * (1 + tbl1[i1]) * (1 + p(rh)) + delta
1145
+ where rh \approx ylog2xh - H - i1/2^7 - i2/2^13 + ylog2xm
1146
+ */
1147
+
1148
+ shiftedylog2xhMult2Ldb.d = shiftConstTwoM13 + ylog2xh;
1149
+ r = ylog2xh - (shiftedylog2xhMult2Ldb.d - shiftConstTwoM13);
1150
+ k = shiftedylog2xhMult2Ldb.i[LO];
1151
+ H = k >> 13;
1152
+ index1 = k & INDEXMASK1;
1153
+ index2 = (k & INDEXMASK2) >> 5;
1154
+
1155
+ /* Renormalize reduced argument
1156
+
1157
+ This operation produces an error
1158
+
1159
+ 2^(z * (1 + eps)) = 2^z * (1 + eps')
1160
+
1161
+ where |eps| <= 2^(-53) and |eps'| <= 2^(-65)
1162
+
1163
+ */
1164
+ rh = r + ylog2xm;
1165
+
1166
+ /* Table reads */
1167
+ tbl1 = twoPowerIndex1[index1].hiM1;
1168
+ tbl2h = twoPowerIndex2[index2].hi;
1169
+ tbl2m = twoPowerIndex2[index2].mi;
1170
+
1171
+ /* Polynomial approximation */
1172
+ p_t_1_0h = exp2_p_coeff_3h;
1173
+ p_t_2_0h = p_t_1_0h * rh;
1174
+ p_t_3_0h = exp2_p_coeff_2h + p_t_2_0h;
1175
+ p_t_4_0h = p_t_3_0h * rh;
1176
+ p_t_5_0h = exp2_p_coeff_1h + p_t_4_0h;
1177
+ ph = p_t_5_0h * rh;
1178
+
1179
+ /* Reconstruction */
1180
+
1181
+ lowerTerms = tbl1 + (ph + tbl1 * ph);
1182
+
1183
+ Add212(&powh,&powm,tbl2h,tbl2m,tbl2h * lowerTerms);
1184
+
1185
+ /* Here we have
1186
+
1187
+ 2^H * (powh + powm) = x^y * (1 + eps)
1188
+
1189
+ with ||eps|| <= 2^(-60)
1190
+
1191
+ Check first if we overflow or underflow.
1192
+
1193
+ Check then if we can perform normal rounding or
1194
+ if we must perhaps perform subnormal rounding.
1195
+ We are sure that
1196
+
1197
+ 0.5 <= powh + powm <= 4.0
1198
+
1199
+ */
1200
+
1201
+ if (H >= 1025) {
1202
+ /* Here, we surely overflow
1203
+ Return sign * inf
1204
+ */
1205
+ return (sign * LARGEST) * LARGEST;
1206
+ }
1207
+
1208
+ if (H <= -1077) {
1209
+ /* Here, we surely underflow
1210
+ Return sign * 0 and set inexact flag
1211
+ */
1212
+ return (sign * SMALLEST) * SMALLEST;
1213
+ }
1214
+
1215
+ if (H > -1022) {
1216
+ /* We are sure to be able to perform normal rounding
1217
+ We must still be aware of the fact that the final
1218
+ result may overflow
1219
+ */
1220
+ if(powh == (powh + (powm * RNROUNDCST))) {
1221
+ powdb.d = powh;
1222
+ if (H < 1023) {
1223
+ powdb.i[HI] += H << 20;
1224
+ return sign * powdb.d;
1225
+ } else {
1226
+ /* May overflow: multiply by 2^H in two steps */
1227
+ powdb.i[HI] += (H - 3) << 20;
1228
+ return sign * powdb.d * 8.0;
1229
+ }
1230
+ }
1231
+ } else {
1232
+ /* We must perhaps perform subnormal rounding
1233
+
1234
+ We start by renormalizing the double-double
1235
+ powh + powm such that
1236
+
1237
+ 1 <= powh + powm < 2
1238
+
1239
+ */
1240
+
1241
+ if ((powh < 1.0) || ((powh == 1.0) && (powm < 0.0))) {
1242
+ powh *= 2.0;
1243
+ powm *= 2.0;
1244
+ H--;
1245
+ }
1246
+ if ((powh > 2.0) || ((powh == 2.0) && (powm >= 0.0))) {
1247
+ powh *= 0.5;
1248
+ powm *= 0.5;
1249
+ H++;
1250
+ }
1251
+
1252
+ /* Here we know that 1 <= powh + powm < 2
1253
+
1254
+ 2^H * (powh + powm) is subnormal or equal to the least normal
1255
+ iff H <= -1023
1256
+
1257
+ */
1258
+ if (H > -1023) {
1259
+ /* We have nevertheless normal rounding */
1260
+ if(powh == (powh + (powm * RNROUNDCST))) {
1261
+ powdb.d = powh;
1262
+ powdb.i[HI] += H << 20;
1263
+ return sign * powdb.d;
1264
+ }
1265
+ /* Here we have normal rounding but could not decide the rounding */
1266
+ } else {
1267
+ /* We have surely denormal rounding
1268
+
1269
+ This means
1270
+
1271
+ round(2^H * (powh + powm)) = 2^(-1074) * nearestint(2^(+1074) * 2^H * (powh + powm))
1272
+
1273
+ We compute the rounding (and test its TMD) of
1274
+
1275
+ nearestint(2^(H + 1074) * (powh + powm))
1276
+
1277
+ by computing first
1278
+
1279
+ nearestint(2^(H + 1074) * powh)
1280
+
1281
+ and then the absolute error
1282
+
1283
+ delta = 2^(H + 1074) * powh - nearestint(2^(H + 1074) * powh) + 2^(H + 1074) * powl
1284
+
1285
+ We can then refute or correct the rounding by tests on delta.
1286
+
1287
+ We know that 2^(H + 1074) <= 2^(51) and powh <= 2.0. Thus 2^(H + 1074) * powh <= 2^(52)
1288
+ We can hence compute nearestint using the shift trick.
1289
+
1290
+ We start by constructing 2^(H + 1074)
1291
+ */
1292
+
1293
+ twodb.i[HI] = (H + (1074 + 1023)) << 20;
1294
+ twodb.i[LO] = 0;
1295
+
1296
+ twoH1074powh = twodb.d * powh;
1297
+ twoH1074powm = twodb.d * powm;
1298
+
1299
+ shiftedpowh = two52 + twoH1074powh;
1300
+ nearestintpowh = shiftedpowh - two52;
1301
+ deltaint = twoH1074powh - nearestintpowh;
1302
+
1303
+ /* The next addition produces a very small
1304
+ rounding error we must account for in the rounding test.
1305
+ */
1306
+ delta = deltaint + twoH1074powm;
1307
+
1308
+ /* Correct now the rounding */
1309
+
1310
+ if (ABS(delta) > 0.5) {
1311
+ /* ABS(delta) > 0.5 */
1312
+ if (delta > 0.0) {
1313
+ /* nearestintpowh is too small by 1.0 */
1314
+ nearestintpowh += 1.0; /* exact */
1315
+ delta -= 1.0; /* exact */
1316
+ } else {
1317
+ /* nearestintpowh is too great by 1.0 */
1318
+ nearestintpowh -= 1.0; /* exact */
1319
+ delta += 1.0; /* exact */
1320
+ }
1321
+ }
1322
+
1323
+ /* Perform now the rounding test
1324
+
1325
+ This test filters out also half-ulp exact cases.
1326
+ Exact cases are not filtered out because C99 allows
1327
+ setting the underflow and/or inexact flags also
1328
+ for exact cases.
1329
+
1330
+ Compute first
1331
+
1332
+ rest = abs(0.5 - abs(delta)) * (1 + eps)
1333
+
1334
+ where abs(eps) <= 2^(-53)
1335
+
1336
+ We cannot decide the rounding if
1337
+
1338
+ rest <= 2^(H + 1074) * epsmax * 2
1339
+
1340
+ where epsmax is the bound for the approximating polynomials,
1341
+
1342
+ here epsmax = 2^(-60)
1343
+
1344
+ as follows by the following drawing
1345
+
1346
+ 2^52 2^(H + 1074) 2^0 2^(H + 1074) * epsmax
1347
+ +-------------------+----------------+---+------... |
1348
+ | 0 ... 0 | nearestintpowh | 1 | rest
1349
+ +-------------------+----------------+---+------... |
1350
+ | <---- 60 bits approx. ------->|
1351
+
1352
+ This means that we cannot decide the rounding if
1353
+
1354
+ rest * 1/epsmax * 0.5 <= 2^(H + 1074)
1355
+
1356
+ We store 1/epsmax * 0.5 on SUBNORMROUNDCST
1357
+ */
1358
+
1359
+ rest = ABS(0.5 - ABS(delta));
1360
+
1361
+ if (rest * SUBNORMROUNDCST > twodb.d) {
1362
+ /* Here we could decide the rounding
1363
+
1364
+ The result to return is
1365
+
1366
+ sign * 2^(-1074) * nearestintpowh
1367
+
1368
+ which is either subnormal or equal to the
1369
+ least normal.
1370
+
1371
+ Since we know that the multiplication
1372
+ does not imply any rounding error, we can
1373
+ perform it using the shift trick and a mask.
1374
+
1375
+ This trick yields to a 200 cycle speed-up on Athlon.
1376
+
1377
+ */
1378
+
1379
+ twodb.d = nearestintpowh + two52;
1380
+ twodb.i[HI] = (twodb.i[HI] + (1 << 20)) & 0x001fffff;
1381
+
1382
+ return sign * twodb.d;
1383
+ }
1384
+ }
1385
+ }
1386
+
1387
+ /* If we are here, we could not decide the rounding or are half-ulp
1388
+
1389
+ Launch now exacter phases and exact and half-ulp testing
1390
+
1391
+ */
1392
+
1393
+ return pow_exact_rn(x,y,sign,index,ed,zh,zm);
1394
+
1395
+ }
1396
+