crmf 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -0
- data/crmf.gemspec +102 -1
- data/ext/crlibm-1.0beta5/AUTHORS +2 -0
- data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
- data/ext/crlibm-1.0beta5/COPYING +340 -0
- data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
- data/ext/crlibm-1.0beta5/ChangeLog +125 -0
- data/ext/crlibm-1.0beta5/Makefile.am +134 -0
- data/ext/crlibm-1.0beta5/NEWS +0 -0
- data/ext/crlibm-1.0beta5/README +31 -0
- data/ext/crlibm-1.0beta5/README.DEV +23 -0
- data/ext/crlibm-1.0beta5/README.md +5 -0
- data/ext/crlibm-1.0beta5/TODO +66 -0
- data/ext/crlibm-1.0beta5/VERSION +1 -0
- data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
- data/ext/crlibm-1.0beta5/acos-td.h +629 -0
- data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
- data/ext/crlibm-1.0beta5/asin-td.h +620 -0
- data/ext/crlibm-1.0beta5/asincos.c +4488 -0
- data/ext/crlibm-1.0beta5/asincos.h +575 -0
- data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
- data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
- data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
- data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
- data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
- data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
- data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
- data/ext/crlibm-1.0beta5/configure.ac +419 -0
- data/ext/crlibm-1.0beta5/crlibm.h +204 -0
- data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
- data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
- data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
- data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
- data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
- data/ext/crlibm-1.0beta5/double-extended.h +496 -0
- data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
- data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
- data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
- data/ext/crlibm-1.0beta5/exp-td.h +685 -0
- data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
- data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
- data/ext/crlibm-1.0beta5/expm1.c +2515 -0
- data/ext/crlibm-1.0beta5/expm1.h +715 -0
- data/ext/crlibm-1.0beta5/interval.h +238 -0
- data/ext/crlibm-1.0beta5/log-de.c +480 -0
- data/ext/crlibm-1.0beta5/log-de.h +747 -0
- data/ext/crlibm-1.0beta5/log-de2.c +280 -0
- data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
- data/ext/crlibm-1.0beta5/log-td.c +1158 -0
- data/ext/crlibm-1.0beta5/log-td.h +819 -0
- data/ext/crlibm-1.0beta5/log.c +2244 -0
- data/ext/crlibm-1.0beta5/log.h +1592 -0
- data/ext/crlibm-1.0beta5/log10-td.c +906 -0
- data/ext/crlibm-1.0beta5/log10-td.h +823 -0
- data/ext/crlibm-1.0beta5/log1p.c +1295 -0
- data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
- data/ext/crlibm-1.0beta5/log2-td.h +821 -0
- data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
- data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
- data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
- data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
- data/ext/crlibm-1.0beta5/log_fast.c +360 -0
- data/ext/crlibm-1.0beta5/log_fast.h +440 -0
- data/ext/crlibm-1.0beta5/pow.c +1396 -0
- data/ext/crlibm-1.0beta5/pow.h +3101 -0
- data/ext/crlibm-1.0beta5/prepare +20 -0
- data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
- data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
- data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
- data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
- data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
- data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
- data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
- data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
- data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
- data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
- data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
- data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
- data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
- data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
- data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
- data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
- data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
- data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
- data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
- data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
- data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
- data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
- data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
- data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
- data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
- data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
- data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
- data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
- data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
- data/ext/crlibm-1.0beta5/trigpi.h +556 -0
- data/ext/crlibm-1.0beta5/triple-double.c +57 -0
- data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
- data/ext/crmf/crmf.c +16 -16
- data/ext/crmf/extconf.rb +12 -8
- data/lib/crmf/version.rb +1 -1
- data/tests/perf.rb +100 -219
- metadata +104 -3
- data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,4488 @@
|
|
1
|
+
/*
|
2
|
+
* Correctly rounded arcsine and arccosine
|
3
|
+
*
|
4
|
+
* Copyright (c) 2007 Christoph Lauter (ENS Lyon),
|
5
|
+
* with Sylvain Chevillard (ENS Lyon) for the polynomials
|
6
|
+
*
|
7
|
+
* This file is part of the crlibm library developed by the Arenaire
|
8
|
+
* project at Ecole Normale Superieure de Lyon
|
9
|
+
*
|
10
|
+
* This program is free software; you can redistribute it and/or modify
|
11
|
+
* it under the terms of the GNU Lesser General Public License as published by
|
12
|
+
* the Free Software Foundation; either version 2 of the License, or
|
13
|
+
* (at your option) any later version.
|
14
|
+
*
|
15
|
+
* This program is distributed in the hope that it will be useful,
|
16
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18
|
+
* GNU General Public License for more details.
|
19
|
+
*
|
20
|
+
* You should have received a copy of the GNU Lesser General Public License
|
21
|
+
* along with this program; if not, write to the Free Software
|
22
|
+
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
23
|
+
*/
|
24
|
+
|
25
|
+
#include <stdio.h>
|
26
|
+
#include <stdlib.h>
|
27
|
+
#include "crlibm.h"
|
28
|
+
#include "crlibm_private.h"
|
29
|
+
#include "triple-double.h"
|
30
|
+
#include "asincos.h"
|
31
|
+
|
32
|
+
static inline void p0_quick(double *p_resh, double *p_resm, double x, int32_t xhi) {
|
33
|
+
double p_x_0_pow2h, p_x_0_pow2m;
|
34
|
+
|
35
|
+
|
36
|
+
|
37
|
+
|
38
|
+
|
39
|
+
double p_t_1_0h;
|
40
|
+
double p_t_2_0h;
|
41
|
+
double p_t_3_0h;
|
42
|
+
double p_t_4_0h;
|
43
|
+
double p_t_5_0h;
|
44
|
+
double p_t_6_0h;
|
45
|
+
double p_t_7_0h;
|
46
|
+
double p_t_8_0h;
|
47
|
+
double p_t_9_0h;
|
48
|
+
double p_t_10_0h;
|
49
|
+
double p_t_11_0h;
|
50
|
+
double p_t_12_0h;
|
51
|
+
double p_t_13_0h;
|
52
|
+
double p_t_14_0h;
|
53
|
+
double p_t_15_0h;
|
54
|
+
double p_t_16_0h;
|
55
|
+
double p_t_17_0h, p_t_17_0m;
|
56
|
+
double p_t_18_0h, p_t_18_0m;
|
57
|
+
double p_t_19_0h, p_t_19_0m;
|
58
|
+
double p_t_20_0h, p_t_20_0m;
|
59
|
+
|
60
|
+
if (xhi < EXTRABOUND2) {
|
61
|
+
|
62
|
+
double t1, t2, t3;
|
63
|
+
|
64
|
+
t2 = p0_quick_coeff_3h * x;
|
65
|
+
t1 = x * x;
|
66
|
+
t3 = t1 * t2;
|
67
|
+
|
68
|
+
Add12(*p_resh,*p_resm,x,t3);
|
69
|
+
|
70
|
+
|
71
|
+
return;
|
72
|
+
}
|
73
|
+
|
74
|
+
Mul12(&p_x_0_pow2h,&p_x_0_pow2m,x,x);
|
75
|
+
|
76
|
+
p_t_15_0h = p0_quick_coeff_5h;
|
77
|
+
if (xhi > EXTRABOUND) {
|
78
|
+
|
79
|
+
p_t_1_0h = p0_quick_coeff_19h;
|
80
|
+
p_t_2_0h = p_t_1_0h * p_x_0_pow2h;
|
81
|
+
p_t_3_0h = p0_quick_coeff_17h + p_t_2_0h;
|
82
|
+
p_t_4_0h = p_t_3_0h * p_x_0_pow2h;
|
83
|
+
p_t_5_0h = p0_quick_coeff_15h + p_t_4_0h;
|
84
|
+
p_t_6_0h = p_t_5_0h * p_x_0_pow2h;
|
85
|
+
p_t_7_0h = p0_quick_coeff_13h + p_t_6_0h;
|
86
|
+
p_t_8_0h = p_t_7_0h * p_x_0_pow2h;
|
87
|
+
p_t_9_0h = p0_quick_coeff_11h + p_t_8_0h;
|
88
|
+
p_t_10_0h = p_t_9_0h * p_x_0_pow2h;
|
89
|
+
p_t_11_0h = p0_quick_coeff_9h + p_t_10_0h;
|
90
|
+
p_t_12_0h = p_t_11_0h * p_x_0_pow2h;
|
91
|
+
p_t_13_0h = p0_quick_coeff_7h + p_t_12_0h;
|
92
|
+
p_t_14_0h = p_t_13_0h * p_x_0_pow2h;
|
93
|
+
p_t_15_0h = p_t_15_0h + p_t_14_0h;
|
94
|
+
}
|
95
|
+
|
96
|
+
p_t_16_0h = p_t_15_0h * p_x_0_pow2h;
|
97
|
+
Add12(p_t_17_0h,p_t_17_0m,p0_quick_coeff_3h,p_t_16_0h);
|
98
|
+
|
99
|
+
Mul122(&p_t_18_0h,&p_t_18_0m,x,p_x_0_pow2h,p_x_0_pow2m);
|
100
|
+
Mul22(&p_t_19_0h,&p_t_19_0m,p_t_17_0h,p_t_17_0m,p_t_18_0h,p_t_18_0m);
|
101
|
+
|
102
|
+
Add122(&p_t_20_0h,&p_t_20_0m,x,p_t_19_0h,p_t_19_0m);
|
103
|
+
|
104
|
+
*p_resh = p_t_20_0h; *p_resm = p_t_20_0m;
|
105
|
+
|
106
|
+
|
107
|
+
}
|
108
|
+
|
109
|
+
static inline void p_quick(double *p_resh, double *p_resm, double x, int index) {
|
110
|
+
|
111
|
+
|
112
|
+
double p_t_1_0h;
|
113
|
+
double p_t_2_0h;
|
114
|
+
double p_t_3_0h;
|
115
|
+
double p_t_4_0h;
|
116
|
+
double p_t_5_0h;
|
117
|
+
double p_t_6_0h;
|
118
|
+
double p_t_7_0h;
|
119
|
+
double p_t_8_0h;
|
120
|
+
double p_t_9_0h;
|
121
|
+
double p_t_10_0h;
|
122
|
+
double p_t_11_0h;
|
123
|
+
double p_t_12_0h;
|
124
|
+
double p_t_13_0h;
|
125
|
+
double p_t_14_0h;
|
126
|
+
double p_t_15_0h;
|
127
|
+
double p_t_16_0h;
|
128
|
+
double p_t_17_0h;
|
129
|
+
double p_t_18_0h;
|
130
|
+
double p_t_19_0h;
|
131
|
+
double p_t_20_0h;
|
132
|
+
double p_t_21_0h, p_t_21_0m;
|
133
|
+
double p_t_22_0h, p_t_22_0m;
|
134
|
+
double p_t_23_0h, p_t_23_0m;
|
135
|
+
|
136
|
+
p_t_1_0h = p_quick_coeff_12h;
|
137
|
+
p_t_2_0h = p_t_1_0h * x;
|
138
|
+
p_t_3_0h = p_quick_coeff_11h + p_t_2_0h;
|
139
|
+
p_t_4_0h = p_t_3_0h * x;
|
140
|
+
p_t_5_0h = p_quick_coeff_10h + p_t_4_0h;
|
141
|
+
p_t_6_0h = p_t_5_0h * x;
|
142
|
+
p_t_7_0h = p_quick_coeff_9h + p_t_6_0h;
|
143
|
+
p_t_8_0h = p_t_7_0h * x;
|
144
|
+
p_t_9_0h = p_quick_coeff_8h + p_t_8_0h;
|
145
|
+
p_t_10_0h = p_t_9_0h * x;
|
146
|
+
p_t_11_0h = p_quick_coeff_7h + p_t_10_0h;
|
147
|
+
p_t_12_0h = p_t_11_0h * x;
|
148
|
+
p_t_13_0h = p_quick_coeff_6h + p_t_12_0h;
|
149
|
+
p_t_14_0h = p_t_13_0h * x;
|
150
|
+
p_t_15_0h = p_quick_coeff_5h + p_t_14_0h;
|
151
|
+
p_t_16_0h = p_t_15_0h * x;
|
152
|
+
p_t_17_0h = p_quick_coeff_4h + p_t_16_0h;
|
153
|
+
p_t_18_0h = p_t_17_0h * x;
|
154
|
+
p_t_19_0h = p_quick_coeff_3h + p_t_18_0h;
|
155
|
+
p_t_20_0h = p_t_19_0h * x;
|
156
|
+
Add12(p_t_21_0h,p_t_21_0m,p_quick_coeff_2h,p_t_20_0h);
|
157
|
+
MulAdd212(&p_t_22_0h,&p_t_22_0m,p_quick_coeff_1h,p_quick_coeff_1m,x,p_t_21_0h,p_t_21_0m);
|
158
|
+
MulAdd212(&p_t_23_0h,&p_t_23_0m,p_quick_coeff_0h,p_quick_coeff_0m,x,p_t_22_0h,p_t_22_0m);
|
159
|
+
*p_resh = p_t_23_0h; *p_resm = p_t_23_0m;
|
160
|
+
|
161
|
+
}
|
162
|
+
|
163
|
+
|
164
|
+
|
165
|
+
static inline void p9_quick(double *p_resh, double *p_resm, double x) {
|
166
|
+
|
167
|
+
|
168
|
+
|
169
|
+
|
170
|
+
double p_t_1_0h;
|
171
|
+
double p_t_2_0h;
|
172
|
+
double p_t_3_0h;
|
173
|
+
double p_t_4_0h;
|
174
|
+
double p_t_5_0h;
|
175
|
+
double p_t_6_0h;
|
176
|
+
double p_t_7_0h;
|
177
|
+
double p_t_8_0h;
|
178
|
+
double p_t_9_0h;
|
179
|
+
double p_t_10_0h;
|
180
|
+
double p_t_11_0h;
|
181
|
+
double p_t_12_0h;
|
182
|
+
double p_t_13_0h;
|
183
|
+
double p_t_14_0h;
|
184
|
+
double p_t_15_0h;
|
185
|
+
double p_t_16_0h;
|
186
|
+
double p_t_17_0h;
|
187
|
+
double p_t_18_0h;
|
188
|
+
double p_t_19_0h;
|
189
|
+
double p_t_20_0h;
|
190
|
+
double p_t_21_0h, p_t_21_0m;
|
191
|
+
double p_t_22_0h, p_t_22_0m;
|
192
|
+
double p_t_23_0h, p_t_23_0m;
|
193
|
+
|
194
|
+
|
195
|
+
|
196
|
+
p_t_1_0h = p9_quick_coeff_11h;
|
197
|
+
p_t_2_0h = p_t_1_0h * x;
|
198
|
+
p_t_3_0h = p9_quick_coeff_10h + p_t_2_0h;
|
199
|
+
p_t_4_0h = p_t_3_0h * x;
|
200
|
+
p_t_5_0h = p9_quick_coeff_9h + p_t_4_0h;
|
201
|
+
p_t_6_0h = p_t_5_0h * x;
|
202
|
+
p_t_7_0h = p9_quick_coeff_8h + p_t_6_0h;
|
203
|
+
p_t_8_0h = p_t_7_0h * x;
|
204
|
+
p_t_9_0h = p9_quick_coeff_7h + p_t_8_0h;
|
205
|
+
p_t_10_0h = p_t_9_0h * x;
|
206
|
+
p_t_11_0h = p9_quick_coeff_6h + p_t_10_0h;
|
207
|
+
p_t_12_0h = p_t_11_0h * x;
|
208
|
+
p_t_13_0h = p9_quick_coeff_5h + p_t_12_0h;
|
209
|
+
p_t_14_0h = p_t_13_0h * x;
|
210
|
+
p_t_15_0h = p9_quick_coeff_4h + p_t_14_0h;
|
211
|
+
p_t_16_0h = p_t_15_0h * x;
|
212
|
+
p_t_17_0h = p9_quick_coeff_3h + p_t_16_0h;
|
213
|
+
p_t_18_0h = p_t_17_0h * x;
|
214
|
+
p_t_19_0h = p9_quick_coeff_2h + p_t_18_0h;
|
215
|
+
p_t_20_0h = p_t_19_0h * x;
|
216
|
+
Add12(p_t_21_0h,p_t_21_0m,p9_quick_coeff_1h,p_t_20_0h);
|
217
|
+
Mul122(&p_t_22_0h,&p_t_22_0m,x,p_t_21_0h,p_t_21_0m);
|
218
|
+
Add122(&p_t_23_0h,&p_t_23_0m,p9_quick_coeff_0h,p_t_22_0h,p_t_22_0m);
|
219
|
+
*p_resh = p_t_23_0h; *p_resm = p_t_23_0m;
|
220
|
+
|
221
|
+
|
222
|
+
}
|
223
|
+
|
224
|
+
|
225
|
+
|
226
|
+
static void p0_accu(double *p_resh, double *p_resm, double *p_resl, double x) {
|
227
|
+
double p_x_0_pow2h, p_x_0_pow2m;
|
228
|
+
|
229
|
+
|
230
|
+
Mul12(&p_x_0_pow2h,&p_x_0_pow2m,x,x);
|
231
|
+
|
232
|
+
|
233
|
+
double p_t_1_0h;
|
234
|
+
double p_t_2_0h;
|
235
|
+
double p_t_3_0h;
|
236
|
+
double p_t_4_0h;
|
237
|
+
double p_t_5_0h;
|
238
|
+
double p_t_6_0h;
|
239
|
+
double p_t_7_0h, p_t_7_0m;
|
240
|
+
double p_t_8_0h, p_t_8_0m;
|
241
|
+
double p_t_9_0h, p_t_9_0m;
|
242
|
+
double p_t_10_0h, p_t_10_0m;
|
243
|
+
double p_t_11_0h, p_t_11_0m;
|
244
|
+
double p_t_12_0h, p_t_12_0m;
|
245
|
+
double p_t_13_0h, p_t_13_0m;
|
246
|
+
double p_t_14_0h, p_t_14_0m;
|
247
|
+
double p_t_15_0h, p_t_15_0m;
|
248
|
+
double p_t_16_0h, p_t_16_0m;
|
249
|
+
double p_t_17_0h, p_t_17_0m;
|
250
|
+
double p_t_18_0h, p_t_18_0m;
|
251
|
+
double p_t_19_0h, p_t_19_0m;
|
252
|
+
double p_t_20_0h, p_t_20_0m;
|
253
|
+
double p_t_21_0h, p_t_21_0m;
|
254
|
+
double p_t_22_0h, p_t_22_0m;
|
255
|
+
double p_t_23_0h, p_t_23_0m;
|
256
|
+
double p_t_24_0h, p_t_24_0m;
|
257
|
+
double p_t_25_0h, p_t_25_0m;
|
258
|
+
double p_t_26_0h, p_t_26_0m;
|
259
|
+
double p_t_27_0h, p_t_27_0m;
|
260
|
+
double p_t_28_0h, p_t_28_0m, p_t_28_0l;
|
261
|
+
double p_t_29_0h, p_t_29_0m, p_t_29_0l;
|
262
|
+
double p_t_30_0h, p_t_30_0m, p_t_30_0l;
|
263
|
+
double p_t_31_0h, p_t_31_0m, p_t_31_0l;
|
264
|
+
double p_t_32_0h, p_t_32_0m, p_t_32_0l;
|
265
|
+
double p_t_33_0h, p_t_33_0m, p_t_33_0l;
|
266
|
+
double p_t_34_0h, p_t_34_0m, p_t_34_0l;
|
267
|
+
double p_t_35_0h, p_t_35_0m, p_t_35_0l;
|
268
|
+
|
269
|
+
|
270
|
+
|
271
|
+
p_t_1_0h = p0_accu_coeff_37h;
|
272
|
+
p_t_2_0h = p_t_1_0h * p_x_0_pow2h;
|
273
|
+
p_t_3_0h = p0_accu_coeff_35h + p_t_2_0h;
|
274
|
+
p_t_4_0h = p_t_3_0h * p_x_0_pow2h;
|
275
|
+
p_t_5_0h = p0_accu_coeff_33h + p_t_4_0h;
|
276
|
+
p_t_6_0h = p_t_5_0h * p_x_0_pow2h;
|
277
|
+
Add12(p_t_7_0h,p_t_7_0m,p0_accu_coeff_31h,p_t_6_0h);
|
278
|
+
Mul22(&p_t_8_0h,&p_t_8_0m,p_t_7_0h,p_t_7_0m,p_x_0_pow2h,p_x_0_pow2m);
|
279
|
+
Add122(&p_t_9_0h,&p_t_9_0m,p0_accu_coeff_29h,p_t_8_0h,p_t_8_0m);
|
280
|
+
Mul22(&p_t_10_0h,&p_t_10_0m,p_t_9_0h,p_t_9_0m,p_x_0_pow2h,p_x_0_pow2m);
|
281
|
+
Add122(&p_t_11_0h,&p_t_11_0m,p0_accu_coeff_27h,p_t_10_0h,p_t_10_0m);
|
282
|
+
Mul22(&p_t_12_0h,&p_t_12_0m,p_t_11_0h,p_t_11_0m,p_x_0_pow2h,p_x_0_pow2m);
|
283
|
+
Add122(&p_t_13_0h,&p_t_13_0m,p0_accu_coeff_25h,p_t_12_0h,p_t_12_0m);
|
284
|
+
Mul22(&p_t_14_0h,&p_t_14_0m,p_t_13_0h,p_t_13_0m,p_x_0_pow2h,p_x_0_pow2m);
|
285
|
+
Add122(&p_t_15_0h,&p_t_15_0m,p0_accu_coeff_23h,p_t_14_0h,p_t_14_0m);
|
286
|
+
Mul22(&p_t_16_0h,&p_t_16_0m,p_t_15_0h,p_t_15_0m,p_x_0_pow2h,p_x_0_pow2m);
|
287
|
+
Add122(&p_t_17_0h,&p_t_17_0m,p0_accu_coeff_21h,p_t_16_0h,p_t_16_0m);
|
288
|
+
Mul22(&p_t_18_0h,&p_t_18_0m,p_t_17_0h,p_t_17_0m,p_x_0_pow2h,p_x_0_pow2m);
|
289
|
+
Add122(&p_t_19_0h,&p_t_19_0m,p0_accu_coeff_19h,p_t_18_0h,p_t_18_0m);
|
290
|
+
Mul22(&p_t_20_0h,&p_t_20_0m,p_t_19_0h,p_t_19_0m,p_x_0_pow2h,p_x_0_pow2m);
|
291
|
+
Add122(&p_t_21_0h,&p_t_21_0m,p0_accu_coeff_17h,p_t_20_0h,p_t_20_0m);
|
292
|
+
Mul22(&p_t_22_0h,&p_t_22_0m,p_t_21_0h,p_t_21_0m,p_x_0_pow2h,p_x_0_pow2m);
|
293
|
+
Add122(&p_t_23_0h,&p_t_23_0m,p0_accu_coeff_15h,p_t_22_0h,p_t_22_0m);
|
294
|
+
MulAdd22(&p_t_24_0h,&p_t_24_0m,p0_accu_coeff_13h,p0_accu_coeff_13m,p_x_0_pow2h,p_x_0_pow2m,p_t_23_0h,p_t_23_0m);
|
295
|
+
MulAdd22(&p_t_25_0h,&p_t_25_0m,p0_accu_coeff_11h,p0_accu_coeff_11m,p_x_0_pow2h,p_x_0_pow2m,p_t_24_0h,p_t_24_0m);
|
296
|
+
MulAdd22(&p_t_26_0h,&p_t_26_0m,p0_accu_coeff_9h,p0_accu_coeff_9m,p_x_0_pow2h,p_x_0_pow2m,p_t_25_0h,p_t_25_0m);
|
297
|
+
Mul22(&p_t_27_0h,&p_t_27_0m,p_t_26_0h,p_t_26_0m,p_x_0_pow2h,p_x_0_pow2m);
|
298
|
+
Add23(&p_t_28_0h,&p_t_28_0m,&p_t_28_0l,p0_accu_coeff_7h,p0_accu_coeff_7m,p_t_27_0h,p_t_27_0m);
|
299
|
+
Mul233(&p_t_29_0h,&p_t_29_0m,&p_t_29_0l,p_x_0_pow2h,p_x_0_pow2m,p_t_28_0h,p_t_28_0m,p_t_28_0l);
|
300
|
+
Add233(&p_t_30_0h,&p_t_30_0m,&p_t_30_0l,p0_accu_coeff_5h,p0_accu_coeff_5m,p_t_29_0h,p_t_29_0m,p_t_29_0l);
|
301
|
+
Mul233(&p_t_31_0h,&p_t_31_0m,&p_t_31_0l,p_x_0_pow2h,p_x_0_pow2m,p_t_30_0h,p_t_30_0m,p_t_30_0l);
|
302
|
+
Add233(&p_t_32_0h,&p_t_32_0m,&p_t_32_0l,p0_accu_coeff_3h,p0_accu_coeff_3m,p_t_31_0h,p_t_31_0m,p_t_31_0l);
|
303
|
+
Mul233(&p_t_33_0h,&p_t_33_0m,&p_t_33_0l,p_x_0_pow2h,p_x_0_pow2m,p_t_32_0h,p_t_32_0m,p_t_32_0l);
|
304
|
+
Add133(&p_t_34_0h,&p_t_34_0m,&p_t_34_0l,p0_accu_coeff_1h,p_t_33_0h,p_t_33_0m,p_t_33_0l);
|
305
|
+
Mul133(&p_t_35_0h,&p_t_35_0m,&p_t_35_0l,x,p_t_34_0h,p_t_34_0m,p_t_34_0l);
|
306
|
+
Renormalize3(p_resh,p_resm,p_resl,p_t_35_0h,p_t_35_0m,p_t_35_0l);
|
307
|
+
|
308
|
+
|
309
|
+
}
|
310
|
+
|
311
|
+
|
312
|
+
static void p_accu(double *p_resh, double *p_resm, double *p_resl, double x, int index) {
|
313
|
+
|
314
|
+
|
315
|
+
double p_t_1_0h;
|
316
|
+
double p_t_2_0h;
|
317
|
+
double p_t_3_0h;
|
318
|
+
double p_t_4_0h;
|
319
|
+
double p_t_5_0h;
|
320
|
+
double p_t_6_0h;
|
321
|
+
double p_t_7_0h;
|
322
|
+
double p_t_8_0h;
|
323
|
+
double p_t_9_0h;
|
324
|
+
double p_t_10_0h;
|
325
|
+
double p_t_11_0h;
|
326
|
+
double p_t_12_0h;
|
327
|
+
double p_t_13_0h;
|
328
|
+
double p_t_14_0h;
|
329
|
+
double p_t_15_0h;
|
330
|
+
double p_t_16_0h;
|
331
|
+
double p_t_17_0h, p_t_17_0m;
|
332
|
+
double p_t_18_0h, p_t_18_0m;
|
333
|
+
double p_t_19_0h, p_t_19_0m;
|
334
|
+
double p_t_20_0h, p_t_20_0m;
|
335
|
+
double p_t_21_0h, p_t_21_0m;
|
336
|
+
double p_t_22_0h, p_t_22_0m;
|
337
|
+
double p_t_23_0h, p_t_23_0m;
|
338
|
+
double p_t_24_0h, p_t_24_0m;
|
339
|
+
double p_t_25_0h, p_t_25_0m;
|
340
|
+
double p_t_26_0h, p_t_26_0m;
|
341
|
+
double p_t_27_0h, p_t_27_0m;
|
342
|
+
double p_t_28_0h, p_t_28_0m;
|
343
|
+
double p_t_29_0h, p_t_29_0m;
|
344
|
+
double p_t_30_0h, p_t_30_0m;
|
345
|
+
double p_t_31_0h, p_t_31_0m;
|
346
|
+
double p_t_32_0h, p_t_32_0m;
|
347
|
+
double p_t_33_0h, p_t_33_0m;
|
348
|
+
double p_t_34_0h, p_t_34_0m;
|
349
|
+
double p_t_35_0h, p_t_35_0m, p_t_35_0l;
|
350
|
+
double p_t_36_0h, p_t_36_0m, p_t_36_0l;
|
351
|
+
double p_t_37_0h, p_t_37_0m, p_t_37_0l;
|
352
|
+
double p_t_38_0h, p_t_38_0m, p_t_38_0l;
|
353
|
+
double p_t_39_0h, p_t_39_0m, p_t_39_0l;
|
354
|
+
|
355
|
+
|
356
|
+
|
357
|
+
p_t_1_0h = p_accu_coeff_22h;
|
358
|
+
p_t_2_0h = p_t_1_0h * x;
|
359
|
+
p_t_3_0h = p_accu_coeff_21h + p_t_2_0h;
|
360
|
+
p_t_4_0h = p_t_3_0h * x;
|
361
|
+
p_t_5_0h = p_accu_coeff_20h + p_t_4_0h;
|
362
|
+
p_t_6_0h = p_t_5_0h * x;
|
363
|
+
p_t_7_0h = p_accu_coeff_19h + p_t_6_0h;
|
364
|
+
p_t_8_0h = p_t_7_0h * x;
|
365
|
+
p_t_9_0h = p_accu_coeff_18h + p_t_8_0h;
|
366
|
+
p_t_10_0h = p_t_9_0h * x;
|
367
|
+
p_t_11_0h = p_accu_coeff_17h + p_t_10_0h;
|
368
|
+
p_t_12_0h = p_t_11_0h * x;
|
369
|
+
p_t_13_0h = p_accu_coeff_16h + p_t_12_0h;
|
370
|
+
p_t_14_0h = p_t_13_0h * x;
|
371
|
+
p_t_15_0h = p_accu_coeff_15h + p_t_14_0h;
|
372
|
+
p_t_16_0h = p_t_15_0h * x;
|
373
|
+
Add12(p_t_17_0h,p_t_17_0m,p_accu_coeff_14h,p_t_16_0h);
|
374
|
+
Mul122(&p_t_18_0h,&p_t_18_0m,x,p_t_17_0h,p_t_17_0m);
|
375
|
+
Add122(&p_t_19_0h,&p_t_19_0m,p_accu_coeff_13h,p_t_18_0h,p_t_18_0m);
|
376
|
+
Mul122(&p_t_20_0h,&p_t_20_0m,x,p_t_19_0h,p_t_19_0m);
|
377
|
+
Add122(&p_t_21_0h,&p_t_21_0m,p_accu_coeff_12h,p_t_20_0h,p_t_20_0m);
|
378
|
+
Mul122(&p_t_22_0h,&p_t_22_0m,x,p_t_21_0h,p_t_21_0m);
|
379
|
+
Add122(&p_t_23_0h,&p_t_23_0m,p_accu_coeff_11h,p_t_22_0h,p_t_22_0m);
|
380
|
+
Mul122(&p_t_24_0h,&p_t_24_0m,x,p_t_23_0h,p_t_23_0m);
|
381
|
+
Add122(&p_t_25_0h,&p_t_25_0m,p_accu_coeff_10h,p_t_24_0h,p_t_24_0m);
|
382
|
+
Mul122(&p_t_26_0h,&p_t_26_0m,x,p_t_25_0h,p_t_25_0m);
|
383
|
+
Add122(&p_t_27_0h,&p_t_27_0m,p_accu_coeff_9h,p_t_26_0h,p_t_26_0m);
|
384
|
+
MulAdd212(&p_t_28_0h,&p_t_28_0m,p_accu_coeff_8h,p_accu_coeff_8m,x,p_t_27_0h,p_t_27_0m);
|
385
|
+
MulAdd212(&p_t_29_0h,&p_t_29_0m,p_accu_coeff_7h,p_accu_coeff_7m,x,p_t_28_0h,p_t_28_0m);
|
386
|
+
MulAdd212(&p_t_30_0h,&p_t_30_0m,p_accu_coeff_6h,p_accu_coeff_6m,x,p_t_29_0h,p_t_29_0m);
|
387
|
+
MulAdd212(&p_t_31_0h,&p_t_31_0m,p_accu_coeff_5h,p_accu_coeff_5m,x,p_t_30_0h,p_t_30_0m);
|
388
|
+
MulAdd212(&p_t_32_0h,&p_t_32_0m,p_accu_coeff_4h,p_accu_coeff_4m,x,p_t_31_0h,p_t_31_0m);
|
389
|
+
MulAdd212(&p_t_33_0h,&p_t_33_0m,p_accu_coeff_3h,p_accu_coeff_3m,x,p_t_32_0h,p_t_32_0m);
|
390
|
+
Mul122(&p_t_34_0h,&p_t_34_0m,x,p_t_33_0h,p_t_33_0m);
|
391
|
+
Add23(&p_t_35_0h,&p_t_35_0m,&p_t_35_0l,p_accu_coeff_2h,p_accu_coeff_2m,p_t_34_0h,p_t_34_0m);
|
392
|
+
Mul133(&p_t_36_0h,&p_t_36_0m,&p_t_36_0l,x,p_t_35_0h,p_t_35_0m,p_t_35_0l);
|
393
|
+
Add233(&p_t_37_0h,&p_t_37_0m,&p_t_37_0l,p_accu_coeff_1h,p_accu_coeff_1m,p_t_36_0h,p_t_36_0m,p_t_36_0l);
|
394
|
+
Mul133(&p_t_38_0h,&p_t_38_0m,&p_t_38_0l,x,p_t_37_0h,p_t_37_0m,p_t_37_0l);
|
395
|
+
Add233(&p_t_39_0h,&p_t_39_0m,&p_t_39_0l,p_accu_coeff_0h,p_accu_coeff_0m,p_t_38_0h,p_t_38_0m,p_t_38_0l);
|
396
|
+
Renormalize3(p_resh,p_resm,p_resl,p_t_39_0h,p_t_39_0m,p_t_39_0l);
|
397
|
+
|
398
|
+
|
399
|
+
}
|
400
|
+
|
401
|
+
|
402
|
+
static void p9_accu(double *p_resh, double *p_resm, double *p_resl, double x) {
|
403
|
+
|
404
|
+
|
405
|
+
|
406
|
+
|
407
|
+
double p_t_1_0h;
|
408
|
+
double p_t_2_0h;
|
409
|
+
double p_t_3_0h;
|
410
|
+
double p_t_4_0h;
|
411
|
+
double p_t_5_0h;
|
412
|
+
double p_t_6_0h;
|
413
|
+
double p_t_7_0h;
|
414
|
+
double p_t_8_0h;
|
415
|
+
double p_t_9_0h;
|
416
|
+
double p_t_10_0h;
|
417
|
+
double p_t_11_0h;
|
418
|
+
double p_t_12_0h;
|
419
|
+
double p_t_13_0h;
|
420
|
+
double p_t_14_0h;
|
421
|
+
double p_t_15_0h;
|
422
|
+
double p_t_16_0h;
|
423
|
+
double p_t_17_0h, p_t_17_0m;
|
424
|
+
double p_t_18_0h, p_t_18_0m;
|
425
|
+
double p_t_19_0h, p_t_19_0m;
|
426
|
+
double p_t_20_0h, p_t_20_0m;
|
427
|
+
double p_t_21_0h, p_t_21_0m;
|
428
|
+
double p_t_22_0h, p_t_22_0m;
|
429
|
+
double p_t_23_0h, p_t_23_0m;
|
430
|
+
double p_t_24_0h, p_t_24_0m;
|
431
|
+
double p_t_25_0h, p_t_25_0m;
|
432
|
+
double p_t_26_0h, p_t_26_0m;
|
433
|
+
double p_t_27_0h, p_t_27_0m;
|
434
|
+
double p_t_28_0h, p_t_28_0m;
|
435
|
+
double p_t_29_0h, p_t_29_0m;
|
436
|
+
double p_t_30_0h, p_t_30_0m;
|
437
|
+
double p_t_31_0h, p_t_31_0m;
|
438
|
+
double p_t_32_0h, p_t_32_0m;
|
439
|
+
double p_t_33_0h, p_t_33_0m, p_t_33_0l;
|
440
|
+
double p_t_34_0h, p_t_34_0m, p_t_34_0l;
|
441
|
+
double p_t_35_0h, p_t_35_0m, p_t_35_0l;
|
442
|
+
|
443
|
+
|
444
|
+
|
445
|
+
p_t_1_0h = p9_accu_coeff_20h;
|
446
|
+
p_t_2_0h = p_t_1_0h * x;
|
447
|
+
p_t_3_0h = p9_accu_coeff_19h + p_t_2_0h;
|
448
|
+
p_t_4_0h = p_t_3_0h * x;
|
449
|
+
p_t_5_0h = p9_accu_coeff_18h + p_t_4_0h;
|
450
|
+
p_t_6_0h = p_t_5_0h * x;
|
451
|
+
p_t_7_0h = p9_accu_coeff_17h + p_t_6_0h;
|
452
|
+
p_t_8_0h = p_t_7_0h * x;
|
453
|
+
p_t_9_0h = p9_accu_coeff_16h + p_t_8_0h;
|
454
|
+
p_t_10_0h = p_t_9_0h * x;
|
455
|
+
p_t_11_0h = p9_accu_coeff_15h + p_t_10_0h;
|
456
|
+
p_t_12_0h = p_t_11_0h * x;
|
457
|
+
p_t_13_0h = p9_accu_coeff_14h + p_t_12_0h;
|
458
|
+
p_t_14_0h = p_t_13_0h * x;
|
459
|
+
p_t_15_0h = p9_accu_coeff_13h + p_t_14_0h;
|
460
|
+
p_t_16_0h = p_t_15_0h * x;
|
461
|
+
Add12(p_t_17_0h,p_t_17_0m,p9_accu_coeff_12h,p_t_16_0h);
|
462
|
+
Mul122(&p_t_18_0h,&p_t_18_0m,x,p_t_17_0h,p_t_17_0m);
|
463
|
+
Add122(&p_t_19_0h,&p_t_19_0m,p9_accu_coeff_11h,p_t_18_0h,p_t_18_0m);
|
464
|
+
Mul122(&p_t_20_0h,&p_t_20_0m,x,p_t_19_0h,p_t_19_0m);
|
465
|
+
Add122(&p_t_21_0h,&p_t_21_0m,p9_accu_coeff_10h,p_t_20_0h,p_t_20_0m);
|
466
|
+
Mul122(&p_t_22_0h,&p_t_22_0m,x,p_t_21_0h,p_t_21_0m);
|
467
|
+
Add122(&p_t_23_0h,&p_t_23_0m,p9_accu_coeff_9h,p_t_22_0h,p_t_22_0m);
|
468
|
+
Mul122(&p_t_24_0h,&p_t_24_0m,x,p_t_23_0h,p_t_23_0m);
|
469
|
+
Add122(&p_t_25_0h,&p_t_25_0m,p9_accu_coeff_8h,p_t_24_0h,p_t_24_0m);
|
470
|
+
MulAdd212(&p_t_26_0h,&p_t_26_0m,p9_accu_coeff_7h,p9_accu_coeff_7m,x,p_t_25_0h,p_t_25_0m);
|
471
|
+
MulAdd212(&p_t_27_0h,&p_t_27_0m,p9_accu_coeff_6h,p9_accu_coeff_6m,x,p_t_26_0h,p_t_26_0m);
|
472
|
+
MulAdd212(&p_t_28_0h,&p_t_28_0m,p9_accu_coeff_5h,p9_accu_coeff_5m,x,p_t_27_0h,p_t_27_0m);
|
473
|
+
MulAdd212(&p_t_29_0h,&p_t_29_0m,p9_accu_coeff_4h,p9_accu_coeff_4m,x,p_t_28_0h,p_t_28_0m);
|
474
|
+
MulAdd212(&p_t_30_0h,&p_t_30_0m,p9_accu_coeff_3h,p9_accu_coeff_3m,x,p_t_29_0h,p_t_29_0m);
|
475
|
+
MulAdd212(&p_t_31_0h,&p_t_31_0m,p9_accu_coeff_2h,p9_accu_coeff_2m,x,p_t_30_0h,p_t_30_0m);
|
476
|
+
Mul122(&p_t_32_0h,&p_t_32_0m,x,p_t_31_0h,p_t_31_0m);
|
477
|
+
Add23(&p_t_33_0h,&p_t_33_0m,&p_t_33_0l,p9_accu_coeff_1h,p9_accu_coeff_1m,p_t_32_0h,p_t_32_0m);
|
478
|
+
Mul133(&p_t_34_0h,&p_t_34_0m,&p_t_34_0l,x,p_t_33_0h,p_t_33_0m,p_t_33_0l);
|
479
|
+
Add233(&p_t_35_0h,&p_t_35_0m,&p_t_35_0l,p9_accu_coeff_0h,p9_accu_coeff_0m,p_t_34_0h,p_t_34_0m,p_t_34_0l);
|
480
|
+
Renormalize3(p_resh,p_resm,p_resl,p_t_35_0h,p_t_35_0m,p_t_35_0l);
|
481
|
+
|
482
|
+
|
483
|
+
}
|
484
|
+
|
485
|
+
|
486
|
+
|
487
|
+
|
488
|
+
double asin_rn(double x) {
|
489
|
+
db_number xdb, zdb;
|
490
|
+
double sign, z, zp;
|
491
|
+
int index;
|
492
|
+
double asinh, asinm, asinl;
|
493
|
+
double asinhover, asinmover, asinlover;
|
494
|
+
double p9h, p9m, p9l, sqrh, sqrm, sqrl;
|
495
|
+
double t1h, t1m, t1l;
|
496
|
+
double asin;
|
497
|
+
double xabs;
|
498
|
+
|
499
|
+
/* Start already computations for argument reduction */
|
500
|
+
|
501
|
+
zdb.d = 1.0 + x * x;
|
502
|
+
|
503
|
+
xdb.d = x;
|
504
|
+
|
505
|
+
/* Special case handling */
|
506
|
+
|
507
|
+
/* Remove sign of x in floating-point */
|
508
|
+
xabs = ABS(x);
|
509
|
+
xdb.i[HI] &= 0x7fffffff;
|
510
|
+
|
511
|
+
/* If |x| < 2^(-28) we have
|
512
|
+
|
513
|
+
arcsin(x) = x * ( 1 + xi )
|
514
|
+
|
515
|
+
with 0 <= xi < 2^(-55)
|
516
|
+
|
517
|
+
So we can decide the rounding without any computation
|
518
|
+
*/
|
519
|
+
if (xdb.i[HI] < ASINSIMPLEBOUND) {
|
520
|
+
return x;
|
521
|
+
}
|
522
|
+
|
523
|
+
/* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
|
524
|
+
if (xdb.i[HI] >= 0x3ff00000) {
|
525
|
+
if (x == 1.0) {
|
526
|
+
return PIHALFH;
|
527
|
+
}
|
528
|
+
if (x == -1.0) {
|
529
|
+
return - PIHALFH;
|
530
|
+
}
|
531
|
+
return (x-x)/0.0; /* return NaN */
|
532
|
+
}
|
533
|
+
|
534
|
+
/* Argument reduction:
|
535
|
+
|
536
|
+
We have 10 intervals and 3 paths:
|
537
|
+
|
538
|
+
- interval 0 => path 1 using p0
|
539
|
+
- interval 1-8 => path 2 using p
|
540
|
+
- interval 9 => path 3 using p9
|
541
|
+
|
542
|
+
*/
|
543
|
+
|
544
|
+
index = (0x000f0000 & zdb.i[HI]) >> 16;
|
545
|
+
|
546
|
+
/* 0 <= index <= 15
|
547
|
+
|
548
|
+
index approximates roughly x^2
|
549
|
+
|
550
|
+
Map indexes to intervals as follows:
|
551
|
+
|
552
|
+
0 -> 0
|
553
|
+
1 -> 1
|
554
|
+
...
|
555
|
+
8 -> 8
|
556
|
+
9 -> 9
|
557
|
+
...
|
558
|
+
15 -> 9
|
559
|
+
|
560
|
+
For this mapping, filter first the case 0 -> 0
|
561
|
+
In consequence, 1 <= index <= 15, i.e.
|
562
|
+
0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
|
563
|
+
|
564
|
+
0 -> 1
|
565
|
+
...
|
566
|
+
7 -> 8
|
567
|
+
8 -> 9
|
568
|
+
...
|
569
|
+
15 -> 9
|
570
|
+
|
571
|
+
Thus it suffices to check the 3rd bit of index - 1 after the first filter.
|
572
|
+
|
573
|
+
*/
|
574
|
+
|
575
|
+
if (index == 0) {
|
576
|
+
/* Path 1 using p0 */
|
577
|
+
|
578
|
+
p0_quick(&asinh, &asinm, x, xdb.i[HI]);
|
579
|
+
|
580
|
+
/* Rounding test */
|
581
|
+
|
582
|
+
if(asinh == (asinh + (asinm * RNROUNDCST)))
|
583
|
+
return asinh;
|
584
|
+
|
585
|
+
/* Rounding test failed, launch accurate phase */
|
586
|
+
|
587
|
+
#if EVAL_PERF
|
588
|
+
crlibm_second_step_taken++;
|
589
|
+
#endif
|
590
|
+
|
591
|
+
p0_accu(&asinh, &asinm, &asinl, x);
|
592
|
+
|
593
|
+
/* Final rounding */
|
594
|
+
|
595
|
+
RoundToNearest3(&asin,asinh,asinm,asinl);
|
596
|
+
|
597
|
+
return asin;
|
598
|
+
}
|
599
|
+
|
600
|
+
/* Strip off the sign of argument x */
|
601
|
+
sign = 1.0;
|
602
|
+
if (x < 0.0) sign = -sign;
|
603
|
+
|
604
|
+
index--;
|
605
|
+
if ((index & 0x8) != 0) {
|
606
|
+
/* Path 3 using p9 */
|
607
|
+
|
608
|
+
/* Do argument reduction using a MI_9 as a midpoint value
|
609
|
+
for the polynomial and compute exactly zp = 2 * (1 - x)
|
610
|
+
for the asymptotical approximation using a square root.
|
611
|
+
*/
|
612
|
+
|
613
|
+
z = xabs - MI_9;
|
614
|
+
zp = 2.0 * (1.0 - xabs);
|
615
|
+
|
616
|
+
/* Polynomial approximation and square root extraction */
|
617
|
+
|
618
|
+
p9_quick(&p9h, &p9m, z);
|
619
|
+
p9h = -p9h;
|
620
|
+
p9m = -p9m;
|
621
|
+
|
622
|
+
sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
|
623
|
+
|
624
|
+
/* Reconstruction */
|
625
|
+
|
626
|
+
Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
|
627
|
+
Add22(&asinh,&asinm,PIHALFH,PIHALFM,t1h,t1m);
|
628
|
+
|
629
|
+
/* Rounding test */
|
630
|
+
|
631
|
+
if(asinh == (asinh + (asinm * RNROUNDCST)))
|
632
|
+
return sign * asinh;
|
633
|
+
|
634
|
+
/* Rounding test failed, launch accurate phase */
|
635
|
+
|
636
|
+
#if EVAL_PERF
|
637
|
+
crlibm_second_step_taken++;
|
638
|
+
#endif
|
639
|
+
|
640
|
+
p9_accu(&p9h, &p9m, &p9l, z);
|
641
|
+
p9h = -p9h;
|
642
|
+
p9m = -p9m;
|
643
|
+
p9l = -p9l;
|
644
|
+
|
645
|
+
Sqrt13(&sqrh,&sqrm,&sqrl,zp);
|
646
|
+
|
647
|
+
/* Reconstruction */
|
648
|
+
|
649
|
+
Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
|
650
|
+
Add33(&asinhover,&asinmover,&asinlover,PIHALFH,PIHALFM,PIHALFL,t1h,t1m,t1l);
|
651
|
+
|
652
|
+
Renormalize3(&asinh,&asinm,&asinl,asinhover,asinmover,asinlover);
|
653
|
+
|
654
|
+
/* Final rounding */
|
655
|
+
|
656
|
+
RoundToNearest3(&asin,asinh,asinm,asinl);
|
657
|
+
|
658
|
+
return sign * asin;
|
659
|
+
|
660
|
+
}
|
661
|
+
|
662
|
+
/* Path 2 using p */
|
663
|
+
|
664
|
+
/* Do argument reduction using a table value for
|
665
|
+
the midpoint value
|
666
|
+
*/
|
667
|
+
|
668
|
+
z = xabs - mi_i;
|
669
|
+
|
670
|
+
p_quick(&asinh, &asinm, z, index);
|
671
|
+
|
672
|
+
|
673
|
+
/* Rounding test */
|
674
|
+
|
675
|
+
if(asinh == (asinh + (asinm * RNROUNDCST)))
|
676
|
+
return sign * asinh;
|
677
|
+
|
678
|
+
/* Rounding test failed, launch accurate phase */
|
679
|
+
|
680
|
+
#if EVAL_PERF
|
681
|
+
crlibm_second_step_taken++;
|
682
|
+
#endif
|
683
|
+
|
684
|
+
p_accu(&asinh, &asinm, &asinl, z, index);
|
685
|
+
|
686
|
+
/* Final rounding */
|
687
|
+
|
688
|
+
RoundToNearest3(&asin,asinh,asinm,asinl);
|
689
|
+
|
690
|
+
return sign * asin;
|
691
|
+
|
692
|
+
}
|
693
|
+
|
694
|
+
|
695
|
+
double asin_ru(double x) {
|
696
|
+
db_number xdb, zdb;
|
697
|
+
double sign, z, zp;
|
698
|
+
int index;
|
699
|
+
double asinh, asinm, asinl;
|
700
|
+
double asinhover, asinmover, asinlover;
|
701
|
+
double p9h, p9m, p9l, sqrh, sqrm, sqrl;
|
702
|
+
double t1h, t1m, t1l;
|
703
|
+
double xabs;
|
704
|
+
|
705
|
+
/* Start already computations for argument reduction */
|
706
|
+
|
707
|
+
zdb.d = 1.0 + x * x;
|
708
|
+
|
709
|
+
xdb.d = x;
|
710
|
+
|
711
|
+
/* Special case handling */
|
712
|
+
|
713
|
+
/* Remove sign of x in floating-point */
|
714
|
+
xabs = ABS(x);
|
715
|
+
xdb.i[HI] &= 0x7fffffff;
|
716
|
+
|
717
|
+
/* If |x| < 2^(-28) we have
|
718
|
+
|
719
|
+
arcsin(x) = x * ( 1 + xi )
|
720
|
+
|
721
|
+
with 0 <= xi < 2^(-55)
|
722
|
+
|
723
|
+
So we can decide the rounding without any computation
|
724
|
+
*/
|
725
|
+
if (xdb.i[HI] < ASINSIMPLEBOUND) {
|
726
|
+
/* If x == 0 then we got the algebraic result arcsin(0) = 0
|
727
|
+
If x < 0 then the truncation rest is negative but less than
|
728
|
+
1 ulp; we round upwards by returning x
|
729
|
+
*/
|
730
|
+
if (x <= 0.0) return x;
|
731
|
+
/* Otherwise the rest is positive, less than 1 ulp and the
|
732
|
+
image is not algebraic
|
733
|
+
We return x + 1ulp
|
734
|
+
*/
|
735
|
+
xdb.l++;
|
736
|
+
return xdb.d;
|
737
|
+
}
|
738
|
+
|
739
|
+
/* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
|
740
|
+
if (xdb.i[HI] >= 0x3ff00000) {
|
741
|
+
if (x == 1.0) {
|
742
|
+
return PIHALFRU;
|
743
|
+
}
|
744
|
+
if (x == -1.0) {
|
745
|
+
return - PIHALFH;
|
746
|
+
}
|
747
|
+
return (x-x)/0.0; /* return NaN */
|
748
|
+
}
|
749
|
+
|
750
|
+
/* Argument reduction:
|
751
|
+
|
752
|
+
We have 10 intervals and 3 paths:
|
753
|
+
|
754
|
+
- interval 0 => path 1 using p0
|
755
|
+
- interval 1-8 => path 2 using p
|
756
|
+
- interval 9 => path 3 using p9
|
757
|
+
|
758
|
+
*/
|
759
|
+
|
760
|
+
index = (0x000f0000 & zdb.i[HI]) >> 16;
|
761
|
+
|
762
|
+
/* 0 <= index <= 15
|
763
|
+
|
764
|
+
index approximates roughly x^2
|
765
|
+
|
766
|
+
Map indexes to intervals as follows:
|
767
|
+
|
768
|
+
0 -> 0
|
769
|
+
1 -> 1
|
770
|
+
...
|
771
|
+
8 -> 8
|
772
|
+
9 -> 9
|
773
|
+
...
|
774
|
+
15 -> 9
|
775
|
+
|
776
|
+
For this mapping, filter first the case 0 -> 0
|
777
|
+
In consequence, 1 <= index <= 15, i.e.
|
778
|
+
0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
|
779
|
+
|
780
|
+
0 -> 1
|
781
|
+
...
|
782
|
+
7 -> 8
|
783
|
+
8 -> 9
|
784
|
+
...
|
785
|
+
15 -> 9
|
786
|
+
|
787
|
+
Thus it suffices to check the 3rd bit of index - 1 after the first filter.
|
788
|
+
|
789
|
+
*/
|
790
|
+
|
791
|
+
if (index == 0) {
|
792
|
+
/* Path 1 using p0 */
|
793
|
+
|
794
|
+
p0_quick(&asinh, &asinm, x, xdb.i[HI]);
|
795
|
+
|
796
|
+
/* Rounding test */
|
797
|
+
|
798
|
+
TEST_AND_RETURN_RU(asinh, asinm, RDROUNDCST);
|
799
|
+
|
800
|
+
/* Rounding test failed, launch accurate phase */
|
801
|
+
|
802
|
+
#if EVAL_PERF
|
803
|
+
crlibm_second_step_taken++;
|
804
|
+
#endif
|
805
|
+
|
806
|
+
p0_accu(&asinh, &asinm, &asinl, x);
|
807
|
+
|
808
|
+
/* Final rounding */
|
809
|
+
|
810
|
+
ReturnRoundUpwards3(asinh,asinm,asinl);
|
811
|
+
|
812
|
+
}
|
813
|
+
|
814
|
+
/* Strip off the sign of argument x */
|
815
|
+
sign = 1.0;
|
816
|
+
if (x < 0.0) sign = -sign;
|
817
|
+
|
818
|
+
index--;
|
819
|
+
if ((index & 0x8) != 0) {
|
820
|
+
/* Path 3 using p9 */
|
821
|
+
|
822
|
+
/* Do argument reduction using a MI_9 as a midpoint value
|
823
|
+
for the polynomial and compute exactly zp = 2 * (1 - x)
|
824
|
+
for the asymptotical approximation using a square root.
|
825
|
+
*/
|
826
|
+
|
827
|
+
z = xabs - MI_9;
|
828
|
+
zp = 2.0 * (1.0 - xabs);
|
829
|
+
|
830
|
+
/* Polynomial approximation and square root extraction */
|
831
|
+
|
832
|
+
p9_quick(&p9h, &p9m, z);
|
833
|
+
p9h = -p9h;
|
834
|
+
p9m = -p9m;
|
835
|
+
|
836
|
+
sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
|
837
|
+
|
838
|
+
/* Reconstruction */
|
839
|
+
|
840
|
+
Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
|
841
|
+
Add22(&asinh,&asinm,PIHALFH,PIHALFM,t1h,t1m);
|
842
|
+
|
843
|
+
/* Rounding test */
|
844
|
+
|
845
|
+
asinh *= sign;
|
846
|
+
asinm *= sign;
|
847
|
+
|
848
|
+
TEST_AND_RETURN_RU(asinh, asinm, RDROUNDCST);
|
849
|
+
|
850
|
+
/* Rounding test failed, launch accurate phase */
|
851
|
+
|
852
|
+
#if EVAL_PERF
|
853
|
+
crlibm_second_step_taken++;
|
854
|
+
#endif
|
855
|
+
|
856
|
+
p9_accu(&p9h, &p9m, &p9l, z);
|
857
|
+
p9h = -p9h;
|
858
|
+
p9m = -p9m;
|
859
|
+
p9l = -p9l;
|
860
|
+
|
861
|
+
Sqrt13(&sqrh,&sqrm,&sqrl,zp);
|
862
|
+
|
863
|
+
/* Reconstruction */
|
864
|
+
|
865
|
+
Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
|
866
|
+
Add33(&asinhover,&asinmover,&asinlover,PIHALFH,PIHALFM,PIHALFL,t1h,t1m,t1l);
|
867
|
+
|
868
|
+
Renormalize3(&asinh,&asinm,&asinl,asinhover,asinmover,asinlover);
|
869
|
+
|
870
|
+
/* Final rounding */
|
871
|
+
|
872
|
+
asinh *= sign;
|
873
|
+
asinm *= sign;
|
874
|
+
asinl *= sign;
|
875
|
+
|
876
|
+
ReturnRoundUpwards3(asinh,asinm,asinl);
|
877
|
+
|
878
|
+
}
|
879
|
+
|
880
|
+
/* Path 2 using p */
|
881
|
+
|
882
|
+
/* Do argument reduction using a table value for
|
883
|
+
the midpoint value
|
884
|
+
*/
|
885
|
+
|
886
|
+
z = xabs - mi_i;
|
887
|
+
|
888
|
+
p_quick(&asinh, &asinm, z, index);
|
889
|
+
|
890
|
+
|
891
|
+
/* Rounding test */
|
892
|
+
|
893
|
+
asinh *= sign;
|
894
|
+
asinm *= sign;
|
895
|
+
|
896
|
+
TEST_AND_RETURN_RU(asinh, asinm, RDROUNDCST);
|
897
|
+
|
898
|
+
/* Rounding test failed, launch accurate phase */
|
899
|
+
|
900
|
+
#if EVAL_PERF
|
901
|
+
crlibm_second_step_taken++;
|
902
|
+
#endif
|
903
|
+
|
904
|
+
if (x == ASINBADCASEX) return ASINBADCASEYRU;
|
905
|
+
|
906
|
+
p_accu(&asinh, &asinm, &asinl, z, index);
|
907
|
+
|
908
|
+
/* Final rounding */
|
909
|
+
|
910
|
+
asinh *= sign;
|
911
|
+
asinm *= sign;
|
912
|
+
asinl *= sign;
|
913
|
+
|
914
|
+
ReturnRoundUpwards3(asinh,asinm,asinl);
|
915
|
+
|
916
|
+
}
|
917
|
+
|
918
|
+
double asin_rd(double x) {
|
919
|
+
db_number xdb, zdb;
|
920
|
+
double sign, z, zp;
|
921
|
+
int index;
|
922
|
+
double asinh, asinm, asinl;
|
923
|
+
double asinhover, asinmover, asinlover;
|
924
|
+
double p9h, p9m, p9l, sqrh, sqrm, sqrl;
|
925
|
+
double t1h, t1m, t1l;
|
926
|
+
double xabs;
|
927
|
+
|
928
|
+
/* Start already computations for argument reduction */
|
929
|
+
|
930
|
+
zdb.d = 1.0 + x * x;
|
931
|
+
|
932
|
+
xdb.d = x;
|
933
|
+
|
934
|
+
/* Special case handling */
|
935
|
+
|
936
|
+
/* Remove sign of x in floating-point */
|
937
|
+
xabs = ABS(x);
|
938
|
+
xdb.i[HI] &= 0x7fffffff;
|
939
|
+
|
940
|
+
/* If |x| < 2^(-28) we have
|
941
|
+
|
942
|
+
arcsin(x) = x * ( 1 + xi )
|
943
|
+
|
944
|
+
with 0 <= xi < 2^(-55)
|
945
|
+
|
946
|
+
So we can decide the rounding without any computation
|
947
|
+
*/
|
948
|
+
if (xdb.i[HI] < ASINSIMPLEBOUND) {
|
949
|
+
/* If x == 0 then we got the algebraic result arcsin(0) = 0
|
950
|
+
If x > 0 then the truncation rest is positive but less than
|
951
|
+
1 ulp; we round downwards by returning x
|
952
|
+
*/
|
953
|
+
if (x >= 0) return x;
|
954
|
+
/* Otherwise the rest is negative, less than 1 ulp and the
|
955
|
+
image is not algebraic
|
956
|
+
We return x - 1ulp
|
957
|
+
We stripped off the sign, so we add 1 ulp to -x (in xdb.d) and multiply by -1
|
958
|
+
*/
|
959
|
+
xdb.l++;
|
960
|
+
return -1 * xdb.d;
|
961
|
+
}
|
962
|
+
|
963
|
+
/* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
|
964
|
+
if (xdb.i[HI] >= 0x3ff00000) {
|
965
|
+
if (x == 1.0) {
|
966
|
+
return PIHALFH;
|
967
|
+
}
|
968
|
+
if (x == -1.0) {
|
969
|
+
return - PIHALFRU;
|
970
|
+
}
|
971
|
+
return (x-x)/0.0; /* return NaN */
|
972
|
+
}
|
973
|
+
|
974
|
+
/* Argument reduction:
|
975
|
+
|
976
|
+
We have 10 intervals and 3 paths:
|
977
|
+
|
978
|
+
- interval 0 => path 1 using p0
|
979
|
+
- interval 1-8 => path 2 using p
|
980
|
+
- interval 9 => path 3 using p9
|
981
|
+
|
982
|
+
*/
|
983
|
+
|
984
|
+
index = (0x000f0000 & zdb.i[HI]) >> 16;
|
985
|
+
|
986
|
+
/* 0 <= index <= 15
|
987
|
+
|
988
|
+
index approximates roughly x^2
|
989
|
+
|
990
|
+
Map indexes to intervals as follows:
|
991
|
+
|
992
|
+
0 -> 0
|
993
|
+
1 -> 1
|
994
|
+
...
|
995
|
+
8 -> 8
|
996
|
+
9 -> 9
|
997
|
+
...
|
998
|
+
15 -> 9
|
999
|
+
|
1000
|
+
For this mapping, filter first the case 0 -> 0
|
1001
|
+
In consequence, 1 <= index <= 15, i.e.
|
1002
|
+
0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
|
1003
|
+
|
1004
|
+
0 -> 1
|
1005
|
+
...
|
1006
|
+
7 -> 8
|
1007
|
+
8 -> 9
|
1008
|
+
...
|
1009
|
+
15 -> 9
|
1010
|
+
|
1011
|
+
Thus it suffices to check the 3rd bit of index - 1 after the first filter.
|
1012
|
+
|
1013
|
+
*/
|
1014
|
+
|
1015
|
+
if (index == 0) {
|
1016
|
+
/* Path 1 using p0 */
|
1017
|
+
|
1018
|
+
p0_quick(&asinh, &asinm, x, xdb.i[HI]);
|
1019
|
+
|
1020
|
+
/* Rounding test */
|
1021
|
+
|
1022
|
+
TEST_AND_RETURN_RD(asinh, asinm, RDROUNDCST);
|
1023
|
+
|
1024
|
+
/* Rounding test failed, launch accurate phase */
|
1025
|
+
|
1026
|
+
#if EVAL_PERF
|
1027
|
+
crlibm_second_step_taken++;
|
1028
|
+
#endif
|
1029
|
+
|
1030
|
+
p0_accu(&asinh, &asinm, &asinl, x);
|
1031
|
+
|
1032
|
+
/* Final rounding */
|
1033
|
+
|
1034
|
+
ReturnRoundDownwards3(asinh,asinm,asinl);
|
1035
|
+
|
1036
|
+
}
|
1037
|
+
|
1038
|
+
/* Strip off the sign of argument x */
|
1039
|
+
sign = 1.0;
|
1040
|
+
if (x < 0.0) sign = -sign;
|
1041
|
+
|
1042
|
+
index--;
|
1043
|
+
if ((index & 0x8) != 0) {
|
1044
|
+
/* Path 3 using p9 */
|
1045
|
+
|
1046
|
+
/* Do argument reduction using a MI_9 as a midpoint value
|
1047
|
+
for the polynomial and compute exactly zp = 2 * (1 - x)
|
1048
|
+
for the asymptotical approximation using a square root.
|
1049
|
+
*/
|
1050
|
+
|
1051
|
+
z = xabs - MI_9;
|
1052
|
+
zp = 2.0 * (1.0 - xabs);
|
1053
|
+
|
1054
|
+
/* Polynomial approximation and square root extraction */
|
1055
|
+
|
1056
|
+
p9_quick(&p9h, &p9m, z);
|
1057
|
+
p9h = -p9h;
|
1058
|
+
p9m = -p9m;
|
1059
|
+
|
1060
|
+
sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
|
1061
|
+
|
1062
|
+
/* Reconstruction */
|
1063
|
+
|
1064
|
+
Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
|
1065
|
+
Add22(&asinh,&asinm,PIHALFH,PIHALFM,t1h,t1m);
|
1066
|
+
|
1067
|
+
/* Rounding test */
|
1068
|
+
|
1069
|
+
asinh *= sign;
|
1070
|
+
asinm *= sign;
|
1071
|
+
|
1072
|
+
TEST_AND_RETURN_RD(asinh, asinm, RDROUNDCST);
|
1073
|
+
|
1074
|
+
/* Rounding test failed, launch accurate phase */
|
1075
|
+
|
1076
|
+
#if EVAL_PERF
|
1077
|
+
crlibm_second_step_taken++;
|
1078
|
+
#endif
|
1079
|
+
|
1080
|
+
p9_accu(&p9h, &p9m, &p9l, z);
|
1081
|
+
p9h = -p9h;
|
1082
|
+
p9m = -p9m;
|
1083
|
+
p9l = -p9l;
|
1084
|
+
|
1085
|
+
Sqrt13(&sqrh,&sqrm,&sqrl,zp);
|
1086
|
+
|
1087
|
+
/* Reconstruction */
|
1088
|
+
|
1089
|
+
Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
|
1090
|
+
Add33(&asinhover,&asinmover,&asinlover,PIHALFH,PIHALFM,PIHALFL,t1h,t1m,t1l);
|
1091
|
+
|
1092
|
+
Renormalize3(&asinh,&asinm,&asinl,asinhover,asinmover,asinlover);
|
1093
|
+
|
1094
|
+
/* Final rounding */
|
1095
|
+
|
1096
|
+
asinh *= sign;
|
1097
|
+
asinm *= sign;
|
1098
|
+
asinl *= sign;
|
1099
|
+
|
1100
|
+
ReturnRoundDownwards3(asinh,asinm,asinl);
|
1101
|
+
|
1102
|
+
}
|
1103
|
+
|
1104
|
+
/* Path 2 using p */
|
1105
|
+
|
1106
|
+
/* Do argument reduction using a table value for
|
1107
|
+
the midpoint value
|
1108
|
+
*/
|
1109
|
+
|
1110
|
+
z = xabs - mi_i;
|
1111
|
+
|
1112
|
+
p_quick(&asinh, &asinm, z, index);
|
1113
|
+
|
1114
|
+
|
1115
|
+
/* Rounding test */
|
1116
|
+
|
1117
|
+
asinh *= sign;
|
1118
|
+
asinm *= sign;
|
1119
|
+
|
1120
|
+
TEST_AND_RETURN_RD(asinh, asinm, RDROUNDCST);
|
1121
|
+
|
1122
|
+
/* Rounding test failed, launch accurate phase */
|
1123
|
+
|
1124
|
+
#if EVAL_PERF
|
1125
|
+
crlibm_second_step_taken++;
|
1126
|
+
#endif
|
1127
|
+
|
1128
|
+
if (x == ASINBADCASEX) return ASINBADCASEYRD;
|
1129
|
+
|
1130
|
+
p_accu(&asinh, &asinm, &asinl, z, index);
|
1131
|
+
|
1132
|
+
/* Final rounding */
|
1133
|
+
|
1134
|
+
asinh *= sign;
|
1135
|
+
asinm *= sign;
|
1136
|
+
asinl *= sign;
|
1137
|
+
|
1138
|
+
ReturnRoundDownwards3(asinh,asinm,asinl);
|
1139
|
+
|
1140
|
+
}
|
1141
|
+
|
1142
|
+
double asin_rz(double x) {
|
1143
|
+
db_number xdb, zdb;
|
1144
|
+
double sign, z, zp;
|
1145
|
+
int index;
|
1146
|
+
double asinh, asinm, asinl;
|
1147
|
+
double asinhover, asinmover, asinlover;
|
1148
|
+
double p9h, p9m, p9l, sqrh, sqrm, sqrl;
|
1149
|
+
double t1h, t1m, t1l;
|
1150
|
+
double xabs;
|
1151
|
+
|
1152
|
+
/* Start already computations for argument reduction */
|
1153
|
+
|
1154
|
+
zdb.d = 1.0 + x * x;
|
1155
|
+
|
1156
|
+
xdb.d = x;
|
1157
|
+
|
1158
|
+
/* Special case handling */
|
1159
|
+
|
1160
|
+
/* Remove sign of x in floating-point */
|
1161
|
+
xabs = ABS(x);
|
1162
|
+
xdb.i[HI] &= 0x7fffffff;
|
1163
|
+
|
1164
|
+
/* If |x| < 2^(-28) we have
|
1165
|
+
|
1166
|
+
arcsin(x) = x * ( 1 + xi )
|
1167
|
+
|
1168
|
+
with 0 <= xi < 2^(-55)
|
1169
|
+
|
1170
|
+
So we can decide the rounding without any computation
|
1171
|
+
*/
|
1172
|
+
if (xdb.i[HI] < ASINSIMPLEBOUND) {
|
1173
|
+
/* If x == 0 the result is algebraic and equal to 0
|
1174
|
+
If x < 0 the truncation rest is negative and less than 1 ulp, we return x
|
1175
|
+
If x > 0 the truncation rest is positive and less than 1 ulp, we return x
|
1176
|
+
*/
|
1177
|
+
return x;
|
1178
|
+
}
|
1179
|
+
|
1180
|
+
/* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
|
1181
|
+
if (xdb.i[HI] >= 0x3ff00000) {
|
1182
|
+
if (x == 1.0) {
|
1183
|
+
return PIHALFH;
|
1184
|
+
}
|
1185
|
+
if (x == -1.0) {
|
1186
|
+
return - PIHALFH;
|
1187
|
+
}
|
1188
|
+
return (x-x)/0.0; /* return NaN */
|
1189
|
+
}
|
1190
|
+
|
1191
|
+
/* Argument reduction:
|
1192
|
+
|
1193
|
+
We have 10 intervals and 3 paths:
|
1194
|
+
|
1195
|
+
- interval 0 => path 1 using p0
|
1196
|
+
- interval 1-8 => path 2 using p
|
1197
|
+
- interval 9 => path 3 using p9
|
1198
|
+
|
1199
|
+
*/
|
1200
|
+
|
1201
|
+
index = (0x000f0000 & zdb.i[HI]) >> 16;
|
1202
|
+
|
1203
|
+
/* 0 <= index <= 15
|
1204
|
+
|
1205
|
+
index approximates roughly x^2
|
1206
|
+
|
1207
|
+
Map indexes to intervals as follows:
|
1208
|
+
|
1209
|
+
0 -> 0
|
1210
|
+
1 -> 1
|
1211
|
+
...
|
1212
|
+
8 -> 8
|
1213
|
+
9 -> 9
|
1214
|
+
...
|
1215
|
+
15 -> 9
|
1216
|
+
|
1217
|
+
For this mapping, filter first the case 0 -> 0
|
1218
|
+
In consequence, 1 <= index <= 15, i.e.
|
1219
|
+
0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
|
1220
|
+
|
1221
|
+
0 -> 1
|
1222
|
+
...
|
1223
|
+
7 -> 8
|
1224
|
+
8 -> 9
|
1225
|
+
...
|
1226
|
+
15 -> 9
|
1227
|
+
|
1228
|
+
Thus it suffices to check the 3rd bit of index - 1 after the first filter.
|
1229
|
+
|
1230
|
+
*/
|
1231
|
+
|
1232
|
+
if (index == 0) {
|
1233
|
+
/* Path 1 using p0 */
|
1234
|
+
|
1235
|
+
p0_quick(&asinh, &asinm, x, xdb.i[HI]);
|
1236
|
+
|
1237
|
+
/* Rounding test */
|
1238
|
+
|
1239
|
+
TEST_AND_RETURN_RZ(asinh, asinm, RDROUNDCST);
|
1240
|
+
|
1241
|
+
/* Rounding test failed, launch accurate phase */
|
1242
|
+
|
1243
|
+
#if EVAL_PERF
|
1244
|
+
crlibm_second_step_taken++;
|
1245
|
+
#endif
|
1246
|
+
|
1247
|
+
p0_accu(&asinh, &asinm, &asinl, x);
|
1248
|
+
|
1249
|
+
/* Final rounding */
|
1250
|
+
|
1251
|
+
ReturnRoundTowardsZero3(asinh,asinm,asinl);
|
1252
|
+
|
1253
|
+
}
|
1254
|
+
|
1255
|
+
/* Strip off the sign of argument x */
|
1256
|
+
sign = 1.0;
|
1257
|
+
if (x < 0.0) sign = -sign;
|
1258
|
+
|
1259
|
+
index--;
|
1260
|
+
if ((index & 0x8) != 0) {
|
1261
|
+
/* Path 3 using p9 */
|
1262
|
+
|
1263
|
+
/* Do argument reduction using a MI_9 as a midpoint value
|
1264
|
+
for the polynomial and compute exactly zp = 2 * (1 - x)
|
1265
|
+
for the asymptotical approximation using a square root.
|
1266
|
+
*/
|
1267
|
+
|
1268
|
+
z = xabs - MI_9;
|
1269
|
+
zp = 2.0 * (1.0 - xabs);
|
1270
|
+
|
1271
|
+
/* Polynomial approximation and square root extraction */
|
1272
|
+
|
1273
|
+
p9_quick(&p9h, &p9m, z);
|
1274
|
+
p9h = -p9h;
|
1275
|
+
p9m = -p9m;
|
1276
|
+
|
1277
|
+
sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
|
1278
|
+
|
1279
|
+
/* Reconstruction */
|
1280
|
+
|
1281
|
+
Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
|
1282
|
+
Add22(&asinh,&asinm,PIHALFH,PIHALFM,t1h,t1m);
|
1283
|
+
|
1284
|
+
/* Rounding test */
|
1285
|
+
|
1286
|
+
asinh *= sign;
|
1287
|
+
asinm *= sign;
|
1288
|
+
|
1289
|
+
TEST_AND_RETURN_RZ(asinh, asinm, RDROUNDCST);
|
1290
|
+
|
1291
|
+
/* Rounding test failed, launch accurate phase */
|
1292
|
+
|
1293
|
+
#if EVAL_PERF
|
1294
|
+
crlibm_second_step_taken++;
|
1295
|
+
#endif
|
1296
|
+
|
1297
|
+
p9_accu(&p9h, &p9m, &p9l, z);
|
1298
|
+
p9h = -p9h;
|
1299
|
+
p9m = -p9m;
|
1300
|
+
p9l = -p9l;
|
1301
|
+
|
1302
|
+
Sqrt13(&sqrh,&sqrm,&sqrl,zp);
|
1303
|
+
|
1304
|
+
/* Reconstruction */
|
1305
|
+
|
1306
|
+
Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
|
1307
|
+
Add33(&asinhover,&asinmover,&asinlover,PIHALFH,PIHALFM,PIHALFL,t1h,t1m,t1l);
|
1308
|
+
|
1309
|
+
Renormalize3(&asinh,&asinm,&asinl,asinhover,asinmover,asinlover);
|
1310
|
+
|
1311
|
+
/* Final rounding */
|
1312
|
+
|
1313
|
+
asinh *= sign;
|
1314
|
+
asinm *= sign;
|
1315
|
+
asinl *= sign;
|
1316
|
+
|
1317
|
+
ReturnRoundTowardsZero3(asinh,asinm,asinl);
|
1318
|
+
|
1319
|
+
}
|
1320
|
+
|
1321
|
+
/* Path 2 using p */
|
1322
|
+
|
1323
|
+
/* Do argument reduction using a table value for
|
1324
|
+
the midpoint value
|
1325
|
+
*/
|
1326
|
+
|
1327
|
+
z = xabs - mi_i;
|
1328
|
+
|
1329
|
+
p_quick(&asinh, &asinm, z, index);
|
1330
|
+
|
1331
|
+
|
1332
|
+
/* Rounding test */
|
1333
|
+
|
1334
|
+
asinh *= sign;
|
1335
|
+
asinm *= sign;
|
1336
|
+
|
1337
|
+
TEST_AND_RETURN_RZ(asinh, asinm, RDROUNDCST);
|
1338
|
+
|
1339
|
+
/* Rounding test failed, launch accurate phase */
|
1340
|
+
|
1341
|
+
#if EVAL_PERF
|
1342
|
+
crlibm_second_step_taken++;
|
1343
|
+
#endif
|
1344
|
+
|
1345
|
+
if (x == ASINBADCASEX) return ASINBADCASEYRD;
|
1346
|
+
|
1347
|
+
p_accu(&asinh, &asinm, &asinl, z, index);
|
1348
|
+
|
1349
|
+
/* Final rounding */
|
1350
|
+
|
1351
|
+
asinh *= sign;
|
1352
|
+
asinm *= sign;
|
1353
|
+
asinl *= sign;
|
1354
|
+
|
1355
|
+
ReturnRoundTowardsZero3(asinh,asinm,asinl);
|
1356
|
+
|
1357
|
+
}
|
1358
|
+
|
1359
|
+
double acos_rn(double x) {
|
1360
|
+
db_number xdb, zdb;
|
1361
|
+
double z, zp;
|
1362
|
+
int index;
|
1363
|
+
double asinh, asinm, asinl;
|
1364
|
+
double acosh, acosm, acosl;
|
1365
|
+
double acoshover, acosmover, acoslover;
|
1366
|
+
double p9h, p9m, p9l, sqrh, sqrm, sqrl;
|
1367
|
+
double t1h, t1m, t1l;
|
1368
|
+
double xabs;
|
1369
|
+
|
1370
|
+
/* Start already computations for argument reduction */
|
1371
|
+
|
1372
|
+
zdb.d = 1.0 + x * x;
|
1373
|
+
|
1374
|
+
xdb.d = x;
|
1375
|
+
|
1376
|
+
/* Special case handling */
|
1377
|
+
|
1378
|
+
/* Remove sign of x in floating-point */
|
1379
|
+
xabs = ABS(x);
|
1380
|
+
xdb.i[HI] &= 0x7fffffff;
|
1381
|
+
|
1382
|
+
/* If |x| < 2^(-28) we have
|
1383
|
+
|
1384
|
+
arccos(x) = (double-double(pi/2) - x) * ( 1 + xi )
|
1385
|
+
|
1386
|
+
with 0 <= xi < 2^(-87)
|
1387
|
+
|
1388
|
+
From 2^(-27) we have no bad rounding case longer than 5 bits
|
1389
|
+
more than the ulp of x, thus the approximation suffices.
|
1390
|
+
|
1391
|
+
*/
|
1392
|
+
if (xdb.i[HI] < ACOSSIMPLEBOUND) {
|
1393
|
+
Add212(&acosh,&acosm,PIHALFH,PIHALFM,-x);
|
1394
|
+
|
1395
|
+
return acosh;
|
1396
|
+
}
|
1397
|
+
|
1398
|
+
/* acos is defined on -1 <= x <= 1, elsewhere it is NaN */
|
1399
|
+
if (xdb.i[HI] >= 0x3ff00000) {
|
1400
|
+
if (x == 1.0) {
|
1401
|
+
return 0.0;
|
1402
|
+
}
|
1403
|
+
if (x == -1.0) {
|
1404
|
+
return PIH;
|
1405
|
+
}
|
1406
|
+
return (x-x)/0.0; /* return NaN */
|
1407
|
+
}
|
1408
|
+
|
1409
|
+
/* Argument reduction:
|
1410
|
+
|
1411
|
+
We have 10 intervals and 3 paths:
|
1412
|
+
|
1413
|
+
- interval 0 => path 1 using p0
|
1414
|
+
- interval 1-8 => path 2 using p
|
1415
|
+
- interval 9 => path 3 using p9
|
1416
|
+
|
1417
|
+
*/
|
1418
|
+
|
1419
|
+
index = (0x000f0000 & zdb.i[HI]) >> 16;
|
1420
|
+
|
1421
|
+
/* 0 <= index <= 15
|
1422
|
+
|
1423
|
+
index approximates roughly x^2
|
1424
|
+
|
1425
|
+
Map indexes to intervals as follows:
|
1426
|
+
|
1427
|
+
0 -> 0
|
1428
|
+
1 -> 1
|
1429
|
+
...
|
1430
|
+
8 -> 8
|
1431
|
+
9 -> 9
|
1432
|
+
...
|
1433
|
+
15 -> 9
|
1434
|
+
|
1435
|
+
For this mapping, filter first the case 0 -> 0
|
1436
|
+
In consequence, 1 <= index <= 15, i.e.
|
1437
|
+
0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
|
1438
|
+
|
1439
|
+
0 -> 1
|
1440
|
+
...
|
1441
|
+
7 -> 8
|
1442
|
+
8 -> 9
|
1443
|
+
...
|
1444
|
+
15 -> 9
|
1445
|
+
|
1446
|
+
Thus it suffices to check the 3rd bit of index - 1 after the first filter.
|
1447
|
+
|
1448
|
+
*/
|
1449
|
+
|
1450
|
+
if (index == 0) {
|
1451
|
+
/* Path 1 using p0 */
|
1452
|
+
|
1453
|
+
p0_quick(&asinh, &asinm, x, xdb.i[HI]);
|
1454
|
+
|
1455
|
+
/* Recompose acos
|
1456
|
+
|
1457
|
+
No cancellation possible because
|
1458
|
+
|
1459
|
+
|asin(x)| <= 0.264 for |x| <= 0.26
|
1460
|
+
|
1461
|
+
*/
|
1462
|
+
|
1463
|
+
asinh = - asinh;
|
1464
|
+
asinm = - asinm;
|
1465
|
+
|
1466
|
+
Add22(&acosh,&acosm,PIHALFH,PIHALFM,asinh,asinm);
|
1467
|
+
|
1468
|
+
/* Rounding test */
|
1469
|
+
|
1470
|
+
if(acosh == (acosh + (acosm * RNROUNDCST)))
|
1471
|
+
return acosh;
|
1472
|
+
|
1473
|
+
/* Rounding test failed, launch accurate phase */
|
1474
|
+
|
1475
|
+
#if EVAL_PERF
|
1476
|
+
crlibm_second_step_taken++;
|
1477
|
+
#endif
|
1478
|
+
|
1479
|
+
p0_accu(&asinh, &asinm, &asinl, x);
|
1480
|
+
|
1481
|
+
/* Recompose acos */
|
1482
|
+
|
1483
|
+
asinh = -asinh;
|
1484
|
+
asinm = -asinm;
|
1485
|
+
asinl = -asinl;
|
1486
|
+
|
1487
|
+
Add33(&acoshover,&acosmover,&acoslover,PIHALFH,PIHALFM,PIHALFL,asinh,asinm,asinl);
|
1488
|
+
|
1489
|
+
Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
|
1490
|
+
|
1491
|
+
/* Final rounding */
|
1492
|
+
|
1493
|
+
ReturnRoundToNearest3(acosh,acosm,acosl);
|
1494
|
+
|
1495
|
+
}
|
1496
|
+
|
1497
|
+
index--;
|
1498
|
+
if ((index & 0x8) != 0) {
|
1499
|
+
/* Path 3 using p9 */
|
1500
|
+
|
1501
|
+
/* Do argument reduction using a MI_9 as a midpoint value
|
1502
|
+
for the polynomial and compute exactly zp = 2 * (1 - x)
|
1503
|
+
for the asymptotical approximation using a square root.
|
1504
|
+
*/
|
1505
|
+
|
1506
|
+
z = xabs - MI_9;
|
1507
|
+
zp = 2.0 * (1.0 - xabs);
|
1508
|
+
|
1509
|
+
/* Polynomial approximation and square root extraction */
|
1510
|
+
|
1511
|
+
p9_quick(&p9h, &p9m, z);
|
1512
|
+
|
1513
|
+
sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
|
1514
|
+
|
1515
|
+
/* Reconstruction */
|
1516
|
+
|
1517
|
+
Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
|
1518
|
+
|
1519
|
+
if (x > 0.0) {
|
1520
|
+
acosh = t1h;
|
1521
|
+
acosm = t1m;
|
1522
|
+
} else {
|
1523
|
+
/* arccos(-x) = Pi - arccos(x) */
|
1524
|
+
t1h = - t1h;
|
1525
|
+
t1m = - t1m;
|
1526
|
+
|
1527
|
+
Add22(&acosh,&acosm,PIH,PIM,t1h,t1m);
|
1528
|
+
}
|
1529
|
+
|
1530
|
+
/* Rounding test */
|
1531
|
+
|
1532
|
+
if(acosh == (acosh + (acosm * RNROUNDCST)))
|
1533
|
+
return acosh;
|
1534
|
+
|
1535
|
+
/* Rounding test failed, launch accurate phase */
|
1536
|
+
|
1537
|
+
#if EVAL_PERF
|
1538
|
+
crlibm_second_step_taken++;
|
1539
|
+
#endif
|
1540
|
+
|
1541
|
+
p9_accu(&p9h, &p9m, &p9l, z);
|
1542
|
+
|
1543
|
+
Sqrt13(&sqrh,&sqrm,&sqrl,zp);
|
1544
|
+
|
1545
|
+
/* Reconstruction */
|
1546
|
+
|
1547
|
+
Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
|
1548
|
+
|
1549
|
+
if (x > 0.0) {
|
1550
|
+
Renormalize3(&acosh,&acosm,&acosl,t1h,t1m,t1l);
|
1551
|
+
} else {
|
1552
|
+
/* arccos(-x) = Pi - arccos(x) */
|
1553
|
+
t1h = - t1h;
|
1554
|
+
t1m = - t1m;
|
1555
|
+
t1l = - t1l;
|
1556
|
+
|
1557
|
+
Add33(&acoshover,&acosmover,&acoslover,PIH,PIM,PIL,t1h,t1m,t1l);
|
1558
|
+
|
1559
|
+
Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
|
1560
|
+
}
|
1561
|
+
|
1562
|
+
/* Final rounding */
|
1563
|
+
|
1564
|
+
ReturnRoundToNearest3(acosh,acosm,acosl);
|
1565
|
+
|
1566
|
+
}
|
1567
|
+
|
1568
|
+
/* Path 2 using p */
|
1569
|
+
|
1570
|
+
/* Do argument reduction using a table value for
|
1571
|
+
the midpoint value
|
1572
|
+
*/
|
1573
|
+
|
1574
|
+
z = xabs - mi_i;
|
1575
|
+
|
1576
|
+
p_quick(&asinh, &asinm, z, index);
|
1577
|
+
|
1578
|
+
/* Recompose acos(x) out of asin(abs(x))
|
1579
|
+
|
1580
|
+
In the case of a substraction, we will cancel
|
1581
|
+
not more than 1 bit.
|
1582
|
+
|
1583
|
+
*/
|
1584
|
+
|
1585
|
+
if (x > 0.0) {
|
1586
|
+
asinh = - asinh;
|
1587
|
+
asinm = - asinm;
|
1588
|
+
}
|
1589
|
+
|
1590
|
+
Add22Cond(&acosh,&acosm,PIHALFH,PIHALFM,asinh,asinm);
|
1591
|
+
|
1592
|
+
/* Rounding test */
|
1593
|
+
|
1594
|
+
if(acosh == (acosh + (acosm * RNROUNDCST)))
|
1595
|
+
return acosh;
|
1596
|
+
|
1597
|
+
/* Rounding test failed, launch accurate phase */
|
1598
|
+
|
1599
|
+
#if EVAL_PERF
|
1600
|
+
crlibm_second_step_taken++;
|
1601
|
+
#endif
|
1602
|
+
|
1603
|
+
p_accu(&asinh, &asinm, &asinl, z, index);
|
1604
|
+
|
1605
|
+
/* Recompose acos(x) out of asin(abs(x))
|
1606
|
+
|
1607
|
+
In the case of a substraction, we will cancel
|
1608
|
+
not more than 1 bit.
|
1609
|
+
|
1610
|
+
*/
|
1611
|
+
|
1612
|
+
if (x > 0.0) {
|
1613
|
+
asinh = - asinh;
|
1614
|
+
asinm = - asinm;
|
1615
|
+
asinl = - asinl;
|
1616
|
+
}
|
1617
|
+
|
1618
|
+
Add33Cond(&acoshover,&acosmover,&acoslover,PIHALFH,PIHALFM,PIHALFL,asinh,asinm,asinl);
|
1619
|
+
|
1620
|
+
Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
|
1621
|
+
|
1622
|
+
|
1623
|
+
/* Final rounding */
|
1624
|
+
|
1625
|
+
ReturnRoundToNearest3(acosh,acosm,acosl);
|
1626
|
+
|
1627
|
+
|
1628
|
+
}
|
1629
|
+
|
1630
|
+
double acos_ru(double x) {
|
1631
|
+
db_number xdb, zdb, acoshdb;
|
1632
|
+
double z, zp;
|
1633
|
+
int index;
|
1634
|
+
double asinh, asinm, asinl;
|
1635
|
+
double acosh, acosm, acosl;
|
1636
|
+
double acoshover, acosmover, acoslover;
|
1637
|
+
double p9h, p9m, p9l, sqrh, sqrm, sqrl;
|
1638
|
+
double t1h, t1m, t1l;
|
1639
|
+
double xabs;
|
1640
|
+
|
1641
|
+
/* Start already computations for argument reduction */
|
1642
|
+
|
1643
|
+
zdb.d = 1.0 + x * x;
|
1644
|
+
|
1645
|
+
xdb.d = x;
|
1646
|
+
|
1647
|
+
/* Special case handling */
|
1648
|
+
|
1649
|
+
/* Remove sign of x in floating-point */
|
1650
|
+
xabs = ABS(x);
|
1651
|
+
xdb.i[HI] &= 0x7fffffff;
|
1652
|
+
|
1653
|
+
/* If |x| < 2^(-28) we have
|
1654
|
+
|
1655
|
+
arccos(x) = (double-double(pi/2) - x) * ( 1 + xi )
|
1656
|
+
|
1657
|
+
with 0 <= xi < 2^(-87)
|
1658
|
+
|
1659
|
+
From 2^(-27) we have no bad rounding case longer than 5 bits
|
1660
|
+
more than the ulp of x, thus the approximation suffices.
|
1661
|
+
|
1662
|
+
*/
|
1663
|
+
if (xdb.i[HI] < ACOSSIMPLEBOUND) {
|
1664
|
+
Add212(&acosh,&acosm,PIHALFH,PIHALFM,-x);
|
1665
|
+
|
1666
|
+
/* acosh is the round-to-nearest of acos(x) in this domain
|
1667
|
+
|
1668
|
+
acosm is a at least 20 bit exact correction of the
|
1669
|
+
rounding error of this round-to-nearest rounding.
|
1670
|
+
|
1671
|
+
If acosh is the rounded up result of acos(x), the
|
1672
|
+
correction is negative and vice-versa.
|
1673
|
+
|
1674
|
+
*/
|
1675
|
+
|
1676
|
+
if (acosm < 0.0)
|
1677
|
+
return acosh;
|
1678
|
+
|
1679
|
+
/* Here the correction acosm is positive, acosh is
|
1680
|
+
therefore the rounded down result of acos(x).
|
1681
|
+
We add thus one ulp.
|
1682
|
+
*/
|
1683
|
+
|
1684
|
+
acoshdb.d = acosh;
|
1685
|
+
acoshdb.l++;
|
1686
|
+
|
1687
|
+
return acoshdb.d;
|
1688
|
+
|
1689
|
+
}
|
1690
|
+
|
1691
|
+
/* acos is defined on -1 <= x <= 1, elsewhere it is NaN */
|
1692
|
+
if (xdb.i[HI] >= 0x3ff00000) {
|
1693
|
+
if (x == 1.0) {
|
1694
|
+
return 0.0;
|
1695
|
+
}
|
1696
|
+
if (x == -1.0) {
|
1697
|
+
return PIRU;
|
1698
|
+
}
|
1699
|
+
return (x-x)/0.0; /* return NaN */
|
1700
|
+
}
|
1701
|
+
|
1702
|
+
/* Argument reduction:
|
1703
|
+
|
1704
|
+
We have 10 intervals and 3 paths:
|
1705
|
+
|
1706
|
+
- interval 0 => path 1 using p0
|
1707
|
+
- interval 1-8 => path 2 using p
|
1708
|
+
- interval 9 => path 3 using p9
|
1709
|
+
|
1710
|
+
*/
|
1711
|
+
|
1712
|
+
index = (0x000f0000 & zdb.i[HI]) >> 16;
|
1713
|
+
|
1714
|
+
/* 0 <= index <= 15
|
1715
|
+
|
1716
|
+
index approximates roughly x^2
|
1717
|
+
|
1718
|
+
Map indexes to intervals as follows:
|
1719
|
+
|
1720
|
+
0 -> 0
|
1721
|
+
1 -> 1
|
1722
|
+
...
|
1723
|
+
8 -> 8
|
1724
|
+
9 -> 9
|
1725
|
+
...
|
1726
|
+
15 -> 9
|
1727
|
+
|
1728
|
+
For this mapping, filter first the case 0 -> 0
|
1729
|
+
In consequence, 1 <= index <= 15, i.e.
|
1730
|
+
0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
|
1731
|
+
|
1732
|
+
0 -> 1
|
1733
|
+
...
|
1734
|
+
7 -> 8
|
1735
|
+
8 -> 9
|
1736
|
+
...
|
1737
|
+
15 -> 9
|
1738
|
+
|
1739
|
+
Thus it suffices to check the 3rd bit of index - 1 after the first filter.
|
1740
|
+
|
1741
|
+
*/
|
1742
|
+
|
1743
|
+
if (index == 0) {
|
1744
|
+
/* Path 1 using p0 */
|
1745
|
+
|
1746
|
+
p0_quick(&asinh, &asinm, x, xdb.i[HI]);
|
1747
|
+
|
1748
|
+
/* Recompose acos
|
1749
|
+
|
1750
|
+
No cancellation possible because
|
1751
|
+
|
1752
|
+
|asin(x)| <= 0.264 for |x| <= 0.26
|
1753
|
+
|
1754
|
+
*/
|
1755
|
+
|
1756
|
+
asinh = - asinh;
|
1757
|
+
asinm = - asinm;
|
1758
|
+
|
1759
|
+
Add22(&acosh,&acosm,PIHALFH,PIHALFM,asinh,asinm);
|
1760
|
+
|
1761
|
+
/* Rounding test */
|
1762
|
+
|
1763
|
+
TEST_AND_RETURN_RU(acosh, acosm, RDROUNDCST);
|
1764
|
+
|
1765
|
+
/* Rounding test failed, launch accurate phase */
|
1766
|
+
|
1767
|
+
#if EVAL_PERF
|
1768
|
+
crlibm_second_step_taken++;
|
1769
|
+
#endif
|
1770
|
+
|
1771
|
+
p0_accu(&asinh, &asinm, &asinl, x);
|
1772
|
+
|
1773
|
+
/* Recompose acos */
|
1774
|
+
|
1775
|
+
asinh = -asinh;
|
1776
|
+
asinm = -asinm;
|
1777
|
+
asinl = -asinl;
|
1778
|
+
|
1779
|
+
Add33(&acoshover,&acosmover,&acoslover,PIHALFH,PIHALFM,PIHALFL,asinh,asinm,asinl);
|
1780
|
+
|
1781
|
+
Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
|
1782
|
+
|
1783
|
+
/* Final rounding */
|
1784
|
+
|
1785
|
+
ReturnRoundUpwards3(acosh,acosm,acosl);
|
1786
|
+
|
1787
|
+
}
|
1788
|
+
|
1789
|
+
index--;
|
1790
|
+
if ((index & 0x8) != 0) {
|
1791
|
+
/* Path 3 using p9 */
|
1792
|
+
|
1793
|
+
/* Do argument reduction using a MI_9 as a midpoint value
|
1794
|
+
for the polynomial and compute exactly zp = 2 * (1 - x)
|
1795
|
+
for the asymptotical approximation using a square root.
|
1796
|
+
*/
|
1797
|
+
|
1798
|
+
z = xabs - MI_9;
|
1799
|
+
zp = 2.0 * (1.0 - xabs);
|
1800
|
+
|
1801
|
+
/* Polynomial approximation and square root extraction */
|
1802
|
+
|
1803
|
+
p9_quick(&p9h, &p9m, z);
|
1804
|
+
|
1805
|
+
sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
|
1806
|
+
|
1807
|
+
/* Reconstruction */
|
1808
|
+
|
1809
|
+
Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
|
1810
|
+
|
1811
|
+
if (x > 0.0) {
|
1812
|
+
acosh = t1h;
|
1813
|
+
acosm = t1m;
|
1814
|
+
} else {
|
1815
|
+
/* arccos(-x) = Pi - arccos(x) */
|
1816
|
+
t1h = - t1h;
|
1817
|
+
t1m = - t1m;
|
1818
|
+
|
1819
|
+
Add22(&acosh,&acosm,PIH,PIM,t1h,t1m);
|
1820
|
+
}
|
1821
|
+
|
1822
|
+
/* Rounding test */
|
1823
|
+
|
1824
|
+
TEST_AND_RETURN_RU(acosh, acosm, RDROUNDCST);
|
1825
|
+
|
1826
|
+
/* Rounding test failed, launch accurate phase */
|
1827
|
+
|
1828
|
+
#if EVAL_PERF
|
1829
|
+
crlibm_second_step_taken++;
|
1830
|
+
#endif
|
1831
|
+
|
1832
|
+
p9_accu(&p9h, &p9m, &p9l, z);
|
1833
|
+
|
1834
|
+
Sqrt13(&sqrh,&sqrm,&sqrl,zp);
|
1835
|
+
|
1836
|
+
/* Reconstruction */
|
1837
|
+
|
1838
|
+
Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
|
1839
|
+
|
1840
|
+
if (x > 0.0) {
|
1841
|
+
Renormalize3(&acosh,&acosm,&acosl,t1h,t1m,t1l);
|
1842
|
+
} else {
|
1843
|
+
/* arccos(-x) = Pi - arccos(x) */
|
1844
|
+
t1h = - t1h;
|
1845
|
+
t1m = - t1m;
|
1846
|
+
t1l = - t1l;
|
1847
|
+
|
1848
|
+
Add33(&acoshover,&acosmover,&acoslover,PIH,PIM,PIL,t1h,t1m,t1l);
|
1849
|
+
|
1850
|
+
Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
|
1851
|
+
}
|
1852
|
+
|
1853
|
+
/* Final rounding */
|
1854
|
+
|
1855
|
+
ReturnRoundUpwards3(acosh,acosm,acosl);
|
1856
|
+
|
1857
|
+
}
|
1858
|
+
|
1859
|
+
/* Path 2 using p */
|
1860
|
+
|
1861
|
+
/* Do argument reduction using a table value for
|
1862
|
+
the midpoint value
|
1863
|
+
*/
|
1864
|
+
|
1865
|
+
z = xabs - mi_i;
|
1866
|
+
|
1867
|
+
p_quick(&asinh, &asinm, z, index);
|
1868
|
+
|
1869
|
+
/* Recompose acos(x) out of asin(abs(x))
|
1870
|
+
|
1871
|
+
In the case of a substraction, we will cancel
|
1872
|
+
not more than 1 bit.
|
1873
|
+
|
1874
|
+
*/
|
1875
|
+
|
1876
|
+
if (x > 0.0) {
|
1877
|
+
asinh = - asinh;
|
1878
|
+
asinm = - asinm;
|
1879
|
+
}
|
1880
|
+
|
1881
|
+
Add22Cond(&acosh,&acosm,PIHALFH,PIHALFM,asinh,asinm);
|
1882
|
+
|
1883
|
+
/* Rounding test */
|
1884
|
+
|
1885
|
+
TEST_AND_RETURN_RU(acosh, acosm, RDROUNDCST);
|
1886
|
+
|
1887
|
+
/* Rounding test failed, launch accurate phase */
|
1888
|
+
|
1889
|
+
#if EVAL_PERF
|
1890
|
+
crlibm_second_step_taken++;
|
1891
|
+
#endif
|
1892
|
+
|
1893
|
+
p_accu(&asinh, &asinm, &asinl, z, index);
|
1894
|
+
|
1895
|
+
/* Recompose acos(x) out of asin(abs(x))
|
1896
|
+
|
1897
|
+
In the case of a substraction, we will cancel
|
1898
|
+
not more than 1 bit.
|
1899
|
+
|
1900
|
+
*/
|
1901
|
+
|
1902
|
+
if (x > 0.0) {
|
1903
|
+
asinh = - asinh;
|
1904
|
+
asinm = - asinm;
|
1905
|
+
asinl = - asinl;
|
1906
|
+
}
|
1907
|
+
|
1908
|
+
Add33Cond(&acoshover,&acosmover,&acoslover,PIHALFH,PIHALFM,PIHALFL,asinh,asinm,asinl);
|
1909
|
+
|
1910
|
+
Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
|
1911
|
+
|
1912
|
+
|
1913
|
+
/* Final rounding */
|
1914
|
+
|
1915
|
+
ReturnRoundUpwards3(acosh,acosm,acosl);
|
1916
|
+
|
1917
|
+
}
|
1918
|
+
|
1919
|
+
|
1920
|
+
double acos_rd(double x) {
|
1921
|
+
db_number xdb, zdb, acoshdb;
|
1922
|
+
double z, zp;
|
1923
|
+
int index;
|
1924
|
+
double asinh, asinm, asinl;
|
1925
|
+
double acosh, acosm, acosl;
|
1926
|
+
double acoshover, acosmover, acoslover;
|
1927
|
+
double p9h, p9m, p9l, sqrh, sqrm, sqrl;
|
1928
|
+
double t1h, t1m, t1l;
|
1929
|
+
double xabs;
|
1930
|
+
|
1931
|
+
/* Start already computations for argument reduction */
|
1932
|
+
|
1933
|
+
zdb.d = 1.0 + x * x;
|
1934
|
+
|
1935
|
+
xdb.d = x;
|
1936
|
+
|
1937
|
+
/* Special case handling */
|
1938
|
+
|
1939
|
+
/* Remove sign of x in floating-point */
|
1940
|
+
xabs = ABS(x);
|
1941
|
+
xdb.i[HI] &= 0x7fffffff;
|
1942
|
+
|
1943
|
+
/* If |x| < 2^(-28) we have
|
1944
|
+
|
1945
|
+
arccos(x) = (double-double(pi/2) - x) * ( 1 + xi )
|
1946
|
+
|
1947
|
+
with 0 <= xi < 2^(-87)
|
1948
|
+
|
1949
|
+
From 2^(-27) we have no bad rounding case longer than 5 bits
|
1950
|
+
more than the ulp of x, thus the approximation suffices.
|
1951
|
+
|
1952
|
+
*/
|
1953
|
+
if (xdb.i[HI] < ACOSSIMPLEBOUND) {
|
1954
|
+
Add212(&acosh,&acosm,PIHALFH,PIHALFM,-x);
|
1955
|
+
|
1956
|
+
/* acosh is the round-to-nearest of acos(x) in this domain
|
1957
|
+
|
1958
|
+
acosm is a at least 20 bit exact correction of the
|
1959
|
+
rounding error of this round-to-nearest rounding.
|
1960
|
+
|
1961
|
+
If acosh is the rounded up result of acos(x), the
|
1962
|
+
correction is negative and vice-versa.
|
1963
|
+
|
1964
|
+
*/
|
1965
|
+
|
1966
|
+
if (acosm > 0.0)
|
1967
|
+
return acosh;
|
1968
|
+
|
1969
|
+
/* Here the correction acosm is negative, acosh is
|
1970
|
+
therefore the rounded up result of acos(x).
|
1971
|
+
We subtract thus one ulp.
|
1972
|
+
*/
|
1973
|
+
|
1974
|
+
acoshdb.d = acosh;
|
1975
|
+
acoshdb.l--;
|
1976
|
+
|
1977
|
+
return acoshdb.d;
|
1978
|
+
|
1979
|
+
}
|
1980
|
+
|
1981
|
+
/* acos is defined on -1 <= x <= 1, elsewhere it is NaN */
|
1982
|
+
if (xdb.i[HI] >= 0x3ff00000) {
|
1983
|
+
if (x == 1.0) {
|
1984
|
+
return 0.0;
|
1985
|
+
}
|
1986
|
+
if (x == -1.0) {
|
1987
|
+
return PIH;
|
1988
|
+
}
|
1989
|
+
return (x-x)/0.0; /* return NaN */
|
1990
|
+
}
|
1991
|
+
|
1992
|
+
/* Argument reduction:
|
1993
|
+
|
1994
|
+
We have 10 intervals and 3 paths:
|
1995
|
+
|
1996
|
+
- interval 0 => path 1 using p0
|
1997
|
+
- interval 1-8 => path 2 using p
|
1998
|
+
- interval 9 => path 3 using p9
|
1999
|
+
|
2000
|
+
*/
|
2001
|
+
|
2002
|
+
index = (0x000f0000 & zdb.i[HI]) >> 16;
|
2003
|
+
|
2004
|
+
/* 0 <= index <= 15
|
2005
|
+
|
2006
|
+
index approximates roughly x^2
|
2007
|
+
|
2008
|
+
Map indexes to intervals as follows:
|
2009
|
+
|
2010
|
+
0 -> 0
|
2011
|
+
1 -> 1
|
2012
|
+
...
|
2013
|
+
8 -> 8
|
2014
|
+
9 -> 9
|
2015
|
+
...
|
2016
|
+
15 -> 9
|
2017
|
+
|
2018
|
+
For this mapping, filter first the case 0 -> 0
|
2019
|
+
In consequence, 1 <= index <= 15, i.e.
|
2020
|
+
0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
|
2021
|
+
|
2022
|
+
0 -> 1
|
2023
|
+
...
|
2024
|
+
7 -> 8
|
2025
|
+
8 -> 9
|
2026
|
+
...
|
2027
|
+
15 -> 9
|
2028
|
+
|
2029
|
+
Thus it suffices to check the 3rd bit of index - 1 after the first filter.
|
2030
|
+
|
2031
|
+
*/
|
2032
|
+
|
2033
|
+
if (index == 0) {
|
2034
|
+
/* Path 1 using p0 */
|
2035
|
+
|
2036
|
+
p0_quick(&asinh, &asinm, x, xdb.i[HI]);
|
2037
|
+
|
2038
|
+
/* Recompose acos
|
2039
|
+
|
2040
|
+
No cancellation possible because
|
2041
|
+
|
2042
|
+
|asin(x)| <= 0.264 for |x| <= 0.26
|
2043
|
+
|
2044
|
+
*/
|
2045
|
+
|
2046
|
+
asinh = - asinh;
|
2047
|
+
asinm = - asinm;
|
2048
|
+
|
2049
|
+
Add22(&acosh,&acosm,PIHALFH,PIHALFM,asinh,asinm);
|
2050
|
+
|
2051
|
+
/* Rounding test */
|
2052
|
+
|
2053
|
+
TEST_AND_RETURN_RD(acosh, acosm, RDROUNDCST);
|
2054
|
+
|
2055
|
+
/* Rounding test failed, launch accurate phase */
|
2056
|
+
|
2057
|
+
#if EVAL_PERF
|
2058
|
+
crlibm_second_step_taken++;
|
2059
|
+
#endif
|
2060
|
+
|
2061
|
+
p0_accu(&asinh, &asinm, &asinl, x);
|
2062
|
+
|
2063
|
+
/* Recompose acos */
|
2064
|
+
|
2065
|
+
asinh = -asinh;
|
2066
|
+
asinm = -asinm;
|
2067
|
+
asinl = -asinl;
|
2068
|
+
|
2069
|
+
Add33(&acoshover,&acosmover,&acoslover,PIHALFH,PIHALFM,PIHALFL,asinh,asinm,asinl);
|
2070
|
+
|
2071
|
+
Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
|
2072
|
+
|
2073
|
+
/* Final rounding */
|
2074
|
+
|
2075
|
+
ReturnRoundDownwards3(acosh,acosm,acosl);
|
2076
|
+
|
2077
|
+
}
|
2078
|
+
|
2079
|
+
index--;
|
2080
|
+
if ((index & 0x8) != 0) {
|
2081
|
+
/* Path 3 using p9 */
|
2082
|
+
|
2083
|
+
/* Do argument reduction using a MI_9 as a midpoint value
|
2084
|
+
for the polynomial and compute exactly zp = 2 * (1 - x)
|
2085
|
+
for the asymptotical approximation using a square root.
|
2086
|
+
*/
|
2087
|
+
|
2088
|
+
z = xabs - MI_9;
|
2089
|
+
zp = 2.0 * (1.0 - xabs);
|
2090
|
+
|
2091
|
+
/* Polynomial approximation and square root extraction */
|
2092
|
+
|
2093
|
+
p9_quick(&p9h, &p9m, z);
|
2094
|
+
|
2095
|
+
sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
|
2096
|
+
|
2097
|
+
/* Reconstruction */
|
2098
|
+
|
2099
|
+
Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
|
2100
|
+
|
2101
|
+
if (x > 0.0) {
|
2102
|
+
acosh = t1h;
|
2103
|
+
acosm = t1m;
|
2104
|
+
} else {
|
2105
|
+
/* arccos(-x) = Pi - arccos(x) */
|
2106
|
+
t1h = - t1h;
|
2107
|
+
t1m = - t1m;
|
2108
|
+
|
2109
|
+
Add22(&acosh,&acosm,PIH,PIM,t1h,t1m);
|
2110
|
+
}
|
2111
|
+
|
2112
|
+
/* Rounding test */
|
2113
|
+
|
2114
|
+
TEST_AND_RETURN_RD(acosh, acosm, RDROUNDCST);
|
2115
|
+
|
2116
|
+
/* Rounding test failed, launch accurate phase */
|
2117
|
+
|
2118
|
+
#if EVAL_PERF
|
2119
|
+
crlibm_second_step_taken++;
|
2120
|
+
#endif
|
2121
|
+
|
2122
|
+
p9_accu(&p9h, &p9m, &p9l, z);
|
2123
|
+
|
2124
|
+
Sqrt13(&sqrh,&sqrm,&sqrl,zp);
|
2125
|
+
|
2126
|
+
/* Reconstruction */
|
2127
|
+
|
2128
|
+
Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
|
2129
|
+
|
2130
|
+
if (x > 0.0) {
|
2131
|
+
Renormalize3(&acosh,&acosm,&acosl,t1h,t1m,t1l);
|
2132
|
+
} else {
|
2133
|
+
/* arccos(-x) = Pi - arccos(x) */
|
2134
|
+
t1h = - t1h;
|
2135
|
+
t1m = - t1m;
|
2136
|
+
t1l = - t1l;
|
2137
|
+
|
2138
|
+
Add33(&acoshover,&acosmover,&acoslover,PIH,PIM,PIL,t1h,t1m,t1l);
|
2139
|
+
|
2140
|
+
Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
|
2141
|
+
}
|
2142
|
+
|
2143
|
+
/* Final rounding */
|
2144
|
+
|
2145
|
+
ReturnRoundDownwards3(acosh,acosm,acosl);
|
2146
|
+
|
2147
|
+
}
|
2148
|
+
|
2149
|
+
/* Path 2 using p */
|
2150
|
+
|
2151
|
+
/* Do argument reduction using a table value for
|
2152
|
+
the midpoint value
|
2153
|
+
*/
|
2154
|
+
|
2155
|
+
z = xabs - mi_i;
|
2156
|
+
|
2157
|
+
p_quick(&asinh, &asinm, z, index);
|
2158
|
+
|
2159
|
+
/* Recompose acos(x) out of asin(abs(x))
|
2160
|
+
|
2161
|
+
In the case of a substraction, we will cancel
|
2162
|
+
not more than 1 bit.
|
2163
|
+
|
2164
|
+
*/
|
2165
|
+
|
2166
|
+
if (x > 0.0) {
|
2167
|
+
asinh = - asinh;
|
2168
|
+
asinm = - asinm;
|
2169
|
+
}
|
2170
|
+
|
2171
|
+
Add22Cond(&acosh,&acosm,PIHALFH,PIHALFM,asinh,asinm);
|
2172
|
+
|
2173
|
+
/* Rounding test */
|
2174
|
+
|
2175
|
+
TEST_AND_RETURN_RD(acosh, acosm, RDROUNDCST);
|
2176
|
+
|
2177
|
+
/* Rounding test failed, launch accurate phase */
|
2178
|
+
|
2179
|
+
#if EVAL_PERF
|
2180
|
+
crlibm_second_step_taken++;
|
2181
|
+
#endif
|
2182
|
+
|
2183
|
+
p_accu(&asinh, &asinm, &asinl, z, index);
|
2184
|
+
|
2185
|
+
/* Recompose acos(x) out of asin(abs(x))
|
2186
|
+
|
2187
|
+
In the case of a substraction, we will cancel
|
2188
|
+
not more than 1 bit.
|
2189
|
+
|
2190
|
+
*/
|
2191
|
+
|
2192
|
+
if (x > 0.0) {
|
2193
|
+
asinh = - asinh;
|
2194
|
+
asinm = - asinm;
|
2195
|
+
asinl = - asinl;
|
2196
|
+
}
|
2197
|
+
|
2198
|
+
Add33Cond(&acoshover,&acosmover,&acoslover,PIHALFH,PIHALFM,PIHALFL,asinh,asinm,asinl);
|
2199
|
+
|
2200
|
+
Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
|
2201
|
+
|
2202
|
+
/* Final rounding */
|
2203
|
+
|
2204
|
+
ReturnRoundDownwards3(acosh,acosm,acosl);
|
2205
|
+
|
2206
|
+
}
|
2207
|
+
|
2208
|
+
|
2209
|
+
|
2210
|
+
|
2211
|
+
double acospi_rn(double x) {
|
2212
|
+
db_number xdb, zdb;
|
2213
|
+
double z, zp;
|
2214
|
+
int index;
|
2215
|
+
double asinh, asinm, asinl;
|
2216
|
+
double asinpih, asinpim, asinpil;
|
2217
|
+
double acosh, acosm, acosl;
|
2218
|
+
double acoshover, acosmover, acoslover;
|
2219
|
+
double p9h, p9m, p9l, sqrh, sqrm, sqrl;
|
2220
|
+
double t1h, t1m, t1l;
|
2221
|
+
double t2h, t2m, t2l;
|
2222
|
+
double xabs;
|
2223
|
+
|
2224
|
+
/* Start already computations for argument reduction */
|
2225
|
+
|
2226
|
+
zdb.d = 1.0 + x * x;
|
2227
|
+
|
2228
|
+
xdb.d = x;
|
2229
|
+
|
2230
|
+
/* Special case handling */
|
2231
|
+
|
2232
|
+
/* Remove sign of x in floating-point */
|
2233
|
+
xabs = ABS(x);
|
2234
|
+
xdb.i[HI] &= 0x7fffffff;
|
2235
|
+
|
2236
|
+
/* If |x| < 2^(-54) we have
|
2237
|
+
|
2238
|
+
arccos(x)/pi = 1/2 * (1 + xi)
|
2239
|
+
|
2240
|
+
with 0 <= xi < 2^(-54)
|
2241
|
+
|
2242
|
+
So arcospi(x) = 0.5 in this case.
|
2243
|
+
|
2244
|
+
*/
|
2245
|
+
if (xdb.i[HI] < ACOSPISIMPLEBOUND) {
|
2246
|
+
return 0.5;
|
2247
|
+
}
|
2248
|
+
|
2249
|
+
/* acospi is defined on -1 <= x <= 1, elsewhere it is NaN */
|
2250
|
+
if (xdb.i[HI] >= 0x3ff00000) {
|
2251
|
+
if (x == 1.0) {
|
2252
|
+
return 0.0;
|
2253
|
+
}
|
2254
|
+
if (x == -1.0) {
|
2255
|
+
return 1.0;
|
2256
|
+
}
|
2257
|
+
return (x-x)/0.0; /* return NaN */
|
2258
|
+
}
|
2259
|
+
|
2260
|
+
/* Argument reduction:
|
2261
|
+
|
2262
|
+
We have 10 intervals and 3 paths:
|
2263
|
+
|
2264
|
+
- interval 0 => path 1 using p0
|
2265
|
+
- interval 1-8 => path 2 using p
|
2266
|
+
- interval 9 => path 3 using p9
|
2267
|
+
|
2268
|
+
*/
|
2269
|
+
|
2270
|
+
index = (0x000f0000 & zdb.i[HI]) >> 16;
|
2271
|
+
|
2272
|
+
/* 0 <= index <= 15
|
2273
|
+
|
2274
|
+
index approximates roughly x^2
|
2275
|
+
|
2276
|
+
Map indexes to intervals as follows:
|
2277
|
+
|
2278
|
+
0 -> 0
|
2279
|
+
1 -> 1
|
2280
|
+
...
|
2281
|
+
8 -> 8
|
2282
|
+
9 -> 9
|
2283
|
+
...
|
2284
|
+
15 -> 9
|
2285
|
+
|
2286
|
+
For this mapping, filter first the case 0 -> 0
|
2287
|
+
In consequence, 1 <= index <= 15, i.e.
|
2288
|
+
0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
|
2289
|
+
|
2290
|
+
0 -> 1
|
2291
|
+
...
|
2292
|
+
7 -> 8
|
2293
|
+
8 -> 9
|
2294
|
+
...
|
2295
|
+
15 -> 9
|
2296
|
+
|
2297
|
+
Thus it suffices to check the 3rd bit of index - 1 after the first filter.
|
2298
|
+
|
2299
|
+
*/
|
2300
|
+
|
2301
|
+
if (index == 0) {
|
2302
|
+
/* Path 1 using p0 */
|
2303
|
+
|
2304
|
+
p0_quick(&asinh, &asinm, x, xdb.i[HI]);
|
2305
|
+
|
2306
|
+
/* Recompose acos/pi
|
2307
|
+
|
2308
|
+
We have
|
2309
|
+
|
2310
|
+
arccos(x)/pi = 1/2 + (-1/pi) * arcsin(x)
|
2311
|
+
|
2312
|
+
No cancellation possible because
|
2313
|
+
|
2314
|
+
|asin(x)/pi| <= 0.0837 for |x| <= 0.26
|
2315
|
+
|
2316
|
+
*/
|
2317
|
+
|
2318
|
+
Mul22(&asinpih,&asinpim,MRECPRPIH,MRECPRPIM,asinh,asinm);
|
2319
|
+
|
2320
|
+
Add122(&acosh,&acosm,0.5,asinpih,asinpim);
|
2321
|
+
|
2322
|
+
/* Rounding test */
|
2323
|
+
|
2324
|
+
if(acosh == (acosh + (acosm * RNROUNDCST)))
|
2325
|
+
return acosh;
|
2326
|
+
|
2327
|
+
/* Rounding test failed, launch accurate phase */
|
2328
|
+
|
2329
|
+
#if EVAL_PERF
|
2330
|
+
crlibm_second_step_taken++;
|
2331
|
+
#endif
|
2332
|
+
|
2333
|
+
p0_accu(&asinh, &asinm, &asinl, x);
|
2334
|
+
|
2335
|
+
/* Recompose acos/pi */
|
2336
|
+
|
2337
|
+
Mul33(&asinpih,&asinpim,&asinpil,MRECPRPIH,MRECPRPIM,MRECPRPIL,asinh,asinm,asinl);
|
2338
|
+
|
2339
|
+
Add133(&acoshover,&acosmover,&acoslover,0.5,asinpih,asinpim,asinpil);
|
2340
|
+
|
2341
|
+
Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
|
2342
|
+
|
2343
|
+
/* Final rounding */
|
2344
|
+
|
2345
|
+
ReturnRoundToNearest3(acosh,acosm,acosl);
|
2346
|
+
|
2347
|
+
}
|
2348
|
+
|
2349
|
+
index--;
|
2350
|
+
if ((index & 0x8) != 0) {
|
2351
|
+
/* Path 3 using p9 */
|
2352
|
+
|
2353
|
+
/* Do argument reduction using a MI_9 as a midpoint value
|
2354
|
+
for the polynomial and compute exactly zp = 2 * (1 - x)
|
2355
|
+
for the asymptotical approximation using a square root.
|
2356
|
+
*/
|
2357
|
+
|
2358
|
+
z = xabs - MI_9;
|
2359
|
+
zp = 2.0 * (1.0 - xabs);
|
2360
|
+
|
2361
|
+
/* Polynomial approximation and square root extraction */
|
2362
|
+
|
2363
|
+
p9_quick(&p9h, &p9m, z);
|
2364
|
+
|
2365
|
+
sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
|
2366
|
+
|
2367
|
+
/* Reconstruction */
|
2368
|
+
|
2369
|
+
Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
|
2370
|
+
|
2371
|
+
Mul22(&t2h,&t2m,RECPRPIH,RECPRPIM,t1h,t1m);
|
2372
|
+
|
2373
|
+
if (x > 0.0) {
|
2374
|
+
acosh = t2h;
|
2375
|
+
acosm = t2m;
|
2376
|
+
} else {
|
2377
|
+
/* arccos(-x)/pi = 1 - arccos(x)/pi */
|
2378
|
+
t2h = - t2h;
|
2379
|
+
t2m = - t2m;
|
2380
|
+
|
2381
|
+
Add122(&acosh,&acosm,1.0,t2h,t2m);
|
2382
|
+
}
|
2383
|
+
|
2384
|
+
/* Rounding test */
|
2385
|
+
|
2386
|
+
if(acosh == (acosh + (acosm * RNROUNDCST)))
|
2387
|
+
return acosh;
|
2388
|
+
|
2389
|
+
/* Rounding test failed, launch accurate phase */
|
2390
|
+
|
2391
|
+
#if EVAL_PERF
|
2392
|
+
crlibm_second_step_taken++;
|
2393
|
+
#endif
|
2394
|
+
|
2395
|
+
p9_accu(&p9h, &p9m, &p9l, z);
|
2396
|
+
|
2397
|
+
Sqrt13(&sqrh,&sqrm,&sqrl,zp);
|
2398
|
+
|
2399
|
+
/* Reconstruction */
|
2400
|
+
|
2401
|
+
Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
|
2402
|
+
|
2403
|
+
Mul33(&t2h,&t2m,&t2l,RECPRPIH,RECPRPIM,RECPRPIL,t1h,t1m,t1l);
|
2404
|
+
|
2405
|
+
if (x > 0.0) {
|
2406
|
+
Renormalize3(&acosh,&acosm,&acosl,t2h,t2m,t2l);
|
2407
|
+
} else {
|
2408
|
+
/* arccos(-x)/pi = 1 - arccos(x)/pi */
|
2409
|
+
t2h = - t2h;
|
2410
|
+
t2m = - t2m;
|
2411
|
+
t2l = - t2l;
|
2412
|
+
|
2413
|
+
Add133(&acoshover,&acosmover,&acoslover,1.0,t2h,t2m,t2l);
|
2414
|
+
|
2415
|
+
Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
|
2416
|
+
}
|
2417
|
+
|
2418
|
+
/* Final rounding */
|
2419
|
+
|
2420
|
+
ReturnRoundToNearest3(acosh,acosm,acosl);
|
2421
|
+
|
2422
|
+
}
|
2423
|
+
|
2424
|
+
/* Path 2 using p */
|
2425
|
+
|
2426
|
+
/* Do argument reduction using a table value for
|
2427
|
+
the midpoint value
|
2428
|
+
*/
|
2429
|
+
|
2430
|
+
z = xabs - mi_i;
|
2431
|
+
|
2432
|
+
p_quick(&asinh, &asinm, z, index);
|
2433
|
+
|
2434
|
+
/* Recompose acos(x)/pi out of asin(abs(x))
|
2435
|
+
|
2436
|
+
In the case of a substraction, we will cancel
|
2437
|
+
not more than 1 bit.
|
2438
|
+
|
2439
|
+
*/
|
2440
|
+
|
2441
|
+
if (x > 0.0) {
|
2442
|
+
asinh = - asinh;
|
2443
|
+
asinm = - asinm;
|
2444
|
+
}
|
2445
|
+
|
2446
|
+
Mul22(&asinpih,&asinpim,RECPRPIH,RECPRPIM,asinh,asinm);
|
2447
|
+
|
2448
|
+
Add122Cond(&acosh,&acosm,0.5,asinpih,asinpim);
|
2449
|
+
|
2450
|
+
/* Rounding test */
|
2451
|
+
|
2452
|
+
if(acosh == (acosh + (acosm * RNROUNDCST)))
|
2453
|
+
return acosh;
|
2454
|
+
|
2455
|
+
/* Rounding test failed, launch accurate phase */
|
2456
|
+
|
2457
|
+
#if EVAL_PERF
|
2458
|
+
crlibm_second_step_taken++;
|
2459
|
+
#endif
|
2460
|
+
|
2461
|
+
p_accu(&asinh, &asinm, &asinl, z, index);
|
2462
|
+
|
2463
|
+
/* Recompose acos(x) out of asin(abs(x))
|
2464
|
+
|
2465
|
+
In the case of a substraction, we will cancel
|
2466
|
+
not more than 1 bit.
|
2467
|
+
|
2468
|
+
*/
|
2469
|
+
|
2470
|
+
if (x > 0.0) {
|
2471
|
+
asinh = - asinh;
|
2472
|
+
asinm = - asinm;
|
2473
|
+
asinl = - asinl;
|
2474
|
+
}
|
2475
|
+
|
2476
|
+
Mul33(&asinpih,&asinpim,&asinpil,RECPRPIH,RECPRPIM,RECPRPIL,asinh,asinm,asinl);
|
2477
|
+
|
2478
|
+
Add133Cond(&acoshover,&acosmover,&acoslover,0.5,asinpih,asinpim,asinpil);
|
2479
|
+
|
2480
|
+
Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
|
2481
|
+
|
2482
|
+
/* Final rounding */
|
2483
|
+
|
2484
|
+
ReturnRoundToNearest3(acosh,acosm,acosl);
|
2485
|
+
|
2486
|
+
}
|
2487
|
+
|
2488
|
+
double acospi_rd(double x) {
|
2489
|
+
db_number xdb, zdb;
|
2490
|
+
double z, zp;
|
2491
|
+
int index;
|
2492
|
+
double asinh, asinm, asinl;
|
2493
|
+
double asinpih, asinpim, asinpil;
|
2494
|
+
double acosh, acosm, acosl;
|
2495
|
+
double acoshover, acosmover, acoslover;
|
2496
|
+
double p9h, p9m, p9l, sqrh, sqrm, sqrl;
|
2497
|
+
double t1h, t1m, t1l;
|
2498
|
+
double t2h, t2m, t2l;
|
2499
|
+
double xabs;
|
2500
|
+
|
2501
|
+
/* Start already computations for argument reduction */
|
2502
|
+
|
2503
|
+
zdb.d = 1.0 + x * x;
|
2504
|
+
|
2505
|
+
xdb.d = x;
|
2506
|
+
|
2507
|
+
/* Special case handling */
|
2508
|
+
|
2509
|
+
/* Remove sign of x in floating-point */
|
2510
|
+
xabs = ABS(x);
|
2511
|
+
xdb.i[HI] &= 0x7fffffff;
|
2512
|
+
|
2513
|
+
/* If |x| < 2^(-54) we have
|
2514
|
+
|
2515
|
+
arccos(x)/pi = 1/2 * (1 + xi)
|
2516
|
+
|
2517
|
+
with 0 <= xi < 2^(-54)
|
2518
|
+
|
2519
|
+
Thus we have
|
2520
|
+
|
2521
|
+
acospi(x) =
|
2522
|
+
|
2523
|
+
(i) 0.5 if x <= 0
|
2524
|
+
(ii) 0.5 - 1/2 ulp(0.5) if x > 0
|
2525
|
+
|
2526
|
+
*/
|
2527
|
+
if (xdb.i[HI] < ACOSPISIMPLEBOUND) {
|
2528
|
+
if (x <= 0.0)
|
2529
|
+
return 0.5;
|
2530
|
+
|
2531
|
+
return HALFMINUSHALFULP;
|
2532
|
+
}
|
2533
|
+
|
2534
|
+
/* acospi is defined on -1 <= x <= 1, elsewhere it is NaN */
|
2535
|
+
if (xdb.i[HI] >= 0x3ff00000) {
|
2536
|
+
if (x == 1.0) {
|
2537
|
+
return 0.0;
|
2538
|
+
}
|
2539
|
+
if (x == -1.0) {
|
2540
|
+
return 1.0;
|
2541
|
+
}
|
2542
|
+
return (x-x)/0.0; /* return NaN */
|
2543
|
+
}
|
2544
|
+
|
2545
|
+
/* Argument reduction:
|
2546
|
+
|
2547
|
+
We have 10 intervals and 3 paths:
|
2548
|
+
|
2549
|
+
- interval 0 => path 1 using p0
|
2550
|
+
- interval 1-8 => path 2 using p
|
2551
|
+
- interval 9 => path 3 using p9
|
2552
|
+
|
2553
|
+
*/
|
2554
|
+
|
2555
|
+
index = (0x000f0000 & zdb.i[HI]) >> 16;
|
2556
|
+
|
2557
|
+
/* 0 <= index <= 15
|
2558
|
+
|
2559
|
+
index approximates roughly x^2
|
2560
|
+
|
2561
|
+
Map indexes to intervals as follows:
|
2562
|
+
|
2563
|
+
0 -> 0
|
2564
|
+
1 -> 1
|
2565
|
+
...
|
2566
|
+
8 -> 8
|
2567
|
+
9 -> 9
|
2568
|
+
...
|
2569
|
+
15 -> 9
|
2570
|
+
|
2571
|
+
For this mapping, filter first the case 0 -> 0
|
2572
|
+
In consequence, 1 <= index <= 15, i.e.
|
2573
|
+
0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
|
2574
|
+
|
2575
|
+
0 -> 1
|
2576
|
+
...
|
2577
|
+
7 -> 8
|
2578
|
+
8 -> 9
|
2579
|
+
...
|
2580
|
+
15 -> 9
|
2581
|
+
|
2582
|
+
Thus it suffices to check the 3rd bit of index - 1 after the first filter.
|
2583
|
+
|
2584
|
+
*/
|
2585
|
+
|
2586
|
+
if (index == 0) {
|
2587
|
+
/* Path 1 using p0 */
|
2588
|
+
|
2589
|
+
p0_quick(&asinh, &asinm, x, xdb.i[HI]);
|
2590
|
+
|
2591
|
+
/* Recompose acos/pi
|
2592
|
+
|
2593
|
+
We have
|
2594
|
+
|
2595
|
+
arccos(x)/pi = 1/2 + (-1/pi) * arcsin(x)
|
2596
|
+
|
2597
|
+
No cancellation possible because
|
2598
|
+
|
2599
|
+
|asin(x)/pi| <= 0.0837 for |x| <= 0.26
|
2600
|
+
|
2601
|
+
*/
|
2602
|
+
|
2603
|
+
Mul22(&asinpih,&asinpim,MRECPRPIH,MRECPRPIM,asinh,asinm);
|
2604
|
+
|
2605
|
+
Add122(&acosh,&acosm,0.5,asinpih,asinpim);
|
2606
|
+
|
2607
|
+
/* Rounding test */
|
2608
|
+
|
2609
|
+
TEST_AND_RETURN_RD(acosh, acosm, RDROUNDCST);
|
2610
|
+
|
2611
|
+
/* Rounding test failed, launch accurate phase */
|
2612
|
+
|
2613
|
+
#if EVAL_PERF
|
2614
|
+
crlibm_second_step_taken++;
|
2615
|
+
#endif
|
2616
|
+
|
2617
|
+
p0_accu(&asinh, &asinm, &asinl, x);
|
2618
|
+
|
2619
|
+
/* Recompose acos/pi */
|
2620
|
+
|
2621
|
+
Mul33(&asinpih,&asinpim,&asinpil,MRECPRPIH,MRECPRPIM,MRECPRPIL,asinh,asinm,asinl);
|
2622
|
+
|
2623
|
+
Add133(&acoshover,&acosmover,&acoslover,0.5,asinpih,asinpim,asinpil);
|
2624
|
+
|
2625
|
+
Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
|
2626
|
+
|
2627
|
+
/* Final rounding */
|
2628
|
+
|
2629
|
+
ReturnRoundDownwards3(acosh,acosm,acosl);
|
2630
|
+
|
2631
|
+
}
|
2632
|
+
|
2633
|
+
index--;
|
2634
|
+
if ((index & 0x8) != 0) {
|
2635
|
+
/* Path 3 using p9 */
|
2636
|
+
|
2637
|
+
/* Do argument reduction using a MI_9 as a midpoint value
|
2638
|
+
for the polynomial and compute exactly zp = 2 * (1 - x)
|
2639
|
+
for the asymptotical approximation using a square root.
|
2640
|
+
*/
|
2641
|
+
|
2642
|
+
z = xabs - MI_9;
|
2643
|
+
zp = 2.0 * (1.0 - xabs);
|
2644
|
+
|
2645
|
+
/* Polynomial approximation and square root extraction */
|
2646
|
+
|
2647
|
+
p9_quick(&p9h, &p9m, z);
|
2648
|
+
|
2649
|
+
sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
|
2650
|
+
|
2651
|
+
/* Reconstruction */
|
2652
|
+
|
2653
|
+
Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
|
2654
|
+
|
2655
|
+
Mul22(&t2h,&t2m,RECPRPIH,RECPRPIM,t1h,t1m);
|
2656
|
+
|
2657
|
+
if (x > 0.0) {
|
2658
|
+
acosh = t2h;
|
2659
|
+
acosm = t2m;
|
2660
|
+
} else {
|
2661
|
+
/* arccos(-x)/pi = 1 - arccos(x)/pi */
|
2662
|
+
t2h = - t2h;
|
2663
|
+
t2m = - t2m;
|
2664
|
+
|
2665
|
+
Add122(&acosh,&acosm,1.0,t2h,t2m);
|
2666
|
+
}
|
2667
|
+
|
2668
|
+
/* Rounding test */
|
2669
|
+
|
2670
|
+
TEST_AND_RETURN_RD(acosh, acosm, RDROUNDCST);
|
2671
|
+
|
2672
|
+
/* Rounding test failed, launch accurate phase */
|
2673
|
+
|
2674
|
+
#if EVAL_PERF
|
2675
|
+
crlibm_second_step_taken++;
|
2676
|
+
#endif
|
2677
|
+
|
2678
|
+
p9_accu(&p9h, &p9m, &p9l, z);
|
2679
|
+
|
2680
|
+
Sqrt13(&sqrh,&sqrm,&sqrl,zp);
|
2681
|
+
|
2682
|
+
/* Reconstruction */
|
2683
|
+
|
2684
|
+
Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
|
2685
|
+
|
2686
|
+
Mul33(&t2h,&t2m,&t2l,RECPRPIH,RECPRPIM,RECPRPIL,t1h,t1m,t1l);
|
2687
|
+
|
2688
|
+
if (x > 0.0) {
|
2689
|
+
Renormalize3(&acosh,&acosm,&acosl,t2h,t2m,t2l);
|
2690
|
+
} else {
|
2691
|
+
/* arccos(-x)/pi = 1 - arccos(x)/pi */
|
2692
|
+
t2h = - t2h;
|
2693
|
+
t2m = - t2m;
|
2694
|
+
t2l = - t2l;
|
2695
|
+
|
2696
|
+
Add133(&acoshover,&acosmover,&acoslover,1.0,t2h,t2m,t2l);
|
2697
|
+
|
2698
|
+
Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
|
2699
|
+
}
|
2700
|
+
|
2701
|
+
/* Final rounding */
|
2702
|
+
|
2703
|
+
ReturnRoundDownwards3(acosh,acosm,acosl);
|
2704
|
+
|
2705
|
+
}
|
2706
|
+
|
2707
|
+
/* Path 2 using p */
|
2708
|
+
|
2709
|
+
/* Do argument reduction using a table value for
|
2710
|
+
the midpoint value
|
2711
|
+
*/
|
2712
|
+
|
2713
|
+
z = xabs - mi_i;
|
2714
|
+
|
2715
|
+
p_quick(&asinh, &asinm, z, index);
|
2716
|
+
|
2717
|
+
/* Recompose acos(x)/pi out of asin(abs(x))
|
2718
|
+
|
2719
|
+
In the case of a substraction, we will cancel
|
2720
|
+
not more than 1 bit.
|
2721
|
+
|
2722
|
+
*/
|
2723
|
+
|
2724
|
+
if (x > 0.0) {
|
2725
|
+
asinh = - asinh;
|
2726
|
+
asinm = - asinm;
|
2727
|
+
}
|
2728
|
+
|
2729
|
+
Mul22(&asinpih,&asinpim,RECPRPIH,RECPRPIM,asinh,asinm);
|
2730
|
+
|
2731
|
+
Add122Cond(&acosh,&acosm,0.5,asinpih,asinpim);
|
2732
|
+
|
2733
|
+
/* Rounding test */
|
2734
|
+
|
2735
|
+
TEST_AND_RETURN_RD(acosh, acosm, RDROUNDCST);
|
2736
|
+
|
2737
|
+
/* Rounding test failed, launch accurate phase */
|
2738
|
+
|
2739
|
+
#if EVAL_PERF
|
2740
|
+
crlibm_second_step_taken++;
|
2741
|
+
#endif
|
2742
|
+
|
2743
|
+
p_accu(&asinh, &asinm, &asinl, z, index);
|
2744
|
+
|
2745
|
+
/* Recompose acos(x) out of asin(abs(x))
|
2746
|
+
|
2747
|
+
In the case of a substraction, we will cancel
|
2748
|
+
not more than 1 bit.
|
2749
|
+
|
2750
|
+
*/
|
2751
|
+
|
2752
|
+
if (x > 0.0) {
|
2753
|
+
asinh = - asinh;
|
2754
|
+
asinm = - asinm;
|
2755
|
+
asinl = - asinl;
|
2756
|
+
}
|
2757
|
+
|
2758
|
+
Mul33(&asinpih,&asinpim,&asinpil,RECPRPIH,RECPRPIM,RECPRPIL,asinh,asinm,asinl);
|
2759
|
+
|
2760
|
+
Add133Cond(&acoshover,&acosmover,&acoslover,0.5,asinpih,asinpim,asinpil);
|
2761
|
+
|
2762
|
+
Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
|
2763
|
+
|
2764
|
+
/* Final rounding */
|
2765
|
+
|
2766
|
+
ReturnRoundDownwards3(acosh,acosm,acosl);
|
2767
|
+
|
2768
|
+
}
|
2769
|
+
|
2770
|
+
double acospi_ru(double x) {
|
2771
|
+
db_number xdb, zdb;
|
2772
|
+
double z, zp;
|
2773
|
+
int index;
|
2774
|
+
double asinh, asinm, asinl;
|
2775
|
+
double asinpih, asinpim, asinpil;
|
2776
|
+
double acosh, acosm, acosl;
|
2777
|
+
double acoshover, acosmover, acoslover;
|
2778
|
+
double p9h, p9m, p9l, sqrh, sqrm, sqrl;
|
2779
|
+
double t1h, t1m, t1l;
|
2780
|
+
double t2h, t2m, t2l;
|
2781
|
+
double xabs;
|
2782
|
+
|
2783
|
+
/* Start already computations for argument reduction */
|
2784
|
+
|
2785
|
+
zdb.d = 1.0 + x * x;
|
2786
|
+
|
2787
|
+
xdb.d = x;
|
2788
|
+
|
2789
|
+
/* Special case handling */
|
2790
|
+
|
2791
|
+
/* Remove sign of x in floating-point */
|
2792
|
+
xabs = ABS(x);
|
2793
|
+
xdb.i[HI] &= 0x7fffffff;
|
2794
|
+
|
2795
|
+
/* If |x| < 2^(-54) we have
|
2796
|
+
|
2797
|
+
arccos(x)/pi = 1/2 * (1 + xi)
|
2798
|
+
|
2799
|
+
with 0 <= xi < 2^(-54)
|
2800
|
+
|
2801
|
+
Thus we have
|
2802
|
+
|
2803
|
+
acospi(x) =
|
2804
|
+
|
2805
|
+
(i) 0.5 if x >= 0
|
2806
|
+
(ii) 0.5 + 1 ulp(0.5) if x < 0
|
2807
|
+
|
2808
|
+
*/
|
2809
|
+
if (xdb.i[HI] < ACOSPISIMPLEBOUND) {
|
2810
|
+
if (x >= 0.0)
|
2811
|
+
return 0.5;
|
2812
|
+
|
2813
|
+
return 0.50000000000000011102230246251565404236316680908203125;
|
2814
|
+
}
|
2815
|
+
|
2816
|
+
/* acospi is defined on -1 <= x <= 1, elsewhere it is NaN */
|
2817
|
+
if (xdb.i[HI] >= 0x3ff00000) {
|
2818
|
+
if (x == 1.0) {
|
2819
|
+
return 0.0;
|
2820
|
+
}
|
2821
|
+
if (x == -1.0) {
|
2822
|
+
return 1.0;
|
2823
|
+
}
|
2824
|
+
return (x-x)/0.0; /* return NaN */
|
2825
|
+
}
|
2826
|
+
|
2827
|
+
/* Argument reduction:
|
2828
|
+
|
2829
|
+
We have 10 intervals and 3 paths:
|
2830
|
+
|
2831
|
+
- interval 0 => path 1 using p0
|
2832
|
+
- interval 1-8 => path 2 using p
|
2833
|
+
- interval 9 => path 3 using p9
|
2834
|
+
|
2835
|
+
*/
|
2836
|
+
|
2837
|
+
index = (0x000f0000 & zdb.i[HI]) >> 16;
|
2838
|
+
|
2839
|
+
/* 0 <= index <= 15
|
2840
|
+
|
2841
|
+
index approximates roughly x^2
|
2842
|
+
|
2843
|
+
Map indexes to intervals as follows:
|
2844
|
+
|
2845
|
+
0 -> 0
|
2846
|
+
1 -> 1
|
2847
|
+
...
|
2848
|
+
8 -> 8
|
2849
|
+
9 -> 9
|
2850
|
+
...
|
2851
|
+
15 -> 9
|
2852
|
+
|
2853
|
+
For this mapping, filter first the case 0 -> 0
|
2854
|
+
In consequence, 1 <= index <= 15, i.e.
|
2855
|
+
0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
|
2856
|
+
|
2857
|
+
0 -> 1
|
2858
|
+
...
|
2859
|
+
7 -> 8
|
2860
|
+
8 -> 9
|
2861
|
+
...
|
2862
|
+
15 -> 9
|
2863
|
+
|
2864
|
+
Thus it suffices to check the 3rd bit of index - 1 after the first filter.
|
2865
|
+
|
2866
|
+
*/
|
2867
|
+
|
2868
|
+
if (index == 0) {
|
2869
|
+
/* Path 1 using p0 */
|
2870
|
+
|
2871
|
+
p0_quick(&asinh, &asinm, x, xdb.i[HI]);
|
2872
|
+
|
2873
|
+
/* Recompose acos/pi
|
2874
|
+
|
2875
|
+
We have
|
2876
|
+
|
2877
|
+
arccos(x)/pi = 1/2 + (-1/pi) * arcsin(x)
|
2878
|
+
|
2879
|
+
No cancellation possible because
|
2880
|
+
|
2881
|
+
|asin(x)/pi| <= 0.0837 for |x| <= 0.26
|
2882
|
+
|
2883
|
+
*/
|
2884
|
+
|
2885
|
+
Mul22(&asinpih,&asinpim,MRECPRPIH,MRECPRPIM,asinh,asinm);
|
2886
|
+
|
2887
|
+
Add122(&acosh,&acosm,0.5,asinpih,asinpim);
|
2888
|
+
|
2889
|
+
/* Rounding test */
|
2890
|
+
|
2891
|
+
TEST_AND_RETURN_RU(acosh, acosm, RDROUNDCST);
|
2892
|
+
|
2893
|
+
/* Rounding test failed, launch accurate phase */
|
2894
|
+
|
2895
|
+
#if EVAL_PERF
|
2896
|
+
crlibm_second_step_taken++;
|
2897
|
+
#endif
|
2898
|
+
|
2899
|
+
p0_accu(&asinh, &asinm, &asinl, x);
|
2900
|
+
|
2901
|
+
/* Recompose acos/pi */
|
2902
|
+
|
2903
|
+
Mul33(&asinpih,&asinpim,&asinpil,MRECPRPIH,MRECPRPIM,MRECPRPIL,asinh,asinm,asinl);
|
2904
|
+
|
2905
|
+
Add133(&acoshover,&acosmover,&acoslover,0.5,asinpih,asinpim,asinpil);
|
2906
|
+
|
2907
|
+
Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
|
2908
|
+
|
2909
|
+
/* Final rounding */
|
2910
|
+
|
2911
|
+
ReturnRoundUpwards3(acosh,acosm,acosl);
|
2912
|
+
|
2913
|
+
}
|
2914
|
+
|
2915
|
+
index--;
|
2916
|
+
if ((index & 0x8) != 0) {
|
2917
|
+
/* Path 3 using p9 */
|
2918
|
+
|
2919
|
+
/* Do argument reduction using a MI_9 as a midpoint value
|
2920
|
+
for the polynomial and compute exactly zp = 2 * (1 - x)
|
2921
|
+
for the asymptotical approximation using a square root.
|
2922
|
+
*/
|
2923
|
+
|
2924
|
+
z = xabs - MI_9;
|
2925
|
+
zp = 2.0 * (1.0 - xabs);
|
2926
|
+
|
2927
|
+
/* Polynomial approximation and square root extraction */
|
2928
|
+
|
2929
|
+
p9_quick(&p9h, &p9m, z);
|
2930
|
+
|
2931
|
+
sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
|
2932
|
+
|
2933
|
+
/* Reconstruction */
|
2934
|
+
|
2935
|
+
Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
|
2936
|
+
|
2937
|
+
Mul22(&t2h,&t2m,RECPRPIH,RECPRPIM,t1h,t1m);
|
2938
|
+
|
2939
|
+
if (x > 0.0) {
|
2940
|
+
acosh = t2h;
|
2941
|
+
acosm = t2m;
|
2942
|
+
} else {
|
2943
|
+
/* arccos(-x)/pi = 1 - arccos(x)/pi */
|
2944
|
+
t2h = - t2h;
|
2945
|
+
t2m = - t2m;
|
2946
|
+
|
2947
|
+
Add122(&acosh,&acosm,1.0,t2h,t2m);
|
2948
|
+
}
|
2949
|
+
|
2950
|
+
/* Rounding test */
|
2951
|
+
|
2952
|
+
TEST_AND_RETURN_RU(acosh, acosm, RDROUNDCST);
|
2953
|
+
|
2954
|
+
/* Rounding test failed, launch accurate phase */
|
2955
|
+
|
2956
|
+
#if EVAL_PERF
|
2957
|
+
crlibm_second_step_taken++;
|
2958
|
+
#endif
|
2959
|
+
|
2960
|
+
p9_accu(&p9h, &p9m, &p9l, z);
|
2961
|
+
|
2962
|
+
Sqrt13(&sqrh,&sqrm,&sqrl,zp);
|
2963
|
+
|
2964
|
+
/* Reconstruction */
|
2965
|
+
|
2966
|
+
Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
|
2967
|
+
|
2968
|
+
Mul33(&t2h,&t2m,&t2l,RECPRPIH,RECPRPIM,RECPRPIL,t1h,t1m,t1l);
|
2969
|
+
|
2970
|
+
if (x > 0.0) {
|
2971
|
+
Renormalize3(&acosh,&acosm,&acosl,t2h,t2m,t2l);
|
2972
|
+
} else {
|
2973
|
+
/* arccos(-x)/pi = 1 - arccos(x)/pi */
|
2974
|
+
t2h = - t2h;
|
2975
|
+
t2m = - t2m;
|
2976
|
+
t2l = - t2l;
|
2977
|
+
|
2978
|
+
Add133(&acoshover,&acosmover,&acoslover,1.0,t2h,t2m,t2l);
|
2979
|
+
|
2980
|
+
Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
|
2981
|
+
}
|
2982
|
+
|
2983
|
+
/* Final rounding */
|
2984
|
+
|
2985
|
+
ReturnRoundUpwards3(acosh,acosm,acosl);
|
2986
|
+
|
2987
|
+
}
|
2988
|
+
|
2989
|
+
/* Path 2 using p */
|
2990
|
+
|
2991
|
+
/* Do argument reduction using a table value for
|
2992
|
+
the midpoint value
|
2993
|
+
*/
|
2994
|
+
|
2995
|
+
z = xabs - mi_i;
|
2996
|
+
|
2997
|
+
p_quick(&asinh, &asinm, z, index);
|
2998
|
+
|
2999
|
+
/* Recompose acos(x)/pi out of asin(abs(x))
|
3000
|
+
|
3001
|
+
In the case of a substraction, we will cancel
|
3002
|
+
not more than 1 bit.
|
3003
|
+
|
3004
|
+
*/
|
3005
|
+
|
3006
|
+
if (x > 0.0) {
|
3007
|
+
asinh = - asinh;
|
3008
|
+
asinm = - asinm;
|
3009
|
+
}
|
3010
|
+
|
3011
|
+
Mul22(&asinpih,&asinpim,RECPRPIH,RECPRPIM,asinh,asinm);
|
3012
|
+
|
3013
|
+
Add122Cond(&acosh,&acosm,0.5,asinpih,asinpim);
|
3014
|
+
|
3015
|
+
/* Rounding test */
|
3016
|
+
|
3017
|
+
TEST_AND_RETURN_RU(acosh, acosm, RDROUNDCST);
|
3018
|
+
|
3019
|
+
/* Rounding test failed, launch accurate phase */
|
3020
|
+
|
3021
|
+
#if EVAL_PERF
|
3022
|
+
crlibm_second_step_taken++;
|
3023
|
+
#endif
|
3024
|
+
|
3025
|
+
p_accu(&asinh, &asinm, &asinl, z, index);
|
3026
|
+
|
3027
|
+
/* Recompose acos(x) out of asin(abs(x))
|
3028
|
+
|
3029
|
+
In the case of a substraction, we will cancel
|
3030
|
+
not more than 1 bit.
|
3031
|
+
|
3032
|
+
*/
|
3033
|
+
|
3034
|
+
if (x > 0.0) {
|
3035
|
+
asinh = - asinh;
|
3036
|
+
asinm = - asinm;
|
3037
|
+
asinl = - asinl;
|
3038
|
+
}
|
3039
|
+
|
3040
|
+
Mul33(&asinpih,&asinpim,&asinpil,RECPRPIH,RECPRPIM,RECPRPIL,asinh,asinm,asinl);
|
3041
|
+
|
3042
|
+
Add133Cond(&acoshover,&acosmover,&acoslover,0.5,asinpih,asinpim,asinpil);
|
3043
|
+
|
3044
|
+
Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
|
3045
|
+
|
3046
|
+
/* Final rounding */
|
3047
|
+
|
3048
|
+
ReturnRoundUpwards3(acosh,acosm,acosl);
|
3049
|
+
|
3050
|
+
}
|
3051
|
+
|
3052
|
+
|
3053
|
+
|
3054
|
+
double asinpi_rn(double x) {
|
3055
|
+
db_number xdb, zdb, asinhdb, tempdb;
|
3056
|
+
double sign, z, zp;
|
3057
|
+
int index;
|
3058
|
+
double asinh, asinm, asinl;
|
3059
|
+
double asinpih, asinpim, asinpil;
|
3060
|
+
double asinpihover, asinpimover, asinpilover;
|
3061
|
+
double p9h, p9m, p9l, sqrh, sqrm, sqrl;
|
3062
|
+
double t1h, t1m, t1l;
|
3063
|
+
double t2h, t2m, t2l;
|
3064
|
+
double asinpi;
|
3065
|
+
double xabs;
|
3066
|
+
double xScaled;
|
3067
|
+
double xPih, xPim, xPil;
|
3068
|
+
double xPihover, xPimover, xPilover;
|
3069
|
+
double deltatemp, deltah, deltal;
|
3070
|
+
double temp1, temp2h, temp2l, temp3;
|
3071
|
+
double miulp;
|
3072
|
+
|
3073
|
+
/* Start already computations for argument reduction */
|
3074
|
+
|
3075
|
+
zdb.d = 1.0 + x * x;
|
3076
|
+
|
3077
|
+
xdb.d = x;
|
3078
|
+
|
3079
|
+
/* Special case handling */
|
3080
|
+
|
3081
|
+
/* Remove sign of x in floating-point */
|
3082
|
+
xabs = ABS(x);
|
3083
|
+
xdb.i[HI] &= 0x7fffffff;
|
3084
|
+
|
3085
|
+
/* If |x| < 2^(-60) we have
|
3086
|
+
|
3087
|
+
arcsin(x)/pi = x * triple-double(1/pi) * ( 1 + xi )
|
3088
|
+
|
3089
|
+
with 0 <= xi < 2^(-122)
|
3090
|
+
|
3091
|
+
We have no bad case worser than
|
3092
|
+
112 bits for asinpi(x) for |x| > 2^(-58)
|
3093
|
+
and the order 3 term of asinpi is still more
|
3094
|
+
far away.
|
3095
|
+
|
3096
|
+
*/
|
3097
|
+
if (xdb.i[HI] < ASINPISIMPLEBOUND) {
|
3098
|
+
/* For a faster path for the exact case,
|
3099
|
+
we check first if x = 0
|
3100
|
+
*/
|
3101
|
+
|
3102
|
+
if (x == 0.0)
|
3103
|
+
return x;
|
3104
|
+
|
3105
|
+
/* We want a relatively fast path for values where
|
3106
|
+
neither the input nor the output is subnormal
|
3107
|
+
because subnormal rounding is expensive.
|
3108
|
+
|
3109
|
+
Since
|
3110
|
+
|
3111
|
+
abs(double(asin(x)/pi)) <= 2^(-860)
|
3112
|
+
|
3113
|
+
for abs(x) <= 2^(-858), we filter for this (normal) value.
|
3114
|
+
|
3115
|
+
*/
|
3116
|
+
|
3117
|
+
if (xdb.i[HI] >= ASINPINOSUBNORMALBOUND) {
|
3118
|
+
|
3119
|
+
/* Here abs(x) >= 2^(-858).
|
3120
|
+
The result is therefore clearly normal and
|
3121
|
+
the double precision numbers in the triple-double
|
3122
|
+
representation of TD(1/pi) * x are all normal, too.
|
3123
|
+
|
3124
|
+
For speed, we use a two-step approach.
|
3125
|
+
We know that
|
3126
|
+
|
3127
|
+
|| x*DD(1/pi)/(asin(x)/pi) - 1 ||_[-2^(-60);2^(-60)]^\infty <= 2^(-107.8) <= 2^(-80)
|
3128
|
+
|
3129
|
+
*/
|
3130
|
+
|
3131
|
+
Mul122(&xPih,&xPim,x,RECPRPIH,RECPRPIM);
|
3132
|
+
|
3133
|
+
if(xPih == (xPih + (xPim * RNROUNDCSTASINPI)))
|
3134
|
+
return xPih;
|
3135
|
+
|
3136
|
+
Mul133(&xPihover,&xPimover,&xPilover,x,RECPRPIH,RECPRPIM,RECPRPIL);
|
3137
|
+
|
3138
|
+
Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
|
3139
|
+
|
3140
|
+
ReturnRoundToNearest3(xPih,xPim,xPil);
|
3141
|
+
|
3142
|
+
}
|
3143
|
+
|
3144
|
+
/* Here abs(x) < 2^(-858)
|
3145
|
+
|
3146
|
+
Because of subnormals and especially because
|
3147
|
+
of the fact that 1/pi < 1, we must scale x
|
3148
|
+
appropriately. We compute hence:
|
3149
|
+
|
3150
|
+
asinpi(x) = round( ((x * 2^(1000)) * triple-double(1/pi)) * 2^(-1000))
|
3151
|
+
|
3152
|
+
where the rounding procedure works temporarily on the scaled
|
3153
|
+
intermediate.
|
3154
|
+
*/
|
3155
|
+
|
3156
|
+
xScaled = x * TWO1000;
|
3157
|
+
|
3158
|
+
Mul133(&xPihover,&xPimover,&xPilover,xScaled,RECPRPIH,RECPRPIM,RECPRPIL);
|
3159
|
+
|
3160
|
+
Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
|
3161
|
+
|
3162
|
+
/* Produce a (possibly) subnormal intermediate rounding */
|
3163
|
+
|
3164
|
+
asinhdb.d = xPih * TWOM1000;
|
3165
|
+
|
3166
|
+
#if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
|
3167
|
+
tempdb.i[HI] = asinhdb.i[HI];
|
3168
|
+
tempdb.i[LO] = asinhdb.i[LO];
|
3169
|
+
asinhdb.i[LO] = tempdb.i[LO];
|
3170
|
+
asinhdb.i[HI] = tempdb.i[HI];
|
3171
|
+
#else
|
3172
|
+
tempdb.d = asinhdb.d;
|
3173
|
+
#endif
|
3174
|
+
|
3175
|
+
/* Rescale the result */
|
3176
|
+
|
3177
|
+
temp1 = asinhdb.d * TWO1000;
|
3178
|
+
|
3179
|
+
/* Compute the scaled error, the operation is exact by Sterbenz' lemma */
|
3180
|
+
|
3181
|
+
deltatemp = xPih - temp1;
|
3182
|
+
|
3183
|
+
/* Sum up the errors, representing them on a double-double
|
3184
|
+
|
3185
|
+
This is exact for the normal rounding case and the error
|
3186
|
+
is neglectable in the subnormal rounding case.
|
3187
|
+
*/
|
3188
|
+
|
3189
|
+
Add12Cond(temp2h,temp2l,deltatemp,xPim);
|
3190
|
+
temp3 = temp2l + xPil;
|
3191
|
+
Add12(deltah,deltal,temp2h,temp3);
|
3192
|
+
|
3193
|
+
/* Compute now a scaled 1/2 ulp of the intermediate result
|
3194
|
+
in the direction of delta
|
3195
|
+
*/
|
3196
|
+
|
3197
|
+
if ((x >= 0.0) ^ (deltah >= 0.0))
|
3198
|
+
tempdb.l--;
|
3199
|
+
else
|
3200
|
+
tempdb.l++;
|
3201
|
+
|
3202
|
+
miulp = TWO999 * (tempdb.d - asinhdb.d);
|
3203
|
+
|
3204
|
+
/* We must correct the intermediate rounding
|
3205
|
+
if the error on deltah + deltal is greater
|
3206
|
+
in absolute value than miulp = 1/2 ulp in the
|
3207
|
+
right direction.
|
3208
|
+
*/
|
3209
|
+
|
3210
|
+
if (ABS(deltah) < ABS(miulp)) {
|
3211
|
+
/* deltah is less than miulp, deltal is less than
|
3212
|
+
1/2 the ulp of deltah. Thus deltah + deltal is
|
3213
|
+
less than miulp. We do not need to correct
|
3214
|
+
the intermediate rounding.
|
3215
|
+
*/
|
3216
|
+
return asinhdb.d;
|
3217
|
+
}
|
3218
|
+
|
3219
|
+
if (ABS(deltah) > ABS(miulp)) {
|
3220
|
+
/* deltah is greater than miulp and deltal cannot correct it.
|
3221
|
+
We must correct the rounding.
|
3222
|
+
|
3223
|
+
tempdb.d (= asinhdb.d +/- 1 ulp) is the correct rounding.
|
3224
|
+
*/
|
3225
|
+
return tempdb.d;
|
3226
|
+
}
|
3227
|
+
|
3228
|
+
/* Here deltah and miulp are equal in absolute value
|
3229
|
+
|
3230
|
+
We must correct the intermediate rounding iff the sign of deltal
|
3231
|
+
and deltah are the same.
|
3232
|
+
|
3233
|
+
*/
|
3234
|
+
|
3235
|
+
if ((deltah >= 0.0) ^ (deltal >= 0.0)) {
|
3236
|
+
/* Here the sign is different, we return the
|
3237
|
+
intermediate rounding asinhdb.d
|
3238
|
+
*/
|
3239
|
+
return asinhdb.d;
|
3240
|
+
}
|
3241
|
+
|
3242
|
+
/* Return the corrected result tempdb.d */
|
3243
|
+
|
3244
|
+
return tempdb.d;
|
3245
|
+
}
|
3246
|
+
|
3247
|
+
/* asinpi is defined on -1 <= x <= 1, elsewhere it is NaN */
|
3248
|
+
if (xdb.i[HI] >= 0x3ff00000) {
|
3249
|
+
if (x == 1.0) {
|
3250
|
+
return 0.5;
|
3251
|
+
}
|
3252
|
+
if (x == -1.0) {
|
3253
|
+
return - 0.5;
|
3254
|
+
}
|
3255
|
+
return (x-x)/0.0; /* return NaN */
|
3256
|
+
}
|
3257
|
+
|
3258
|
+
/* Argument reduction:
|
3259
|
+
|
3260
|
+
We have 10 intervals and 3 paths:
|
3261
|
+
|
3262
|
+
- interval 0 => path 1 using p0
|
3263
|
+
- interval 1-8 => path 2 using p
|
3264
|
+
- interval 9 => path 3 using p9
|
3265
|
+
|
3266
|
+
*/
|
3267
|
+
|
3268
|
+
index = (0x000f0000 & zdb.i[HI]) >> 16;
|
3269
|
+
|
3270
|
+
/* 0 <= index <= 15
|
3271
|
+
|
3272
|
+
index approximates roughly x^2
|
3273
|
+
|
3274
|
+
Map indexes to intervals as follows:
|
3275
|
+
|
3276
|
+
0 -> 0
|
3277
|
+
1 -> 1
|
3278
|
+
...
|
3279
|
+
8 -> 8
|
3280
|
+
9 -> 9
|
3281
|
+
...
|
3282
|
+
15 -> 9
|
3283
|
+
|
3284
|
+
For this mapping, filter first the case 0 -> 0
|
3285
|
+
In consequence, 1 <= index <= 15, i.e.
|
3286
|
+
0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
|
3287
|
+
|
3288
|
+
0 -> 1
|
3289
|
+
...
|
3290
|
+
7 -> 8
|
3291
|
+
8 -> 9
|
3292
|
+
...
|
3293
|
+
15 -> 9
|
3294
|
+
|
3295
|
+
Thus it suffices to check the 3rd bit of index - 1 after the first filter.
|
3296
|
+
|
3297
|
+
*/
|
3298
|
+
|
3299
|
+
if (index == 0) {
|
3300
|
+
/* Path 1 using p0 */
|
3301
|
+
|
3302
|
+
p0_quick(&asinh, &asinm, x, xdb.i[HI]);
|
3303
|
+
|
3304
|
+
Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
|
3305
|
+
|
3306
|
+
/* Rounding test */
|
3307
|
+
|
3308
|
+
if(asinpih == (asinpih + (asinpim * RNROUNDCST)))
|
3309
|
+
return asinpih;
|
3310
|
+
|
3311
|
+
/* Rounding test failed, launch accurate phase */
|
3312
|
+
|
3313
|
+
#if EVAL_PERF
|
3314
|
+
crlibm_second_step_taken++;
|
3315
|
+
#endif
|
3316
|
+
|
3317
|
+
p0_accu(&asinh, &asinm, &asinl, x);
|
3318
|
+
|
3319
|
+
Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
|
3320
|
+
|
3321
|
+
Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
|
3322
|
+
|
3323
|
+
/* Final rounding */
|
3324
|
+
|
3325
|
+
ReturnRoundToNearest3(asinpih,asinpim,asinpil);
|
3326
|
+
|
3327
|
+
}
|
3328
|
+
|
3329
|
+
/* Strip off the sign of argument x */
|
3330
|
+
sign = 1.0;
|
3331
|
+
if (x < 0.0) sign = -sign;
|
3332
|
+
|
3333
|
+
index--;
|
3334
|
+
if ((index & 0x8) != 0) {
|
3335
|
+
/* Path 3 using p9 */
|
3336
|
+
|
3337
|
+
/* Do argument reduction using a MI_9 as a midpoint value
|
3338
|
+
for the polynomial and compute exactly zp = 2 * (1 - x)
|
3339
|
+
for the asymptotical approximation using a square root.
|
3340
|
+
*/
|
3341
|
+
|
3342
|
+
z = xabs - MI_9;
|
3343
|
+
zp = 2.0 * (1.0 - xabs);
|
3344
|
+
|
3345
|
+
/* Polynomial approximation and square root extraction */
|
3346
|
+
|
3347
|
+
p9_quick(&p9h, &p9m, z);
|
3348
|
+
p9h = -p9h;
|
3349
|
+
p9m = -p9m;
|
3350
|
+
|
3351
|
+
sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
|
3352
|
+
|
3353
|
+
/* Reconstruction */
|
3354
|
+
|
3355
|
+
Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
|
3356
|
+
Mul22(&t2h,&t2m,t1h,t1m,RECPRPIH,RECPRPIM);
|
3357
|
+
|
3358
|
+
Add122(&asinpih,&asinpim,0.5,t2h,t2m);
|
3359
|
+
|
3360
|
+
/* Rounding test */
|
3361
|
+
|
3362
|
+
if(asinpih == (asinpih + (asinpim * RNROUNDCST)))
|
3363
|
+
return sign * asinpih;
|
3364
|
+
|
3365
|
+
/* Rounding test failed, launch accurate phase */
|
3366
|
+
|
3367
|
+
#if EVAL_PERF
|
3368
|
+
crlibm_second_step_taken++;
|
3369
|
+
#endif
|
3370
|
+
|
3371
|
+
p9_accu(&p9h, &p9m, &p9l, z);
|
3372
|
+
p9h = -p9h;
|
3373
|
+
p9m = -p9m;
|
3374
|
+
p9l = -p9l;
|
3375
|
+
|
3376
|
+
Sqrt13(&sqrh,&sqrm,&sqrl,zp);
|
3377
|
+
|
3378
|
+
/* Reconstruction */
|
3379
|
+
|
3380
|
+
Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
|
3381
|
+
Mul33(&t2h,&t2m,&t2l,t1h,t1m,t1l,RECPRPIH,RECPRPIM,RECPRPIL);
|
3382
|
+
|
3383
|
+
Add133(&asinpihover,&asinpimover,&asinpilover,0.5,t2h,t2m,t2l);
|
3384
|
+
|
3385
|
+
Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
|
3386
|
+
|
3387
|
+
/* Final rounding */
|
3388
|
+
|
3389
|
+
RoundToNearest3(&asinpi,asinpih,asinpim,asinpil);
|
3390
|
+
|
3391
|
+
return sign * asinpi;
|
3392
|
+
|
3393
|
+
}
|
3394
|
+
|
3395
|
+
/* Path 2 using p */
|
3396
|
+
|
3397
|
+
/* Do argument reduction using a table value for
|
3398
|
+
the midpoint value
|
3399
|
+
*/
|
3400
|
+
|
3401
|
+
z = xabs - mi_i;
|
3402
|
+
|
3403
|
+
p_quick(&asinh, &asinm, z, index);
|
3404
|
+
|
3405
|
+
Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
|
3406
|
+
|
3407
|
+
/* Rounding test */
|
3408
|
+
|
3409
|
+
if(asinpih == (asinpih + (asinpim * RNROUNDCST)))
|
3410
|
+
return sign * asinpih;
|
3411
|
+
|
3412
|
+
/* Rounding test failed, launch accurate phase */
|
3413
|
+
|
3414
|
+
#if EVAL_PERF
|
3415
|
+
crlibm_second_step_taken++;
|
3416
|
+
#endif
|
3417
|
+
|
3418
|
+
p_accu(&asinh, &asinm, &asinl, z, index);
|
3419
|
+
|
3420
|
+
Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
|
3421
|
+
|
3422
|
+
Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
|
3423
|
+
|
3424
|
+
/* Final rounding */
|
3425
|
+
|
3426
|
+
RoundToNearest3(&asinpi,asinpih,asinpim,asinpil);
|
3427
|
+
|
3428
|
+
return sign * asinpi;
|
3429
|
+
|
3430
|
+
}
|
3431
|
+
|
3432
|
+
double asinpi_rd(double x) {
|
3433
|
+
db_number xdb, zdb, asinhdb;
|
3434
|
+
double sign, z, zp;
|
3435
|
+
int index;
|
3436
|
+
double asinh, asinm, asinl;
|
3437
|
+
double asinpih, asinpim, asinpil;
|
3438
|
+
double asinpihover, asinpimover, asinpilover;
|
3439
|
+
double p9h, p9m, p9l, sqrh, sqrm, sqrl;
|
3440
|
+
double t1h, t1m, t1l;
|
3441
|
+
double t2h, t2m, t2l;
|
3442
|
+
double asinpi;
|
3443
|
+
double xabs;
|
3444
|
+
double xScaled;
|
3445
|
+
double xPih, xPim, xPil;
|
3446
|
+
double xPihover, xPimover, xPilover;
|
3447
|
+
#if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
|
3448
|
+
db_number tempdb;
|
3449
|
+
#endif
|
3450
|
+
double deltatemp, deltah, deltal;
|
3451
|
+
double temp1, temp2h, temp2l, temp3;
|
3452
|
+
|
3453
|
+
/* Start already computations for argument reduction */
|
3454
|
+
|
3455
|
+
zdb.d = 1.0 + x * x;
|
3456
|
+
|
3457
|
+
xdb.d = x;
|
3458
|
+
|
3459
|
+
/* Special case handling */
|
3460
|
+
|
3461
|
+
/* Remove sign of x in floating-point */
|
3462
|
+
xabs = ABS(x);
|
3463
|
+
xdb.i[HI] &= 0x7fffffff;
|
3464
|
+
|
3465
|
+
/* If |x| < 2^(-60) we have
|
3466
|
+
|
3467
|
+
arcsin(x)/pi = x * triple-double(1/pi) * ( 1 + xi )
|
3468
|
+
|
3469
|
+
with 0 <= xi < 2^(-122)
|
3470
|
+
|
3471
|
+
We have no bad case worser than
|
3472
|
+
112 bits for asinpi(x) for |x| > 2^(-58)
|
3473
|
+
and the order 3 term of asinpi is still more
|
3474
|
+
far away.
|
3475
|
+
|
3476
|
+
*/
|
3477
|
+
if (xdb.i[HI] < ASINPISIMPLEBOUND) {
|
3478
|
+
/* For a faster path for the exact case,
|
3479
|
+
we check first if x = 0
|
3480
|
+
*/
|
3481
|
+
|
3482
|
+
if (x == 0.0)
|
3483
|
+
return x;
|
3484
|
+
|
3485
|
+
/* We want a relatively fast path for values where
|
3486
|
+
neither the input nor the output is subnormal
|
3487
|
+
because subnormal rounding is expensive.
|
3488
|
+
|
3489
|
+
Since
|
3490
|
+
|
3491
|
+
abs(double(asin(x)/pi)) <= 2^(-860)
|
3492
|
+
|
3493
|
+
for abs(x) <= 2^(-858), we filter for this (normal) value.
|
3494
|
+
|
3495
|
+
*/
|
3496
|
+
|
3497
|
+
if (xdb.i[HI] >= ASINPINOSUBNORMALBOUND) {
|
3498
|
+
|
3499
|
+
/* Here abs(x) >= 2^(-858).
|
3500
|
+
The result is therefore clearly normal and
|
3501
|
+
the double precision numbers in the triple-double
|
3502
|
+
representation of TD(1/pi) * x are all normal, too.
|
3503
|
+
|
3504
|
+
For speed, we use a two-step approach.
|
3505
|
+
We know that
|
3506
|
+
|
3507
|
+
|| x*DD(1/pi)/(asin(x)/pi) - 1 ||_[-2^(-60);2^(-60)]^\infty <= 2^(-107.8) <= 2^(-80)
|
3508
|
+
|
3509
|
+
*/
|
3510
|
+
|
3511
|
+
Mul122(&xPih,&xPim,x,RECPRPIH,RECPRPIM);
|
3512
|
+
|
3513
|
+
TEST_AND_RETURN_RD(xPih, xPim, RDROUNDCSTASINPI);
|
3514
|
+
|
3515
|
+
Mul133(&xPihover,&xPimover,&xPilover,x,RECPRPIH,RECPRPIM,RECPRPIL);
|
3516
|
+
|
3517
|
+
Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
|
3518
|
+
|
3519
|
+
ReturnRoundDownwards3(xPih,xPim,xPil);
|
3520
|
+
|
3521
|
+
}
|
3522
|
+
|
3523
|
+
/* Here abs(x) < 2^(-858)
|
3524
|
+
|
3525
|
+
Because of subnormals and especially because
|
3526
|
+
of the fact that 1/pi < 1, we must scale x
|
3527
|
+
appropriately. We compute hence:
|
3528
|
+
|
3529
|
+
asinpi(x) = round( ((x * 2^(1000)) * triple-double(1/pi)) * 2^(-1000))
|
3530
|
+
|
3531
|
+
where the rounding procedure works temporarily on the scaled
|
3532
|
+
intermediate.
|
3533
|
+
*/
|
3534
|
+
|
3535
|
+
xScaled = x * TWO1000;
|
3536
|
+
|
3537
|
+
Mul133(&xPihover,&xPimover,&xPilover,xScaled,RECPRPIH,RECPRPIM,RECPRPIL);
|
3538
|
+
|
3539
|
+
Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
|
3540
|
+
|
3541
|
+
/* Produce a (possibly) subnormal intermediate rounding */
|
3542
|
+
|
3543
|
+
asinhdb.d = xPih * TWOM1000;
|
3544
|
+
|
3545
|
+
#if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
|
3546
|
+
tempdb.i[HI] = asinhdb.i[HI];
|
3547
|
+
tempdb.i[LO] = asinhdb.i[LO];
|
3548
|
+
asinhdb.i[LO] = tempdb.i[LO];
|
3549
|
+
asinhdb.i[HI] = tempdb.i[HI];
|
3550
|
+
#endif
|
3551
|
+
|
3552
|
+
|
3553
|
+
/* Rescale the result */
|
3554
|
+
|
3555
|
+
temp1 = asinhdb.d * TWO1000;
|
3556
|
+
|
3557
|
+
/* Compute the scaled error, the operation is exact by Sterbenz' lemma */
|
3558
|
+
|
3559
|
+
deltatemp = xPih - temp1;
|
3560
|
+
|
3561
|
+
/* Sum up the errors, representing them on a double-double
|
3562
|
+
|
3563
|
+
This is exact for the normal rounding case and the error
|
3564
|
+
is neglectable in the subnormal rounding case.
|
3565
|
+
*/
|
3566
|
+
|
3567
|
+
Add12Cond(temp2h,temp2l,deltatemp,xPim);
|
3568
|
+
temp3 = temp2l + xPil;
|
3569
|
+
Add12(deltah,deltal,temp2h,temp3);
|
3570
|
+
|
3571
|
+
/* We are doing directed rounding. Thus we must correct the rounding
|
3572
|
+
if the sign of the error is not correct
|
3573
|
+
RD -> sign must be positive for correct rounding
|
3574
|
+
RU -> sign must be negative for correct rounding
|
3575
|
+
RZ -> sign must be positive for positive x and negative for negative x
|
3576
|
+
*/
|
3577
|
+
|
3578
|
+
if (deltah >= 0.0) {
|
3579
|
+
/* The sign is correct, return the intermediate rounding */
|
3580
|
+
return asinhdb.d;
|
3581
|
+
}
|
3582
|
+
|
3583
|
+
/* The sign is not correct
|
3584
|
+
|
3585
|
+
RD -> subtract 1 ulp
|
3586
|
+
RU -> add 1 ulp
|
3587
|
+
RZ -> subtract 1 ulp if x positive, add 1 ulp if x negative
|
3588
|
+
*/
|
3589
|
+
|
3590
|
+
if (x < 0.0)
|
3591
|
+
asinhdb.l++;
|
3592
|
+
else
|
3593
|
+
asinhdb.l--;
|
3594
|
+
|
3595
|
+
return asinhdb.d;
|
3596
|
+
}
|
3597
|
+
|
3598
|
+
/* asinpi is defined on -1 <= x <= 1, elsewhere it is NaN */
|
3599
|
+
if (xdb.i[HI] >= 0x3ff00000) {
|
3600
|
+
if (x == 1.0) {
|
3601
|
+
return 0.5;
|
3602
|
+
}
|
3603
|
+
if (x == -1.0) {
|
3604
|
+
return - 0.5;
|
3605
|
+
}
|
3606
|
+
return (x-x)/0.0; /* return NaN */
|
3607
|
+
}
|
3608
|
+
|
3609
|
+
/* Argument reduction:
|
3610
|
+
|
3611
|
+
We have 10 intervals and 3 paths:
|
3612
|
+
|
3613
|
+
- interval 0 => path 1 using p0
|
3614
|
+
- interval 1-8 => path 2 using p
|
3615
|
+
- interval 9 => path 3 using p9
|
3616
|
+
|
3617
|
+
*/
|
3618
|
+
|
3619
|
+
index = (0x000f0000 & zdb.i[HI]) >> 16;
|
3620
|
+
|
3621
|
+
/* 0 <= index <= 15
|
3622
|
+
|
3623
|
+
index approximates roughly x^2
|
3624
|
+
|
3625
|
+
Map indexes to intervals as follows:
|
3626
|
+
|
3627
|
+
0 -> 0
|
3628
|
+
1 -> 1
|
3629
|
+
...
|
3630
|
+
8 -> 8
|
3631
|
+
9 -> 9
|
3632
|
+
...
|
3633
|
+
15 -> 9
|
3634
|
+
|
3635
|
+
For this mapping, filter first the case 0 -> 0
|
3636
|
+
In consequence, 1 <= index <= 15, i.e.
|
3637
|
+
0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
|
3638
|
+
|
3639
|
+
0 -> 1
|
3640
|
+
...
|
3641
|
+
7 -> 8
|
3642
|
+
8 -> 9
|
3643
|
+
...
|
3644
|
+
15 -> 9
|
3645
|
+
|
3646
|
+
Thus it suffices to check the 3rd bit of index - 1 after the first filter.
|
3647
|
+
|
3648
|
+
*/
|
3649
|
+
|
3650
|
+
if (index == 0) {
|
3651
|
+
/* Path 1 using p0 */
|
3652
|
+
|
3653
|
+
p0_quick(&asinh, &asinm, x, xdb.i[HI]);
|
3654
|
+
|
3655
|
+
Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
|
3656
|
+
|
3657
|
+
/* Rounding test */
|
3658
|
+
|
3659
|
+
TEST_AND_RETURN_RD(asinpih, asinpim, RDROUNDCST);
|
3660
|
+
|
3661
|
+
/* Rounding test failed, launch accurate phase */
|
3662
|
+
|
3663
|
+
#if EVAL_PERF
|
3664
|
+
crlibm_second_step_taken++;
|
3665
|
+
#endif
|
3666
|
+
|
3667
|
+
p0_accu(&asinh, &asinm, &asinl, x);
|
3668
|
+
|
3669
|
+
Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
|
3670
|
+
|
3671
|
+
Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
|
3672
|
+
|
3673
|
+
/* Final rounding */
|
3674
|
+
|
3675
|
+
ReturnRoundDownwards3(asinpih,asinpim,asinpil);
|
3676
|
+
|
3677
|
+
}
|
3678
|
+
|
3679
|
+
/* Strip off the sign of argument x */
|
3680
|
+
sign = 1.0;
|
3681
|
+
if (x < 0.0) sign = -sign;
|
3682
|
+
|
3683
|
+
index--;
|
3684
|
+
if ((index & 0x8) != 0) {
|
3685
|
+
/* Path 3 using p9 */
|
3686
|
+
|
3687
|
+
/* Do argument reduction using a MI_9 as a midpoint value
|
3688
|
+
for the polynomial and compute exactly zp = 2 * (1 - x)
|
3689
|
+
for the asymptotical approximation using a square root.
|
3690
|
+
*/
|
3691
|
+
|
3692
|
+
z = xabs - MI_9;
|
3693
|
+
zp = 2.0 * (1.0 - xabs);
|
3694
|
+
|
3695
|
+
/* Polynomial approximation and square root extraction */
|
3696
|
+
|
3697
|
+
p9_quick(&p9h, &p9m, z);
|
3698
|
+
p9h = -p9h;
|
3699
|
+
p9m = -p9m;
|
3700
|
+
|
3701
|
+
sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
|
3702
|
+
|
3703
|
+
/* Reconstruction */
|
3704
|
+
|
3705
|
+
Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
|
3706
|
+
Mul22(&t2h,&t2m,t1h,t1m,RECPRPIH,RECPRPIM);
|
3707
|
+
|
3708
|
+
Add122(&asinpih,&asinpim,0.5,t2h,t2m);
|
3709
|
+
|
3710
|
+
/* Rounding test */
|
3711
|
+
|
3712
|
+
asinpih *= sign;
|
3713
|
+
asinpim *= sign;
|
3714
|
+
|
3715
|
+
TEST_AND_RETURN_RD(asinpih, asinpim, RDROUNDCST);
|
3716
|
+
|
3717
|
+
/* Rounding test failed, launch accurate phase */
|
3718
|
+
|
3719
|
+
#if EVAL_PERF
|
3720
|
+
crlibm_second_step_taken++;
|
3721
|
+
#endif
|
3722
|
+
|
3723
|
+
p9_accu(&p9h, &p9m, &p9l, z);
|
3724
|
+
p9h = -p9h;
|
3725
|
+
p9m = -p9m;
|
3726
|
+
p9l = -p9l;
|
3727
|
+
|
3728
|
+
Sqrt13(&sqrh,&sqrm,&sqrl,zp);
|
3729
|
+
|
3730
|
+
/* Reconstruction */
|
3731
|
+
|
3732
|
+
Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
|
3733
|
+
Mul33(&t2h,&t2m,&t2l,t1h,t1m,t1l,RECPRPIH,RECPRPIM,RECPRPIL);
|
3734
|
+
|
3735
|
+
Add133(&asinpihover,&asinpimover,&asinpilover,0.5,t2h,t2m,t2l);
|
3736
|
+
|
3737
|
+
Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
|
3738
|
+
|
3739
|
+
/* Final rounding */
|
3740
|
+
|
3741
|
+
RoundDownwards3(&asinpi,asinpih,asinpim,asinpil);
|
3742
|
+
|
3743
|
+
return sign * asinpi;
|
3744
|
+
|
3745
|
+
}
|
3746
|
+
|
3747
|
+
/* Path 2 using p */
|
3748
|
+
|
3749
|
+
/* Do argument reduction using a table value for
|
3750
|
+
the midpoint value
|
3751
|
+
*/
|
3752
|
+
|
3753
|
+
z = xabs - mi_i;
|
3754
|
+
|
3755
|
+
p_quick(&asinh, &asinm, z, index);
|
3756
|
+
|
3757
|
+
Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
|
3758
|
+
|
3759
|
+
/* Rounding test */
|
3760
|
+
|
3761
|
+
asinpih *= sign;
|
3762
|
+
asinpim *= sign;
|
3763
|
+
|
3764
|
+
TEST_AND_RETURN_RD(asinpih, asinpim, RDROUNDCST);
|
3765
|
+
|
3766
|
+
/* Rounding test failed, launch accurate phase */
|
3767
|
+
|
3768
|
+
#if EVAL_PERF
|
3769
|
+
crlibm_second_step_taken++;
|
3770
|
+
#endif
|
3771
|
+
|
3772
|
+
p_accu(&asinh, &asinm, &asinl, z, index);
|
3773
|
+
|
3774
|
+
Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
|
3775
|
+
|
3776
|
+
Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
|
3777
|
+
|
3778
|
+
/* Final rounding */
|
3779
|
+
|
3780
|
+
RoundDownwards3(&asinpi,asinpih,asinpim,asinpil);
|
3781
|
+
|
3782
|
+
return sign * asinpi;
|
3783
|
+
|
3784
|
+
}
|
3785
|
+
|
3786
|
+
double asinpi_ru(double x) {
|
3787
|
+
db_number xdb, zdb, asinhdb;
|
3788
|
+
double sign, z, zp;
|
3789
|
+
int index;
|
3790
|
+
double asinh, asinm, asinl;
|
3791
|
+
double asinpih, asinpim, asinpil;
|
3792
|
+
double asinpihover, asinpimover, asinpilover;
|
3793
|
+
double p9h, p9m, p9l, sqrh, sqrm, sqrl;
|
3794
|
+
double t1h, t1m, t1l;
|
3795
|
+
double t2h, t2m, t2l;
|
3796
|
+
double asinpi;
|
3797
|
+
double xabs;
|
3798
|
+
double xScaled;
|
3799
|
+
double xPih, xPim, xPil;
|
3800
|
+
double xPihover, xPimover, xPilover;
|
3801
|
+
#if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
|
3802
|
+
db_number tempdb;
|
3803
|
+
#endif
|
3804
|
+
double deltatemp, deltah, deltal;
|
3805
|
+
double temp1, temp2h, temp2l, temp3;
|
3806
|
+
|
3807
|
+
/* Start already computations for argument reduction */
|
3808
|
+
|
3809
|
+
zdb.d = 1.0 + x * x;
|
3810
|
+
|
3811
|
+
xdb.d = x;
|
3812
|
+
|
3813
|
+
/* Special case handling */
|
3814
|
+
|
3815
|
+
/* Remove sign of x in floating-point */
|
3816
|
+
xabs = ABS(x);
|
3817
|
+
xdb.i[HI] &= 0x7fffffff;
|
3818
|
+
|
3819
|
+
/* If |x| < 2^(-60) we have
|
3820
|
+
|
3821
|
+
arcsin(x)/pi = x * triple-double(1/pi) * ( 1 + xi )
|
3822
|
+
|
3823
|
+
with 0 <= xi < 2^(-122)
|
3824
|
+
|
3825
|
+
We have no bad case worser than
|
3826
|
+
112 bits for asinpi(x) for |x| > 2^(-58)
|
3827
|
+
and the order 3 term of asinpi is still more
|
3828
|
+
far away.
|
3829
|
+
|
3830
|
+
*/
|
3831
|
+
if (xdb.i[HI] < ASINPISIMPLEBOUND) {
|
3832
|
+
/* For a faster path for the exact case,
|
3833
|
+
we check first if x = 0
|
3834
|
+
*/
|
3835
|
+
|
3836
|
+
if (x == 0.0)
|
3837
|
+
return x;
|
3838
|
+
|
3839
|
+
/* We want a relatively fast path for values where
|
3840
|
+
neither the input nor the output is subnormal
|
3841
|
+
because subnormal rounding is expensive.
|
3842
|
+
|
3843
|
+
Since
|
3844
|
+
|
3845
|
+
abs(double(asin(x)/pi)) <= 2^(-860)
|
3846
|
+
|
3847
|
+
for abs(x) <= 2^(-858), we filter for this (normal) value.
|
3848
|
+
|
3849
|
+
*/
|
3850
|
+
|
3851
|
+
if (xdb.i[HI] >= ASINPINOSUBNORMALBOUND) {
|
3852
|
+
|
3853
|
+
/* Here abs(x) >= 2^(-858).
|
3854
|
+
The result is therefore clearly normal and
|
3855
|
+
the double precision numbers in the triple-double
|
3856
|
+
representation of TD(1/pi) * x are all normal, too.
|
3857
|
+
|
3858
|
+
For speed, we use a two-step approach.
|
3859
|
+
We know that
|
3860
|
+
|
3861
|
+
|| x*DD(1/pi)/(asin(x)/pi) - 1 ||_[-2^(-60);2^(-60)]^\infty <= 2^(-107.8) <= 2^(-80)
|
3862
|
+
|
3863
|
+
*/
|
3864
|
+
|
3865
|
+
Mul122(&xPih,&xPim,x,RECPRPIH,RECPRPIM);
|
3866
|
+
|
3867
|
+
TEST_AND_RETURN_RU(xPih, xPim, RDROUNDCSTASINPI);
|
3868
|
+
|
3869
|
+
Mul133(&xPihover,&xPimover,&xPilover,x,RECPRPIH,RECPRPIM,RECPRPIL);
|
3870
|
+
|
3871
|
+
Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
|
3872
|
+
|
3873
|
+
ReturnRoundUpwards3(xPih,xPim,xPil);
|
3874
|
+
|
3875
|
+
}
|
3876
|
+
|
3877
|
+
/* Here abs(x) < 2^(-858)
|
3878
|
+
|
3879
|
+
Because of subnormals and especially because
|
3880
|
+
of the fact that 1/pi < 1, we must scale x
|
3881
|
+
appropriately. We compute hence:
|
3882
|
+
|
3883
|
+
asinpi(x) = round( ((x * 2^(1000)) * triple-double(1/pi)) * 2^(-1000))
|
3884
|
+
|
3885
|
+
where the rounding procedure works temporarily on the scaled
|
3886
|
+
intermediate.
|
3887
|
+
*/
|
3888
|
+
|
3889
|
+
xScaled = x * TWO1000;
|
3890
|
+
|
3891
|
+
Mul133(&xPihover,&xPimover,&xPilover,xScaled,RECPRPIH,RECPRPIM,RECPRPIL);
|
3892
|
+
|
3893
|
+
Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
|
3894
|
+
|
3895
|
+
/* Produce a (possibly) subnormal intermediate rounding */
|
3896
|
+
|
3897
|
+
asinhdb.d = xPih * TWOM1000;
|
3898
|
+
|
3899
|
+
#if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
|
3900
|
+
tempdb.i[HI] = asinhdb.i[HI];
|
3901
|
+
tempdb.i[LO] = asinhdb.i[LO];
|
3902
|
+
asinhdb.i[LO] = tempdb.i[LO];
|
3903
|
+
asinhdb.i[HI] = tempdb.i[HI];
|
3904
|
+
#endif
|
3905
|
+
|
3906
|
+
/* Rescale the result */
|
3907
|
+
|
3908
|
+
temp1 = asinhdb.d * TWO1000;
|
3909
|
+
|
3910
|
+
/* Compute the scaled error, the operation is exact by Sterbenz' lemma */
|
3911
|
+
|
3912
|
+
deltatemp = xPih - temp1;
|
3913
|
+
|
3914
|
+
/* Sum up the errors, representing them on a double-double
|
3915
|
+
|
3916
|
+
This is exact for the normal rounding case and the error
|
3917
|
+
is neglectable in the subnormal rounding case.
|
3918
|
+
*/
|
3919
|
+
|
3920
|
+
Add12Cond(temp2h,temp2l,deltatemp,xPim);
|
3921
|
+
temp3 = temp2l + xPil;
|
3922
|
+
Add12(deltah,deltal,temp2h,temp3);
|
3923
|
+
|
3924
|
+
/* We are doing directed rounding. Thus we must correct the rounding
|
3925
|
+
if the sign of the error is not correct
|
3926
|
+
RD -> sign must be positive for correct rounding
|
3927
|
+
RU -> sign must be negative for correct rounding
|
3928
|
+
RZ -> sign must be positive for positive x and negative for negative x
|
3929
|
+
*/
|
3930
|
+
|
3931
|
+
if (deltah <= 0.0) {
|
3932
|
+
/* The sign is correct, return the intermediate rounding */
|
3933
|
+
return asinhdb.d;
|
3934
|
+
}
|
3935
|
+
|
3936
|
+
/* The sign is not correct
|
3937
|
+
|
3938
|
+
RD -> subtract 1 ulp
|
3939
|
+
RU -> add 1 ulp
|
3940
|
+
RZ -> subtract 1 ulp if x positive, add 1 ulp if x negative
|
3941
|
+
*/
|
3942
|
+
|
3943
|
+
if (x < 0.0)
|
3944
|
+
asinhdb.l--;
|
3945
|
+
else
|
3946
|
+
asinhdb.l++;
|
3947
|
+
|
3948
|
+
return asinhdb.d;
|
3949
|
+
}
|
3950
|
+
|
3951
|
+
/* asinpi is defined on -1 <= x <= 1, elsewhere it is NaN */
|
3952
|
+
if (xdb.i[HI] >= 0x3ff00000) {
|
3953
|
+
if (x == 1.0) {
|
3954
|
+
return 0.5;
|
3955
|
+
}
|
3956
|
+
if (x == -1.0) {
|
3957
|
+
return - 0.5;
|
3958
|
+
}
|
3959
|
+
return (x-x)/0.0; /* return NaN */
|
3960
|
+
}
|
3961
|
+
|
3962
|
+
/* Argument reduction:
|
3963
|
+
|
3964
|
+
We have 10 intervals and 3 paths:
|
3965
|
+
|
3966
|
+
- interval 0 => path 1 using p0
|
3967
|
+
- interval 1-8 => path 2 using p
|
3968
|
+
- interval 9 => path 3 using p9
|
3969
|
+
|
3970
|
+
*/
|
3971
|
+
|
3972
|
+
index = (0x000f0000 & zdb.i[HI]) >> 16;
|
3973
|
+
|
3974
|
+
/* 0 <= index <= 15
|
3975
|
+
|
3976
|
+
index approximates roughly x^2
|
3977
|
+
|
3978
|
+
Map indexes to intervals as follows:
|
3979
|
+
|
3980
|
+
0 -> 0
|
3981
|
+
1 -> 1
|
3982
|
+
...
|
3983
|
+
8 -> 8
|
3984
|
+
9 -> 9
|
3985
|
+
...
|
3986
|
+
15 -> 9
|
3987
|
+
|
3988
|
+
For this mapping, filter first the case 0 -> 0
|
3989
|
+
In consequence, 1 <= index <= 15, i.e.
|
3990
|
+
0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
|
3991
|
+
|
3992
|
+
0 -> 1
|
3993
|
+
...
|
3994
|
+
7 -> 8
|
3995
|
+
8 -> 9
|
3996
|
+
...
|
3997
|
+
15 -> 9
|
3998
|
+
|
3999
|
+
Thus it suffices to check the 3rd bit of index - 1 after the first filter.
|
4000
|
+
|
4001
|
+
*/
|
4002
|
+
|
4003
|
+
if (index == 0) {
|
4004
|
+
/* Path 1 using p0 */
|
4005
|
+
|
4006
|
+
p0_quick(&asinh, &asinm, x, xdb.i[HI]);
|
4007
|
+
|
4008
|
+
Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
|
4009
|
+
|
4010
|
+
/* Rounding test */
|
4011
|
+
|
4012
|
+
TEST_AND_RETURN_RU(asinpih, asinpim, RDROUNDCST);
|
4013
|
+
|
4014
|
+
/* Rounding test failed, launch accurate phase */
|
4015
|
+
|
4016
|
+
#if EVAL_PERF
|
4017
|
+
crlibm_second_step_taken++;
|
4018
|
+
#endif
|
4019
|
+
|
4020
|
+
p0_accu(&asinh, &asinm, &asinl, x);
|
4021
|
+
|
4022
|
+
Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
|
4023
|
+
|
4024
|
+
Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
|
4025
|
+
|
4026
|
+
/* Final rounding */
|
4027
|
+
|
4028
|
+
ReturnRoundUpwards3(asinpih,asinpim,asinpil);
|
4029
|
+
|
4030
|
+
}
|
4031
|
+
|
4032
|
+
/* Strip off the sign of argument x */
|
4033
|
+
sign = 1.0;
|
4034
|
+
if (x < 0.0) sign = -sign;
|
4035
|
+
|
4036
|
+
index--;
|
4037
|
+
if ((index & 0x8) != 0) {
|
4038
|
+
/* Path 3 using p9 */
|
4039
|
+
|
4040
|
+
/* Do argument reduction using a MI_9 as a midpoint value
|
4041
|
+
for the polynomial and compute exactly zp = 2 * (1 - x)
|
4042
|
+
for the asymptotical approximation using a square root.
|
4043
|
+
*/
|
4044
|
+
|
4045
|
+
z = xabs - MI_9;
|
4046
|
+
zp = 2.0 * (1.0 - xabs);
|
4047
|
+
|
4048
|
+
/* Polynomial approximation and square root extraction */
|
4049
|
+
|
4050
|
+
p9_quick(&p9h, &p9m, z);
|
4051
|
+
p9h = -p9h;
|
4052
|
+
p9m = -p9m;
|
4053
|
+
|
4054
|
+
sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
|
4055
|
+
|
4056
|
+
/* Reconstruction */
|
4057
|
+
|
4058
|
+
Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
|
4059
|
+
Mul22(&t2h,&t2m,t1h,t1m,RECPRPIH,RECPRPIM);
|
4060
|
+
|
4061
|
+
Add122(&asinpih,&asinpim,0.5,t2h,t2m);
|
4062
|
+
|
4063
|
+
/* Rounding test */
|
4064
|
+
|
4065
|
+
asinpih *= sign;
|
4066
|
+
asinpim *= sign;
|
4067
|
+
|
4068
|
+
TEST_AND_RETURN_RU(asinpih, asinpim, RDROUNDCST);
|
4069
|
+
|
4070
|
+
/* Rounding test failed, launch accurate phase */
|
4071
|
+
|
4072
|
+
#if EVAL_PERF
|
4073
|
+
crlibm_second_step_taken++;
|
4074
|
+
#endif
|
4075
|
+
|
4076
|
+
p9_accu(&p9h, &p9m, &p9l, z);
|
4077
|
+
p9h = -p9h;
|
4078
|
+
p9m = -p9m;
|
4079
|
+
p9l = -p9l;
|
4080
|
+
|
4081
|
+
Sqrt13(&sqrh,&sqrm,&sqrl,zp);
|
4082
|
+
|
4083
|
+
/* Reconstruction */
|
4084
|
+
|
4085
|
+
Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
|
4086
|
+
Mul33(&t2h,&t2m,&t2l,t1h,t1m,t1l,RECPRPIH,RECPRPIM,RECPRPIL);
|
4087
|
+
|
4088
|
+
Add133(&asinpihover,&asinpimover,&asinpilover,0.5,t2h,t2m,t2l);
|
4089
|
+
|
4090
|
+
Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
|
4091
|
+
|
4092
|
+
/* Final rounding */
|
4093
|
+
|
4094
|
+
RoundUpwards3(&asinpi,asinpih,asinpim,asinpil);
|
4095
|
+
|
4096
|
+
return sign * asinpi;
|
4097
|
+
|
4098
|
+
}
|
4099
|
+
|
4100
|
+
/* Path 2 using p */
|
4101
|
+
|
4102
|
+
/* Do argument reduction using a table value for
|
4103
|
+
the midpoint value
|
4104
|
+
*/
|
4105
|
+
|
4106
|
+
z = xabs - mi_i;
|
4107
|
+
|
4108
|
+
p_quick(&asinh, &asinm, z, index);
|
4109
|
+
|
4110
|
+
Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
|
4111
|
+
|
4112
|
+
/* Rounding test */
|
4113
|
+
|
4114
|
+
asinpih *= sign;
|
4115
|
+
asinpim *= sign;
|
4116
|
+
|
4117
|
+
TEST_AND_RETURN_RU(asinpih, asinpim, RDROUNDCST);
|
4118
|
+
|
4119
|
+
/* Rounding test failed, launch accurate phase */
|
4120
|
+
|
4121
|
+
#if EVAL_PERF
|
4122
|
+
crlibm_second_step_taken++;
|
4123
|
+
#endif
|
4124
|
+
|
4125
|
+
p_accu(&asinh, &asinm, &asinl, z, index);
|
4126
|
+
|
4127
|
+
Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
|
4128
|
+
|
4129
|
+
Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
|
4130
|
+
|
4131
|
+
/* Final rounding */
|
4132
|
+
|
4133
|
+
RoundUpwards3(&asinpi,asinpih,asinpim,asinpil);
|
4134
|
+
|
4135
|
+
return sign * asinpi;
|
4136
|
+
|
4137
|
+
}
|
4138
|
+
|
4139
|
+
double asinpi_rz(double x) {
|
4140
|
+
db_number xdb, zdb, asinhdb;
|
4141
|
+
double sign, z, zp;
|
4142
|
+
int index;
|
4143
|
+
double asinh, asinm, asinl;
|
4144
|
+
double asinpih, asinpim, asinpil;
|
4145
|
+
double asinpihover, asinpimover, asinpilover;
|
4146
|
+
double p9h, p9m, p9l, sqrh, sqrm, sqrl;
|
4147
|
+
double t1h, t1m, t1l;
|
4148
|
+
double t2h, t2m, t2l;
|
4149
|
+
double asinpi;
|
4150
|
+
double xabs;
|
4151
|
+
double xScaled;
|
4152
|
+
double xPih, xPim, xPil;
|
4153
|
+
double xPihover, xPimover, xPilover;
|
4154
|
+
#if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
|
4155
|
+
db_number tempdb;
|
4156
|
+
#endif
|
4157
|
+
double deltatemp, deltah, deltal;
|
4158
|
+
double temp1, temp2h, temp2l, temp3;
|
4159
|
+
|
4160
|
+
/* Start already computations for argument reduction */
|
4161
|
+
|
4162
|
+
zdb.d = 1.0 + x * x;
|
4163
|
+
|
4164
|
+
xdb.d = x;
|
4165
|
+
|
4166
|
+
/* Special case handling */
|
4167
|
+
|
4168
|
+
/* Remove sign of x in floating-point */
|
4169
|
+
xabs = ABS(x);
|
4170
|
+
xdb.i[HI] &= 0x7fffffff;
|
4171
|
+
|
4172
|
+
/* If |x| < 2^(-60) we have
|
4173
|
+
|
4174
|
+
arcsin(x)/pi = x * triple-double(1/pi) * ( 1 + xi )
|
4175
|
+
|
4176
|
+
with 0 <= xi < 2^(-122)
|
4177
|
+
|
4178
|
+
We have no bad case worser than
|
4179
|
+
112 bits for asinpi(x) for |x| > 2^(-58)
|
4180
|
+
and the order 3 term of asinpi is still more
|
4181
|
+
far away.
|
4182
|
+
|
4183
|
+
*/
|
4184
|
+
if (xdb.i[HI] < ASINPISIMPLEBOUND) {
|
4185
|
+
/* For a faster path for the exact case,
|
4186
|
+
we check first if x = 0
|
4187
|
+
*/
|
4188
|
+
|
4189
|
+
if (x == 0.0)
|
4190
|
+
return x;
|
4191
|
+
|
4192
|
+
/* We want a relatively fast path for values where
|
4193
|
+
neither the input nor the output is subnormal
|
4194
|
+
because subnormal rounding is expensive.
|
4195
|
+
|
4196
|
+
Since
|
4197
|
+
|
4198
|
+
abs(double(asin(x)/pi)) <= 2^(-860)
|
4199
|
+
|
4200
|
+
for abs(x) <= 2^(-858), we filter for this (normal) value.
|
4201
|
+
|
4202
|
+
*/
|
4203
|
+
|
4204
|
+
if (xdb.i[HI] >= ASINPINOSUBNORMALBOUND) {
|
4205
|
+
|
4206
|
+
/* Here abs(x) >= 2^(-858).
|
4207
|
+
The result is therefore clearly normal and
|
4208
|
+
the double precision numbers in the triple-double
|
4209
|
+
representation of TD(1/pi) * x are all normal, too.
|
4210
|
+
|
4211
|
+
For speed, we use a two-step approach.
|
4212
|
+
We know that
|
4213
|
+
|
4214
|
+
|| x*DD(1/pi)/(asin(x)/pi) - 1 ||_[-2^(-60);2^(-60)]^\infty <= 2^(-107.8) <= 2^(-80)
|
4215
|
+
|
4216
|
+
*/
|
4217
|
+
|
4218
|
+
Mul122(&xPih,&xPim,x,RECPRPIH,RECPRPIM);
|
4219
|
+
|
4220
|
+
TEST_AND_RETURN_RZ(xPih, xPim, RDROUNDCSTASINPI);
|
4221
|
+
|
4222
|
+
Mul133(&xPihover,&xPimover,&xPilover,x,RECPRPIH,RECPRPIM,RECPRPIL);
|
4223
|
+
|
4224
|
+
Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
|
4225
|
+
|
4226
|
+
ReturnRoundTowardsZero3(xPih,xPim,xPil);
|
4227
|
+
|
4228
|
+
}
|
4229
|
+
|
4230
|
+
/* Here abs(x) < 2^(-858)
|
4231
|
+
|
4232
|
+
Because of subnormals and especially because
|
4233
|
+
of the fact that 1/pi < 1, we must scale x
|
4234
|
+
appropriately. We compute hence:
|
4235
|
+
|
4236
|
+
asinpi(x) = round( ((x * 2^(1000)) * triple-double(1/pi)) * 2^(-1000))
|
4237
|
+
|
4238
|
+
where the rounding procedure works temporarily on the scaled
|
4239
|
+
intermediate.
|
4240
|
+
*/
|
4241
|
+
|
4242
|
+
xScaled = x * TWO1000;
|
4243
|
+
|
4244
|
+
Mul133(&xPihover,&xPimover,&xPilover,xScaled,RECPRPIH,RECPRPIM,RECPRPIL);
|
4245
|
+
|
4246
|
+
Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
|
4247
|
+
|
4248
|
+
/* Produce a (possibly) subnormal intermediate rounding */
|
4249
|
+
|
4250
|
+
asinhdb.d = xPih * TWOM1000;
|
4251
|
+
|
4252
|
+
#if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
|
4253
|
+
tempdb.i[HI] = asinhdb.i[HI];
|
4254
|
+
tempdb.i[LO] = asinhdb.i[LO];
|
4255
|
+
asinhdb.i[LO] = tempdb.i[LO];
|
4256
|
+
asinhdb.i[HI] = tempdb.i[HI];
|
4257
|
+
#endif
|
4258
|
+
|
4259
|
+
/* Rescale the result */
|
4260
|
+
|
4261
|
+
temp1 = asinhdb.d * TWO1000;
|
4262
|
+
|
4263
|
+
/* Compute the scaled error, the operation is exact by Sterbenz' lemma */
|
4264
|
+
|
4265
|
+
deltatemp = xPih - temp1;
|
4266
|
+
|
4267
|
+
/* Sum up the errors, representing them on a double-double
|
4268
|
+
|
4269
|
+
This is exact for the normal rounding case and the error
|
4270
|
+
is neglectable in the subnormal rounding case.
|
4271
|
+
*/
|
4272
|
+
|
4273
|
+
Add12Cond(temp2h,temp2l,deltatemp,xPim);
|
4274
|
+
temp3 = temp2l + xPil;
|
4275
|
+
Add12(deltah,deltal,temp2h,temp3);
|
4276
|
+
|
4277
|
+
/* We are doing directed rounding. Thus we must correct the rounding
|
4278
|
+
if the sign of the error is not correct
|
4279
|
+
RD -> sign must be positive for correct rounding
|
4280
|
+
RU -> sign must be negative for correct rounding
|
4281
|
+
RZ -> sign must be positive for positive x and negative for negative x
|
4282
|
+
*/
|
4283
|
+
|
4284
|
+
if ((x > 0.0) ^ (deltah < 0.0)) {
|
4285
|
+
/* The sign is correct, return the intermediate rounding */
|
4286
|
+
return asinhdb.d;
|
4287
|
+
}
|
4288
|
+
|
4289
|
+
/* The sign is not correct
|
4290
|
+
|
4291
|
+
RD -> subtract 1 ulp
|
4292
|
+
RU -> add 1 ulp
|
4293
|
+
RZ -> subtract 1 ulp if x positive, add 1 ulp if x negative
|
4294
|
+
*/
|
4295
|
+
|
4296
|
+
asinhdb.l--;
|
4297
|
+
|
4298
|
+
return asinhdb.d;
|
4299
|
+
}
|
4300
|
+
|
4301
|
+
/* asinpi is defined on -1 <= x <= 1, elsewhere it is NaN */
|
4302
|
+
if (xdb.i[HI] >= 0x3ff00000) {
|
4303
|
+
if (x == 1.0) {
|
4304
|
+
return 0.5;
|
4305
|
+
}
|
4306
|
+
if (x == -1.0) {
|
4307
|
+
return - 0.5;
|
4308
|
+
}
|
4309
|
+
return (x-x)/0.0; /* return NaN */
|
4310
|
+
}
|
4311
|
+
|
4312
|
+
/* Argument reduction:
|
4313
|
+
|
4314
|
+
We have 10 intervals and 3 paths:
|
4315
|
+
|
4316
|
+
- interval 0 => path 1 using p0
|
4317
|
+
- interval 1-8 => path 2 using p
|
4318
|
+
- interval 9 => path 3 using p9
|
4319
|
+
|
4320
|
+
*/
|
4321
|
+
|
4322
|
+
index = (0x000f0000 & zdb.i[HI]) >> 16;
|
4323
|
+
|
4324
|
+
/* 0 <= index <= 15
|
4325
|
+
|
4326
|
+
index approximates roughly x^2
|
4327
|
+
|
4328
|
+
Map indexes to intervals as follows:
|
4329
|
+
|
4330
|
+
0 -> 0
|
4331
|
+
1 -> 1
|
4332
|
+
...
|
4333
|
+
8 -> 8
|
4334
|
+
9 -> 9
|
4335
|
+
...
|
4336
|
+
15 -> 9
|
4337
|
+
|
4338
|
+
For this mapping, filter first the case 0 -> 0
|
4339
|
+
In consequence, 1 <= index <= 15, i.e.
|
4340
|
+
0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
|
4341
|
+
|
4342
|
+
0 -> 1
|
4343
|
+
...
|
4344
|
+
7 -> 8
|
4345
|
+
8 -> 9
|
4346
|
+
...
|
4347
|
+
15 -> 9
|
4348
|
+
|
4349
|
+
Thus it suffices to check the 3rd bit of index - 1 after the first filter.
|
4350
|
+
|
4351
|
+
*/
|
4352
|
+
|
4353
|
+
if (index == 0) {
|
4354
|
+
/* Path 1 using p0 */
|
4355
|
+
|
4356
|
+
p0_quick(&asinh, &asinm, x, xdb.i[HI]);
|
4357
|
+
|
4358
|
+
Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
|
4359
|
+
|
4360
|
+
/* Rounding test */
|
4361
|
+
|
4362
|
+
TEST_AND_RETURN_RZ(asinpih, asinpim, RDROUNDCST);
|
4363
|
+
|
4364
|
+
/* Rounding test failed, launch accurate phase */
|
4365
|
+
|
4366
|
+
#if EVAL_PERF
|
4367
|
+
crlibm_second_step_taken++;
|
4368
|
+
#endif
|
4369
|
+
|
4370
|
+
p0_accu(&asinh, &asinm, &asinl, x);
|
4371
|
+
|
4372
|
+
Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
|
4373
|
+
|
4374
|
+
Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
|
4375
|
+
|
4376
|
+
/* Final rounding */
|
4377
|
+
|
4378
|
+
ReturnRoundTowardsZero3(asinpih,asinpim,asinpil);
|
4379
|
+
|
4380
|
+
}
|
4381
|
+
|
4382
|
+
/* Strip off the sign of argument x */
|
4383
|
+
sign = 1.0;
|
4384
|
+
if (x < 0.0) sign = -sign;
|
4385
|
+
|
4386
|
+
index--;
|
4387
|
+
if ((index & 0x8) != 0) {
|
4388
|
+
/* Path 3 using p9 */
|
4389
|
+
|
4390
|
+
/* Do argument reduction using a MI_9 as a midpoint value
|
4391
|
+
for the polynomial and compute exactly zp = 2 * (1 - x)
|
4392
|
+
for the asymptotical approximation using a square root.
|
4393
|
+
*/
|
4394
|
+
|
4395
|
+
z = xabs - MI_9;
|
4396
|
+
zp = 2.0 * (1.0 - xabs);
|
4397
|
+
|
4398
|
+
/* Polynomial approximation and square root extraction */
|
4399
|
+
|
4400
|
+
p9_quick(&p9h, &p9m, z);
|
4401
|
+
p9h = -p9h;
|
4402
|
+
p9m = -p9m;
|
4403
|
+
|
4404
|
+
sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
|
4405
|
+
|
4406
|
+
/* Reconstruction */
|
4407
|
+
|
4408
|
+
Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
|
4409
|
+
Mul22(&t2h,&t2m,t1h,t1m,RECPRPIH,RECPRPIM);
|
4410
|
+
|
4411
|
+
Add122(&asinpih,&asinpim,0.5,t2h,t2m);
|
4412
|
+
|
4413
|
+
/* Rounding test */
|
4414
|
+
|
4415
|
+
asinpih *= sign;
|
4416
|
+
asinpim *= sign;
|
4417
|
+
|
4418
|
+
TEST_AND_RETURN_RZ(asinpih, asinpim, RDROUNDCST);
|
4419
|
+
|
4420
|
+
/* Rounding test failed, launch accurate phase */
|
4421
|
+
|
4422
|
+
#if EVAL_PERF
|
4423
|
+
crlibm_second_step_taken++;
|
4424
|
+
#endif
|
4425
|
+
|
4426
|
+
p9_accu(&p9h, &p9m, &p9l, z);
|
4427
|
+
p9h = -p9h;
|
4428
|
+
p9m = -p9m;
|
4429
|
+
p9l = -p9l;
|
4430
|
+
|
4431
|
+
Sqrt13(&sqrh,&sqrm,&sqrl,zp);
|
4432
|
+
|
4433
|
+
/* Reconstruction */
|
4434
|
+
|
4435
|
+
Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
|
4436
|
+
Mul33(&t2h,&t2m,&t2l,t1h,t1m,t1l,RECPRPIH,RECPRPIM,RECPRPIL);
|
4437
|
+
|
4438
|
+
Add133(&asinpihover,&asinpimover,&asinpilover,0.5,t2h,t2m,t2l);
|
4439
|
+
|
4440
|
+
Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
|
4441
|
+
|
4442
|
+
/* Final rounding */
|
4443
|
+
|
4444
|
+
RoundTowardsZero3(&asinpi,asinpih,asinpim,asinpil);
|
4445
|
+
|
4446
|
+
return sign * asinpi;
|
4447
|
+
|
4448
|
+
}
|
4449
|
+
|
4450
|
+
/* Path 2 using p */
|
4451
|
+
|
4452
|
+
/* Do argument reduction using a table value for
|
4453
|
+
the midpoint value
|
4454
|
+
*/
|
4455
|
+
|
4456
|
+
z = xabs - mi_i;
|
4457
|
+
|
4458
|
+
p_quick(&asinh, &asinm, z, index);
|
4459
|
+
|
4460
|
+
Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
|
4461
|
+
|
4462
|
+
/* Rounding test */
|
4463
|
+
|
4464
|
+
asinpih *= sign;
|
4465
|
+
asinpim *= sign;
|
4466
|
+
|
4467
|
+
TEST_AND_RETURN_RZ(asinpih, asinpim, RDROUNDCST);
|
4468
|
+
|
4469
|
+
/* Rounding test failed, launch accurate phase */
|
4470
|
+
|
4471
|
+
#if EVAL_PERF
|
4472
|
+
crlibm_second_step_taken++;
|
4473
|
+
#endif
|
4474
|
+
|
4475
|
+
p_accu(&asinh, &asinm, &asinl, z, index);
|
4476
|
+
|
4477
|
+
Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
|
4478
|
+
|
4479
|
+
Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
|
4480
|
+
|
4481
|
+
/* Final rounding */
|
4482
|
+
|
4483
|
+
RoundTowardsZero3(&asinpi,asinpih,asinpim,asinpil);
|
4484
|
+
|
4485
|
+
return sign * asinpi;
|
4486
|
+
|
4487
|
+
}
|
4488
|
+
|