crmf 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -0
- data/crmf.gemspec +105 -3
- data/ext/crlibm-1.0beta5/AUTHORS +2 -0
- data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
- data/ext/crlibm-1.0beta5/COPYING +340 -0
- data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
- data/ext/crlibm-1.0beta5/ChangeLog +125 -0
- data/ext/crlibm-1.0beta5/Makefile.am +134 -0
- data/ext/crlibm-1.0beta5/NEWS +0 -0
- data/ext/crlibm-1.0beta5/README +31 -0
- data/ext/crlibm-1.0beta5/README.DEV +23 -0
- data/ext/crlibm-1.0beta5/README.md +5 -0
- data/ext/crlibm-1.0beta5/TODO +66 -0
- data/ext/crlibm-1.0beta5/VERSION +1 -0
- data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
- data/ext/crlibm-1.0beta5/acos-td.h +629 -0
- data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
- data/ext/crlibm-1.0beta5/asin-td.h +620 -0
- data/ext/crlibm-1.0beta5/asincos.c +4488 -0
- data/ext/crlibm-1.0beta5/asincos.h +575 -0
- data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
- data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
- data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
- data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
- data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
- data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
- data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
- data/ext/crlibm-1.0beta5/configure.ac +419 -0
- data/ext/crlibm-1.0beta5/crlibm.h +204 -0
- data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
- data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
- data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
- data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
- data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
- data/ext/crlibm-1.0beta5/double-extended.h +496 -0
- data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
- data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
- data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
- data/ext/crlibm-1.0beta5/exp-td.h +685 -0
- data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
- data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
- data/ext/crlibm-1.0beta5/expm1.c +2515 -0
- data/ext/crlibm-1.0beta5/expm1.h +715 -0
- data/ext/crlibm-1.0beta5/interval.h +238 -0
- data/ext/crlibm-1.0beta5/log-de.c +480 -0
- data/ext/crlibm-1.0beta5/log-de.h +747 -0
- data/ext/crlibm-1.0beta5/log-de2.c +280 -0
- data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
- data/ext/crlibm-1.0beta5/log-td.c +1158 -0
- data/ext/crlibm-1.0beta5/log-td.h +819 -0
- data/ext/crlibm-1.0beta5/log.c +2244 -0
- data/ext/crlibm-1.0beta5/log.h +1592 -0
- data/ext/crlibm-1.0beta5/log10-td.c +906 -0
- data/ext/crlibm-1.0beta5/log10-td.h +823 -0
- data/ext/crlibm-1.0beta5/log1p.c +1295 -0
- data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
- data/ext/crlibm-1.0beta5/log2-td.h +821 -0
- data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
- data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
- data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
- data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
- data/ext/crlibm-1.0beta5/log_fast.c +360 -0
- data/ext/crlibm-1.0beta5/log_fast.h +440 -0
- data/ext/crlibm-1.0beta5/pow.c +1396 -0
- data/ext/crlibm-1.0beta5/pow.h +3101 -0
- data/ext/crlibm-1.0beta5/prepare +20 -0
- data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
- data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
- data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
- data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
- data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
- data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
- data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
- data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
- data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
- data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
- data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
- data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
- data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
- data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
- data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
- data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
- data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
- data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
- data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
- data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
- data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
- data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
- data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
- data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
- data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
- data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
- data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
- data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
- data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
- data/ext/crlibm-1.0beta5/trigpi.h +556 -0
- data/ext/crlibm-1.0beta5/triple-double.c +57 -0
- data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
- data/ext/crmf/crmf.c +117 -20
- data/ext/crmf/extconf.rb +12 -8
- data/lib/crmf/version.rb +1 -1
- data/tests/perf.rb +100 -219
- metadata +108 -10
- data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,1297 @@
|
|
1
|
+
/*
|
2
|
+
* Correctly rounded arcsine
|
3
|
+
*
|
4
|
+
* Author : Christoph Lauter (ENS Lyon)
|
5
|
+
*
|
6
|
+
* This file is part of the crlibm library developed by the Arenaire
|
7
|
+
* project at Ecole Normale Superieure de Lyon
|
8
|
+
*
|
9
|
+
* This program is free software; you can redistribute it and/or modify
|
10
|
+
* it under the terms of the GNU Lesser General Public License as published by
|
11
|
+
* the Free Software Foundation; either version 2 of the License, or
|
12
|
+
* (at your option) any later version.
|
13
|
+
*
|
14
|
+
* This program is distributed in the hope that it will be useful,
|
15
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
+
* GNU General Public License for more details.
|
18
|
+
*
|
19
|
+
* You should have received a copy of the GNU Lesser General Public License
|
20
|
+
* along with this program; if not, write to the Free Software
|
21
|
+
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
22
|
+
*/
|
23
|
+
|
24
|
+
#include <stdio.h>
|
25
|
+
#include <stdlib.h>
|
26
|
+
#include "crlibm.h"
|
27
|
+
#include "crlibm_private.h"
|
28
|
+
#include "triple-double.h"
|
29
|
+
#include "asin-td.h"
|
30
|
+
|
31
|
+
#define AVOID_FMA 1
|
32
|
+
|
33
|
+
void asin_accurate_lower(double *asinh, double *asinm, double *asinl, double x, double xSqh, double xSql, double sign) {
|
34
|
+
double highPoly;
|
35
|
+
double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l;
|
36
|
+
double tt1h, tt1l;
|
37
|
+
double t8h, t8m, t8l, t9h, t9m, t9l, t10h, t10m, t10l, t11h, t11m, t11l, t12h, t12m, t12l;
|
38
|
+
double tt8h, tt8m, tt8l, tt9h, tt9m, tt9l, tt10h, tt10m, tt10l, tt11h, tt11m, tt11l, tt12h, tt12m, tt12l;
|
39
|
+
double xCubeh, xCubem, xCubel, tt13h, tt13m, tt13l, t13h, t13m, t13l, polyh, polym, polyl;
|
40
|
+
double tt11hover, tt11mover, tt11lover;
|
41
|
+
|
42
|
+
#if EVAL_PERF
|
43
|
+
crlibm_second_step_taken++;
|
44
|
+
#endif
|
45
|
+
|
46
|
+
/* Evaluate the polynomial of degree 37
|
47
|
+
Its coefficients start at tbl[0]
|
48
|
+
|
49
|
+
p(x) = x + x * x^2 * (c3 + x^2 * (c5 + ...
|
50
|
+
|
51
|
+
We receive x^2 as xSqh + xSql = x * x (exactly)
|
52
|
+
in argument
|
53
|
+
|
54
|
+
|x| <= 0.185 = 2^(-2.43)
|
55
|
+
|
56
|
+
Compute monomials 27 to 37 in double precision
|
57
|
+
monomials 13 to 25 in double-double and
|
58
|
+
1 to 11 in triple-double precision in a
|
59
|
+
modified Horner form
|
60
|
+
|
61
|
+
*/
|
62
|
+
|
63
|
+
/* Double computations */
|
64
|
+
|
65
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
66
|
+
highPoly = FMA(FMA(FMA(FMA(FMA(tbl[33],xSqh,tbl[32]),xSqh,tbl[31]),xSqh,tbl[30]),xSqh,tbl[29]),xSqh,tbl[28]);
|
67
|
+
#else
|
68
|
+
highPoly = tbl[28] + xSqh * (tbl[29] + xSqh * (tbl[30] + xSqh * (tbl[31] + xSqh * (tbl[32] + xSqh * tbl[33]))));
|
69
|
+
#endif
|
70
|
+
|
71
|
+
/* Double-double computations */
|
72
|
+
|
73
|
+
Mul12(&tt1h,&tt1l,xSqh,highPoly);
|
74
|
+
Add22(&t1h,&t1l,tbl[27],0,tt1h,tt1l);
|
75
|
+
|
76
|
+
MulAdd22(&t2h,&t2l,tbl[25],tbl[26],xSqh,xSql,t1h,t1l);
|
77
|
+
MulAdd22(&t3h,&t3l,tbl[23],tbl[24],xSqh,xSql,t2h,t2l);
|
78
|
+
MulAdd22(&t4h,&t4l,tbl[21],tbl[22],xSqh,xSql,t3h,t3l);
|
79
|
+
MulAdd22(&t5h,&t5l,tbl[19],tbl[20],xSqh,xSql,t4h,t4l);
|
80
|
+
MulAdd22(&t6h,&t6l,tbl[17],tbl[18],xSqh,xSql,t5h,t5l);
|
81
|
+
MulAdd22(&t7h,&t7l,tbl[15],tbl[16],xSqh,xSql,t6h,t6l);
|
82
|
+
|
83
|
+
/* Triple-double computations */
|
84
|
+
|
85
|
+
Mul23(&tt8h,&tt8m,&tt8l,xSqh,xSql,t7h,t7l); /* 149 - 48/53 */
|
86
|
+
Add33(&t8h,&t8m,&t8l,tbl[12],tbl[13],tbl[14],tt8h,tt8m,tt8l); /* 145 - 43/53 */
|
87
|
+
Mul233(&tt9h,&tt9m,&tt9l,xSqh,xSql,t8h,t8m,t8l); /* 139 - 39/53 */
|
88
|
+
Add33(&t9h,&t9m,&t9l,tbl[9],tbl[10],tbl[11],tt9h,tt9m,tt9l); /* 136 - 34/53 */
|
89
|
+
Mul233(&tt10h,&tt10m,&tt10l,xSqh,xSql,t9h,t9m,t9l); /* 130 - 30/53 */
|
90
|
+
Add33(&t10h,&t10m,&t10l,tbl[6],tbl[7],tbl[8],tt10h,tt10m,tt10l); /* 127 - 25/53 */
|
91
|
+
Mul233(&tt11hover,&tt11mover,&tt11lover,xSqh,xSql,t10h,t10m,t10l); /* 121 - 21/53 */
|
92
|
+
|
93
|
+
Renormalize3(&tt11h,&tt11m,&tt11l,tt11hover,tt11mover,tt11lover); /* infty - 52/53 */
|
94
|
+
|
95
|
+
Add33(&t11h,&t11m,&t11l,tbl[3],tbl[4],tbl[5],tt11h,tt11m,tt11l); /* 149 - 47/53 */
|
96
|
+
Mul233(&tt12h,&tt12m,&tt12l,xSqh,xSql,t11h,t11m,t11l); /* 143 - 43/53 */
|
97
|
+
Add33(&t12h,&t12m,&t12l,tbl[0],tbl[1],tbl[2],tt12h,tt12m,tt12l); /* 140 - 38/53 */
|
98
|
+
|
99
|
+
Mul123(&xCubeh,&xCubem,&xCubel,x,xSqh,xSql); /* 154 - 47/53 */
|
100
|
+
|
101
|
+
Mul33(&tt13h,&tt13m,&tt13l,xCubeh,xCubem,xCubel,t12h,t12m,t12l); /* 136 - 34/53 */
|
102
|
+
Add133(&t13h,&t13m,&t13l,x,tt13h,tt13m,tt13l); /* 138 - 32/53 */
|
103
|
+
|
104
|
+
Renormalize3(&polyh,&polym,&polyl,t13h,t13m,t13l); /* infty - 52/53 */
|
105
|
+
*asinh = sign * polyh;
|
106
|
+
*asinm = sign * polym;
|
107
|
+
*asinl = sign * polyl;
|
108
|
+
}
|
109
|
+
|
110
|
+
|
111
|
+
void asin_accurate_middle(double *asinh, double *asinm, double *asinl, double z, int i, double sign) {
|
112
|
+
double highPoly;
|
113
|
+
double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l, t8h, t8l, t9h, t9l;
|
114
|
+
double t10h, t10m, t10l, t11h, t11m, t11l, t12h, t12m, t12l, t13h, t13m, t13l, t14h, t14m, t14l;
|
115
|
+
double t15h, t15m, t15l, t16h, t16m, t16l;
|
116
|
+
double tt1h, tt1l;
|
117
|
+
double tt10h, tt10m, tt10l, tt11h, tt11m, tt11l, tt12h, tt12m, tt12l;
|
118
|
+
double tt13h, tt13m, tt13l, tt14h, tt14m, tt14l, tt15h, tt15m, tt15l, tt16h, tt16m, tt16l;
|
119
|
+
double polyh, polym, polyl, tt13hover, tt13mover, tt13lover;
|
120
|
+
|
121
|
+
#if EVAL_PERF
|
122
|
+
crlibm_second_step_taken++;
|
123
|
+
#endif
|
124
|
+
|
125
|
+
/* Evaluate the polynomial of degree 35
|
126
|
+
Its coefficients start at tbl[i+1]
|
127
|
+
Evaluate degrees 35 to 20 in double precision,
|
128
|
+
degrees 20 to 7 in double-double precision and
|
129
|
+
finally degrees 6 to 1 in triple-double.
|
130
|
+
The constant coefficient is a double-double, the
|
131
|
+
computations are nevertheless in triple-double
|
132
|
+
*/
|
133
|
+
|
134
|
+
/* Double computations */
|
135
|
+
|
136
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
137
|
+
highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
|
138
|
+
tbl[i+58] ,z,tbl[i+57]),z,tbl[i+56]),z,tbl[i+55]),z,tbl[i+54]),z,
|
139
|
+
tbl[i+53]),z,tbl[i+52]),z,tbl[i+51]),z,tbl[i+50]),z,tbl[i+49]),z,
|
140
|
+
tbl[i+48]),z,tbl[i+47]),z,tbl[i+46]),z,tbl[i+45]),z,tbl[i+44]),z,
|
141
|
+
tbl[i+43]),z,tbl[i+42]),z,tbl[i+41]),z,tbl[i+40]),z,tbl[i+39]);
|
142
|
+
|
143
|
+
#else
|
144
|
+
highPoly = tbl[i+39] + z * (tbl[i+40] + z * (tbl[i+41] + z * (tbl[i+42] + z * (
|
145
|
+
tbl[i+43] + z * (tbl[i+44] + z * (tbl[i+45] + z * (tbl[i+46] + z * (
|
146
|
+
tbl[i+47] + z * (tbl[i+48] + z * (tbl[i+49] + z * (tbl[i+50] + z * (
|
147
|
+
tbl[i+51] + z * (tbl[i+52] + z * (tbl[i+53] + z * (tbl[i+54] + z * (
|
148
|
+
tbl[i+55] + z * (tbl[i+56] + z * (tbl[i+57] + z * tbl[i+58]))))))))))))))))));
|
149
|
+
#endif
|
150
|
+
|
151
|
+
|
152
|
+
/* Double-double computations */
|
153
|
+
|
154
|
+
Mul12(&tt1h,&tt1l,z,highPoly);
|
155
|
+
Add22(&t1h,&t1l,tbl[i+37],tbl[i+38],tt1h,tt1l);
|
156
|
+
|
157
|
+
MulAdd212(&t2h,&t2l,tbl[i+35],tbl[i+36],z,t1h,t1l);
|
158
|
+
MulAdd212(&t3h,&t3l,tbl[i+33],tbl[i+34],z,t2h,t2l);
|
159
|
+
MulAdd212(&t4h,&t4l,tbl[i+31],tbl[i+32],z,t3h,t3l);
|
160
|
+
MulAdd212(&t5h,&t5l,tbl[i+29],tbl[i+30],z,t4h,t4l);
|
161
|
+
MulAdd212(&t6h,&t6l,tbl[i+27],tbl[i+28],z,t5h,t5l);
|
162
|
+
MulAdd212(&t7h,&t7l,tbl[i+25],tbl[i+26],z,t6h,t6l);
|
163
|
+
MulAdd212(&t8h,&t8l,tbl[i+23],tbl[i+24],z,t7h,t7l);
|
164
|
+
MulAdd212(&t9h,&t9l,tbl[i+21],tbl[i+22],z,t8h,t8l);
|
165
|
+
|
166
|
+
/* Triple-double computations */
|
167
|
+
|
168
|
+
Mul123(&tt10h,&tt10m,&tt10l,z,t9h,t9l); /* 154 - 47/53 */
|
169
|
+
Add33(&t10h,&t10m,&t10l,tbl[i+18],tbl[i+19],tbl[i+20],tt10h,tt10m,tt10l); /* 144 - 42/53 */
|
170
|
+
Mul133(&tt11h,&tt11m,&tt11l,z,t10h,t10m,t10l); /* 142 - 38/53 */
|
171
|
+
Add33(&t11h,&t11m,&t11l,tbl[i+15],tbl[i+16],tbl[i+17],tt11h,tt11m,tt11l); /* 136 - 33/53 */
|
172
|
+
Mul133(&tt12h,&tt12m,&tt12l,z,t11h,t11m,t11l); /* 133 - 28/53 */
|
173
|
+
Add33(&t12h,&t12m,&t12l,tbl[i+12],tbl[i+13],tbl[i+14],tt12h,tt12m,tt12l); /* 125 - 23/53 */
|
174
|
+
Mul133(&tt13hover,&tt13mover,&tt13lover,z,t12h,t12m,t12l); /* 123 - 18/53 */
|
175
|
+
|
176
|
+
Renormalize3(&tt13h,&tt13m,&tt13l,tt13hover,tt13mover,tt13lover); /* infty - 52/53 */
|
177
|
+
|
178
|
+
Add33(&t13h,&t13m,&t13l,tbl[i+9],tbl[i+10],tbl[i+11],tt13h,tt13m,tt13l); /* 149 - 47/53 */
|
179
|
+
Mul133(&tt14h,&tt14m,&tt14l,z,t13h,t13m,t13l); /* 147 - 42/53 */
|
180
|
+
Add33(&t14h,&t14m,&t14l,tbl[i+6],tbl[i+7],tbl[i+8],tt14h,tt14m,tt14l); /* 139 - 37/53 */
|
181
|
+
Mul133(&tt15h,&tt15m,&tt15l,z,t14h,t14m,t14l); /* 137 - 32/53 */
|
182
|
+
Add33(&t15h,&t15m,&t15l,tbl[i+3],tbl[i+4],tbl[i+5],tt15h,tt15m,tt15l); /* 129 - 28/53 */
|
183
|
+
Mul133(&tt16h,&tt16m,&tt16l,z,t15h,t15m,t15l); /* 128 - 23/53 */
|
184
|
+
Add233(&t16h,&t16m,&t16l,tbl[i+1],tbl[i+2],tt16h,tt16m,tt16l); /* 126 - 19/53 */
|
185
|
+
|
186
|
+
Renormalize3(&polyh,&polym,&polyl,t16h,t16m,t16l); /* infty - 52/53 */
|
187
|
+
*asinh = sign * polyh;
|
188
|
+
*asinm = sign * polym;
|
189
|
+
*asinl = sign * polyl;
|
190
|
+
}
|
191
|
+
|
192
|
+
|
193
|
+
void asin_accurate_higher(double *asinh, double *asinm, double *asinl, double z, double sign) {
|
194
|
+
double highPoly;
|
195
|
+
double tt1h, tt1l;
|
196
|
+
double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l, t8h, t8l;
|
197
|
+
double tt10h, tt10m, tt10l, tt11h, tt11m, tt11l, tt12h, tt12m, tt12l, tt13h, tt13m, tt13l;
|
198
|
+
double tt14h, tt14m, tt14l, tt15h, tt15m, tt15l, tt16h, tt16m, tt16l, tt17h, tt17m, tt17l;
|
199
|
+
double t9h, t9l, t10h, t10m, t10l, t11h, t11m, t11l, t12h, t12m, t12l, t13h, t13m, t13l;
|
200
|
+
double t14h, t14m, t14l, t15h, t15m, t15l, t16h, t16m, t16l, t17h, t17m, t17l;
|
201
|
+
double tt18h, tt18m, tt18l, polyh, polym, polyl;
|
202
|
+
double sqrtzh, sqrtzm, sqrtzl, twoZ, pTimesSh, pTimesSm, pTimesSl;
|
203
|
+
double allhover, allmover, alllover, allh, allm, alll;
|
204
|
+
double tt13hover, tt13mover, tt13lover, tt16hover, tt16mover, tt16lover;
|
205
|
+
|
206
|
+
#if EVAL_PERF
|
207
|
+
crlibm_second_step_taken++;
|
208
|
+
#endif
|
209
|
+
|
210
|
+
/* We evaluate asin(x) as
|
211
|
+
|
212
|
+
asin(x) = f(z) * sqrt(2*z) + Pi/2
|
213
|
+
|
214
|
+
with z = 1 - x and
|
215
|
+
|
216
|
+
f(z) = (asin(z) - Pi/2) / sqrt(2*z)
|
217
|
+
|
218
|
+
f(z) is approximated by p(z)
|
219
|
+
|
220
|
+
The polynomial p(z) is of degree 29
|
221
|
+
Its coefficients start at tbl[TBLIDX10]
|
222
|
+
Coefficients for degrees 29 to 18 are in double precision,
|
223
|
+
for degrees 17 to 9 in double-double precision and
|
224
|
+
finally for degrees 8 to 1 in triple-double.
|
225
|
+
The constant coefficient (-1) is not stored in the table,
|
226
|
+
the computations are nevertheless in triple-double
|
227
|
+
We evaluate the monomials in the precision in which
|
228
|
+
the correspondant coefficients are stored
|
229
|
+
The coefficients' values decrease very quickly
|
230
|
+
so even with |z| < 2^-2.18 we can compute degree 18
|
231
|
+
already in double precision
|
232
|
+
|
233
|
+
Compute than sqrt(2*z) as a triple-double
|
234
|
+
multiply in triple-double and add Pi/2
|
235
|
+
We will cancel no bit in the addition since
|
236
|
+
f(z) < 0.5 * Pi/2
|
237
|
+
|
238
|
+
*/
|
239
|
+
|
240
|
+
/* Double computations */
|
241
|
+
|
242
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
243
|
+
highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
|
244
|
+
tbl[TBLIDX10+53] ,z,tbl[TBLIDX10+52]),z,tbl[TBLIDX10+51]),z,
|
245
|
+
tbl[TBLIDX10+50]),z,tbl[TBLIDX10+49]),z,tbl[TBLIDX10+48]),z,
|
246
|
+
tbl[TBLIDX10+47]),z,tbl[TBLIDX10+46]),z,tbl[TBLIDX10+45]),z,
|
247
|
+
tbl[TBLIDX10+44]),z,tbl[TBLIDX10+43]),z,tbl[TBLIDX10+42]);
|
248
|
+
#else
|
249
|
+
highPoly = tbl[TBLIDX10+42] + z * (tbl[TBLIDX10+43] + z * (tbl[TBLIDX10+44] + z * (
|
250
|
+
tbl[TBLIDX10+45] + z * (tbl[TBLIDX10+46] + z * (tbl[TBLIDX10+47] + z * (
|
251
|
+
tbl[TBLIDX10+48] + z * (tbl[TBLIDX10+49] + z * (tbl[TBLIDX10+50] + z * (
|
252
|
+
tbl[TBLIDX10+51] + z * (tbl[TBLIDX10+52] + z * tbl[TBLIDX10+53]))))))))));
|
253
|
+
#endif
|
254
|
+
|
255
|
+
/* Double-double computations */
|
256
|
+
|
257
|
+
Mul12(&tt1h,&tt1l,z,highPoly);
|
258
|
+
Add22(&t1h,&t1l,tbl[TBLIDX10+40],tbl[TBLIDX10+41],tt1h,tt1l);
|
259
|
+
|
260
|
+
MulAdd212(&t2h,&t2l,tbl[TBLIDX10+38],tbl[TBLIDX10+39],z,t1h,t1l);
|
261
|
+
MulAdd212(&t3h,&t3l,tbl[TBLIDX10+36],tbl[TBLIDX10+37],z,t2h,t2l);
|
262
|
+
MulAdd212(&t4h,&t4l,tbl[TBLIDX10+34],tbl[TBLIDX10+35],z,t3h,t3l);
|
263
|
+
MulAdd212(&t5h,&t5l,tbl[TBLIDX10+32],tbl[TBLIDX10+33],z,t4h,t4l);
|
264
|
+
MulAdd212(&t6h,&t6l,tbl[TBLIDX10+30],tbl[TBLIDX10+31],z,t5h,t5l);
|
265
|
+
MulAdd212(&t7h,&t7l,tbl[TBLIDX10+28],tbl[TBLIDX10+29],z,t6h,t6l);
|
266
|
+
MulAdd212(&t8h,&t8l,tbl[TBLIDX10+26],tbl[TBLIDX10+27],z,t7h,t7l);
|
267
|
+
MulAdd212(&t9h,&t9l,tbl[TBLIDX10+24],tbl[TBLIDX10+25],z,t8h,t8l);
|
268
|
+
|
269
|
+
/* Triple-double computations */
|
270
|
+
|
271
|
+
Mul123(&tt10h,&tt10m,&tt10l,z,t9h,t9l); /* 154 - 47/53 */
|
272
|
+
Add33(&t10h,&t10m,&t10l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tbl[TBLIDX10+23],tt10h,tt10m,tt10l); /* 144 - 42/53 */
|
273
|
+
Mul133(&tt11h,&tt11m,&tt11l,z,t10h,t10m,t10l); /* 142 - 37/53 */
|
274
|
+
Add33(&t11h,&t11m,&t11l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],tbl[TBLIDX10+20],tt11h,tt11m,tt11l); /* 134 - 32/53 */
|
275
|
+
Mul133(&tt12h,&tt12m,&tt12l,z,t11h,t11m,t11l); /* 132 - 27/53 */
|
276
|
+
Add33(&t12h,&t12m,&t12l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],tbl[TBLIDX10+17],tt12h,tt12m,tt12l); /* 124 - 22/53 */
|
277
|
+
Mul133(&tt13hover,&tt13mover,&tt13lover,z,t12h,t12m,t12l); /* 122 - 17/53 */
|
278
|
+
|
279
|
+
Renormalize3(&tt13h,&tt13m,&tt13l,tt13hover,tt13mover,tt13lover); /* infty - 52/53 */
|
280
|
+
|
281
|
+
Add33(&t13h,&t13m,&t13l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],tbl[TBLIDX10+14],tt13h,tt13m,tt13l); /* 149 - 47/53 */
|
282
|
+
Mul133(&tt14h,&tt14m,&tt14l,z,t13h,t13m,t13l); /* 147 - 42/53 */
|
283
|
+
Add33(&t14h,&t14m,&t14l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],tbl[TBLIDX10+11],tt14h,tt14m,tt14l); /* 139 - 37/53 */
|
284
|
+
Mul133(&tt15h,&tt15m,&tt15l,z,t14h,t14m,t14l); /* 137 - 32/53 */
|
285
|
+
Add33(&t15h,&t15m,&t15l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],tbl[TBLIDX10+8],tt15h,tt15m,tt15l); /* 129 - 27/53 */
|
286
|
+
Mul133(&tt16hover,&tt16mover,&tt16lover,z,t15h,t15m,t15l); /* 127 - 22/53 */
|
287
|
+
|
288
|
+
Renormalize3(&tt16h,&tt16m,&tt16l,tt16hover,tt16mover,tt16lover); /* infty - 52/53 */
|
289
|
+
|
290
|
+
Add33(&t16h,&t16m,&t16l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],tbl[TBLIDX10+5],tt16h,tt16m,tt16l); /* 149 - 47/53 */
|
291
|
+
Mul133(&tt17h,&tt17m,&tt17l,z,t16h,t16m,t16l); /* 147 - 42/53 */
|
292
|
+
Add33(&t17h,&t17m,&t17l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],tbl[TBLIDX10+2],tt17h,tt17m,tt17l); /* 139 - 37/53 */
|
293
|
+
Mul133(&tt18h,&tt18m,&tt18l,z,t17h,t17m,t17l); /* 137 - 32/53 */
|
294
|
+
Add133(&polyh,&polym,&polyl,-1,tt18h,tt18m,tt18l); /* 136 - 30/53 */
|
295
|
+
|
296
|
+
/* Compute sqrt(2*z) as a triple-double */
|
297
|
+
|
298
|
+
twoZ = 2 * z;
|
299
|
+
Sqrt13(&sqrtzh,&sqrtzm,&sqrtzl,twoZ); /* 146 - 52/53 */
|
300
|
+
|
301
|
+
/* Multiply p(z) by sqrt(2*z) and add Pi/2 */
|
302
|
+
|
303
|
+
Mul33(&pTimesSh,&pTimesSm,&pTimesSl,polyh,polym,polyl,sqrtzh,sqrtzm,sqrtzl); /* 128 - 26/53 */
|
304
|
+
Add33(&allhover,&allmover,&alllover,PIHALFH,PIHALFM,PIHALFL,pTimesSh,pTimesSm,pTimesSl); /* 126 - 21/53 */
|
305
|
+
|
306
|
+
/* Renormalize and multiply by sign */
|
307
|
+
Renormalize3(&allh,&allm,&alll,allhover,allmover,alllover); /* infty - 52/53 */
|
308
|
+
*asinh = sign * allh;
|
309
|
+
*asinm = sign * allm;
|
310
|
+
*asinl = sign * alll;
|
311
|
+
}
|
312
|
+
|
313
|
+
|
314
|
+
|
315
|
+
|
316
|
+
|
317
|
+
|
318
|
+
|
319
|
+
|
320
|
+
double asin_rn(double x) {
|
321
|
+
db_number xdb;
|
322
|
+
double sign, z, asinh, asinm, asinl;
|
323
|
+
int i;
|
324
|
+
double xSqh, xSql;
|
325
|
+
double tt1h, tt1l;
|
326
|
+
double tt6h, tt6l;
|
327
|
+
double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
|
328
|
+
double t7h, t7l, t8h, t8l, polyh, polyl, twoZ, sqrtzh, sqrtzl;
|
329
|
+
double pTimesSh, pTimesSl, allh, alll, highPoly, xCubeh, xCubel;
|
330
|
+
double tmp1, tmp2, tmp3, tmp4, tmp5;
|
331
|
+
|
332
|
+
/* Transform the argument into integer */
|
333
|
+
xdb.d = x;
|
334
|
+
|
335
|
+
/* Special case handling */
|
336
|
+
|
337
|
+
/* Strip off the sign of argument x */
|
338
|
+
if (xdb.i[HI] & 0x80000000) sign = -1; else sign = 1;
|
339
|
+
xdb.i[HI] &= 0x7fffffff;
|
340
|
+
|
341
|
+
/* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
|
342
|
+
if ((xdb.i[HI] > 0x3ff00000) || ((xdb.i[HI] == 0x3ff00000) && (xdb.i[LO] != 0x00000000))) {
|
343
|
+
return (x-x)/0.0; /* return NaN */
|
344
|
+
}
|
345
|
+
|
346
|
+
/* If |x| < 2^(-28) we have
|
347
|
+
|
348
|
+
arcsin(x) = x * ( 1 + xi )
|
349
|
+
|
350
|
+
with 0 <= xi < 2^(-55)
|
351
|
+
|
352
|
+
So we can decide the rounding without any computation
|
353
|
+
*/
|
354
|
+
if (xdb.i[HI] < 0x3e300000) {
|
355
|
+
return x;
|
356
|
+
}
|
357
|
+
|
358
|
+
/* Recast x */
|
359
|
+
x = xdb.d;
|
360
|
+
|
361
|
+
/* Find correspondant interval and compute index to the table
|
362
|
+
We start by filtering the two special cases around 0 and 1
|
363
|
+
*/
|
364
|
+
|
365
|
+
if (xdb.i[HI] < BOUND1) {
|
366
|
+
/* Special interval 0..BOUND1
|
367
|
+
The polynomial has no even monomials
|
368
|
+
We must prove extra accuracy in the interval 0..sin(2^(-18))
|
369
|
+
*/
|
370
|
+
|
371
|
+
/* Quick phase starts */
|
372
|
+
|
373
|
+
/* Compute square of x for both quick and accurate phases */
|
374
|
+
Mul12(&xSqh,&xSql,x,x);
|
375
|
+
|
376
|
+
tmp4 = tbl[3];
|
377
|
+
tmp5 = tbl[4];
|
378
|
+
t4h = tmp4;
|
379
|
+
t4l = tmp5;
|
380
|
+
if (xdb.i[HI] > EXTRABOUND) {
|
381
|
+
/* Double precision evaluation */
|
382
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
383
|
+
highPoly = FMA(FMA(FMA(FMA(tbl[23],xSqh,tbl[21]),xSqh,tbl[19]),xSqh,tbl[17]),xSqh,tbl[15]);
|
384
|
+
#else
|
385
|
+
highPoly = tbl[15] + xSqh * (tbl[17] + xSqh * (tbl[19] + xSqh * (tbl[21] + xSqh * tbl[23])));
|
386
|
+
#endif
|
387
|
+
|
388
|
+
/* Double-double precision evaluation */
|
389
|
+
Mul12(&tt1h,&tt1l,xSqh,highPoly);
|
390
|
+
Add22(&t1h,&t1l,tbl[12],tbl[13],tt1h,tt1l);
|
391
|
+
|
392
|
+
MulAdd212(&t2h,&t2l,tbl[9],tbl[10],xSqh,t1h,t1l);
|
393
|
+
MulAdd212(&t3h,&t3l,tbl[6],tbl[7],xSqh,t2h,t2l);
|
394
|
+
MulAdd22(&t4h,&t4l,tmp4,tmp5,xSqh,xSql,t3h,t3l);
|
395
|
+
}
|
396
|
+
|
397
|
+
MulAdd22(&t5h,&t5l,tbl[0],tbl[1],xSqh,xSql,t4h,t4l);
|
398
|
+
|
399
|
+
Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
|
400
|
+
Mul22(&tt6h,&tt6l,xCubeh,xCubel,t5h,t5l);
|
401
|
+
|
402
|
+
Add12(tmp1,tmp2,x,tt6h);
|
403
|
+
tmp3 = tmp2 + tt6l;
|
404
|
+
Add12(polyh,polyl,tmp1,tmp3);
|
405
|
+
|
406
|
+
/* Multiply by sign */
|
407
|
+
asinh = sign * polyh;
|
408
|
+
asinm = sign * polyl;
|
409
|
+
|
410
|
+
/* Rounding test (on polyh+polyl, equivalently to asinh+asinm)
|
411
|
+
The RN rounding constant is at tbl[34]
|
412
|
+
*/
|
413
|
+
if(polyh == (polyh + (polyl * tbl[34])))
|
414
|
+
return asinh;
|
415
|
+
|
416
|
+
/* Launch accurate phase */
|
417
|
+
|
418
|
+
asin_accurate_lower(&asinh,&asinm,&asinl,x,xSqh,xSql,sign);
|
419
|
+
|
420
|
+
ReturnRoundToNearest3(asinh,asinm,asinl);
|
421
|
+
}
|
422
|
+
|
423
|
+
if (xdb.i[HI] >= BOUND9) {
|
424
|
+
/* Special interval BOUND9..1
|
425
|
+
We use an asymptotic development of arcsin in sqrt(1 - x)
|
426
|
+
*/
|
427
|
+
|
428
|
+
/* Argument reduction for quick and accurate phase
|
429
|
+
z = 1 - x
|
430
|
+
The operation is exact as per Sterbenz' lemma
|
431
|
+
*/
|
432
|
+
|
433
|
+
z = 1 - x;
|
434
|
+
|
435
|
+
/* Quick phase starts */
|
436
|
+
|
437
|
+
/* Double precision evaluation */
|
438
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
439
|
+
highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
|
440
|
+
tbl[TBLIDX10+42] ,z,tbl[TBLIDX10+40]),z,tbl[TBLIDX10+38]),z,
|
441
|
+
tbl[TBLIDX10+36]),z,tbl[TBLIDX10+34]),z,tbl[TBLIDX10+32]),z,
|
442
|
+
tbl[TBLIDX10+30]),z,tbl[TBLIDX10+28]),z,tbl[TBLIDX10+26]),z,
|
443
|
+
tbl[TBLIDX10+24]);
|
444
|
+
#else
|
445
|
+
highPoly = tbl[TBLIDX10+24] + z * (tbl[TBLIDX10+26] + z * (tbl[TBLIDX10+28] + z * (
|
446
|
+
tbl[TBLIDX10+30] + z * (tbl[TBLIDX10+32] + z * (tbl[TBLIDX10+34] + z * (
|
447
|
+
tbl[TBLIDX10+36] + z * (tbl[TBLIDX10+38] + z * (tbl[TBLIDX10+40] + z *
|
448
|
+
tbl[TBLIDX10+42]))))))));
|
449
|
+
#endif
|
450
|
+
|
451
|
+
/* Double-double precision evaluation */
|
452
|
+
Mul12(&tt1h,&tt1l,z,highPoly);
|
453
|
+
Add22(&t1h,&t1l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tt1h,tt1l);
|
454
|
+
|
455
|
+
MulAdd212(&t2h,&t2l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],z,t1h,t1l);
|
456
|
+
MulAdd212(&t3h,&t3l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],z,t2h,t2l);
|
457
|
+
MulAdd212(&t4h,&t4l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],z,t3h,t3l);
|
458
|
+
MulAdd212(&t5h,&t5l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],z,t4h,t4l);
|
459
|
+
MulAdd212(&t6h,&t6l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],z,t5h,t5l);
|
460
|
+
MulAdd212(&t7h,&t7l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],z,t6h,t6l);
|
461
|
+
MulAdd212(&t8h,&t8l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],z,t7h,t7l);
|
462
|
+
MulAdd212(&polyh,&polyl,-1,0,z,t8h,t8l);
|
463
|
+
|
464
|
+
/* Compute sqrt(2*z) as a double-double */
|
465
|
+
|
466
|
+
twoZ = 2 * z;
|
467
|
+
sqrt12(&sqrtzh,&sqrtzl,twoZ);
|
468
|
+
|
469
|
+
/* Multiply p(z) by sqrt(2*z) and add Pi/2 */
|
470
|
+
|
471
|
+
Mul22(&pTimesSh,&pTimesSl,polyh,polyl,sqrtzh,sqrtzl);
|
472
|
+
Add22(&allh,&alll,PIHALFH,PIHALFM,pTimesSh,pTimesSl);
|
473
|
+
|
474
|
+
/* Multiply by sign */
|
475
|
+
asinh = sign * allh;
|
476
|
+
asinm = sign * alll;
|
477
|
+
|
478
|
+
/* Rounding test
|
479
|
+
The RN rounding constant is at tbl[TBLIDX10+54]
|
480
|
+
*/
|
481
|
+
|
482
|
+
if(allh == (allh + (alll * tbl[TBLIDX10+54])))
|
483
|
+
return asinh;
|
484
|
+
|
485
|
+
/* Launch accurate phase */
|
486
|
+
|
487
|
+
asin_accurate_higher(&asinh,&asinm,&asinl,z,sign);
|
488
|
+
|
489
|
+
ReturnRoundToNearest3(asinh,asinm,asinl);
|
490
|
+
}
|
491
|
+
|
492
|
+
/* General 8 main intervals
|
493
|
+
We can already suppose that BOUND1 <= x <= BOUND9
|
494
|
+
*/
|
495
|
+
|
496
|
+
if (xdb.i[HI] < BOUND5) {
|
497
|
+
if (xdb.i[HI] < BOUND3) {
|
498
|
+
if (xdb.i[HI] < BOUND2) i = TBLIDX2; else i = TBLIDX3;
|
499
|
+
} else {
|
500
|
+
if (xdb.i[HI] < BOUND4) i = TBLIDX4; else i = TBLIDX5;
|
501
|
+
}
|
502
|
+
} else {
|
503
|
+
if (xdb.i[HI] < BOUND7) {
|
504
|
+
if (xdb.i[HI] < BOUND6) i = TBLIDX6; else i = TBLIDX7;
|
505
|
+
} else {
|
506
|
+
if (xdb.i[HI] < BOUND8) i = TBLIDX8; else i = TBLIDX9;
|
507
|
+
}
|
508
|
+
}
|
509
|
+
|
510
|
+
/* Argument reduction
|
511
|
+
i points to the interval midpoint value in the table
|
512
|
+
*/
|
513
|
+
z = x - tbl[i];
|
514
|
+
|
515
|
+
/* Quick phase starts */
|
516
|
+
|
517
|
+
/* Double precision evaluation */
|
518
|
+
|
519
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
520
|
+
highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(
|
521
|
+
tbl[i+35] ,z,tbl[i+33]),z,tbl[i+31]),z,tbl[i+29]),z,
|
522
|
+
tbl[i+27]),z,tbl[i+25]),z,tbl[i+23]),z,tbl[i+21]);
|
523
|
+
#else
|
524
|
+
highPoly = tbl[i+21] + z * (tbl[i+23] + z * (tbl[i+25] + z * (
|
525
|
+
tbl[i+27] + z * (tbl[i+29] + z * (tbl[i+31] + z * (
|
526
|
+
tbl[i+33] + z * tbl[i+35]))))));
|
527
|
+
#endif
|
528
|
+
|
529
|
+
/* Double-double precision evaluation */
|
530
|
+
|
531
|
+
Mul12(&tt1h,&tt1l,z,highPoly);
|
532
|
+
Add22(&t1h,&t1l,tbl[i+18],tbl[i+19],tt1h,tt1l);
|
533
|
+
|
534
|
+
MulAdd212(&t2h,&t2l,tbl[i+15],tbl[i+16],z,t1h,t1l);
|
535
|
+
MulAdd212(&t3h,&t3l,tbl[i+12],tbl[i+13],z,t2h,t2l);
|
536
|
+
MulAdd212(&t4h,&t4l,tbl[i+9],tbl[i+10],z,t3h,t3l);
|
537
|
+
MulAdd212(&t5h,&t5l,tbl[i+6],tbl[i+7],z,t4h,t4l);
|
538
|
+
MulAdd212(&t6h,&t6l,tbl[i+3],tbl[i+4],z,t5h,t5l);
|
539
|
+
MulAdd212(&polyh,&polyl,tbl[i+1],tbl[i+2],z,t6h,t6l);
|
540
|
+
|
541
|
+
/* Multiply by sign */
|
542
|
+
asinh = sign * polyh;
|
543
|
+
asinm = sign * polyl;
|
544
|
+
|
545
|
+
/* Rounding test
|
546
|
+
The RN rounding constant is at tbl[i+59]
|
547
|
+
*/
|
548
|
+
if(polyh == (polyh + (polyl * tbl[i+59])))
|
549
|
+
return asinh;
|
550
|
+
|
551
|
+
/* Launch accurate phase */
|
552
|
+
|
553
|
+
asin_accurate_middle(&asinh,&asinm,&asinl,z,i,sign);
|
554
|
+
|
555
|
+
ReturnRoundToNearest3(asinh,asinm,asinl);
|
556
|
+
}
|
557
|
+
|
558
|
+
|
559
|
+
|
560
|
+
|
561
|
+
|
562
|
+
|
563
|
+
double asin_ru(double x) {
|
564
|
+
db_number xdb;
|
565
|
+
double sign, z, asinh, asinm, asinl;
|
566
|
+
int i;
|
567
|
+
double xSqh, xSql;
|
568
|
+
double tt1h, tt1l;
|
569
|
+
double tt6h, tt6l;
|
570
|
+
double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
|
571
|
+
double t7h, t7l, t8h, t8l, polyh, polyl, twoZ, sqrtzh, sqrtzl;
|
572
|
+
double pTimesSh, pTimesSl, allh, alll, highPoly, xCubeh, xCubel;
|
573
|
+
double tmp1, tmp2, tmp3, tmp4, tmp5;
|
574
|
+
|
575
|
+
/* Transform the argument into integer */
|
576
|
+
xdb.d = x;
|
577
|
+
|
578
|
+
/* Special case handling */
|
579
|
+
|
580
|
+
/* Strip off the sign of argument x */
|
581
|
+
if (xdb.i[HI] & 0x80000000) sign = -1; else sign = 1;
|
582
|
+
xdb.i[HI] &= 0x7fffffff;
|
583
|
+
|
584
|
+
/* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
|
585
|
+
if ((xdb.i[HI] > 0x3ff00000) || ((xdb.i[HI] == 0x3ff00000) && (xdb.i[LO] != 0x00000000))) {
|
586
|
+
return (x-x)/0.0; /* return NaN */
|
587
|
+
}
|
588
|
+
|
589
|
+
/* If |x| < 2^(-28) we have
|
590
|
+
|
591
|
+
arcsin(x) = x * ( 1 + xi )
|
592
|
+
|
593
|
+
with 0 <= xi < 2^(-55)
|
594
|
+
|
595
|
+
So we can decide the rounding without any computation
|
596
|
+
*/
|
597
|
+
if (xdb.i[HI] < 0x3e300000) {
|
598
|
+
/* If x == 0 then we got the algebraic result arcsin(0) = 0
|
599
|
+
If x < 0 then the truncation rest is negative but less than
|
600
|
+
1 ulp; we round upwards by returning x
|
601
|
+
*/
|
602
|
+
if (x <= 0) return x;
|
603
|
+
/* Otherwise the rest is positive, less than 1 ulp and the
|
604
|
+
image is not algebraic
|
605
|
+
We return x + 1ulp
|
606
|
+
*/
|
607
|
+
xdb.l++;
|
608
|
+
return xdb.d;
|
609
|
+
}
|
610
|
+
|
611
|
+
/* Recast x */
|
612
|
+
x = xdb.d;
|
613
|
+
|
614
|
+
/* Find correspondant interval and compute index to the table
|
615
|
+
We start by filtering the two special cases around 0 and 1
|
616
|
+
*/
|
617
|
+
|
618
|
+
if (xdb.i[HI] < BOUND1) {
|
619
|
+
/* Special interval 0..BOUND1
|
620
|
+
The polynomial has no even monomials
|
621
|
+
We must prove extra accuracy in the interval 0..sin(2^(-18))
|
622
|
+
*/
|
623
|
+
|
624
|
+
/* Quick phase starts */
|
625
|
+
|
626
|
+
/* Compute square of x for both quick and accurate phases */
|
627
|
+
Mul12(&xSqh,&xSql,x,x);
|
628
|
+
|
629
|
+
tmp4 = tbl[3];
|
630
|
+
tmp5 = tbl[4];
|
631
|
+
t4h = tmp4;
|
632
|
+
t4l = tmp5;
|
633
|
+
if (xdb.i[HI] > EXTRABOUND) {
|
634
|
+
/* Double precision evaluation */
|
635
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
636
|
+
highPoly = FMA(FMA(FMA(FMA(tbl[23],xSqh,tbl[21]),xSqh,tbl[19]),xSqh,tbl[17]),xSqh,tbl[15]);
|
637
|
+
#else
|
638
|
+
highPoly = tbl[15] + xSqh * (tbl[17] + xSqh * (tbl[19] + xSqh * (tbl[21] + xSqh * tbl[23])));
|
639
|
+
#endif
|
640
|
+
|
641
|
+
/* Double-double precision evaluation */
|
642
|
+
Mul12(&tt1h,&tt1l,xSqh,highPoly);
|
643
|
+
Add22(&t1h,&t1l,tbl[12],tbl[13],tt1h,tt1l);
|
644
|
+
|
645
|
+
MulAdd212(&t2h,&t2l,tbl[9],tbl[10],xSqh,t1h,t1l);
|
646
|
+
MulAdd212(&t3h,&t3l,tbl[6],tbl[7],xSqh,t2h,t2l);
|
647
|
+
MulAdd22(&t4h,&t4l,tmp4,tmp5,xSqh,xSql,t3h,t3l);
|
648
|
+
}
|
649
|
+
|
650
|
+
MulAdd22(&t5h,&t5l,tbl[0],tbl[1],xSqh,xSql,t4h,t4l);
|
651
|
+
|
652
|
+
Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
|
653
|
+
Mul22(&tt6h,&tt6l,xCubeh,xCubel,t5h,t5l);
|
654
|
+
|
655
|
+
Add12(tmp1,tmp2,x,tt6h);
|
656
|
+
tmp3 = tmp2 + tt6l;
|
657
|
+
Add12(polyh,polyl,tmp1,tmp3);
|
658
|
+
|
659
|
+
/* Multiply by sign */
|
660
|
+
asinh = sign * polyh;
|
661
|
+
asinm = sign * polyl;
|
662
|
+
|
663
|
+
/* Rounding test
|
664
|
+
The RU rounding constant is at tbl[35]
|
665
|
+
*/
|
666
|
+
TEST_AND_RETURN_RU(asinh, asinm, tbl[35]);
|
667
|
+
|
668
|
+
/* Launch accurate phase */
|
669
|
+
|
670
|
+
asin_accurate_lower(&asinh,&asinm,&asinl,x,xSqh,xSql,sign);
|
671
|
+
|
672
|
+
ReturnRoundUpwards3(asinh,asinm,asinl);
|
673
|
+
}
|
674
|
+
|
675
|
+
if (xdb.i[HI] > BOUND9) {
|
676
|
+
/* Special interval BOUND9..1
|
677
|
+
We use an asymptotic development of arcsin in sqrt(1 - x)
|
678
|
+
*/
|
679
|
+
|
680
|
+
/* Argument reduction for quick and accurate phase
|
681
|
+
z = 1 - x
|
682
|
+
The operation is exact as per Sterbenz' lemma
|
683
|
+
*/
|
684
|
+
|
685
|
+
z = 1 - x;
|
686
|
+
|
687
|
+
/* Quick phase starts */
|
688
|
+
|
689
|
+
/* Double precision evaluation */
|
690
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
691
|
+
highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
|
692
|
+
tbl[TBLIDX10+42] ,z,tbl[TBLIDX10+40]),z,tbl[TBLIDX10+38]),z,
|
693
|
+
tbl[TBLIDX10+36]),z,tbl[TBLIDX10+34]),z,tbl[TBLIDX10+32]),z,
|
694
|
+
tbl[TBLIDX10+30]),z,tbl[TBLIDX10+28]),z,tbl[TBLIDX10+26]),z,
|
695
|
+
tbl[TBLIDX10+24]);
|
696
|
+
#else
|
697
|
+
highPoly = tbl[TBLIDX10+24] + z * (tbl[TBLIDX10+26] + z * (tbl[TBLIDX10+28] + z * (
|
698
|
+
tbl[TBLIDX10+30] + z * (tbl[TBLIDX10+32] + z * (tbl[TBLIDX10+34] + z * (
|
699
|
+
tbl[TBLIDX10+36] + z * (tbl[TBLIDX10+38] + z * (tbl[TBLIDX10+40] + z *
|
700
|
+
tbl[TBLIDX10+42]))))))));
|
701
|
+
#endif
|
702
|
+
|
703
|
+
/* Double-double precision evaluation */
|
704
|
+
Mul12(&tt1h,&tt1l,z,highPoly);
|
705
|
+
Add22(&t1h,&t1l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tt1h,tt1l);
|
706
|
+
|
707
|
+
MulAdd212(&t2h,&t2l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],z,t1h,t1l);
|
708
|
+
MulAdd212(&t3h,&t3l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],z,t2h,t2l);
|
709
|
+
MulAdd212(&t4h,&t4l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],z,t3h,t3l);
|
710
|
+
MulAdd212(&t5h,&t5l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],z,t4h,t4l);
|
711
|
+
MulAdd212(&t6h,&t6l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],z,t5h,t5l);
|
712
|
+
MulAdd212(&t7h,&t7l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],z,t6h,t6l);
|
713
|
+
MulAdd212(&t8h,&t8l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],z,t7h,t7l);
|
714
|
+
MulAdd212(&polyh,&polyl,-1,0,z,t8h,t8l);
|
715
|
+
|
716
|
+
/* Compute sqrt(2*z) as a double-double */
|
717
|
+
|
718
|
+
twoZ = 2 * z;
|
719
|
+
sqrt12(&sqrtzh,&sqrtzl,twoZ);
|
720
|
+
|
721
|
+
/* Multiply p(z) by sqrt(2*z) and add Pi/2 */
|
722
|
+
|
723
|
+
Mul22(&pTimesSh,&pTimesSl,polyh,polyl,sqrtzh,sqrtzl);
|
724
|
+
Add22(&allh,&alll,PIHALFH,PIHALFM,pTimesSh,pTimesSl);
|
725
|
+
|
726
|
+
/* Multiply by sign */
|
727
|
+
asinh = sign * allh;
|
728
|
+
asinm = sign * alll;
|
729
|
+
|
730
|
+
/* Rounding test
|
731
|
+
The RU rounding constant is at tbl[TBLIDX10+55]
|
732
|
+
*/
|
733
|
+
TEST_AND_RETURN_RU(asinh, asinm, tbl[TBLIDX10+55]);
|
734
|
+
|
735
|
+
/* Launch accurate phase */
|
736
|
+
|
737
|
+
asin_accurate_higher(&asinh,&asinm,&asinl,z,sign);
|
738
|
+
|
739
|
+
ReturnRoundUpwards3(asinh,asinm,asinl);
|
740
|
+
}
|
741
|
+
|
742
|
+
/* General 8 main intervals
|
743
|
+
We can already suppose that BOUND1 <= x <= BOUND9
|
744
|
+
*/
|
745
|
+
|
746
|
+
if (xdb.i[HI] < BOUND5) {
|
747
|
+
if (xdb.i[HI] < BOUND3) {
|
748
|
+
if (xdb.i[HI] < BOUND2) i = TBLIDX2; else i = TBLIDX3;
|
749
|
+
} else {
|
750
|
+
if (xdb.i[HI] < BOUND4) i = TBLIDX4; else i = TBLIDX5;
|
751
|
+
}
|
752
|
+
} else {
|
753
|
+
if (xdb.i[HI] < BOUND7) {
|
754
|
+
if (xdb.i[HI] < BOUND6) i = TBLIDX6; else i = TBLIDX7;
|
755
|
+
} else {
|
756
|
+
if (xdb.i[HI] < BOUND8) i = TBLIDX8; else i = TBLIDX9;
|
757
|
+
}
|
758
|
+
}
|
759
|
+
|
760
|
+
/* Argument reduction
|
761
|
+
i points to the interval midpoint value in the table
|
762
|
+
*/
|
763
|
+
z = x - tbl[i];
|
764
|
+
|
765
|
+
/* Quick phase starts */
|
766
|
+
|
767
|
+
/* Double precision evaluation */
|
768
|
+
|
769
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
770
|
+
highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(
|
771
|
+
tbl[i+35] ,z,tbl[i+33]),z,tbl[i+31]),z,tbl[i+29]),z,
|
772
|
+
tbl[i+27]),z,tbl[i+25]),z,tbl[i+23]),z,tbl[i+21]);
|
773
|
+
#else
|
774
|
+
highPoly = tbl[i+21] + z * (tbl[i+23] + z * (tbl[i+25] + z * (
|
775
|
+
tbl[i+27] + z * (tbl[i+29] + z * (tbl[i+31] + z * (
|
776
|
+
tbl[i+33] + z * tbl[i+35]))))));
|
777
|
+
#endif
|
778
|
+
|
779
|
+
/* Double-double precision evaluation */
|
780
|
+
|
781
|
+
Mul12(&tt1h,&tt1l,z,highPoly);
|
782
|
+
Add22(&t1h,&t1l,tbl[i+18],tbl[i+19],tt1h,tt1l);
|
783
|
+
|
784
|
+
MulAdd212(&t2h,&t2l,tbl[i+15],tbl[i+16],z,t1h,t1l);
|
785
|
+
MulAdd212(&t3h,&t3l,tbl[i+12],tbl[i+13],z,t2h,t2l);
|
786
|
+
MulAdd212(&t4h,&t4l,tbl[i+9],tbl[i+10],z,t3h,t3l);
|
787
|
+
MulAdd212(&t5h,&t5l,tbl[i+6],tbl[i+7],z,t4h,t4l);
|
788
|
+
MulAdd212(&t6h,&t6l,tbl[i+3],tbl[i+4],z,t5h,t5l);
|
789
|
+
MulAdd212(&polyh,&polyl,tbl[i+1],tbl[i+2],z,t6h,t6l);
|
790
|
+
|
791
|
+
/* Multiply by sign */
|
792
|
+
asinh = sign * polyh;
|
793
|
+
asinm = sign * polyl;
|
794
|
+
|
795
|
+
/* Rounding test
|
796
|
+
The RU rounding constant is at tbl[i+60]
|
797
|
+
*/
|
798
|
+
TEST_AND_RETURN_RU(asinh, asinm, tbl[i+60]);
|
799
|
+
|
800
|
+
/* Launch accurate phase */
|
801
|
+
|
802
|
+
asin_accurate_middle(&asinh,&asinm,&asinl,z,i,sign);
|
803
|
+
|
804
|
+
ReturnRoundUpwards3(asinh,asinm,asinl);
|
805
|
+
}
|
806
|
+
|
807
|
+
double asin_rd(double x) {
|
808
|
+
db_number xdb;
|
809
|
+
double sign, z, asinh, asinm, asinl;
|
810
|
+
int i;
|
811
|
+
double xSqh, xSql;
|
812
|
+
double tt1h, tt1l;
|
813
|
+
double tt6h, tt6l;
|
814
|
+
double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
|
815
|
+
double t7h, t7l, t8h, t8l, polyh, polyl, twoZ, sqrtzh, sqrtzl;
|
816
|
+
double pTimesSh, pTimesSl, allh, alll, highPoly, xCubeh, xCubel;
|
817
|
+
double tmp1, tmp2, tmp3, tmp4, tmp5;
|
818
|
+
|
819
|
+
/* Transform the argument into integer */
|
820
|
+
xdb.d = x;
|
821
|
+
|
822
|
+
/* Special case handling */
|
823
|
+
|
824
|
+
/* Strip off the sign of argument x */
|
825
|
+
if (xdb.i[HI] & 0x80000000) sign = -1; else sign = 1;
|
826
|
+
xdb.i[HI] &= 0x7fffffff;
|
827
|
+
|
828
|
+
/* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
|
829
|
+
if ((xdb.i[HI] > 0x3ff00000) || ((xdb.i[HI] == 0x3ff00000) && (xdb.i[LO] != 0x00000000))) {
|
830
|
+
return (x-x)/0.0; /* return NaN */
|
831
|
+
}
|
832
|
+
|
833
|
+
/* If |x| < 2^(-28) we have
|
834
|
+
|
835
|
+
arcsin(x) = x * ( 1 + xi )
|
836
|
+
|
837
|
+
with 0 <= xi < 2^(-55)
|
838
|
+
|
839
|
+
So we can decide the rounding without any computation
|
840
|
+
*/
|
841
|
+
if (xdb.i[HI] < 0x3e300000) {
|
842
|
+
/* If x == 0 then we got the algebraic result arcsin(0) = 0
|
843
|
+
If x > 0 then the truncation rest is positive but less than
|
844
|
+
1 ulp; we round downwards by returning x
|
845
|
+
*/
|
846
|
+
if (x >= 0) return x;
|
847
|
+
/* Otherwise the rest is negative, less than 1 ulp and the
|
848
|
+
image is not algebraic
|
849
|
+
We return x - 1ulp
|
850
|
+
We stripped off the sign, so we add 1 ulp to -x (in xdb.d) and multiply by -1
|
851
|
+
*/
|
852
|
+
xdb.l++;
|
853
|
+
return -1 * xdb.d;
|
854
|
+
}
|
855
|
+
|
856
|
+
/* Recast x */
|
857
|
+
x = xdb.d;
|
858
|
+
|
859
|
+
/* Find correspondant interval and compute index to the table
|
860
|
+
We start by filtering the two special cases around 0 and 1
|
861
|
+
*/
|
862
|
+
|
863
|
+
if (xdb.i[HI] < BOUND1) {
|
864
|
+
/* Special interval 0..BOUND1
|
865
|
+
The polynomial has no even monomials
|
866
|
+
We must prove extra accuracy in the interval 0..sin(2^(-18))
|
867
|
+
*/
|
868
|
+
|
869
|
+
/* Quick phase starts */
|
870
|
+
|
871
|
+
/* Compute square of x for both quick and accurate phases */
|
872
|
+
Mul12(&xSqh,&xSql,x,x);
|
873
|
+
|
874
|
+
tmp4 = tbl[3];
|
875
|
+
tmp5 = tbl[4];
|
876
|
+
t4h = tmp4;
|
877
|
+
t4l = tmp5;
|
878
|
+
if (xdb.i[HI] > EXTRABOUND) {
|
879
|
+
/* Double precision evaluation */
|
880
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
881
|
+
highPoly = FMA(FMA(FMA(FMA(tbl[23],xSqh,tbl[21]),xSqh,tbl[19]),xSqh,tbl[17]),xSqh,tbl[15]);
|
882
|
+
#else
|
883
|
+
highPoly = tbl[15] + xSqh * (tbl[17] + xSqh * (tbl[19] + xSqh * (tbl[21] + xSqh * tbl[23])));
|
884
|
+
#endif
|
885
|
+
|
886
|
+
/* Double-double precision evaluation */
|
887
|
+
Mul12(&tt1h,&tt1l,xSqh,highPoly);
|
888
|
+
Add22(&t1h,&t1l,tbl[12],tbl[13],tt1h,tt1l);
|
889
|
+
|
890
|
+
MulAdd212(&t2h,&t2l,tbl[9],tbl[10],xSqh,t1h,t1l);
|
891
|
+
MulAdd212(&t3h,&t3l,tbl[6],tbl[7],xSqh,t2h,t2l);
|
892
|
+
MulAdd22(&t4h,&t4l,tmp4,tmp5,xSqh,xSql,t3h,t3l);
|
893
|
+
}
|
894
|
+
|
895
|
+
MulAdd22(&t5h,&t5l,tbl[0],tbl[1],xSqh,xSql,t4h,t4l);
|
896
|
+
|
897
|
+
Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
|
898
|
+
Mul22(&tt6h,&tt6l,xCubeh,xCubel,t5h,t5l);
|
899
|
+
|
900
|
+
Add12(tmp1,tmp2,x,tt6h);
|
901
|
+
tmp3 = tmp2 + tt6l;
|
902
|
+
Add12(polyh,polyl,tmp1,tmp3);
|
903
|
+
|
904
|
+
/* Multiply by sign */
|
905
|
+
asinh = sign * polyh;
|
906
|
+
asinm = sign * polyl;
|
907
|
+
|
908
|
+
/* Rounding test
|
909
|
+
The RD rounding constant is at tbl[35]
|
910
|
+
*/
|
911
|
+
TEST_AND_RETURN_RD(asinh, asinm, tbl[35]);
|
912
|
+
|
913
|
+
/* Launch accurate phase */
|
914
|
+
|
915
|
+
asin_accurate_lower(&asinh,&asinm,&asinl,x,xSqh,xSql,sign);
|
916
|
+
|
917
|
+
ReturnRoundDownwards3(asinh,asinm,asinl);
|
918
|
+
}
|
919
|
+
|
920
|
+
if (xdb.i[HI] > BOUND9) {
|
921
|
+
/* Special interval BOUND9..1
|
922
|
+
We use an asymptotic development of arcsin in sqrt(1 - x)
|
923
|
+
*/
|
924
|
+
|
925
|
+
/* Argument reduction for quick and accurate phase
|
926
|
+
z = 1 - x
|
927
|
+
The operation is exact as per Sterbenz' lemma
|
928
|
+
*/
|
929
|
+
|
930
|
+
z = 1 - x;
|
931
|
+
|
932
|
+
/* Quick phase starts */
|
933
|
+
|
934
|
+
/* Double precision evaluation */
|
935
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
936
|
+
highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
|
937
|
+
tbl[TBLIDX10+42] ,z,tbl[TBLIDX10+40]),z,tbl[TBLIDX10+38]),z,
|
938
|
+
tbl[TBLIDX10+36]),z,tbl[TBLIDX10+34]),z,tbl[TBLIDX10+32]),z,
|
939
|
+
tbl[TBLIDX10+30]),z,tbl[TBLIDX10+28]),z,tbl[TBLIDX10+26]),z,
|
940
|
+
tbl[TBLIDX10+24]);
|
941
|
+
#else
|
942
|
+
highPoly = tbl[TBLIDX10+24] + z * (tbl[TBLIDX10+26] + z * (tbl[TBLIDX10+28] + z * (
|
943
|
+
tbl[TBLIDX10+30] + z * (tbl[TBLIDX10+32] + z * (tbl[TBLIDX10+34] + z * (
|
944
|
+
tbl[TBLIDX10+36] + z * (tbl[TBLIDX10+38] + z * (tbl[TBLIDX10+40] + z *
|
945
|
+
tbl[TBLIDX10+42]))))))));
|
946
|
+
#endif
|
947
|
+
|
948
|
+
/* Double-double precision evaluation */
|
949
|
+
Mul12(&tt1h,&tt1l,z,highPoly);
|
950
|
+
Add22(&t1h,&t1l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tt1h,tt1l);
|
951
|
+
|
952
|
+
MulAdd212(&t2h,&t2l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],z,t1h,t1l);
|
953
|
+
MulAdd212(&t3h,&t3l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],z,t2h,t2l);
|
954
|
+
MulAdd212(&t4h,&t4l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],z,t3h,t3l);
|
955
|
+
MulAdd212(&t5h,&t5l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],z,t4h,t4l);
|
956
|
+
MulAdd212(&t6h,&t6l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],z,t5h,t5l);
|
957
|
+
MulAdd212(&t7h,&t7l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],z,t6h,t6l);
|
958
|
+
MulAdd212(&t8h,&t8l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],z,t7h,t7l);
|
959
|
+
MulAdd212(&polyh,&polyl,-1,0,z,t8h,t8l);
|
960
|
+
|
961
|
+
/* Compute sqrt(2*z) as a double-double */
|
962
|
+
|
963
|
+
twoZ = 2 * z;
|
964
|
+
sqrt12(&sqrtzh,&sqrtzl,twoZ);
|
965
|
+
|
966
|
+
/* Multiply p(z) by sqrt(2*z) and add Pi/2 */
|
967
|
+
|
968
|
+
Mul22(&pTimesSh,&pTimesSl,polyh,polyl,sqrtzh,sqrtzl);
|
969
|
+
Add22(&allh,&alll,PIHALFH,PIHALFM,pTimesSh,pTimesSl);
|
970
|
+
|
971
|
+
/* Multiply by sign */
|
972
|
+
asinh = sign * allh;
|
973
|
+
asinm = sign * alll;
|
974
|
+
|
975
|
+
/* Rounding test
|
976
|
+
The RD rounding constant is at tbl[TBLIDX10+55]
|
977
|
+
*/
|
978
|
+
TEST_AND_RETURN_RD(asinh, asinm, tbl[TBLIDX10+55]);
|
979
|
+
|
980
|
+
/* Launch accurate phase */
|
981
|
+
|
982
|
+
asin_accurate_higher(&asinh,&asinm,&asinl,z,sign);
|
983
|
+
|
984
|
+
ReturnRoundDownwards3(asinh,asinm,asinl);
|
985
|
+
}
|
986
|
+
|
987
|
+
/* General 8 main intervals
|
988
|
+
We can already suppose that BOUND1 <= x <= BOUND9
|
989
|
+
*/
|
990
|
+
|
991
|
+
if (xdb.i[HI] < BOUND5) {
|
992
|
+
if (xdb.i[HI] < BOUND3) {
|
993
|
+
if (xdb.i[HI] < BOUND2) i = TBLIDX2; else i = TBLIDX3;
|
994
|
+
} else {
|
995
|
+
if (xdb.i[HI] < BOUND4) i = TBLIDX4; else i = TBLIDX5;
|
996
|
+
}
|
997
|
+
} else {
|
998
|
+
if (xdb.i[HI] < BOUND7) {
|
999
|
+
if (xdb.i[HI] < BOUND6) i = TBLIDX6; else i = TBLIDX7;
|
1000
|
+
} else {
|
1001
|
+
if (xdb.i[HI] < BOUND8) i = TBLIDX8; else i = TBLIDX9;
|
1002
|
+
}
|
1003
|
+
}
|
1004
|
+
|
1005
|
+
/* Argument reduction
|
1006
|
+
i points to the interval midpoint value in the table
|
1007
|
+
*/
|
1008
|
+
z = x - tbl[i];
|
1009
|
+
|
1010
|
+
/* Quick phase starts */
|
1011
|
+
|
1012
|
+
/* Double precision evaluation */
|
1013
|
+
|
1014
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
1015
|
+
highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(
|
1016
|
+
tbl[i+35] ,z,tbl[i+33]),z,tbl[i+31]),z,tbl[i+29]),z,
|
1017
|
+
tbl[i+27]),z,tbl[i+25]),z,tbl[i+23]),z,tbl[i+21]);
|
1018
|
+
#else
|
1019
|
+
highPoly = tbl[i+21] + z * (tbl[i+23] + z * (tbl[i+25] + z * (
|
1020
|
+
tbl[i+27] + z * (tbl[i+29] + z * (tbl[i+31] + z * (
|
1021
|
+
tbl[i+33] + z * tbl[i+35]))))));
|
1022
|
+
#endif
|
1023
|
+
|
1024
|
+
/* Double-double precision evaluation */
|
1025
|
+
|
1026
|
+
Mul12(&tt1h,&tt1l,z,highPoly);
|
1027
|
+
Add22(&t1h,&t1l,tbl[i+18],tbl[i+19],tt1h,tt1l);
|
1028
|
+
|
1029
|
+
MulAdd212(&t2h,&t2l,tbl[i+15],tbl[i+16],z,t1h,t1l);
|
1030
|
+
MulAdd212(&t3h,&t3l,tbl[i+12],tbl[i+13],z,t2h,t2l);
|
1031
|
+
MulAdd212(&t4h,&t4l,tbl[i+9],tbl[i+10],z,t3h,t3l);
|
1032
|
+
MulAdd212(&t5h,&t5l,tbl[i+6],tbl[i+7],z,t4h,t4l);
|
1033
|
+
MulAdd212(&t6h,&t6l,tbl[i+3],tbl[i+4],z,t5h,t5l);
|
1034
|
+
MulAdd212(&polyh,&polyl,tbl[i+1],tbl[i+2],z,t6h,t6l);
|
1035
|
+
|
1036
|
+
/* Multiply by sign */
|
1037
|
+
asinh = sign * polyh;
|
1038
|
+
asinm = sign * polyl;
|
1039
|
+
|
1040
|
+
/* Rounding test
|
1041
|
+
The RD rounding constant is at tbl[i+60]
|
1042
|
+
*/
|
1043
|
+
TEST_AND_RETURN_RD(asinh, asinm, tbl[i+60]);
|
1044
|
+
|
1045
|
+
/* Launch accurate phase */
|
1046
|
+
|
1047
|
+
asin_accurate_middle(&asinh,&asinm,&asinl,z,i,sign);
|
1048
|
+
|
1049
|
+
ReturnRoundDownwards3(asinh,asinm,asinl);
|
1050
|
+
}
|
1051
|
+
|
1052
|
+
double asin_rz(double x) {
|
1053
|
+
db_number xdb;
|
1054
|
+
double sign, z, asinh, asinm, asinl;
|
1055
|
+
int i;
|
1056
|
+
double xSqh, xSql;
|
1057
|
+
double tt1h, tt1l;
|
1058
|
+
double tt6h, tt6l;
|
1059
|
+
double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
|
1060
|
+
double t7h, t7l, t8h, t8l, polyh, polyl, twoZ, sqrtzh, sqrtzl;
|
1061
|
+
double pTimesSh, pTimesSl, allh, alll, highPoly, xCubeh, xCubel;
|
1062
|
+
double tmp1, tmp2, tmp3, tmp4, tmp5;
|
1063
|
+
|
1064
|
+
/* Transform the argument into integer */
|
1065
|
+
xdb.d = x;
|
1066
|
+
|
1067
|
+
/* Special case handling */
|
1068
|
+
|
1069
|
+
/* Strip off the sign of argument x */
|
1070
|
+
if (xdb.i[HI] & 0x80000000) sign = -1; else sign = 1;
|
1071
|
+
xdb.i[HI] &= 0x7fffffff;
|
1072
|
+
|
1073
|
+
/* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
|
1074
|
+
if ((xdb.i[HI] > 0x3ff00000) || ((xdb.i[HI] == 0x3ff00000) && (xdb.i[LO] != 0x00000000))) {
|
1075
|
+
return (x-x)/0.0; /* return NaN */
|
1076
|
+
}
|
1077
|
+
|
1078
|
+
/* If |x| < 2^(-28) we have
|
1079
|
+
|
1080
|
+
arcsin(x) = x * ( 1 + xi )
|
1081
|
+
|
1082
|
+
with 0 <= xi < 2^(-55)
|
1083
|
+
|
1084
|
+
So we can decide the rounding without any computation
|
1085
|
+
*/
|
1086
|
+
if (xdb.i[HI] < 0x3e300000) {
|
1087
|
+
/* If x == 0 the result is algebraic and equal to 0
|
1088
|
+
If x < 0 the truncation rest is negative and less than 1 ulp, we return x
|
1089
|
+
If x > 0 the truncation rest is positive and less than 1 ulp, we return x
|
1090
|
+
*/
|
1091
|
+
return x;
|
1092
|
+
}
|
1093
|
+
|
1094
|
+
/* Recast x */
|
1095
|
+
x = xdb.d;
|
1096
|
+
|
1097
|
+
/* Find correspondant interval and compute index to the table
|
1098
|
+
We start by filtering the two special cases around 0 and 1
|
1099
|
+
*/
|
1100
|
+
|
1101
|
+
if (xdb.i[HI] < BOUND1) {
|
1102
|
+
/* Special interval 0..BOUND1
|
1103
|
+
The polynomial has no even monomials
|
1104
|
+
We must prove extra accuracy in the interval 0..sin(2^(-18))
|
1105
|
+
*/
|
1106
|
+
|
1107
|
+
/* Quick phase starts */
|
1108
|
+
|
1109
|
+
/* Compute square of x for both quick and accurate phases */
|
1110
|
+
Mul12(&xSqh,&xSql,x,x);
|
1111
|
+
|
1112
|
+
tmp4 = tbl[3];
|
1113
|
+
tmp5 = tbl[4];
|
1114
|
+
t4h = tmp4;
|
1115
|
+
t4l = tmp5;
|
1116
|
+
if (xdb.i[HI] > EXTRABOUND) {
|
1117
|
+
/* Double precision evaluation */
|
1118
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
1119
|
+
highPoly = FMA(FMA(FMA(FMA(tbl[23],xSqh,tbl[21]),xSqh,tbl[19]),xSqh,tbl[17]),xSqh,tbl[15]);
|
1120
|
+
#else
|
1121
|
+
highPoly = tbl[15] + xSqh * (tbl[17] + xSqh * (tbl[19] + xSqh * (tbl[21] + xSqh * tbl[23])));
|
1122
|
+
#endif
|
1123
|
+
|
1124
|
+
/* Double-double precision evaluation */
|
1125
|
+
Mul12(&tt1h,&tt1l,xSqh,highPoly);
|
1126
|
+
Add22(&t1h,&t1l,tbl[12],tbl[13],tt1h,tt1l);
|
1127
|
+
|
1128
|
+
MulAdd212(&t2h,&t2l,tbl[9],tbl[10],xSqh,t1h,t1l);
|
1129
|
+
MulAdd212(&t3h,&t3l,tbl[6],tbl[7],xSqh,t2h,t2l);
|
1130
|
+
MulAdd22(&t4h,&t4l,tmp4,tmp5,xSqh,xSql,t3h,t3l);
|
1131
|
+
}
|
1132
|
+
|
1133
|
+
MulAdd22(&t5h,&t5l,tbl[0],tbl[1],xSqh,xSql,t4h,t4l);
|
1134
|
+
|
1135
|
+
Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
|
1136
|
+
Mul22(&tt6h,&tt6l,xCubeh,xCubel,t5h,t5l);
|
1137
|
+
|
1138
|
+
Add12(tmp1,tmp2,x,tt6h);
|
1139
|
+
tmp3 = tmp2 + tt6l;
|
1140
|
+
Add12(polyh,polyl,tmp1,tmp3);
|
1141
|
+
|
1142
|
+
/* Multiply by sign */
|
1143
|
+
asinh = sign * polyh;
|
1144
|
+
asinm = sign * polyl;
|
1145
|
+
|
1146
|
+
/* Rounding test
|
1147
|
+
The RZ rounding constant is at tbl[35]
|
1148
|
+
*/
|
1149
|
+
TEST_AND_RETURN_RZ(asinh, asinm, tbl[35]);
|
1150
|
+
|
1151
|
+
/* Launch accurate phase */
|
1152
|
+
|
1153
|
+
asin_accurate_lower(&asinh,&asinm,&asinl,x,xSqh,xSql,sign);
|
1154
|
+
|
1155
|
+
ReturnRoundTowardsZero3(asinh,asinm,asinl);
|
1156
|
+
}
|
1157
|
+
|
1158
|
+
if (xdb.i[HI] > BOUND9) {
|
1159
|
+
/* Special interval BOUND9..1
|
1160
|
+
We use an asymptotic development of arcsin in sqrt(1 - x)
|
1161
|
+
*/
|
1162
|
+
|
1163
|
+
/* Argument reduction for quick and accurate phase
|
1164
|
+
z = 1 - x
|
1165
|
+
The operation is exact as per Sterbenz' lemma
|
1166
|
+
*/
|
1167
|
+
|
1168
|
+
z = 1 - x;
|
1169
|
+
|
1170
|
+
/* Quick phase starts */
|
1171
|
+
|
1172
|
+
/* Double precision evaluation */
|
1173
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
1174
|
+
highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
|
1175
|
+
tbl[TBLIDX10+42] ,z,tbl[TBLIDX10+40]),z,tbl[TBLIDX10+38]),z,
|
1176
|
+
tbl[TBLIDX10+36]),z,tbl[TBLIDX10+34]),z,tbl[TBLIDX10+32]),z,
|
1177
|
+
tbl[TBLIDX10+30]),z,tbl[TBLIDX10+28]),z,tbl[TBLIDX10+26]),z,
|
1178
|
+
tbl[TBLIDX10+24]);
|
1179
|
+
#else
|
1180
|
+
highPoly = tbl[TBLIDX10+24] + z * (tbl[TBLIDX10+26] + z * (tbl[TBLIDX10+28] + z * (
|
1181
|
+
tbl[TBLIDX10+30] + z * (tbl[TBLIDX10+32] + z * (tbl[TBLIDX10+34] + z * (
|
1182
|
+
tbl[TBLIDX10+36] + z * (tbl[TBLIDX10+38] + z * (tbl[TBLIDX10+40] + z *
|
1183
|
+
tbl[TBLIDX10+42]))))))));
|
1184
|
+
#endif
|
1185
|
+
|
1186
|
+
/* Double-double precision evaluation */
|
1187
|
+
Mul12(&tt1h,&tt1l,z,highPoly);
|
1188
|
+
Add22(&t1h,&t1l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tt1h,tt1l);
|
1189
|
+
|
1190
|
+
MulAdd212(&t2h,&t2l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],z,t1h,t1l);
|
1191
|
+
MulAdd212(&t3h,&t3l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],z,t2h,t2l);
|
1192
|
+
MulAdd212(&t4h,&t4l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],z,t3h,t3l);
|
1193
|
+
MulAdd212(&t5h,&t5l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],z,t4h,t4l);
|
1194
|
+
MulAdd212(&t6h,&t6l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],z,t5h,t5l);
|
1195
|
+
MulAdd212(&t7h,&t7l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],z,t6h,t6l);
|
1196
|
+
MulAdd212(&t8h,&t8l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],z,t7h,t7l);
|
1197
|
+
MulAdd212(&polyh,&polyl,-1,0,z,t8h,t8l);
|
1198
|
+
|
1199
|
+
/* Compute sqrt(2*z) as a double-double */
|
1200
|
+
|
1201
|
+
twoZ = 2 * z;
|
1202
|
+
sqrt12(&sqrtzh,&sqrtzl,twoZ);
|
1203
|
+
|
1204
|
+
/* Multiply p(z) by sqrt(2*z) and add Pi/2 */
|
1205
|
+
|
1206
|
+
Mul22(&pTimesSh,&pTimesSl,polyh,polyl,sqrtzh,sqrtzl);
|
1207
|
+
Add22(&allh,&alll,PIHALFH,PIHALFM,pTimesSh,pTimesSl);
|
1208
|
+
|
1209
|
+
/* Multiply by sign */
|
1210
|
+
asinh = sign * allh;
|
1211
|
+
asinm = sign * alll;
|
1212
|
+
|
1213
|
+
/* Rounding test
|
1214
|
+
The RZ rounding constant is at tbl[TBLIDX10+55]
|
1215
|
+
*/
|
1216
|
+
TEST_AND_RETURN_RZ(asinh, asinm, tbl[TBLIDX10+55]);
|
1217
|
+
|
1218
|
+
/* Launch accurate phase */
|
1219
|
+
|
1220
|
+
asin_accurate_higher(&asinh,&asinm,&asinl,z,sign);
|
1221
|
+
|
1222
|
+
ReturnRoundTowardsZero3(asinh,asinm,asinl);
|
1223
|
+
}
|
1224
|
+
|
1225
|
+
/* General 8 main intervals
|
1226
|
+
We can already suppose that BOUND1 <= x <= BOUND9
|
1227
|
+
*/
|
1228
|
+
|
1229
|
+
if (xdb.i[HI] < BOUND5) {
|
1230
|
+
if (xdb.i[HI] < BOUND3) {
|
1231
|
+
if (xdb.i[HI] < BOUND2) i = TBLIDX2; else i = TBLIDX3;
|
1232
|
+
} else {
|
1233
|
+
if (xdb.i[HI] < BOUND4) i = TBLIDX4; else i = TBLIDX5;
|
1234
|
+
}
|
1235
|
+
} else {
|
1236
|
+
if (xdb.i[HI] < BOUND7) {
|
1237
|
+
if (xdb.i[HI] < BOUND6) i = TBLIDX6; else i = TBLIDX7;
|
1238
|
+
} else {
|
1239
|
+
if (xdb.i[HI] < BOUND8) i = TBLIDX8; else i = TBLIDX9;
|
1240
|
+
}
|
1241
|
+
}
|
1242
|
+
|
1243
|
+
/* Argument reduction
|
1244
|
+
i points to the interval midpoint value in the table
|
1245
|
+
*/
|
1246
|
+
z = x - tbl[i];
|
1247
|
+
|
1248
|
+
/* Quick phase starts */
|
1249
|
+
|
1250
|
+
/* Double precision evaluation */
|
1251
|
+
|
1252
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
1253
|
+
highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(
|
1254
|
+
tbl[i+35] ,z,tbl[i+33]),z,tbl[i+31]),z,tbl[i+29]),z,
|
1255
|
+
tbl[i+27]),z,tbl[i+25]),z,tbl[i+23]),z,tbl[i+21]);
|
1256
|
+
#else
|
1257
|
+
highPoly = tbl[i+21] + z * (tbl[i+23] + z * (tbl[i+25] + z * (
|
1258
|
+
tbl[i+27] + z * (tbl[i+29] + z * (tbl[i+31] + z * (
|
1259
|
+
tbl[i+33] + z * tbl[i+35]))))));
|
1260
|
+
#endif
|
1261
|
+
|
1262
|
+
/* Double-double precision evaluation */
|
1263
|
+
|
1264
|
+
Mul12(&tt1h,&tt1l,z,highPoly);
|
1265
|
+
Add22(&t1h,&t1l,tbl[i+18],tbl[i+19],tt1h,tt1l);
|
1266
|
+
|
1267
|
+
MulAdd212(&t2h,&t2l,tbl[i+15],tbl[i+16],z,t1h,t1l);
|
1268
|
+
MulAdd212(&t3h,&t3l,tbl[i+12],tbl[i+13],z,t2h,t2l);
|
1269
|
+
MulAdd212(&t4h,&t4l,tbl[i+9],tbl[i+10],z,t3h,t3l);
|
1270
|
+
MulAdd212(&t5h,&t5l,tbl[i+6],tbl[i+7],z,t4h,t4l);
|
1271
|
+
MulAdd212(&t6h,&t6l,tbl[i+3],tbl[i+4],z,t5h,t5l);
|
1272
|
+
MulAdd212(&polyh,&polyl,tbl[i+1],tbl[i+2],z,t6h,t6l);
|
1273
|
+
|
1274
|
+
/* Multiply by sign */
|
1275
|
+
asinh = sign * polyh;
|
1276
|
+
asinm = sign * polyl;
|
1277
|
+
|
1278
|
+
/* Rounding test
|
1279
|
+
The RZ rounding constant is at tbl[i+60]
|
1280
|
+
*/
|
1281
|
+
TEST_AND_RETURN_RZ(asinh, asinm, tbl[i+60]);
|
1282
|
+
|
1283
|
+
/* Launch accurate phase */
|
1284
|
+
|
1285
|
+
asin_accurate_middle(&asinh,&asinm,&asinl,z,i,sign);
|
1286
|
+
|
1287
|
+
ReturnRoundTowardsZero3(asinh,asinm,asinl);
|
1288
|
+
}
|
1289
|
+
|
1290
|
+
|
1291
|
+
|
1292
|
+
|
1293
|
+
|
1294
|
+
|
1295
|
+
|
1296
|
+
|
1297
|
+
|