crmf 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -0
- data/crmf.gemspec +102 -1
- data/ext/crlibm-1.0beta5/AUTHORS +2 -0
- data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
- data/ext/crlibm-1.0beta5/COPYING +340 -0
- data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
- data/ext/crlibm-1.0beta5/ChangeLog +125 -0
- data/ext/crlibm-1.0beta5/Makefile.am +134 -0
- data/ext/crlibm-1.0beta5/NEWS +0 -0
- data/ext/crlibm-1.0beta5/README +31 -0
- data/ext/crlibm-1.0beta5/README.DEV +23 -0
- data/ext/crlibm-1.0beta5/README.md +5 -0
- data/ext/crlibm-1.0beta5/TODO +66 -0
- data/ext/crlibm-1.0beta5/VERSION +1 -0
- data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
- data/ext/crlibm-1.0beta5/acos-td.h +629 -0
- data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
- data/ext/crlibm-1.0beta5/asin-td.h +620 -0
- data/ext/crlibm-1.0beta5/asincos.c +4488 -0
- data/ext/crlibm-1.0beta5/asincos.h +575 -0
- data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
- data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
- data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
- data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
- data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
- data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
- data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
- data/ext/crlibm-1.0beta5/configure.ac +419 -0
- data/ext/crlibm-1.0beta5/crlibm.h +204 -0
- data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
- data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
- data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
- data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
- data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
- data/ext/crlibm-1.0beta5/double-extended.h +496 -0
- data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
- data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
- data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
- data/ext/crlibm-1.0beta5/exp-td.h +685 -0
- data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
- data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
- data/ext/crlibm-1.0beta5/expm1.c +2515 -0
- data/ext/crlibm-1.0beta5/expm1.h +715 -0
- data/ext/crlibm-1.0beta5/interval.h +238 -0
- data/ext/crlibm-1.0beta5/log-de.c +480 -0
- data/ext/crlibm-1.0beta5/log-de.h +747 -0
- data/ext/crlibm-1.0beta5/log-de2.c +280 -0
- data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
- data/ext/crlibm-1.0beta5/log-td.c +1158 -0
- data/ext/crlibm-1.0beta5/log-td.h +819 -0
- data/ext/crlibm-1.0beta5/log.c +2244 -0
- data/ext/crlibm-1.0beta5/log.h +1592 -0
- data/ext/crlibm-1.0beta5/log10-td.c +906 -0
- data/ext/crlibm-1.0beta5/log10-td.h +823 -0
- data/ext/crlibm-1.0beta5/log1p.c +1295 -0
- data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
- data/ext/crlibm-1.0beta5/log2-td.h +821 -0
- data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
- data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
- data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
- data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
- data/ext/crlibm-1.0beta5/log_fast.c +360 -0
- data/ext/crlibm-1.0beta5/log_fast.h +440 -0
- data/ext/crlibm-1.0beta5/pow.c +1396 -0
- data/ext/crlibm-1.0beta5/pow.h +3101 -0
- data/ext/crlibm-1.0beta5/prepare +20 -0
- data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
- data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
- data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
- data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
- data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
- data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
- data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
- data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
- data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
- data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
- data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
- data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
- data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
- data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
- data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
- data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
- data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
- data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
- data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
- data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
- data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
- data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
- data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
- data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
- data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
- data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
- data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
- data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
- data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
- data/ext/crlibm-1.0beta5/trigpi.h +556 -0
- data/ext/crlibm-1.0beta5/triple-double.c +57 -0
- data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
- data/ext/crmf/crmf.c +16 -16
- data/ext/crmf/extconf.rb +12 -8
- data/lib/crmf/version.rb +1 -1
- data/tests/perf.rb +100 -219
- metadata +104 -3
- data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,1195 @@
|
|
1
|
+
/*
|
2
|
+
* Correctly rounded arccosine
|
3
|
+
*
|
4
|
+
* Author : Christoph Lauter (ENS Lyon)
|
5
|
+
*
|
6
|
+
* This file is part of the crlibm library developed by the Arenaire
|
7
|
+
* project at Ecole Normale Superieure de Lyon
|
8
|
+
*
|
9
|
+
* This program is free software; you can redistribute it and/or modify
|
10
|
+
* it under the terms of the GNU Lesser General Public License as published by
|
11
|
+
* the Free Software Foundation; either version 2 of the License, or
|
12
|
+
* (at your option) any later version.
|
13
|
+
*
|
14
|
+
* This program is distributed in the hope that it will be useful,
|
15
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
+
* GNU General Public License for more details.
|
18
|
+
*
|
19
|
+
* You should have received a copy of the GNU Lesser General Public License
|
20
|
+
* along with this program; if not, write to the Free Software
|
21
|
+
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
22
|
+
*/
|
23
|
+
|
24
|
+
#include <stdio.h>
|
25
|
+
#include <stdlib.h>
|
26
|
+
#include "crlibm.h"
|
27
|
+
#include "crlibm_private.h"
|
28
|
+
#include "triple-double.h"
|
29
|
+
#include "acos-td.h"
|
30
|
+
|
31
|
+
#define AVOID_FMA 1
|
32
|
+
|
33
|
+
void acos_accurate_lower(double *acosh, double *acosm, double *acosl, double x, double xSqh, double xSql, double sign) {
|
34
|
+
double highPoly;
|
35
|
+
double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l;
|
36
|
+
double tt1h, tt1l;
|
37
|
+
double t8h, t8m, t8l, t9h, t9m, t9l, t10h, t10m, t10l, t11h, t11m, t11l, t12h, t12m, t12l;
|
38
|
+
double tt8h, tt8m, tt8l, tt9h, tt9m, tt9l, tt10h, tt10m, tt10l, tt11h, tt11m, tt11l, tt12h, tt12m, tt12l;
|
39
|
+
double xCubeh, xCubem, xCubel, tt13h, tt13m, tt13l, t13h, t13m, t13l, polyh, polym, polyl;
|
40
|
+
double tt11hover, tt11mover, tt11lover;
|
41
|
+
double zw1h, zw1m, zw1l, acoshover, acosmover, acoslover;
|
42
|
+
|
43
|
+
#if EVAL_PERF
|
44
|
+
crlibm_second_step_taken++;
|
45
|
+
#endif
|
46
|
+
|
47
|
+
/* Evaluate the polynomial of degree 37
|
48
|
+
Its coefficients start at tbl[0]
|
49
|
+
|
50
|
+
p(x) = x + x * x^2 * (c3 + x^2 * (c5 + ...
|
51
|
+
|
52
|
+
We receive x^2 as xSqh + xSql = x * x (exactly)
|
53
|
+
in argument
|
54
|
+
|
55
|
+
|x| <= 0.185 = 2^(-2.43)
|
56
|
+
|
57
|
+
Compute monomials 27 to 37 in double precision
|
58
|
+
monomials 13 to 25 in double-double and
|
59
|
+
1 to 11 in triple-double precision in a
|
60
|
+
modified Horner form
|
61
|
+
|
62
|
+
*/
|
63
|
+
|
64
|
+
/* Double computations */
|
65
|
+
|
66
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
67
|
+
highPoly = FMA(FMA(FMA(FMA(FMA(tbl[33],xSqh,tbl[32]),xSqh,tbl[31]),xSqh,tbl[30]),xSqh,tbl[29]),xSqh,tbl[28]);
|
68
|
+
#else
|
69
|
+
highPoly = tbl[28] + xSqh * (tbl[29] + xSqh * (tbl[30] + xSqh * (tbl[31] + xSqh * (tbl[32] + xSqh * tbl[33]))));
|
70
|
+
#endif
|
71
|
+
|
72
|
+
/* Double-double computations */
|
73
|
+
|
74
|
+
Mul12(&tt1h,&tt1l,xSqh,highPoly);
|
75
|
+
Add22(&t1h,&t1l,tbl[27],0,tt1h,tt1l);
|
76
|
+
|
77
|
+
MulAdd22(&t2h,&t2l,tbl[25],tbl[26],xSqh,xSql,t1h,t1l);
|
78
|
+
MulAdd22(&t3h,&t3l,tbl[23],tbl[24],xSqh,xSql,t2h,t2l);
|
79
|
+
MulAdd22(&t4h,&t4l,tbl[21],tbl[22],xSqh,xSql,t3h,t3l);
|
80
|
+
MulAdd22(&t5h,&t5l,tbl[19],tbl[20],xSqh,xSql,t4h,t4l);
|
81
|
+
MulAdd22(&t6h,&t6l,tbl[17],tbl[18],xSqh,xSql,t5h,t5l);
|
82
|
+
MulAdd22(&t7h,&t7l,tbl[15],tbl[16],xSqh,xSql,t6h,t6l);
|
83
|
+
|
84
|
+
/* Triple-double computations */
|
85
|
+
|
86
|
+
Mul23(&tt8h,&tt8m,&tt8l,xSqh,xSql,t7h,t7l); /* 149 - 48/53 */
|
87
|
+
Add33(&t8h,&t8m,&t8l,tbl[12],tbl[13],tbl[14],tt8h,tt8m,tt8l); /* 145 - 43/53 */
|
88
|
+
Mul233(&tt9h,&tt9m,&tt9l,xSqh,xSql,t8h,t8m,t8l); /* 139 - 39/53 */
|
89
|
+
Add33(&t9h,&t9m,&t9l,tbl[9],tbl[10],tbl[11],tt9h,tt9m,tt9l); /* 136 - 34/53 */
|
90
|
+
Mul233(&tt10h,&tt10m,&tt10l,xSqh,xSql,t9h,t9m,t9l); /* 130 - 30/53 */
|
91
|
+
Add33(&t10h,&t10m,&t10l,tbl[6],tbl[7],tbl[8],tt10h,tt10m,tt10l); /* 127 - 25/53 */
|
92
|
+
Mul233(&tt11hover,&tt11mover,&tt11lover,xSqh,xSql,t10h,t10m,t10l); /* 121 - 21/53 */
|
93
|
+
|
94
|
+
Renormalize3(&tt11h,&tt11m,&tt11l,tt11hover,tt11mover,tt11lover); /* infty - 52/53 */
|
95
|
+
|
96
|
+
Add33(&t11h,&t11m,&t11l,tbl[3],tbl[4],tbl[5],tt11h,tt11m,tt11l); /* 149 - 47/53 */
|
97
|
+
Mul233(&tt12h,&tt12m,&tt12l,xSqh,xSql,t11h,t11m,t11l); /* 143 - 43/53 */
|
98
|
+
Add33(&t12h,&t12m,&t12l,tbl[0],tbl[1],tbl[2],tt12h,tt12m,tt12l); /* 140 - 38/53 */
|
99
|
+
|
100
|
+
Mul123(&xCubeh,&xCubem,&xCubel,x,xSqh,xSql); /* 154 - 47/53 */
|
101
|
+
|
102
|
+
Mul33(&tt13h,&tt13m,&tt13l,xCubeh,xCubem,xCubel,t12h,t12m,t12l); /* 136 - 34/53 */
|
103
|
+
Add133(&t13h,&t13m,&t13l,x,tt13h,tt13m,tt13l); /* 138 - 32/53 */
|
104
|
+
|
105
|
+
Renormalize3(&polyh,&polym,&polyl,t13h,t13m,t13l); /* infty - 52/53 */
|
106
|
+
|
107
|
+
/* Reconstruction:
|
108
|
+
|
109
|
+
- Multiply by the inverted sign
|
110
|
+
- Add Pi/2 in triple-double
|
111
|
+
- Renormalize
|
112
|
+
|
113
|
+
*/
|
114
|
+
|
115
|
+
zw1h = -sign * polyh;
|
116
|
+
zw1m = -sign * polym;
|
117
|
+
zw1l = -sign * polyl;
|
118
|
+
|
119
|
+
Add33(&acoshover,&acosmover,&acoslover,PIHALFH,PIHALFM,PIHALFL,zw1h,zw1m,zw1l);
|
120
|
+
|
121
|
+
Renormalize3(acosh,acosm,acosl,acoshover,acosmover,acoslover);
|
122
|
+
|
123
|
+
}
|
124
|
+
|
125
|
+
|
126
|
+
|
127
|
+
void acos_accurate_middle(double *acosh, double *acosm, double *acosl, double z, int i, double sign) {
|
128
|
+
double highPoly;
|
129
|
+
double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l, t8h, t8l, t9h, t9l;
|
130
|
+
double t10h, t10m, t10l, t11h, t11m, t11l, t12h, t12m, t12l, t13h, t13m, t13l, t14h, t14m, t14l;
|
131
|
+
double t15h, t15m, t15l, t16h, t16m, t16l;
|
132
|
+
double tt1h, tt1l;
|
133
|
+
double tt10h, tt10m, tt10l, tt11h, tt11m, tt11l, tt12h, tt12m, tt12l;
|
134
|
+
double tt13h, tt13m, tt13l, tt14h, tt14m, tt14l, tt15h, tt15m, tt15l, tt16h, tt16m, tt16l;
|
135
|
+
double polyh, polym, polyl, tt13hover, tt13mover, tt13lover;
|
136
|
+
double zw1h, zw1m, zw1l, acoshover, acosmover, acoslover;
|
137
|
+
|
138
|
+
#if EVAL_PERF
|
139
|
+
crlibm_second_step_taken++;
|
140
|
+
#endif
|
141
|
+
|
142
|
+
/* Evaluate the polynomial of degree 35
|
143
|
+
Its coefficients start at tbl[i+1]
|
144
|
+
Evaluate degrees 35 to 16 in double precision,
|
145
|
+
degrees 15 to 7 in double-double precision and
|
146
|
+
finally degrees 6 to 1 in triple-double.
|
147
|
+
The constant coefficient is a double-double, the
|
148
|
+
computations are nevertheless in triple-double
|
149
|
+
*/
|
150
|
+
|
151
|
+
/* Double computations */
|
152
|
+
|
153
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
154
|
+
highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
|
155
|
+
tbl[i+58] ,z,tbl[i+57]),z,tbl[i+56]),z,tbl[i+55]),z,tbl[i+54]),z,
|
156
|
+
tbl[i+53]),z,tbl[i+52]),z,tbl[i+51]),z,tbl[i+50]),z,tbl[i+49]),z,
|
157
|
+
tbl[i+48]),z,tbl[i+47]),z,tbl[i+46]),z,tbl[i+45]),z,tbl[i+44]),z,
|
158
|
+
tbl[i+43]),z,tbl[i+42]),z,tbl[i+41]),z,tbl[i+40]),z,tbl[i+39]);
|
159
|
+
|
160
|
+
#else
|
161
|
+
highPoly = tbl[i+39] + z * (tbl[i+40] + z * (tbl[i+41] + z * (tbl[i+42] + z * (
|
162
|
+
tbl[i+43] + z * (tbl[i+44] + z * (tbl[i+45] + z * (tbl[i+46] + z * (
|
163
|
+
tbl[i+47] + z * (tbl[i+48] + z * (tbl[i+49] + z * (tbl[i+50] + z * (
|
164
|
+
tbl[i+51] + z * (tbl[i+52] + z * (tbl[i+53] + z * (tbl[i+54] + z * (
|
165
|
+
tbl[i+55] + z * (tbl[i+56] + z * (tbl[i+57] + z * tbl[i+58]))))))))))))))))));
|
166
|
+
#endif
|
167
|
+
|
168
|
+
|
169
|
+
/* Double-double computations */
|
170
|
+
|
171
|
+
Mul12(&tt1h,&tt1l,z,highPoly);
|
172
|
+
Add22(&t1h,&t1l,tbl[i+37],tbl[i+38],tt1h,tt1l);
|
173
|
+
|
174
|
+
MulAdd212(&t2h,&t2l,tbl[i+35],tbl[i+36],z,t1h,t1l);
|
175
|
+
MulAdd212(&t3h,&t3l,tbl[i+33],tbl[i+34],z,t2h,t2l);
|
176
|
+
MulAdd212(&t4h,&t4l,tbl[i+31],tbl[i+32],z,t3h,t3l);
|
177
|
+
MulAdd212(&t5h,&t5l,tbl[i+29],tbl[i+30],z,t4h,t4l);
|
178
|
+
MulAdd212(&t6h,&t6l,tbl[i+27],tbl[i+28],z,t5h,t5l);
|
179
|
+
MulAdd212(&t7h,&t7l,tbl[i+25],tbl[i+26],z,t6h,t6l);
|
180
|
+
MulAdd212(&t8h,&t8l,tbl[i+23],tbl[i+24],z,t7h,t7l);
|
181
|
+
MulAdd212(&t9h,&t9l,tbl[i+21],tbl[i+22],z,t8h,t8l);
|
182
|
+
|
183
|
+
/* Triple-double computations */
|
184
|
+
|
185
|
+
Mul123(&tt10h,&tt10m,&tt10l,z,t9h,t9l); /* 154 - 47/53 */
|
186
|
+
Add33(&t10h,&t10m,&t10l,tbl[i+18],tbl[i+19],tbl[i+20],tt10h,tt10m,tt10l); /* 144 - 42/53 */
|
187
|
+
Mul133(&tt11h,&tt11m,&tt11l,z,t10h,t10m,t10l); /* 142 - 38/53 */
|
188
|
+
Add33(&t11h,&t11m,&t11l,tbl[i+15],tbl[i+16],tbl[i+17],tt11h,tt11m,tt11l); /* 136 - 33/53 */
|
189
|
+
Mul133(&tt12h,&tt12m,&tt12l,z,t11h,t11m,t11l); /* 133 - 28/53 */
|
190
|
+
Add33(&t12h,&t12m,&t12l,tbl[i+12],tbl[i+13],tbl[i+14],tt12h,tt12m,tt12l); /* 125 - 23/53 */
|
191
|
+
Mul133(&tt13hover,&tt13mover,&tt13lover,z,t12h,t12m,t12l); /* 123 - 18/53 */
|
192
|
+
|
193
|
+
Renormalize3(&tt13h,&tt13m,&tt13l,tt13hover,tt13mover,tt13lover); /* infty - 52/53 */
|
194
|
+
|
195
|
+
Add33(&t13h,&t13m,&t13l,tbl[i+9],tbl[i+10],tbl[i+11],tt13h,tt13m,tt13l); /* 149 - 47/53 */
|
196
|
+
Mul133(&tt14h,&tt14m,&tt14l,z,t13h,t13m,t13l); /* 147 - 42/53 */
|
197
|
+
Add33(&t14h,&t14m,&t14l,tbl[i+6],tbl[i+7],tbl[i+8],tt14h,tt14m,tt14l); /* 139 - 37/53 */
|
198
|
+
Mul133(&tt15h,&tt15m,&tt15l,z,t14h,t14m,t14l); /* 137 - 32/53 */
|
199
|
+
Add33(&t15h,&t15m,&t15l,tbl[i+3],tbl[i+4],tbl[i+5],tt15h,tt15m,tt15l); /* 129 - 28/53 */
|
200
|
+
Mul133(&tt16h,&tt16m,&tt16l,z,t15h,t15m,t15l); /* 128 - 23/53 */
|
201
|
+
Add233(&t16h,&t16m,&t16l,tbl[i+1],tbl[i+2],tt16h,tt16m,tt16l); /* 126 - 19/53 */
|
202
|
+
|
203
|
+
Renormalize3(&polyh,&polym,&polyl,t16h,t16m,t16l); /* infty - 52/53 */
|
204
|
+
|
205
|
+
/* Reconstruction:
|
206
|
+
|
207
|
+
- Multiply by the inverted sign
|
208
|
+
- Add Pi/2 in triple-double
|
209
|
+
- Renormalize
|
210
|
+
|
211
|
+
*/
|
212
|
+
|
213
|
+
zw1h = -sign * polyh;
|
214
|
+
zw1m = -sign * polym;
|
215
|
+
zw1l = -sign * polyl;
|
216
|
+
|
217
|
+
Add33(&acoshover,&acosmover,&acoslover,PIHALFH,PIHALFM,PIHALFL,zw1h,zw1m,zw1l);
|
218
|
+
|
219
|
+
Renormalize3(acosh,acosm,acosl,acoshover,acosmover,acoslover);
|
220
|
+
|
221
|
+
}
|
222
|
+
|
223
|
+
|
224
|
+
void acos_accurate_higher(double *acosh, double *acosm, double *acosl, double z, double sign) {
|
225
|
+
double highPoly;
|
226
|
+
double tt1h, tt1l;
|
227
|
+
double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l, t8h, t8l;
|
228
|
+
double tt10h, tt10m, tt10l, tt11h, tt11m, tt11l, tt12h, tt12m, tt12l, tt13h, tt13m, tt13l;
|
229
|
+
double tt14h, tt14m, tt14l, tt15h, tt15m, tt15l, tt16h, tt16m, tt16l, tt17h, tt17m, tt17l;
|
230
|
+
double t9h, t9l, t10h, t10m, t10l, t11h, t11m, t11l, t12h, t12m, t12l, t13h, t13m, t13l;
|
231
|
+
double t14h, t14m, t14l, t15h, t15m, t15l, t16h, t16m, t16l, t17h, t17m, t17l;
|
232
|
+
double tt18h, tt18m, tt18l, polyh, polym, polyl;
|
233
|
+
double sqrtzh, sqrtzm, sqrtzl, twoZ, pTimesSh, pTimesSm, pTimesSl;
|
234
|
+
double allh, allm, alll;
|
235
|
+
double tt13hover, tt13mover, tt13lover, tt16hover, tt16mover, tt16lover;
|
236
|
+
double polyhover, polymover, polylover;
|
237
|
+
|
238
|
+
#if EVAL_PERF
|
239
|
+
crlibm_second_step_taken++;
|
240
|
+
#endif
|
241
|
+
|
242
|
+
/* We evaluate acos(x) with x > 0 as
|
243
|
+
|
244
|
+
acos(x) = -1 * f(z) * sqrt(2*z)
|
245
|
+
|
246
|
+
with z = 1 - x and
|
247
|
+
|
248
|
+
f(z) = (asin(1 - z) - Pi/2) / sqrt(2*z)
|
249
|
+
|
250
|
+
f(z) is approximated by p(z)
|
251
|
+
|
252
|
+
The polynomial p(z) is of degree 29
|
253
|
+
Its coefficients start at tbl[TBLIDX10]
|
254
|
+
Coefficients for degrees 29 to 18 are in double precision,
|
255
|
+
for degrees 17 to 9 in double-double precision and
|
256
|
+
finally for degrees 8 to 1 in triple-double.
|
257
|
+
The constant coefficient (-1) is not stored in the table,
|
258
|
+
the computations are nevertheless in triple-double
|
259
|
+
We evaluate the monomials in the precision in which
|
260
|
+
the corresponding coefficients are stored
|
261
|
+
The coefficients' values decrease very quickly
|
262
|
+
so even with |z| < 2^-2.18 we can compute degree 18
|
263
|
+
already in double precision
|
264
|
+
|
265
|
+
Then compute sqrt(2*z) as a triple-double and
|
266
|
+
multiply p(z) by it in triple-double.
|
267
|
+
|
268
|
+
*/
|
269
|
+
|
270
|
+
/* Double computations */
|
271
|
+
|
272
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
273
|
+
highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
|
274
|
+
tbl[TBLIDX10+53] ,z,tbl[TBLIDX10+52]),z,tbl[TBLIDX10+51]),z,
|
275
|
+
tbl[TBLIDX10+50]),z,tbl[TBLIDX10+49]),z,tbl[TBLIDX10+48]),z,
|
276
|
+
tbl[TBLIDX10+47]),z,tbl[TBLIDX10+46]),z,tbl[TBLIDX10+45]),z,
|
277
|
+
tbl[TBLIDX10+44]),z,tbl[TBLIDX10+43]),z,tbl[TBLIDX10+42]);
|
278
|
+
#else
|
279
|
+
highPoly = tbl[TBLIDX10+42] + z * (tbl[TBLIDX10+43] + z * (tbl[TBLIDX10+44] + z * (
|
280
|
+
tbl[TBLIDX10+45] + z * (tbl[TBLIDX10+46] + z * (tbl[TBLIDX10+47] + z * (
|
281
|
+
tbl[TBLIDX10+48] + z * (tbl[TBLIDX10+49] + z * (tbl[TBLIDX10+50] + z * (
|
282
|
+
tbl[TBLIDX10+51] + z * (tbl[TBLIDX10+52] + z * tbl[TBLIDX10+53]))))))))));
|
283
|
+
#endif
|
284
|
+
|
285
|
+
/* Double-double computations */
|
286
|
+
|
287
|
+
Mul12(&tt1h,&tt1l,z,highPoly);
|
288
|
+
Add22(&t1h,&t1l,tbl[TBLIDX10+40],tbl[TBLIDX10+41],tt1h,tt1l);
|
289
|
+
|
290
|
+
MulAdd212(&t2h,&t2l,tbl[TBLIDX10+38],tbl[TBLIDX10+39],z,t1h,t1l);
|
291
|
+
MulAdd212(&t3h,&t3l,tbl[TBLIDX10+36],tbl[TBLIDX10+37],z,t2h,t2l);
|
292
|
+
MulAdd212(&t4h,&t4l,tbl[TBLIDX10+34],tbl[TBLIDX10+35],z,t3h,t3l);
|
293
|
+
MulAdd212(&t5h,&t5l,tbl[TBLIDX10+32],tbl[TBLIDX10+33],z,t4h,t4l);
|
294
|
+
MulAdd212(&t6h,&t6l,tbl[TBLIDX10+30],tbl[TBLIDX10+31],z,t5h,t5l);
|
295
|
+
MulAdd212(&t7h,&t7l,tbl[TBLIDX10+28],tbl[TBLIDX10+29],z,t6h,t6l);
|
296
|
+
MulAdd212(&t8h,&t8l,tbl[TBLIDX10+26],tbl[TBLIDX10+27],z,t7h,t7l);
|
297
|
+
MulAdd212(&t9h,&t9l,tbl[TBLIDX10+24],tbl[TBLIDX10+25],z,t8h,t8l);
|
298
|
+
|
299
|
+
/* Triple-double computations */
|
300
|
+
|
301
|
+
Mul123(&tt10h,&tt10m,&tt10l,z,t9h,t9l); /* 154 - 47/53 */
|
302
|
+
Add33(&t10h,&t10m,&t10l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tbl[TBLIDX10+23],tt10h,tt10m,tt10l); /* 144 - 42/53 */
|
303
|
+
Mul133(&tt11h,&tt11m,&tt11l,z,t10h,t10m,t10l); /* 142 - 37/53 */
|
304
|
+
Add33(&t11h,&t11m,&t11l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],tbl[TBLIDX10+20],tt11h,tt11m,tt11l); /* 134 - 32/53 */
|
305
|
+
Mul133(&tt12h,&tt12m,&tt12l,z,t11h,t11m,t11l); /* 132 - 27/53 */
|
306
|
+
Add33(&t12h,&t12m,&t12l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],tbl[TBLIDX10+17],tt12h,tt12m,tt12l); /* 124 - 22/53 */
|
307
|
+
Mul133(&tt13hover,&tt13mover,&tt13lover,z,t12h,t12m,t12l); /* 122 - 17/53 */
|
308
|
+
|
309
|
+
Renormalize3(&tt13h,&tt13m,&tt13l,tt13hover,tt13mover,tt13lover); /* infty - 52/53 */
|
310
|
+
|
311
|
+
Add33(&t13h,&t13m,&t13l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],tbl[TBLIDX10+14],tt13h,tt13m,tt13l); /* 149 - 47/53 */
|
312
|
+
Mul133(&tt14h,&tt14m,&tt14l,z,t13h,t13m,t13l); /* 147 - 42/53 */
|
313
|
+
Add33(&t14h,&t14m,&t14l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],tbl[TBLIDX10+11],tt14h,tt14m,tt14l); /* 139 - 37/53 */
|
314
|
+
Mul133(&tt15h,&tt15m,&tt15l,z,t14h,t14m,t14l); /* 137 - 32/53 */
|
315
|
+
Add33(&t15h,&t15m,&t15l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],tbl[TBLIDX10+8],tt15h,tt15m,tt15l); /* 129 - 27/53 */
|
316
|
+
Mul133(&tt16hover,&tt16mover,&tt16lover,z,t15h,t15m,t15l); /* 127 - 22/53 */
|
317
|
+
|
318
|
+
Renormalize3(&tt16h,&tt16m,&tt16l,tt16hover,tt16mover,tt16lover); /* infty - 52/53 */
|
319
|
+
|
320
|
+
Add33(&t16h,&t16m,&t16l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],tbl[TBLIDX10+5],tt16h,tt16m,tt16l); /* 149 - 47/53 */
|
321
|
+
Mul133(&tt17h,&tt17m,&tt17l,z,t16h,t16m,t16l); /* 147 - 42/53 */
|
322
|
+
Add33(&t17h,&t17m,&t17l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],tbl[TBLIDX10+2],tt17h,tt17m,tt17l); /* 139 - 37/53 */
|
323
|
+
Mul133(&tt18h,&tt18m,&tt18l,z,t17h,t17m,t17l); /* 137 - 32/53 */
|
324
|
+
Add133(&polyhover,&polymover,&polylover,-1,tt18h,tt18m,tt18l); /* 136 - 30/53 */
|
325
|
+
|
326
|
+
Renormalize3(&polyh,&polym,&polyl,polyhover,polymover,polylover); /* infty - 52/53 */
|
327
|
+
|
328
|
+
/* Compute sqrt(2*z) as a triple-double */
|
329
|
+
|
330
|
+
twoZ = 2 * z;
|
331
|
+
Sqrt13(&sqrtzh,&sqrtzm,&sqrtzl,twoZ); /* 146 - 52/53 */
|
332
|
+
|
333
|
+
/* Multiply p(z) by sqrt(2*z) */
|
334
|
+
|
335
|
+
Mul33(&pTimesSh,&pTimesSm,&pTimesSl,polyh,polym,polyl,sqrtzh,sqrtzm,sqrtzl); /* 139 - 48/53 */
|
336
|
+
|
337
|
+
/* Reconstruction:
|
338
|
+
|
339
|
+
If the sign of x in acos(x) was positive:
|
340
|
+
- Multiply pTimesSh + pTimesSm + pTimesSl approx f(x) * sqrt(2 * z) by -1
|
341
|
+
- Renormalize
|
342
|
+
- Return
|
343
|
+
|
344
|
+
Otherwise:
|
345
|
+
- Add Pi in triple-double to pTimesSh + pTimesSm + pTimesSl approx f(x) * sqrt(2 * z)
|
346
|
+
- Renormalize
|
347
|
+
- Return
|
348
|
+
|
349
|
+
*/
|
350
|
+
|
351
|
+
if (sign > 0) {
|
352
|
+
|
353
|
+
allh = -1.0 * pTimesSh;
|
354
|
+
allm = -1.0 * pTimesSm;
|
355
|
+
alll = -1.0 * pTimesSl; /* 139 - 48/53 */
|
356
|
+
|
357
|
+
} else {
|
358
|
+
|
359
|
+
Add33(&allh,&allm,&alll,PIH,PIM,PIL,pTimesSh,pTimesSm,pTimesSl); /* 130 - 43/53 */
|
360
|
+
|
361
|
+
}
|
362
|
+
|
363
|
+
Renormalize3(acosh,acosm,acosl,allh,allm,alll); /* infty - 52/53 */
|
364
|
+
|
365
|
+
}
|
366
|
+
|
367
|
+
|
368
|
+
double acos_rn(double x) {
|
369
|
+
db_number xdb;
|
370
|
+
double sign, z, acosh, acosm, acosl;
|
371
|
+
int i;
|
372
|
+
double xSqh, xSql;
|
373
|
+
double tt1h, tt1l;
|
374
|
+
double tt6h, tt6l;
|
375
|
+
double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
|
376
|
+
double t7h, t7l, t8h, t8l, polyh, polyl, twoZ, sqrtzh, sqrtzl;
|
377
|
+
double pTimesSh, pTimesSl, highPoly, xCubeh, xCubel;
|
378
|
+
double tmp1, tmp2, tmp3, tmp4, tmp5;
|
379
|
+
double zw1h, zw1l;
|
380
|
+
|
381
|
+
/*
|
382
|
+
#if CRLIBM_REQUIRES_ROUNDING_MODE_CHANGE
|
383
|
+
SAVE_STATE_AND_SET_RNDOUBLE
|
384
|
+
#endif
|
385
|
+
*/
|
386
|
+
|
387
|
+
/* Transform the argument into integer */
|
388
|
+
xdb.d = x;
|
389
|
+
|
390
|
+
/* Special case handling */
|
391
|
+
|
392
|
+
/* Exact algebraic case x = 1, acos(1) = 0 */
|
393
|
+
|
394
|
+
if (x == 1.0) return 0.0;
|
395
|
+
|
396
|
+
/* Strip off the sign of argument x */
|
397
|
+
if (xdb.i[HI] & 0x80000000) sign = -1; else sign = 1;
|
398
|
+
xdb.i[HI] &= 0x7fffffff;
|
399
|
+
|
400
|
+
/* acos is defined on -1 <= x <= 1, elsewhere it is NaN */
|
401
|
+
if ((xdb.i[HI] > 0x3ff00000) || ((xdb.i[HI] == 0x3ff00000) && (xdb.i[LO] != 0x00000000))) {
|
402
|
+
return (x-x)/0.0; /* return NaN */
|
403
|
+
}
|
404
|
+
|
405
|
+
/* If |x| < 2^(-120) we have
|
406
|
+
|
407
|
+
round(acos(x)) = round(pi/2)
|
408
|
+
|
409
|
+
So we can decide the rounding without any computation
|
410
|
+
*/
|
411
|
+
if (xdb.i[HI] < 0x38700000) {
|
412
|
+
return PIHALFDOUBLERN;
|
413
|
+
}
|
414
|
+
|
415
|
+
/* Recast x */
|
416
|
+
x = xdb.d;
|
417
|
+
|
418
|
+
/* Find the corresponding interval and compute the index into the table
|
419
|
+
We start by filtering the two special cases around 0 and 1
|
420
|
+
*/
|
421
|
+
|
422
|
+
if (xdb.i[HI] < BOUND1) {
|
423
|
+
/* Special interval 0..BOUND1
|
424
|
+
The polynomial has no even monomials
|
425
|
+
We must prove extra accuracy in the interval 0..sin(2^(-18))
|
426
|
+
*/
|
427
|
+
|
428
|
+
/* Quick phase starts */
|
429
|
+
|
430
|
+
/* Compute square of x for both quick and accurate phases */
|
431
|
+
Mul12(&xSqh,&xSql,x,x);
|
432
|
+
|
433
|
+
tmp4 = tbl[3];
|
434
|
+
tmp5 = tbl[4];
|
435
|
+
t4h = tmp4;
|
436
|
+
t4l = tmp5;
|
437
|
+
if (xdb.i[HI] > EXTRABOUND) {
|
438
|
+
/* Double precision evaluation */
|
439
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
440
|
+
highPoly = FMA(FMA(FMA(FMA(tbl[23],xSqh,tbl[21]),xSqh,tbl[19]),xSqh,tbl[17]),xSqh,tbl[15]);
|
441
|
+
#else
|
442
|
+
highPoly = tbl[15] + xSqh * (tbl[17] + xSqh * (tbl[19] + xSqh * (tbl[21] + xSqh * tbl[23])));
|
443
|
+
#endif
|
444
|
+
|
445
|
+
/* Double-double precision evaluation */
|
446
|
+
Mul12(&tt1h,&tt1l,xSqh,highPoly);
|
447
|
+
Add22(&t1h,&t1l,tbl[12],tbl[13],tt1h,tt1l);
|
448
|
+
|
449
|
+
MulAdd212(&t2h,&t2l,tbl[9],tbl[10],xSqh,t1h,t1l);
|
450
|
+
MulAdd212(&t3h,&t3l,tbl[6],tbl[7],xSqh,t2h,t2l);
|
451
|
+
MulAdd22(&t4h,&t4l,tmp4,tmp5,xSqh,xSql,t3h,t3l);
|
452
|
+
}
|
453
|
+
|
454
|
+
MulAdd22(&t5h,&t5l,tbl[0],tbl[1],xSqh,xSql,t4h,t4l);
|
455
|
+
|
456
|
+
Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
|
457
|
+
Mul22(&tt6h,&tt6l,xCubeh,xCubel,t5h,t5l);
|
458
|
+
|
459
|
+
Add12(tmp1,tmp2,x,tt6h);
|
460
|
+
tmp3 = tmp2 + tt6l;
|
461
|
+
Add12(polyh,polyl,tmp1,tmp3);
|
462
|
+
|
463
|
+
/* Reconstruction:
|
464
|
+
|
465
|
+
- Multiply by the inverted sign
|
466
|
+
- Add Pi/2 in double-double precision
|
467
|
+
|
468
|
+
*/
|
469
|
+
|
470
|
+
zw1h = -sign * polyh;
|
471
|
+
zw1l = -sign * polyl;
|
472
|
+
|
473
|
+
Add22(&acosh,&acosm,PIHALFH,PIHALFM,zw1h,zw1l);
|
474
|
+
|
475
|
+
/* Rounding test
|
476
|
+
The RN rounding constant is at tbl[34]
|
477
|
+
*/
|
478
|
+
if(acosh == (acosh + (acosm * tbl[34])))
|
479
|
+
return acosh;
|
480
|
+
|
481
|
+
/* Launch accurate phase */
|
482
|
+
|
483
|
+
acos_accurate_lower(&acosh,&acosm,&acosl,x,xSqh,xSql,sign);
|
484
|
+
|
485
|
+
ReturnRoundToNearest3(acosh,acosm,acosl);
|
486
|
+
}
|
487
|
+
|
488
|
+
if (xdb.i[HI] >= BOUND9) {
|
489
|
+
/* Special interval BOUND9..1
|
490
|
+
We use an asymptotic development of arcsin in sqrt(1 - x)
|
491
|
+
*/
|
492
|
+
|
493
|
+
/* Argument reduction for quick and accurate phase
|
494
|
+
z = 1 - x
|
495
|
+
The operation is exact as per Sterbenz' lemma
|
496
|
+
*/
|
497
|
+
|
498
|
+
z = 1 - x;
|
499
|
+
|
500
|
+
/* Quick phase starts */
|
501
|
+
|
502
|
+
/* Double precision evaluation */
|
503
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
504
|
+
highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
|
505
|
+
tbl[TBLIDX10+42] ,z,tbl[TBLIDX10+40]),z,tbl[TBLIDX10+38]),z,
|
506
|
+
tbl[TBLIDX10+36]),z,tbl[TBLIDX10+34]),z,tbl[TBLIDX10+32]),z,
|
507
|
+
tbl[TBLIDX10+30]),z,tbl[TBLIDX10+28]),z,tbl[TBLIDX10+26]),z,
|
508
|
+
tbl[TBLIDX10+24]);
|
509
|
+
#else
|
510
|
+
highPoly = tbl[TBLIDX10+24] + z * (tbl[TBLIDX10+26] + z * (tbl[TBLIDX10+28] + z * (
|
511
|
+
tbl[TBLIDX10+30] + z * (tbl[TBLIDX10+32] + z * (tbl[TBLIDX10+34] + z * (
|
512
|
+
tbl[TBLIDX10+36] + z * (tbl[TBLIDX10+38] + z * (tbl[TBLIDX10+40] + z *
|
513
|
+
tbl[TBLIDX10+42]))))))));
|
514
|
+
#endif
|
515
|
+
|
516
|
+
/* Double-double precision evaluation */
|
517
|
+
Mul12(&tt1h,&tt1l,z,highPoly);
|
518
|
+
Add22(&t1h,&t1l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tt1h,tt1l);
|
519
|
+
|
520
|
+
MulAdd212(&t2h,&t2l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],z,t1h,t1l);
|
521
|
+
MulAdd212(&t3h,&t3l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],z,t2h,t2l);
|
522
|
+
MulAdd212(&t4h,&t4l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],z,t3h,t3l);
|
523
|
+
MulAdd212(&t5h,&t5l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],z,t4h,t4l);
|
524
|
+
MulAdd212(&t6h,&t6l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],z,t5h,t5l);
|
525
|
+
MulAdd212(&t7h,&t7l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],z,t6h,t6l);
|
526
|
+
MulAdd212(&t8h,&t8l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],z,t7h,t7l);
|
527
|
+
MulAdd212(&polyh,&polyl,-1,0,z,t8h,t8l);
|
528
|
+
|
529
|
+
/* Compute sqrt(2*z) as a double-double */
|
530
|
+
|
531
|
+
twoZ = 2 * z;
|
532
|
+
sqrt12(&sqrtzh,&sqrtzl,twoZ);
|
533
|
+
|
534
|
+
/* Multiply p(z) by sqrt(2*z); Pi is added below only when x was negative */
|
535
|
+
|
536
|
+
Mul22(&pTimesSh,&pTimesSl,polyh,polyl,sqrtzh,sqrtzl);
|
537
|
+
|
538
|
+
|
539
|
+
/* Reconstruction:
|
540
|
+
|
541
|
+
If the sign of x in acos(x) was positive:
|
542
|
+
- Multiply pTimesSh + pTimesSl approx f(x) * sqrt(2 * z) by -1
|
543
|
+
- Return
|
544
|
+
|
545
|
+
Otherwise:
|
546
|
+
- Add Pi in double-double to pTimesSh + pTimesSl approx f(x) * sqrt(2 * z)
|
547
|
+
- Return
|
548
|
+
|
549
|
+
*/
|
550
|
+
|
551
|
+
if (sign > 0) {
|
552
|
+
|
553
|
+
acosh = -1.0 * pTimesSh;
|
554
|
+
acosm = -1.0 * pTimesSl;
|
555
|
+
|
556
|
+
} else {
|
557
|
+
|
558
|
+
Add22(&acosh,&acosm,PIH,PIM,pTimesSh,pTimesSl);
|
559
|
+
|
560
|
+
}
|
561
|
+
|
562
|
+
/* Rounding test
|
563
|
+
The RN rounding constant is at tbl[TBLIDX10+54]
|
564
|
+
*/
|
565
|
+
|
566
|
+
if(acosh == (acosh + (acosm * tbl[TBLIDX10+54])))
|
567
|
+
return acosh;
|
568
|
+
|
569
|
+
/* Launch accurate phase */
|
570
|
+
|
571
|
+
acos_accurate_higher(&acosh,&acosm,&acosl,z,sign);
|
572
|
+
|
573
|
+
ReturnRoundToNearest3(acosh,acosm,acosl);
|
574
|
+
}
|
575
|
+
|
576
|
+
/* General 8 main intervals
|
577
|
+
We can already suppose that BOUND1 <= x < BOUND9
|
578
|
+
*/
|
579
|
+
|
580
|
+
if (xdb.i[HI] < BOUND5) {
|
581
|
+
if (xdb.i[HI] < BOUND3) {
|
582
|
+
if (xdb.i[HI] < BOUND2) i = TBLIDX2; else i = TBLIDX3;
|
583
|
+
} else {
|
584
|
+
if (xdb.i[HI] < BOUND4) i = TBLIDX4; else i = TBLIDX5;
|
585
|
+
}
|
586
|
+
} else {
|
587
|
+
if (xdb.i[HI] < BOUND7) {
|
588
|
+
if (xdb.i[HI] < BOUND6) i = TBLIDX6; else i = TBLIDX7;
|
589
|
+
} else {
|
590
|
+
if (xdb.i[HI] < BOUND8) i = TBLIDX8; else i = TBLIDX9;
|
591
|
+
}
|
592
|
+
}
|
593
|
+
|
594
|
+
/* Argument reduction
|
595
|
+
i points to the interval midpoint value in the table
|
596
|
+
*/
|
597
|
+
z = x - tbl[i];
|
598
|
+
|
599
|
+
/* Quick phase starts */
|
600
|
+
|
601
|
+
/* Double precision evaluation */
|
602
|
+
|
603
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
604
|
+
highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(
|
605
|
+
tbl[i+35] ,z,tbl[i+33]),z,tbl[i+31]),z,tbl[i+29]),z,
|
606
|
+
tbl[i+27]),z,tbl[i+25]),z,tbl[i+23]),z,tbl[i+21]);
|
607
|
+
#else
|
608
|
+
highPoly = tbl[i+21] + z * (tbl[i+23] + z * (tbl[i+25] + z * (
|
609
|
+
tbl[i+27] + z * (tbl[i+29] + z * (tbl[i+31] + z * (
|
610
|
+
tbl[i+33] + z * tbl[i+35]))))));
|
611
|
+
#endif
|
612
|
+
|
613
|
+
/* Double-double precision evaluation */
|
614
|
+
|
615
|
+
Mul12(&tt1h,&tt1l,z,highPoly);
|
616
|
+
Add22(&t1h,&t1l,tbl[i+18],tbl[i+19],tt1h,tt1l);
|
617
|
+
|
618
|
+
MulAdd212(&t2h,&t2l,tbl[i+15],tbl[i+16],z,t1h,t1l);
|
619
|
+
MulAdd212(&t3h,&t3l,tbl[i+12],tbl[i+13],z,t2h,t2l);
|
620
|
+
MulAdd212(&t4h,&t4l,tbl[i+9],tbl[i+10],z,t3h,t3l);
|
621
|
+
MulAdd212(&t5h,&t5l,tbl[i+6],tbl[i+7],z,t4h,t4l);
|
622
|
+
MulAdd212(&t6h,&t6l,tbl[i+3],tbl[i+4],z,t5h,t5l);
|
623
|
+
MulAdd212(&polyh,&polyl,tbl[i+1],tbl[i+2],z,t6h,t6l);
|
624
|
+
|
625
|
+
/* Reconstruction:
|
626
|
+
|
627
|
+
- Multiply by the inverted sign
|
628
|
+
- Add Pi/2 in double-double precision
|
629
|
+
|
630
|
+
*/
|
631
|
+
|
632
|
+
zw1h = -sign * polyh;
|
633
|
+
zw1l = -sign * polyl;
|
634
|
+
|
635
|
+
Add22(&acosh,&acosm,PIHALFH,PIHALFM,zw1h,zw1l);
|
636
|
+
|
637
|
+
/* Rounding test
|
638
|
+
The RN rounding constant is at tbl[i+59]
|
639
|
+
*/
|
640
|
+
if(acosh == (acosh + (acosm * tbl[i+59])))
|
641
|
+
return acosh;
|
642
|
+
|
643
|
+
/* Launch accurate phase */
|
644
|
+
|
645
|
+
acos_accurate_middle(&acosh,&acosm,&acosl,z,i,sign);
|
646
|
+
|
647
|
+
ReturnRoundToNearest3(acosh,acosm,acosl);
|
648
|
+
}
|
649
|
+
|
650
|
+
double acos_ru(double x) {
|
651
|
+
db_number xdb;
|
652
|
+
double sign, z, acosh, acosm, acosl;
|
653
|
+
int i;
|
654
|
+
double xSqh, xSql;
|
655
|
+
double tt1h, tt1l;
|
656
|
+
double tt6h, tt6l;
|
657
|
+
double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
|
658
|
+
double t7h, t7l, t8h, t8l, polyh, polyl, twoZ, sqrtzh, sqrtzl;
|
659
|
+
double pTimesSh, pTimesSl, highPoly, xCubeh, xCubel;
|
660
|
+
double tmp1, tmp2, tmp3, tmp4, tmp5;
|
661
|
+
double zw1h, zw1l;
|
662
|
+
|
663
|
+
/* Transform the argument into integer */
|
664
|
+
xdb.d = x;
|
665
|
+
|
666
|
+
/* Special case handling */
|
667
|
+
|
668
|
+
/* Exact algebraic case x = 1, acos(1) = 0 */
|
669
|
+
|
670
|
+
if (x == 1.0) return 0.0;
|
671
|
+
|
672
|
+
/* Strip off the sign of argument x */
|
673
|
+
if (xdb.i[HI] & 0x80000000) sign = -1; else sign = 1;
|
674
|
+
xdb.i[HI] &= 0x7fffffff;
|
675
|
+
|
676
|
+
/* acos is defined on -1 <= x <= 1, elsewhere it is NaN */
|
677
|
+
if ((xdb.i[HI] > 0x3ff00000) || ((xdb.i[HI] == 0x3ff00000) && (xdb.i[LO] != 0x00000000))) {
|
678
|
+
return (x-x)/0.0; /* return NaN */
|
679
|
+
}
|
680
|
+
|
681
|
+
/* If |x| < 2^(-120) we have
|
682
|
+
|
683
|
+
round(acos(x)) = round(pi/2)
|
684
|
+
|
685
|
+
So we can decide the rounding without any computation
|
686
|
+
*/
|
687
|
+
if (xdb.i[HI] < 0x38700000) {
|
688
|
+
return PIHALFDOUBLERU;
|
689
|
+
}
|
690
|
+
|
691
|
+
/* Recast x */
|
692
|
+
x = xdb.d;
|
693
|
+
|
694
|
+
/* Find correspondant interval and compute index to the table
|
695
|
+
We start by filtering the two special cases around 0 and 1
|
696
|
+
*/
|
697
|
+
|
698
|
+
if (xdb.i[HI] < BOUND1) {
|
699
|
+
/* Special interval 0..BOUND1
|
700
|
+
The polynomial has no even monomials
|
701
|
+
We must prove extra accuracy in the interval 0..sin(2^(-18))
|
702
|
+
*/
|
703
|
+
|
704
|
+
/* Quick phase starts */
|
705
|
+
|
706
|
+
/* Compute square of x for both quick and accurate phases */
|
707
|
+
Mul12(&xSqh,&xSql,x,x);
|
708
|
+
|
709
|
+
tmp4 = tbl[3];
|
710
|
+
tmp5 = tbl[4];
|
711
|
+
t4h = tmp4;
|
712
|
+
t4l = tmp5;
|
713
|
+
if (xdb.i[HI] > EXTRABOUND) {
|
714
|
+
/* Double precision evaluation */
|
715
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
716
|
+
highPoly = FMA(FMA(FMA(FMA(tbl[23],xSqh,tbl[21]),xSqh,tbl[19]),xSqh,tbl[17]),xSqh,tbl[15]);
|
717
|
+
#else
|
718
|
+
highPoly = tbl[15] + xSqh * (tbl[17] + xSqh * (tbl[19] + xSqh * (tbl[21] + xSqh * tbl[23])));
|
719
|
+
#endif
|
720
|
+
|
721
|
+
/* Double-double precision evaluation */
|
722
|
+
Mul12(&tt1h,&tt1l,xSqh,highPoly);
|
723
|
+
Add22(&t1h,&t1l,tbl[12],tbl[13],tt1h,tt1l);
|
724
|
+
|
725
|
+
MulAdd212(&t2h,&t2l,tbl[9],tbl[10],xSqh,t1h,t1l);
|
726
|
+
MulAdd212(&t3h,&t3l,tbl[6],tbl[7],xSqh,t2h,t2l);
|
727
|
+
MulAdd22(&t4h,&t4l,tmp4,tmp5,xSqh,xSql,t3h,t3l);
|
728
|
+
}
|
729
|
+
|
730
|
+
MulAdd22(&t5h,&t5l,tbl[0],tbl[1],xSqh,xSql,t4h,t4l);
|
731
|
+
|
732
|
+
Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
|
733
|
+
Mul22(&tt6h,&tt6l,xCubeh,xCubel,t5h,t5l);
|
734
|
+
|
735
|
+
Add12(tmp1,tmp2,x,tt6h);
|
736
|
+
tmp3 = tmp2 + tt6l;
|
737
|
+
Add12(polyh,polyl,tmp1,tmp3);
|
738
|
+
|
739
|
+
/* Reconstruction:
|
740
|
+
|
741
|
+
- Multiply by the inverted sign
|
742
|
+
- Add Pi/2 in double-double precision
|
743
|
+
|
744
|
+
*/
|
745
|
+
|
746
|
+
zw1h = -sign * polyh;
|
747
|
+
zw1l = -sign * polyl;
|
748
|
+
|
749
|
+
Add22(&acosh,&acosm,PIHALFH,PIHALFM,zw1h,zw1l);
|
750
|
+
|
751
|
+
/* Rounding test
|
752
|
+
The RU rounding constant is at tbl[35]
|
753
|
+
*/
|
754
|
+
TEST_AND_RETURN_RU(acosh, acosm, tbl[35]);
|
755
|
+
|
756
|
+
/* Launch accurate phase */
|
757
|
+
|
758
|
+
acos_accurate_lower(&acosh,&acosm,&acosl,x,xSqh,xSql,sign);
|
759
|
+
|
760
|
+
ReturnRoundUpwards3(acosh,acosm,acosl);
|
761
|
+
}
|
762
|
+
|
763
|
+
if (xdb.i[HI] >= BOUND9) {
|
764
|
+
/* Special interval BOUND9..1
|
765
|
+
We use an asymptotic development of arcsin in sqrt(1 - x)
|
766
|
+
*/
|
767
|
+
|
768
|
+
/* Argument reduction for quick and accurate phase
|
769
|
+
z = 1 - x
|
770
|
+
The operation is exact as per Sterbenz' lemma
|
771
|
+
*/
|
772
|
+
|
773
|
+
z = 1 - x;
|
774
|
+
|
775
|
+
/* Quick phase starts */
|
776
|
+
|
777
|
+
/* Double precision evaluation */
|
778
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
779
|
+
highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
|
780
|
+
tbl[TBLIDX10+42] ,z,tbl[TBLIDX10+40]),z,tbl[TBLIDX10+38]),z,
|
781
|
+
tbl[TBLIDX10+36]),z,tbl[TBLIDX10+34]),z,tbl[TBLIDX10+32]),z,
|
782
|
+
tbl[TBLIDX10+30]),z,tbl[TBLIDX10+28]),z,tbl[TBLIDX10+26]),z,
|
783
|
+
tbl[TBLIDX10+24]);
|
784
|
+
#else
|
785
|
+
highPoly = tbl[TBLIDX10+24] + z * (tbl[TBLIDX10+26] + z * (tbl[TBLIDX10+28] + z * (
|
786
|
+
tbl[TBLIDX10+30] + z * (tbl[TBLIDX10+32] + z * (tbl[TBLIDX10+34] + z * (
|
787
|
+
tbl[TBLIDX10+36] + z * (tbl[TBLIDX10+38] + z * (tbl[TBLIDX10+40] + z *
|
788
|
+
tbl[TBLIDX10+42]))))))));
|
789
|
+
#endif
|
790
|
+
|
791
|
+
/* Double-double precision evaluation */
|
792
|
+
Mul12(&tt1h,&tt1l,z,highPoly);
|
793
|
+
Add22(&t1h,&t1l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tt1h,tt1l);
|
794
|
+
|
795
|
+
MulAdd212(&t2h,&t2l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],z,t1h,t1l);
|
796
|
+
MulAdd212(&t3h,&t3l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],z,t2h,t2l);
|
797
|
+
MulAdd212(&t4h,&t4l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],z,t3h,t3l);
|
798
|
+
MulAdd212(&t5h,&t5l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],z,t4h,t4l);
|
799
|
+
MulAdd212(&t6h,&t6l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],z,t5h,t5l);
|
800
|
+
MulAdd212(&t7h,&t7l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],z,t6h,t6l);
|
801
|
+
MulAdd212(&t8h,&t8l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],z,t7h,t7l);
|
802
|
+
MulAdd212(&polyh,&polyl,-1,0,z,t8h,t8l);
|
803
|
+
|
804
|
+
/* Compute sqrt(2*z) as a double-double */
|
805
|
+
|
806
|
+
twoZ = 2 * z;
|
807
|
+
sqrt12(&sqrtzh,&sqrtzl,twoZ);
|
808
|
+
|
809
|
+
/* Multiply p(z) by sqrt(2*z) and add Pi/2 */
|
810
|
+
|
811
|
+
Mul22(&pTimesSh,&pTimesSl,polyh,polyl,sqrtzh,sqrtzl);
|
812
|
+
|
813
|
+
|
814
|
+
/* Reconstruction:
|
815
|
+
|
816
|
+
If the sign of x in acos(x) was positive:
|
817
|
+
- Multiply pTimesSh + pTimesSl approx f(x) * sqrt(2 * z) by -1
|
818
|
+
- Return
|
819
|
+
|
820
|
+
Otherwise:
|
821
|
+
- Add Pi in triple-double to pTimesSh + pTimesSl approx f(x) * sqrt(2 * z)
|
822
|
+
- Return
|
823
|
+
|
824
|
+
*/
|
825
|
+
|
826
|
+
if (sign > 0) {
|
827
|
+
|
828
|
+
acosh = -1.0 * pTimesSh;
|
829
|
+
acosm = -1.0 * pTimesSl;
|
830
|
+
|
831
|
+
} else {
|
832
|
+
|
833
|
+
Add22(&acosh,&acosm,PIH,PIM,pTimesSh,pTimesSl);
|
834
|
+
|
835
|
+
}
|
836
|
+
|
837
|
+
/* Rounding test
|
838
|
+
The RU rounding constant is at tbl[TBLIDX10+55]
|
839
|
+
*/
|
840
|
+
TEST_AND_RETURN_RU(acosh, acosm, tbl[TBLIDX10+55]);
|
841
|
+
|
842
|
+
/* Launch accurate phase */
|
843
|
+
|
844
|
+
acos_accurate_higher(&acosh,&acosm,&acosl,z,sign);
|
845
|
+
|
846
|
+
ReturnRoundUpwards3(acosh,acosm,acosl);
|
847
|
+
}
|
848
|
+
|
849
|
+
/* General 8 main intervals
|
850
|
+
We can already suppose that BOUND1 <= x <= BOUND9
|
851
|
+
*/
|
852
|
+
|
853
|
+
if (xdb.i[HI] < BOUND5) {
|
854
|
+
if (xdb.i[HI] < BOUND3) {
|
855
|
+
if (xdb.i[HI] < BOUND2) i = TBLIDX2; else i = TBLIDX3;
|
856
|
+
} else {
|
857
|
+
if (xdb.i[HI] < BOUND4) i = TBLIDX4; else i = TBLIDX5;
|
858
|
+
}
|
859
|
+
} else {
|
860
|
+
if (xdb.i[HI] < BOUND7) {
|
861
|
+
if (xdb.i[HI] < BOUND6) i = TBLIDX6; else i = TBLIDX7;
|
862
|
+
} else {
|
863
|
+
if (xdb.i[HI] < BOUND8) i = TBLIDX8; else i = TBLIDX9;
|
864
|
+
}
|
865
|
+
}
|
866
|
+
|
867
|
+
/* Argument reduction
|
868
|
+
i points to the interval midpoint value in the table
|
869
|
+
*/
|
870
|
+
z = x - tbl[i];
|
871
|
+
|
872
|
+
/* Quick phase starts */
|
873
|
+
|
874
|
+
/* Double precision evaluation */
|
875
|
+
|
876
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
877
|
+
highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(
|
878
|
+
tbl[i+35] ,z,tbl[i+33]),z,tbl[i+31]),z,tbl[i+29]),z,
|
879
|
+
tbl[i+27]),z,tbl[i+25]),z,tbl[i+23]),z,tbl[i+21]);
|
880
|
+
#else
|
881
|
+
highPoly = tbl[i+21] + z * (tbl[i+23] + z * (tbl[i+25] + z * (
|
882
|
+
tbl[i+27] + z * (tbl[i+29] + z * (tbl[i+31] + z * (
|
883
|
+
tbl[i+33] + z * tbl[i+35]))))));
|
884
|
+
#endif
|
885
|
+
|
886
|
+
/* Double-double precision evaluation */
|
887
|
+
|
888
|
+
Mul12(&tt1h,&tt1l,z,highPoly);
|
889
|
+
Add22(&t1h,&t1l,tbl[i+18],tbl[i+19],tt1h,tt1l);
|
890
|
+
|
891
|
+
MulAdd212(&t2h,&t2l,tbl[i+15],tbl[i+16],z,t1h,t1l);
|
892
|
+
MulAdd212(&t3h,&t3l,tbl[i+12],tbl[i+13],z,t2h,t2l);
|
893
|
+
MulAdd212(&t4h,&t4l,tbl[i+9],tbl[i+10],z,t3h,t3l);
|
894
|
+
MulAdd212(&t5h,&t5l,tbl[i+6],tbl[i+7],z,t4h,t4l);
|
895
|
+
MulAdd212(&t6h,&t6l,tbl[i+3],tbl[i+4],z,t5h,t5l);
|
896
|
+
MulAdd212(&polyh,&polyl,tbl[i+1],tbl[i+2],z,t6h,t6l);
|
897
|
+
|
898
|
+
/* Reconstruction:
|
899
|
+
|
900
|
+
- Multiply by the inverted sign
|
901
|
+
- Add Pi/2 in double-double precision
|
902
|
+
|
903
|
+
*/
|
904
|
+
|
905
|
+
zw1h = -sign * polyh;
|
906
|
+
zw1l = -sign * polyl;
|
907
|
+
|
908
|
+
Add22(&acosh,&acosm,PIHALFH,PIHALFM,zw1h,zw1l);
|
909
|
+
|
910
|
+
/* Rounding test
|
911
|
+
The RU rounding constant is at tbl[i+60]
|
912
|
+
*/
|
913
|
+
TEST_AND_RETURN_RU(acosh, acosm, tbl[i+60]);
|
914
|
+
|
915
|
+
/* Launch accurate phase */
|
916
|
+
|
917
|
+
acos_accurate_middle(&acosh,&acosm,&acosl,z,i,sign);
|
918
|
+
|
919
|
+
ReturnRoundUpwards3(acosh,acosm,acosl);
|
920
|
+
}
|
921
|
+
|
922
|
+
|
923
|
+
|
924
|
+
double acos_rd(double x) {
|
925
|
+
db_number xdb;
|
926
|
+
double sign, z, acosh, acosm, acosl;
|
927
|
+
int i;
|
928
|
+
double xSqh, xSql;
|
929
|
+
double tt1h, tt1l;
|
930
|
+
double tt6h, tt6l;
|
931
|
+
double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
|
932
|
+
double t7h, t7l, t8h, t8l, polyh, polyl, twoZ, sqrtzh, sqrtzl;
|
933
|
+
double pTimesSh, pTimesSl, highPoly, xCubeh, xCubel;
|
934
|
+
double tmp1, tmp2, tmp3, tmp4, tmp5;
|
935
|
+
double zw1h, zw1l;
|
936
|
+
|
937
|
+
/* Transform the argument into integer */
|
938
|
+
xdb.d = x;
|
939
|
+
|
940
|
+
/* Special case handling */
|
941
|
+
|
942
|
+
/* Exact algebraic case x = 1, acos(1) = 0 */
|
943
|
+
|
944
|
+
if (x == 1.0) return 0.0;
|
945
|
+
|
946
|
+
/* Strip off the sign of argument x */
|
947
|
+
if (xdb.i[HI] & 0x80000000) sign = -1; else sign = 1;
|
948
|
+
xdb.i[HI] &= 0x7fffffff;
|
949
|
+
|
950
|
+
/* acos is defined on -1 <= x <= 1, elsewhere it is NaN */
|
951
|
+
if ((xdb.i[HI] > 0x3ff00000) || ((xdb.i[HI] == 0x3ff00000) && (xdb.i[LO] != 0x00000000))) {
|
952
|
+
return (x-x)/0.0; /* return NaN */
|
953
|
+
}
|
954
|
+
|
955
|
+
/* If |x| < 2^(-120) we have
|
956
|
+
|
957
|
+
round(acos(x)) = round(pi/2)
|
958
|
+
|
959
|
+
So we can decide the rounding without any computation
|
960
|
+
*/
|
961
|
+
if (xdb.i[HI] < 0x38700000) {
|
962
|
+
return PIHALFDOUBLERD;
|
963
|
+
}
|
964
|
+
|
965
|
+
/* Recast x */
|
966
|
+
x = xdb.d;
|
967
|
+
|
968
|
+
/* Find correspondant interval and compute index to the table
|
969
|
+
We start by filtering the two special cases around 0 and 1
|
970
|
+
*/
|
971
|
+
|
972
|
+
if (xdb.i[HI] < BOUND1) {
|
973
|
+
/* Special interval 0..BOUND1
|
974
|
+
The polynomial has no even monomials
|
975
|
+
We must prove extra accuracy in the interval 0..sin(2^(-18))
|
976
|
+
*/
|
977
|
+
|
978
|
+
/* Quick phase starts */
|
979
|
+
|
980
|
+
/* Compute square of x for both quick and accurate phases */
|
981
|
+
Mul12(&xSqh,&xSql,x,x);
|
982
|
+
|
983
|
+
tmp4 = tbl[3];
|
984
|
+
tmp5 = tbl[4];
|
985
|
+
t4h = tmp4;
|
986
|
+
t4l = tmp5;
|
987
|
+
if (xdb.i[HI] > EXTRABOUND) {
|
988
|
+
/* Double precision evaluation */
|
989
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
990
|
+
highPoly = FMA(FMA(FMA(FMA(tbl[23],xSqh,tbl[21]),xSqh,tbl[19]),xSqh,tbl[17]),xSqh,tbl[15]);
|
991
|
+
#else
|
992
|
+
highPoly = tbl[15] + xSqh * (tbl[17] + xSqh * (tbl[19] + xSqh * (tbl[21] + xSqh * tbl[23])));
|
993
|
+
#endif
|
994
|
+
|
995
|
+
/* Double-double precision evaluation */
|
996
|
+
Mul12(&tt1h,&tt1l,xSqh,highPoly);
|
997
|
+
Add22(&t1h,&t1l,tbl[12],tbl[13],tt1h,tt1l);
|
998
|
+
|
999
|
+
MulAdd212(&t2h,&t2l,tbl[9],tbl[10],xSqh,t1h,t1l);
|
1000
|
+
MulAdd212(&t3h,&t3l,tbl[6],tbl[7],xSqh,t2h,t2l);
|
1001
|
+
MulAdd22(&t4h,&t4l,tmp4,tmp5,xSqh,xSql,t3h,t3l);
|
1002
|
+
}
|
1003
|
+
|
1004
|
+
MulAdd22(&t5h,&t5l,tbl[0],tbl[1],xSqh,xSql,t4h,t4l);
|
1005
|
+
|
1006
|
+
Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
|
1007
|
+
Mul22(&tt6h,&tt6l,xCubeh,xCubel,t5h,t5l);
|
1008
|
+
|
1009
|
+
Add12(tmp1,tmp2,x,tt6h);
|
1010
|
+
tmp3 = tmp2 + tt6l;
|
1011
|
+
Add12(polyh,polyl,tmp1,tmp3);
|
1012
|
+
|
1013
|
+
/* Reconstruction:
|
1014
|
+
|
1015
|
+
- Multiply by the inverted sign
|
1016
|
+
- Add Pi/2 in double-double precision
|
1017
|
+
|
1018
|
+
*/
|
1019
|
+
|
1020
|
+
zw1h = -sign * polyh;
|
1021
|
+
zw1l = -sign * polyl;
|
1022
|
+
|
1023
|
+
Add22(&acosh,&acosm,PIHALFH,PIHALFM,zw1h,zw1l);
|
1024
|
+
|
1025
|
+
/* Rounding test
|
1026
|
+
The RD rounding constant is at tbl[35]
|
1027
|
+
*/
|
1028
|
+
TEST_AND_RETURN_RD(acosh, acosm, tbl[35]);
|
1029
|
+
|
1030
|
+
/* Launch accurate phase */
|
1031
|
+
|
1032
|
+
acos_accurate_lower(&acosh,&acosm,&acosl,x,xSqh,xSql,sign);
|
1033
|
+
|
1034
|
+
ReturnRoundDownwards3(acosh,acosm,acosl);
|
1035
|
+
}
|
1036
|
+
|
1037
|
+
if (xdb.i[HI] >= BOUND9) {
|
1038
|
+
/* Special interval BOUND9..1
|
1039
|
+
We use an asymptotic development of arcsin in sqrt(1 - x)
|
1040
|
+
*/
|
1041
|
+
|
1042
|
+
/* Argument reduction for quick and accurate phase
|
1043
|
+
z = 1 - x
|
1044
|
+
The operation is exact as per Sterbenz' lemma
|
1045
|
+
*/
|
1046
|
+
|
1047
|
+
z = 1 - x;
|
1048
|
+
|
1049
|
+
/* Quick phase starts */
|
1050
|
+
|
1051
|
+
/* Double precision evaluation */
|
1052
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
1053
|
+
highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
|
1054
|
+
tbl[TBLIDX10+42] ,z,tbl[TBLIDX10+40]),z,tbl[TBLIDX10+38]),z,
|
1055
|
+
tbl[TBLIDX10+36]),z,tbl[TBLIDX10+34]),z,tbl[TBLIDX10+32]),z,
|
1056
|
+
tbl[TBLIDX10+30]),z,tbl[TBLIDX10+28]),z,tbl[TBLIDX10+26]),z,
|
1057
|
+
tbl[TBLIDX10+24]);
|
1058
|
+
#else
|
1059
|
+
highPoly = tbl[TBLIDX10+24] + z * (tbl[TBLIDX10+26] + z * (tbl[TBLIDX10+28] + z * (
|
1060
|
+
tbl[TBLIDX10+30] + z * (tbl[TBLIDX10+32] + z * (tbl[TBLIDX10+34] + z * (
|
1061
|
+
tbl[TBLIDX10+36] + z * (tbl[TBLIDX10+38] + z * (tbl[TBLIDX10+40] + z *
|
1062
|
+
tbl[TBLIDX10+42]))))))));
|
1063
|
+
#endif
|
1064
|
+
|
1065
|
+
/* Double-double precision evaluation */
|
1066
|
+
Mul12(&tt1h,&tt1l,z,highPoly);
|
1067
|
+
Add22(&t1h,&t1l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tt1h,tt1l);
|
1068
|
+
|
1069
|
+
MulAdd212(&t2h,&t2l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],z,t1h,t1l);
|
1070
|
+
MulAdd212(&t3h,&t3l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],z,t2h,t2l);
|
1071
|
+
MulAdd212(&t4h,&t4l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],z,t3h,t3l);
|
1072
|
+
MulAdd212(&t5h,&t5l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],z,t4h,t4l);
|
1073
|
+
MulAdd212(&t6h,&t6l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],z,t5h,t5l);
|
1074
|
+
MulAdd212(&t7h,&t7l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],z,t6h,t6l);
|
1075
|
+
MulAdd212(&t8h,&t8l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],z,t7h,t7l);
|
1076
|
+
MulAdd212(&polyh,&polyl,-1,0,z,t8h,t8l);
|
1077
|
+
|
1078
|
+
/* Compute sqrt(2*z) as a double-double */
|
1079
|
+
|
1080
|
+
twoZ = 2 * z;
|
1081
|
+
sqrt12(&sqrtzh,&sqrtzl,twoZ);
|
1082
|
+
|
1083
|
+
/* Multiply p(z) by sqrt(2*z) and add Pi/2 */
|
1084
|
+
|
1085
|
+
Mul22(&pTimesSh,&pTimesSl,polyh,polyl,sqrtzh,sqrtzl);
|
1086
|
+
|
1087
|
+
|
1088
|
+
/* Reconstruction:
|
1089
|
+
|
1090
|
+
If the sign of x in acos(x) was positive:
|
1091
|
+
- Multiply pTimesSh + pTimesSl approx f(x) * sqrt(2 * z) by -1
|
1092
|
+
- Return
|
1093
|
+
|
1094
|
+
Otherwise:
|
1095
|
+
- Add Pi in triple-double to pTimesSh + pTimesSl approx f(x) * sqrt(2 * z)
|
1096
|
+
- Return
|
1097
|
+
|
1098
|
+
*/
|
1099
|
+
|
1100
|
+
if (sign > 0) {
|
1101
|
+
|
1102
|
+
acosh = -1.0 * pTimesSh;
|
1103
|
+
acosm = -1.0 * pTimesSl;
|
1104
|
+
|
1105
|
+
} else {
|
1106
|
+
|
1107
|
+
Add22(&acosh,&acosm,PIH,PIM,pTimesSh,pTimesSl);
|
1108
|
+
|
1109
|
+
}
|
1110
|
+
|
1111
|
+
/* Rounding test
|
1112
|
+
The RD rounding constant is at tbl[TBLIDX10+55]
|
1113
|
+
*/
|
1114
|
+
TEST_AND_RETURN_RD(acosh, acosm, tbl[TBLIDX10+55]);
|
1115
|
+
|
1116
|
+
/* Launch accurate phase */
|
1117
|
+
|
1118
|
+
acos_accurate_higher(&acosh,&acosm,&acosl,z,sign);
|
1119
|
+
|
1120
|
+
ReturnRoundDownwards3(acosh,acosm,acosl);
|
1121
|
+
}
|
1122
|
+
|
1123
|
+
/* General 8 main intervals
|
1124
|
+
We can already suppose that BOUND1 <= x <= BOUND9
|
1125
|
+
*/
|
1126
|
+
|
1127
|
+
if (xdb.i[HI] < BOUND5) {
|
1128
|
+
if (xdb.i[HI] < BOUND3) {
|
1129
|
+
if (xdb.i[HI] < BOUND2) i = TBLIDX2; else i = TBLIDX3;
|
1130
|
+
} else {
|
1131
|
+
if (xdb.i[HI] < BOUND4) i = TBLIDX4; else i = TBLIDX5;
|
1132
|
+
}
|
1133
|
+
} else {
|
1134
|
+
if (xdb.i[HI] < BOUND7) {
|
1135
|
+
if (xdb.i[HI] < BOUND6) i = TBLIDX6; else i = TBLIDX7;
|
1136
|
+
} else {
|
1137
|
+
if (xdb.i[HI] < BOUND8) i = TBLIDX8; else i = TBLIDX9;
|
1138
|
+
}
|
1139
|
+
}
|
1140
|
+
|
1141
|
+
/* Argument reduction
|
1142
|
+
i points to the interval midpoint value in the table
|
1143
|
+
*/
|
1144
|
+
z = x - tbl[i];
|
1145
|
+
|
1146
|
+
/* Quick phase starts */
|
1147
|
+
|
1148
|
+
/* Double precision evaluation */
|
1149
|
+
|
1150
|
+
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
|
1151
|
+
highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(
|
1152
|
+
tbl[i+35] ,z,tbl[i+33]),z,tbl[i+31]),z,tbl[i+29]),z,
|
1153
|
+
tbl[i+27]),z,tbl[i+25]),z,tbl[i+23]),z,tbl[i+21]);
|
1154
|
+
#else
|
1155
|
+
highPoly = tbl[i+21] + z * (tbl[i+23] + z * (tbl[i+25] + z * (
|
1156
|
+
tbl[i+27] + z * (tbl[i+29] + z * (tbl[i+31] + z * (
|
1157
|
+
tbl[i+33] + z * tbl[i+35]))))));
|
1158
|
+
#endif
|
1159
|
+
|
1160
|
+
/* Double-double precision evaluation */
|
1161
|
+
|
1162
|
+
Mul12(&tt1h,&tt1l,z,highPoly);
|
1163
|
+
Add22(&t1h,&t1l,tbl[i+18],tbl[i+19],tt1h,tt1l);
|
1164
|
+
|
1165
|
+
MulAdd212(&t2h,&t2l,tbl[i+15],tbl[i+16],z,t1h,t1l);
|
1166
|
+
MulAdd212(&t3h,&t3l,tbl[i+12],tbl[i+13],z,t2h,t2l);
|
1167
|
+
MulAdd212(&t4h,&t4l,tbl[i+9],tbl[i+10],z,t3h,t3l);
|
1168
|
+
MulAdd212(&t5h,&t5l,tbl[i+6],tbl[i+7],z,t4h,t4l);
|
1169
|
+
MulAdd212(&t6h,&t6l,tbl[i+3],tbl[i+4],z,t5h,t5l);
|
1170
|
+
MulAdd212(&polyh,&polyl,tbl[i+1],tbl[i+2],z,t6h,t6l);
|
1171
|
+
|
1172
|
+
/* Reconstruction:
|
1173
|
+
|
1174
|
+
- Multiply by the inverted sign
|
1175
|
+
- Add Pi/2 in double-double precision
|
1176
|
+
|
1177
|
+
*/
|
1178
|
+
|
1179
|
+
zw1h = -sign * polyh;
|
1180
|
+
zw1l = -sign * polyl;
|
1181
|
+
|
1182
|
+
Add22(&acosh,&acosm,PIHALFH,PIHALFM,zw1h,zw1l);
|
1183
|
+
|
1184
|
+
/* Rounding test
|
1185
|
+
The RD rounding constant is at tbl[i+60]
|
1186
|
+
*/
|
1187
|
+
TEST_AND_RETURN_RD(acosh, acosm, tbl[i+60]);
|
1188
|
+
|
1189
|
+
/* Launch accurate phase */
|
1190
|
+
|
1191
|
+
acos_accurate_middle(&acosh,&acosm,&acosl,z,i,sign);
|
1192
|
+
|
1193
|
+
ReturnRoundDownwards3(acosh,acosm,acosl);
|
1194
|
+
}
|
1195
|
+
|