crmf 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -0
- data/crmf.gemspec +102 -1
- data/ext/crlibm-1.0beta5/AUTHORS +2 -0
- data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
- data/ext/crlibm-1.0beta5/COPYING +340 -0
- data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
- data/ext/crlibm-1.0beta5/ChangeLog +125 -0
- data/ext/crlibm-1.0beta5/Makefile.am +134 -0
- data/ext/crlibm-1.0beta5/NEWS +0 -0
- data/ext/crlibm-1.0beta5/README +31 -0
- data/ext/crlibm-1.0beta5/README.DEV +23 -0
- data/ext/crlibm-1.0beta5/README.md +5 -0
- data/ext/crlibm-1.0beta5/TODO +66 -0
- data/ext/crlibm-1.0beta5/VERSION +1 -0
- data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
- data/ext/crlibm-1.0beta5/acos-td.h +629 -0
- data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
- data/ext/crlibm-1.0beta5/asin-td.h +620 -0
- data/ext/crlibm-1.0beta5/asincos.c +4488 -0
- data/ext/crlibm-1.0beta5/asincos.h +575 -0
- data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
- data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
- data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
- data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
- data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
- data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
- data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
- data/ext/crlibm-1.0beta5/configure.ac +419 -0
- data/ext/crlibm-1.0beta5/crlibm.h +204 -0
- data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
- data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
- data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
- data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
- data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
- data/ext/crlibm-1.0beta5/double-extended.h +496 -0
- data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
- data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
- data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
- data/ext/crlibm-1.0beta5/exp-td.h +685 -0
- data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
- data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
- data/ext/crlibm-1.0beta5/expm1.c +2515 -0
- data/ext/crlibm-1.0beta5/expm1.h +715 -0
- data/ext/crlibm-1.0beta5/interval.h +238 -0
- data/ext/crlibm-1.0beta5/log-de.c +480 -0
- data/ext/crlibm-1.0beta5/log-de.h +747 -0
- data/ext/crlibm-1.0beta5/log-de2.c +280 -0
- data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
- data/ext/crlibm-1.0beta5/log-td.c +1158 -0
- data/ext/crlibm-1.0beta5/log-td.h +819 -0
- data/ext/crlibm-1.0beta5/log.c +2244 -0
- data/ext/crlibm-1.0beta5/log.h +1592 -0
- data/ext/crlibm-1.0beta5/log10-td.c +906 -0
- data/ext/crlibm-1.0beta5/log10-td.h +823 -0
- data/ext/crlibm-1.0beta5/log1p.c +1295 -0
- data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
- data/ext/crlibm-1.0beta5/log2-td.h +821 -0
- data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
- data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
- data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
- data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
- data/ext/crlibm-1.0beta5/log_fast.c +360 -0
- data/ext/crlibm-1.0beta5/log_fast.h +440 -0
- data/ext/crlibm-1.0beta5/pow.c +1396 -0
- data/ext/crlibm-1.0beta5/pow.h +3101 -0
- data/ext/crlibm-1.0beta5/prepare +20 -0
- data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
- data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
- data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
- data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
- data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
- data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
- data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
- data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
- data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
- data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
- data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
- data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
- data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
- data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
- data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
- data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
- data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
- data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
- data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
- data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
- data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
- data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
- data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
- data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
- data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
- data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
- data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
- data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
- data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
- data/ext/crlibm-1.0beta5/trigpi.h +556 -0
- data/ext/crlibm-1.0beta5/triple-double.c +57 -0
- data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
- data/ext/crmf/crmf.c +16 -16
- data/ext/crmf/extconf.rb +12 -8
- data/lib/crmf/version.rb +1 -1
- data/tests/perf.rb +100 -219
- metadata +104 -3
- data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,1169 @@
|
|
1
|
+
#include <stdio.h>
|
2
|
+
#include <stdlib.h>
|
3
|
+
#include "crlibm.h"
|
4
|
+
/*
|
5
|
+
* Correctly rounded trigpi functions
|
6
|
+
*
|
7
|
+
* Authors : F. de Dinechin, S. Chevillard, C. Lauter (the latter two
|
8
|
+
* didn't write a line of this file, but wrote a tool that wrote a
|
9
|
+
* tool that wrote etc that wrote bits of code related to polynomial
|
10
|
+
* evaluation.)
|
11
|
+
*
|
12
|
+
* This file is part of the crlibm library developed by the Arenaire
|
13
|
+
* project at Ecole Normale Superieure de Lyon
|
14
|
+
*
|
15
|
+
* This program is free software; you can redistribute it and/or modify
|
16
|
+
* it under the terms of the GNU Lesser General Public License as published by
|
17
|
+
* the Free Software Foundation; either version 2 of the License, or
|
18
|
+
* (at your option) any later version.
|
19
|
+
*
|
20
|
+
* This program is distributed in the hope that it will be useful,
|
21
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
22
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
23
|
+
* GNU General Public License for more details.
|
24
|
+
*
|
25
|
+
* You should have received a copy of the GNU Lesser General Public License
|
26
|
+
* along with this program; if not, write to the Free Software
|
27
|
+
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
28
|
+
*/
|
29
|
+
#include "crlibm_private.h"
|
30
|
+
#include "triple-double.h"
|
31
|
+
#include "trigpi.h"
|
32
|
+
|
33
|
+
|
34
|
+
/* TODO
|
35
|
+
|
36
|
+
|
37
|
+
Use the symmetries of the tables
|
38
|
+
|
39
|
+
Actually, use the tables from the standard trigo.
|
40
|
+
|
41
|
+
Write decent quick steps. Or hammer Christoph and Sylvain to do so.
|
42
|
+
|
43
|
+
*/
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
/* This ugly bits of code in the beginning are polynomial evaluations
|
50
|
+
automagically generated and proven by Chevillard and Lauter's
|
51
|
+
tools
|
52
|
+
*/
|
53
|
+
|
54
|
+
|
55
|
+
#define sinpiacc_coeff_1h 3.14159265358979311599796346854418516159057617187500000000000000000000000000000000e+00
|
56
|
+
#define sinpiacc_coeff_1m 1.22464679914735320717376402945839660462569212467758006379625612680683843791484833e-16
|
57
|
+
#define sinpiacc_coeff_1l -2.87889731599645993207191707893463395148177292198731390393739579574603302514349608e-33
|
58
|
+
#define sinpiacc_coeff_3h -5.16771278004997025590228076907806098461151123046875000000000000000000000000000000e+00
|
59
|
+
#define sinpiacc_coeff_3m 2.26656228257550136196266687046492287115561324595258696490418515168130397796630859e-16
|
60
|
+
#define sinpiacc_coeff_5h 2.55016403987734552316624103696085512638092041015625000000000000000000000000000000e+00
|
61
|
+
#define sinpiacc_coeff_5m -7.93098961936403945684716222915171282926664203267314023904077657789457589387893677e-17
|
62
|
+
#define sinpiacc_coeff_7h -5.99264529320792105338000510528218001127243041992187500000000000000000000000000000e-01
|
63
|
+
#define sinpiacc_coeff_9h 8.21458866130424236740026344705256633460521697998046875000000000000000000000000000e-02
|
64
|
+
#define sinpiacc_coeff_11h -7.37046804820839888960914976223648409359157085418701171875000000000000000000000000e-03
|
65
|
+
|
66
|
+
|
67
|
+
#define cospiacc_coeff_0h 1.00000000000000000000000000000000000000000000000000000000000000000000000000000000e+00
|
68
|
+
#define cospiacc_coeff_2h -4.93480220054467899615247006295248866081237792968750000000000000000000000000000000e+00
|
69
|
+
#define cospiacc_coeff_2m -3.13264775437072047133490817894057799839785556899468543790021612949203699827194214e-16
|
70
|
+
#define cospiacc_coeff_4h 4.05871212641676848420502210501581430435180664062500000000000000000000000000000000e+00
|
71
|
+
#define cospiacc_coeff_4m -2.66019969731660223662555032718185048048635055542576743903282476821914315223693848e-16
|
72
|
+
#define cospiacc_coeff_6h -1.33526276885458949905682857206556946039199829101562500000000000000000000000000000e+00
|
73
|
+
#define cospiacc_coeff_8h 2.35330630358513925859398341344785876572132110595703125000000000000000000000000000e-01
|
74
|
+
#define cospiacc_coeff_10h -2.58068327360992909313974763563237502239644527435302734375000000000000000000000000e-02
|
75
|
+
|
76
|
+
static void sincospiacc(double *sinpiacc_resh, double *sinpiacc_resm, double *sinpiacc_resl,
|
77
|
+
double *cospiacc_resh, double *cospiacc_resm, double *cospiacc_resl,
|
78
|
+
double x) {
|
79
|
+
double x2h, x2m;
|
80
|
+
double sinpiacc_t_1_0h;
|
81
|
+
double sinpiacc_t_2_0h;
|
82
|
+
double sinpiacc_t_3_0h;
|
83
|
+
double sinpiacc_t_4_0h;
|
84
|
+
double sinpiacc_t_5_0h, sinpiacc_t_5_0m;
|
85
|
+
double sinpiacc_t_6_0h, sinpiacc_t_6_0m;
|
86
|
+
double sinpiacc_t_7_0h, sinpiacc_t_7_0m;
|
87
|
+
double sinpiacc_t_8_0h, sinpiacc_t_8_0m;
|
88
|
+
double sinpiacc_t_9_0h, sinpiacc_t_9_0m, sinpiacc_t_9_0l;
|
89
|
+
double sinpiacc_t_10_0h, sinpiacc_t_10_0m, sinpiacc_t_10_0l;
|
90
|
+
|
91
|
+
double cospiacc_t_1_0h;
|
92
|
+
double cospiacc_t_2_0h;
|
93
|
+
double cospiacc_t_3_0h;
|
94
|
+
double cospiacc_t_4_0h;
|
95
|
+
double cospiacc_t_5_0h, cospiacc_t_5_0m;
|
96
|
+
double cospiacc_t_6_0h, cospiacc_t_6_0m;
|
97
|
+
double cospiacc_t_7_0h, cospiacc_t_7_0m;
|
98
|
+
double cospiacc_t_8_0h, cospiacc_t_8_0m;
|
99
|
+
double cospiacc_t_9_0h, cospiacc_t_9_0m, cospiacc_t_9_0l;
|
100
|
+
|
101
|
+
Mul12(&x2h,&x2m,x,x);
|
102
|
+
|
103
|
+
sinpiacc_t_1_0h = sinpiacc_coeff_11h;
|
104
|
+
sinpiacc_t_2_0h = sinpiacc_t_1_0h * x2h;
|
105
|
+
sinpiacc_t_3_0h = sinpiacc_coeff_9h + sinpiacc_t_2_0h;
|
106
|
+
sinpiacc_t_4_0h = sinpiacc_t_3_0h * x2h;
|
107
|
+
Add12(sinpiacc_t_5_0h,sinpiacc_t_5_0m,sinpiacc_coeff_7h,sinpiacc_t_4_0h);
|
108
|
+
MulAdd22(&sinpiacc_t_6_0h,&sinpiacc_t_6_0m,sinpiacc_coeff_5h,sinpiacc_coeff_5m,x2h,x2m,sinpiacc_t_5_0h,sinpiacc_t_5_0m);
|
109
|
+
MulAdd22(&sinpiacc_t_7_0h,&sinpiacc_t_7_0m,sinpiacc_coeff_3h,sinpiacc_coeff_3m,x2h,x2m,sinpiacc_t_6_0h,sinpiacc_t_6_0m);
|
110
|
+
Mul22(&sinpiacc_t_8_0h,&sinpiacc_t_8_0m,sinpiacc_t_7_0h,sinpiacc_t_7_0m,x2h,x2m);
|
111
|
+
Add233Cond(&sinpiacc_t_9_0h,&sinpiacc_t_9_0m,&sinpiacc_t_9_0l,sinpiacc_t_8_0h,sinpiacc_t_8_0m,sinpiacc_coeff_1h,sinpiacc_coeff_1m,sinpiacc_coeff_1l);
|
112
|
+
Mul133(&sinpiacc_t_10_0h,&sinpiacc_t_10_0m,&sinpiacc_t_10_0l,x,sinpiacc_t_9_0h,sinpiacc_t_9_0m,sinpiacc_t_9_0l);
|
113
|
+
Renormalize3(sinpiacc_resh,sinpiacc_resm,sinpiacc_resl,sinpiacc_t_10_0h,sinpiacc_t_10_0m,sinpiacc_t_10_0l);
|
114
|
+
|
115
|
+
|
116
|
+
cospiacc_t_1_0h = cospiacc_coeff_10h;
|
117
|
+
cospiacc_t_2_0h = cospiacc_t_1_0h * x2h;
|
118
|
+
cospiacc_t_3_0h = cospiacc_coeff_8h + cospiacc_t_2_0h;
|
119
|
+
cospiacc_t_4_0h = cospiacc_t_3_0h * x2h;
|
120
|
+
Add12(cospiacc_t_5_0h,cospiacc_t_5_0m,cospiacc_coeff_6h,cospiacc_t_4_0h);
|
121
|
+
MulAdd22(&cospiacc_t_6_0h,&cospiacc_t_6_0m,cospiacc_coeff_4h,cospiacc_coeff_4m,x2h,x2m,cospiacc_t_5_0h,cospiacc_t_5_0m);
|
122
|
+
MulAdd22(&cospiacc_t_7_0h,&cospiacc_t_7_0m,cospiacc_coeff_2h,cospiacc_coeff_2m,x2h,x2m,cospiacc_t_6_0h,cospiacc_t_6_0m);
|
123
|
+
Mul22(&cospiacc_t_8_0h,&cospiacc_t_8_0m,cospiacc_t_7_0h,cospiacc_t_7_0m,x2h,x2m);
|
124
|
+
Add123(&cospiacc_t_9_0h,&cospiacc_t_9_0m,&cospiacc_t_9_0l,cospiacc_coeff_0h,cospiacc_t_8_0h,cospiacc_t_8_0m);
|
125
|
+
*cospiacc_resh = cospiacc_t_9_0h; *cospiacc_resm = cospiacc_t_9_0m; *cospiacc_resl = cospiacc_t_9_0l;
|
126
|
+
|
127
|
+
}
|
128
|
+
|
129
|
+
|
130
|
+
|
131
|
+
|
132
|
+
|
133
|
+
|
134
|
+
|
135
|
+
|
136
|
+
|
137
|
+
|
138
|
+
|
139
|
+
/* Comment on comparing sa, ca, sy and cy
|
140
|
+
either index=0, then sa=0 and ca=1, therefore t2=0, and the Add33 will be exact
|
141
|
+
or index !=0, and
|
142
|
+
-eps1 < sy < eps1 (but sy may be negative)
|
143
|
+
sa > eps1 (sa>0)
|
144
|
+
1-eps2 < cy < 1
|
145
|
+
ca < 1-eps2
|
146
|
+
therefore
|
147
|
+
sacy = t2 >0
|
148
|
+
casy = t1 may be negative
|
149
|
+
abs(t1) <= abs(t2)
|
150
|
+
Unfortunately we need a stronger condition to use the Add33
|
151
|
+
*/
|
152
|
+
|
153
|
+
|
154
|
+
|
155
|
+
|
156
|
+
static void sinpi_accurate(double *rh, double *rm, double *rl,
|
157
|
+
double y, int index, int quadrant)
|
158
|
+
{
|
159
|
+
double syh, sym, syl, cyh, cym, cyl, sah, sam, sal, cah, cam, cal;
|
160
|
+
double t1h, t1m, t1l, t2h, t2m, t2l;
|
161
|
+
|
162
|
+
sincospiacc(&syh, &sym, &syl, &cyh, &cym, &cyl, y);
|
163
|
+
|
164
|
+
sah=sincosTable[index].sh;
|
165
|
+
cah=sincosTable[index].ch;
|
166
|
+
sam=sincosTable[index].sm;
|
167
|
+
cam=sincosTable[index].cm;
|
168
|
+
sal=sincosTable[index].sl;
|
169
|
+
cal=sincosTable[index].cl;
|
170
|
+
|
171
|
+
if(quadrant==0 || quadrant==2) {
|
172
|
+
/* compute sy*ca+sa*cy : t1 = sy*ca, t2 = sa*cy*/
|
173
|
+
Mul33(&t1h,&t1m,&t1l, syh,sym,syl, cah,cam,cal);
|
174
|
+
Mul33(&t2h,&t2m,&t2l, sah,sam,sal, cyh,cym,cyl);
|
175
|
+
Add33Cond(rh, rm, rl, t2h,t2m,t2l, t1h,t1m,t1l);
|
176
|
+
}
|
177
|
+
else {
|
178
|
+
/* compute cy*ca - sa*sy : t1 = cy*ca, t2 = sa*sy */
|
179
|
+
Mul33(&t1h,&t1m,&t1l, cyh,cym,cyl, cah,cam,cal);
|
180
|
+
Mul33(&t2h,&t2m,&t2l, sah,sam,sal, syh,sym,syl);
|
181
|
+
Add33Cond(rh, rm, rl, t1h,t1m,t1l, -t2h,-t2m,-t2l);
|
182
|
+
}
|
183
|
+
|
184
|
+
if (quadrant>=2) {
|
185
|
+
*rh = -*rh;
|
186
|
+
*rm = -*rm;
|
187
|
+
*rl = -*rl;
|
188
|
+
}
|
189
|
+
|
190
|
+
};
|
191
|
+
|
192
|
+
|
193
|
+
|
194
|
+
|
195
|
+
|
196
|
+
|
197
|
+
#define sinpiquick_coeff_1h 3.14159265358979311599796346854418516159057617187500000000000000000000000000000000e+00
|
198
|
+
#define sinpiquick_coeff_1m 1.22464971683184787123862072310058851157814368464452070561776508839102461934089661e-16
|
199
|
+
#define sinpiquick_coeff_3h -5.16771278004997025590228076907806098461151123046875000000000000000000000000000000e+00
|
200
|
+
#define sinpiquick_coeff_5h 2.55016403989992213041659852024167776107788085937500000000000000000000000000000000e+00
|
201
|
+
#define sinpiquick_coeff_7h -5.99263913290728922333983064163476228713989257812500000000000000000000000000000000e-01
|
202
|
+
#define cospiquick_coeff_0h 1.00000000000000000000000000000000000000000000000000000000000000000000000000000000e+00
|
203
|
+
#define cospiquick_coeff_2h -4.93480220054467899615247006295248866081237792968750000000000000000000000000000000e+00
|
204
|
+
#define cospiquick_coeff_4h 4.05871212632582167856298838160000741481781005859375000000000000000000000000000000e+00
|
205
|
+
#define cospiquick_coeff_6h -1.33525456323720947970912220625905320048332214355468750000000000000000000000000000e+00
|
206
|
+
|
207
|
+
|
208
|
+
|
209
|
+
static void cospi_accurate(double *rh, double *rm, double *rl,
|
210
|
+
double y, int index, int quadrant)
|
211
|
+
{
|
212
|
+
double syh, sym, syl, cyh, cym, cyl, sah, sam, sal, cah, cam, cal;
|
213
|
+
double t1h, t1m, t1l, t2h, t2m, t2l;
|
214
|
+
|
215
|
+
sincospiacc(&syh, &sym, &syl, &cyh, &cym, &cyl, y);
|
216
|
+
|
217
|
+
sah=sincosTable[index].sh;
|
218
|
+
cah=sincosTable[index].ch;
|
219
|
+
sam=sincosTable[index].sm;
|
220
|
+
cam=sincosTable[index].cm;
|
221
|
+
sal=sincosTable[index].sl;
|
222
|
+
cal=sincosTable[index].cl;
|
223
|
+
|
224
|
+
if(quadrant==0 || quadrant==2) {
|
225
|
+
/* compute cy*ca - sa*sy : t1 = cy*ca, t2 = sa*sy */
|
226
|
+
Mul33(&t1h,&t1m,&t1l, cyh,cym,cyl, cah,cam,cal);
|
227
|
+
Mul33(&t2h,&t2m,&t2l, sah,sam,sal, syh,sym,syl);
|
228
|
+
Add33Cond(rh, rm, rl, t1h,t1m,t1l, -t2h,-t2m,-t2l);
|
229
|
+
}
|
230
|
+
else {
|
231
|
+
/* compute sy*ca+sa*cy : t1 = sy*ca, t2 = sa*cy*/
|
232
|
+
Mul33(&t1h,&t1m,&t1l, syh,sym,syl, cah,cam,cal);
|
233
|
+
Mul33(&t2h,&t2m,&t2l, sah,sam,sal, cyh,cym,cyl);
|
234
|
+
Add33Cond(rh, rm, rl, t2h,t2m,t2l, t1h,t1m,t1l);
|
235
|
+
}
|
236
|
+
|
237
|
+
if (quadrant==1 || quadrant==2) {
|
238
|
+
*rh = -*rh;
|
239
|
+
*rm = -*rm;
|
240
|
+
*rl = -*rl;
|
241
|
+
}
|
242
|
+
|
243
|
+
};
|
244
|
+
|
245
|
+
|
246
|
+
|
247
|
+
|
248
|
+
|
249
|
+
|
250
|
+
/* This one can clearly be improved. It was set up in less than one hour */
|
251
|
+
void sinpiquick(double *rh, double *rm, double x, int index, int quadrant) {
|
252
|
+
double x2h, x2m;
|
253
|
+
double sinpiquick_t_1_0h;
|
254
|
+
double sinpiquick_t_2_0h;
|
255
|
+
double sinpiquick_t_3_0h;
|
256
|
+
double sinpiquick_t_4_0h;
|
257
|
+
double sinpiquick_t_5_0h, sinpiquick_t_5_0m;
|
258
|
+
double sinpiquick_t_6_0h, sinpiquick_t_6_0m;
|
259
|
+
double sinpiquick_t_7_0h, sinpiquick_t_7_0m;
|
260
|
+
double syh, sym;
|
261
|
+
double cospiquick_t_1_0h;
|
262
|
+
double cospiquick_t_2_0h;
|
263
|
+
double cospiquick_t_3_0h;
|
264
|
+
double cospiquick_t_4_0h;
|
265
|
+
double cospiquick_t_5_0h, cospiquick_t_5_0m;
|
266
|
+
double cospiquick_t_6_0h, cospiquick_t_6_0m;
|
267
|
+
double cospiquick_t_7_0h, cospiquick_t_7_0m;
|
268
|
+
double cyh, cym;
|
269
|
+
double t1h, t1m, t2h, t2m, sah, sam, cah,cam;
|
270
|
+
|
271
|
+
Mul12(&x2h,&x2m,x,x);
|
272
|
+
|
273
|
+
sah=sincosTable[index].sh;
|
274
|
+
cah=sincosTable[index].ch;
|
275
|
+
sam=sincosTable[index].sm;
|
276
|
+
cam=sincosTable[index].cm;
|
277
|
+
|
278
|
+
sinpiquick_t_1_0h = sinpiquick_coeff_7h;
|
279
|
+
sinpiquick_t_2_0h = sinpiquick_t_1_0h * x2h;
|
280
|
+
sinpiquick_t_3_0h = sinpiquick_coeff_5h + sinpiquick_t_2_0h;
|
281
|
+
sinpiquick_t_4_0h = sinpiquick_t_3_0h * x2h;
|
282
|
+
Add12(sinpiquick_t_5_0h,sinpiquick_t_5_0m,sinpiquick_coeff_3h,sinpiquick_t_4_0h);
|
283
|
+
MulAdd22(&sinpiquick_t_6_0h,&sinpiquick_t_6_0m,sinpiquick_coeff_1h,sinpiquick_coeff_1m,x2h,x2m,sinpiquick_t_5_0h,sinpiquick_t_5_0m);
|
284
|
+
Mul122(&sinpiquick_t_7_0h,&sinpiquick_t_7_0m,x,sinpiquick_t_6_0h,sinpiquick_t_6_0m);
|
285
|
+
syh = sinpiquick_t_7_0h; sym = sinpiquick_t_7_0m;
|
286
|
+
|
287
|
+
cospiquick_t_1_0h = cospiquick_coeff_6h;
|
288
|
+
cospiquick_t_2_0h = cospiquick_t_1_0h * x2h;
|
289
|
+
cospiquick_t_3_0h = cospiquick_coeff_4h + cospiquick_t_2_0h;
|
290
|
+
cospiquick_t_4_0h = cospiquick_t_3_0h * x2h;
|
291
|
+
Add12(cospiquick_t_5_0h,cospiquick_t_5_0m,cospiquick_coeff_2h,cospiquick_t_4_0h);
|
292
|
+
Mul22(&cospiquick_t_6_0h,&cospiquick_t_6_0m,cospiquick_t_5_0h,cospiquick_t_5_0m,x2h,x2m);
|
293
|
+
Add122(&cospiquick_t_7_0h,&cospiquick_t_7_0m,cospiquick_coeff_0h,cospiquick_t_6_0h,cospiquick_t_6_0m);
|
294
|
+
cyh = cospiquick_t_7_0h; cym = cospiquick_t_7_0m;
|
295
|
+
|
296
|
+
/* Here comes the hand-written, unproven yet code */
|
297
|
+
if(quadrant==0 || quadrant==2) {
|
298
|
+
/* compute sy*ca+sa*cy : t1 = sy*ca, t2 = sa*cy*/
|
299
|
+
Mul22(&t1h,&t1m, syh,sym, cah,cam);
|
300
|
+
Mul22(&t2h,&t2m, sah,sam, cyh,cym);
|
301
|
+
Add22Cond(rh, rm, t2h,t2m, t1h,t1m);
|
302
|
+
}
|
303
|
+
else {
|
304
|
+
/* compute cy*ca - sa*sy : t1 = cy*ca, t2 = sa*sy */
|
305
|
+
Mul22(&t1h,&t1m, cyh,cym, cah,cam);
|
306
|
+
Mul22(&t2h,&t2m, sah,sam, syh,sym);
|
307
|
+
Add22Cond(rh, rm, t1h,t1m, -t2h,-t2m);
|
308
|
+
}
|
309
|
+
|
310
|
+
if (quadrant>=2) {
|
311
|
+
*rh = -*rh;
|
312
|
+
*rm = -*rm;
|
313
|
+
}
|
314
|
+
|
315
|
+
}
|
316
|
+
|
317
|
+
|
318
|
+
|
319
|
+
|
320
|
+
|
321
|
+
|
322
|
+
|
323
|
+
double sinpi_rn(double x){
|
324
|
+
double xs, y,u, rh, rm, rl, sign,absx;
|
325
|
+
db_number xdb, t;
|
326
|
+
int32_t xih, absxih, index, quadrant;
|
327
|
+
|
328
|
+
if (x<0) absx = -x; else absx = x;
|
329
|
+
|
330
|
+
xdb.d = x;
|
331
|
+
|
332
|
+
xs = x*128.0;
|
333
|
+
|
334
|
+
/* argument reduction */
|
335
|
+
if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
|
336
|
+
t.d = xs;
|
337
|
+
t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
|
338
|
+
So what remains in t is an FP integer almost as large as x */
|
339
|
+
xs = xs-t.d; /* we are going to throw away the int part anyway */
|
340
|
+
}
|
341
|
+
|
342
|
+
t.d = TWOTO5251 + xs;
|
343
|
+
u = t.d - TWOTO5251;
|
344
|
+
y = xs - u;
|
345
|
+
index = t.i[LO] & 0x3f;
|
346
|
+
quadrant = (t.i[LO] & 0xff) >>6;
|
347
|
+
|
348
|
+
/* Special case tests come late because the conversion FP to int is slow */
|
349
|
+
xih = xdb.i[HI];
|
350
|
+
absxih = xih & 0x7fffffff;
|
351
|
+
if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
|
352
|
+
|
353
|
+
if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
|
354
|
+
|
355
|
+
y = y * INV128;
|
356
|
+
|
357
|
+
/* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
|
358
|
+
if (absxih>=0x7ff00000) {
|
359
|
+
xdb.l=0xfff8000000000000LL;
|
360
|
+
return xdb.d - xdb.d;
|
361
|
+
}
|
362
|
+
|
363
|
+
if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
|
364
|
+
return sign*0.0; /*signed */
|
365
|
+
|
366
|
+
if(absxih<=0x3E000000) /*2^{-31}*/ {
|
367
|
+
if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
|
368
|
+
/* in this case, SCS computation, accurate to 2^-210 which is provably enough */
|
369
|
+
scs_t result;
|
370
|
+
scs_set_d(result, x );
|
371
|
+
scs_mul(result, PiSCS_ptr, result);
|
372
|
+
scs_get_d(&rh, result);
|
373
|
+
return rh;
|
374
|
+
}
|
375
|
+
/* First step for Pi*x. TODO: FMA-based optimisation */
|
376
|
+
const double DekkerConst = 134217729.; /* 2^27 +1 */
|
377
|
+
double tt, xh, xl;
|
378
|
+
/* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
|
379
|
+
tt = x*DekkerConst;
|
380
|
+
xh = (x-tt)+tt;
|
381
|
+
xl = x-xh;
|
382
|
+
Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
|
383
|
+
if(rh == (rh + (rl * PIX_RNCST_SIN)))
|
384
|
+
return rh;
|
385
|
+
}
|
386
|
+
/* Fall here either if we have a large input, or if we have a small
|
387
|
+
input and the rounding test fails. */
|
388
|
+
sinpiquick(&rh, &rm, y, index, quadrant);
|
389
|
+
if (rh==rh+1.00001*rm) /* See trigpiquick.gappa. This first step is ridiculously too accurate */
|
390
|
+
return rh;
|
391
|
+
sinpi_accurate(&rh, &rm, &rl, y, index, quadrant);
|
392
|
+
ReturnRoundToNearest3(rh,rm,rl);
|
393
|
+
}
|
394
|
+
|
395
|
+
|
396
|
+
|
397
|
+
|
398
|
+
|
399
|
+
|
400
|
+
|
401
|
+
|
402
|
+
|
403
|
+
double sinpi_rd(double x){
|
404
|
+
double xs, y,u, rh, rm, rl, sign,absx;
|
405
|
+
db_number xdb, t;
|
406
|
+
int32_t xih, absxih, index, quadrant;
|
407
|
+
|
408
|
+
if (x<0) absx = -x; else absx = x;
|
409
|
+
|
410
|
+
xdb.d = x;
|
411
|
+
|
412
|
+
xs = x*128.0;
|
413
|
+
|
414
|
+
/* argument reduction */
|
415
|
+
if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
|
416
|
+
t.d = xs;
|
417
|
+
t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
|
418
|
+
So what remains in t is an FP integer almost as large as x */
|
419
|
+
xs = xs-t.d; /* we are going to throw away the int part anyway */
|
420
|
+
}
|
421
|
+
|
422
|
+
t.d = TWOTO5251 + xs;
|
423
|
+
u = t.d - TWOTO5251;
|
424
|
+
y = xs - u;
|
425
|
+
index = t.i[LO] & 0x3f;
|
426
|
+
quadrant = (t.i[LO] & 0xff) >>6;
|
427
|
+
|
428
|
+
/* Special case tests come late because the conversion FP to int is slow */
|
429
|
+
xih = xdb.i[HI];
|
430
|
+
absxih = xih & 0x7fffffff;
|
431
|
+
if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
|
432
|
+
|
433
|
+
if(index==0 && y==0.0 && ((quadrant&1)==0)) return -0.0; /*signed, inspired by LIA-2 */
|
434
|
+
|
435
|
+
y = y * INV128;
|
436
|
+
|
437
|
+
/* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
|
438
|
+
if (absxih>=0x7ff00000) {
|
439
|
+
xdb.l=0xfff8000000000000LL;
|
440
|
+
return xdb.d - xdb.d;
|
441
|
+
}
|
442
|
+
|
443
|
+
if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
|
444
|
+
return sign*0.0; /*signed */
|
445
|
+
|
446
|
+
if(absxih<=0x3E000000) /*2^{-31}*/ {
|
447
|
+
if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
|
448
|
+
/* in this case, SCS computation, accurate to 2^-210 which is provably enough */
|
449
|
+
scs_t result;
|
450
|
+
scs_set_d(result, x );
|
451
|
+
scs_mul(result, PiSCS_ptr, result);
|
452
|
+
scs_get_d_minf(&rh, result);
|
453
|
+
return rh;
|
454
|
+
}
|
455
|
+
/* First step for Pi*x. TODO: FMA-based optimisation */
|
456
|
+
const double DekkerConst = 134217729.; /* 2^27 +1 */
|
457
|
+
double tt, xh, xl;
|
458
|
+
/* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
|
459
|
+
tt = x*DekkerConst;
|
460
|
+
xh = (x-tt)+tt;
|
461
|
+
xl = x-xh;
|
462
|
+
Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
|
463
|
+
TEST_AND_RETURN_RD(rh,rl,PIX_EPS_SIN);
|
464
|
+
}
|
465
|
+
/* Fall here either if we have a large input, or if we have a small
|
466
|
+
input and the rounding test fails. */
|
467
|
+
sinpi_accurate(&rh, &rm, &rl, y, index, quadrant);
|
468
|
+
|
469
|
+
ReturnRoundDownwards3(rh,rm,rl);
|
470
|
+
};
|
471
|
+
|
472
|
+
|
473
|
+
|
474
|
+
double sinpi_ru(double x){
|
475
|
+
double xs, y,u, rh, rm, rl, sign,absx;
|
476
|
+
db_number xdb, t;
|
477
|
+
int32_t xih, absxih, index, quadrant;
|
478
|
+
|
479
|
+
if (x<0) absx = -x; else absx = x;
|
480
|
+
|
481
|
+
xdb.d = x;
|
482
|
+
|
483
|
+
xs = x*128.0;
|
484
|
+
|
485
|
+
/* argument reduction */
|
486
|
+
if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
|
487
|
+
t.d = xs;
|
488
|
+
t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
|
489
|
+
So what remains in t is an FP integer almost as large as x */
|
490
|
+
xs = xs-t.d; /* we are going to throw away the int part anyway */
|
491
|
+
}
|
492
|
+
|
493
|
+
t.d = TWOTO5251 + xs;
|
494
|
+
u = t.d - TWOTO5251;
|
495
|
+
y = xs - u;
|
496
|
+
index = t.i[LO] & 0x3f;
|
497
|
+
quadrant = (t.i[LO] & 0xff) >>6;
|
498
|
+
|
499
|
+
/* Special case tests come late because the conversion FP to int is slow */
|
500
|
+
xih = xdb.i[HI];
|
501
|
+
absxih = xih & 0x7fffffff;
|
502
|
+
if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
|
503
|
+
|
504
|
+
if(index==0 && y==0.0 && ((quadrant&1)==0)) return +0.0; /*signed, inspired by LIA-2 */
|
505
|
+
|
506
|
+
y = y * INV128;
|
507
|
+
|
508
|
+
/* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
|
509
|
+
if (absxih>=0x7ff00000) {
|
510
|
+
xdb.l=0xfff8000000000000LL;
|
511
|
+
return xdb.d - xdb.d;
|
512
|
+
}
|
513
|
+
|
514
|
+
if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
|
515
|
+
return sign*0.0; /*signed */
|
516
|
+
|
517
|
+
if(absxih<=0x3E000000) /*2^{-31}*/ {
|
518
|
+
if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
|
519
|
+
/* in this case, SCS computation, accurate to 2^-210 which is provably enough */
|
520
|
+
scs_t result;
|
521
|
+
scs_set_d(result, x );
|
522
|
+
scs_mul(result, PiSCS_ptr, result);
|
523
|
+
scs_get_d_pinf(&rh, result);
|
524
|
+
return rh;
|
525
|
+
}
|
526
|
+
/* First step for Pi*x. TODO: FMA-based optimisation */
|
527
|
+
const double DekkerConst = 134217729.; /* 2^27 +1 */
|
528
|
+
double tt, xh, xl;
|
529
|
+
/* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
|
530
|
+
tt = x*DekkerConst;
|
531
|
+
xh = (x-tt)+tt;
|
532
|
+
xl = x-xh;
|
533
|
+
Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
|
534
|
+
TEST_AND_RETURN_RU(rh,rl,PIX_EPS_SIN);
|
535
|
+
}
|
536
|
+
/* Fall here either if we have a large input, or if we have a small
|
537
|
+
input and the rounding test fails. */
|
538
|
+
sinpi_accurate(&rh, &rm, &rl, y, index, quadrant);
|
539
|
+
|
540
|
+
ReturnRoundUpwards3(rh,rm,rl);
|
541
|
+
|
542
|
+
};
|
543
|
+
|
544
|
+
|
545
|
+
|
546
|
+
|
547
|
+
double sinpi_rz(double x){
|
548
|
+
double xs, y,u, rh, rm, rl, sign,absx;
|
549
|
+
db_number xdb, t;
|
550
|
+
int32_t xih, absxih, index, quadrant;
|
551
|
+
|
552
|
+
if (x<0) absx = -x; else absx = x;
|
553
|
+
|
554
|
+
xdb.d = x;
|
555
|
+
|
556
|
+
xs = x*128.0;
|
557
|
+
|
558
|
+
/* argument reduction */
|
559
|
+
if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
|
560
|
+
t.d = xs;
|
561
|
+
t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
|
562
|
+
So what remains in t is an FP integer almost as large as x */
|
563
|
+
xs = xs-t.d; /* we are going to throw away the int part anyway */
|
564
|
+
}
|
565
|
+
|
566
|
+
t.d = TWOTO5251 + xs;
|
567
|
+
u = t.d - TWOTO5251;
|
568
|
+
y = xs - u;
|
569
|
+
index = t.i[LO] & 0x3f;
|
570
|
+
quadrant = (t.i[LO] & 0xff) >>6;
|
571
|
+
|
572
|
+
/* Special case tests come late because the conversion FP to int is slow */
|
573
|
+
xih = xdb.i[HI];
|
574
|
+
absxih = xih & 0x7fffffff;
|
575
|
+
if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
|
576
|
+
|
577
|
+
if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
|
578
|
+
|
579
|
+
y = y * INV128;
|
580
|
+
|
581
|
+
/* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
|
582
|
+
if (absxih>=0x7ff00000) {
|
583
|
+
xdb.l=0xfff8000000000000LL;
|
584
|
+
return xdb.d - xdb.d;
|
585
|
+
}
|
586
|
+
|
587
|
+
if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
|
588
|
+
return sign*0.0; /*signed */
|
589
|
+
|
590
|
+
if(absxih<=0x3E000000) /*2^{-31}*/ {
|
591
|
+
if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
|
592
|
+
/* in this case, SCS computation, accurate to 2^-210 which is provably enough */
|
593
|
+
scs_t result;
|
594
|
+
scs_set_d(result, x );
|
595
|
+
scs_mul(result, PiSCS_ptr, result);
|
596
|
+
scs_get_d_zero(&rh, result);
|
597
|
+
return rh;
|
598
|
+
}
|
599
|
+
/* First step for Pi*x. TODO: FMA-based optimisation */
|
600
|
+
const double DekkerConst = 134217729.; /* 2^27 +1 */
|
601
|
+
double tt, xh, xl;
|
602
|
+
/* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
|
603
|
+
tt = x*DekkerConst;
|
604
|
+
xh = (x-tt)+tt;
|
605
|
+
xl = x-xh;
|
606
|
+
Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
|
607
|
+
TEST_AND_RETURN_RZ(rh,rl,PIX_EPS_SIN);
|
608
|
+
}
|
609
|
+
/* Fall here either if we have a large input, or if we have a small
|
610
|
+
input and the rounding test fails. */
|
611
|
+
sinpi_accurate(&rh, &rm, &rl, y, index, quadrant);
|
612
|
+
|
613
|
+
ReturnRoundTowardsZero3(rh,rm,rl);
|
614
|
+
};
|
615
|
+
|
616
|
+
|
617
|
+
|
618
|
+
|
619
|
+
|
620
|
+
|
621
|
+
|
622
|
+
|
623
|
+
|
624
|
+
/* to nearest */
|
625
|
+
double cospi_rn(double x){
|
626
|
+
double xs, y,u, rh, rm, rl, absx;
|
627
|
+
db_number xdb, t;
|
628
|
+
int32_t xih, absxih, index, quadrant;
|
629
|
+
|
630
|
+
if (x<0) absx =-x; else absx=x;
|
631
|
+
|
632
|
+
xdb.d=x;
|
633
|
+
xs = x*128.0;
|
634
|
+
|
635
|
+
/* argument reduction.
|
636
|
+
We do it before the special case tests for performance,
|
637
|
+
it might compute garbage for inf, very large inputs, etc */
|
638
|
+
if(absx> TWOTO42 ) { /* 2^42, x is very large, let us first subtract a large integer from it */
|
639
|
+
t.d = xs;
|
640
|
+
t.i[LO] =0; /* remove the low part, in which was the coma since x > 2^42.
|
641
|
+
So what remains in t is an FP integer almost as large as x */
|
642
|
+
xs = xs-t.d; /* we are going to throw away the int part anyway */
|
643
|
+
}
|
644
|
+
|
645
|
+
t.d = TWOTO5251 + xs;
|
646
|
+
u = t.d - TWOTO5251;
|
647
|
+
y = xs - u;
|
648
|
+
y = y * INV128;
|
649
|
+
index = t.i[LO] & 0x3f;
|
650
|
+
quadrant = (t.i[LO] & 0xff) >>6;
|
651
|
+
|
652
|
+
/* Special case tests come late because the conversion FP to int is slow */
|
653
|
+
xih = xdb.i[HI];
|
654
|
+
absxih = xih & 0x7fffffff;
|
655
|
+
|
656
|
+
/* SPECIAL CASES: x=(Nan, Inf) cos(pi*x)=Nan */
|
657
|
+
|
658
|
+
if (absxih>=0x7ff00000) {
|
659
|
+
xdb.l=0xfff8000000000000LL;
|
660
|
+
return xdb.d - xdb.d;
|
661
|
+
}
|
662
|
+
|
663
|
+
if(absxih>=0x43400000) /* 2^53, which entails that x is an even integer */
|
664
|
+
return 1.0;
|
665
|
+
|
666
|
+
if(index==0 && y==0. && ((quadrant&1)==1)) return +0.;
|
667
|
+
/* Always +0, inpired by LIA2; We do not have cos(x+pi) == - cos(x)
|
668
|
+
in this case */
|
669
|
+
|
670
|
+
if(index==0 && y==0. && quadrant==0) return 1.;
|
671
|
+
if(index==0 && y==0. && quadrant==2) return -1.;
|
672
|
+
|
673
|
+
if (absxih<0x3E26A09E) /* sqrt(2^-53)/4 */
|
674
|
+
return 1.0;
|
675
|
+
/* printf("\n\nint part = %f frac part = %f index=%d quadrant=%d \n", u, y, index, quadrant);
|
676
|
+
*/
|
677
|
+
|
678
|
+
cospi_accurate(&rh, &rm, &rl, y, index, quadrant);
|
679
|
+
ReturnRoundToNearest3(rh,rm,rl);
|
680
|
+
};
|
681
|
+
|
682
|
+
|
683
|
+
|
684
|
+
|
685
|
+
|
686
|
+
double cospi_rd(double x){
|
687
|
+
double xs, y,u, rh, rm, rl, absx;
|
688
|
+
db_number xdb, t;
|
689
|
+
int32_t xih, absxih, index, quadrant;
|
690
|
+
|
691
|
+
if (x<0) absx =-x; else absx=x;
|
692
|
+
|
693
|
+
xdb.d=x;
|
694
|
+
xs = x*128.0;
|
695
|
+
|
696
|
+
|
697
|
+
/* argument reduction.
|
698
|
+
We do it before the special case tests for performance,
|
699
|
+
it might compute garbage for inf, very large inputs, etc */
|
700
|
+
if(absx> TWOTO42 ) { /* 2^42, x is very large, let us first subtract a large integer from it */
|
701
|
+
t.d = xs;
|
702
|
+
t.i[LO] =0; /* remove the low part, in which was the coma since x > 2^42.
|
703
|
+
So what remains in t is an FP integer almost as large as x */
|
704
|
+
xs = xs-t.d; /* we are going to throw away the int part anyway */
|
705
|
+
}
|
706
|
+
|
707
|
+
t.d = TWOTO5251 + xs;
|
708
|
+
u = t.d - TWOTO5251;
|
709
|
+
y = xs - u;
|
710
|
+
y = y * INV128;
|
711
|
+
index = t.i[LO] & 0x3f;
|
712
|
+
quadrant = (t.i[LO] & 0xff) >>6;
|
713
|
+
|
714
|
+
|
715
|
+
/* Special case tests come late because the conversion FP to int is slow */
|
716
|
+
xih = xdb.i[HI];
|
717
|
+
absxih = xih & 0x7fffffff;
|
718
|
+
|
719
|
+
/* SPECIAL CASES: x=(Nan, Inf) cos(pi*x)=Nan */
|
720
|
+
|
721
|
+
if (absxih>=0x7ff00000) {
|
722
|
+
xdb.l=0xfff8000000000000LL;
|
723
|
+
return xdb.d - xdb.d;
|
724
|
+
}
|
725
|
+
|
726
|
+
if(absxih>=0x43400000) /* 2^53, which entails that x is an even integer */
|
727
|
+
return 1.0; /*signed */
|
728
|
+
|
729
|
+
if(index==0 && y==0. && ((quadrant&1)==1)) return -0.;
|
730
|
+
|
731
|
+
if(index==0 && y==0. && quadrant==0) return 1.;
|
732
|
+
if(index==0 && y==0. && quadrant==2) return -1.;
|
733
|
+
|
734
|
+
if (absxih<0x3E200000) /* 2^-29 */
|
735
|
+
return 0.9999999999999998889776975374843459576368331909179687500; /* 1-2^-53 */
|
736
|
+
/* Always +0, inpired by LIA2; We do not have cos(x+pi) == - cos(x)
|
737
|
+
in this case */
|
738
|
+
|
739
|
+
cospi_accurate(&rh, &rm, &rl, y, index, quadrant);
|
740
|
+
ReturnRoundDownwards3(rh,rm,rl);
|
741
|
+
};
|
742
|
+
|
743
|
+
|
744
|
+
|
745
|
+
|
746
|
+
double cospi_ru(double x){
|
747
|
+
double xs, y,u, rh, rm, rl, absx;
|
748
|
+
db_number xdb, t;
|
749
|
+
int32_t xih, absxih, index, quadrant;
|
750
|
+
|
751
|
+
if (x<0) absx =-x; else absx=x;
|
752
|
+
|
753
|
+
xdb.d=x;
|
754
|
+
xs = x*128.0;
|
755
|
+
|
756
|
+
|
757
|
+
/* argument reduction.
|
758
|
+
We do it before the special case tests for performance,
|
759
|
+
it might compute garbage for inf, very large inputs, etc */
|
760
|
+
if(absx> TWOTO42 ) { /* 2^42, x is very large, let us first subtract a large integer from it */
|
761
|
+
t.d = xs;
|
762
|
+
t.i[LO] =0; /* remove the low part, in which was the coma since x > 2^42.
|
763
|
+
So what remains in t is an FP integer almost as large as x */
|
764
|
+
xs = xs-t.d; /* we are going to throw away the int part anyway */
|
765
|
+
}
|
766
|
+
|
767
|
+
t.d = TWOTO5251 + xs;
|
768
|
+
u = t.d - TWOTO5251;
|
769
|
+
y = xs - u;
|
770
|
+
y = y * INV128;
|
771
|
+
index = t.i[LO] & 0x3f;
|
772
|
+
quadrant = (t.i[LO] & 0xff) >>6;
|
773
|
+
|
774
|
+
|
775
|
+
/* Special case tests come late because the conversion FP to int is slow */
|
776
|
+
xih = xdb.i[HI];
|
777
|
+
absxih = xih & 0x7fffffff;
|
778
|
+
|
779
|
+
/* SPECIAL CASES: x=(Nan, Inf) cos(pi*x)=Nan */
|
780
|
+
if (absxih>=0x7ff00000) {
|
781
|
+
xdb.l=0xfff8000000000000LL;
|
782
|
+
return xdb.d - xdb.d;
|
783
|
+
}
|
784
|
+
|
785
|
+
if(absxih>=0x43400000) /* 2^53, which entails that x is an even integer */
|
786
|
+
return 1.0; /*signed */
|
787
|
+
|
788
|
+
if(index==0 && y==0. && quadrant==0) return 1.;
|
789
|
+
if(index==0 && y==0. && quadrant==2) return -1.;
|
790
|
+
|
791
|
+
if(index==0 && y==0. && ((quadrant&1)==1)) return +0.;
|
792
|
+
/* Always +0, inpired by LIA2; We do not have cos(x+pi) == - cos(x)
|
793
|
+
in this case */
|
794
|
+
|
795
|
+
if (absxih<0x3E200000) /* 2^-29 */
|
796
|
+
return 1;
|
797
|
+
|
798
|
+
cospi_accurate(&rh, &rm, &rl, y, index, quadrant);
|
799
|
+
ReturnRoundUpwards3(rh,rm,rl);
|
800
|
+
};
|
801
|
+
|
802
|
+
|
803
|
+
|
804
|
+
|
805
|
+
double cospi_rz(double x){
|
806
|
+
double xs, y,u, rh, rm, rl, absx;
|
807
|
+
db_number xdb, t;
|
808
|
+
int32_t xih, absxih, index, quadrant;
|
809
|
+
|
810
|
+
if (x<0) absx =-x; else absx=x;
|
811
|
+
|
812
|
+
xdb.d=x;
|
813
|
+
xs = x*128.0;
|
814
|
+
|
815
|
+
|
816
|
+
/* argument reduction.
|
817
|
+
We do it before the special case tests for performance,
|
818
|
+
it might compute garbage for inf, very large inputs, etc */
|
819
|
+
if(absx> TWOTO42 ) { /* 2^42, x is very large, let us first subtract a large integer from it */
|
820
|
+
t.d = xs;
|
821
|
+
t.i[LO] =0; /* remove the low part, in which was the coma since x > 2^42.
|
822
|
+
So what remains in t is an FP integer almost as large as x */
|
823
|
+
xs = xs-t.d; /* we are going to throw away the int part anyway */
|
824
|
+
}
|
825
|
+
|
826
|
+
t.d = TWOTO5251 + xs;
|
827
|
+
u = t.d - TWOTO5251;
|
828
|
+
y = xs - u;
|
829
|
+
y = y * INV128;
|
830
|
+
index = t.i[LO] & 0x3f;
|
831
|
+
quadrant = (t.i[LO] & 0xff) >>6;
|
832
|
+
|
833
|
+
|
834
|
+
/* Special case tests come late because the conversion FP to int is slow */
|
835
|
+
xih = xdb.i[HI];
|
836
|
+
absxih = xih & 0x7fffffff;
|
837
|
+
|
838
|
+
/* SPECIAL CASES: x=(Nan, Inf) cos(pi*x)=Nan */
|
839
|
+
|
840
|
+
if (absxih>=0x7ff00000) {
|
841
|
+
xdb.l=0xfff8000000000000LL;
|
842
|
+
return xdb.d - xdb.d;
|
843
|
+
}
|
844
|
+
|
845
|
+
if(absxih>=0x43400000) /* 2^53, which entails that x is an even integer */
|
846
|
+
return 1.0; /*signed */
|
847
|
+
|
848
|
+
if(index==0 && y==0. && ((quadrant&1)==1)) return +0.;
|
849
|
+
/* Always +0, inpired by LIA2; We do not have cos(x+pi) == - cos(x)
|
850
|
+
in this case */
|
851
|
+
|
852
|
+
if(index==0 && y==0. && quadrant==0) return 1.;
|
853
|
+
if(index==0 && y==0. && quadrant==2) return -1.;
|
854
|
+
|
855
|
+
if (absxih<0x3E200000) /* 2^-29 */
|
856
|
+
return 0.9999999999999998889776975374843459576368331909179687500; /* 1-2^-53 */
|
857
|
+
|
858
|
+
cospi_accurate(&rh, &rm, &rl, y, index, quadrant);
|
859
|
+
ReturnRoundTowardsZero3(rh,rm,rl);
|
860
|
+
};
|
861
|
+
|
862
|
+
|
863
|
+
|
864
|
+
|
865
|
+
|
866
|
+
|
867
|
+
/* tangent of pi times x */
|
868
|
+
double tanpi_rn(double x){
|
869
|
+
double xs, y,u, rh, rm, rl, ch,cm,cl, ich,icm,icl, sh,sm,sl, sign,absx;
|
870
|
+
db_number xdb, t;
|
871
|
+
int32_t xih, absxih, index, quadrant;
|
872
|
+
|
873
|
+
if (x<0) absx = -x; else absx = x;
|
874
|
+
|
875
|
+
xdb.d = x;
|
876
|
+
|
877
|
+
xs = x*128.0;
|
878
|
+
|
879
|
+
/* argument reduction */
|
880
|
+
if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
|
881
|
+
t.d = xs;
|
882
|
+
t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
|
883
|
+
So what remains in t is an FP integer almost as large as x */
|
884
|
+
xs = xs-t.d; /* we are going to throw away the int part anyway */
|
885
|
+
}
|
886
|
+
|
887
|
+
t.d = TWOTO5251 + xs;
|
888
|
+
u = t.d - TWOTO5251;
|
889
|
+
y = xs - u;
|
890
|
+
index = t.i[LO] & 0x3f;
|
891
|
+
quadrant = (t.i[LO] & 0xff) >>6;
|
892
|
+
|
893
|
+
/* Special case tests come late because the conversion FP to int is slow */
|
894
|
+
xih = xdb.i[HI];
|
895
|
+
absxih = xih & 0x7fffffff;
|
896
|
+
if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
|
897
|
+
|
898
|
+
if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
|
899
|
+
/* TODO ? No test for Pi/4. Such a value will lauch the accurate phase. */
|
900
|
+
|
901
|
+
y = y * INV128;
|
902
|
+
|
903
|
+
/* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
|
904
|
+
if (absxih>=0x7ff00000) {
|
905
|
+
xdb.l=0xfff8000000000000LL;
|
906
|
+
return xdb.d - xdb.d;
|
907
|
+
}
|
908
|
+
|
909
|
+
if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
|
910
|
+
return sign*0.0; /*signed */
|
911
|
+
|
912
|
+
if(absxih<=0x3E000000) /*2^{-31}*/ {
|
913
|
+
if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
|
914
|
+
/* in this case, SCS computation, accurate to 2^-210 which is provably enough */
|
915
|
+
scs_t result;
|
916
|
+
scs_set_d(result, x );
|
917
|
+
scs_mul(result, PiSCS_ptr, result);
|
918
|
+
scs_get_d(&rh, result);
|
919
|
+
return rh;
|
920
|
+
}
|
921
|
+
/* First step for Pi*x. TODO: FMA-based optimisation */
|
922
|
+
const double DekkerConst = 134217729.; /* 2^27 +1 */
|
923
|
+
double tt, xh, xl;
|
924
|
+
/* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
|
925
|
+
tt = x*DekkerConst;
|
926
|
+
xh = (x-tt)+tt;
|
927
|
+
xl = x-xh;
|
928
|
+
Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
|
929
|
+
if(rh == (rh + (rl * PIX_RNCST_TAN)))
|
930
|
+
return rh;
|
931
|
+
}
|
932
|
+
/* Fall here either if we have a large input, or if we have a small
|
933
|
+
input and the rounding test fails. */
|
934
|
+
cospi_accurate(&ch, &cm, &cl, y, index, quadrant);
|
935
|
+
Recpr33(&ich, &icm, &icl, ch, cm, cl);
|
936
|
+
sinpi_accurate(&sh, &sm, &sl, y, index, quadrant);
|
937
|
+
Mul33(&rh,&rm,&rl, sh,sm,sl, ich,icm,icl);
|
938
|
+
ReturnRoundToNearest3(rh,rm,rl);
|
939
|
+
|
940
|
+
};
|
941
|
+
|
942
|
+
|
943
|
+
|
944
|
+
|
945
|
+
double tanpi_rd(double x){
|
946
|
+
double xs, y,u, rh, rm, rl, ch,cm,cl, ich,icm,icl, sh,sm,sl, sign,absx;
|
947
|
+
db_number xdb, t;
|
948
|
+
int32_t xih, absxih, index, quadrant;
|
949
|
+
|
950
|
+
if (x<0) absx = -x; else absx = x;
|
951
|
+
|
952
|
+
xdb.d = x;
|
953
|
+
|
954
|
+
xs = x*128.0;
|
955
|
+
|
956
|
+
/* argument reduction */
|
957
|
+
if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
|
958
|
+
t.d = xs;
|
959
|
+
t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
|
960
|
+
So what remains in t is an FP integer almost as large as x */
|
961
|
+
xs = xs-t.d; /* we are going to throw away the int part anyway */
|
962
|
+
}
|
963
|
+
|
964
|
+
t.d = TWOTO5251 + xs;
|
965
|
+
u = t.d - TWOTO5251;
|
966
|
+
y = xs - u;
|
967
|
+
index = t.i[LO] & 0x3f;
|
968
|
+
quadrant = (t.i[LO] & 0xff) >>6;
|
969
|
+
|
970
|
+
/* Special case tests come late because the conversion FP to int is slow */
|
971
|
+
xih = xdb.i[HI];
|
972
|
+
absxih = xih & 0x7fffffff;
|
973
|
+
if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
|
974
|
+
|
975
|
+
if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
|
976
|
+
|
977
|
+
y = y * INV128;
|
978
|
+
|
979
|
+
/* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
|
980
|
+
if (absxih>=0x7ff00000) {
|
981
|
+
xdb.l=0xfff8000000000000LL;
|
982
|
+
return xdb.d - xdb.d;
|
983
|
+
}
|
984
|
+
|
985
|
+
if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
|
986
|
+
return sign*0.0; /*signed */
|
987
|
+
|
988
|
+
if(absxih<=0x3E000000) /*2^{-31}*/ {
|
989
|
+
if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
|
990
|
+
/* in this case, SCS computation, accurate to 2^-210 which is provably enough */
|
991
|
+
scs_t result;
|
992
|
+
scs_set_d(result, x );
|
993
|
+
scs_mul(result, PiSCS_ptr, result);
|
994
|
+
scs_get_d_minf(&rh, result);
|
995
|
+
return rh;
|
996
|
+
}
|
997
|
+
/* First step for Pi*x. TODO: FMA-based optimisation */
|
998
|
+
const double DekkerConst = 134217729.; /* 2^27 +1 */
|
999
|
+
double tt, xh, xl;
|
1000
|
+
/* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
|
1001
|
+
tt = x*DekkerConst;
|
1002
|
+
xh = (x-tt)+tt;
|
1003
|
+
xl = x-xh;
|
1004
|
+
Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
|
1005
|
+
TEST_AND_RETURN_RD(rh,rl,PIX_EPS_SIN);
|
1006
|
+
}
|
1007
|
+
/* Fall here either if we have a large input, or if we have a small
|
1008
|
+
input and the rounding test fails. */
|
1009
|
+
cospi_accurate(&ch, &cm, &cl, y, index, quadrant);
|
1010
|
+
Recpr33(&ich, &icm, &icl, ch, cm, cl);
|
1011
|
+
sinpi_accurate(&sh, &sm, &sl, y, index, quadrant);
|
1012
|
+
Mul33(&rh,&rm,&rl, sh,sm,sl, ich,icm,icl);
|
1013
|
+
ReturnRoundDownwards3(rh,rm,rl);
|
1014
|
+
};
|
1015
|
+
|
1016
|
+
|
1017
|
+
|
1018
|
+
|
1019
|
+
|
1020
|
+
|
1021
|
+
|
1022
|
+
|
1023
|
+
|
1024
|
+
|
1025
|
+
double tanpi_ru(double x){
|
1026
|
+
double xs, y,u, rh, rm, rl, ch,cm,cl, ich,icm,icl, sh,sm,sl, sign,absx;
|
1027
|
+
db_number xdb, t;
|
1028
|
+
int32_t xih, absxih, index, quadrant;
|
1029
|
+
|
1030
|
+
if (x<0) absx = -x; else absx = x;
|
1031
|
+
|
1032
|
+
xdb.d = x;
|
1033
|
+
|
1034
|
+
xs = x*128.0;
|
1035
|
+
|
1036
|
+
/* argument reduction */
|
1037
|
+
if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
|
1038
|
+
t.d = xs;
|
1039
|
+
t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
|
1040
|
+
So what remains in t is an FP integer almost as large as x */
|
1041
|
+
xs = xs-t.d; /* we are going to throw away the int part anyway */
|
1042
|
+
}
|
1043
|
+
|
1044
|
+
t.d = TWOTO5251 + xs;
|
1045
|
+
u = t.d - TWOTO5251;
|
1046
|
+
y = xs - u;
|
1047
|
+
index = t.i[LO] & 0x3f;
|
1048
|
+
quadrant = (t.i[LO] & 0xff) >>6;
|
1049
|
+
|
1050
|
+
/* Special case tests come late because the conversion FP to int is slow */
|
1051
|
+
xih = xdb.i[HI];
|
1052
|
+
absxih = xih & 0x7fffffff;
|
1053
|
+
if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
|
1054
|
+
|
1055
|
+
if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
|
1056
|
+
|
1057
|
+
y = y * INV128;
|
1058
|
+
|
1059
|
+
/* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
|
1060
|
+
if (absxih>=0x7ff00000) {
|
1061
|
+
xdb.l=0xfff8000000000000LL;
|
1062
|
+
return xdb.d - xdb.d;
|
1063
|
+
}
|
1064
|
+
|
1065
|
+
if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
|
1066
|
+
return sign*0.0; /*signed */
|
1067
|
+
|
1068
|
+
if(absxih<=0x3E000000) /*2^{-31}*/ {
|
1069
|
+
if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
|
1070
|
+
/* in this case, SCS computation, accurate to 2^-210 which is provably enough */
|
1071
|
+
scs_t result;
|
1072
|
+
scs_set_d(result, x );
|
1073
|
+
scs_mul(result, PiSCS_ptr, result);
|
1074
|
+
scs_get_d_pinf(&rh, result);
|
1075
|
+
return rh;
|
1076
|
+
}
|
1077
|
+
/* First step for Pi*x. TODO: FMA-based optimisation */
|
1078
|
+
const double DekkerConst = 134217729.; /* 2^27 +1 */
|
1079
|
+
double tt, xh, xl;
|
1080
|
+
/* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
|
1081
|
+
tt = x*DekkerConst;
|
1082
|
+
xh = (x-tt)+tt;
|
1083
|
+
xl = x-xh;
|
1084
|
+
Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
|
1085
|
+
TEST_AND_RETURN_RU(rh,rl,PIX_EPS_TAN);
|
1086
|
+
}
|
1087
|
+
/* Fall here either if we have a large input, or if we have a small
|
1088
|
+
input and the rounding test fails. */
|
1089
|
+
cospi_accurate(&ch, &cm, &cl, y, index, quadrant);
|
1090
|
+
Recpr33(&ich, &icm, &icl, ch, cm, cl);
|
1091
|
+
sinpi_accurate(&sh, &sm, &sl, y, index, quadrant);
|
1092
|
+
Mul33(&rh,&rm,&rl, sh,sm,sl, ich,icm,icl);
|
1093
|
+
ReturnRoundUpwards3(rh,rm,rl);
|
1094
|
+
};
|
1095
|
+
|
1096
|
+
|
1097
|
+
|
1098
|
+
double tanpi_rz(double x){
|
1099
|
+
double xs, y,u, rh, rm, rl, ch,cm,cl, ich,icm,icl, sh,sm,sl, sign,absx;
|
1100
|
+
db_number xdb, t;
|
1101
|
+
int32_t xih, absxih, index, quadrant;
|
1102
|
+
|
1103
|
+
if (x<0) absx = -x; else absx = x;
|
1104
|
+
|
1105
|
+
xdb.d = x;
|
1106
|
+
|
1107
|
+
xs = x*128.0;
|
1108
|
+
|
1109
|
+
/* argument reduction */
|
1110
|
+
if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
|
1111
|
+
t.d = xs;
|
1112
|
+
t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
|
1113
|
+
So what remains in t is an FP integer almost as large as x */
|
1114
|
+
xs = xs-t.d; /* we are going to throw away the int part anyway */
|
1115
|
+
}
|
1116
|
+
|
1117
|
+
t.d = TWOTO5251 + xs;
|
1118
|
+
u = t.d - TWOTO5251;
|
1119
|
+
y = xs - u;
|
1120
|
+
index = t.i[LO] & 0x3f;
|
1121
|
+
quadrant = (t.i[LO] & 0xff) >>6;
|
1122
|
+
|
1123
|
+
/* Special case tests come late because the conversion FP to int is slow */
|
1124
|
+
xih = xdb.i[HI];
|
1125
|
+
absxih = xih & 0x7fffffff;
|
1126
|
+
if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
|
1127
|
+
|
1128
|
+
if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
|
1129
|
+
|
1130
|
+
y = y * INV128;
|
1131
|
+
|
1132
|
+
/* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
|
1133
|
+
if (absxih>=0x7ff00000) {
|
1134
|
+
xdb.l=0xfff8000000000000LL;
|
1135
|
+
return xdb.d - xdb.d;
|
1136
|
+
}
|
1137
|
+
|
1138
|
+
if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
|
1139
|
+
return sign*0.0; /*signed */
|
1140
|
+
|
1141
|
+
if(absxih<=0x3E000000) /*2^{-31}*/ {
|
1142
|
+
if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
|
1143
|
+
/* in this case, SCS computation, accurate to 2^-210 which is provably enough */
|
1144
|
+
scs_t result;
|
1145
|
+
scs_set_d(result, x );
|
1146
|
+
scs_mul(result, PiSCS_ptr, result);
|
1147
|
+
scs_get_d_zero(&rh, result);
|
1148
|
+
return rh;
|
1149
|
+
}
|
1150
|
+
/* First step for Pi*x. TODO: FMA-based optimisation */
|
1151
|
+
const double DekkerConst = 134217729.; /* 2^27 +1 */
|
1152
|
+
double tt, xh, xl;
|
1153
|
+
/* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
|
1154
|
+
tt = x*DekkerConst;
|
1155
|
+
xh = (x-tt)+tt;
|
1156
|
+
xl = x-xh;
|
1157
|
+
Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
|
1158
|
+
TEST_AND_RETURN_RZ(rh,rl,PIX_EPS_SIN);
|
1159
|
+
}
|
1160
|
+
/* Fall here either if we have a large input, or if we have a small
|
1161
|
+
input and the rounding test fails. */
|
1162
|
+
cospi_accurate(&ch, &cm, &cl, y, index, quadrant);
|
1163
|
+
Recpr33(&ich, &icm, &icl, ch, cm, cl);
|
1164
|
+
sinpi_accurate(&sh, &sm, &sl, y, index, quadrant);
|
1165
|
+
Mul33(&rh,&rm,&rl, sh,sm,sl, ich,icm,icl);
|
1166
|
+
ReturnRoundTowardsZero3(rh,rm,rl);
|
1167
|
+
};
|
1168
|
+
|
1169
|
+
|