crmf 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -0
- data/crmf.gemspec +102 -1
- data/ext/crlibm-1.0beta5/AUTHORS +2 -0
- data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
- data/ext/crlibm-1.0beta5/COPYING +340 -0
- data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
- data/ext/crlibm-1.0beta5/ChangeLog +125 -0
- data/ext/crlibm-1.0beta5/Makefile.am +134 -0
- data/ext/crlibm-1.0beta5/NEWS +0 -0
- data/ext/crlibm-1.0beta5/README +31 -0
- data/ext/crlibm-1.0beta5/README.DEV +23 -0
- data/ext/crlibm-1.0beta5/README.md +5 -0
- data/ext/crlibm-1.0beta5/TODO +66 -0
- data/ext/crlibm-1.0beta5/VERSION +1 -0
- data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
- data/ext/crlibm-1.0beta5/acos-td.h +629 -0
- data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
- data/ext/crlibm-1.0beta5/asin-td.h +620 -0
- data/ext/crlibm-1.0beta5/asincos.c +4488 -0
- data/ext/crlibm-1.0beta5/asincos.h +575 -0
- data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
- data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
- data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
- data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
- data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
- data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
- data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
- data/ext/crlibm-1.0beta5/configure.ac +419 -0
- data/ext/crlibm-1.0beta5/crlibm.h +204 -0
- data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
- data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
- data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
- data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
- data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
- data/ext/crlibm-1.0beta5/double-extended.h +496 -0
- data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
- data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
- data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
- data/ext/crlibm-1.0beta5/exp-td.h +685 -0
- data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
- data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
- data/ext/crlibm-1.0beta5/expm1.c +2515 -0
- data/ext/crlibm-1.0beta5/expm1.h +715 -0
- data/ext/crlibm-1.0beta5/interval.h +238 -0
- data/ext/crlibm-1.0beta5/log-de.c +480 -0
- data/ext/crlibm-1.0beta5/log-de.h +747 -0
- data/ext/crlibm-1.0beta5/log-de2.c +280 -0
- data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
- data/ext/crlibm-1.0beta5/log-td.c +1158 -0
- data/ext/crlibm-1.0beta5/log-td.h +819 -0
- data/ext/crlibm-1.0beta5/log.c +2244 -0
- data/ext/crlibm-1.0beta5/log.h +1592 -0
- data/ext/crlibm-1.0beta5/log10-td.c +906 -0
- data/ext/crlibm-1.0beta5/log10-td.h +823 -0
- data/ext/crlibm-1.0beta5/log1p.c +1295 -0
- data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
- data/ext/crlibm-1.0beta5/log2-td.h +821 -0
- data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
- data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
- data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
- data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
- data/ext/crlibm-1.0beta5/log_fast.c +360 -0
- data/ext/crlibm-1.0beta5/log_fast.h +440 -0
- data/ext/crlibm-1.0beta5/pow.c +1396 -0
- data/ext/crlibm-1.0beta5/pow.h +3101 -0
- data/ext/crlibm-1.0beta5/prepare +20 -0
- data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
- data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
- data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
- data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
- data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
- data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
- data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
- data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
- data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
- data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
- data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
- data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
- data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
- data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
- data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
- data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
- data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
- data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
- data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
- data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
- data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
- data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
- data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
- data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
- data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
- data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
- data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
- data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
- data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
- data/ext/crlibm-1.0beta5/trigpi.h +556 -0
- data/ext/crlibm-1.0beta5/triple-double.c +57 -0
- data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
- data/ext/crmf/crmf.c +16 -16
- data/ext/crmf/extconf.rb +12 -8
- data/lib/crmf/version.rb +1 -1
- data/tests/perf.rb +100 -219
- metadata +104 -3
- data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,846 @@
|
|
1
|
+
/*
|
2
|
+
*this function computes a correctly rounded atan using double-extended arithmetic, FMAs and other dirty tricks
|
3
|
+
*
|
4
|
+
* Author : Nicolas Gast, Florent de Dinechin
|
5
|
+
* nicolas.gast@ens.fr
|
6
|
+
*
|
7
|
+
|
8
|
+
WARNING : This code is dirty and experimental, and remains here for
|
9
|
+
history. A cleaner, portable version using double-extended arithmetic will be available some day as atan-de.c
|
10
|
+
For this reason there is only atan_rn so it fails the "make check" for all the other rounding modes
|
11
|
+
|
12
|
+
|
13
|
+
To test within crlibm: (tested with Intel icc compiler version 8.1)
|
14
|
+
icc -Qoption,cpp,--extended_float_types -IPF_fp_speculationsafe -c atan-itanium.c; mv atan-itanium.o atan_fast.o; make
|
15
|
+
|
16
|
+
|
17
|
+
|
18
|
+
|
19
|
+
This file is completely self-contained so that we can change the crlibm infrastructure without having to maintain this file.
|
20
|
+
|
21
|
+
|
22
|
+
*/
|
23
|
+
|
24
|
+
/* WARNING Due to some quantum effect not understood so far,
|
25
|
+
turning debugging on may change the result */
|
26
|
+
#define DEBUG 0
|
27
|
+
|
28
|
+
|
29
|
+
|
30
|
+
typedef __int64 INT64;
|
31
|
+
typedef signed __int64 SINT64;
|
32
|
+
typedef unsigned __int64 UINT64;
|
33
|
+
|
34
|
+
/* FP register type */
|
35
|
+
typedef __fpreg L_FLOAT_TYPE;
|
36
|
+
|
37
|
+
/* Almost the same as the previous, except exponent field smaller, and morally in memory */
|
38
|
+
typedef long double LC_FLOAT_TYPE;
|
39
|
+
|
40
|
+
/* The double-double-ext type, using registers */
|
41
|
+
typedef struct __X_FLOAT_TYPE_TAG {
|
42
|
+
L_FLOAT_TYPE hi,lo; /* order is critical! */
|
43
|
+
} X_FLOAT_TYPE;
|
44
|
+
|
45
|
+
/* The double-double-ext type, in memory */
|
46
|
+
typedef struct __XC_FLOAT_TYPE_TAG {
|
47
|
+
LC_FLOAT_TYPE hi,lo; /* order is critical! */
|
48
|
+
} XC_FLOAT_TYPE;
|
49
|
+
|
50
|
+
|
51
|
+
/* For debugging */
|
52
|
+
typedef union {
|
53
|
+
int i[3];
|
54
|
+
long double d;
|
55
|
+
} db_ext_number;
|
56
|
+
|
57
|
+
|
58
|
+
typedef enum {
|
59
|
+
_PC_S = 1 /* single .s */
|
60
|
+
,_PC_D = 2 /* double .d */
|
61
|
+
,_PC_NONE = 3 /* dynamic */
|
62
|
+
} _Asm_pc;
|
63
|
+
|
64
|
+
/* Table 1-22: legal getf/setf floating-point register access completers */
|
65
|
+
typedef enum {
|
66
|
+
_FR_S = 1 /* single form .s */
|
67
|
+
,_FR_D = 2 /* double form .d */
|
68
|
+
,_FR_EXP = 3 /* exponent form .exp */
|
69
|
+
,_FR_SIG = 4 /* significand form .sig */
|
70
|
+
} _Asm_fr_access;
|
71
|
+
|
72
|
+
/* Table 1-24: legal floating-point FPSR status field completers (.sf) */
|
73
|
+
typedef enum {
|
74
|
+
_SF0 = 0 /* FPSR status field 0 .s0 */
|
75
|
+
,_SF1 = 1 /* FPSR status field 1 .s1 */
|
76
|
+
,_SF2 = 2 /* FPSR status field 2 .s2 */
|
77
|
+
,_SF3 = 3 /* FPSR status field 3 .s3 */
|
78
|
+
} _Asm_sf;
|
79
|
+
|
80
|
+
/* print_debug(msg, _z): debugging aid.
   Prints the label msg verbatim, then the words of the long double value
   _z as seen through the db_ext_number union, in the order i[2], i[1],
   i[0].  Only the low 16 bits of i[2] are kept; they are sign-extended to
   a full int before being printed as hexadecimal. */
#define print_debug(msg, _z) {\
  db_ext_number dbg;\
  dbg.d=(_z);\
  /* the label is data, never a format string */\
  printf("%s", msg);\
  /* sign-extend bits 0..15 arithmetically instead of shifting a signed int left */\
  printf(" %08x %08x %08x \n",\
         (unsigned)(((dbg.i[2] & 0xffff) ^ 0x8000) - 0x8000),\
         (unsigned)dbg.i[1], (unsigned)dbg.i[0]);\
}
|
86
|
+
|
87
|
+
|
88
|
+
/* Add12_ext(s, r, a, b): add two values and keep a correction term.
   s receives a + b; r receives b + (a - s).
   a and b are each evaluated exactly once (they are copied into _a/_b
   first), so arguments with side effects are safe.
   NOTE(review): the b + (a - s) form presumably yields the exact rounding
   error of the addition only when |a| >= |b| -- confirm at the call
   sites. */
#define Add12_ext(s, r, a, b) \
{ L_FLOAT_TYPE _z, _a, _b; \
  _a = (a); _b = (b); \
  (s) = (_a + _b); \
  _z = (_a - (s)); \
  (r) = (_b + _z); }
|
94
|
+
|
95
|
+
|
96
|
+
/* Add22_ext(zh, zl, xh, xl, yh, yl): add the pair (xh, xl) to the pair
   (yh, yl) and store the result as the pair (zh, zl).
   Steps: _r = xh + yh; _s = ((xh - _r) + yh) + yl + xl; then
   zh = _r + _s and zl = (_r - zh) + _s.
   The temporaries carry a leading underscore so that caller variables
   named r or s can be passed as arguments without being captured.
   xh and yh are evaluated more than once: pass side-effect-free
   expressions. */
#define Add22_ext(zh,zl,xh,xl,yh,yl) \
do {\
  L_FLOAT_TYPE _r,_s;\
  _r = (xh)+(yh);\
  _s = (xh)-_r;\
  _s+= (yh);\
  _s+= (yl);\
  _s+= (xl);\
  (zh) = _r+_s;\
  (zl) = _r - (zh);\
  (zl)+= _s;\
} while(0)
|
108
|
+
|
109
|
+
|
110
|
+
|
111
|
+
/* Mul12_ext(_rh, _rl, _u, _v): product of _u and _v as a pair.
   _rh receives _u * _v; _rl receives _Asm_fms(_u, _v, _rh), called with
   precision completer 3 (_PC_NONE) and status field 1 (_SF1).
   NOTE(review): _Asm_fms is presumably the fused multiply-subtract
   intrinsic, making _rl the rounding error of the product -- confirm
   against the compiler documentation.
   Both operands are parenthesized so expression arguments such as a+b
   multiply as a whole.  _u and _v are evaluated twice. */
#define Mul12_ext(_rh,_rl,_u,_v) \
{ \
  _rh = (_u)*(_v); \
  _rl = _Asm_fms( 3/*_PC_NONE*/, (_u), (_v), _rh, 1/*_SF1*/ );\
}
|
116
|
+
/* Mul22_ext(zh, zl, xh, xl, yh, yl): multiply the pair (xh, xl) by the
   pair (yh, yl) and store the result as the pair (zh, zl).
   Steps: _ph = xh * yh; _pl = _Asm_fms(xh, yh, _ph) with precision
   completer 3 (_PC_NONE) and status field 1 (_SF1); the cross terms
   xh*yl and xl*yh are then added into _pl (xl*yl is not computed);
   finally zh = _ph + _pl and zl = (_ph - zh) + _pl.
   NOTE(review): _Asm_fms is presumably the fused multiply-subtract
   intrinsic -- confirm against the compiler documentation.
   The temporaries carry a leading underscore so that caller variables
   named ph or pl can be passed as arguments without being captured. */
#define Mul22_ext(zh,zl, xh,xl, yh,yl) \
{ \
  L_FLOAT_TYPE _ph, _pl; \
  _ph = (xh)*(yh); \
  _pl = _Asm_fms( 3/*_PC_NONE*/, (xh), (yh), _ph, 1/*_SF1*/ ); \
  _pl = (xh)*(yl) + _pl; \
  _pl = (xl)*(yh) + _pl; \
  (zh) = _ph+_pl; \
  (zl) = _ph - (zh); \
  (zl) += _pl; \
}
|
127
|
+
|
128
|
+
#define Div22_ext(zh,zl,xh,xl,yh,yl) \
|
129
|
+
{ \
|
130
|
+
L_FLOAT_TYPE _ch,_cl,_uh,_ul; \
|
131
|
+
_ch=(xh)/(yh); \
|
132
|
+
Mul12_ext(_uh,_ul,_ch,(yh)); \
|
133
|
+
_cl=(xh)-_uh; \
|
134
|
+
_cl -= _ul; \
|
135
|
+
_cl += (xl); \
|
136
|
+
_cl -= _ch*(yl); \
|
137
|
+
_cl /= (yh); \
|
138
|
+
zh = _ch + _cl; \
|
139
|
+
zl=(_ch-(zh)); zl += _cl; \
|
140
|
+
}
|
141
|
+
|
142
|
+
|
143
|
+
|
144
|
+
|
145
|
+
|
146
|
+
#define ULL(bits) 0x##bits##uLL
|
147
|
+
|
148
|
+
#if (!defined(EM64T) && defined(__linux__) && defined(IA32))
|
149
|
+
# define LDOUBLE_ALIGN 12 /* IA32 Linux: 12-byte alignment */
|
150
|
+
#else
|
151
|
+
# define LDOUBLE_ALIGN 16 /* EM64T, IA32 Win or IPF Win/Linux: 16-byte alignm\
|
152
|
+
ent */
|
153
|
+
#endif
|
154
|
+
|
155
|
+
#if (LDOUBLE_ALIGN == 16)
|
156
|
+
#define _XPD_ ,0x0000,0x0000,0x0000
|
157
|
+
#else /*12*/
|
158
|
+
#define _XPD_ ,0x0000
|
159
|
+
#endif
|
160
|
+
|
161
|
+
#define LDOUBLE_HEX(w4,w3,w2,w1,w0) 0x##w0,0x##w1,0x##w2,0x##w3,0x##w4 _XPD_ /*LITTLE_ENDIAN*/
|
162
|
+
|
163
|
+
|
164
|
+
|
165
|
+
double dde_atan_rn(double x) {
|
166
|
+
return 0;
|
167
|
+
}
|
168
|
+
|
169
|
+
double atan_rd(double x) {
|
170
|
+
return 0;
|
171
|
+
}
|
172
|
+
|
173
|
+
double atan_ru(double x) {
|
174
|
+
return 0;
|
175
|
+
}
|
176
|
+
|
177
|
+
double atan_rz(double x) {
|
178
|
+
return 0;
|
179
|
+
}
|
180
|
+
|
181
|
+
|
182
|
+
static const double HALFPI = 1.57079632679489655799898173427209258079528808593750e+00;
|
183
|
+
#define MIN_REDUCTION_NEEDED ULL(3F89FDF8BCCE533D)
|
184
|
+
#define A 0
|
185
|
+
#define B 1
|
186
|
+
#define ATAN_BHI 0
|
187
|
+
#define ATAN_BLO 1
|
188
|
+
#define epsilon 2.04221581890623872536809598138553304900554884091659e-19
|
189
|
+
#define epsilon_no_red 1.56771350764719825686165002299335165493769973908433e-19
|
190
|
+
#define TWO_M_64 5.42101086242752217003726400434970855712890625000000e-20
|
191
|
+
#define TWO_10 1.02400000000000000000000000000000000000000000000000e+03
|
192
|
+
|
193
|
+
__declspec(align(16))
|
194
|
+
|
195
|
+
static const struct{long long int a; double b;} ab_table[62] = {
|
196
|
+
{ /*a[0] ~= 1.26914436930661800408e-02 */ ULL(3F89FDF8BCCE533D),
|
197
|
+
/*b[0] = */ 2.53869765124364009378776785297304741106927394866943e-02},
|
198
|
+
{ /*a[1] ~= 3.80906929270782388369e-02 */ ULL(3FA3809F90CEBC31),
|
199
|
+
/*b[1] = */ 5.08066978456951506837313559117319528013467788696289e-02},
|
200
|
+
{ /*a[2] ~= 6.35391122156262234502e-02 */ ULL(3FB0441968FBA526),
|
201
|
+
/*b[2] = */ 7.62920780032335793530151590857713017612695693969727e-02},
|
202
|
+
{ /*a[3] ~= 8.90697640843219481662e-02 */ ULL(3FB6CD46ABCDFA25),
|
203
|
+
/*b[3] = */ 1.01876371166982934712841313285025535151362419128418e-01},
|
204
|
+
{ /*a[4] ~= 1.14716138034642060814e-01 */ ULL(3FBD5E096D2EA546),
|
205
|
+
/*b[4] = */ 1.27593346472767293908745500630175229161977767944336e-01},
|
206
|
+
{ /*a[5] ~= 1.40512327929006382604e-01 */ ULL(3FC1FC4ED691E891),
|
207
|
+
/*b[5] = */ 1.53477468508642272970732278736250009387731552124023e-01},
|
208
|
+
{ /*a[6] ~= 1.66493216120905490981e-01 */ ULL(3FC54FA6531F610B),
|
209
|
+
/*b[6] = */ 1.79564085612852891715718328669026959687471389770508e-01},
|
210
|
+
{ /*a[7] ~= 1.92694666476959805056e-01 */ ULL(3FC8AA380550EAF1),
|
211
|
+
/*b[7] = */ 2.05889628199359991933548030829115305095911026000977e-01},
|
212
|
+
{ /*a[8] ~= 2.19153728611415840590e-01 */ ULL(3FCC0D3AB8975BD9),
|
213
|
+
/*b[8] = */ 2.32491819536184141092860500066308304667472839355469e-01},
|
214
|
+
{ /*a[9] ~= 2.45908855876056406352e-01 */ ULL(3FCF79F0FEE46885),
|
215
|
+
/*b[9] = */ 2.59409901651160901270287695297156460583209991455078e-01},
|
216
|
+
{ /*a[10] ~= 2.73000139926648314534e-01 */ ULL(3FD178D5943274CA),
|
217
|
+
/*b[10] = */ 2.86684879348826082701151563014718703925609588623047e-01},
|
218
|
+
{ /*a[11] ~= 3.00469565029600954026e-01 */ ULL(3FD33AE4B2CFB5F7),
|
219
|
+
/*b[11] = */ 3.14359785700871030567071784389554522931575775146484e-01},
|
220
|
+
{ /*a[12] ~= 3.28361285690481766972e-01 */ ULL(3FD503DF0DD40A5B),
|
221
|
+
/*b[12] = */ 3.42479972833279300292730340515845455229282379150391e-01},
|
222
|
+
{ /*a[13] ~= 3.56721931693259067415e-01 */ ULL(3FD6D4883998DD14),
|
223
|
+
/*b[13] = */ 3.71093432391343347465095803272561170160770416259766e-01},
|
224
|
+
{ /*a[14] ~= 3.85600945252912822931e-01 */ ULL(3FD8ADAF964ABFA5),
|
225
|
+
/*b[14] = */ 4.00251150738601846335029676993144676089286804199219e-01},
|
226
|
+
{ /*a[15] ~= 4.15050955725992373816e-01 */ ULL(3FDA9031E241114E),
|
227
|
+
/*b[15] = */ 4.30007504761513281721363455289974808692932128906250e-01},
|
228
|
+
{ /*a[16] ~= 4.45128198220858643198e-01 */ ULL(3FDC7CFAFB78B41D),
|
229
|
+
/*b[16] = */ 4.60420705138676944478959285333985462784767150878906e-01},
|
230
|
+
{ /*a[17] ~= 4.75892983535655022698e-01 */ ULL(3FDE7507D82B9DC6),
|
231
|
+
/*b[17] = */ 4.91553295129659728601723145402502268552780151367188e-01},
|
232
|
+
{ /*a[18] ~= 5.07410228170177493351e-01 */ ULL(3FE03CB45FF4B2AB),
|
233
|
+
/*b[18] = */ 5.23472714391912563591802154405741021037101745605469e-01},
|
234
|
+
{ /*a[19] ~= 5.39750054761637805872e-01 */ ULL(3FE145A1E826E4EA),
|
235
|
+
/*b[19] = */ 5.56251939105489867642972967587411403656005859375000e-01},
|
236
|
+
{ /*a[20] ~= 5.72988475252136329570e-01 */ ULL(3FE255EBED462BAC),
|
237
|
+
/*b[20] = */ 5.89970211851368997457711884635500609874725341796875e-01},
|
238
|
+
{ /*a[21] ~= 6.07208171494496387417e-01 */ ULL(3FE36E3FD4CDD9AC),
|
239
|
+
/*b[21] = */ 6.24713877348479162954220100800739601254463195800781e-01},
|
240
|
+
{ /*a[22] ~= 6.42499390954343656748e-01 */ ULL(3FE48F5AE1FB2991),
|
241
|
+
/*b[22] = */ 6.60577343433393693317157158162444829940795898437500e-01},
|
242
|
+
{ /*a[23] ~= 6.78960978813340497734e-01 */ ULL(3FE5BA0C5FE86E27),
|
243
|
+
/*b[23] = */ 6.97664190728041089251121320558013394474983215332031e-01},
|
244
|
+
{ /*a[24] ~= 7.16701572306941533027e-01 */ ULL(3FE6EF3822C19A5D),
|
245
|
+
/*b[24] = */ 7.36088459496464064812926153535954654216766357421875e-01},
|
246
|
+
{ /*a[25] ~= 7.55840988781748695010e-01 */ ULL(3FE82FD970F967BD),
|
247
|
+
/*b[25] = */ 7.75976148518263131315109148999908939003944396972656e-01},
|
248
|
+
{ /*a[26] ~= 7.96511846049556065643e-01 */ ULL(3FE97D0669351A0D),
|
249
|
+
/*b[26] = */ 8.17466968767843527032823658373672515153884887695312e-01},
|
250
|
+
{ /*a[27] ~= 8.38861462565995493716e-01 */ ULL(3FEAD7F3FE730FCD),
|
251
|
+
/*b[27] = */ 8.60716404767067566616844942473107948899269104003906e-01},
|
252
|
+
{ /*a[28] ~= 8.83054096327761096527e-01 */ ULL(3FEC41FAAA0A733E),
|
253
|
+
/*b[28] = */ 9.05898149317818313086547732382314279675483703613281e-01},
|
254
|
+
{ /*a[29] ~= 9.29273595909162105525e-01 */ ULL(3FEDBC9BFAEEEADF),
|
255
|
+
/*b[29] = */ 9.53206993785724487899813084368361160159111022949219e-01},
|
256
|
+
{ /*a[30] ~= 9.77726555752981254442e-01 */ ULL(3FEF498933AC790A),
|
257
|
+
/*b[30] = */ 1.00286227737052557884567249857354909181594848632812e+00},
|
258
|
+
{ /*a[31] ~= 1.02864609206350806308e+00 */ ULL(3FF075559AC922B4),
|
259
|
+
/*b[31] = */ 1.05511202646791502068879253783961758017539978027344e+00},
|
260
|
+
{ /*a[32] ~= 1.08229638730567912228e+00 */ ULL(3FF151160440E8D3),
|
261
|
+
/*b[32] = */ 1.11023795151925819268967643438372761011123657226562e+00},
|
262
|
+
{ /*a[33] ~= 1.13897819300824741364e+00 */ ULL(3FF23941329D3DD8),
|
263
|
+
/*b[33] = */ 1.16856151675095110142876819736557081341743469238281e+00},
|
264
|
+
{ /*a[34] ~= 1.19903553596580987055e+00 */ ULL(3FF32F3FE2DB7094),
|
265
|
+
/*b[34] = */ 1.23045136228081597451478046423289924860000610351562e+00},
|
266
|
+
{ /*a[35] ~= 1.26286394722716532198e+00 */ ULL(3FF434B0D38A35D7),
|
267
|
+
/*b[35] = */ 1.29633244442242001603915468876948580145835876464844e+00},
|
268
|
+
{ /*a[36] ~= 1.33092063388866265448e+00 */ ULL(3FF54B736F41F96D),
|
269
|
+
/*b[36] = */ 1.36669737760087572908673791971523314714431762695312e+00},
|
270
|
+
{ /*a[37] ~= 1.40373715148086145849e+00 */ ULL(3FF675B5165CA5E1),
|
271
|
+
/*b[37] = */ 1.44212062317890032936418265308020636439323425292969e+00},
|
272
|
+
{ /*a[38] ~= 1.48193532552453321547e+00 */ ULL(3FF7B601D0DEA3C6),
|
273
|
+
/*b[38] = */ 1.52327639603630871079076314345002174377441406250000e+00},
|
274
|
+
{ /*a[39] ~= 1.56624743831976717041e+00 */ ULL(3FF90F5979506F51),
|
275
|
+
/*b[39] = */ 1.61096147803441858137318831722950562834739685058594e+00},
|
276
|
+
{ /*a[40] ~= 1.65754207708184630948e+00 */ ULL(3FFA854AD74CF791),
|
277
|
+
/*b[40] = */ 1.70612458293084490179580825497396290302276611328125e+00},
|
278
|
+
{ /*a[41] ~= 1.75685758736121174681e+00 */ ULL(3FFC1C16B3972246),
|
279
|
+
/*b[41] = */ 1.80990457885083300126893846027087420225143432617188e+00},
|
280
|
+
{ /*a[42] ~= 1.86544587781964938190e+00 */ ULL(3FFDD8DDC6DB1831),
|
281
|
+
/*b[42] = */ 1.92368085119253517945026032975874841213226318359375e+00},
|
282
|
+
{ /*a[43] ~= 1.98483051718814034750e+00 */ ULL(3FFFC1DDA4F6D032),
|
283
|
+
/*b[43] = */ 2.04914055707593512067887786542996764183044433593750e+00},
|
284
|
+
{ /*a[44] ~= 2.11688487740990979279e+00 */ ULL(4000EF6156AEFAF2),
|
285
|
+
/*b[44] = */ 2.18836977316091063627823132264893501996994018554688e+00},
|
286
|
+
{ /*a[45] ~= 2.26393888595347935033e+00 */ ULL(40021C8BFD9A80C1),
|
287
|
+
/*b[45] = */ 2.34397906437763481335423421114683151245117187500000e+00},
|
288
|
+
{ /*a[46] ~= 2.42892740222016626128e+00 */ ULL(40036E717D67269C),
|
289
|
+
/*b[46] = */ 2.51927965826279764982587039412464946508407592773438e+00},
|
290
|
+
{ /*a[47] ~= 2.61560046981161264128e+00 */ ULL(4004ECBFF069F1E4),
|
291
|
+
/*b[47] = */ 2.71853573297491069027387311507482081651687622070312e+00},
|
292
|
+
{ /*a[48] ~= 2.82882779840766906527e+00 */ ULL(4006A170780169B7),
|
293
|
+
/*b[48] = */ 2.94733416149008720097413061012048274278640747070312e+00},
|
294
|
+
{ /*a[49] ~= 3.07505072362971616974e+00 */ ULL(400899B4319C3F02),
|
295
|
+
/*b[49] = */ 3.21314087722892072207514502224512398242950439453125e+00},
|
296
|
+
{ /*a[50] ~= 3.36297230191158715455e+00 */ ULL(400AE75E05B0834A),
|
297
|
+
/*b[50] = */ 3.52616384863255349912947167467791587114334106445312e+00},
|
298
|
+
{ /*a[51] ~= 3.70464601821196143254e+00 */ ULL(400DA31D739BD0E3),
|
299
|
+
/*b[51] = */ 3.90073973345466518125590482668485492467880249023438e+00},
|
300
|
+
{ /*a[52] ~= 4.11726034471856573100e+00 */ ULL(401078131886BC57),
|
301
|
+
/*b[52] = */ 4.35765668014056828383218089584261178970336914062500e+00},
|
302
|
+
{ /*a[53] ~= 4.62619989820137847648e+00 */ ULL(4012813A8BCE2241),
|
303
|
+
/*b[53] = */ 4.92824409985376998832862227573059499263763427734375e+00},
|
304
|
+
{ /*a[54] ~= 5.27059285056349616385e+00 */ ULL(401515164ACECE78),
|
305
|
+
/*b[54] = */ 5.66202526987798027136022938066162168979644775390625e+00},
|
306
|
+
{ /*a[55] ~= 6.11406930017863578891e+00 */ ULL(401874CE9526FAB9),
|
307
|
+
/*b[55] = */ 6.64216890962962569489036468439735472202301025390625e+00},
|
308
|
+
{ /*a[56] ~= 7.26750136287798241547e+00 */ ULL(401D11EBE094C913),
|
309
|
+
/*b[56] = */ 8.01990986231011859786121931392699480056762695312500e+00},
|
310
|
+
{ /*a[57] ~= 8.94284159107796650204e+00 */ ULL(4021E2BC220DFA19),
|
311
|
+
/*b[57] = */ 1.01020964280653942068965989165008068084716796875000e+01},
|
312
|
+
{ /*a[58] ~= 1.16023240149353498339e+01 */ ULL(40273463D0337C49),
|
313
|
+
/*b[58] = */ 1.36206610885392880305744256475009024143218994140625e+01},
|
314
|
+
{ /*a[59] ~= 1.64826377753716631495e+01 */ ULL(40307B8E26350916),
|
315
|
+
/*b[59] = */ 2.08587363260064613257327437167987227439880371093750e+01},
|
316
|
+
{ /*a[60] ~= 2.83859754493341325216e+01 */ ULL(403C62CF497BF2F2),
|
317
|
+
/*b[60] = */ 4.43908820444562195461912779137492179870605468750000e+01},
|
318
|
+
{ /*a[61] ~= 1.01699461607316896213e+02 */ ULL(40596CC3FA9E0EF4),
|
319
|
+
/*b[61] = */ 8.27932424540746438879068591631948947906494140625000e+01}
|
320
|
+
};
|
321
|
+
|
322
|
+
|
323
|
+
#define atanb_table ((const XC_FLOAT_TYPE *)_atanb_table)
|
324
|
+
__declspec(align(16)) static const unsigned short _atanb_table[] = {
|
325
|
+
/*atan_b[0] ~= 2.5381524664e-02*/
|
326
|
+
LDOUBLE_HEX(3FF9, CFEC, EA4B, 4FCB, 5DFD),
|
327
|
+
LDOUBLE_HEX(BFB7, CBBA, 8342, F523, 8BE7),
|
328
|
+
/*atan_b[1] ~= 5.0763049304e-02*/
|
329
|
+
LDOUBLE_HEX(3FFA, CFEC, EA49, B131, 647C),
|
330
|
+
LDOUBLE_HEX(3FB6, D38B, A5E1, 4DEF, A6BD),
|
331
|
+
/*atan_b[2] ~= 7.6144573921e-02*/
|
332
|
+
LDOUBLE_HEX(3FFB, 9BF1, AFB6, 0F03, 5D53),
|
333
|
+
LDOUBLE_HEX(3FB8, EF7C, 871F, DC70, BCA9),
|
334
|
+
/*atan_b[3] ~= 1.0152609851e-01*/
|
335
|
+
LDOUBLE_HEX(3FFB, CFEC, EA46, 78CC, AECA),
|
336
|
+
LDOUBLE_HEX(BFB7, DCB7, 3BED, 3BD7, 633C),
|
337
|
+
/*atan_b[4] ~= 1.2690762308e-01*/
|
338
|
+
LDOUBLE_HEX(3FFC, 81F4, 126B, 0C0A, B24C),
|
339
|
+
LDOUBLE_HEX(3FB8, 9C93, 50C6, 8748, 202B),
|
340
|
+
/*atan_b[5] ~= 1.5228914763e-01*/
|
341
|
+
LDOUBLE_HEX(3FFC, 9BF1, AFB2, 77C1, F1F3),
|
342
|
+
LDOUBLE_HEX(BFBB, 9D89, 6B54, 2B43, C3D3),
|
343
|
+
/*atan_b[6] ~= 1.7767067216e-01*/
|
344
|
+
LDOUBLE_HEX(3FFC, B5EF, 4CF9, 8121, 27D9),
|
345
|
+
LDOUBLE_HEX(BFBB, D8AB, 134C, C337, 1424),
|
346
|
+
/*atan_b[7] ~= 2.0305219666e-01*/
|
347
|
+
LDOUBLE_HEX(3FFC, CFEC, EA40, 29FE, 3D0C),
|
348
|
+
LDOUBLE_HEX(BFBA, 964C, 23A5, 78A9, 286C),
|
349
|
+
/*atan_b[8] ~= 2.2843372114e-01*/
|
350
|
+
LDOUBLE_HEX(3FFC, E9EA, 8786, 746E, CBDE),
|
351
|
+
LDOUBLE_HEX(3FBB, 95CE, 8C74, D4B3, 3D3D),
|
352
|
+
/*atan_b[9] ~= 2.5381524560e-01*/
|
353
|
+
LDOUBLE_HEX(3FFD, 81F4, 1266, 3163, 58ED),
|
354
|
+
LDOUBLE_HEX(3FBB, B292, B8DC, 903F, C86D),
|
355
|
+
/*atan_b[10] ~= 2.7919677004e-01*/
|
356
|
+
LDOUBLE_HEX(3FFD, 8EF2, E108, FBCB, 4839),
|
357
|
+
LDOUBLE_HEX(BFBC, C5E3, D3F8, 42F0, A001),
|
358
|
+
/*atan_b[11] ~= 3.0457829447e-01*/
|
359
|
+
LDOUBLE_HEX(3FFD, 9BF1, AFAB, 9AD5, 051A),
|
360
|
+
LDOUBLE_HEX(3FBC, BE9C, AF21, 45D0, CBC5),
|
361
|
+
/*atan_b[12] ~= 3.2995981887e-01*/
|
362
|
+
LDOUBLE_HEX(3FFD, A8F0, 7E4E, 1002, FE3F),
|
363
|
+
LDOUBLE_HEX(3FB9, ACDF, 4585, 84D5, 7EE8),
|
364
|
+
/*atan_b[13] ~= 3.5534134325e-01*/
|
365
|
+
LDOUBLE_HEX(3FFD, B5EF, 4CF0, 5CF3, 3B2F),
|
366
|
+
LDOUBLE_HEX(BFB9, DAF1, E542, E461, 5C3F),
|
367
|
+
/*atan_b[14] ~= 3.8072286762e-01*/
|
368
|
+
LDOUBLE_HEX(3FFD, C2EE, 1B92, 835E, 5241),
|
369
|
+
LDOUBLE_HEX(3FBC, F450, E872, E8D5, 5B89),
|
370
|
+
/*atan_b[15] ~= 4.0610439197e-01*/
|
371
|
+
LDOUBLE_HEX(3FFD, CFEC, EA34, 8516, 3E60),
|
372
|
+
LDOUBLE_HEX(BFBC, 91DD, F6E6, 0680, E8AD),
|
373
|
+
/*atan_b[16] ~= 4.3148591630e-01*/
|
374
|
+
LDOUBLE_HEX(3FFD, DCEB, B8D6, 6405, 31AA),
|
375
|
+
LDOUBLE_HEX(BFBC, 8502, E09D, 5663, 1B39),
|
376
|
+
/*atan_b[17] ~= 4.5686744062e-01*/
|
377
|
+
LDOUBLE_HEX(3FFD, E9EA, 8778, 222C, 48BB),
|
378
|
+
LDOUBLE_HEX(BFBB, F51E, C2F3, 5A3E, F53D),
|
379
|
+
/*atan_b[18] ~= 4.8224896492e-01*/
|
380
|
+
LDOUBLE_HEX(3FFD, F6E9, 5619, C1A2, 5014),
|
381
|
+
LDOUBLE_HEX(BFBB, E1E1, FABB, 35B7, 64D8),
|
382
|
+
/*atan_b[19] ~= 5.0763048922e-01*/
|
383
|
+
LDOUBLE_HEX(3FFE, 81F4, 125D, A249, 1B96),
|
384
|
+
LDOUBLE_HEX(BFBB, FEB6, 20F5, A80E, ABD8),
|
385
|
+
/*atan_b[20] ~= 5.3301201350e-01*/
|
386
|
+
LDOUBLE_HEX(3FFE, 8873, 79AE, 569C, E82C),
|
387
|
+
LDOUBLE_HEX(BFBD, 9333, CB85, 3253, A31F),
|
388
|
+
/*atan_b[21] ~= 5.5839353776e-01*/
|
389
|
+
LDOUBLE_HEX(3FFE, 8EF2, E0FE, FEF4, 22DF),
|
390
|
+
LDOUBLE_HEX(3FBD, FBF4, E487, 2960, 19F2),
|
391
|
+
/*atan_b[22] ~= 5.8377506202e-01*/
|
392
|
+
LDOUBLE_HEX(3FFE, 9572, 484F, 9C7E, 4569),
|
393
|
+
LDOUBLE_HEX(BFBD, ED41, 6021, 317B, 1548),
|
394
|
+
/*atan_b[23] ~= 6.0915658627e-01*/
|
395
|
+
LDOUBLE_HEX(3FFE, 9BF1, AFA0, 3071, E801),
|
396
|
+
LDOUBLE_HEX(3FBD, C46B, 95C4, B736, D8A5),
|
397
|
+
/*atan_b[24] ~= 6.3453811052e-01*/
|
398
|
+
LDOUBLE_HEX(3FFE, A271, 16F0, BC0B, F541),
|
399
|
+
LDOUBLE_HEX(3FBD, E479, 64B6, 873E, E8BE),
|
400
|
+
/*atan_b[25] ~= 6.5991963475e-01*/
|
401
|
+
LDOUBLE_HEX(3FFE, A8F0, 7E41, 408E, DDC6),
|
402
|
+
LDOUBLE_HEX(3FBD, C200, D1A3, 7D02, 9DAA),
|
403
|
+
/*atan_b[26] ~= 6.8530115898e-01*/
|
404
|
+
LDOUBLE_HEX(3FFE, AF6F, E591, BF41, BD98),
|
405
|
+
LDOUBLE_HEX(3FBC, AB83, 86B7, DBD3, 49B9),
|
406
|
+
/*atan_b[27] ~= 7.1068268321e-01*/
|
407
|
+
LDOUBLE_HEX(3FFE, B5EF, 4CE2, 396F, 887A),
|
408
|
+
LDOUBLE_HEX(3FB9, 93C0, 6F69, 2472, DD13),
|
409
|
+
/*atan_b[28] ~= 7.3606420743e-01*/
|
410
|
+
LDOUBLE_HEX(3FFE, BC6E, B432, B066, 2617),
|
411
|
+
LDOUBLE_HEX(BFBD, C5F2, 72DA, A216, 8845),
|
412
|
+
/*atan_b[29] ~= 7.6144573166e-01*/
|
413
|
+
LDOUBLE_HEX(3FFE, C2EE, 1B83, 2575, A17C),
|
414
|
+
LDOUBLE_HEX(3FBA, FC52, 25AC, D135, 67B0),
|
415
|
+
/*atan_b[30] ~= 7.8682725588e-01*/
|
416
|
+
LDOUBLE_HEX(3FFE, C96D, 82D3, 99EF, 4753),
|
417
|
+
LDOUBLE_HEX(3FBC, E6CB, 9CE5, F7DC, 32EF),
|
418
|
+
/*atan_b[31] ~= 8.1220878010e-01*/
|
419
|
+
LDOUBLE_HEX(3FFE, CFEC, EA24, 0F24, C5A3),
|
420
|
+
LDOUBLE_HEX(BFBB, 9F94, 64A4, 0D49, 77DA),
|
421
|
+
/*atan_b[32] ~= 8.3759030433e-01*/
|
422
|
+
LDOUBLE_HEX(3FFE, D66C, 5174, 8667, 5086),
|
423
|
+
LDOUBLE_HEX(BFBC, E480, 36A7, 98A0, E416),
|
424
|
+
/*atan_b[33] ~= 8.6297182855e-01*/
|
425
|
+
LDOUBLE_HEX(3FFE, DCEB, B8C5, 0106, C115),
|
426
|
+
LDOUBLE_HEX(BFBB, AE5E, 111C, 0925, 5FC1),
|
427
|
+
/*atan_b[34] ~= 8.8835335278e-01*/
|
428
|
+
LDOUBLE_HEX(3FFE, E36B, 2015, 8050, B874),
|
429
|
+
LDOUBLE_HEX(BFBC, 8DD3, E1A9, 67EE, B236),
|
430
|
+
/*atan_b[35] ~= 9.1373487702e-01*/
|
431
|
+
LDOUBLE_HEX(3FFE, E9EA, 8766, 058F, C400),
|
432
|
+
LDOUBLE_HEX(BFBD, 994E, 5D94, 7944, 5BF2),
|
433
|
+
/*atan_b[36] ~= 9.3911640126e-01*/
|
434
|
+
LDOUBLE_HEX(3FFE, F069, EEB6, 920A, 8756),
|
435
|
+
LDOUBLE_HEX(BFBD, F0FC, 830B, 5639, 9FED),
|
436
|
+
/*atan_b[37] ~= 9.6449792552e-01*/
|
437
|
+
LDOUBLE_HEX(3FFE, F6E9, 5607, 2702, D403),
|
438
|
+
LDOUBLE_HEX(BFBD, B0EF, D9DB, FF7A, BBF3),
|
439
|
+
/*atan_b[38] ~= 9.8987944978e-01*/
|
440
|
+
LDOUBLE_HEX(3FFE, FD68, BD57, C5B4, F372),
|
441
|
+
LDOUBLE_HEX(BFBD, 9706, 5831, 4248, 656E),
|
442
|
+
/*atan_b[39] ~= 1.0152609740e+00*/
|
443
|
+
LDOUBLE_HEX(3FFF, 81F4, 1254, 37AB, 59C4),
|
444
|
+
LDOUBLE_HEX(3FBE, C83B, C3BE, 8160, FE56),
|
445
|
+
/*atan_b[40] ~= 1.0406424983e+00*/
|
446
|
+
LDOUBLE_HEX(3FFF, 8533, C5FC, 928B, 5DCD),
|
447
|
+
LDOUBLE_HEX(3FBE, C025, 7DA6, 5435, CDA0),
|
448
|
+
/*atan_b[41] ~= 1.0660240226e+00*/
|
449
|
+
LDOUBLE_HEX(3FFF, 8873, 79A4, F40D, D390),
|
450
|
+
LDOUBLE_HEX(BFBE, BB70, CBE8, FB3B, AA03),
|
451
|
+
/*atan_b[42] ~= 1.0914055469e+00*/
|
452
|
+
LDOUBLE_HEX(3FFF, 8BB3, 2D4D, 5CC1, ADB6),
|
453
|
+
LDOUBLE_HEX(3FBE, 8161, 18FB, A932, 136B),
|
454
|
+
/*atan_b[43] ~= 1.1167870712e+00*/
|
455
|
+
LDOUBLE_HEX(3FFF, 8EF2, E0F5, CD31, 1F80),
|
456
|
+
LDOUBLE_HEX(BFBC, BD96, 57B0, 5730, 7576),
|
457
|
+
/*atan_b[44] ~= 1.1421685956e+00*/
|
458
|
+
LDOUBLE_HEX(3FFF, 9232, 949E, 45E1, 3E02),
|
459
|
+
LDOUBLE_HEX(BFBD, CDB1, 87A1, 5D56, 06EC),
|
460
|
+
/*atan_b[45] ~= 1.1675501199e+00*/
|
461
|
+
LDOUBLE_HEX(3FFF, 9572, 4846, C751, B4C7),
|
462
|
+
LDOUBLE_HEX(BFBD, A1AB, 140B, 2B49, DF68),
|
463
|
+
/*atan_b[46] ~= 1.1929316443e+00*/
|
464
|
+
LDOUBLE_HEX(3FFF, 98B1, FBEF, 51FC, 635A),
|
465
|
+
LDOUBLE_HEX(3FBE, CA64, 3ADC, 86D5, FB02),
|
466
|
+
/*atan_b[47] ~= 1.2183131687e+00*/
|
467
|
+
LDOUBLE_HEX(3FFF, 9BF1, AF97, E655, 1527),
|
468
|
+
LDOUBLE_HEX(3FBE, CA1D, 3262, C2F9, D84C),
|
469
|
+
/*atan_b[48] ~= 1.2436946931e+00*/
|
470
|
+
LDOUBLE_HEX(3FFF, 9F31, 6340, 84C9, 33A7),
|
471
|
+
LDOUBLE_HEX(3FBD, AF23, 2B16, BE75, 8B87),
|
472
|
+
/*atan_b[49] ~= 1.2690762175e+00*/
|
473
|
+
LDOUBLE_HEX(3FFF, A271, 16E9, 2DBF, 7CA7),
|
474
|
+
LDOUBLE_HEX(3FBE, FDDA, 7599, 4DA2, 0F86),
|
475
|
+
/*atan_b[50] ~= 1.2944577420e+00*/
|
476
|
+
LDOUBLE_HEX(3FFF, A5B0, CA91, E197, C307),
|
477
|
+
LDOUBLE_HEX(BFBC, D265, 9307, D567, 08BE),
|
478
|
+
/*atan_b[51] ~= 1.3198392664e+00*/
|
479
|
+
LDOUBLE_HEX(3FFF, A8F0, 7E3A, A0AA, A7E2),
|
480
|
+
LDOUBLE_HEX(3FBE, BE3C, 4D06, 7D11, 0641),
|
481
|
+
/*atan_b[52] ~= 1.3452207909e+00*/
|
482
|
+
LDOUBLE_HEX(3FFF, AC30, 31E3, 6B49, 6713),
|
483
|
+
LDOUBLE_HEX(BFBE, B9DD, 9D13, C459, 6F6C),
|
484
|
+
/*atan_b[53] ~= 1.3706023154e+00*/
|
485
|
+
LDOUBLE_HEX(3FFF, AF6F, E58C, 41BD, 9EA8),
|
486
|
+
LDOUBLE_HEX(BFBD, 802F, 2153, DC49, 3698),
|
487
|
+
/*atan_b[54] ~= 1.3959838399e+00*/
|
488
|
+
LDOUBLE_HEX(3FFF, B2AF, 9935, 2449, 1D44),
|
489
|
+
LDOUBLE_HEX(3FBE, CAFC, 43E2, 3F23, 5075),
|
490
|
+
/*atan_b[55] ~= 1.4213653645e+00*/
|
491
|
+
LDOUBLE_HEX(3FFF, B5EF, 4CDE, 1325, B93A),
|
492
|
+
LDOUBLE_HEX(BFBA, 9155, 4FBC, 9598, FA3D),
|
493
|
+
/*atan_b[56] ~= 1.4467468891e+00*/
|
494
|
+
LDOUBLE_HEX(3FFF, B92F, 0087, 0E85, 296B),
|
495
|
+
LDOUBLE_HEX(3FBE, C76A, DB5B, 6055, 9EA6),
|
496
|
+
/*atan_b[57] ~= 1.4721284137e+00*/
|
497
|
+
LDOUBLE_HEX(3FFF, BC6E, B430, 1690, E405),
|
498
|
+
LDOUBLE_HEX(3FBA, A6CB, 4564, 7FF8, 4121),
|
499
|
+
/*atan_b[58] ~= 1.4975099383e+00*/
|
500
|
+
LDOUBLE_HEX(3FFF, BFAE, 67D9, 2B6A, 02AA),
|
501
|
+
LDOUBLE_HEX(BFBD, B0AE, B984, 420B, 761D),
|
502
|
+
/*atan_b[59] ~= 1.5228914629e+00*/
|
503
|
+
LDOUBLE_HEX(3FFF, C2EE, 1B82, 4D29, 2EBE),
|
504
|
+
LDOUBLE_HEX(BFBE, 9CBD, 26E8, 9FF8, E917),
|
505
|
+
/*atan_b[60] ~= 1.5482729876e+00*/
|
506
|
+
LDOUBLE_HEX(3FFF, C62D, CF2B, 7BDE, 8EE3),
|
507
|
+
LDOUBLE_HEX(BFBE, AF45, EFD8, 2A64, 49A5),
|
508
|
+
/*atan_b[61] ~= 1.5587186337e+00*/
|
509
|
+
LDOUBLE_HEX(3FFF, C784, 1799, 9E5D, D2A5),
|
510
|
+
LDOUBLE_HEX(BFBE, A231, BD90, F170, 34A5),
|
511
|
+
};
|
512
|
+
static const long double coef_poly[9][2] = {
|
513
|
+
{ -3.33333333333333333342368351437379203616728773340583e-01L, 9.03501810404587028364033466367082415937499719525463e-21L},
|
514
|
+
{ 2.00000000000000000002710505431213761085018632002175e-01L, -2.71050543121376108505536620063805076318847614178820e-21L},
|
515
|
+
{ -1.42857142857142857140921067549133027796415262855589e-01L, -1.93607530800982934641564128836546985281459293443700e-21L},
|
516
|
+
{ 1.11111111111111111109605274760436799397211871109903e-01L, 1.50583635067431171387883211317314321885579450456211e-21L},
|
517
|
+
{ -9.09090909090909090933731867556488737136533018201590e-02L, 0},
|
518
|
+
{ 7.69230769230769230779655790120052927250071661546826e-02L, 0},
|
519
|
+
{ -6.66666666666666666698289230030827212658550706692040e-02L, 0},
|
520
|
+
{ 5.88235294117647058825522430464127765503690170589834e-02L, 0},
|
521
|
+
{ -5.26315789473684210515616425929419364138084347359836e-02L, 0},
|
522
|
+
};
|
523
|
+
|
524
|
+
|
525
|
+
|
526
|
+
|
527
|
+
/*
 * atan_rn -- arctangent of a double, returned as a double.
 * NOTE(review): the _rn suffix presumably means round-to-nearest (correct
 * rounding); confirm against the library documentation.
 *
 * Structure, as visible below:
 *   1. Work on |xd| (held in xe) and restore the sign on return.
 *   2. Special cases: NaN returns xd+xd; x_abs at or above the 2^54
 *      threshold returns +/-HALFPI; x_abs below the 2^-27 threshold
 *      returns xd unchanged.
 *   3. First step: when x_abs > MIN_REDUCTION_NEEDED, pick a table index i
 *      and reduce to xred = (xe - b[i]) / (1 + xe*b[i]); then evaluate
 *      xred * (1 + xred^2 * (C3 + C5*xred^2 + C7*xred^4 + C9*xred^6))
 *      and add the hi part of atanb_table[i].
 *   4. Rounding test on low significand bits of the extended result; when
 *      it passes, the double-rounded value reshi is returned.
 *   5. Otherwise a second step redoes the computation with the *_ext
 *      double-double-extended macros and all nine coef_poly rows.
 *
 * Depends on the compiler intrinsics _Asm_getf, _Asm_frcpa and _Asm_fma.
 */
extern double atan_rn(double xd) {
|
528
|
+
/* NOTE(review): hx, sign, u and comp are never referenced in this function. */
|
529
|
+
unsigned int hx;
|
530
|
+
double sign;
|
531
|
+
double u;
|
532
|
+
double comp;
|
533
|
+
|
534
|
+
int i, i1, m;
|
535
|
+
UINT64 x_val,x_abs,sign_mask;
|
536
|
+
L_FLOAT_TYPE xe, tmp, bi, atanbhi, xred, xred2,q;
|
537
|
+
L_FLOAT_TYPE res,reshi,reslo,rn_constant,test; /* reslo and test are only used in the disabled Ziv test below */
|
538
|
+
L_FLOAT_TYPE xred4,tmp2;
|
539
|
+
L_FLOAT_TYPE a,b,e0,e1,e2,e3,q0,q1,q2,y0,y1,y2,xred2coarse; /* NOTE(review): q2, y1, y2 are never referenced */
|
540
|
+
L_FLOAT_TYPE C3,C5,C7,C9 ;
|
541
|
+
|
542
|
+
/* x_val: integer image of xd (presumably its IEEE-754 encoding -- confirm _FR_D semantics) */
|
543
|
+
x_val = _Asm_getf( _FR_D, xd );
|
544
|
+
x_abs = (x_val & ULL(7fffffffffffffff)); /* clear the top (sign) bit */
|
545
|
+
sign_mask = ((SINT64)x_val >> 63); /* either 00..00 or 11...11 */
|
546
|
+
/* NOTE(review): the line above relies on an arithmetic right shift of a negative signed value, which is implementation-defined in C. */
|
547
|
+
|
548
|
+
|
549
|
+
/* cast x to a DE register */
|
550
|
+
if(sign_mask)
|
551
|
+
xe=-xd;
|
552
|
+
else
|
553
|
+
xe=xd;
|
554
|
+
|
555
|
+
|
556
|
+
/* Filter cases */
|
557
|
+
if (__builtin_expect( x_abs >= ULL(4350000000000000), 0)) { /* x >= 2^54 */
|
558
|
+
if (xd!=xd )
|
559
|
+
return xd+xd; /* NaN */
|
560
|
+
else {/* atan(x) = +/- Pi/2 */
|
561
|
+
if(sign_mask) return -HALFPI; else return HALFPI;
|
562
|
+
}
|
563
|
+
}
|
564
|
+
else if (__builtin_expect( x_abs < ULL(3E40000000000000), 0))
|
565
|
+
/* TODO Add stuff to raise inexact flag */
|
566
|
+
return xd; /* x<2^-27 then atan(x) =~ x */
|
567
|
+
|
568
|
+
|
569
|
+
/* Now there is something to compute*/
|
570
|
+
|
571
|
+
/* load polynomial coeffs */
|
572
|
+
C3=coef_poly[0][0];
|
573
|
+
C5=coef_poly[1][0];
|
574
|
+
C7=coef_poly[2][0];
|
575
|
+
C9=coef_poly[3][0];
|
576
|
+
|
577
|
+
if (__builtin_expect(x_abs > MIN_REDUCTION_NEEDED, 0)) /* test if reduction is necessary : */
|
578
|
+
{
|
579
|
+
/* 1) Argument reduction : */
|
580
|
+
/* This constant was found by dichotomy. I am very ashamed */
|
581
|
+
rn_constant = 1.002; /* NOTE(review): only read by the disabled Ziv test, which overwrites it first */
|
582
|
+
|
583
|
+
/* compute i so that a[i] < x < a[i+1] */
|
584
|
+
/* Binary search over ab_table[].a: start at 31, then steps of 16, 8, 4, 2, 1 and a final downward adjustment. */
|
585
|
+
if (x_abs>ab_table[61].a)
|
586
|
+
i=61;
|
587
|
+
else {
|
588
|
+
i=31;
|
589
|
+
if (x_abs < ab_table[i].a) i-= 16;
|
590
|
+
else i+=16;
|
591
|
+
if (x_abs < ab_table[i].a) i-= 8;
|
592
|
+
else i+= 8;
|
593
|
+
if (x_abs < ab_table[i].a) i-= 4;
|
594
|
+
else i+= 4;
|
595
|
+
if (x_abs < ab_table[i].a) i-= 2;
|
596
|
+
else i+= 2;
|
597
|
+
if (x_abs < ab_table[i].a) i-= 1;
|
598
|
+
else i+= 1;
|
599
|
+
if (x_abs < ab_table[i].a) i-= 1;
|
600
|
+
}
|
601
|
+
|
602
|
+
bi= ab_table[i].b;
|
603
|
+
atanbhi = atanb_table[i].hi;
|
604
|
+
|
605
|
+
/* the dividend and the divisor for the argument reduction */
|
606
|
+
a = xe-bi; b = 1 + xe * bi;
|
607
|
+
|
608
|
+
|
609
|
+
#if 1
|
610
|
+
/* now we want to compute (xe - bi )/b as a DE, but
|
611
|
+
we will need the accurate quotient only later on,
|
612
|
+
we can start the computation of the polynomial with a much coarser approximation.
|
613
|
+
Saves 12 cycles.
|
614
|
+
*/
|
615
|
+
/* Algo 8.11 in Markstein book */
|
616
|
+
_Asm_frcpa(&y0, a, b, _SF1); /* y0: presumably the hardware reciprocal approximation used as the seed for a/b -- confirm */
|
617
|
+
/* e0 = 1 - b*y0 is the residual of the seed; q0, q1 and xred are successively refined quotients. */
|
618
|
+
e0 = 1 - b*y0; q0 = a*y0;
|
619
|
+
e2 = e0 + e0*e0; e1 = e0*e0;
|
620
|
+
e3 = e0 + e1*e1; q1 = q0+q0*e2;
|
621
|
+
xred = q0 + q1*e3; xred2coarse = q1*q1; /* 62 bits in xred, more than enough */
|
622
|
+
xred2 = xred*xred; xred4 = xred2coarse*xred2coarse;
|
623
|
+
|
624
|
+
|
625
|
+
|
626
|
+
/* polynomial evaluation */
|
627
|
+
/* The high-order part uses the coarse square, which is available earlier than xred2. */
|
628
|
+
tmp2 = C7 + xred2coarse * C9 ;
|
629
|
+
|
630
|
+
/* here we need xred2, xred2coarse loses a lot of precision to win 3 cycles. */
|
631
|
+
tmp = C3 + xred2 * C5;
|
632
|
+
|
633
|
+
q = tmp + xred4 * tmp2;
|
634
|
+
|
635
|
+
|
636
|
+
#else
|
637
|
+
xred=a/b;
|
638
|
+
xred2=xred*xred;
|
639
|
+
xred4=xred2*xred2;
|
640
|
+
tmp2 = C7 + xred2 * C9 ;
|
641
|
+
tmp = C3 + xred2 * C5;
|
642
|
+
q = tmp + xred4 * tmp2;
|
643
|
+
#endif
|
644
|
+
/* tmp = 1 + xred^2 * q, so that xred*tmp is the polynomial approximation of atan(xred) */
|
645
|
+
tmp = 1+q*xred2;
|
646
|
+
/* reconstruction : atan(|x|) = atan(b[i]) + atan(xred) */
|
647
|
+
res = atanbhi+xred*tmp;
|
648
|
+
/* reshi = atanbhi + xred*tmp; with round to double */
|
649
|
+
reshi = _Asm_fma( _PC_D, xred, tmp, atanbhi, _SF0 );
|
650
|
+
|
651
|
+
}
|
652
|
+
else
|
653
|
+
/* no reduction needed */
|
654
|
+
{
|
655
|
+
|
656
|
+
|
657
|
+
/* Polynomial evaluation */
|
658
|
+
|
659
|
+
xred2 = xe*xe;
|
660
|
+
/*poly eval */
|
661
|
+
xred4=xred2*xred2;
|
662
|
+
tmp2 = C7 + xred2 * C9 ;
|
663
|
+
tmp = C3 + xred2 * C5;
|
664
|
+
q = tmp + xred4 * tmp2;
|
665
|
+
q *= xred2;
|
666
|
+
|
667
|
+
/* res keeps the result without the double-precision rounding applied to reshi; it feeds the rounding test below. */
|
668
|
+
/* compute q*xe+xe with round to double */
|
669
|
+
res = _Asm_fma( _PC_NONE, q, xe, xe, _SF1 );
|
670
|
+
reshi = _Asm_fma( _PC_D, q, xe, xe, _SF0 );
|
671
|
+
}
|
672
|
+
|
673
|
+
#if 0 /* To time the first step only */
|
674
|
+
if(sign_mask)
|
675
|
+
return -reshi;
|
676
|
+
else
|
677
|
+
return reshi;
|
678
|
+
#endif
|
679
|
+
/* Rounding test.  The if opened inside this #if/#else pairs with the else after #endif, which holds the second step. */
|
680
|
+
#if 1
|
681
|
+
i1 = _Asm_getf( _FR_SIG, res); /* presumably the significand of res, truncated to int -- confirm _FR_SIG semantics */
|
682
|
+
m = i1 & (0xff<<3); /* keep bits 3..10 */
|
683
|
+
if(__builtin_expect((m!=(0x7f<<3) && m!=(0x80<<3)), 1+1==2)) { /* accept reshi unless those bits read 0x7f or 0x80; presumably those patterns mean res is too close to a rounding boundary */
|
684
|
+
if(sign_mask)
|
685
|
+
return -reshi;
|
686
|
+
else
|
687
|
+
return reshi;
|
688
|
+
}
|
689
|
+
#else
|
690
|
+
/* ROUNDING TEST in the style of Ziv */
|
691
|
+
/* This constant was found by dichotomy. I am very ashamed */
|
692
|
+
rn_constant = 1.01;
|
693
|
+
reslo = res - reshi;
|
694
|
+
test=_Asm_fma( _PC_D, reslo, rn_constant, reshi, _SF0 );
|
695
|
+
|
696
|
+
if (__builtin_expect(reshi == test, 1+1==2)) {
|
697
|
+
if(sign_mask)
|
698
|
+
return -reshi;
|
699
|
+
else
|
700
|
+
return reshi;
|
701
|
+
}
|
702
|
+
#endif
|
703
|
+
|
704
|
+
/* Second step: reached only when the rounding test above rejected the first-step result. */
|
705
|
+
else {
|
706
|
+
|
707
|
+
/******************************************************************/
|
708
|
+
/* Double-double-extended */
|
709
|
+
L_FLOAT_TYPE tmphi, tmplo, x0hi, x0lo, xmBihi, xmBilo, Xredhi, Xredlo, Xred2, qhi,qlo, q, Xred2hi,Xred2lo, atanhi,atanlo; /* this q shadows the outer q */
|
710
|
+
int j;
|
711
|
+
|
712
|
+
|
713
|
+
|
714
|
+
|
715
|
+
#if EVAL_PERF
|
716
|
+
crlibm_second_step_taken++;
|
717
|
+
#endif
|
718
|
+
|
719
|
+
#if DEBUG
|
720
|
+
printf("Toto\n");
|
721
|
+
#endif
|
722
|
+
/* Same condition as in the first step, so i is set whenever this branch is taken. */
|
723
|
+
if (__builtin_expect(x_abs > MIN_REDUCTION_NEEDED, 0)) {/* test if reduction is necessary : */
|
724
|
+
if(i==61){
|
725
|
+
Add12_ext( xmBihi , xmBilo , xe , -ab_table[61].b); /* numerator xe - b[61] kept as a hi+lo pair */
|
726
|
+
}
|
727
|
+
else {
|
728
|
+
xmBihi = xe-ab_table[i].b; /* presumably exact for these intervals, hence lo = 0 -- confirm */
|
729
|
+
xmBilo = 0.0;
|
730
|
+
}
|
731
|
+
/* denominator 1 + xe*b[i] as a hi+lo pair (x0hi, x0lo) */
|
732
|
+
Mul12_ext(tmphi,tmplo, xe, (ab_table[i].b));
|
733
|
+
/* Operand order is swapped on xe > 1; presumably Add22_ext wants the larger-magnitude operand first -- confirm against the macro. */
|
734
|
+
if (xe > 1) /* TODO replace with x_abs */
|
735
|
+
Add22_ext(x0hi,x0lo,tmphi,tmplo, 1.0,0.0);
|
736
|
+
else {
|
737
|
+
Add22_ext(x0hi , x0lo , 1.0,0.0,tmphi,tmplo);
|
738
|
+
}
|
739
|
+
|
740
|
+
#if 1
|
741
|
+
Div22_ext(Xredhi, Xredlo, xmBihi , xmBilo , x0hi,x0lo);
|
742
|
+
#else
|
743
|
+
Xredhi=1; Xredlo=0; /* to time the Div22*/
|
744
|
+
#endif
|
745
|
+
|
746
|
+
#if DEBUG
|
747
|
+
printf("i=%d, num=%1.15e den=%1.15e\n",i, (double)xmBihi, (double)x0hi);
|
748
|
+
printf("Xred=%1.15e\n", (double)Xredhi);
|
749
|
+
#endif
|
750
|
+
/* Xred2 is the single-word square used for the high-order terms; Xred2hi/Xred2lo is the hi+lo square used for the low-order terms. */
|
751
|
+
Xred2 = Xredhi*Xredhi;
|
752
|
+
Mul22_ext(Xred2hi,Xred2lo,Xredhi,Xredlo,Xredhi, Xredlo);
|
753
|
+
|
754
|
+
/*poly eval */
|
755
|
+
/* coefficients 4..8 in plain Horner form on Xred2 */
|
756
|
+
q = (coef_poly[4][0]+Xred2*
|
757
|
+
(coef_poly[5][0]+Xred2*
|
758
|
+
(coef_poly[6][0]+Xred2*
|
759
|
+
(coef_poly[7][0]+
|
760
|
+
(Xred2*coef_poly[8][0])))));
|
761
|
+
|
762
|
+
Mul12_ext(qhi, qlo, q, Xred2);
|
763
|
+
/* coefficients 3..0 with their {hi, lo} pairs, Horner form on (Xred2hi, Xred2lo) */
|
764
|
+
for(j=3;j>=0;j--)
|
765
|
+
{
|
766
|
+
Add22_ext(qhi,qlo, (coef_poly[j][0]), (coef_poly[j][1]), qhi,qlo);
|
767
|
+
Mul22_ext(qhi,qlo, qhi,qlo, Xred2hi,Xred2lo);
|
768
|
+
}
|
769
|
+
/* (qhi, qlo) = Xred + Xred * (qhi, qlo) */
|
770
|
+
Mul22_ext(qhi,qlo, Xredhi,Xredlo, qhi,qlo);
|
771
|
+
Add22_ext(qhi,qlo, Xredhi,Xredlo, qhi,qlo);
|
772
|
+
|
773
|
+
/* reconstruction : atan(|x|) = atan(b[i]) + atan(xred) */
|
774
|
+
Add22_ext(atanhi,atanlo, atanb_table[i].hi, atanb_table[i].lo, qhi,qlo);
|
775
|
+
}
|
776
|
+
else
|
777
|
+
/* no reduction needed */
|
778
|
+
{
|
779
|
+
|
780
|
+
#if DEBUG
|
781
|
+
printf("Tata\n");
|
782
|
+
#endif
|
783
|
+
/* Polynomial evaluation */
|
784
|
+
Mul12_ext( Xred2hi,Xred2lo,xe,xe);
|
785
|
+
|
786
|
+
/*poly eval - don't take risks, keep plain Horner */
|
787
|
+
|
788
|
+
q = coef_poly[8][0];
|
789
|
+
q = coef_poly[7][0]+Xred2hi*q;
|
790
|
+
q = coef_poly[6][0]+Xred2hi*q;
|
791
|
+
q = coef_poly[5][0]+Xred2hi*q;
|
792
|
+
|
793
|
+
Add12_ext(qhi,qlo, coef_poly[4][0], Xred2hi*q);
|
794
|
+
#if DEBUG
|
795
|
+
printf(" qhi+ql = %1.50Le + %1.50Le\n",(long double)qhi, (long double)qlo);
|
796
|
+
print_debug("qhi", qhi);
|
797
|
+
print_debug("qlo", qlo);
|
798
|
+
#endif
|
799
|
+
Mul22_ext(qhi,qlo, qhi,qlo, Xred2hi,Xred2lo);
|
800
|
+
#if DEBUG
|
801
|
+
printf(" Xred2 = %1.50Le + %1.50Le\n",(long double)Xred2hi, (long double)Xred2lo);
|
802
|
+
printf(" qhi+ql = %1.50Le + %1.50Le\n",(long double)qhi, (long double)qlo);
|
803
|
+
print_debug("qhi", qhi);
|
804
|
+
print_debug("qlo", qlo);
|
805
|
+
#endif
|
806
|
+
/* coefficients 3..0 with their {hi, lo} pairs, Horner form on (Xred2hi, Xred2lo) */
|
807
|
+
for(j=3;j>=0;j--)
|
808
|
+
{
|
809
|
+
Add22_ext(qhi,qlo, (coef_poly[j][0]), (coef_poly[j][1]), qhi,qlo);
|
810
|
+
Mul22_ext(qhi,qlo, qhi,qlo, Xred2hi,Xred2lo);
|
811
|
+
}
|
812
|
+
/* (qhi, qlo) = xe * (qhi, qlo); xe is passed as the pair (xe, 0) */
|
813
|
+
Mul22_ext (qhi,qlo, xe,0, qhi,qlo);
|
814
|
+
|
815
|
+
#if DEBUG
|
816
|
+
printf(" qhi+ql = %1.50Le + %1.50Le\n",(long double)qhi, (long double)qlo);
|
817
|
+
print_debug("qhi", qhi);
|
818
|
+
print_debug("qlo", qlo);
|
819
|
+
#endif
|
820
|
+
/* Now comes the addition sequence proven in the TOMS paper */
|
821
|
+
Add12_ext(atanhi,atanlo,xe,qhi);
|
822
|
+
#if DEBUG
|
823
|
+
print_debug("atanhi", atanhi);
|
824
|
+
printf(" atan hi+lo %1.50Le + %1.50Le\n",(long double)atanhi, (long double)atanlo);
|
825
|
+
#endif
|
826
|
+
atanlo += qlo;
|
827
|
+
|
828
|
+
}
|
829
|
+
|
830
|
+
#if DEBUG
|
831
|
+
printf(" atan hi+lo %1.50Le + %1.50Le\n",(long double)atanhi, (long double)atanlo);
|
832
|
+
printf(" %1.50e + %1.50e\n",(double)atanhi,(double) atanlo);
|
833
|
+
printf(" %1.50Le\n",(long double)(atanhi + atanlo));
|
834
|
+
printf(" ");
|
835
|
+
#endif
|
836
|
+
/* Final rounding: the sum atanhi+atanlo is converted to double, then the sign of the input is restored. */
|
837
|
+
if(sign_mask)
|
838
|
+
res= -(double) (atanhi+atanlo);
|
839
|
+
else
|
840
|
+
res= (double) (atanhi+atanlo);
|
841
|
+
|
842
|
+
return res;
|
843
|
+
|
844
|
+
}
|
845
|
+
}
|
846
|
+
|