intervals 0.3.56

Sign up to get free protection for your applications and to get access to all the features.
Files changed (131) hide show
  1. data/VERSION.txt +1 -0
  2. data/ext/crlibm/AUTHORS +2 -0
  3. data/ext/crlibm/COPYING +504 -0
  4. data/ext/crlibm/ChangeLog +80 -0
  5. data/ext/crlibm/INSTALL +182 -0
  6. data/ext/crlibm/Makefile.am +84 -0
  7. data/ext/crlibm/Makefile.in +530 -0
  8. data/ext/crlibm/NEWS +0 -0
  9. data/ext/crlibm/README +31 -0
  10. data/ext/crlibm/TODO +47 -0
  11. data/ext/crlibm/VERSION +1 -0
  12. data/ext/crlibm/aclocal.m4 +989 -0
  13. data/ext/crlibm/atan-itanium.c +846 -0
  14. data/ext/crlibm/atan-pentium.c +261 -0
  15. data/ext/crlibm/atan_accurate.c +244 -0
  16. data/ext/crlibm/atan_accurate.h +191 -0
  17. data/ext/crlibm/atan_fast.c +324 -0
  18. data/ext/crlibm/atan_fast.h +678 -0
  19. data/ext/crlibm/config.guess +1461 -0
  20. data/ext/crlibm/config.sub +1566 -0
  21. data/ext/crlibm/configure +7517 -0
  22. data/ext/crlibm/configure.ac +364 -0
  23. data/ext/crlibm/crlibm.h +125 -0
  24. data/ext/crlibm/crlibm_config.h +149 -0
  25. data/ext/crlibm/crlibm_config.h.in +148 -0
  26. data/ext/crlibm/crlibm_private.c +293 -0
  27. data/ext/crlibm/crlibm_private.h +658 -0
  28. data/ext/crlibm/csh_fast.c +631 -0
  29. data/ext/crlibm/csh_fast.h +771 -0
  30. data/ext/crlibm/double-extended.h +496 -0
  31. data/ext/crlibm/exp-td.c +962 -0
  32. data/ext/crlibm/exp-td.h +685 -0
  33. data/ext/crlibm/exp_accurate.c +197 -0
  34. data/ext/crlibm/exp_accurate.h +85 -0
  35. data/ext/crlibm/gappa/log-de-E0-logir0.gappa +106 -0
  36. data/ext/crlibm/gappa/log-de-E0.gappa +79 -0
  37. data/ext/crlibm/gappa/log-de.gappa +81 -0
  38. data/ext/crlibm/gappa/log-td-E0-logir0.gappa +126 -0
  39. data/ext/crlibm/gappa/log-td-E0.gappa +143 -0
  40. data/ext/crlibm/gappa/log-td-accurate-E0-logir0.gappa +230 -0
  41. data/ext/crlibm/gappa/log-td-accurate-E0.gappa +213 -0
  42. data/ext/crlibm/gappa/log-td-accurate.gappa +217 -0
  43. data/ext/crlibm/gappa/log-td.gappa +156 -0
  44. data/ext/crlibm/gappa/trigoSinCosCase3.gappa +204 -0
  45. data/ext/crlibm/gappa/trigoTanCase2.gappa +73 -0
  46. data/ext/crlibm/install-sh +269 -0
  47. data/ext/crlibm/log-de.c +431 -0
  48. data/ext/crlibm/log-de.h +732 -0
  49. data/ext/crlibm/log-td.c +852 -0
  50. data/ext/crlibm/log-td.h +819 -0
  51. data/ext/crlibm/log10-td.c +906 -0
  52. data/ext/crlibm/log10-td.h +823 -0
  53. data/ext/crlibm/log2-td.c +935 -0
  54. data/ext/crlibm/log2-td.h +821 -0
  55. data/ext/crlibm/maple/atan.mpl +359 -0
  56. data/ext/crlibm/maple/common-procedures.mpl +997 -0
  57. data/ext/crlibm/maple/csh.mpl +446 -0
  58. data/ext/crlibm/maple/double-extended.mpl +151 -0
  59. data/ext/crlibm/maple/exp-td.mpl +195 -0
  60. data/ext/crlibm/maple/log-de.mpl +243 -0
  61. data/ext/crlibm/maple/log-td.mpl +316 -0
  62. data/ext/crlibm/maple/log10-td.mpl +345 -0
  63. data/ext/crlibm/maple/log2-td.mpl +334 -0
  64. data/ext/crlibm/maple/trigo.mpl +728 -0
  65. data/ext/crlibm/maple/triple-double.mpl +58 -0
  66. data/ext/crlibm/missing +198 -0
  67. data/ext/crlibm/mkinstalldirs +40 -0
  68. data/ext/crlibm/rem_pio2_accurate.c +219 -0
  69. data/ext/crlibm/rem_pio2_accurate.h +53 -0
  70. data/ext/crlibm/scs_lib/AUTHORS +3 -0
  71. data/ext/crlibm/scs_lib/COPYING +504 -0
  72. data/ext/crlibm/scs_lib/ChangeLog +16 -0
  73. data/ext/crlibm/scs_lib/INSTALL +215 -0
  74. data/ext/crlibm/scs_lib/Makefile.am +18 -0
  75. data/ext/crlibm/scs_lib/Makefile.in +328 -0
  76. data/ext/crlibm/scs_lib/NEWS +0 -0
  77. data/ext/crlibm/scs_lib/README +9 -0
  78. data/ext/crlibm/scs_lib/TODO +4 -0
  79. data/ext/crlibm/scs_lib/addition_scs.c +623 -0
  80. data/ext/crlibm/scs_lib/config.guess +1461 -0
  81. data/ext/crlibm/scs_lib/config.sub +1566 -0
  82. data/ext/crlibm/scs_lib/configure +6226 -0
  83. data/ext/crlibm/scs_lib/division_scs.c +110 -0
  84. data/ext/crlibm/scs_lib/double2scs.c +174 -0
  85. data/ext/crlibm/scs_lib/install-sh +269 -0
  86. data/ext/crlibm/scs_lib/missing +198 -0
  87. data/ext/crlibm/scs_lib/mkinstalldirs +40 -0
  88. data/ext/crlibm/scs_lib/multiplication_scs.c +456 -0
  89. data/ext/crlibm/scs_lib/poly_fct.c +112 -0
  90. data/ext/crlibm/scs_lib/print_scs.c +73 -0
  91. data/ext/crlibm/scs_lib/rand_scs.c +63 -0
  92. data/ext/crlibm/scs_lib/scs.h +353 -0
  93. data/ext/crlibm/scs_lib/scs2double.c +391 -0
  94. data/ext/crlibm/scs_lib/scs2mpf.c +58 -0
  95. data/ext/crlibm/scs_lib/scs2mpfr.c +61 -0
  96. data/ext/crlibm/scs_lib/scs_private.c +23 -0
  97. data/ext/crlibm/scs_lib/scs_private.h +133 -0
  98. data/ext/crlibm/scs_lib/tests/tbx_timing.h +102 -0
  99. data/ext/crlibm/scs_lib/wrapper_scs.h +486 -0
  100. data/ext/crlibm/scs_lib/zero_scs.c +52 -0
  101. data/ext/crlibm/stamp-h.in +1 -0
  102. data/ext/crlibm/tests/Makefile.am +43 -0
  103. data/ext/crlibm/tests/Makefile.in +396 -0
  104. data/ext/crlibm/tests/blind_test.c +148 -0
  105. data/ext/crlibm/tests/generate_test_vectors.c +258 -0
  106. data/ext/crlibm/tests/soak_test.c +334 -0
  107. data/ext/crlibm/tests/test_common.c +627 -0
  108. data/ext/crlibm/tests/test_common.h +28 -0
  109. data/ext/crlibm/tests/test_perf.c +570 -0
  110. data/ext/crlibm/tests/test_val.c +249 -0
  111. data/ext/crlibm/trigo_accurate.c +500 -0
  112. data/ext/crlibm/trigo_accurate.h +331 -0
  113. data/ext/crlibm/trigo_fast.c +1219 -0
  114. data/ext/crlibm/trigo_fast.h +639 -0
  115. data/ext/crlibm/triple-double.h +878 -0
  116. data/ext/extconf.rb +31 -0
  117. data/ext/fpu.c +107 -0
  118. data/ext/jamis-mod.rb +591 -0
  119. data/lib/fpu.rb +287 -0
  120. data/lib/interval.rb +1170 -0
  121. data/lib/intervals.rb +212 -0
  122. data/lib/struct_float.rb +133 -0
  123. data/test/data_atan.txt +360 -0
  124. data/test/data_cos.txt +346 -0
  125. data/test/data_cosh.txt +3322 -0
  126. data/test/data_exp.txt +3322 -0
  127. data/test/data_log.txt +141 -0
  128. data/test/data_sin.txt +140 -0
  129. data/test/data_sinh.txt +3322 -0
  130. data/test/data_tan.txt +342 -0
  131. metadata +186 -0
@@ -0,0 +1,846 @@
1
+ /*
2
+ *this function computes a correctly rounded atan using double-extended arithmetic, FMAs and other dirty tricks
3
+ *
4
+ * Author : Nicolas Gast, Florent de Dinechin
5
+ * nicolas.gast@ens.fr
6
+ *
7
+
8
+ WARNING : This code is dirty and experimental, and remains here for
9
+ history. A cleaner, portable version using double-extended arithmetic will be available some day as atan-de.c
10
+ For this reason there is only atan_rn so it fails the "make check" for all the other rounding modes
11
+
12
+
13
+ To test within crlibm: (tested with Intel icc compiler version 8.1)
14
+ icc -Qoption,cpp,--extended_float_types -IPF_fp_speculationsafe -c atan-itanium.c; mv atan-itanium.o atan_fast.o; make
15
+
16
+
17
+
18
+
19
+ This file is completely self-contained so that we can change the crlibm infrastructure without having to keep this file in sync with it.
20
+
21
+
22
+ */
23
+
24
+ /* WARNING Due to some quantum effect not understood so far,
25
+ turning debugging on may change the result */
26
+ #define DEBUG 0
27
+
28
+
29
+
30
+ typedef __int64 INT64;
31
+ typedef signed __int64 SINT64;
32
+ typedef unsigned __int64 UINT64;
33
+
34
+ /* FP register type */
35
+ typedef __fpreg L_FLOAT_TYPE;
36
+
37
+ /* Almost the same as the previous, except exponent field smaller, and morally in memory */
38
+ typedef long double LC_FLOAT_TYPE;
39
+
40
+ /* The double-double-ext type, using registers */
41
+ typedef struct __X_FLOAT_TYPE_TAG {
42
+ L_FLOAT_TYPE hi,lo; /* order is critical! */
43
+ } X_FLOAT_TYPE;
44
+
45
+ /* The double-double-ext type, in memory */
46
+ typedef struct __XC_FLOAT_TYPE_TAG {
47
+ LC_FLOAT_TYPE hi,lo; /* order is critical! */
48
+ } XC_FLOAT_TYPE;
49
+
50
+
51
+ /* For debugging */
52
+ typedef union {
53
+ int i[3];
54
+ long double d;
55
+ } db_ext_number;
56
+
57
+
58
+ typedef enum {
59
+ _PC_S = 1 /* single .s */
60
+ ,_PC_D = 2 /* double .d */
61
+ ,_PC_NONE = 3 /* dynamic */
62
+ } _Asm_pc;
63
+
64
+ /* Table 1-22: legal getf/setf floating-point register access completers */
65
+ typedef enum {
66
+ _FR_S = 1 /* single form .s */
67
+ ,_FR_D = 2 /* double form .d */
68
+ ,_FR_EXP = 3 /* exponent form .exp */
69
+ ,_FR_SIG = 4 /* significand form .sig */
70
+ } _Asm_fr_access;
71
+
72
+ /* Table 1-24: legal floating-point FPSR status field completers (.sf) */
73
+ typedef enum {
74
+ _SF0 = 0 /* FPSR status field 0 .s0 */
75
+ ,_SF1 = 1 /* FPSR status field 1 .s1 */
76
+ ,_SF2 = 2 /* FPSR status field 2 .s2 */
77
+ ,_SF3 = 3 /* FPSR status field 3 .s3 */
78
+ } _Asm_sf;
79
+
80
/*
 * print_debug(msg, _z): debugging aid.
 *
 * Prints the string msg verbatim, then the raw storage of the value _z
 * (converted to long double through the db_ext_number union) as three
 * hexadecimal words: i[2] masked to its low 16 bits, then i[1], then i[0].
 * NOTE(review): on a little-endian 80-bit extended layout the masked word is
 * presumably the sign/exponent field -- confirm for the target ABI.
 *
 * Differences from the historical version:
 *  - msg is printed through "%s", so a '%' inside msg is no longer
 *    interpreted as a conversion specifier;
 *  - the top word is masked with 0xffff on an unsigned value instead of
 *    being shifted left and right as a signed int, which was undefined when
 *    bit 15 was set and printed sign-extension bits (ffffXXXX);
 *  - the union is zero-initialised, so bytes not covered by the long double
 *    are deterministic.
 *
 * The expansion is a brace block (not do/while) to stay compatible with
 * existing call sites. Both arguments are evaluated exactly once.
 */
#define print_debug(msg, _z) {\
  db_ext_number dbg = {{0, 0, 0}};\
  dbg.d = (_z);\
  printf("%s", (msg));\
  printf(" %08x %08x %08x \n",\
         (unsigned)dbg.i[2] & 0xffffu, (unsigned)dbg.i[1], (unsigned)dbg.i[0]);\
}
86
+
87
+
88
/*
 * Add12_ext(s, r, a, b): sum with error term, in L_FLOAT_TYPE arithmetic.
 *
 *   s = a + b            (rounded sum)
 *   r = b + (a - s)      (correction term)
 *
 * s and r must be assignable lvalues; a and b are each evaluated exactly
 * once (they are copied into the locals _a and _b first), so arguments with
 * side effects are safe. The historical version subtracted the raw argument
 * `a` instead of the copy `_a`, evaluating it a second time and without
 * parentheses; it also declared an unused local `_s`.
 *
 * NOTE(review): this has the shape of the classic Fast2Sum step, for which r
 * is the exact rounding error only when |a| >= |b| -- confirm that call
 * sites respect that ordering.
 *
 * The expansion is a brace block (not do/while) to stay compatible with
 * existing call sites.
 */
#define Add12_ext(s, r, a, b)      \
  { L_FLOAT_TYPE _z, _a, _b;       \
    _a = (a); _b = (b);            \
    s = (_a + _b);                 \
    _z = (_a - s);                 \
    r = (_b + _z); }
94
+
95
+
96
/*
 * Add22_ext(zh,zl, xh,xl, yh,yl): adds the two-term values (xh,xl) and
 * (yh,yl) and stores a two-term result in (zh,zl).
 *
 *   r  = xh + yh
 *   s  = (((xh - r) + yh) + yl) + xl     -- accumulated in exactly this order
 *   zh = r + s
 *   zl = (r - zh) + s
 *
 * zh and zl must be lvalues; zh is read back after it is written.
 * The temporaries are named r and s, so argument expressions must not refer
 * to caller variables with those names. xh and yh are evaluated twice.
 */
+ #define Add22_ext(zh,zl,xh,xl,yh,yl) \
97
+ do {\
98
+ L_FLOAT_TYPE r,s;\
99
+ r = (xh)+(yh);\
100
+ s = (xh)-r;\
101
+ s+= (yh);\
102
+ s+= (yl);\
103
+ s+= (xl);\
104
+ zh = r+s;\
105
+ zl = r - (zh);\
106
+ zl+= s;\
107
+ } while(0)
108
+
109
+
110
+
111
/*
 * Mul12_ext(_rh, _rl, _u, _v): product with error term.
 *
 *   _rh = _u * _v                               (rounded product)
 *   _rl = _Asm_fms(3, _u, _v, _rh, 1)
 *
 * NOTE(review): _Asm_fms is a compiler intrinsic not defined in this file;
 * presumably a fused multiply-subtract computing _u*_v - _rh, which would
 * make _rl the rounding error of the product -- confirm against the
 * compiler documentation. The literal arguments 3 and 1 are annotated
 * _PC_NONE and _SF1 in the original code.
 *
 * _rh and _rl must be assignable lvalues. _u and _v are now parenthesised,
 * so compound expressions such as `a + b` multiply as a unit (the
 * historical version expanded `a + b*c`). They are still evaluated twice,
 * so they must be free of side effects; they are deliberately not copied
 * into temporaries, to avoid changing the operand types of the product.
 *
 * The expansion is a brace block (not do/while) to stay compatible with
 * existing call sites.
 */
#define Mul12_ext(_rh,_rl,_u,_v)                                      \
  {                                                                   \
    _rh = (_u)*(_v);                                                  \
    _rl = _Asm_fms( 3/*_PC_NONE*/, (_u), (_v), (_rh), 1/*_SF1*/ );    \
  }
116
/*
 * Mul22_ext(zh,zl, xh,xl, yh,yl): multiplies the two-term values (xh,xl)
 * and (yh,yl) and stores a two-term result in (zh,zl).
 *
 *   ph = xh * yh
 *   pl = _Asm_fms(3, xh, yh, ph, 1)   -- NOTE(review): presumably the fused
 *                                        error term xh*yh - ph; confirm
 *   pl = xh*yl + pl
 *   pl = xl*yh + pl
 *   zh = ph + pl
 *   zl = (ph - zh) + pl
 *
 * The xl*yl term is not computed. The doubled ';;' after the _Asm_fms call
 * is just an empty statement. The temporaries are named ph and pl, so
 * argument expressions must not refer to caller variables with those names.
 * zh and zl must be lvalues; xh and yh are evaluated several times.
 */
+ #define Mul22_ext(zh,zl, xh,xl, yh,yl) \
117
+ { \
118
+ L_FLOAT_TYPE ph, pl; \
119
+ ph = (xh)*(yh); \
120
+ pl = _Asm_fms( 3/*_PC_NONE*/, xh, yh, ph, 1/*_SF1*/ );; \
121
+ pl = (xh)*(yl) + pl; \
122
+ pl = (xl)*(yh) + pl; \
123
+ zh = ph+pl; \
124
+ zl = ph - zh; \
125
+ zl += pl; \
126
+ }
127
+
128
/*
 * Div22_ext(zh,zl, xh,xl, yh,yl): divides the two-term value (xh,xl) by
 * (yh,yl) and stores a two-term quotient in (zh,zl).
 *
 *   _ch        = xh / yh                        first quotient estimate
 *   (_uh,_ul)  = Mul12_ext of _ch and yh        product with error term
 *   _cl        = ((((xh - _uh) - _ul) + xl) - _ch*yl) / yh
 *   zh         = _ch + _cl
 *   zl         = (_ch - zh) + _cl
 *
 * The correction is accumulated in exactly the order shown. zh and zl must
 * be lvalues; zh is read back after it is written. xh and yh are evaluated
 * more than once. No check is made for yh == 0.
 */
+ #define Div22_ext(zh,zl,xh,xl,yh,yl) \
129
+ { \
130
+ L_FLOAT_TYPE _ch,_cl,_uh,_ul; \
131
+ _ch=(xh)/(yh); \
132
+ Mul12_ext(_uh,_ul,_ch,(yh)); \
133
+ _cl=(xh)-_uh; \
134
+ _cl -= _ul; \
135
+ _cl += (xl); \
136
+ _cl -= _ch*(yl); \
137
+ _cl /= (yh); \
138
+ zh = _ch + _cl; \
139
+ zl=(_ch-(zh)); zl += _cl; \
140
+ }
141
+
142
+
143
+
144
+
145
+
146
+ #define ULL(bits) 0x##bits##uLL
147
+
148
+ #if (!defined(EM64T) && defined(__linux__) && defined(IA32))
149
+ # define LDOUBLE_ALIGN 12 /* IA32 Linux: 12-byte alignment */
150
+ #else
151
+ # define LDOUBLE_ALIGN 16 /* EM64T, IA32 Win or IPF Win/Linux: 16-byte alignm\
152
+ ent */
153
+ #endif
154
+
155
+ #if (LDOUBLE_ALIGN == 16)
156
+ #define _XPD_ ,0x0000,0x0000,0x0000
157
+ #else /*12*/
158
+ #define _XPD_ ,0x0000
159
+ #endif
160
+
161
+ #define LDOUBLE_HEX(w4,w3,w2,w1,w0) 0x##w0,0x##w1,0x##w2,0x##w3,0x##w4 _XPD_ /*LITTLE_ENDIAN*/
162
+
163
+
164
+
165
/* Stub entry point: the argument is ignored and the result is always 0. */
double dde_atan_rn(double x)
{
    (void)x;
    return 0.0;
}
168
+
169
/*
 * Stub entry point: the argument is ignored and the result is always 0.
 * The file header notes that only atan_rn is actually implemented here.
 */
double atan_rd(double x)
{
    (void)x;
    return 0.0;
}
172
+
173
/*
 * Stub entry point: the argument is ignored and the result is always 0.
 * The file header notes that only atan_rn is actually implemented here.
 */
double atan_ru(double x)
{
    (void)x;
    return 0.0;
}
176
+
177
/*
 * Stub entry point: the argument is ignored and the result is always 0.
 * The file header notes that only atan_rn is actually implemented here.
 */
double atan_rz(double x)
{
    (void)x;
    return 0.0;
}
180
+
181
+
182
+ static const double HALFPI = 1.57079632679489655799898173427209258079528808593750e+00;
183
+ #define MIN_REDUCTION_NEEDED ULL(3F89FDF8BCCE533D)
184
+ #define A 0
185
+ #define B 1
186
+ #define ATAN_BHI 0
187
+ #define ATAN_BLO 1
188
+ #define epsilon 2.04221581890623872536809598138553304900554884091659e-19
189
+ #define epsilon_no_red 1.56771350764719825686165002299335165493769973908433e-19
190
+ #define TWO_M_64 5.42101086242752217003726400434970855712890625000000e-20
191
+ #define TWO_10 1.02400000000000000000000000000000000000000000000000e+03
192
+
193
+ __declspec(align(16))
194
+
195
+ static const struct{long long int a; double b;} ab_table[62] = {
196
+ { /*a[0] ~= 1.26914436930661800408e-02 */ ULL(3F89FDF8BCCE533D),
197
+ /*b[0] = */ 2.53869765124364009378776785297304741106927394866943e-02},
198
+ { /*a[1] ~= 3.80906929270782388369e-02 */ ULL(3FA3809F90CEBC31),
199
+ /*b[1] = */ 5.08066978456951506837313559117319528013467788696289e-02},
200
+ { /*a[2] ~= 6.35391122156262234502e-02 */ ULL(3FB0441968FBA526),
201
+ /*b[2] = */ 7.62920780032335793530151590857713017612695693969727e-02},
202
+ { /*a[3] ~= 8.90697640843219481662e-02 */ ULL(3FB6CD46ABCDFA25),
203
+ /*b[3] = */ 1.01876371166982934712841313285025535151362419128418e-01},
204
+ { /*a[4] ~= 1.14716138034642060814e-01 */ ULL(3FBD5E096D2EA546),
205
+ /*b[4] = */ 1.27593346472767293908745500630175229161977767944336e-01},
206
+ { /*a[5] ~= 1.40512327929006382604e-01 */ ULL(3FC1FC4ED691E891),
207
+ /*b[5] = */ 1.53477468508642272970732278736250009387731552124023e-01},
208
+ { /*a[6] ~= 1.66493216120905490981e-01 */ ULL(3FC54FA6531F610B),
209
+ /*b[6] = */ 1.79564085612852891715718328669026959687471389770508e-01},
210
+ { /*a[7] ~= 1.92694666476959805056e-01 */ ULL(3FC8AA380550EAF1),
211
+ /*b[7] = */ 2.05889628199359991933548030829115305095911026000977e-01},
212
+ { /*a[8] ~= 2.19153728611415840590e-01 */ ULL(3FCC0D3AB8975BD9),
213
+ /*b[8] = */ 2.32491819536184141092860500066308304667472839355469e-01},
214
+ { /*a[9] ~= 2.45908855876056406352e-01 */ ULL(3FCF79F0FEE46885),
215
+ /*b[9] = */ 2.59409901651160901270287695297156460583209991455078e-01},
216
+ { /*a[10] ~= 2.73000139926648314534e-01 */ ULL(3FD178D5943274CA),
217
+ /*b[10] = */ 2.86684879348826082701151563014718703925609588623047e-01},
218
+ { /*a[11] ~= 3.00469565029600954026e-01 */ ULL(3FD33AE4B2CFB5F7),
219
+ /*b[11] = */ 3.14359785700871030567071784389554522931575775146484e-01},
220
+ { /*a[12] ~= 3.28361285690481766972e-01 */ ULL(3FD503DF0DD40A5B),
221
+ /*b[12] = */ 3.42479972833279300292730340515845455229282379150391e-01},
222
+ { /*a[13] ~= 3.56721931693259067415e-01 */ ULL(3FD6D4883998DD14),
223
+ /*b[13] = */ 3.71093432391343347465095803272561170160770416259766e-01},
224
+ { /*a[14] ~= 3.85600945252912822931e-01 */ ULL(3FD8ADAF964ABFA5),
225
+ /*b[14] = */ 4.00251150738601846335029676993144676089286804199219e-01},
226
+ { /*a[15] ~= 4.15050955725992373816e-01 */ ULL(3FDA9031E241114E),
227
+ /*b[15] = */ 4.30007504761513281721363455289974808692932128906250e-01},
228
+ { /*a[16] ~= 4.45128198220858643198e-01 */ ULL(3FDC7CFAFB78B41D),
229
+ /*b[16] = */ 4.60420705138676944478959285333985462784767150878906e-01},
230
+ { /*a[17] ~= 4.75892983535655022698e-01 */ ULL(3FDE7507D82B9DC6),
231
+ /*b[17] = */ 4.91553295129659728601723145402502268552780151367188e-01},
232
+ { /*a[18] ~= 5.07410228170177493351e-01 */ ULL(3FE03CB45FF4B2AB),
233
+ /*b[18] = */ 5.23472714391912563591802154405741021037101745605469e-01},
234
+ { /*a[19] ~= 5.39750054761637805872e-01 */ ULL(3FE145A1E826E4EA),
235
+ /*b[19] = */ 5.56251939105489867642972967587411403656005859375000e-01},
236
+ { /*a[20] ~= 5.72988475252136329570e-01 */ ULL(3FE255EBED462BAC),
237
+ /*b[20] = */ 5.89970211851368997457711884635500609874725341796875e-01},
238
+ { /*a[21] ~= 6.07208171494496387417e-01 */ ULL(3FE36E3FD4CDD9AC),
239
+ /*b[21] = */ 6.24713877348479162954220100800739601254463195800781e-01},
240
+ { /*a[22] ~= 6.42499390954343656748e-01 */ ULL(3FE48F5AE1FB2991),
241
+ /*b[22] = */ 6.60577343433393693317157158162444829940795898437500e-01},
242
+ { /*a[23] ~= 6.78960978813340497734e-01 */ ULL(3FE5BA0C5FE86E27),
243
+ /*b[23] = */ 6.97664190728041089251121320558013394474983215332031e-01},
244
+ { /*a[24] ~= 7.16701572306941533027e-01 */ ULL(3FE6EF3822C19A5D),
245
+ /*b[24] = */ 7.36088459496464064812926153535954654216766357421875e-01},
246
+ { /*a[25] ~= 7.55840988781748695010e-01 */ ULL(3FE82FD970F967BD),
247
+ /*b[25] = */ 7.75976148518263131315109148999908939003944396972656e-01},
248
+ { /*a[26] ~= 7.96511846049556065643e-01 */ ULL(3FE97D0669351A0D),
249
+ /*b[26] = */ 8.17466968767843527032823658373672515153884887695312e-01},
250
+ { /*a[27] ~= 8.38861462565995493716e-01 */ ULL(3FEAD7F3FE730FCD),
251
+ /*b[27] = */ 8.60716404767067566616844942473107948899269104003906e-01},
252
+ { /*a[28] ~= 8.83054096327761096527e-01 */ ULL(3FEC41FAAA0A733E),
253
+ /*b[28] = */ 9.05898149317818313086547732382314279675483703613281e-01},
254
+ { /*a[29] ~= 9.29273595909162105525e-01 */ ULL(3FEDBC9BFAEEEADF),
255
+ /*b[29] = */ 9.53206993785724487899813084368361160159111022949219e-01},
256
+ { /*a[30] ~= 9.77726555752981254442e-01 */ ULL(3FEF498933AC790A),
257
+ /*b[30] = */ 1.00286227737052557884567249857354909181594848632812e+00},
258
+ { /*a[31] ~= 1.02864609206350806308e+00 */ ULL(3FF075559AC922B4),
259
+ /*b[31] = */ 1.05511202646791502068879253783961758017539978027344e+00},
260
+ { /*a[32] ~= 1.08229638730567912228e+00 */ ULL(3FF151160440E8D3),
261
+ /*b[32] = */ 1.11023795151925819268967643438372761011123657226562e+00},
262
+ { /*a[33] ~= 1.13897819300824741364e+00 */ ULL(3FF23941329D3DD8),
263
+ /*b[33] = */ 1.16856151675095110142876819736557081341743469238281e+00},
264
+ { /*a[34] ~= 1.19903553596580987055e+00 */ ULL(3FF32F3FE2DB7094),
265
+ /*b[34] = */ 1.23045136228081597451478046423289924860000610351562e+00},
266
+ { /*a[35] ~= 1.26286394722716532198e+00 */ ULL(3FF434B0D38A35D7),
267
+ /*b[35] = */ 1.29633244442242001603915468876948580145835876464844e+00},
268
+ { /*a[36] ~= 1.33092063388866265448e+00 */ ULL(3FF54B736F41F96D),
269
+ /*b[36] = */ 1.36669737760087572908673791971523314714431762695312e+00},
270
+ { /*a[37] ~= 1.40373715148086145849e+00 */ ULL(3FF675B5165CA5E1),
271
+ /*b[37] = */ 1.44212062317890032936418265308020636439323425292969e+00},
272
+ { /*a[38] ~= 1.48193532552453321547e+00 */ ULL(3FF7B601D0DEA3C6),
273
+ /*b[38] = */ 1.52327639603630871079076314345002174377441406250000e+00},
274
+ { /*a[39] ~= 1.56624743831976717041e+00 */ ULL(3FF90F5979506F51),
275
+ /*b[39] = */ 1.61096147803441858137318831722950562834739685058594e+00},
276
+ { /*a[40] ~= 1.65754207708184630948e+00 */ ULL(3FFA854AD74CF791),
277
+ /*b[40] = */ 1.70612458293084490179580825497396290302276611328125e+00},
278
+ { /*a[41] ~= 1.75685758736121174681e+00 */ ULL(3FFC1C16B3972246),
279
+ /*b[41] = */ 1.80990457885083300126893846027087420225143432617188e+00},
280
+ { /*a[42] ~= 1.86544587781964938190e+00 */ ULL(3FFDD8DDC6DB1831),
281
+ /*b[42] = */ 1.92368085119253517945026032975874841213226318359375e+00},
282
+ { /*a[43] ~= 1.98483051718814034750e+00 */ ULL(3FFFC1DDA4F6D032),
283
+ /*b[43] = */ 2.04914055707593512067887786542996764183044433593750e+00},
284
+ { /*a[44] ~= 2.11688487740990979279e+00 */ ULL(4000EF6156AEFAF2),
285
+ /*b[44] = */ 2.18836977316091063627823132264893501996994018554688e+00},
286
+ { /*a[45] ~= 2.26393888595347935033e+00 */ ULL(40021C8BFD9A80C1),
287
+ /*b[45] = */ 2.34397906437763481335423421114683151245117187500000e+00},
288
+ { /*a[46] ~= 2.42892740222016626128e+00 */ ULL(40036E717D67269C),
289
+ /*b[46] = */ 2.51927965826279764982587039412464946508407592773438e+00},
290
+ { /*a[47] ~= 2.61560046981161264128e+00 */ ULL(4004ECBFF069F1E4),
291
+ /*b[47] = */ 2.71853573297491069027387311507482081651687622070312e+00},
292
+ { /*a[48] ~= 2.82882779840766906527e+00 */ ULL(4006A170780169B7),
293
+ /*b[48] = */ 2.94733416149008720097413061012048274278640747070312e+00},
294
+ { /*a[49] ~= 3.07505072362971616974e+00 */ ULL(400899B4319C3F02),
295
+ /*b[49] = */ 3.21314087722892072207514502224512398242950439453125e+00},
296
+ { /*a[50] ~= 3.36297230191158715455e+00 */ ULL(400AE75E05B0834A),
297
+ /*b[50] = */ 3.52616384863255349912947167467791587114334106445312e+00},
298
+ { /*a[51] ~= 3.70464601821196143254e+00 */ ULL(400DA31D739BD0E3),
299
+ /*b[51] = */ 3.90073973345466518125590482668485492467880249023438e+00},
300
+ { /*a[52] ~= 4.11726034471856573100e+00 */ ULL(401078131886BC57),
301
+ /*b[52] = */ 4.35765668014056828383218089584261178970336914062500e+00},
302
+ { /*a[53] ~= 4.62619989820137847648e+00 */ ULL(4012813A8BCE2241),
303
+ /*b[53] = */ 4.92824409985376998832862227573059499263763427734375e+00},
304
+ { /*a[54] ~= 5.27059285056349616385e+00 */ ULL(401515164ACECE78),
305
+ /*b[54] = */ 5.66202526987798027136022938066162168979644775390625e+00},
306
+ { /*a[55] ~= 6.11406930017863578891e+00 */ ULL(401874CE9526FAB9),
307
+ /*b[55] = */ 6.64216890962962569489036468439735472202301025390625e+00},
308
+ { /*a[56] ~= 7.26750136287798241547e+00 */ ULL(401D11EBE094C913),
309
+ /*b[56] = */ 8.01990986231011859786121931392699480056762695312500e+00},
310
+ { /*a[57] ~= 8.94284159107796650204e+00 */ ULL(4021E2BC220DFA19),
311
+ /*b[57] = */ 1.01020964280653942068965989165008068084716796875000e+01},
312
+ { /*a[58] ~= 1.16023240149353498339e+01 */ ULL(40273463D0337C49),
313
+ /*b[58] = */ 1.36206610885392880305744256475009024143218994140625e+01},
314
+ { /*a[59] ~= 1.64826377753716631495e+01 */ ULL(40307B8E26350916),
315
+ /*b[59] = */ 2.08587363260064613257327437167987227439880371093750e+01},
316
+ { /*a[60] ~= 2.83859754493341325216e+01 */ ULL(403C62CF497BF2F2),
317
+ /*b[60] = */ 4.43908820444562195461912779137492179870605468750000e+01},
318
+ { /*a[61] ~= 1.01699461607316896213e+02 */ ULL(40596CC3FA9E0EF4),
319
+ /*b[61] = */ 8.27932424540746438879068591631948947906494140625000e+01}
320
+ };
321
+
322
+
323
+ #define atanb_table ((const XC_FLOAT_TYPE *)_atanb_table)
324
+ __declspec(align(16)) static const unsigned short _atanb_table[] = {
325
+ /*atan_b[0] ~= 2.5381524664e-02*/
326
+ LDOUBLE_HEX(3FF9, CFEC, EA4B, 4FCB, 5DFD),
327
+ LDOUBLE_HEX(BFB7, CBBA, 8342, F523, 8BE7),
328
+ /*atan_b[1] ~= 5.0763049304e-02*/
329
+ LDOUBLE_HEX(3FFA, CFEC, EA49, B131, 647C),
330
+ LDOUBLE_HEX(3FB6, D38B, A5E1, 4DEF, A6BD),
331
+ /*atan_b[2] ~= 7.6144573921e-02*/
332
+ LDOUBLE_HEX(3FFB, 9BF1, AFB6, 0F03, 5D53),
333
+ LDOUBLE_HEX(3FB8, EF7C, 871F, DC70, BCA9),
334
+ /*atan_b[3] ~= 1.0152609851e-01*/
335
+ LDOUBLE_HEX(3FFB, CFEC, EA46, 78CC, AECA),
336
+ LDOUBLE_HEX(BFB7, DCB7, 3BED, 3BD7, 633C),
337
+ /*atan_b[4] ~= 1.2690762308e-01*/
338
+ LDOUBLE_HEX(3FFC, 81F4, 126B, 0C0A, B24C),
339
+ LDOUBLE_HEX(3FB8, 9C93, 50C6, 8748, 202B),
340
+ /*atan_b[5] ~= 1.5228914763e-01*/
341
+ LDOUBLE_HEX(3FFC, 9BF1, AFB2, 77C1, F1F3),
342
+ LDOUBLE_HEX(BFBB, 9D89, 6B54, 2B43, C3D3),
343
+ /*atan_b[6] ~= 1.7767067216e-01*/
344
+ LDOUBLE_HEX(3FFC, B5EF, 4CF9, 8121, 27D9),
345
+ LDOUBLE_HEX(BFBB, D8AB, 134C, C337, 1424),
346
+ /*atan_b[7] ~= 2.0305219666e-01*/
347
+ LDOUBLE_HEX(3FFC, CFEC, EA40, 29FE, 3D0C),
348
+ LDOUBLE_HEX(BFBA, 964C, 23A5, 78A9, 286C),
349
+ /*atan_b[8] ~= 2.2843372114e-01*/
350
+ LDOUBLE_HEX(3FFC, E9EA, 8786, 746E, CBDE),
351
+ LDOUBLE_HEX(3FBB, 95CE, 8C74, D4B3, 3D3D),
352
+ /*atan_b[9] ~= 2.5381524560e-01*/
353
+ LDOUBLE_HEX(3FFD, 81F4, 1266, 3163, 58ED),
354
+ LDOUBLE_HEX(3FBB, B292, B8DC, 903F, C86D),
355
+ /*atan_b[10] ~= 2.7919677004e-01*/
356
+ LDOUBLE_HEX(3FFD, 8EF2, E108, FBCB, 4839),
357
+ LDOUBLE_HEX(BFBC, C5E3, D3F8, 42F0, A001),
358
+ /*atan_b[11] ~= 3.0457829447e-01*/
359
+ LDOUBLE_HEX(3FFD, 9BF1, AFAB, 9AD5, 051A),
360
+ LDOUBLE_HEX(3FBC, BE9C, AF21, 45D0, CBC5),
361
+ /*atan_b[12] ~= 3.2995981887e-01*/
362
+ LDOUBLE_HEX(3FFD, A8F0, 7E4E, 1002, FE3F),
363
+ LDOUBLE_HEX(3FB9, ACDF, 4585, 84D5, 7EE8),
364
+ /*atan_b[13] ~= 3.5534134325e-01*/
365
+ LDOUBLE_HEX(3FFD, B5EF, 4CF0, 5CF3, 3B2F),
366
+ LDOUBLE_HEX(BFB9, DAF1, E542, E461, 5C3F),
367
+ /*atan_b[14] ~= 3.8072286762e-01*/
368
+ LDOUBLE_HEX(3FFD, C2EE, 1B92, 835E, 5241),
369
+ LDOUBLE_HEX(3FBC, F450, E872, E8D5, 5B89),
370
+ /*atan_b[15] ~= 4.0610439197e-01*/
371
+ LDOUBLE_HEX(3FFD, CFEC, EA34, 8516, 3E60),
372
+ LDOUBLE_HEX(BFBC, 91DD, F6E6, 0680, E8AD),
373
+ /*atan_b[16] ~= 4.3148591630e-01*/
374
+ LDOUBLE_HEX(3FFD, DCEB, B8D6, 6405, 31AA),
375
+ LDOUBLE_HEX(BFBC, 8502, E09D, 5663, 1B39),
376
+ /*atan_b[17] ~= 4.5686744062e-01*/
377
+ LDOUBLE_HEX(3FFD, E9EA, 8778, 222C, 48BB),
378
+ LDOUBLE_HEX(BFBB, F51E, C2F3, 5A3E, F53D),
379
+ /*atan_b[18] ~= 4.8224896492e-01*/
380
+ LDOUBLE_HEX(3FFD, F6E9, 5619, C1A2, 5014),
381
+ LDOUBLE_HEX(BFBB, E1E1, FABB, 35B7, 64D8),
382
+ /*atan_b[19] ~= 5.0763048922e-01*/
383
+ LDOUBLE_HEX(3FFE, 81F4, 125D, A249, 1B96),
384
+ LDOUBLE_HEX(BFBB, FEB6, 20F5, A80E, ABD8),
385
+ /*atan_b[20] ~= 5.3301201350e-01*/
386
+ LDOUBLE_HEX(3FFE, 8873, 79AE, 569C, E82C),
387
+ LDOUBLE_HEX(BFBD, 9333, CB85, 3253, A31F),
388
+ /*atan_b[21] ~= 5.5839353776e-01*/
389
+ LDOUBLE_HEX(3FFE, 8EF2, E0FE, FEF4, 22DF),
390
+ LDOUBLE_HEX(3FBD, FBF4, E487, 2960, 19F2),
391
+ /*atan_b[22] ~= 5.8377506202e-01*/
392
+ LDOUBLE_HEX(3FFE, 9572, 484F, 9C7E, 4569),
393
+ LDOUBLE_HEX(BFBD, ED41, 6021, 317B, 1548),
394
+ /*atan_b[23] ~= 6.0915658627e-01*/
395
+ LDOUBLE_HEX(3FFE, 9BF1, AFA0, 3071, E801),
396
+ LDOUBLE_HEX(3FBD, C46B, 95C4, B736, D8A5),
397
+ /*atan_b[24] ~= 6.3453811052e-01*/
398
+ LDOUBLE_HEX(3FFE, A271, 16F0, BC0B, F541),
399
+ LDOUBLE_HEX(3FBD, E479, 64B6, 873E, E8BE),
400
+ /*atan_b[25] ~= 6.5991963475e-01*/
401
+ LDOUBLE_HEX(3FFE, A8F0, 7E41, 408E, DDC6),
402
+ LDOUBLE_HEX(3FBD, C200, D1A3, 7D02, 9DAA),
403
+ /*atan_b[26] ~= 6.8530115898e-01*/
404
+ LDOUBLE_HEX(3FFE, AF6F, E591, BF41, BD98),
405
+ LDOUBLE_HEX(3FBC, AB83, 86B7, DBD3, 49B9),
406
+ /*atan_b[27] ~= 7.1068268321e-01*/
407
+ LDOUBLE_HEX(3FFE, B5EF, 4CE2, 396F, 887A),
408
+ LDOUBLE_HEX(3FB9, 93C0, 6F69, 2472, DD13),
409
+ /*atan_b[28] ~= 7.3606420743e-01*/
410
+ LDOUBLE_HEX(3FFE, BC6E, B432, B066, 2617),
411
+ LDOUBLE_HEX(BFBD, C5F2, 72DA, A216, 8845),
412
+ /*atan_b[29] ~= 7.6144573166e-01*/
413
+ LDOUBLE_HEX(3FFE, C2EE, 1B83, 2575, A17C),
414
+ LDOUBLE_HEX(3FBA, FC52, 25AC, D135, 67B0),
415
+ /*atan_b[30] ~= 7.8682725588e-01*/
416
+ LDOUBLE_HEX(3FFE, C96D, 82D3, 99EF, 4753),
417
+ LDOUBLE_HEX(3FBC, E6CB, 9CE5, F7DC, 32EF),
418
+ /*atan_b[31] ~= 8.1220878010e-01*/
419
+ LDOUBLE_HEX(3FFE, CFEC, EA24, 0F24, C5A3),
420
+ LDOUBLE_HEX(BFBB, 9F94, 64A4, 0D49, 77DA),
421
+ /*atan_b[32] ~= 8.3759030433e-01*/
422
+ LDOUBLE_HEX(3FFE, D66C, 5174, 8667, 5086),
423
+ LDOUBLE_HEX(BFBC, E480, 36A7, 98A0, E416),
424
+ /*atan_b[33] ~= 8.6297182855e-01*/
425
+ LDOUBLE_HEX(3FFE, DCEB, B8C5, 0106, C115),
426
+ LDOUBLE_HEX(BFBB, AE5E, 111C, 0925, 5FC1),
427
+ /*atan_b[34] ~= 8.8835335278e-01*/
428
+ LDOUBLE_HEX(3FFE, E36B, 2015, 8050, B874),
429
+ LDOUBLE_HEX(BFBC, 8DD3, E1A9, 67EE, B236),
430
+ /*atan_b[35] ~= 9.1373487702e-01*/
431
+ LDOUBLE_HEX(3FFE, E9EA, 8766, 058F, C400),
432
+ LDOUBLE_HEX(BFBD, 994E, 5D94, 7944, 5BF2),
433
+ /*atan_b[36] ~= 9.3911640126e-01*/
434
+ LDOUBLE_HEX(3FFE, F069, EEB6, 920A, 8756),
435
+ LDOUBLE_HEX(BFBD, F0FC, 830B, 5639, 9FED),
436
+ /*atan_b[37] ~= 9.6449792552e-01*/
437
+ LDOUBLE_HEX(3FFE, F6E9, 5607, 2702, D403),
438
+ LDOUBLE_HEX(BFBD, B0EF, D9DB, FF7A, BBF3),
439
+ /*atan_b[38] ~= 9.8987944978e-01*/
440
+ LDOUBLE_HEX(3FFE, FD68, BD57, C5B4, F372),
441
+ LDOUBLE_HEX(BFBD, 9706, 5831, 4248, 656E),
442
+ /*atan_b[39] ~= 1.0152609740e+00*/
443
+ LDOUBLE_HEX(3FFF, 81F4, 1254, 37AB, 59C4),
444
+ LDOUBLE_HEX(3FBE, C83B, C3BE, 8160, FE56),
445
+ /*atan_b[40] ~= 1.0406424983e+00*/
446
+ LDOUBLE_HEX(3FFF, 8533, C5FC, 928B, 5DCD),
447
+ LDOUBLE_HEX(3FBE, C025, 7DA6, 5435, CDA0),
448
+ /*atan_b[41] ~= 1.0660240226e+00*/
449
+ LDOUBLE_HEX(3FFF, 8873, 79A4, F40D, D390),
450
+ LDOUBLE_HEX(BFBE, BB70, CBE8, FB3B, AA03),
451
+ /*atan_b[42] ~= 1.0914055469e+00*/
452
+ LDOUBLE_HEX(3FFF, 8BB3, 2D4D, 5CC1, ADB6),
453
+ LDOUBLE_HEX(3FBE, 8161, 18FB, A932, 136B),
454
+ /*atan_b[43] ~= 1.1167870712e+00*/
455
+ LDOUBLE_HEX(3FFF, 8EF2, E0F5, CD31, 1F80),
456
+ LDOUBLE_HEX(BFBC, BD96, 57B0, 5730, 7576),
457
+ /*atan_b[44] ~= 1.1421685956e+00*/
458
+ LDOUBLE_HEX(3FFF, 9232, 949E, 45E1, 3E02),
459
+ LDOUBLE_HEX(BFBD, CDB1, 87A1, 5D56, 06EC),
460
+ /*atan_b[45] ~= 1.1675501199e+00*/
461
+ LDOUBLE_HEX(3FFF, 9572, 4846, C751, B4C7),
462
+ LDOUBLE_HEX(BFBD, A1AB, 140B, 2B49, DF68),
463
+ /*atan_b[46] ~= 1.1929316443e+00*/
464
+ LDOUBLE_HEX(3FFF, 98B1, FBEF, 51FC, 635A),
465
+ LDOUBLE_HEX(3FBE, CA64, 3ADC, 86D5, FB02),
466
+ /*atan_b[47] ~= 1.2183131687e+00*/
467
+ LDOUBLE_HEX(3FFF, 9BF1, AF97, E655, 1527),
468
+ LDOUBLE_HEX(3FBE, CA1D, 3262, C2F9, D84C),
469
+ /*atan_b[48] ~= 1.2436946931e+00*/
470
+ LDOUBLE_HEX(3FFF, 9F31, 6340, 84C9, 33A7),
471
+ LDOUBLE_HEX(3FBD, AF23, 2B16, BE75, 8B87),
472
+ /*atan_b[49] ~= 1.2690762175e+00*/
473
+ LDOUBLE_HEX(3FFF, A271, 16E9, 2DBF, 7CA7),
474
+ LDOUBLE_HEX(3FBE, FDDA, 7599, 4DA2, 0F86),
475
+ /*atan_b[50] ~= 1.2944577420e+00*/
476
+ LDOUBLE_HEX(3FFF, A5B0, CA91, E197, C307),
477
+ LDOUBLE_HEX(BFBC, D265, 9307, D567, 08BE),
478
+ /*atan_b[51] ~= 1.3198392664e+00*/
479
+ LDOUBLE_HEX(3FFF, A8F0, 7E3A, A0AA, A7E2),
480
+ LDOUBLE_HEX(3FBE, BE3C, 4D06, 7D11, 0641),
481
+ /*atan_b[52] ~= 1.3452207909e+00*/
482
+ LDOUBLE_HEX(3FFF, AC30, 31E3, 6B49, 6713),
483
+ LDOUBLE_HEX(BFBE, B9DD, 9D13, C459, 6F6C),
484
+ /*atan_b[53] ~= 1.3706023154e+00*/
485
+ LDOUBLE_HEX(3FFF, AF6F, E58C, 41BD, 9EA8),
486
+ LDOUBLE_HEX(BFBD, 802F, 2153, DC49, 3698),
487
+ /*atan_b[54] ~= 1.3959838399e+00*/
488
+ LDOUBLE_HEX(3FFF, B2AF, 9935, 2449, 1D44),
489
+ LDOUBLE_HEX(3FBE, CAFC, 43E2, 3F23, 5075),
490
+ /*atan_b[55] ~= 1.4213653645e+00*/
491
+ LDOUBLE_HEX(3FFF, B5EF, 4CDE, 1325, B93A),
492
+ LDOUBLE_HEX(BFBA, 9155, 4FBC, 9598, FA3D),
493
+ /*atan_b[56] ~= 1.4467468891e+00*/
494
+ LDOUBLE_HEX(3FFF, B92F, 0087, 0E85, 296B),
495
+ LDOUBLE_HEX(3FBE, C76A, DB5B, 6055, 9EA6),
496
+ /*atan_b[57] ~= 1.4721284137e+00*/
497
+ LDOUBLE_HEX(3FFF, BC6E, B430, 1690, E405),
498
+ LDOUBLE_HEX(3FBA, A6CB, 4564, 7FF8, 4121),
499
+ /*atan_b[58] ~= 1.4975099383e+00*/
500
+ LDOUBLE_HEX(3FFF, BFAE, 67D9, 2B6A, 02AA),
501
+ LDOUBLE_HEX(BFBD, B0AE, B984, 420B, 761D),
502
+ /*atan_b[59] ~= 1.5228914629e+00*/
503
+ LDOUBLE_HEX(3FFF, C2EE, 1B82, 4D29, 2EBE),
504
+ LDOUBLE_HEX(BFBE, 9CBD, 26E8, 9FF8, E917),
505
+ /*atan_b[60] ~= 1.5482729876e+00*/
506
+ LDOUBLE_HEX(3FFF, C62D, CF2B, 7BDE, 8EE3),
507
+ LDOUBLE_HEX(BFBE, AF45, EFD8, 2A64, 49A5),
508
+ /*atan_b[61] ~= 1.5587186337e+00*/
509
+ LDOUBLE_HEX(3FFF, C784, 1799, 9E5D, D2A5),
510
+ LDOUBLE_HEX(BFBE, A231, BD90, F170, 34A5),
511
+ };
512
+ static const long double coef_poly[9][2] = {
513
+ { -3.33333333333333333342368351437379203616728773340583e-01L, 9.03501810404587028364033466367082415937499719525463e-21L},
514
+ { 2.00000000000000000002710505431213761085018632002175e-01L, -2.71050543121376108505536620063805076318847614178820e-21L},
515
+ { -1.42857142857142857140921067549133027796415262855589e-01L, -1.93607530800982934641564128836546985281459293443700e-21L},
516
+ { 1.11111111111111111109605274760436799397211871109903e-01L, 1.50583635067431171387883211317314321885579450456211e-21L},
517
+ { -9.09090909090909090933731867556488737136533018201590e-02L, 0},
518
+ { 7.69230769230769230779655790120052927250071661546826e-02L, 0},
519
+ { -6.66666666666666666698289230030827212658550706692040e-02L, 0},
520
+ { 5.88235294117647058825522430464127765503690170589834e-02L, 0},
521
+ { -5.26315789473684210515616425929419364138084347359836e-02L, 0},
522
+ };
523
+
524
+
525
+
526
+
527
+ extern double atan_rn(double xd) {
528
+ /* Arctangent of xd in two steps: a fast double-extended evaluation followed by a rounding test, then a double-double-extended evaluation when the test fails (presumably "rn" = round to nearest). */
529
+ unsigned int hx;
530
+ double sign;
531
+ double u;
532
+ double comp;
533
+ /* NOTE(review): hx, sign, u and comp are declared above but never referenced in this function. */
534
+ int i, i1, m;
535
+ UINT64 x_val,x_abs,sign_mask;
536
+ L_FLOAT_TYPE xe, tmp, bi, atanbhi, xred, xred2,q;
537
+ L_FLOAT_TYPE res,reshi,reslo,rn_constant,test;
538
+ L_FLOAT_TYPE xred4,tmp2;
539
+ L_FLOAT_TYPE a,b,e0,e1,e2,e3,q0,q1,q2,y0,y1,y2,xred2coarse;
540
+ L_FLOAT_TYPE C3,C5,C7,C9 ;
541
+
542
+
543
+ x_val = _Asm_getf( _FR_D, xd ); /* raw bits of xd (presumably its IEEE-754 double encoding -- confirm against the intrinsics documentation) */
544
+ x_abs = (x_val & ULL(7fffffffffffffff)); /* same bits with the sign bit cleared */
545
+ sign_mask = ((SINT64)x_val >> 63); /* either 00..00 or 11...11 */
546
+
547
+
548
+
549
+ /* xe = |xd|, held in a double-extended (DE) variable; the sign is re-applied on return */
550
+ if(sign_mask)
551
+ xe=-xd;
552
+ else
553
+ xe=xd;
554
+
555
+
556
+ /* Filter cases */
557
+ if (__builtin_expect( x_abs >= ULL(4350000000000000), 0)) { /* |x| >= 2^54; Inf and NaN encodings also land in this branch */
558
+ if (xd!=xd )
559
+ return xd+xd; /* NaN */
560
+ else {/* atan(x) = +/- Pi/2 */
561
+ if(sign_mask) return -HALFPI; else return HALFPI;
562
+ }
563
+ }
564
+ else if (__builtin_expect( x_abs < ULL(3E40000000000000), 0))
565
+ /* TODO Add stuff to raise inexact flag */
566
+ return xd; /* |x| < 2^-27: atan(x) =~ x, so the argument itself is returned (this also covers zero) */
567
+
568
+
569
+ /* Now there is something to compute*/
570
+
571
+ /* load the first element (high part) of the first four polynomial coefficient pairs */
572
+ C3=coef_poly[0][0];
573
+ C5=coef_poly[1][0];
574
+ C7=coef_poly[2][0];
575
+ C9=coef_poly[3][0];
576
+
577
+ if (__builtin_expect(x_abs > MIN_REDUCTION_NEEDED, 0)) /* test if reduction is necessary : */
578
+ {
579
+ /* 1) Argument reduction : */
580
+ /* This constant was found by dichotomy. I am very ashamed */
581
+ rn_constant = 1.002; /* NOTE(review): only read by the disabled #else rounding test below, which reassigns it first */
582
+
583
+ /* compute i so that a[i] < x < a[i+1]: unrolled binary search on ab_table[].a starting at i=31 with steps 16, 8, 4, 2, 1 and a final one-step correction; inputs above ab_table[61].a use i=61 */
584
+
585
+ if (x_abs>ab_table[61].a)
586
+ i=61;
587
+ else {
588
+ i=31;
589
+ if (x_abs < ab_table[i].a) i-= 16;
590
+ else i+=16;
591
+ if (x_abs < ab_table[i].a) i-= 8;
592
+ else i+= 8;
593
+ if (x_abs < ab_table[i].a) i-= 4;
594
+ else i+= 4;
595
+ if (x_abs < ab_table[i].a) i-= 2;
596
+ else i+= 2;
597
+ if (x_abs < ab_table[i].a) i-= 1;
598
+ else i+= 1;
599
+ if (x_abs < ab_table[i].a) i-= 1;
600
+ }
601
+
602
+ bi= ab_table[i].b;
603
+ atanbhi = atanb_table[i].hi;
604
+
605
+ /* the dividend and the divisor for the argument reduction */
606
+ a = xe-bi; b = 1 + xe * bi;
607
+
608
+
609
+ #if 1
610
+ /* now we want to compute (xe - bi )/b as a DE, but
611
+ we will need the accurate quotient only later on,
612
+ we can start the computation of the polynomial with a much coarser approximation.
613
+ Saves 12 cycles.
614
+ */
615
+ /* Algo 8.11 in Markstein book */
616
+ _Asm_frcpa(&y0, a, b, _SF1);
617
+
618
+ e0 = 1 - b*y0; q0 = a*y0;
619
+ e2 = e0 + e0*e0; e1 = e0*e0;
620
+ e3 = e0 + e1*e1; q1 = q0+q0*e2;
621
+ xred = q0 + q1*e3; xred2coarse = q1*q1; /* 62 bits in xred, more than enough */
622
+ xred2 = xred*xred; xred4 = xred2coarse*xred2coarse;
623
+
624
+
625
+
626
+ /*polynom evaluation */
627
+
628
+ tmp2 = C7 + xred2coarse * C9 ;
629
+
630
+ /* here we need xred2, xred2coarse loses a lot of precision to win 3 cycles. */
631
+ tmp = C3 + xred2 * C5;
632
+
633
+ q = tmp + xred4 * tmp2;
634
+
635
+
636
+ #else
637
+ xred=a/b;
638
+ xred2=xred*xred;
639
+ xred4=xred2*xred2;
640
+ tmp2 = C7 + xred2 * C9 ;
641
+ tmp = C3 + xred2 * C5;
642
+ q = tmp + xred4 * tmp2;
643
+ #endif
644
+
645
+ tmp = 1+q*xred2;
646
+ /* reconstruction : atan(x) = atan(b[i]) + atan(xred), with xred = (x - b[i]) / (1 + x*b[i]) */
647
+ res = atanbhi+xred*tmp;
648
+ /* reshi = atanbhi + xred*tmp again, this time through an fma with _PC_D (round to double) */
649
+ reshi = _Asm_fma( _PC_D, xred, tmp, atanbhi, _SF0 );
650
+
651
+ }
652
+ else
653
+ /* no reduction needed */
654
+ {
655
+
656
+
657
+ /* Polynomial evaluation */
658
+
659
+ xred2 = xe*xe;
660
+ /*poly eval */
661
+ xred4=xred2*xred2;
662
+ tmp2 = C7 + xred2 * C9 ;
663
+ tmp = C3 + xred2 * C5;
664
+ q = tmp + xred4 * tmp2;
665
+ q *= xred2;
666
+
667
+
668
+ /* compute q*xe+xe twice: res with _PC_NONE (presumably full register precision -- confirm), reshi with round to double */
669
+ res = _Asm_fma( _PC_NONE, q, xe, xe, _SF1 );
670
+ reshi = _Asm_fma( _PC_D, q, xe, xe, _SF0 );
671
+ }
672
+
673
+ #if 0 /* To time the first step only */
674
+ if(sign_mask)
675
+ return -reshi;
676
+ else
677
+ return reshi;
678
+ #endif
679
+
680
+ #if 1
681
+ i1 = _Asm_getf( _FR_SIG, res); /* significand bits of res; NOTE(review): stored into an int, only the low bits are used below */
682
+ m = i1 & (0xff<<3); /* keep bits 3..10 */
683
+ if(__builtin_expect((m!=(0x7f<<3) && m!=(0x80<<3)), 1+1==2)) { /* fast path, taken unless those bits are 0x7f or 0x80 (presumably: res is too close to a rounding boundary of the double format) */
684
+ if(sign_mask)
685
+ return -reshi;
686
+ else
687
+ return reshi;
688
+ }
689
+ #else
690
+ /* ROUNDING TEST à la Ziv */
691
+ /* This constant was found by dichotomy. I am very ashamed */
692
+ rn_constant = 1.01;
693
+ reslo = res - reshi;
694
+ test=_Asm_fma( _PC_D, reslo, rn_constant, reshi, _SF0 );
695
+
696
+ if (__builtin_expect(reshi == test, 1+1==2)) {
697
+ if(sign_mask)
698
+ return -reshi;
699
+ else
700
+ return reshi;
701
+ }
702
+ #endif
703
+
704
+
705
+ else { /* second step: reached only when the rounding test above did not return */
706
+
707
+ /******************************************************************/
708
+ /* Double-double-extended: redo the evaluation with (hi, lo) pairs of L_FLOAT_TYPE */
709
+ L_FLOAT_TYPE tmphi, tmplo, x0hi, x0lo, xmBihi, xmBilo, Xredhi, Xredlo, Xred2, qhi,qlo, q, Xred2hi,Xred2lo, atanhi,atanlo;
710
+ int j;
711
+
712
+
713
+
714
+
715
+ #if EVAL_PERF
716
+ crlibm_second_step_taken++;
717
+ #endif
718
+
719
+ #if DEBUG
720
+ printf("Toto\n");
721
+ #endif
722
+
723
+ if (__builtin_expect(x_abs > MIN_REDUCTION_NEEDED, 0)) {/* test if reduction is necessary : */
724
+ if(i==61){ /* last interval: xe - b is kept as a (hi, lo) pair (presumably Add12_ext is an error-free sum -- confirm) */
725
+ Add12_ext( xmBihi , xmBilo , xe , -ab_table[61].b);
726
+ }
727
+ else {
728
+ xmBihi = xe-ab_table[i].b; /* NOTE(review): the low part is set to 0 below, i.e. this subtraction is assumed exact -- confirm */
729
+ xmBilo = 0.0;
730
+ }
731
+
732
+ Mul12_ext(tmphi,tmplo, xe, (ab_table[i].b));
733
+
734
+ if (xe > 1) /* TODO: replace with a test on x_abs */
735
+ Add22_ext(x0hi,x0lo,tmphi,tmplo, 1.0,0.0);
736
+ else {
737
+ Add22_ext(x0hi , x0lo , 1.0,0.0,tmphi,tmplo);
738
+ }
739
+
740
+ #if 1
741
+ Div22_ext(Xredhi, Xredlo, xmBihi , xmBilo , x0hi,x0lo);
742
+ #else
743
+ Xredhi=1; Xredlo=0; /* to time the Div22*/
744
+ #endif
745
+
746
+ #if DEBUG
747
+ printf("i=%d, num=%1.15e den=%1.15e\n",i, (double)xmBihi, (double)x0hi);
748
+ printf("Xred=%1.15e\n", (double)Xredhi);
749
+ #endif
750
+
751
+ Xred2 = Xredhi*Xredhi;
752
+ Mul22_ext(Xred2hi,Xred2lo,Xredhi,Xredlo,Xredhi, Xredlo);
753
+
754
+ /* poly eval: coefficients 8..4 in plain Horner on Xred2, coefficients 3..0 with (hi, lo) pairs in the loop below */
755
+
756
+ q = (coef_poly[4][0]+Xred2*
757
+ (coef_poly[5][0]+Xred2*
758
+ (coef_poly[6][0]+Xred2*
759
+ (coef_poly[7][0]+
760
+ (Xred2*coef_poly[8][0])))));
761
+
762
+ Mul12_ext(qhi, qlo, q, Xred2);
763
+
764
+ for(j=3;j>=0;j--)
765
+ {
766
+ Add22_ext(qhi,qlo, (coef_poly[j][0]), (coef_poly[j][1]), qhi,qlo);
767
+ Mul22_ext(qhi,qlo, qhi,qlo, Xred2hi,Xred2lo);
768
+ }
769
+
770
+ Mul22_ext(qhi,qlo, Xredhi,Xredlo, qhi,qlo);
771
+ Add22_ext(qhi,qlo, Xredhi,Xredlo, qhi,qlo);
772
+
773
+ /* reconstruction : atan(x) = atan(b[i]) + atan(Xred), with Xred = (x - b[i]) / (1 + x*b[i]) */
774
+ Add22_ext(atanhi,atanlo, atanb_table[i].hi, atanb_table[i].lo, qhi,qlo);
775
+ }
776
+ else
777
+ /* no reduction needed */
778
+ {
779
+
780
+ #if DEBUG
781
+ printf("Tata\n");
782
+ #endif
783
+ /* Polynomial evaluation */
784
+ Mul12_ext( Xred2hi,Xred2lo,xe,xe);
785
+
786
+ /*poly eval - don't take risks, keep plain Horner */
787
+
788
+ q = coef_poly[8][0];
789
+ q = coef_poly[7][0]+Xred2hi*q;
790
+ q = coef_poly[6][0]+Xred2hi*q;
791
+ q = coef_poly[5][0]+Xred2hi*q;
792
+
793
+ Add12_ext(qhi,qlo, coef_poly[4][0], Xred2hi*q);
794
+ #if DEBUG
795
+ printf(" qhi+ql = %1.50Le + %1.50Le\n",(long double)qhi, (long double)qlo);
796
+ print_debug("qhi", qhi);
797
+ print_debug("qlo", qlo);
798
+ #endif
799
+ Mul22_ext(qhi,qlo, qhi,qlo, Xred2hi,Xred2lo);
800
+ #if DEBUG
801
+ printf(" Xred2 = %1.50Le + %1.50Le\n",(long double)Xred2hi, (long double)Xred2lo);
802
+ printf(" qhi+ql = %1.50Le + %1.50Le\n",(long double)qhi, (long double)qlo);
803
+ print_debug("qhi", qhi);
804
+ print_debug("qlo", qlo);
805
+ #endif
806
+
807
+ for(j=3;j>=0;j--)
808
+ {
809
+ Add22_ext(qhi,qlo, (coef_poly[j][0]), (coef_poly[j][1]), qhi,qlo);
810
+ Mul22_ext(qhi,qlo, qhi,qlo, Xred2hi,Xred2lo);
811
+ }
812
+
813
+ Mul22_ext (qhi,qlo, xe,0, qhi,qlo);
814
+
815
+ #if DEBUG
816
+ printf(" qhi+ql = %1.50Le + %1.50Le\n",(long double)qhi, (long double)qlo);
817
+ print_debug("qhi", qhi);
818
+ print_debug("qlo", qlo);
819
+ #endif
820
+ /* Now comes the addition sequence proven in the TOMS paper */
821
+ Add12_ext(atanhi,atanlo,xe,qhi);
822
+ #if DEBUG
823
+ print_debug("atanhi", atanhi);
824
+ printf(" atan hi+lo %1.50Le + %1.50Le\n",(long double)atanhi, (long double)atanlo);
825
+ #endif
826
+ atanlo += qlo;
827
+
828
+ }
829
+
830
+ #if DEBUG
831
+ printf(" atan hi+lo %1.50Le + %1.50Le\n",(long double)atanhi, (long double)atanlo);
832
+ printf(" %1.50e + %1.50e\n",(double)atanhi,(double) atanlo);
833
+ printf(" %1.50Le\n",(long double)(atanhi + atanlo));
834
+ printf(" ");
835
+ #endif
836
+ /* final result: cast atanhi+atanlo to double and restore the sign of xd */
837
+ if(sign_mask)
838
+ res= -(double) (atanhi+atanlo);
839
+ else
840
+ res= (double) (atanhi+atanlo);
841
+
842
+ return res;
843
+
844
+ }
845
+ }
846
+