crmf 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +12 -0
  3. data/crmf.gemspec +102 -1
  4. data/ext/crlibm-1.0beta5/AUTHORS +2 -0
  5. data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
  6. data/ext/crlibm-1.0beta5/COPYING +340 -0
  7. data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
  8. data/ext/crlibm-1.0beta5/ChangeLog +125 -0
  9. data/ext/crlibm-1.0beta5/Makefile.am +134 -0
  10. data/ext/crlibm-1.0beta5/NEWS +0 -0
  11. data/ext/crlibm-1.0beta5/README +31 -0
  12. data/ext/crlibm-1.0beta5/README.DEV +23 -0
  13. data/ext/crlibm-1.0beta5/README.md +5 -0
  14. data/ext/crlibm-1.0beta5/TODO +66 -0
  15. data/ext/crlibm-1.0beta5/VERSION +1 -0
  16. data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
  17. data/ext/crlibm-1.0beta5/acos-td.h +629 -0
  18. data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
  19. data/ext/crlibm-1.0beta5/asin-td.h +620 -0
  20. data/ext/crlibm-1.0beta5/asincos.c +4488 -0
  21. data/ext/crlibm-1.0beta5/asincos.h +575 -0
  22. data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
  23. data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
  24. data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
  25. data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
  26. data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
  27. data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
  28. data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
  29. data/ext/crlibm-1.0beta5/configure.ac +419 -0
  30. data/ext/crlibm-1.0beta5/crlibm.h +204 -0
  31. data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
  32. data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
  33. data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
  34. data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
  35. data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
  36. data/ext/crlibm-1.0beta5/double-extended.h +496 -0
  37. data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
  38. data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
  39. data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
  40. data/ext/crlibm-1.0beta5/exp-td.h +685 -0
  41. data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
  42. data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
  43. data/ext/crlibm-1.0beta5/expm1.c +2515 -0
  44. data/ext/crlibm-1.0beta5/expm1.h +715 -0
  45. data/ext/crlibm-1.0beta5/interval.h +238 -0
  46. data/ext/crlibm-1.0beta5/log-de.c +480 -0
  47. data/ext/crlibm-1.0beta5/log-de.h +747 -0
  48. data/ext/crlibm-1.0beta5/log-de2.c +280 -0
  49. data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
  50. data/ext/crlibm-1.0beta5/log-td.c +1158 -0
  51. data/ext/crlibm-1.0beta5/log-td.h +819 -0
  52. data/ext/crlibm-1.0beta5/log.c +2244 -0
  53. data/ext/crlibm-1.0beta5/log.h +1592 -0
  54. data/ext/crlibm-1.0beta5/log10-td.c +906 -0
  55. data/ext/crlibm-1.0beta5/log10-td.h +823 -0
  56. data/ext/crlibm-1.0beta5/log1p.c +1295 -0
  57. data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
  58. data/ext/crlibm-1.0beta5/log2-td.h +821 -0
  59. data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
  60. data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
  61. data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
  62. data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
  63. data/ext/crlibm-1.0beta5/log_fast.c +360 -0
  64. data/ext/crlibm-1.0beta5/log_fast.h +440 -0
  65. data/ext/crlibm-1.0beta5/pow.c +1396 -0
  66. data/ext/crlibm-1.0beta5/pow.h +3101 -0
  67. data/ext/crlibm-1.0beta5/prepare +20 -0
  68. data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
  69. data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
  70. data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
  71. data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
  72. data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
  73. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
  74. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
  75. data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
  76. data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
  77. data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
  78. data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
  79. data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
  80. data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
  81. data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
  82. data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
  83. data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
  84. data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
  85. data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
  86. data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
  87. data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
  88. data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
  89. data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
  90. data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
  91. data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
  92. data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
  93. data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
  94. data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
  95. data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
  96. data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
  97. data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
  98. data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
  99. data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
  100. data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
  101. data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
  102. data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
  103. data/ext/crlibm-1.0beta5/trigpi.h +556 -0
  104. data/ext/crlibm-1.0beta5/triple-double.c +57 -0
  105. data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
  106. data/ext/crmf/crmf.c +16 -16
  107. data/ext/crmf/extconf.rb +12 -8
  108. data/lib/crmf/version.rb +1 -1
  109. data/tests/perf.rb +100 -219
  110. metadata +104 -3
  111. data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,846 @@
1
+ /*
2
+ * This function computes a correctly rounded atan using double-extended arithmetic, FMAs and other dirty tricks.
3
+ *
4
+ * Author : Nicolas Gast, Florent de Dinechin
5
+ * nicolas.gast@ens.fr
6
+ *
7
+
8
+ WARNING : This code is dirty and experimental, and remains here for
9
+ history. A cleaner, portable version using double-extended arithmetic will be available some day as atan-de.c
10
+ For this reason only atan_rn is implemented here, so "make check" fails for all the other rounding modes.
11
+
12
+
13
+ To test within crlibm: (tested with Intel icc compiler version 8.1)
14
+ icc -Qoption,cpp,--extended_float_types -IPF_fp_speculationsafe -c atan-itanium.c; mv atan-itanium.o atan_fast.o; make
15
+
16
+
17
+
18
+
19
+ This file is completely self-contained so that we can change the crlibm infrastructure without having to maintain this file.
20
+
21
+
22
+ */
23
+
24
+ /* WARNING: for reasons not understood so far (a "quantum effect"),
25
+ turning debugging on may change the computed result. */
26
+ #define DEBUG 0 /* debug switch, defined as 0 here */
27
+
28
+
29
+
30
+ typedef __int64 INT64; /* integer aliases built on the compiler-specific __int64 type */
31
+ typedef signed __int64 SINT64; /* signed variant; atan_rn casts to it before shifting right by 63 to build a sign mask */
32
+ typedef unsigned __int64 UINT64; /* unsigned variant; atan_rn keeps the bit pattern of its double argument in it */
33
+
34
+ /* FP register type: the compiler-specific __fpreg type, used for the working variables of the macros and of atan_rn in this file */
35
+ typedef __fpreg L_FLOAT_TYPE;
36
+
37
+ /* Almost the same as the previous, except that the exponent field is smaller; morally the in-memory counterpart */
38
+ typedef long double LC_FLOAT_TYPE;
39
+
40
+ /* The double-double-ext type, using registers: a (hi, lo) pair of L_FLOAT_TYPE; presumably the value is hi + lo */
41
+ typedef struct __X_FLOAT_TYPE_TAG {
42
+ L_FLOAT_TYPE hi,lo; /* order is critical! hi first, then lo */
43
+ } X_FLOAT_TYPE;
44
+
45
+ /* The double-double-ext type, in memory: a (hi, lo) pair of long doubles. The raw unsigned short array _atanb_table is cast to an array of this struct. */
46
+ typedef struct __XC_FLOAT_TYPE_TAG {
47
+ LC_FLOAT_TYPE hi,lo; /* order is critical! it has to match the order of the LDOUBLE_HEX pairs in the table */
48
+ } XC_FLOAT_TYPE;
49
+
50
+
51
+ /* For debugging: overlays a long double with three ints so that print_debug can dump its raw words in hex */
52
+ typedef union {
53
+ int i[3]; /* raw words of d; assumes 32-bit int so that three of them cover the 80 significant bits - TODO confirm */
54
+ long double d; /* the value to inspect */
55
+ } db_ext_number;
56
+
57
+
58
+ typedef enum { /* precision selector: atan_rn passes _PC_D to _Asm_fma, and the Mul macros pass the literal 3 (_PC_NONE) to _Asm_fms */
59
+ _PC_S = 1 /* single .s */
60
+ ,_PC_D = 2 /* double .d */
61
+ ,_PC_NONE = 3 /* dynamic */
62
+ } _Asm_pc;
63
+
64
+ /* Table 1-22: legal getf/setf floating-point register access completers */
65
+ typedef enum { /* atan_rn passes _FR_D to _Asm_getf to read its double argument as an integer bit pattern */
66
+ _FR_S = 1 /* single form .s */
67
+ ,_FR_D = 2 /* double form .d */
68
+ ,_FR_EXP = 3 /* exponent form .exp */
69
+ ,_FR_SIG = 4 /* significand form .sig */
70
+ } _Asm_fr_access;
71
+
72
+ /* Table 1-24: legal floating-point FPSR status field completers (.sf) */
73
+ typedef enum { /* in this file _SF1 is used with _Asm_frcpa and (as the literal 1) with _Asm_fms; _SF0 with the final _Asm_fma */
74
+ _SF0 = 0 /* FPSR status field 0 .s0 */
75
+ ,_SF1 = 1 /* FPSR status field 1 .s1 */
76
+ ,_SF2 = 2 /* FPSR status field 2 .s2 */
77
+ ,_SF3 = 3 /* FPSR status field 3 .s3 */
78
+ } _Asm_sf;
79
+
80
+ #define print_debug(msg, _z) /* debug helper: prints msg, then the raw words of _z (stored as a long double) in hex */ {\
81
+ db_ext_number dbg;\
82
+ dbg.d=(_z); /* parenthesized so an expression argument is assigned as a whole */\
83
+ printf("%s", msg); /* msg is printed as data, never interpreted as a format string */\
84
+ printf(" %08x %08x %08x \n", (dbg.i[2]<<16)>>16, dbg.i[1], dbg.i[0]); /* shift pair keeps only the low 16 bits of i[2] */\
85
+ }
86
+
87
+
88
+ #define Add12_ext(s, r, a, b) /* s receives a+b; r receives b + (a - s), the correction term of that sum */ \
89
+ { L_FLOAT_TYPE _z, _a, _b; \
90
+ _a= (a); _b=(b); /* evaluate each input argument exactly once */ \
91
+ s = (_a + _b); \
92
+ _z= ( _a - s ); /* use the saved copy: a is not evaluated a second time, and s may be the same variable as a */ \
93
+ r = (_b + _z); }
94
+
95
+
96
+ #define Add22_ext(zh,zl,xh,xl,yh,yl) /* (zh,zl) receives the sum of the pairs (xh,xl) and (yh,yl) */ \
97
+ do {\
98
+ L_FLOAT_TYPE r,s; /* NOTE(review): these plain names would capture a caller argument spelled r or s */\
99
+ r = (xh)+(yh); /* sum of the high parts */\
100
+ s = (xh)-r; /* s accumulates what r missed: (xh - r) + yh ... */\
101
+ s+= (yh);\
102
+ s+= (yl); /* ... plus both low parts */\
103
+ s+= (xl);\
104
+ zh = r+s; /* high part of the result */\
105
+ zl = r - (zh); /* low part: (r - zh) + s */\
106
+ zl+= s;\
107
+ } while(0)
108
+
109
+
110
+
111
+ #define Mul12_ext(_rh,_rl,_u,_v) /* _rh receives the product _u*_v; _rl the _Asm_fms of _u, _v and _rh (presumably the fused _u*_v - _rh) */ \
112
+ { \
113
+ _rh = (_u)*(_v); /* parenthesized so expression arguments keep their grouping; each input is still evaluated twice */ \
114
+ _rl = _Asm_fms( 3/*_PC_NONE*/, (_u), (_v), _rh, 1/*_SF1*/ );\
115
+ }
116
+ #define Mul22_ext(zh,zl, xh,xl, yh,yl) /* (zh,zl) receives the product of the pairs (xh,xl) and (yh,yl); the xl*yl term is not computed */ \
117
+ { \
118
+ L_FLOAT_TYPE ph, pl; \
119
+ ph = (xh)*(yh); /* product of the high parts */ \
120
+ pl = _Asm_fms( 3/*_PC_NONE*/, xh, yh, ph, 1/*_SF1*/ );; /* same call as in Mul12_ext; NOTE(review): the second ';' is a stray empty statement */ \
121
+ pl = (xh)*(yl) + pl; /* add the two cross terms */ \
122
+ pl = (xl)*(yh) + pl; \
123
+ zh = ph+pl; /* high part of the result */ \
124
+ zl = ph - zh; /* low part: (ph - zh) + pl */ \
125
+ zl += pl; \
126
+ }
127
+
128
+ #define Div22_ext(zh,zl,xh,xl,yh,yl) /* (zh,zl) receives the quotient of the pair (xh,xl) by the pair (yh,yl) */ \
129
+ { \
130
+ L_FLOAT_TYPE _ch,_cl,_uh,_ul; \
131
+ _ch=(xh)/(yh); /* first quotient estimate from the high parts */ \
132
+ Mul12_ext(_uh,_ul,_ch,(yh)); /* (_uh,_ul) = _ch*yh as a pair */ \
133
+ _cl=(xh)-_uh; /* correction numerator: xh - _uh - _ul + xl - _ch*yl */ \
134
+ _cl -= _ul; \
135
+ _cl += (xl); \
136
+ _cl -= _ch*(yl); \
137
+ _cl /= (yh); /* correction term of the quotient */ \
138
+ zh = _ch + _cl; /* high part of the result */ \
139
+ zl=(_ch-(zh)); zl += _cl; /* low part: (_ch - zh) + _cl */ \
140
+ }
141
+
142
+
143
+
144
+
145
+
146
+ #define ULL(bits) 0x##bits##uLL /* pastes bare hex digits into a hexadecimal constant with the uLL (unsigned long long) suffix */
147
+
148
+ #if (!defined(EM64T) && defined(__linux__) && defined(IA32)) /* LDOUBLE_ALIGN: byte alignment of a long double; it selects the _XPD_ padding */
149
+ # define LDOUBLE_ALIGN 12 /* IA32 Linux: 12-byte alignment */
150
+ #else
151
+ # define LDOUBLE_ALIGN 16 /* EM64T, IA32 Win or IPF Win/Linux: 16-byte alignm\
152
+ ent */
153
+ #endif
154
+
155
+ #if (LDOUBLE_ALIGN == 16) /* _XPD_: zero words appended by LDOUBLE_HEX after its five data words */
156
+ #define _XPD_ ,0x0000,0x0000,0x0000 /* 5 + 3 = 8 words per value (16 bytes, assuming 16-bit unsigned short) */
157
+ #else /*12*/
158
+ #define _XPD_ ,0x0000 /* 5 + 1 = 6 words per value (12 bytes, assuming 16-bit unsigned short) */
159
+ #endif
160
+
161
+ #define LDOUBLE_HEX(w4,w3,w2,w1,w0) 0x##w0,0x##w1,0x##w2,0x##w3,0x##w4 _XPD_ /*LITTLE_ENDIAN*/ /* arguments are given from w4 down to w0 and emitted in reverse order (w0 first), followed by the _XPD_ padding */
162
+
163
+
164
+
165
+ double dde_atan_rn(double x) { /* placeholder: ignores x and always returns 0; it does not compute an arctangent */
166
+ return 0;
167
+ }
168
+
169
+ double atan_rd(double x) { /* placeholder (presumably the round-down entry point): ignores x and always returns 0 */
170
+ return 0;
171
+ }
172
+
173
+ double atan_ru(double x) { /* placeholder (presumably the round-up entry point): ignores x and always returns 0 */
174
+ return 0;
175
+ }
176
+
177
+ double atan_rz(double x) { /* placeholder (presumably the round-toward-zero entry point): ignores x and always returns 0 */
178
+ return 0;
179
+ }
180
+
181
+
182
+ static const double HALFPI = 1.57079632679489655799898173427209258079528808593750e+00; /* pi/2 as a double; atan_rn returns it, with the sign of x, when |x| >= 2^54 */
183
+ #define MIN_REDUCTION_NEEDED ULL(3F89FDF8BCCE533D) /* same bit pattern as ab_table[0].a; atan_rn performs argument reduction only when x_abs exceeds it */
184
+ #define A 0 /* NOTE(review): A, B, ATAN_BHI and ATAN_BLO look like table column indices but are not used in the part of the file visible here - confirm */
185
+ #define B 1
186
+ #define ATAN_BHI 0
187
+ #define ATAN_BLO 1
188
+ #define epsilon 2.04221581890623872536809598138553304900554884091659e-19 /* presumably the error bound for the path with argument reduction - confirm against its use */
189
+ #define epsilon_no_red 1.56771350764719825686165002299335165493769973908433e-19 /* presumably the error bound for the path without reduction - confirm against its use */
190
+ #define TWO_M_64 5.42101086242752217003726400434970855712890625000000e-20 /* 2^-64 */
191
+ #define TWO_10 1.02400000000000000000000000000000000000000000000000e+03 /* 2^10 */
192
+
193
+ __declspec(align(16))
194
+
195
+ static const struct{long long int a; double b;} ab_table[62] = {
196
+ { /*a[0] ~= 1.26914436930661800408e-02 */ ULL(3F89FDF8BCCE533D),
197
+ /*b[0] = */ 2.53869765124364009378776785297304741106927394866943e-02},
198
+ { /*a[1] ~= 3.80906929270782388369e-02 */ ULL(3FA3809F90CEBC31),
199
+ /*b[1] = */ 5.08066978456951506837313559117319528013467788696289e-02},
200
+ { /*a[2] ~= 6.35391122156262234502e-02 */ ULL(3FB0441968FBA526),
201
+ /*b[2] = */ 7.62920780032335793530151590857713017612695693969727e-02},
202
+ { /*a[3] ~= 8.90697640843219481662e-02 */ ULL(3FB6CD46ABCDFA25),
203
+ /*b[3] = */ 1.01876371166982934712841313285025535151362419128418e-01},
204
+ { /*a[4] ~= 1.14716138034642060814e-01 */ ULL(3FBD5E096D2EA546),
205
+ /*b[4] = */ 1.27593346472767293908745500630175229161977767944336e-01},
206
+ { /*a[5] ~= 1.40512327929006382604e-01 */ ULL(3FC1FC4ED691E891),
207
+ /*b[5] = */ 1.53477468508642272970732278736250009387731552124023e-01},
208
+ { /*a[6] ~= 1.66493216120905490981e-01 */ ULL(3FC54FA6531F610B),
209
+ /*b[6] = */ 1.79564085612852891715718328669026959687471389770508e-01},
210
+ { /*a[7] ~= 1.92694666476959805056e-01 */ ULL(3FC8AA380550EAF1),
211
+ /*b[7] = */ 2.05889628199359991933548030829115305095911026000977e-01},
212
+ { /*a[8] ~= 2.19153728611415840590e-01 */ ULL(3FCC0D3AB8975BD9),
213
+ /*b[8] = */ 2.32491819536184141092860500066308304667472839355469e-01},
214
+ { /*a[9] ~= 2.45908855876056406352e-01 */ ULL(3FCF79F0FEE46885),
215
+ /*b[9] = */ 2.59409901651160901270287695297156460583209991455078e-01},
216
+ { /*a[10] ~= 2.73000139926648314534e-01 */ ULL(3FD178D5943274CA),
217
+ /*b[10] = */ 2.86684879348826082701151563014718703925609588623047e-01},
218
+ { /*a[11] ~= 3.00469565029600954026e-01 */ ULL(3FD33AE4B2CFB5F7),
219
+ /*b[11] = */ 3.14359785700871030567071784389554522931575775146484e-01},
220
+ { /*a[12] ~= 3.28361285690481766972e-01 */ ULL(3FD503DF0DD40A5B),
221
+ /*b[12] = */ 3.42479972833279300292730340515845455229282379150391e-01},
222
+ { /*a[13] ~= 3.56721931693259067415e-01 */ ULL(3FD6D4883998DD14),
223
+ /*b[13] = */ 3.71093432391343347465095803272561170160770416259766e-01},
224
+ { /*a[14] ~= 3.85600945252912822931e-01 */ ULL(3FD8ADAF964ABFA5),
225
+ /*b[14] = */ 4.00251150738601846335029676993144676089286804199219e-01},
226
+ { /*a[15] ~= 4.15050955725992373816e-01 */ ULL(3FDA9031E241114E),
227
+ /*b[15] = */ 4.30007504761513281721363455289974808692932128906250e-01},
228
+ { /*a[16] ~= 4.45128198220858643198e-01 */ ULL(3FDC7CFAFB78B41D),
229
+ /*b[16] = */ 4.60420705138676944478959285333985462784767150878906e-01},
230
+ { /*a[17] ~= 4.75892983535655022698e-01 */ ULL(3FDE7507D82B9DC6),
231
+ /*b[17] = */ 4.91553295129659728601723145402502268552780151367188e-01},
232
+ { /*a[18] ~= 5.07410228170177493351e-01 */ ULL(3FE03CB45FF4B2AB),
233
+ /*b[18] = */ 5.23472714391912563591802154405741021037101745605469e-01},
234
+ { /*a[19] ~= 5.39750054761637805872e-01 */ ULL(3FE145A1E826E4EA),
235
+ /*b[19] = */ 5.56251939105489867642972967587411403656005859375000e-01},
236
+ { /*a[20] ~= 5.72988475252136329570e-01 */ ULL(3FE255EBED462BAC),
237
+ /*b[20] = */ 5.89970211851368997457711884635500609874725341796875e-01},
238
+ { /*a[21] ~= 6.07208171494496387417e-01 */ ULL(3FE36E3FD4CDD9AC),
239
+ /*b[21] = */ 6.24713877348479162954220100800739601254463195800781e-01},
240
+ { /*a[22] ~= 6.42499390954343656748e-01 */ ULL(3FE48F5AE1FB2991),
241
+ /*b[22] = */ 6.60577343433393693317157158162444829940795898437500e-01},
242
+ { /*a[23] ~= 6.78960978813340497734e-01 */ ULL(3FE5BA0C5FE86E27),
243
+ /*b[23] = */ 6.97664190728041089251121320558013394474983215332031e-01},
244
+ { /*a[24] ~= 7.16701572306941533027e-01 */ ULL(3FE6EF3822C19A5D),
245
+ /*b[24] = */ 7.36088459496464064812926153535954654216766357421875e-01},
246
+ { /*a[25] ~= 7.55840988781748695010e-01 */ ULL(3FE82FD970F967BD),
247
+ /*b[25] = */ 7.75976148518263131315109148999908939003944396972656e-01},
248
+ { /*a[26] ~= 7.96511846049556065643e-01 */ ULL(3FE97D0669351A0D),
249
+ /*b[26] = */ 8.17466968767843527032823658373672515153884887695312e-01},
250
+ { /*a[27] ~= 8.38861462565995493716e-01 */ ULL(3FEAD7F3FE730FCD),
251
+ /*b[27] = */ 8.60716404767067566616844942473107948899269104003906e-01},
252
+ { /*a[28] ~= 8.83054096327761096527e-01 */ ULL(3FEC41FAAA0A733E),
253
+ /*b[28] = */ 9.05898149317818313086547732382314279675483703613281e-01},
254
+ { /*a[29] ~= 9.29273595909162105525e-01 */ ULL(3FEDBC9BFAEEEADF),
255
+ /*b[29] = */ 9.53206993785724487899813084368361160159111022949219e-01},
256
+ { /*a[30] ~= 9.77726555752981254442e-01 */ ULL(3FEF498933AC790A),
257
+ /*b[30] = */ 1.00286227737052557884567249857354909181594848632812e+00},
258
+ { /*a[31] ~= 1.02864609206350806308e+00 */ ULL(3FF075559AC922B4),
259
+ /*b[31] = */ 1.05511202646791502068879253783961758017539978027344e+00},
260
+ { /*a[32] ~= 1.08229638730567912228e+00 */ ULL(3FF151160440E8D3),
261
+ /*b[32] = */ 1.11023795151925819268967643438372761011123657226562e+00},
262
+ { /*a[33] ~= 1.13897819300824741364e+00 */ ULL(3FF23941329D3DD8),
263
+ /*b[33] = */ 1.16856151675095110142876819736557081341743469238281e+00},
264
+ { /*a[34] ~= 1.19903553596580987055e+00 */ ULL(3FF32F3FE2DB7094),
265
+ /*b[34] = */ 1.23045136228081597451478046423289924860000610351562e+00},
266
+ { /*a[35] ~= 1.26286394722716532198e+00 */ ULL(3FF434B0D38A35D7),
267
+ /*b[35] = */ 1.29633244442242001603915468876948580145835876464844e+00},
268
+ { /*a[36] ~= 1.33092063388866265448e+00 */ ULL(3FF54B736F41F96D),
269
+ /*b[36] = */ 1.36669737760087572908673791971523314714431762695312e+00},
270
+ { /*a[37] ~= 1.40373715148086145849e+00 */ ULL(3FF675B5165CA5E1),
271
+ /*b[37] = */ 1.44212062317890032936418265308020636439323425292969e+00},
272
+ { /*a[38] ~= 1.48193532552453321547e+00 */ ULL(3FF7B601D0DEA3C6),
273
+ /*b[38] = */ 1.52327639603630871079076314345002174377441406250000e+00},
274
+ { /*a[39] ~= 1.56624743831976717041e+00 */ ULL(3FF90F5979506F51),
275
+ /*b[39] = */ 1.61096147803441858137318831722950562834739685058594e+00},
276
+ { /*a[40] ~= 1.65754207708184630948e+00 */ ULL(3FFA854AD74CF791),
277
+ /*b[40] = */ 1.70612458293084490179580825497396290302276611328125e+00},
278
+ { /*a[41] ~= 1.75685758736121174681e+00 */ ULL(3FFC1C16B3972246),
279
+ /*b[41] = */ 1.80990457885083300126893846027087420225143432617188e+00},
280
+ { /*a[42] ~= 1.86544587781964938190e+00 */ ULL(3FFDD8DDC6DB1831),
281
+ /*b[42] = */ 1.92368085119253517945026032975874841213226318359375e+00},
282
+ { /*a[43] ~= 1.98483051718814034750e+00 */ ULL(3FFFC1DDA4F6D032),
283
+ /*b[43] = */ 2.04914055707593512067887786542996764183044433593750e+00},
284
+ { /*a[44] ~= 2.11688487740990979279e+00 */ ULL(4000EF6156AEFAF2),
285
+ /*b[44] = */ 2.18836977316091063627823132264893501996994018554688e+00},
286
+ { /*a[45] ~= 2.26393888595347935033e+00 */ ULL(40021C8BFD9A80C1),
287
+ /*b[45] = */ 2.34397906437763481335423421114683151245117187500000e+00},
288
+ { /*a[46] ~= 2.42892740222016626128e+00 */ ULL(40036E717D67269C),
289
+ /*b[46] = */ 2.51927965826279764982587039412464946508407592773438e+00},
290
+ { /*a[47] ~= 2.61560046981161264128e+00 */ ULL(4004ECBFF069F1E4),
291
+ /*b[47] = */ 2.71853573297491069027387311507482081651687622070312e+00},
292
+ { /*a[48] ~= 2.82882779840766906527e+00 */ ULL(4006A170780169B7),
293
+ /*b[48] = */ 2.94733416149008720097413061012048274278640747070312e+00},
294
+ { /*a[49] ~= 3.07505072362971616974e+00 */ ULL(400899B4319C3F02),
295
+ /*b[49] = */ 3.21314087722892072207514502224512398242950439453125e+00},
296
+ { /*a[50] ~= 3.36297230191158715455e+00 */ ULL(400AE75E05B0834A),
297
+ /*b[50] = */ 3.52616384863255349912947167467791587114334106445312e+00},
298
+ { /*a[51] ~= 3.70464601821196143254e+00 */ ULL(400DA31D739BD0E3),
299
+ /*b[51] = */ 3.90073973345466518125590482668485492467880249023438e+00},
300
+ { /*a[52] ~= 4.11726034471856573100e+00 */ ULL(401078131886BC57),
301
+ /*b[52] = */ 4.35765668014056828383218089584261178970336914062500e+00},
302
+ { /*a[53] ~= 4.62619989820137847648e+00 */ ULL(4012813A8BCE2241),
303
+ /*b[53] = */ 4.92824409985376998832862227573059499263763427734375e+00},
304
+ { /*a[54] ~= 5.27059285056349616385e+00 */ ULL(401515164ACECE78),
305
+ /*b[54] = */ 5.66202526987798027136022938066162168979644775390625e+00},
306
+ { /*a[55] ~= 6.11406930017863578891e+00 */ ULL(401874CE9526FAB9),
307
+ /*b[55] = */ 6.64216890962962569489036468439735472202301025390625e+00},
308
+ { /*a[56] ~= 7.26750136287798241547e+00 */ ULL(401D11EBE094C913),
309
+ /*b[56] = */ 8.01990986231011859786121931392699480056762695312500e+00},
310
+ { /*a[57] ~= 8.94284159107796650204e+00 */ ULL(4021E2BC220DFA19),
311
+ /*b[57] = */ 1.01020964280653942068965989165008068084716796875000e+01},
312
+ { /*a[58] ~= 1.16023240149353498339e+01 */ ULL(40273463D0337C49),
313
+ /*b[58] = */ 1.36206610885392880305744256475009024143218994140625e+01},
314
+ { /*a[59] ~= 1.64826377753716631495e+01 */ ULL(40307B8E26350916),
315
+ /*b[59] = */ 2.08587363260064613257327437167987227439880371093750e+01},
316
+ { /*a[60] ~= 2.83859754493341325216e+01 */ ULL(403C62CF497BF2F2),
317
+ /*b[60] = */ 4.43908820444562195461912779137492179870605468750000e+01},
318
+ { /*a[61] ~= 1.01699461607316896213e+02 */ ULL(40596CC3FA9E0EF4),
319
+ /*b[61] = */ 8.27932424540746438879068591631948947906494140625000e+01}
320
+ };
321
+
322
+
323
+ #define atanb_table ((const XC_FLOAT_TYPE *)_atanb_table)
324
+ __declspec(align(16)) static const unsigned short _atanb_table[] = {
325
+ /*atan_b[0] ~= 2.5381524664e-02*/
326
+ LDOUBLE_HEX(3FF9, CFEC, EA4B, 4FCB, 5DFD),
327
+ LDOUBLE_HEX(BFB7, CBBA, 8342, F523, 8BE7),
328
+ /*atan_b[1] ~= 5.0763049304e-02*/
329
+ LDOUBLE_HEX(3FFA, CFEC, EA49, B131, 647C),
330
+ LDOUBLE_HEX(3FB6, D38B, A5E1, 4DEF, A6BD),
331
+ /*atan_b[2] ~= 7.6144573921e-02*/
332
+ LDOUBLE_HEX(3FFB, 9BF1, AFB6, 0F03, 5D53),
333
+ LDOUBLE_HEX(3FB8, EF7C, 871F, DC70, BCA9),
334
+ /*atan_b[3] ~= 1.0152609851e-01*/
335
+ LDOUBLE_HEX(3FFB, CFEC, EA46, 78CC, AECA),
336
+ LDOUBLE_HEX(BFB7, DCB7, 3BED, 3BD7, 633C),
337
+ /*atan_b[4] ~= 1.2690762308e-01*/
338
+ LDOUBLE_HEX(3FFC, 81F4, 126B, 0C0A, B24C),
339
+ LDOUBLE_HEX(3FB8, 9C93, 50C6, 8748, 202B),
340
+ /*atan_b[5] ~= 1.5228914763e-01*/
341
+ LDOUBLE_HEX(3FFC, 9BF1, AFB2, 77C1, F1F3),
342
+ LDOUBLE_HEX(BFBB, 9D89, 6B54, 2B43, C3D3),
343
+ /*atan_b[6] ~= 1.7767067216e-01*/
344
+ LDOUBLE_HEX(3FFC, B5EF, 4CF9, 8121, 27D9),
345
+ LDOUBLE_HEX(BFBB, D8AB, 134C, C337, 1424),
346
+ /*atan_b[7] ~= 2.0305219666e-01*/
347
+ LDOUBLE_HEX(3FFC, CFEC, EA40, 29FE, 3D0C),
348
+ LDOUBLE_HEX(BFBA, 964C, 23A5, 78A9, 286C),
349
+ /*atan_b[8] ~= 2.2843372114e-01*/
350
+ LDOUBLE_HEX(3FFC, E9EA, 8786, 746E, CBDE),
351
+ LDOUBLE_HEX(3FBB, 95CE, 8C74, D4B3, 3D3D),
352
+ /*atan_b[9] ~= 2.5381524560e-01*/
353
+ LDOUBLE_HEX(3FFD, 81F4, 1266, 3163, 58ED),
354
+ LDOUBLE_HEX(3FBB, B292, B8DC, 903F, C86D),
355
+ /*atan_b[10] ~= 2.7919677004e-01*/
356
+ LDOUBLE_HEX(3FFD, 8EF2, E108, FBCB, 4839),
357
+ LDOUBLE_HEX(BFBC, C5E3, D3F8, 42F0, A001),
358
+ /*atan_b[11] ~= 3.0457829447e-01*/
359
+ LDOUBLE_HEX(3FFD, 9BF1, AFAB, 9AD5, 051A),
360
+ LDOUBLE_HEX(3FBC, BE9C, AF21, 45D0, CBC5),
361
+ /*atan_b[12] ~= 3.2995981887e-01*/
362
+ LDOUBLE_HEX(3FFD, A8F0, 7E4E, 1002, FE3F),
363
+ LDOUBLE_HEX(3FB9, ACDF, 4585, 84D5, 7EE8),
364
+ /*atan_b[13] ~= 3.5534134325e-01*/
365
+ LDOUBLE_HEX(3FFD, B5EF, 4CF0, 5CF3, 3B2F),
366
+ LDOUBLE_HEX(BFB9, DAF1, E542, E461, 5C3F),
367
+ /*atan_b[14] ~= 3.8072286762e-01*/
368
+ LDOUBLE_HEX(3FFD, C2EE, 1B92, 835E, 5241),
369
+ LDOUBLE_HEX(3FBC, F450, E872, E8D5, 5B89),
370
+ /*atan_b[15] ~= 4.0610439197e-01*/
371
+ LDOUBLE_HEX(3FFD, CFEC, EA34, 8516, 3E60),
372
+ LDOUBLE_HEX(BFBC, 91DD, F6E6, 0680, E8AD),
373
+ /*atan_b[16] ~= 4.3148591630e-01*/
374
+ LDOUBLE_HEX(3FFD, DCEB, B8D6, 6405, 31AA),
375
+ LDOUBLE_HEX(BFBC, 8502, E09D, 5663, 1B39),
376
+ /*atan_b[17] ~= 4.5686744062e-01*/
377
+ LDOUBLE_HEX(3FFD, E9EA, 8778, 222C, 48BB),
378
+ LDOUBLE_HEX(BFBB, F51E, C2F3, 5A3E, F53D),
379
+ /*atan_b[18] ~= 4.8224896492e-01*/
380
+ LDOUBLE_HEX(3FFD, F6E9, 5619, C1A2, 5014),
381
+ LDOUBLE_HEX(BFBB, E1E1, FABB, 35B7, 64D8),
382
+ /*atan_b[19] ~= 5.0763048922e-01*/
383
+ LDOUBLE_HEX(3FFE, 81F4, 125D, A249, 1B96),
384
+ LDOUBLE_HEX(BFBB, FEB6, 20F5, A80E, ABD8),
385
+ /*atan_b[20] ~= 5.3301201350e-01*/
386
+ LDOUBLE_HEX(3FFE, 8873, 79AE, 569C, E82C),
387
+ LDOUBLE_HEX(BFBD, 9333, CB85, 3253, A31F),
388
+ /*atan_b[21] ~= 5.5839353776e-01*/
389
+ LDOUBLE_HEX(3FFE, 8EF2, E0FE, FEF4, 22DF),
390
+ LDOUBLE_HEX(3FBD, FBF4, E487, 2960, 19F2),
391
+ /*atan_b[22] ~= 5.8377506202e-01*/
392
+ LDOUBLE_HEX(3FFE, 9572, 484F, 9C7E, 4569),
393
+ LDOUBLE_HEX(BFBD, ED41, 6021, 317B, 1548),
394
+ /*atan_b[23] ~= 6.0915658627e-01*/
395
+ LDOUBLE_HEX(3FFE, 9BF1, AFA0, 3071, E801),
396
+ LDOUBLE_HEX(3FBD, C46B, 95C4, B736, D8A5),
397
+ /*atan_b[24] ~= 6.3453811052e-01*/
398
+ LDOUBLE_HEX(3FFE, A271, 16F0, BC0B, F541),
399
+ LDOUBLE_HEX(3FBD, E479, 64B6, 873E, E8BE),
400
+ /*atan_b[25] ~= 6.5991963475e-01*/
401
+ LDOUBLE_HEX(3FFE, A8F0, 7E41, 408E, DDC6),
402
+ LDOUBLE_HEX(3FBD, C200, D1A3, 7D02, 9DAA),
403
+ /*atan_b[26] ~= 6.8530115898e-01*/
404
+ LDOUBLE_HEX(3FFE, AF6F, E591, BF41, BD98),
405
+ LDOUBLE_HEX(3FBC, AB83, 86B7, DBD3, 49B9),
406
+ /*atan_b[27] ~= 7.1068268321e-01*/
407
+ LDOUBLE_HEX(3FFE, B5EF, 4CE2, 396F, 887A),
408
+ LDOUBLE_HEX(3FB9, 93C0, 6F69, 2472, DD13),
409
+ /*atan_b[28] ~= 7.3606420743e-01*/
410
+ LDOUBLE_HEX(3FFE, BC6E, B432, B066, 2617),
411
+ LDOUBLE_HEX(BFBD, C5F2, 72DA, A216, 8845),
412
+ /*atan_b[29] ~= 7.6144573166e-01*/
413
+ LDOUBLE_HEX(3FFE, C2EE, 1B83, 2575, A17C),
414
+ LDOUBLE_HEX(3FBA, FC52, 25AC, D135, 67B0),
415
+ /*atan_b[30] ~= 7.8682725588e-01*/
416
+ LDOUBLE_HEX(3FFE, C96D, 82D3, 99EF, 4753),
417
+ LDOUBLE_HEX(3FBC, E6CB, 9CE5, F7DC, 32EF),
418
+ /*atan_b[31] ~= 8.1220878010e-01*/
419
+ LDOUBLE_HEX(3FFE, CFEC, EA24, 0F24, C5A3),
420
+ LDOUBLE_HEX(BFBB, 9F94, 64A4, 0D49, 77DA),
421
+ /*atan_b[32] ~= 8.3759030433e-01*/
422
+ LDOUBLE_HEX(3FFE, D66C, 5174, 8667, 5086),
423
+ LDOUBLE_HEX(BFBC, E480, 36A7, 98A0, E416),
424
+ /*atan_b[33] ~= 8.6297182855e-01*/
425
+ LDOUBLE_HEX(3FFE, DCEB, B8C5, 0106, C115),
426
+ LDOUBLE_HEX(BFBB, AE5E, 111C, 0925, 5FC1),
427
+ /*atan_b[34] ~= 8.8835335278e-01*/
428
+ LDOUBLE_HEX(3FFE, E36B, 2015, 8050, B874),
429
+ LDOUBLE_HEX(BFBC, 8DD3, E1A9, 67EE, B236),
430
+ /*atan_b[35] ~= 9.1373487702e-01*/
431
+ LDOUBLE_HEX(3FFE, E9EA, 8766, 058F, C400),
432
+ LDOUBLE_HEX(BFBD, 994E, 5D94, 7944, 5BF2),
433
+ /*atan_b[36] ~= 9.3911640126e-01*/
434
+ LDOUBLE_HEX(3FFE, F069, EEB6, 920A, 8756),
435
+ LDOUBLE_HEX(BFBD, F0FC, 830B, 5639, 9FED),
436
+ /*atan_b[37] ~= 9.6449792552e-01*/
437
+ LDOUBLE_HEX(3FFE, F6E9, 5607, 2702, D403),
438
+ LDOUBLE_HEX(BFBD, B0EF, D9DB, FF7A, BBF3),
439
+ /*atan_b[38] ~= 9.8987944978e-01*/
440
+ LDOUBLE_HEX(3FFE, FD68, BD57, C5B4, F372),
441
+ LDOUBLE_HEX(BFBD, 9706, 5831, 4248, 656E),
442
+ /*atan_b[39] ~= 1.0152609740e+00*/
443
+ LDOUBLE_HEX(3FFF, 81F4, 1254, 37AB, 59C4),
444
+ LDOUBLE_HEX(3FBE, C83B, C3BE, 8160, FE56),
445
+ /*atan_b[40] ~= 1.0406424983e+00*/
446
+ LDOUBLE_HEX(3FFF, 8533, C5FC, 928B, 5DCD),
447
+ LDOUBLE_HEX(3FBE, C025, 7DA6, 5435, CDA0),
448
+ /*atan_b[41] ~= 1.0660240226e+00*/
449
+ LDOUBLE_HEX(3FFF, 8873, 79A4, F40D, D390),
450
+ LDOUBLE_HEX(BFBE, BB70, CBE8, FB3B, AA03),
451
+ /*atan_b[42] ~= 1.0914055469e+00*/
452
+ LDOUBLE_HEX(3FFF, 8BB3, 2D4D, 5CC1, ADB6),
453
+ LDOUBLE_HEX(3FBE, 8161, 18FB, A932, 136B),
454
+ /*atan_b[43] ~= 1.1167870712e+00*/
455
+ LDOUBLE_HEX(3FFF, 8EF2, E0F5, CD31, 1F80),
456
+ LDOUBLE_HEX(BFBC, BD96, 57B0, 5730, 7576),
457
+ /*atan_b[44] ~= 1.1421685956e+00*/
458
+ LDOUBLE_HEX(3FFF, 9232, 949E, 45E1, 3E02),
459
+ LDOUBLE_HEX(BFBD, CDB1, 87A1, 5D56, 06EC),
460
+ /*atan_b[45] ~= 1.1675501199e+00*/
461
+ LDOUBLE_HEX(3FFF, 9572, 4846, C751, B4C7),
462
+ LDOUBLE_HEX(BFBD, A1AB, 140B, 2B49, DF68),
463
+ /*atan_b[46] ~= 1.1929316443e+00*/
464
+ LDOUBLE_HEX(3FFF, 98B1, FBEF, 51FC, 635A),
465
+ LDOUBLE_HEX(3FBE, CA64, 3ADC, 86D5, FB02),
466
+ /*atan_b[47] ~= 1.2183131687e+00*/
467
+ LDOUBLE_HEX(3FFF, 9BF1, AF97, E655, 1527),
468
+ LDOUBLE_HEX(3FBE, CA1D, 3262, C2F9, D84C),
469
+ /*atan_b[48] ~= 1.2436946931e+00*/
470
+ LDOUBLE_HEX(3FFF, 9F31, 6340, 84C9, 33A7),
471
+ LDOUBLE_HEX(3FBD, AF23, 2B16, BE75, 8B87),
472
+ /*atan_b[49] ~= 1.2690762175e+00*/
473
+ LDOUBLE_HEX(3FFF, A271, 16E9, 2DBF, 7CA7),
474
+ LDOUBLE_HEX(3FBE, FDDA, 7599, 4DA2, 0F86),
475
+ /*atan_b[50] ~= 1.2944577420e+00*/
476
+ LDOUBLE_HEX(3FFF, A5B0, CA91, E197, C307),
477
+ LDOUBLE_HEX(BFBC, D265, 9307, D567, 08BE),
478
+ /*atan_b[51] ~= 1.3198392664e+00*/
479
+ LDOUBLE_HEX(3FFF, A8F0, 7E3A, A0AA, A7E2),
480
+ LDOUBLE_HEX(3FBE, BE3C, 4D06, 7D11, 0641),
481
+ /*atan_b[52] ~= 1.3452207909e+00*/
482
+ LDOUBLE_HEX(3FFF, AC30, 31E3, 6B49, 6713),
483
+ LDOUBLE_HEX(BFBE, B9DD, 9D13, C459, 6F6C),
484
+ /*atan_b[53] ~= 1.3706023154e+00*/
485
+ LDOUBLE_HEX(3FFF, AF6F, E58C, 41BD, 9EA8),
486
+ LDOUBLE_HEX(BFBD, 802F, 2153, DC49, 3698),
487
+ /*atan_b[54] ~= 1.3959838399e+00*/
488
+ LDOUBLE_HEX(3FFF, B2AF, 9935, 2449, 1D44),
489
+ LDOUBLE_HEX(3FBE, CAFC, 43E2, 3F23, 5075),
490
+ /*atan_b[55] ~= 1.4213653645e+00*/
491
+ LDOUBLE_HEX(3FFF, B5EF, 4CDE, 1325, B93A),
492
+ LDOUBLE_HEX(BFBA, 9155, 4FBC, 9598, FA3D),
493
+ /*atan_b[56] ~= 1.4467468891e+00*/
494
+ LDOUBLE_HEX(3FFF, B92F, 0087, 0E85, 296B),
495
+ LDOUBLE_HEX(3FBE, C76A, DB5B, 6055, 9EA6),
496
+ /*atan_b[57] ~= 1.4721284137e+00*/
497
+ LDOUBLE_HEX(3FFF, BC6E, B430, 1690, E405),
498
+ LDOUBLE_HEX(3FBA, A6CB, 4564, 7FF8, 4121),
499
+ /*atan_b[58] ~= 1.4975099383e+00*/
500
+ LDOUBLE_HEX(3FFF, BFAE, 67D9, 2B6A, 02AA),
501
+ LDOUBLE_HEX(BFBD, B0AE, B984, 420B, 761D),
502
+ /*atan_b[59] ~= 1.5228914629e+00*/
503
+ LDOUBLE_HEX(3FFF, C2EE, 1B82, 4D29, 2EBE),
504
+ LDOUBLE_HEX(BFBE, 9CBD, 26E8, 9FF8, E917),
505
+ /*atan_b[60] ~= 1.5482729876e+00*/
506
+ LDOUBLE_HEX(3FFF, C62D, CF2B, 7BDE, 8EE3),
507
+ LDOUBLE_HEX(BFBE, AF45, EFD8, 2A64, 49A5),
508
+ /*atan_b[61] ~= 1.5587186337e+00*/
509
+ LDOUBLE_HEX(3FFF, C784, 1799, 9E5D, D2A5),
510
+ LDOUBLE_HEX(BFBE, A231, BD90, F170, 34A5),
511
+ };
512
+ static const long double coef_poly[9][2] = { /* polynomial coefficients, one {leading value, small correction} pair per row; atan_rn loads rows 0..3, column 0, into C3, C5, C7, C9 */
513
+ { -3.33333333333333333342368351437379203616728773340583e-01L, 9.03501810404587028364033466367082415937499719525463e-21L}, /* ~ -1/3 */
514
+ { 2.00000000000000000002710505431213761085018632002175e-01L, -2.71050543121376108505536620063805076318847614178820e-21L}, /* ~ 1/5 */
515
+ { -1.42857142857142857140921067549133027796415262855589e-01L, -1.93607530800982934641564128836546985281459293443700e-21L}, /* ~ -1/7 */
516
+ { 1.11111111111111111109605274760436799397211871109903e-01L, 1.50583635067431171387883211317314321885579450456211e-21L}, /* ~ 1/9 */
517
+ { -9.09090909090909090933731867556488737136533018201590e-02L, 0}, /* ~ -1/11; from this row on the correction is 0 */
518
+ { 7.69230769230769230779655790120052927250071661546826e-02L, 0}, /* ~ 1/13 */
519
+ { -6.66666666666666666698289230030827212658550706692040e-02L, 0}, /* ~ -1/15 */
520
+ { 5.88235294117647058825522430464127765503690170589834e-02L, 0}, /* ~ 1/17 */
521
+ { -5.26315789473684210515616425929419364138084347359836e-02L, 0}, /* ~ -1/19 */
522
+ };
523
+
524
+
525
+
526
+
527
+ extern double atan_rn(double xd) {
528
+ /* atan_rn: arctangent of a double, evaluated in two steps with the _Asm_*
+  * intrinsics (presumably those of the Itanium compilers -- confirm).
+  * The "_rn" suffix presumably stands for round-to-nearest -- confirm
+  * against the library header.
+  *
+  * xd: the argument.
+  * Returns xd+xd if xd is NaN, +/-HALFPI if |xd| >= 2^54, xd itself if
+  * |xd| < 2^-27.  Otherwise a first step evaluates atan(|xd|) in
+  * L_FLOAT_TYPE arithmetic; if its rounding test passes, that result is
+  * returned, else a second step redoes the evaluation with the
+  * double-double-extended operators (Add12_ext, Mul22_ext, ...).
+  * All evaluation is done on |xd|; the sign is restored on return.
+  */
529
+ unsigned int hx;
530
+ double sign;
531
+ double u;
532
+ double comp;
533
+ /* NOTE(review): hx, sign, u and comp are never referenced in this function. */
534
+ int i, i1, m;
535
+ UINT64 x_val,x_abs,sign_mask;
536
+ L_FLOAT_TYPE xe, tmp, bi, atanbhi, xred, xred2,q;
537
+ L_FLOAT_TYPE res,reshi,reslo,rn_constant,test;
538
+ L_FLOAT_TYPE xred4,tmp2;
539
+ L_FLOAT_TYPE a,b,e0,e1,e2,e3,q0,q1,q2,y0,y1,y2,xred2coarse;
540
+ L_FLOAT_TYPE C3,C5,C7,C9 ;
541
+
542
+ /* x_val: 64-bit pattern of xd; x_abs: same with bit 63 cleared; sign_mask: bit 63 replicated */
543
+ x_val = _Asm_getf( _FR_D, xd );
544
+ x_abs = (x_val & ULL(7fffffffffffffff));
545
+ sign_mask = ((SINT64)x_val >> 63); /* either 00..00 or 11...11 */
546
+
547
+
548
+
549
+ /* xe = |xd|, held in an L_FLOAT_TYPE ("DE" below presumably stands for double-extended) */
550
+ if(sign_mask)
551
+ xe=-xd;
552
+ else
553
+ xe=xd;
554
+
555
+
556
+ /* Filter special cases; the tests compare x_abs (bit pattern of |xd|) with hexadecimal thresholds */
557
+ if (__builtin_expect( x_abs >= ULL(4350000000000000), 0)) { /* |x| >= 2^54; Inf and NaN bit patterns also land here */
558
+ if (xd!=xd )
559
+ return xd+xd; /* NaN */
560
+ else {/* atan(x) = +/- Pi/2 */
561
+ if(sign_mask) return -HALFPI; else return HALFPI;
562
+ }
563
+ }
564
+ else if (__builtin_expect( x_abs < ULL(3E40000000000000), 0))
565
+ /* TODO Add stuff to raise inexact flag */
566
+ return xd; /* |x| < 2^-27: atan(x) =~ x, so x itself is returned */
567
+
568
+
569
+ /* Now there is something to compute */
570
+
571
+ /* load the first-step polynomial coefficients (first column of coef_poly rows 0..3) */
572
+ C3=coef_poly[0][0];
573
+ C5=coef_poly[1][0];
574
+ C7=coef_poly[2][0];
575
+ C9=coef_poly[3][0];
576
+
577
+ if (__builtin_expect(x_abs > MIN_REDUCTION_NEEDED, 0)) /* test if reduction is necessary : */
578
+ {
579
+ /* 1) Argument reduction : */
580
+ /* This constant was found by dichotomy. I am very ashamed. NOTE(review): rn_constant is only read by the disabled (#else) Ziv rounding test further down, which first overwrites it with 1.01. */
581
+ rn_constant = 1.002;
582
+
583
+ /* compute i so that a[i] < x < a[i+1]: unrolled bisection over ab_table[].a (compared with the bit pattern x_abs) using steps 16, 8, 4, 2, 1 and a final one-step correction */
584
+
585
+ if (x_abs>ab_table[61].a)
586
+ i=61;
587
+ else {
588
+ i=31;
589
+ if (x_abs < ab_table[i].a) i-= 16;
590
+ else i+=16;
591
+ if (x_abs < ab_table[i].a) i-= 8;
592
+ else i+= 8;
593
+ if (x_abs < ab_table[i].a) i-= 4;
594
+ else i+= 4;
595
+ if (x_abs < ab_table[i].a) i-= 2;
596
+ else i+= 2;
597
+ if (x_abs < ab_table[i].a) i-= 1;
598
+ else i+= 1;
599
+ if (x_abs < ab_table[i].a) i-= 1;
600
+ }
601
+ /* NOTE(review): if x_abs == ab_table[61].a, the "else" arms above lead to i == 62 (assuming the .a entries increase with the index) and ab_table[62].a is then read -- confirm the table has that entry. */
602
+ bi= ab_table[i].b;
603
+ atanbhi = atanb_table[i].hi;
604
+
605
+ /* the dividend and the divisor for the argument reduction */
606
+ a = xe-bi; b = 1 + xe * bi;
607
+
608
+ /* the reduced argument is xred = a/b = (xe - bi) / (1 + xe*bi) */
609
+ #if 1
610
+ /* now we want to compute (xe - bi )/b as a DE, but
611
+ we will need the accurate quotient only later on,
612
+ we can start the computation of the polynomial with a much coarser approximation.
613
+ Saves 12 cycles.
614
+ */
615
+ /* Algo 8.11 in Markstein book */
616
+ _Asm_frcpa(&y0, a, b, _SF1); /* y0 is used below as a first approximation of 1/b */
617
+
618
+ e0 = 1 - b*y0; q0 = a*y0; /* e0: error term of y0; q0: first quotient estimate */
619
+ e2 = e0 + e0*e0; e1 = e0*e0;
620
+ e3 = e0 + e1*e1; q1 = q0+q0*e2;
621
+ xred = q0 + q1*e3; xred2coarse = q1*q1; /* 62 bits in xred, more than enough */
622
+ xred2 = xred*xred; xred4 = xred2coarse*xred2coarse;
623
+
624
+
625
+
626
+ /* polynomial evaluation: q = C3 + C5*xred^2 + xred^4*(C7 + C9*xred^2), using the coarse square where precision allows */
627
+
628
+ tmp2 = C7 + xred2coarse * C9 ;
629
+
630
+ /* here we need xred2, xred2coarse loses a lot of precision to win 3 cycles. */
631
+ tmp = C3 + xred2 * C5;
632
+
633
+ q = tmp + xred4 * tmp2;
634
+
635
+
636
+ #else
637
+ xred=a/b;
638
+ xred2=xred*xred;
639
+ xred4=xred2*xred2;
640
+ tmp2 = C7 + xred2 * C9 ;
641
+ tmp = C3 + xred2 * C5;
642
+ q = tmp + xred4 * tmp2;
643
+ #endif
644
+
645
+ tmp = 1+q*xred2;
646
+ /* reconstruction : atan(|x|) = atan(b[i]) + atan(xred), with atan(xred) =~ xred*tmp */
647
+ res = atanbhi+xred*tmp;
648
+ /* the same sum atanbhi + xred*tmp, this time with round to double (_PC_D) */
649
+ reshi = _Asm_fma( _PC_D, xred, tmp, atanbhi, _SF0 );
650
+
651
+ }
652
+ else
653
+ /* no reduction needed */
654
+ {
655
+
656
+
657
+ /* Polynomial evaluation */
658
+
659
+ xred2 = xe*xe;
660
+ /* poly eval: q = xe^2 * (C3 + C5*xe^2 + xe^4*(C7 + C9*xe^2)) */
661
+ xred4=xred2*xred2;
662
+ tmp2 = C7 + xred2 * C9 ;
663
+ tmp = C3 + xred2 * C5;
664
+ q = tmp + xred4 * tmp2;
665
+ q *= xred2;
666
+
667
+
668
+ /* compute q*xe+xe twice: res with _PC_NONE (presumably no double precision control), reshi with round to double (_PC_D) */
669
+ res = _Asm_fma( _PC_NONE, q, xe, xe, _SF1 );
670
+ reshi = _Asm_fma( _PC_D, q, xe, xe, _SF0 );
671
+ }
672
+
673
+ #if 0 /* To time the first step only */
674
+ if(sign_mask)
675
+ return -reshi;
676
+ else
677
+ return reshi;
678
+ #endif
679
+ /* Rounding test: accept the first-step result unless bits 3..10 of res (as read with _FR_SIG, presumably its significand) equal 0x7f or 0x80 -- presumably the patterns for which rounding res to double is not safe */
680
+ #if 1
681
+ i1 = _Asm_getf( _FR_SIG, res);
682
+ m = i1 & (0xff<<3); /* keep bits 3..10 only */
683
+ if(__builtin_expect((m!=(0x7f<<3) && m!=(0x80<<3)), 1+1==2)) {
684
+ if(sign_mask)
685
+ return -reshi;
686
+ else
687
+ return reshi;
688
+ }
689
+ #else
690
+ /* Ziv-style rounding test (disabled by the #if 1 above) */
691
+ /* This constant was found by dichotomy. I am very ashamed */
692
+ rn_constant = 1.01;
693
+ reslo = res - reshi;
694
+ test=_Asm_fma( _PC_D, reslo, rn_constant, reshi, _SF0 );
695
+
696
+ if (__builtin_expect(reshi == test, 1+1==2)) {
697
+ if(sign_mask)
698
+ return -reshi;
699
+ else
700
+ return reshi;
701
+ }
702
+ #endif
703
+
704
+ /* this else pairs with the rounding-test "if" of whichever #if branch above is compiled */
705
+ else {
706
+
707
+ /******************************************************************/
708
+ /* Second step: double-double-extended evaluation */
709
+ L_FLOAT_TYPE tmphi, tmplo, x0hi, x0lo, xmBihi, xmBilo, Xredhi, Xredlo, Xred2, qhi,qlo, q, Xred2hi,Xred2lo, atanhi,atanlo;
710
+ int j;
711
+ /* NOTE: the q declared here shadows the first-step q declared at the top of the function. */
712
+
713
+
714
+
715
+ #if EVAL_PERF
716
+ crlibm_second_step_taken++;
717
+ #endif
718
+
719
+ #if DEBUG
720
+ printf("Toto\n");
721
+ #endif
722
+
723
+ if (__builtin_expect(x_abs > MIN_REDUCTION_NEEDED, 0)) {/* test if reduction is necessary : */
724
+ if(i==61){ /* i still holds the interval index found by the first step */
725
+ Add12_ext( xmBihi , xmBilo , xe , -ab_table[61].b);
726
+ }
727
+ else {
728
+ xmBihi = xe-ab_table[i].b;
729
+ xmBilo = 0.0;
730
+ }
731
+ /* (tmphi,tmplo) = xe * b[i]; (x0hi,x0lo) = 1 + xe * b[i], the divisor */
732
+ Mul12_ext(tmphi,tmplo, xe, (ab_table[i].b));
733
+
734
+ if (xe > 1) /* TODO replace with xabs */
735
+ Add22_ext(x0hi,x0lo,tmphi,tmplo, 1.0,0.0);
736
+ else {
737
+ Add22_ext(x0hi , x0lo , 1.0,0.0,tmphi,tmplo);
738
+ }
739
+
740
+ #if 1
741
+ Div22_ext(Xredhi, Xredlo, xmBihi , xmBilo , x0hi,x0lo);
742
+ #else
743
+ Xredhi=1; Xredlo=0; /* to time the Div22*/
744
+ #endif
745
+
746
+ #if DEBUG
747
+ printf("i=%d, num=%1.15e den=%1.15e\n",i, (double)xmBihi, (double)x0hi);
748
+ printf("Xred=%1.15e\n", (double)Xredhi);
749
+ #endif
750
+
751
+ Xred2 = Xredhi*Xredhi;
752
+ Mul22_ext(Xred2hi,Xred2lo,Xredhi,Xredlo,Xredhi, Xredlo);
753
+
754
+ /* poly eval: rows 4..8 of coef_poly in plain Horner form on Xred2, then rows 3..0 with both coef_poly columns through the Add22_ext/Mul22_ext operators */
755
+
756
+ q = (coef_poly[4][0]+Xred2*
757
+ (coef_poly[5][0]+Xred2*
758
+ (coef_poly[6][0]+Xred2*
759
+ (coef_poly[7][0]+
760
+ (Xred2*coef_poly[8][0])))));
761
+
762
+ Mul12_ext(qhi, qlo, q, Xred2);
763
+
764
+ for(j=3;j>=0;j--)
765
+ {
766
+ Add22_ext(qhi,qlo, (coef_poly[j][0]), (coef_poly[j][1]), qhi,qlo);
767
+ Mul22_ext(qhi,qlo, qhi,qlo, Xred2hi,Xred2lo);
768
+ }
769
+ /* multiply by Xred, then add Xred */
770
+ Mul22_ext(qhi,qlo, Xredhi,Xredlo, qhi,qlo);
771
+ Add22_ext(qhi,qlo, Xredhi,Xredlo, qhi,qlo);
772
+
773
+ /* reconstruction : atan(|x|) = atan(b[i]) + atan(Xred) */
774
+ Add22_ext(atanhi,atanlo, atanb_table[i].hi, atanb_table[i].lo, qhi,qlo);
775
+ }
776
+ else
777
+ /* no reduction needed */
778
+ {
779
+
780
+ #if DEBUG
781
+ printf("Tata\n");
782
+ #endif
783
+ /* Polynomial evaluation */
784
+ Mul12_ext( Xred2hi,Xred2lo,xe,xe);
785
+
786
+ /*poly eval - don't take risks, keep plain Horner */
787
+
788
+ q = coef_poly[8][0];
789
+ q = coef_poly[7][0]+Xred2hi*q;
790
+ q = coef_poly[6][0]+Xred2hi*q;
791
+ q = coef_poly[5][0]+Xred2hi*q;
792
+
793
+ Add12_ext(qhi,qlo, coef_poly[4][0], Xred2hi*q);
794
+ #if DEBUG
795
+ printf(" qhi+ql = %1.50Le + %1.50Le\n",(long double)qhi, (long double)qlo);
796
+ print_debug("qhi", qhi);
797
+ print_debug("qlo", qlo);
798
+ #endif
799
+ Mul22_ext(qhi,qlo, qhi,qlo, Xred2hi,Xred2lo);
800
+ #if DEBUG
801
+ printf(" Xred2 = %1.50Le + %1.50Le\n",(long double)Xred2hi, (long double)Xred2lo);
802
+ printf(" qhi+ql = %1.50Le + %1.50Le\n",(long double)qhi, (long double)qlo);
803
+ print_debug("qhi", qhi);
804
+ print_debug("qlo", qlo);
805
+ #endif
806
+ /* rows 3..0 of coef_poly, both columns, through the Add22_ext/Mul22_ext operators */
807
+ for(j=3;j>=0;j--)
808
+ {
809
+ Add22_ext(qhi,qlo, (coef_poly[j][0]), (coef_poly[j][1]), qhi,qlo);
810
+ Mul22_ext(qhi,qlo, qhi,qlo, Xred2hi,Xred2lo);
811
+ }
812
+
813
+ Mul22_ext (qhi,qlo, xe,0, qhi,qlo);
814
+
815
+ #if DEBUG
816
+ printf(" qhi+ql = %1.50Le + %1.50Le\n",(long double)qhi, (long double)qlo);
817
+ print_debug("qhi", qhi);
818
+ print_debug("qlo", qlo);
819
+ #endif
820
+ /* Now comes the addition sequence proven in the TOMS paper */
821
+ Add12_ext(atanhi,atanlo,xe,qhi);
822
+ #if DEBUG
823
+ print_debug("atanhi", atanhi);
824
+ printf(" atan hi+lo %1.50Le + %1.50Le\n",(long double)atanhi, (long double)atanlo);
825
+ #endif
826
+ atanlo += qlo;
827
+
828
+ }
829
+
830
+ #if DEBUG
831
+ printf(" atan hi+lo %1.50Le + %1.50Le\n",(long double)atanhi, (long double)atanlo);
832
+ printf(" %1.50e + %1.50e\n",(double)atanhi,(double) atanlo);
833
+ printf(" %1.50Le\n",(long double)(atanhi + atanlo));
834
+ printf(" ");
835
+ #endif
836
+ /* convert atanhi+atanlo to double and restore the sign of xd */
837
+ if(sign_mask)
838
+ res= -(double) (atanhi+atanlo);
839
+ else
840
+ res= (double) (atanhi+atanlo);
841
+
842
+ return res;
843
+
844
+ }
845
+ }
846
+