crmf 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +12 -0
  3. data/crmf.gemspec +102 -1
  4. data/ext/crlibm-1.0beta5/AUTHORS +2 -0
  5. data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
  6. data/ext/crlibm-1.0beta5/COPYING +340 -0
  7. data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
  8. data/ext/crlibm-1.0beta5/ChangeLog +125 -0
  9. data/ext/crlibm-1.0beta5/Makefile.am +134 -0
  10. data/ext/crlibm-1.0beta5/NEWS +0 -0
  11. data/ext/crlibm-1.0beta5/README +31 -0
  12. data/ext/crlibm-1.0beta5/README.DEV +23 -0
  13. data/ext/crlibm-1.0beta5/README.md +5 -0
  14. data/ext/crlibm-1.0beta5/TODO +66 -0
  15. data/ext/crlibm-1.0beta5/VERSION +1 -0
  16. data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
  17. data/ext/crlibm-1.0beta5/acos-td.h +629 -0
  18. data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
  19. data/ext/crlibm-1.0beta5/asin-td.h +620 -0
  20. data/ext/crlibm-1.0beta5/asincos.c +4488 -0
  21. data/ext/crlibm-1.0beta5/asincos.h +575 -0
  22. data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
  23. data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
  24. data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
  25. data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
  26. data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
  27. data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
  28. data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
  29. data/ext/crlibm-1.0beta5/configure.ac +419 -0
  30. data/ext/crlibm-1.0beta5/crlibm.h +204 -0
  31. data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
  32. data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
  33. data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
  34. data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
  35. data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
  36. data/ext/crlibm-1.0beta5/double-extended.h +496 -0
  37. data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
  38. data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
  39. data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
  40. data/ext/crlibm-1.0beta5/exp-td.h +685 -0
  41. data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
  42. data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
  43. data/ext/crlibm-1.0beta5/expm1.c +2515 -0
  44. data/ext/crlibm-1.0beta5/expm1.h +715 -0
  45. data/ext/crlibm-1.0beta5/interval.h +238 -0
  46. data/ext/crlibm-1.0beta5/log-de.c +480 -0
  47. data/ext/crlibm-1.0beta5/log-de.h +747 -0
  48. data/ext/crlibm-1.0beta5/log-de2.c +280 -0
  49. data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
  50. data/ext/crlibm-1.0beta5/log-td.c +1158 -0
  51. data/ext/crlibm-1.0beta5/log-td.h +819 -0
  52. data/ext/crlibm-1.0beta5/log.c +2244 -0
  53. data/ext/crlibm-1.0beta5/log.h +1592 -0
  54. data/ext/crlibm-1.0beta5/log10-td.c +906 -0
  55. data/ext/crlibm-1.0beta5/log10-td.h +823 -0
  56. data/ext/crlibm-1.0beta5/log1p.c +1295 -0
  57. data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
  58. data/ext/crlibm-1.0beta5/log2-td.h +821 -0
  59. data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
  60. data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
  61. data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
  62. data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
  63. data/ext/crlibm-1.0beta5/log_fast.c +360 -0
  64. data/ext/crlibm-1.0beta5/log_fast.h +440 -0
  65. data/ext/crlibm-1.0beta5/pow.c +1396 -0
  66. data/ext/crlibm-1.0beta5/pow.h +3101 -0
  67. data/ext/crlibm-1.0beta5/prepare +20 -0
  68. data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
  69. data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
  70. data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
  71. data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
  72. data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
  73. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
  74. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
  75. data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
  76. data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
  77. data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
  78. data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
  79. data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
  80. data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
  81. data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
  82. data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
  83. data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
  84. data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
  85. data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
  86. data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
  87. data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
  88. data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
  89. data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
  90. data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
  91. data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
  92. data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
  93. data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
  94. data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
  95. data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
  96. data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
  97. data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
  98. data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
  99. data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
  100. data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
  101. data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
  102. data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
  103. data/ext/crlibm-1.0beta5/trigpi.h +556 -0
  104. data/ext/crlibm-1.0beta5/triple-double.c +57 -0
  105. data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
  106. data/ext/crmf/crmf.c +16 -16
  107. data/ext/crmf/extconf.rb +12 -8
  108. data/lib/crmf/version.rb +1 -1
  109. data/tests/perf.rb +100 -219
  110. metadata +104 -3
  111. data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,1195 @@
1
+ /*
2
+ * Correctly rounded arccosine
3
+ *
4
+ * Author : Christoph Lauter (ENS Lyon)
5
+ *
6
+ * This file is part of the crlibm library developed by the Arenaire
7
+ * project at Ecole Normale Superieure de Lyon
8
+ *
9
+ * This program is free software; you can redistribute it and/or modify
10
+ * it under the terms of the GNU Lesser General Public License as published by
11
+ * the Free Software Foundation; either version 2 of the License, or
12
+ * (at your option) any later version.
13
+ *
14
+ * This program is distributed in the hope that it will be useful,
15
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ * GNU General Public License for more details.
18
+ *
19
+ * You should have received a copy of the GNU Lesser General Public License
20
+ * along with this program; if not, write to the Free Software
21
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22
+ */
23
+
24
+ #include <stdio.h>
25
+ #include <stdlib.h>
26
+ #include "crlibm.h"
27
+ #include "crlibm_private.h"
28
+ #include "triple-double.h"
29
+ #include "acos-td.h"
30
+
31
+ #define AVOID_FMA 1
32
+
33
+ void acos_accurate_lower(double *acosh, double *acosm, double *acosl, double x, double xSqh, double xSql, double sign) {
34
+ double highPoly;
35
+ double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l;
36
+ double tt1h, tt1l;
37
+ double t8h, t8m, t8l, t9h, t9m, t9l, t10h, t10m, t10l, t11h, t11m, t11l, t12h, t12m, t12l;
38
+ double tt8h, tt8m, tt8l, tt9h, tt9m, tt9l, tt10h, tt10m, tt10l, tt11h, tt11m, tt11l, tt12h, tt12m, tt12l;
39
+ double xCubeh, xCubem, xCubel, tt13h, tt13m, tt13l, t13h, t13m, t13l, polyh, polym, polyl;
40
+ double tt11hover, tt11mover, tt11lover;
41
+ double zw1h, zw1m, zw1l, acoshover, acosmover, acoslover;
42
+
43
+ #if EVAL_PERF
44
+ crlibm_second_step_taken++;
45
+ #endif
46
+
47
+ /* Accurate (second) step for small |x|: evaluate the polynomial of degree 37.
48
+ Its coefficients start at tbl[0]
49
+
50
+ p(x) = x + x * x^2 * (c3 + x^2 * (c5 + ...
51
+
52
+ We receive x^2 as xSqh + xSql = x * x (exactly)
53
+ as arguments
54
+
55
+ |x| <= 0.185 = 2^(-2.43)
56
+
57
+ Compute monomials 27 to 37 in double precision,
58
+ monomials 13 to 25 in double-double and
59
+ monomials 1 to 11 in triple-double precision in a
60
+ modified Horner form
61
+
62
+ */
63
+
64
+ /* Double computations */
65
+
66
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
67
+ highPoly = FMA(FMA(FMA(FMA(FMA(tbl[33],xSqh,tbl[32]),xSqh,tbl[31]),xSqh,tbl[30]),xSqh,tbl[29]),xSqh,tbl[28]);
68
+ #else
69
+ highPoly = tbl[28] + xSqh * (tbl[29] + xSqh * (tbl[30] + xSqh * (tbl[31] + xSqh * (tbl[32] + xSqh * tbl[33]))));
70
+ #endif
71
+
72
+ /* Double-double computations */
73
+
74
+ Mul12(&tt1h,&tt1l,xSqh,highPoly);
75
+ Add22(&t1h,&t1l,tbl[27],0,tt1h,tt1l);
76
+
77
+ MulAdd22(&t2h,&t2l,tbl[25],tbl[26],xSqh,xSql,t1h,t1l);
78
+ MulAdd22(&t3h,&t3l,tbl[23],tbl[24],xSqh,xSql,t2h,t2l);
79
+ MulAdd22(&t4h,&t4l,tbl[21],tbl[22],xSqh,xSql,t3h,t3l);
80
+ MulAdd22(&t5h,&t5l,tbl[19],tbl[20],xSqh,xSql,t4h,t4l);
81
+ MulAdd22(&t6h,&t6l,tbl[17],tbl[18],xSqh,xSql,t5h,t5l);
82
+ MulAdd22(&t7h,&t7l,tbl[15],tbl[16],xSqh,xSql,t6h,t6l);
83
+
84
+ /* Triple-double computations */
85
+
86
+ Mul23(&tt8h,&tt8m,&tt8l,xSqh,xSql,t7h,t7l); /* 149 - 48/53 */
87
+ Add33(&t8h,&t8m,&t8l,tbl[12],tbl[13],tbl[14],tt8h,tt8m,tt8l); /* 145 - 43/53 */
88
+ Mul233(&tt9h,&tt9m,&tt9l,xSqh,xSql,t8h,t8m,t8l); /* 139 - 39/53 */
89
+ Add33(&t9h,&t9m,&t9l,tbl[9],tbl[10],tbl[11],tt9h,tt9m,tt9l); /* 136 - 34/53 */
90
+ Mul233(&tt10h,&tt10m,&tt10l,xSqh,xSql,t9h,t9m,t9l); /* 130 - 30/53 */
91
+ Add33(&t10h,&t10m,&t10l,tbl[6],tbl[7],tbl[8],tt10h,tt10m,tt10l); /* 127 - 25/53 */
92
+ Mul233(&tt11hover,&tt11mover,&tt11lover,xSqh,xSql,t10h,t10m,t10l); /* 121 - 21/53 */
93
+
94
+ Renormalize3(&tt11h,&tt11m,&tt11l,tt11hover,tt11mover,tt11lover); /* infty - 52/53 */
95
+
96
+ Add33(&t11h,&t11m,&t11l,tbl[3],tbl[4],tbl[5],tt11h,tt11m,tt11l); /* 149 - 47/53 */
97
+ Mul233(&tt12h,&tt12m,&tt12l,xSqh,xSql,t11h,t11m,t11l); /* 143 - 43/53 */
98
+ Add33(&t12h,&t12m,&t12l,tbl[0],tbl[1],tbl[2],tt12h,tt12m,tt12l); /* 140 - 38/53 */
99
+
100
+ Mul123(&xCubeh,&xCubem,&xCubel,x,xSqh,xSql); /* 154 - 47/53 */
101
+
102
+ Mul33(&tt13h,&tt13m,&tt13l,xCubeh,xCubem,xCubel,t12h,t12m,t12l); /* 136 - 34/53 */
103
+ Add133(&t13h,&t13m,&t13l,x,tt13h,tt13m,tt13l); /* 138 - 32/53 */
104
+
105
+ Renormalize3(&polyh,&polym,&polyl,t13h,t13m,t13l); /* infty - 52/53 */
106
+
107
+ /* Reconstruction:
108
+
109
+ - Multiply the polynomial value by the inverted sign (-sign)
110
+ - Add Pi/2 (PIHALFH + PIHALFM + PIHALFL) in triple-double
111
+ - Renormalize into acosh, acosm, acosl
112
+
113
+ */
114
+
115
+ zw1h = -sign * polyh;
116
+ zw1m = -sign * polym;
117
+ zw1l = -sign * polyl;
118
+
119
+ Add33(&acoshover,&acosmover,&acoslover,PIHALFH,PIHALFM,PIHALFL,zw1h,zw1m,zw1l);
120
+
121
+ Renormalize3(acosh,acosm,acosl,acoshover,acosmover,acoslover);
122
+
123
+ }
124
+
125
+
126
+
127
+ void acos_accurate_middle(double *acosh, double *acosm, double *acosl, double z, int i, double sign) {
128
+ double highPoly;
129
+ double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l, t8h, t8l, t9h, t9l;
130
+ double t10h, t10m, t10l, t11h, t11m, t11l, t12h, t12m, t12l, t13h, t13m, t13l, t14h, t14m, t14l;
131
+ double t15h, t15m, t15l, t16h, t16m, t16l;
132
+ double tt1h, tt1l;
133
+ double tt10h, tt10m, tt10l, tt11h, tt11m, tt11l, tt12h, tt12m, tt12l;
134
+ double tt13h, tt13m, tt13l, tt14h, tt14m, tt14l, tt15h, tt15m, tt15l, tt16h, tt16m, tt16l;
135
+ double polyh, polym, polyl, tt13hover, tt13mover, tt13lover;
136
+ double zw1h, zw1m, zw1l, acoshover, acosmover, acoslover;
137
+
138
+ #if EVAL_PERF
139
+ crlibm_second_step_taken++;
140
+ #endif
141
+
142
+ /* Accurate (second) step: evaluate the polynomial of degree 35 in z.
143
+ Its coefficients start at tbl[i+1]
144
+ Evaluate degrees 35 to 16 in double precision (tbl[i+58] down to tbl[i+39]),
145
+ degrees 15 to 7 in double-double precision (tbl[i+37] down to tbl[i+21]) and
146
+ finally degrees 6 to 1 in triple-double (tbl[i+18] down to tbl[i+3]).
147
+ The constant coefficient (tbl[i+1], tbl[i+2]) is a double-double, the
148
+ computations are nevertheless in triple-double
149
+ */
150
+
151
+ /* Double computations */
152
+
153
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
154
+ highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
155
+ tbl[i+58] ,z,tbl[i+57]),z,tbl[i+56]),z,tbl[i+55]),z,tbl[i+54]),z,
156
+ tbl[i+53]),z,tbl[i+52]),z,tbl[i+51]),z,tbl[i+50]),z,tbl[i+49]),z,
157
+ tbl[i+48]),z,tbl[i+47]),z,tbl[i+46]),z,tbl[i+45]),z,tbl[i+44]),z,
158
+ tbl[i+43]),z,tbl[i+42]),z,tbl[i+41]),z,tbl[i+40]),z,tbl[i+39]);
159
+
160
+ #else
161
+ highPoly = tbl[i+39] + z * (tbl[i+40] + z * (tbl[i+41] + z * (tbl[i+42] + z * (
162
+ tbl[i+43] + z * (tbl[i+44] + z * (tbl[i+45] + z * (tbl[i+46] + z * (
163
+ tbl[i+47] + z * (tbl[i+48] + z * (tbl[i+49] + z * (tbl[i+50] + z * (
164
+ tbl[i+51] + z * (tbl[i+52] + z * (tbl[i+53] + z * (tbl[i+54] + z * (
165
+ tbl[i+55] + z * (tbl[i+56] + z * (tbl[i+57] + z * tbl[i+58]))))))))))))))))));
166
+ #endif
167
+
168
+
169
+ /* Double-double computations */
170
+
171
+ Mul12(&tt1h,&tt1l,z,highPoly);
172
+ Add22(&t1h,&t1l,tbl[i+37],tbl[i+38],tt1h,tt1l);
173
+
174
+ MulAdd212(&t2h,&t2l,tbl[i+35],tbl[i+36],z,t1h,t1l);
175
+ MulAdd212(&t3h,&t3l,tbl[i+33],tbl[i+34],z,t2h,t2l);
176
+ MulAdd212(&t4h,&t4l,tbl[i+31],tbl[i+32],z,t3h,t3l);
177
+ MulAdd212(&t5h,&t5l,tbl[i+29],tbl[i+30],z,t4h,t4l);
178
+ MulAdd212(&t6h,&t6l,tbl[i+27],tbl[i+28],z,t5h,t5l);
179
+ MulAdd212(&t7h,&t7l,tbl[i+25],tbl[i+26],z,t6h,t6l);
180
+ MulAdd212(&t8h,&t8l,tbl[i+23],tbl[i+24],z,t7h,t7l);
181
+ MulAdd212(&t9h,&t9l,tbl[i+21],tbl[i+22],z,t8h,t8l);
182
+
183
+ /* Triple-double computations */
184
+
185
+ Mul123(&tt10h,&tt10m,&tt10l,z,t9h,t9l); /* 154 - 47/53 */
186
+ Add33(&t10h,&t10m,&t10l,tbl[i+18],tbl[i+19],tbl[i+20],tt10h,tt10m,tt10l); /* 144 - 42/53 */
187
+ Mul133(&tt11h,&tt11m,&tt11l,z,t10h,t10m,t10l); /* 142 - 38/53 */
188
+ Add33(&t11h,&t11m,&t11l,tbl[i+15],tbl[i+16],tbl[i+17],tt11h,tt11m,tt11l); /* 136 - 33/53 */
189
+ Mul133(&tt12h,&tt12m,&tt12l,z,t11h,t11m,t11l); /* 133 - 28/53 */
190
+ Add33(&t12h,&t12m,&t12l,tbl[i+12],tbl[i+13],tbl[i+14],tt12h,tt12m,tt12l); /* 125 - 23/53 */
191
+ Mul133(&tt13hover,&tt13mover,&tt13lover,z,t12h,t12m,t12l); /* 123 - 18/53 */
192
+
193
+ Renormalize3(&tt13h,&tt13m,&tt13l,tt13hover,tt13mover,tt13lover); /* infty - 52/53 */
194
+
195
+ Add33(&t13h,&t13m,&t13l,tbl[i+9],tbl[i+10],tbl[i+11],tt13h,tt13m,tt13l); /* 149 - 47/53 */
196
+ Mul133(&tt14h,&tt14m,&tt14l,z,t13h,t13m,t13l); /* 147 - 42/53 */
197
+ Add33(&t14h,&t14m,&t14l,tbl[i+6],tbl[i+7],tbl[i+8],tt14h,tt14m,tt14l); /* 139 - 37/53 */
198
+ Mul133(&tt15h,&tt15m,&tt15l,z,t14h,t14m,t14l); /* 137 - 32/53 */
199
+ Add33(&t15h,&t15m,&t15l,tbl[i+3],tbl[i+4],tbl[i+5],tt15h,tt15m,tt15l); /* 129 - 28/53 */
200
+ Mul133(&tt16h,&tt16m,&tt16l,z,t15h,t15m,t15l); /* 128 - 23/53 */
201
+ Add233(&t16h,&t16m,&t16l,tbl[i+1],tbl[i+2],tt16h,tt16m,tt16l); /* 126 - 19/53 */
202
+
203
+ Renormalize3(&polyh,&polym,&polyl,t16h,t16m,t16l); /* infty - 52/53 */
204
+
205
+ /* Reconstruction:
206
+
207
+ - Multiply the polynomial value by the inverted sign (-sign)
208
+ - Add Pi/2 (PIHALFH + PIHALFM + PIHALFL) in triple-double
209
+ - Renormalize into acosh, acosm, acosl
210
+
211
+ */
212
+
213
+ zw1h = -sign * polyh;
214
+ zw1m = -sign * polym;
215
+ zw1l = -sign * polyl;
216
+
217
+ Add33(&acoshover,&acosmover,&acoslover,PIHALFH,PIHALFM,PIHALFL,zw1h,zw1m,zw1l);
218
+
219
+ Renormalize3(acosh,acosm,acosl,acoshover,acosmover,acoslover);
220
+
221
+ }
222
+
223
+
224
+ void acos_accurate_higher(double *acosh, double *acosm, double *acosl, double z, double sign) {
225
+ double highPoly;
226
+ double tt1h, tt1l;
227
+ double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l, t8h, t8l;
228
+ double tt10h, tt10m, tt10l, tt11h, tt11m, tt11l, tt12h, tt12m, tt12l, tt13h, tt13m, tt13l;
229
+ double tt14h, tt14m, tt14l, tt15h, tt15m, tt15l, tt16h, tt16m, tt16l, tt17h, tt17m, tt17l;
230
+ double t9h, t9l, t10h, t10m, t10l, t11h, t11m, t11l, t12h, t12m, t12l, t13h, t13m, t13l;
231
+ double t14h, t14m, t14l, t15h, t15m, t15l, t16h, t16m, t16l, t17h, t17m, t17l;
232
+ double tt18h, tt18m, tt18l, polyh, polym, polyl;
233
+ double sqrtzh, sqrtzm, sqrtzl, twoZ, pTimesSh, pTimesSm, pTimesSl;
234
+ double allh, allm, alll;
235
+ double tt13hover, tt13mover, tt13lover, tt16hover, tt16mover, tt16lover;
236
+ double polyhover, polymover, polylover;
237
+
238
+ #if EVAL_PERF
239
+ crlibm_second_step_taken++;
240
+ #endif
241
+
242
+ /* We evaluate acos(x) with x > 0 as
243
+
244
+ acos(x) = -1 * f(z) * sqrt(2*z)
245
+
246
+ with z = 1 - x and
247
+
248
+ f(z) = (asin(1 - z) - Pi/2) / sqrt(2*z)
249
+
250
+ f(z) is approximated by p(z)
251
+
252
+ The polynomial p(z) is of degree 29
253
+ Its coefficients start at tbl[TBLIDX10]
254
+ Coefficients for degrees 29 to 18 are in double precision,
255
+ for degrees 17 to 9 in double-double precision and
256
+ finally for degrees 8 to 1 in triple-double.
257
+ The constant coefficient (-1) is not stored in the table,
258
+ the computations are nevertheless in triple-double
259
+ We evaluate the monomials in the precision in which
260
+ the corresponding coefficients are stored
261
+ The coefficients' values decrease very quickly
262
+ so even with |z| < 2^-2.18 we can compute degree 18
263
+ already in double precision
264
+
265
+ Then compute sqrt(2*z) as a triple-double and
266
+ multiply it by p(z) in triple-double.
267
+
268
+ */
269
+
270
+ /* Double computations */
271
+
272
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
273
+ highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
274
+ tbl[TBLIDX10+53] ,z,tbl[TBLIDX10+52]),z,tbl[TBLIDX10+51]),z,
275
+ tbl[TBLIDX10+50]),z,tbl[TBLIDX10+49]),z,tbl[TBLIDX10+48]),z,
276
+ tbl[TBLIDX10+47]),z,tbl[TBLIDX10+46]),z,tbl[TBLIDX10+45]),z,
277
+ tbl[TBLIDX10+44]),z,tbl[TBLIDX10+43]),z,tbl[TBLIDX10+42]);
278
+ #else
279
+ highPoly = tbl[TBLIDX10+42] + z * (tbl[TBLIDX10+43] + z * (tbl[TBLIDX10+44] + z * (
280
+ tbl[TBLIDX10+45] + z * (tbl[TBLIDX10+46] + z * (tbl[TBLIDX10+47] + z * (
281
+ tbl[TBLIDX10+48] + z * (tbl[TBLIDX10+49] + z * (tbl[TBLIDX10+50] + z * (
282
+ tbl[TBLIDX10+51] + z * (tbl[TBLIDX10+52] + z * tbl[TBLIDX10+53]))))))))));
283
+ #endif
284
+
285
+ /* Double-double computations */
286
+
287
+ Mul12(&tt1h,&tt1l,z,highPoly);
288
+ Add22(&t1h,&t1l,tbl[TBLIDX10+40],tbl[TBLIDX10+41],tt1h,tt1l);
289
+
290
+ MulAdd212(&t2h,&t2l,tbl[TBLIDX10+38],tbl[TBLIDX10+39],z,t1h,t1l);
291
+ MulAdd212(&t3h,&t3l,tbl[TBLIDX10+36],tbl[TBLIDX10+37],z,t2h,t2l);
292
+ MulAdd212(&t4h,&t4l,tbl[TBLIDX10+34],tbl[TBLIDX10+35],z,t3h,t3l);
293
+ MulAdd212(&t5h,&t5l,tbl[TBLIDX10+32],tbl[TBLIDX10+33],z,t4h,t4l);
294
+ MulAdd212(&t6h,&t6l,tbl[TBLIDX10+30],tbl[TBLIDX10+31],z,t5h,t5l);
295
+ MulAdd212(&t7h,&t7l,tbl[TBLIDX10+28],tbl[TBLIDX10+29],z,t6h,t6l);
296
+ MulAdd212(&t8h,&t8l,tbl[TBLIDX10+26],tbl[TBLIDX10+27],z,t7h,t7l);
297
+ MulAdd212(&t9h,&t9l,tbl[TBLIDX10+24],tbl[TBLIDX10+25],z,t8h,t8l);
298
+
299
+ /* Triple-double computations */
300
+
301
+ Mul123(&tt10h,&tt10m,&tt10l,z,t9h,t9l); /* 154 - 47/53 */
302
+ Add33(&t10h,&t10m,&t10l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tbl[TBLIDX10+23],tt10h,tt10m,tt10l); /* 144 - 42/53 */
303
+ Mul133(&tt11h,&tt11m,&tt11l,z,t10h,t10m,t10l); /* 142 - 37/53 */
304
+ Add33(&t11h,&t11m,&t11l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],tbl[TBLIDX10+20],tt11h,tt11m,tt11l); /* 134 - 32/53 */
305
+ Mul133(&tt12h,&tt12m,&tt12l,z,t11h,t11m,t11l); /* 132 - 27/53 */
306
+ Add33(&t12h,&t12m,&t12l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],tbl[TBLIDX10+17],tt12h,tt12m,tt12l); /* 124 - 22/53 */
307
+ Mul133(&tt13hover,&tt13mover,&tt13lover,z,t12h,t12m,t12l); /* 122 - 17/53 */
308
+
309
+ Renormalize3(&tt13h,&tt13m,&tt13l,tt13hover,tt13mover,tt13lover); /* infty - 52/53 */
310
+
311
+ Add33(&t13h,&t13m,&t13l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],tbl[TBLIDX10+14],tt13h,tt13m,tt13l); /* 149 - 47/53 */
312
+ Mul133(&tt14h,&tt14m,&tt14l,z,t13h,t13m,t13l); /* 147 - 42/53 */
313
+ Add33(&t14h,&t14m,&t14l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],tbl[TBLIDX10+11],tt14h,tt14m,tt14l); /* 139 - 37/53 */
314
+ Mul133(&tt15h,&tt15m,&tt15l,z,t14h,t14m,t14l); /* 137 - 32/53 */
315
+ Add33(&t15h,&t15m,&t15l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],tbl[TBLIDX10+8],tt15h,tt15m,tt15l); /* 129 - 27/53 */
316
+ Mul133(&tt16hover,&tt16mover,&tt16lover,z,t15h,t15m,t15l); /* 127 - 22/53 */
317
+
318
+ Renormalize3(&tt16h,&tt16m,&tt16l,tt16hover,tt16mover,tt16lover); /* infty - 52/53 */
319
+
320
+ Add33(&t16h,&t16m,&t16l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],tbl[TBLIDX10+5],tt16h,tt16m,tt16l); /* 149 - 47/53 */
321
+ Mul133(&tt17h,&tt17m,&tt17l,z,t16h,t16m,t16l); /* 147 - 42/53 */
322
+ Add33(&t17h,&t17m,&t17l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],tbl[TBLIDX10+2],tt17h,tt17m,tt17l); /* 139 - 37/53 */
323
+ Mul133(&tt18h,&tt18m,&tt18l,z,t17h,t17m,t17l); /* 137 - 32/53 */
324
+ Add133(&polyhover,&polymover,&polylover,-1,tt18h,tt18m,tt18l); /* 136 - 30/53 */
325
+
326
+ Renormalize3(&polyh,&polym,&polyl,polyhover,polymover,polylover); /* infty - 52/53 */
327
+
328
+ /* Compute sqrt(2*z) as a triple-double */
329
+
330
+ twoZ = 2 * z;
331
+ Sqrt13(&sqrtzh,&sqrtzm,&sqrtzl,twoZ); /* 146 - 52/53 */
332
+
333
+ /* Multiply p(z) by sqrt(2*z) */
334
+
335
+ Mul33(&pTimesSh,&pTimesSm,&pTimesSl,polyh,polym,polyl,sqrtzh,sqrtzm,sqrtzl); /* 139 - 48/53 */
336
+
337
+ /* Reconstruction:
338
+
339
+ If the sign of x in acos(x) was positive:
340
+ - Multiply pTimesSh + pTimesSm + pTimesSl approx f(z) * sqrt(2 * z) by -1
341
+ - Renormalize
342
+ - Return
343
+
344
+ Otherwise:
345
+ - Add Pi in triple-double to pTimesSh + pTimesSm + pTimesSl approx f(z) * sqrt(2 * z)
346
+ - Renormalize
347
+ - Return
348
+
349
+ */
350
+
351
+ if (sign > 0) {
352
+
353
+ allh = -1.0 * pTimesSh;
354
+ allm = -1.0 * pTimesSm;
355
+ alll = -1.0 * pTimesSl; /* 139 - 48/53 */
356
+
357
+ } else {
358
+
359
+ Add33(&allh,&allm,&alll,PIH,PIM,PIL,pTimesSh,pTimesSm,pTimesSl); /* 130 - 43/53 */
360
+
361
+ }
362
+
363
+ Renormalize3(acosh,acosm,acosl,allh,allm,alll); /* infty - 52/53 */
364
+
365
+ }
366
+
367
+
368
+ double acos_rn(double x) {
369
+ db_number xdb;
370
+ double sign, z, acosh, acosm, acosl;
371
+ int i;
372
+ double xSqh, xSql;
373
+ double tt1h, tt1l;
374
+ double tt6h, tt6l;
375
+ double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
376
+ double t7h, t7l, t8h, t8l, polyh, polyl, twoZ, sqrtzh, sqrtzl;
377
+ double pTimesSh, pTimesSl, highPoly, xCubeh, xCubel;
378
+ double tmp1, tmp2, tmp3, tmp4, tmp5;
379
+ double zw1h, zw1l;
380
+
381
+ /*
382
+ #if CRLIBM_REQUIRES_ROUNDING_MODE_CHANGE
383
+ SAVE_STATE_AND_SET_RNDOUBLE
384
+ #endif
385
+ */
386
+
387
+ /* Make the bits of x accessible as the integer words xdb.i[HI] and xdb.i[LO] */
388
+ xdb.d = x;
389
+
390
+ /* Special case handling */
391
+
392
+ /* Exact algebraic case x = 1, acos(1) = 0 */
393
+
394
+ if (x == 1.0) return 0.0;
395
+
396
+ /* Strip off the sign of argument x; sign is -1 for a set sign bit, 1 otherwise */
397
+ if (xdb.i[HI] & 0x80000000) sign = -1; else sign = 1;
398
+ xdb.i[HI] &= 0x7fffffff;
399
+
400
+ /* acos is defined on -1 <= x <= 1, elsewhere it is NaN */
401
+ if ((xdb.i[HI] > 0x3ff00000) || ((xdb.i[HI] == 0x3ff00000) && (xdb.i[LO] != 0x00000000))) {
402
+ return (x-x)/0.0; /* return NaN */
403
+ }
404
+
405
+ /* If |x| < 2^(-120) we have
406
+
407
+ round(acos(x)) = round(pi/2)
408
+
409
+ So we can decide the rounding without any computation
410
+ */
411
+ if (xdb.i[HI] < 0x38700000) {
412
+ return PIHALFDOUBLERN;
413
+ }
414
+
415
+ /* Recast x: from here on x holds the absolute value of the argument */
416
+ x = xdb.d;
417
+
418
+ /* Find the corresponding interval and compute the index into the table
419
+ We start by filtering the two special cases around 0 and 1
420
+ */
421
+
422
+ if (xdb.i[HI] < BOUND1) {
423
+ /* Special interval 0..BOUND1
424
+ The polynomial has no even monomials
425
+ We must prove extra accuracy in the interval 0..sin(2^(-18))
426
+ */
427
+
428
+ /* Quick phase starts */
429
+
430
+ /* Compute square of x for both quick and accurate phases */
431
+ Mul12(&xSqh,&xSql,x,x);
432
+
433
+ tmp4 = tbl[3];
434
+ tmp5 = tbl[4];
435
+ t4h = tmp4;
436
+ t4l = tmp5;
437
+ if (xdb.i[HI] > EXTRABOUND) {
438
+ /* Double precision evaluation */
439
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
440
+ highPoly = FMA(FMA(FMA(FMA(tbl[23],xSqh,tbl[21]),xSqh,tbl[19]),xSqh,tbl[17]),xSqh,tbl[15]);
441
+ #else
442
+ highPoly = tbl[15] + xSqh * (tbl[17] + xSqh * (tbl[19] + xSqh * (tbl[21] + xSqh * tbl[23])));
443
+ #endif
444
+
445
+ /* Double-double precision evaluation */
446
+ Mul12(&tt1h,&tt1l,xSqh,highPoly);
447
+ Add22(&t1h,&t1l,tbl[12],tbl[13],tt1h,tt1l);
448
+
449
+ MulAdd212(&t2h,&t2l,tbl[9],tbl[10],xSqh,t1h,t1l);
450
+ MulAdd212(&t3h,&t3l,tbl[6],tbl[7],xSqh,t2h,t2l);
451
+ MulAdd22(&t4h,&t4l,tmp4,tmp5,xSqh,xSql,t3h,t3l);
452
+ }
453
+
454
+ MulAdd22(&t5h,&t5l,tbl[0],tbl[1],xSqh,xSql,t4h,t4l);
455
+
456
+ Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
457
+ Mul22(&tt6h,&tt6l,xCubeh,xCubel,t5h,t5l);
458
+
459
+ Add12(tmp1,tmp2,x,tt6h);
460
+ tmp3 = tmp2 + tt6l;
461
+ Add12(polyh,polyl,tmp1,tmp3);
462
+
463
+ /* Reconstruction:
464
+
465
+ - Multiply the polynomial value by the inverted sign (-sign)
466
+ - Add Pi/2 in double-double precision
467
+
468
+ */
469
+
470
+ zw1h = -sign * polyh;
471
+ zw1l = -sign * polyl;
472
+
473
+ Add22(&acosh,&acosm,PIHALFH,PIHALFM,zw1h,zw1l);
474
+
475
+ /* Rounding test
476
+ The RN rounding constant is at tbl[34]; if the test holds, acosh is returned directly
477
+ */
478
+ if(acosh == (acosh + (acosm * tbl[34])))
479
+ return acosh;
480
+
481
+ /* Launch accurate phase */
482
+
483
+ acos_accurate_lower(&acosh,&acosm,&acosl,x,xSqh,xSql,sign);
484
+
485
+ ReturnRoundToNearest3(acosh,acosm,acosl);
486
+ }
487
+
488
+ if (xdb.i[HI] >= BOUND9) {
489
+ /* Special interval BOUND9..1
490
+ We use an asymptotic development of arcsin in sqrt(1 - x)
491
+ */
492
+
493
+ /* Argument reduction for quick and accurate phase
494
+ z = 1 - x
495
+ The operation is exact as per Sterbenz' lemma
496
+ */
497
+
498
+ z = 1 - x;
499
+
500
+ /* Quick phase starts */
501
+
502
+ /* Double precision evaluation */
503
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
504
+ highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
505
+ tbl[TBLIDX10+42] ,z,tbl[TBLIDX10+40]),z,tbl[TBLIDX10+38]),z,
506
+ tbl[TBLIDX10+36]),z,tbl[TBLIDX10+34]),z,tbl[TBLIDX10+32]),z,
507
+ tbl[TBLIDX10+30]),z,tbl[TBLIDX10+28]),z,tbl[TBLIDX10+26]),z,
508
+ tbl[TBLIDX10+24]);
509
+ #else
510
+ highPoly = tbl[TBLIDX10+24] + z * (tbl[TBLIDX10+26] + z * (tbl[TBLIDX10+28] + z * (
511
+ tbl[TBLIDX10+30] + z * (tbl[TBLIDX10+32] + z * (tbl[TBLIDX10+34] + z * (
512
+ tbl[TBLIDX10+36] + z * (tbl[TBLIDX10+38] + z * (tbl[TBLIDX10+40] + z *
513
+ tbl[TBLIDX10+42]))))))));
514
+ #endif
515
+
516
+ /* Double-double precision evaluation */
517
+ Mul12(&tt1h,&tt1l,z,highPoly);
518
+ Add22(&t1h,&t1l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tt1h,tt1l);
519
+
520
+ MulAdd212(&t2h,&t2l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],z,t1h,t1l);
521
+ MulAdd212(&t3h,&t3l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],z,t2h,t2l);
522
+ MulAdd212(&t4h,&t4l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],z,t3h,t3l);
523
+ MulAdd212(&t5h,&t5l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],z,t4h,t4l);
524
+ MulAdd212(&t6h,&t6l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],z,t5h,t5l);
525
+ MulAdd212(&t7h,&t7l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],z,t6h,t6l);
526
+ MulAdd212(&t8h,&t8l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],z,t7h,t7l);
527
+ MulAdd212(&polyh,&polyl,-1,0,z,t8h,t8l);
528
+
529
+ /* Compute sqrt(2*z) as a double-double */
530
+
531
+ twoZ = 2 * z;
532
+ sqrt12(&sqrtzh,&sqrtzl,twoZ);
533
+
534
+ /* Multiply p(z) by sqrt(2*z); Pi is added below only for negative arguments */
535
+
536
+ Mul22(&pTimesSh,&pTimesSl,polyh,polyl,sqrtzh,sqrtzl);
537
+
538
+
539
+ /* Reconstruction:
540
+
541
+ If the sign of x in acos(x) was positive:
542
+ - Multiply pTimesSh + pTimesSl approx f(z) * sqrt(2 * z) by -1
543
+ - Return
544
+
545
+ Otherwise:
546
+ - Add Pi in double-double to pTimesSh + pTimesSl approx f(z) * sqrt(2 * z)
547
+ - Return
548
+
549
+ */
550
+
551
+ if (sign > 0) {
552
+
553
+ acosh = -1.0 * pTimesSh;
554
+ acosm = -1.0 * pTimesSl;
555
+
556
+ } else {
557
+
558
+ Add22(&acosh,&acosm,PIH,PIM,pTimesSh,pTimesSl);
559
+
560
+ }
561
+
562
+ /* Rounding test
563
+ The RN rounding constant is at tbl[TBLIDX10+54]; if the test holds, acosh is returned directly
564
+ */
565
+
566
+ if(acosh == (acosh + (acosm * tbl[TBLIDX10+54])))
567
+ return acosh;
568
+
569
+ /* Launch accurate phase */
570
+
571
+ acos_accurate_higher(&acosh,&acosm,&acosl,z,sign);
572
+
573
+ ReturnRoundToNearest3(acosh,acosm,acosl);
574
+ }
575
+
576
+ /* General 8 main intervals
577
+ We can already suppose that BOUND1 <= x <= BOUND9
578
+ */
579
+
580
+ if (xdb.i[HI] < BOUND5) {
581
+ if (xdb.i[HI] < BOUND3) {
582
+ if (xdb.i[HI] < BOUND2) i = TBLIDX2; else i = TBLIDX3;
583
+ } else {
584
+ if (xdb.i[HI] < BOUND4) i = TBLIDX4; else i = TBLIDX5;
585
+ }
586
+ } else {
587
+ if (xdb.i[HI] < BOUND7) {
588
+ if (xdb.i[HI] < BOUND6) i = TBLIDX6; else i = TBLIDX7;
589
+ } else {
590
+ if (xdb.i[HI] < BOUND8) i = TBLIDX8; else i = TBLIDX9;
591
+ }
592
+ }
593
+
594
+ /* Argument reduction
595
+ i points to the interval midpoint value in the table
596
+ */
597
+ z = x - tbl[i];
598
+
599
+ /* Quick phase starts */
600
+
601
+ /* Double precision evaluation */
602
+
603
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
604
+ highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(
605
+ tbl[i+35] ,z,tbl[i+33]),z,tbl[i+31]),z,tbl[i+29]),z,
606
+ tbl[i+27]),z,tbl[i+25]),z,tbl[i+23]),z,tbl[i+21]);
607
+ #else
608
+ highPoly = tbl[i+21] + z * (tbl[i+23] + z * (tbl[i+25] + z * (
609
+ tbl[i+27] + z * (tbl[i+29] + z * (tbl[i+31] + z * (
610
+ tbl[i+33] + z * tbl[i+35]))))));
611
+ #endif
612
+
613
+ /* Double-double precision evaluation */
614
+
615
+ Mul12(&tt1h,&tt1l,z,highPoly);
616
+ Add22(&t1h,&t1l,tbl[i+18],tbl[i+19],tt1h,tt1l);
617
+
618
+ MulAdd212(&t2h,&t2l,tbl[i+15],tbl[i+16],z,t1h,t1l);
619
+ MulAdd212(&t3h,&t3l,tbl[i+12],tbl[i+13],z,t2h,t2l);
620
+ MulAdd212(&t4h,&t4l,tbl[i+9],tbl[i+10],z,t3h,t3l);
621
+ MulAdd212(&t5h,&t5l,tbl[i+6],tbl[i+7],z,t4h,t4l);
622
+ MulAdd212(&t6h,&t6l,tbl[i+3],tbl[i+4],z,t5h,t5l);
623
+ MulAdd212(&polyh,&polyl,tbl[i+1],tbl[i+2],z,t6h,t6l);
624
+
625
+ /* Reconstruction:
626
+
627
+ - Multiply the polynomial value by the inverted sign (-sign)
628
+ - Add Pi/2 in double-double precision
629
+
630
+ */
631
+
632
+ zw1h = -sign * polyh;
633
+ zw1l = -sign * polyl;
634
+
635
+ Add22(&acosh,&acosm,PIHALFH,PIHALFM,zw1h,zw1l);
636
+
637
+ /* Rounding test
638
+ The RN rounding constant is at tbl[i+59]; if the test holds, acosh is returned directly
639
+ */
640
+ if(acosh == (acosh + (acosm * tbl[i+59])))
641
+ return acosh;
642
+
643
+ /* Launch accurate phase */
644
+
645
+ acos_accurate_middle(&acosh,&acosm,&acosl,z,i,sign);
646
+
647
+ ReturnRoundToNearest3(acosh,acosm,acosl);
648
+ }
649
+
650
+ double acos_ru(double x) { /* acos(x), RU variant: quick phase first, then the accurate phase; results leave through TEST_AND_RETURN_RU / ReturnRoundUpwards3 */
651
+ db_number xdb;
652
+ double sign, z, acosh, acosm, acosl;
653
+ int i;
654
+ double xSqh, xSql;
655
+ double tt1h, tt1l;
656
+ double tt6h, tt6l;
657
+ double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
658
+ double t7h, t7l, t8h, t8l, polyh, polyl, twoZ, sqrtzh, sqrtzl;
659
+ double pTimesSh, pTimesSl, highPoly, xCubeh, xCubel;
660
+ double tmp1, tmp2, tmp3, tmp4, tmp5;
661
+ double zw1h, zw1l;
662
+
663
+ /* Transform the argument into integer: store x in xdb so its words can be read as xdb.i[HI] / xdb.i[LO] */
664
+ xdb.d = x;
665
+
666
+ /* Special case handling */
667
+
668
+ /* Exact algebraic case x = 1, acos(1) = 0 */
669
+
670
+ if (x == 1.0) return 0.0;
671
+
672
+ /* Strip off the sign of argument x: sign = -1 if the sign bit is set, +1 otherwise; xdb then holds |x| */
673
+ if (xdb.i[HI] & 0x80000000) sign = -1; else sign = 1;
674
+ xdb.i[HI] &= 0x7fffffff;
675
+
676
+ /* acos is defined on -1 <= x <= 1, elsewhere it is NaN (|x| > 1 is detected on the integer words of |x|) */
677
+ if ((xdb.i[HI] > 0x3ff00000) || ((xdb.i[HI] == 0x3ff00000) && (xdb.i[LO] != 0x00000000))) {
678
+ return (x-x)/0.0; /* return NaN */
679
+ }
680
+
681
+ /* If |x| < 2^(-120) we have
682
+
683
+ round(acos(x)) = round(pi/2)
684
+
685
+ So we can decide the rounding without any computation
    (the test compares the high word of |x| with 0x38700000, the high word of 2^(-120))
686
+ */
687
+ if (xdb.i[HI] < 0x38700000) {
688
+ return PIHALFDOUBLERU;
689
+ }
690
+
691
+ /* Recast x: from here on x holds |x|, the original sign is kept in sign */
692
+ x = xdb.d;
693
+
694
+ /* Find the corresponding interval and compute index to the table
695
+ We start by filtering the two special cases around 0 and 1
696
+ */
697
+
698
+ if (xdb.i[HI] < BOUND1) {
699
+ /* Special interval 0..BOUND1
700
+ The polynomial has no even monomials
701
+ We must prove extra accuracy in the interval 0..sin(2^(-18))
702
+ */
703
+
704
+ /* Quick phase starts */
705
+
706
+ /* Compute square of x for both quick and accurate phases */
707
+ Mul12(&xSqh,&xSql,x,x);
708
+ /* t4h/t4l start as tbl[3], tbl[4]; they are recomputed below only when the high word of |x| exceeds EXTRABOUND */
709
+ tmp4 = tbl[3];
710
+ tmp5 = tbl[4];
711
+ t4h = tmp4;
712
+ t4l = tmp5;
713
+ if (xdb.i[HI] > EXTRABOUND) {
714
+ /* Double precision evaluation */
715
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
716
+ highPoly = FMA(FMA(FMA(FMA(tbl[23],xSqh,tbl[21]),xSqh,tbl[19]),xSqh,tbl[17]),xSqh,tbl[15]);
717
+ #else
718
+ highPoly = tbl[15] + xSqh * (tbl[17] + xSqh * (tbl[19] + xSqh * (tbl[21] + xSqh * tbl[23])));
719
+ #endif
720
+
721
+ /* Double-double precision evaluation */
722
+ Mul12(&tt1h,&tt1l,xSqh,highPoly);
723
+ Add22(&t1h,&t1l,tbl[12],tbl[13],tt1h,tt1l);
724
+
725
+ MulAdd212(&t2h,&t2l,tbl[9],tbl[10],xSqh,t1h,t1l);
726
+ MulAdd212(&t3h,&t3l,tbl[6],tbl[7],xSqh,t2h,t2l);
727
+ MulAdd22(&t4h,&t4l,tmp4,tmp5,xSqh,xSql,t3h,t3l);
728
+ }
729
+
730
+ MulAdd22(&t5h,&t5l,tbl[0],tbl[1],xSqh,xSql,t4h,t4l);
731
+
732
+ Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
733
+ Mul22(&tt6h,&tt6l,xCubeh,xCubel,t5h,t5l);
734
+ /* Combine x with tt6h + tt6l into the double-double polyh + polyl */
735
+ Add12(tmp1,tmp2,x,tt6h);
736
+ tmp3 = tmp2 + tt6l;
737
+ Add12(polyh,polyl,tmp1,tmp3);
738
+
739
+ /* Reconstruction:
740
+
741
+ - Multiply by the inverted sign
742
+ - Add Pi/2 in double-double precision
743
+
744
+ */
745
+
746
+ zw1h = -sign * polyh;
747
+ zw1l = -sign * polyl;
748
+
749
+ Add22(&acosh,&acosm,PIHALFH,PIHALFM,zw1h,zw1l);
750
+
751
+ /* Rounding test
752
+ The RU rounding constant is at tbl[35]
753
+ */
754
+ TEST_AND_RETURN_RU(acosh, acosm, tbl[35]);
755
+
756
+ /* Launch accurate phase */
757
+
758
+ acos_accurate_lower(&acosh,&acosm,&acosl,x,xSqh,xSql,sign);
759
+
760
+ ReturnRoundUpwards3(acosh,acosm,acosl);
761
+ }
762
+
763
+ if (xdb.i[HI] >= BOUND9) {
764
+ /* Special interval BOUND9..1
765
+ We use an asymptotic development of arcsin in sqrt(1 - x)
766
+ */
767
+
768
+ /* Argument reduction for quick and accurate phase
769
+ z = 1 - x
770
+ The operation is exact as per Sterbenz' lemma
771
+ */
772
+
773
+ z = 1 - x;
774
+
775
+ /* Quick phase starts */
776
+
777
+ /* Double precision evaluation */
778
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
779
+ highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
780
+ tbl[TBLIDX10+42] ,z,tbl[TBLIDX10+40]),z,tbl[TBLIDX10+38]),z,
781
+ tbl[TBLIDX10+36]),z,tbl[TBLIDX10+34]),z,tbl[TBLIDX10+32]),z,
782
+ tbl[TBLIDX10+30]),z,tbl[TBLIDX10+28]),z,tbl[TBLIDX10+26]),z,
783
+ tbl[TBLIDX10+24]);
784
+ #else
785
+ highPoly = tbl[TBLIDX10+24] + z * (tbl[TBLIDX10+26] + z * (tbl[TBLIDX10+28] + z * (
786
+ tbl[TBLIDX10+30] + z * (tbl[TBLIDX10+32] + z * (tbl[TBLIDX10+34] + z * (
787
+ tbl[TBLIDX10+36] + z * (tbl[TBLIDX10+38] + z * (tbl[TBLIDX10+40] + z *
788
+ tbl[TBLIDX10+42]))))))));
789
+ #endif
790
+
791
+ /* Double-double precision evaluation */
792
+ Mul12(&tt1h,&tt1l,z,highPoly);
793
+ Add22(&t1h,&t1l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tt1h,tt1l);
794
+
795
+ MulAdd212(&t2h,&t2l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],z,t1h,t1l);
796
+ MulAdd212(&t3h,&t3l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],z,t2h,t2l);
797
+ MulAdd212(&t4h,&t4l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],z,t3h,t3l);
798
+ MulAdd212(&t5h,&t5l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],z,t4h,t4l);
799
+ MulAdd212(&t6h,&t6l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],z,t5h,t5l);
800
+ MulAdd212(&t7h,&t7l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],z,t6h,t6l);
801
+ MulAdd212(&t8h,&t8l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],z,t7h,t7l);
802
+ MulAdd212(&polyh,&polyl,-1,0,z,t8h,t8l);
803
+
804
+ /* Compute sqrt(2*z) as a double-double */
805
+
806
+ twoZ = 2 * z;
807
+ sqrt12(&sqrtzh,&sqrtzl,twoZ);
808
+
809
+ /* Multiply p(z) by sqrt(2*z); the sign-dependent constant is handled in the reconstruction below */
810
+
811
+ Mul22(&pTimesSh,&pTimesSl,polyh,polyl,sqrtzh,sqrtzl);
812
+
813
+
814
+ /* Reconstruction:
815
+
816
+ If the sign of x in acos(x) was positive:
817
+ - Multiply pTimesSh + pTimesSl approx f(x) * sqrt(2 * z) by -1
818
+ - Return
819
+
820
+ Otherwise:
821
+ - Add Pi (PIH + PIM, with Add22) to pTimesSh + pTimesSl approx f(x) * sqrt(2 * z)
822
+ - Return
823
+
824
+ */
825
+
826
+ if (sign > 0) {
827
+
828
+ acosh = -1.0 * pTimesSh;
829
+ acosm = -1.0 * pTimesSl;
830
+
831
+ } else {
832
+
833
+ Add22(&acosh,&acosm,PIH,PIM,pTimesSh,pTimesSl);
834
+
835
+ }
836
+
837
+ /* Rounding test
838
+ The RU rounding constant is at tbl[TBLIDX10+55]
839
+ */
840
+ TEST_AND_RETURN_RU(acosh, acosm, tbl[TBLIDX10+55]);
841
+
842
+ /* Launch accurate phase */
843
+
844
+ acos_accurate_higher(&acosh,&acosm,&acosl,z,sign);
845
+
846
+ ReturnRoundUpwards3(acosh,acosm,acosl);
847
+ }
848
+
849
+ /* General 8 main intervals
850
+ We can already suppose that the high word of |x| lies in BOUND1 <= . < BOUND9
    The nested comparisons below select one of TBLIDX2..TBLIDX9
851
+ */
852
+
853
+ if (xdb.i[HI] < BOUND5) {
854
+ if (xdb.i[HI] < BOUND3) {
855
+ if (xdb.i[HI] < BOUND2) i = TBLIDX2; else i = TBLIDX3;
856
+ } else {
857
+ if (xdb.i[HI] < BOUND4) i = TBLIDX4; else i = TBLIDX5;
858
+ }
859
+ } else {
860
+ if (xdb.i[HI] < BOUND7) {
861
+ if (xdb.i[HI] < BOUND6) i = TBLIDX6; else i = TBLIDX7;
862
+ } else {
863
+ if (xdb.i[HI] < BOUND8) i = TBLIDX8; else i = TBLIDX9;
864
+ }
865
+ }
866
+
867
+ /* Argument reduction
868
+ i points to the interval midpoint value in the table
869
+ */
870
+ z = x - tbl[i];
871
+
872
+ /* Quick phase starts */
873
+
874
+ /* Double precision evaluation */
875
+
876
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
877
+ highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(
878
+ tbl[i+35] ,z,tbl[i+33]),z,tbl[i+31]),z,tbl[i+29]),z,
879
+ tbl[i+27]),z,tbl[i+25]),z,tbl[i+23]),z,tbl[i+21]);
880
+ #else
881
+ highPoly = tbl[i+21] + z * (tbl[i+23] + z * (tbl[i+25] + z * (
882
+ tbl[i+27] + z * (tbl[i+29] + z * (tbl[i+31] + z * (
883
+ tbl[i+33] + z * tbl[i+35]))))));
884
+ #endif
885
+
886
+ /* Double-double precision evaluation */
887
+
888
+ Mul12(&tt1h,&tt1l,z,highPoly);
889
+ Add22(&t1h,&t1l,tbl[i+18],tbl[i+19],tt1h,tt1l);
890
+
891
+ MulAdd212(&t2h,&t2l,tbl[i+15],tbl[i+16],z,t1h,t1l);
892
+ MulAdd212(&t3h,&t3l,tbl[i+12],tbl[i+13],z,t2h,t2l);
893
+ MulAdd212(&t4h,&t4l,tbl[i+9],tbl[i+10],z,t3h,t3l);
894
+ MulAdd212(&t5h,&t5l,tbl[i+6],tbl[i+7],z,t4h,t4l);
895
+ MulAdd212(&t6h,&t6l,tbl[i+3],tbl[i+4],z,t5h,t5l);
896
+ MulAdd212(&polyh,&polyl,tbl[i+1],tbl[i+2],z,t6h,t6l);
897
+
898
+ /* Reconstruction:
899
+
900
+ - Multiply by the inverted sign
901
+ - Add Pi/2 in double-double precision
902
+
903
+ */
904
+
905
+ zw1h = -sign * polyh;
906
+ zw1l = -sign * polyl;
907
+
908
+ Add22(&acosh,&acosm,PIHALFH,PIHALFM,zw1h,zw1l);
909
+
910
+ /* Rounding test
911
+ The RU rounding constant is at tbl[i+60]
912
+ */
913
+ TEST_AND_RETURN_RU(acosh, acosm, tbl[i+60]);
914
+
915
+ /* Launch accurate phase */
916
+
917
+ acos_accurate_middle(&acosh,&acosm,&acosl,z,i,sign);
918
+
919
+ ReturnRoundUpwards3(acosh,acosm,acosl);
920
+ }
921
+
922
+
923
+
924
+ double acos_rd(double x) { /* acos(x), RD variant: quick phase first, then the accurate phase; results leave through TEST_AND_RETURN_RD / ReturnRoundDownwards3 */
925
+ db_number xdb;
926
+ double sign, z, acosh, acosm, acosl;
927
+ int i;
928
+ double xSqh, xSql;
929
+ double tt1h, tt1l;
930
+ double tt6h, tt6l;
931
+ double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
932
+ double t7h, t7l, t8h, t8l, polyh, polyl, twoZ, sqrtzh, sqrtzl;
933
+ double pTimesSh, pTimesSl, highPoly, xCubeh, xCubel;
934
+ double tmp1, tmp2, tmp3, tmp4, tmp5;
935
+ double zw1h, zw1l;
936
+
937
+ /* Transform the argument into integer: store x in xdb so its words can be read as xdb.i[HI] / xdb.i[LO] */
938
+ xdb.d = x;
939
+
940
+ /* Special case handling */
941
+
942
+ /* Exact algebraic case x = 1, acos(1) = 0 */
943
+
944
+ if (x == 1.0) return 0.0;
945
+
946
+ /* Strip off the sign of argument x: sign = -1 if the sign bit is set, +1 otherwise; xdb then holds |x| */
947
+ if (xdb.i[HI] & 0x80000000) sign = -1; else sign = 1;
948
+ xdb.i[HI] &= 0x7fffffff;
949
+
950
+ /* acos is defined on -1 <= x <= 1, elsewhere it is NaN (|x| > 1 is detected on the integer words of |x|) */
951
+ if ((xdb.i[HI] > 0x3ff00000) || ((xdb.i[HI] == 0x3ff00000) && (xdb.i[LO] != 0x00000000))) {
952
+ return (x-x)/0.0; /* return NaN */
953
+ }
954
+
955
+ /* If |x| < 2^(-120) we have
956
+
957
+ round(acos(x)) = round(pi/2)
958
+
959
+ So we can decide the rounding without any computation
    (the test compares the high word of |x| with 0x38700000, the high word of 2^(-120))
960
+ */
961
+ if (xdb.i[HI] < 0x38700000) {
962
+ return PIHALFDOUBLERD;
963
+ }
964
+
965
+ /* Recast x: from here on x holds |x|, the original sign is kept in sign */
966
+ x = xdb.d;
967
+
968
+ /* Find the corresponding interval and compute index to the table
969
+ We start by filtering the two special cases around 0 and 1
970
+ */
971
+
972
+ if (xdb.i[HI] < BOUND1) {
973
+ /* Special interval 0..BOUND1
974
+ The polynomial has no even monomials
975
+ We must prove extra accuracy in the interval 0..sin(2^(-18))
976
+ */
977
+
978
+ /* Quick phase starts */
979
+
980
+ /* Compute square of x for both quick and accurate phases */
981
+ Mul12(&xSqh,&xSql,x,x);
982
+ /* t4h/t4l start as tbl[3], tbl[4]; they are recomputed below only when the high word of |x| exceeds EXTRABOUND */
983
+ tmp4 = tbl[3];
984
+ tmp5 = tbl[4];
985
+ t4h = tmp4;
986
+ t4l = tmp5;
987
+ if (xdb.i[HI] > EXTRABOUND) {
988
+ /* Double precision evaluation */
989
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
990
+ highPoly = FMA(FMA(FMA(FMA(tbl[23],xSqh,tbl[21]),xSqh,tbl[19]),xSqh,tbl[17]),xSqh,tbl[15]);
991
+ #else
992
+ highPoly = tbl[15] + xSqh * (tbl[17] + xSqh * (tbl[19] + xSqh * (tbl[21] + xSqh * tbl[23])));
993
+ #endif
994
+
995
+ /* Double-double precision evaluation */
996
+ Mul12(&tt1h,&tt1l,xSqh,highPoly);
997
+ Add22(&t1h,&t1l,tbl[12],tbl[13],tt1h,tt1l);
998
+
999
+ MulAdd212(&t2h,&t2l,tbl[9],tbl[10],xSqh,t1h,t1l);
1000
+ MulAdd212(&t3h,&t3l,tbl[6],tbl[7],xSqh,t2h,t2l);
1001
+ MulAdd22(&t4h,&t4l,tmp4,tmp5,xSqh,xSql,t3h,t3l);
1002
+ }
1003
+
1004
+ MulAdd22(&t5h,&t5l,tbl[0],tbl[1],xSqh,xSql,t4h,t4l);
1005
+
1006
+ Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
1007
+ Mul22(&tt6h,&tt6l,xCubeh,xCubel,t5h,t5l);
1008
+ /* Combine x with tt6h + tt6l into the double-double polyh + polyl */
1009
+ Add12(tmp1,tmp2,x,tt6h);
1010
+ tmp3 = tmp2 + tt6l;
1011
+ Add12(polyh,polyl,tmp1,tmp3);
1012
+
1013
+ /* Reconstruction:
1014
+
1015
+ - Multiply by the inverted sign
1016
+ - Add Pi/2 in double-double precision
1017
+
1018
+ */
1019
+
1020
+ zw1h = -sign * polyh;
1021
+ zw1l = -sign * polyl;
1022
+
1023
+ Add22(&acosh,&acosm,PIHALFH,PIHALFM,zw1h,zw1l);
1024
+
1025
+ /* Rounding test
1026
+ The RD rounding constant is at tbl[35]
1027
+ */
1028
+ TEST_AND_RETURN_RD(acosh, acosm, tbl[35]);
1029
+
1030
+ /* Launch accurate phase */
1031
+
1032
+ acos_accurate_lower(&acosh,&acosm,&acosl,x,xSqh,xSql,sign);
1033
+
1034
+ ReturnRoundDownwards3(acosh,acosm,acosl);
1035
+ }
1036
+
1037
+ if (xdb.i[HI] >= BOUND9) {
1038
+ /* Special interval BOUND9..1
1039
+ We use an asymptotic development of arcsin in sqrt(1 - x)
1040
+ */
1041
+
1042
+ /* Argument reduction for quick and accurate phase
1043
+ z = 1 - x
1044
+ The operation is exact as per Sterbenz' lemma
1045
+ */
1046
+
1047
+ z = 1 - x;
1048
+
1049
+ /* Quick phase starts */
1050
+
1051
+ /* Double precision evaluation */
1052
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1053
+ highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
1054
+ tbl[TBLIDX10+42] ,z,tbl[TBLIDX10+40]),z,tbl[TBLIDX10+38]),z,
1055
+ tbl[TBLIDX10+36]),z,tbl[TBLIDX10+34]),z,tbl[TBLIDX10+32]),z,
1056
+ tbl[TBLIDX10+30]),z,tbl[TBLIDX10+28]),z,tbl[TBLIDX10+26]),z,
1057
+ tbl[TBLIDX10+24]);
1058
+ #else
1059
+ highPoly = tbl[TBLIDX10+24] + z * (tbl[TBLIDX10+26] + z * (tbl[TBLIDX10+28] + z * (
1060
+ tbl[TBLIDX10+30] + z * (tbl[TBLIDX10+32] + z * (tbl[TBLIDX10+34] + z * (
1061
+ tbl[TBLIDX10+36] + z * (tbl[TBLIDX10+38] + z * (tbl[TBLIDX10+40] + z *
1062
+ tbl[TBLIDX10+42]))))))));
1063
+ #endif
1064
+
1065
+ /* Double-double precision evaluation */
1066
+ Mul12(&tt1h,&tt1l,z,highPoly);
1067
+ Add22(&t1h,&t1l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tt1h,tt1l);
1068
+
1069
+ MulAdd212(&t2h,&t2l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],z,t1h,t1l);
1070
+ MulAdd212(&t3h,&t3l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],z,t2h,t2l);
1071
+ MulAdd212(&t4h,&t4l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],z,t3h,t3l);
1072
+ MulAdd212(&t5h,&t5l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],z,t4h,t4l);
1073
+ MulAdd212(&t6h,&t6l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],z,t5h,t5l);
1074
+ MulAdd212(&t7h,&t7l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],z,t6h,t6l);
1075
+ MulAdd212(&t8h,&t8l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],z,t7h,t7l);
1076
+ MulAdd212(&polyh,&polyl,-1,0,z,t8h,t8l);
1077
+
1078
+ /* Compute sqrt(2*z) as a double-double */
1079
+
1080
+ twoZ = 2 * z;
1081
+ sqrt12(&sqrtzh,&sqrtzl,twoZ);
1082
+
1083
+ /* Multiply p(z) by sqrt(2*z); the sign-dependent constant is handled in the reconstruction below */
1084
+
1085
+ Mul22(&pTimesSh,&pTimesSl,polyh,polyl,sqrtzh,sqrtzl);
1086
+
1087
+
1088
+ /* Reconstruction:
1089
+
1090
+ If the sign of x in acos(x) was positive:
1091
+ - Multiply pTimesSh + pTimesSl approx f(x) * sqrt(2 * z) by -1
1092
+ - Return
1093
+
1094
+ Otherwise:
1095
+ - Add Pi (PIH + PIM, with Add22) to pTimesSh + pTimesSl approx f(x) * sqrt(2 * z)
1096
+ - Return
1097
+
1098
+ */
1099
+
1100
+ if (sign > 0) {
1101
+
1102
+ acosh = -1.0 * pTimesSh;
1103
+ acosm = -1.0 * pTimesSl;
1104
+
1105
+ } else {
1106
+
1107
+ Add22(&acosh,&acosm,PIH,PIM,pTimesSh,pTimesSl);
1108
+
1109
+ }
1110
+
1111
+ /* Rounding test
1112
+ The RD rounding constant is at tbl[TBLIDX10+55]
1113
+ */
1114
+ TEST_AND_RETURN_RD(acosh, acosm, tbl[TBLIDX10+55]);
1115
+
1116
+ /* Launch accurate phase */
1117
+
1118
+ acos_accurate_higher(&acosh,&acosm,&acosl,z,sign);
1119
+
1120
+ ReturnRoundDownwards3(acosh,acosm,acosl);
1121
+ }
1122
+
1123
+ /* General 8 main intervals
1124
+ We can already suppose that the high word of |x| lies in BOUND1 <= . < BOUND9
    The nested comparisons below select one of TBLIDX2..TBLIDX9
1125
+ */
1126
+
1127
+ if (xdb.i[HI] < BOUND5) {
1128
+ if (xdb.i[HI] < BOUND3) {
1129
+ if (xdb.i[HI] < BOUND2) i = TBLIDX2; else i = TBLIDX3;
1130
+ } else {
1131
+ if (xdb.i[HI] < BOUND4) i = TBLIDX4; else i = TBLIDX5;
1132
+ }
1133
+ } else {
1134
+ if (xdb.i[HI] < BOUND7) {
1135
+ if (xdb.i[HI] < BOUND6) i = TBLIDX6; else i = TBLIDX7;
1136
+ } else {
1137
+ if (xdb.i[HI] < BOUND8) i = TBLIDX8; else i = TBLIDX9;
1138
+ }
1139
+ }
1140
+
1141
+ /* Argument reduction
1142
+ i points to the interval midpoint value in the table
1143
+ */
1144
+ z = x - tbl[i];
1145
+
1146
+ /* Quick phase starts */
1147
+
1148
+ /* Double precision evaluation */
1149
+
1150
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1151
+ highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(
1152
+ tbl[i+35] ,z,tbl[i+33]),z,tbl[i+31]),z,tbl[i+29]),z,
1153
+ tbl[i+27]),z,tbl[i+25]),z,tbl[i+23]),z,tbl[i+21]);
1154
+ #else
1155
+ highPoly = tbl[i+21] + z * (tbl[i+23] + z * (tbl[i+25] + z * (
1156
+ tbl[i+27] + z * (tbl[i+29] + z * (tbl[i+31] + z * (
1157
+ tbl[i+33] + z * tbl[i+35]))))));
1158
+ #endif
1159
+
1160
+ /* Double-double precision evaluation */
1161
+
1162
+ Mul12(&tt1h,&tt1l,z,highPoly);
1163
+ Add22(&t1h,&t1l,tbl[i+18],tbl[i+19],tt1h,tt1l);
1164
+
1165
+ MulAdd212(&t2h,&t2l,tbl[i+15],tbl[i+16],z,t1h,t1l);
1166
+ MulAdd212(&t3h,&t3l,tbl[i+12],tbl[i+13],z,t2h,t2l);
1167
+ MulAdd212(&t4h,&t4l,tbl[i+9],tbl[i+10],z,t3h,t3l);
1168
+ MulAdd212(&t5h,&t5l,tbl[i+6],tbl[i+7],z,t4h,t4l);
1169
+ MulAdd212(&t6h,&t6l,tbl[i+3],tbl[i+4],z,t5h,t5l);
1170
+ MulAdd212(&polyh,&polyl,tbl[i+1],tbl[i+2],z,t6h,t6l);
1171
+
1172
+ /* Reconstruction:
1173
+
1174
+ - Multiply by the inverted sign
1175
+ - Add Pi/2 in double-double precision
1176
+
1177
+ */
1178
+
1179
+ zw1h = -sign * polyh;
1180
+ zw1l = -sign * polyl;
1181
+
1182
+ Add22(&acosh,&acosm,PIHALFH,PIHALFM,zw1h,zw1l);
1183
+
1184
+ /* Rounding test
1185
+ The RD rounding constant is at tbl[i+60]
1186
+ */
1187
+ TEST_AND_RETURN_RD(acosh, acosm, tbl[i+60]);
1188
+
1189
+ /* Launch accurate phase */
1190
+
1191
+ acos_accurate_middle(&acosh,&acosm,&acosl,z,i,sign);
1192
+
1193
+ ReturnRoundDownwards3(acosh,acosm,acosl);
1194
+ }
1195
+