crmf 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +12 -0
  3. data/crmf.gemspec +105 -3
  4. data/ext/crlibm-1.0beta5/AUTHORS +2 -0
  5. data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
  6. data/ext/crlibm-1.0beta5/COPYING +340 -0
  7. data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
  8. data/ext/crlibm-1.0beta5/ChangeLog +125 -0
  9. data/ext/crlibm-1.0beta5/Makefile.am +134 -0
  10. data/ext/crlibm-1.0beta5/NEWS +0 -0
  11. data/ext/crlibm-1.0beta5/README +31 -0
  12. data/ext/crlibm-1.0beta5/README.DEV +23 -0
  13. data/ext/crlibm-1.0beta5/README.md +5 -0
  14. data/ext/crlibm-1.0beta5/TODO +66 -0
  15. data/ext/crlibm-1.0beta5/VERSION +1 -0
  16. data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
  17. data/ext/crlibm-1.0beta5/acos-td.h +629 -0
  18. data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
  19. data/ext/crlibm-1.0beta5/asin-td.h +620 -0
  20. data/ext/crlibm-1.0beta5/asincos.c +4488 -0
  21. data/ext/crlibm-1.0beta5/asincos.h +575 -0
  22. data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
  23. data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
  24. data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
  25. data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
  26. data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
  27. data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
  28. data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
  29. data/ext/crlibm-1.0beta5/configure.ac +419 -0
  30. data/ext/crlibm-1.0beta5/crlibm.h +204 -0
  31. data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
  32. data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
  33. data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
  34. data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
  35. data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
  36. data/ext/crlibm-1.0beta5/double-extended.h +496 -0
  37. data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
  38. data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
  39. data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
  40. data/ext/crlibm-1.0beta5/exp-td.h +685 -0
  41. data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
  42. data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
  43. data/ext/crlibm-1.0beta5/expm1.c +2515 -0
  44. data/ext/crlibm-1.0beta5/expm1.h +715 -0
  45. data/ext/crlibm-1.0beta5/interval.h +238 -0
  46. data/ext/crlibm-1.0beta5/log-de.c +480 -0
  47. data/ext/crlibm-1.0beta5/log-de.h +747 -0
  48. data/ext/crlibm-1.0beta5/log-de2.c +280 -0
  49. data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
  50. data/ext/crlibm-1.0beta5/log-td.c +1158 -0
  51. data/ext/crlibm-1.0beta5/log-td.h +819 -0
  52. data/ext/crlibm-1.0beta5/log.c +2244 -0
  53. data/ext/crlibm-1.0beta5/log.h +1592 -0
  54. data/ext/crlibm-1.0beta5/log10-td.c +906 -0
  55. data/ext/crlibm-1.0beta5/log10-td.h +823 -0
  56. data/ext/crlibm-1.0beta5/log1p.c +1295 -0
  57. data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
  58. data/ext/crlibm-1.0beta5/log2-td.h +821 -0
  59. data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
  60. data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
  61. data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
  62. data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
  63. data/ext/crlibm-1.0beta5/log_fast.c +360 -0
  64. data/ext/crlibm-1.0beta5/log_fast.h +440 -0
  65. data/ext/crlibm-1.0beta5/pow.c +1396 -0
  66. data/ext/crlibm-1.0beta5/pow.h +3101 -0
  67. data/ext/crlibm-1.0beta5/prepare +20 -0
  68. data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
  69. data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
  70. data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
  71. data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
  72. data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
  73. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
  74. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
  75. data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
  76. data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
  77. data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
  78. data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
  79. data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
  80. data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
  81. data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
  82. data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
  83. data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
  84. data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
  85. data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
  86. data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
  87. data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
  88. data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
  89. data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
  90. data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
  91. data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
  92. data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
  93. data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
  94. data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
  95. data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
  96. data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
  97. data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
  98. data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
  99. data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
  100. data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
  101. data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
  102. data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
  103. data/ext/crlibm-1.0beta5/trigpi.h +556 -0
  104. data/ext/crlibm-1.0beta5/triple-double.c +57 -0
  105. data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
  106. data/ext/crmf/crmf.c +117 -20
  107. data/ext/crmf/extconf.rb +12 -8
  108. data/lib/crmf/version.rb +1 -1
  109. data/tests/perf.rb +100 -219
  110. metadata +108 -10
  111. data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,4488 @@
1
+ /*
2
+ * Correctly rounded arcsine and arccosine
3
+ *
4
+ * Copyright (c) 2007 Christoph Lauter (ENS Lyon),
5
+ * with Sylvain Chevillard (ENS Lyon) for the polynomials
6
+ *
7
+ * This file is part of the crlibm library developed by the Arenaire
8
+ * project at Ecole Normale Superieure de Lyon
9
+ *
10
+ * This program is free software; you can redistribute it and/or modify
11
+ * it under the terms of the GNU Lesser General Public License as published by
12
+ * the Free Software Foundation; either version 2 of the License, or
13
+ * (at your option) any later version.
14
+ *
15
+ * This program is distributed in the hope that it will be useful,
16
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ * GNU General Public License for more details.
19
+ *
20
+ * You should have received a copy of the GNU Lesser General Public License
21
+ * along with this program; if not, write to the Free Software
22
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23
+ */
24
+
25
+ #include <stdio.h>
26
+ #include <stdlib.h>
27
+ #include "crlibm.h"
28
+ #include "crlibm_private.h"
29
+ #include "triple-double.h"
30
+ #include "asincos.h"
31
+
32
+ static inline void p0_quick(double *p_resh, double *p_resm, double x, int32_t xhi) {
33
+ double p_x_0_pow2h, p_x_0_pow2m;
34
+
35
+
36
+
37
+
38
+
39
+ double p_t_1_0h;
40
+ double p_t_2_0h;
41
+ double p_t_3_0h;
42
+ double p_t_4_0h;
43
+ double p_t_5_0h;
44
+ double p_t_6_0h;
45
+ double p_t_7_0h;
46
+ double p_t_8_0h;
47
+ double p_t_9_0h;
48
+ double p_t_10_0h;
49
+ double p_t_11_0h;
50
+ double p_t_12_0h;
51
+ double p_t_13_0h;
52
+ double p_t_14_0h;
53
+ double p_t_15_0h;
54
+ double p_t_16_0h;
55
+ double p_t_17_0h, p_t_17_0m;
56
+ double p_t_18_0h, p_t_18_0m;
57
+ double p_t_19_0h, p_t_19_0m;
58
+ double p_t_20_0h, p_t_20_0m;
59
+
60
+ if (xhi < EXTRABOUND2) {
61
+
62
+ double t1, t2, t3;
63
+
64
+ t2 = p0_quick_coeff_3h * x;
65
+ t1 = x * x;
66
+ t3 = t1 * t2;
67
+
68
+ Add12(*p_resh,*p_resm,x,t3);
69
+
70
+
71
+ return;
72
+ }
73
+
74
+ Mul12(&p_x_0_pow2h,&p_x_0_pow2m,x,x);
75
+
76
+ p_t_15_0h = p0_quick_coeff_5h;
77
+ if (xhi > EXTRABOUND) {
78
+
79
+ p_t_1_0h = p0_quick_coeff_19h;
80
+ p_t_2_0h = p_t_1_0h * p_x_0_pow2h;
81
+ p_t_3_0h = p0_quick_coeff_17h + p_t_2_0h;
82
+ p_t_4_0h = p_t_3_0h * p_x_0_pow2h;
83
+ p_t_5_0h = p0_quick_coeff_15h + p_t_4_0h;
84
+ p_t_6_0h = p_t_5_0h * p_x_0_pow2h;
85
+ p_t_7_0h = p0_quick_coeff_13h + p_t_6_0h;
86
+ p_t_8_0h = p_t_7_0h * p_x_0_pow2h;
87
+ p_t_9_0h = p0_quick_coeff_11h + p_t_8_0h;
88
+ p_t_10_0h = p_t_9_0h * p_x_0_pow2h;
89
+ p_t_11_0h = p0_quick_coeff_9h + p_t_10_0h;
90
+ p_t_12_0h = p_t_11_0h * p_x_0_pow2h;
91
+ p_t_13_0h = p0_quick_coeff_7h + p_t_12_0h;
92
+ p_t_14_0h = p_t_13_0h * p_x_0_pow2h;
93
+ p_t_15_0h = p_t_15_0h + p_t_14_0h;
94
+ }
95
+
96
+ p_t_16_0h = p_t_15_0h * p_x_0_pow2h;
97
+ Add12(p_t_17_0h,p_t_17_0m,p0_quick_coeff_3h,p_t_16_0h);
98
+
99
+ Mul122(&p_t_18_0h,&p_t_18_0m,x,p_x_0_pow2h,p_x_0_pow2m);
100
+ Mul22(&p_t_19_0h,&p_t_19_0m,p_t_17_0h,p_t_17_0m,p_t_18_0h,p_t_18_0m);
101
+
102
+ Add122(&p_t_20_0h,&p_t_20_0m,x,p_t_19_0h,p_t_19_0m);
103
+
104
+ *p_resh = p_t_20_0h; *p_resm = p_t_20_0m;
105
+
106
+
107
+ }
108
+
109
+ static inline void p_quick(double *p_resh, double *p_resm, double x, int index) {
110
+
111
+
112
+ double p_t_1_0h;
113
+ double p_t_2_0h;
114
+ double p_t_3_0h;
115
+ double p_t_4_0h;
116
+ double p_t_5_0h;
117
+ double p_t_6_0h;
118
+ double p_t_7_0h;
119
+ double p_t_8_0h;
120
+ double p_t_9_0h;
121
+ double p_t_10_0h;
122
+ double p_t_11_0h;
123
+ double p_t_12_0h;
124
+ double p_t_13_0h;
125
+ double p_t_14_0h;
126
+ double p_t_15_0h;
127
+ double p_t_16_0h;
128
+ double p_t_17_0h;
129
+ double p_t_18_0h;
130
+ double p_t_19_0h;
131
+ double p_t_20_0h;
132
+ double p_t_21_0h, p_t_21_0m;
133
+ double p_t_22_0h, p_t_22_0m;
134
+ double p_t_23_0h, p_t_23_0m;
135
+
136
+ p_t_1_0h = p_quick_coeff_12h;
137
+ p_t_2_0h = p_t_1_0h * x;
138
+ p_t_3_0h = p_quick_coeff_11h + p_t_2_0h;
139
+ p_t_4_0h = p_t_3_0h * x;
140
+ p_t_5_0h = p_quick_coeff_10h + p_t_4_0h;
141
+ p_t_6_0h = p_t_5_0h * x;
142
+ p_t_7_0h = p_quick_coeff_9h + p_t_6_0h;
143
+ p_t_8_0h = p_t_7_0h * x;
144
+ p_t_9_0h = p_quick_coeff_8h + p_t_8_0h;
145
+ p_t_10_0h = p_t_9_0h * x;
146
+ p_t_11_0h = p_quick_coeff_7h + p_t_10_0h;
147
+ p_t_12_0h = p_t_11_0h * x;
148
+ p_t_13_0h = p_quick_coeff_6h + p_t_12_0h;
149
+ p_t_14_0h = p_t_13_0h * x;
150
+ p_t_15_0h = p_quick_coeff_5h + p_t_14_0h;
151
+ p_t_16_0h = p_t_15_0h * x;
152
+ p_t_17_0h = p_quick_coeff_4h + p_t_16_0h;
153
+ p_t_18_0h = p_t_17_0h * x;
154
+ p_t_19_0h = p_quick_coeff_3h + p_t_18_0h;
155
+ p_t_20_0h = p_t_19_0h * x;
156
+ Add12(p_t_21_0h,p_t_21_0m,p_quick_coeff_2h,p_t_20_0h);
157
+ MulAdd212(&p_t_22_0h,&p_t_22_0m,p_quick_coeff_1h,p_quick_coeff_1m,x,p_t_21_0h,p_t_21_0m);
158
+ MulAdd212(&p_t_23_0h,&p_t_23_0m,p_quick_coeff_0h,p_quick_coeff_0m,x,p_t_22_0h,p_t_22_0m);
159
+ *p_resh = p_t_23_0h; *p_resm = p_t_23_0m;
160
+
161
+ }
162
+
163
+
164
+
165
+ static inline void p9_quick(double *p_resh, double *p_resm, double x) {
166
+
167
+
168
+
169
+
170
+ double p_t_1_0h;
171
+ double p_t_2_0h;
172
+ double p_t_3_0h;
173
+ double p_t_4_0h;
174
+ double p_t_5_0h;
175
+ double p_t_6_0h;
176
+ double p_t_7_0h;
177
+ double p_t_8_0h;
178
+ double p_t_9_0h;
179
+ double p_t_10_0h;
180
+ double p_t_11_0h;
181
+ double p_t_12_0h;
182
+ double p_t_13_0h;
183
+ double p_t_14_0h;
184
+ double p_t_15_0h;
185
+ double p_t_16_0h;
186
+ double p_t_17_0h;
187
+ double p_t_18_0h;
188
+ double p_t_19_0h;
189
+ double p_t_20_0h;
190
+ double p_t_21_0h, p_t_21_0m;
191
+ double p_t_22_0h, p_t_22_0m;
192
+ double p_t_23_0h, p_t_23_0m;
193
+
194
+
195
+
196
+ p_t_1_0h = p9_quick_coeff_11h;
197
+ p_t_2_0h = p_t_1_0h * x;
198
+ p_t_3_0h = p9_quick_coeff_10h + p_t_2_0h;
199
+ p_t_4_0h = p_t_3_0h * x;
200
+ p_t_5_0h = p9_quick_coeff_9h + p_t_4_0h;
201
+ p_t_6_0h = p_t_5_0h * x;
202
+ p_t_7_0h = p9_quick_coeff_8h + p_t_6_0h;
203
+ p_t_8_0h = p_t_7_0h * x;
204
+ p_t_9_0h = p9_quick_coeff_7h + p_t_8_0h;
205
+ p_t_10_0h = p_t_9_0h * x;
206
+ p_t_11_0h = p9_quick_coeff_6h + p_t_10_0h;
207
+ p_t_12_0h = p_t_11_0h * x;
208
+ p_t_13_0h = p9_quick_coeff_5h + p_t_12_0h;
209
+ p_t_14_0h = p_t_13_0h * x;
210
+ p_t_15_0h = p9_quick_coeff_4h + p_t_14_0h;
211
+ p_t_16_0h = p_t_15_0h * x;
212
+ p_t_17_0h = p9_quick_coeff_3h + p_t_16_0h;
213
+ p_t_18_0h = p_t_17_0h * x;
214
+ p_t_19_0h = p9_quick_coeff_2h + p_t_18_0h;
215
+ p_t_20_0h = p_t_19_0h * x;
216
+ Add12(p_t_21_0h,p_t_21_0m,p9_quick_coeff_1h,p_t_20_0h);
217
+ Mul122(&p_t_22_0h,&p_t_22_0m,x,p_t_21_0h,p_t_21_0m);
218
+ Add122(&p_t_23_0h,&p_t_23_0m,p9_quick_coeff_0h,p_t_22_0h,p_t_22_0m);
219
+ *p_resh = p_t_23_0h; *p_resm = p_t_23_0m;
220
+
221
+
222
+ }
223
+
224
+
225
+
226
+ static void p0_accu(double *p_resh, double *p_resm, double *p_resl, double x) {
227
+ double p_x_0_pow2h, p_x_0_pow2m;
228
+
229
+
230
+ Mul12(&p_x_0_pow2h,&p_x_0_pow2m,x,x);
231
+
232
+
233
+ double p_t_1_0h;
234
+ double p_t_2_0h;
235
+ double p_t_3_0h;
236
+ double p_t_4_0h;
237
+ double p_t_5_0h;
238
+ double p_t_6_0h;
239
+ double p_t_7_0h, p_t_7_0m;
240
+ double p_t_8_0h, p_t_8_0m;
241
+ double p_t_9_0h, p_t_9_0m;
242
+ double p_t_10_0h, p_t_10_0m;
243
+ double p_t_11_0h, p_t_11_0m;
244
+ double p_t_12_0h, p_t_12_0m;
245
+ double p_t_13_0h, p_t_13_0m;
246
+ double p_t_14_0h, p_t_14_0m;
247
+ double p_t_15_0h, p_t_15_0m;
248
+ double p_t_16_0h, p_t_16_0m;
249
+ double p_t_17_0h, p_t_17_0m;
250
+ double p_t_18_0h, p_t_18_0m;
251
+ double p_t_19_0h, p_t_19_0m;
252
+ double p_t_20_0h, p_t_20_0m;
253
+ double p_t_21_0h, p_t_21_0m;
254
+ double p_t_22_0h, p_t_22_0m;
255
+ double p_t_23_0h, p_t_23_0m;
256
+ double p_t_24_0h, p_t_24_0m;
257
+ double p_t_25_0h, p_t_25_0m;
258
+ double p_t_26_0h, p_t_26_0m;
259
+ double p_t_27_0h, p_t_27_0m;
260
+ double p_t_28_0h, p_t_28_0m, p_t_28_0l;
261
+ double p_t_29_0h, p_t_29_0m, p_t_29_0l;
262
+ double p_t_30_0h, p_t_30_0m, p_t_30_0l;
263
+ double p_t_31_0h, p_t_31_0m, p_t_31_0l;
264
+ double p_t_32_0h, p_t_32_0m, p_t_32_0l;
265
+ double p_t_33_0h, p_t_33_0m, p_t_33_0l;
266
+ double p_t_34_0h, p_t_34_0m, p_t_34_0l;
267
+ double p_t_35_0h, p_t_35_0m, p_t_35_0l;
268
+
269
+
270
+
271
+ p_t_1_0h = p0_accu_coeff_37h;
272
+ p_t_2_0h = p_t_1_0h * p_x_0_pow2h;
273
+ p_t_3_0h = p0_accu_coeff_35h + p_t_2_0h;
274
+ p_t_4_0h = p_t_3_0h * p_x_0_pow2h;
275
+ p_t_5_0h = p0_accu_coeff_33h + p_t_4_0h;
276
+ p_t_6_0h = p_t_5_0h * p_x_0_pow2h;
277
+ Add12(p_t_7_0h,p_t_7_0m,p0_accu_coeff_31h,p_t_6_0h);
278
+ Mul22(&p_t_8_0h,&p_t_8_0m,p_t_7_0h,p_t_7_0m,p_x_0_pow2h,p_x_0_pow2m);
279
+ Add122(&p_t_9_0h,&p_t_9_0m,p0_accu_coeff_29h,p_t_8_0h,p_t_8_0m);
280
+ Mul22(&p_t_10_0h,&p_t_10_0m,p_t_9_0h,p_t_9_0m,p_x_0_pow2h,p_x_0_pow2m);
281
+ Add122(&p_t_11_0h,&p_t_11_0m,p0_accu_coeff_27h,p_t_10_0h,p_t_10_0m);
282
+ Mul22(&p_t_12_0h,&p_t_12_0m,p_t_11_0h,p_t_11_0m,p_x_0_pow2h,p_x_0_pow2m);
283
+ Add122(&p_t_13_0h,&p_t_13_0m,p0_accu_coeff_25h,p_t_12_0h,p_t_12_0m);
284
+ Mul22(&p_t_14_0h,&p_t_14_0m,p_t_13_0h,p_t_13_0m,p_x_0_pow2h,p_x_0_pow2m);
285
+ Add122(&p_t_15_0h,&p_t_15_0m,p0_accu_coeff_23h,p_t_14_0h,p_t_14_0m);
286
+ Mul22(&p_t_16_0h,&p_t_16_0m,p_t_15_0h,p_t_15_0m,p_x_0_pow2h,p_x_0_pow2m);
287
+ Add122(&p_t_17_0h,&p_t_17_0m,p0_accu_coeff_21h,p_t_16_0h,p_t_16_0m);
288
+ Mul22(&p_t_18_0h,&p_t_18_0m,p_t_17_0h,p_t_17_0m,p_x_0_pow2h,p_x_0_pow2m);
289
+ Add122(&p_t_19_0h,&p_t_19_0m,p0_accu_coeff_19h,p_t_18_0h,p_t_18_0m);
290
+ Mul22(&p_t_20_0h,&p_t_20_0m,p_t_19_0h,p_t_19_0m,p_x_0_pow2h,p_x_0_pow2m);
291
+ Add122(&p_t_21_0h,&p_t_21_0m,p0_accu_coeff_17h,p_t_20_0h,p_t_20_0m);
292
+ Mul22(&p_t_22_0h,&p_t_22_0m,p_t_21_0h,p_t_21_0m,p_x_0_pow2h,p_x_0_pow2m);
293
+ Add122(&p_t_23_0h,&p_t_23_0m,p0_accu_coeff_15h,p_t_22_0h,p_t_22_0m);
294
+ MulAdd22(&p_t_24_0h,&p_t_24_0m,p0_accu_coeff_13h,p0_accu_coeff_13m,p_x_0_pow2h,p_x_0_pow2m,p_t_23_0h,p_t_23_0m);
295
+ MulAdd22(&p_t_25_0h,&p_t_25_0m,p0_accu_coeff_11h,p0_accu_coeff_11m,p_x_0_pow2h,p_x_0_pow2m,p_t_24_0h,p_t_24_0m);
296
+ MulAdd22(&p_t_26_0h,&p_t_26_0m,p0_accu_coeff_9h,p0_accu_coeff_9m,p_x_0_pow2h,p_x_0_pow2m,p_t_25_0h,p_t_25_0m);
297
+ Mul22(&p_t_27_0h,&p_t_27_0m,p_t_26_0h,p_t_26_0m,p_x_0_pow2h,p_x_0_pow2m);
298
+ Add23(&p_t_28_0h,&p_t_28_0m,&p_t_28_0l,p0_accu_coeff_7h,p0_accu_coeff_7m,p_t_27_0h,p_t_27_0m);
299
+ Mul233(&p_t_29_0h,&p_t_29_0m,&p_t_29_0l,p_x_0_pow2h,p_x_0_pow2m,p_t_28_0h,p_t_28_0m,p_t_28_0l);
300
+ Add233(&p_t_30_0h,&p_t_30_0m,&p_t_30_0l,p0_accu_coeff_5h,p0_accu_coeff_5m,p_t_29_0h,p_t_29_0m,p_t_29_0l);
301
+ Mul233(&p_t_31_0h,&p_t_31_0m,&p_t_31_0l,p_x_0_pow2h,p_x_0_pow2m,p_t_30_0h,p_t_30_0m,p_t_30_0l);
302
+ Add233(&p_t_32_0h,&p_t_32_0m,&p_t_32_0l,p0_accu_coeff_3h,p0_accu_coeff_3m,p_t_31_0h,p_t_31_0m,p_t_31_0l);
303
+ Mul233(&p_t_33_0h,&p_t_33_0m,&p_t_33_0l,p_x_0_pow2h,p_x_0_pow2m,p_t_32_0h,p_t_32_0m,p_t_32_0l);
304
+ Add133(&p_t_34_0h,&p_t_34_0m,&p_t_34_0l,p0_accu_coeff_1h,p_t_33_0h,p_t_33_0m,p_t_33_0l);
305
+ Mul133(&p_t_35_0h,&p_t_35_0m,&p_t_35_0l,x,p_t_34_0h,p_t_34_0m,p_t_34_0l);
306
+ Renormalize3(p_resh,p_resm,p_resl,p_t_35_0h,p_t_35_0m,p_t_35_0l);
307
+
308
+
309
+ }
310
+
311
+
312
+ static void p_accu(double *p_resh, double *p_resm, double *p_resl, double x, int index) {
313
+
314
+
315
+ double p_t_1_0h;
316
+ double p_t_2_0h;
317
+ double p_t_3_0h;
318
+ double p_t_4_0h;
319
+ double p_t_5_0h;
320
+ double p_t_6_0h;
321
+ double p_t_7_0h;
322
+ double p_t_8_0h;
323
+ double p_t_9_0h;
324
+ double p_t_10_0h;
325
+ double p_t_11_0h;
326
+ double p_t_12_0h;
327
+ double p_t_13_0h;
328
+ double p_t_14_0h;
329
+ double p_t_15_0h;
330
+ double p_t_16_0h;
331
+ double p_t_17_0h, p_t_17_0m;
332
+ double p_t_18_0h, p_t_18_0m;
333
+ double p_t_19_0h, p_t_19_0m;
334
+ double p_t_20_0h, p_t_20_0m;
335
+ double p_t_21_0h, p_t_21_0m;
336
+ double p_t_22_0h, p_t_22_0m;
337
+ double p_t_23_0h, p_t_23_0m;
338
+ double p_t_24_0h, p_t_24_0m;
339
+ double p_t_25_0h, p_t_25_0m;
340
+ double p_t_26_0h, p_t_26_0m;
341
+ double p_t_27_0h, p_t_27_0m;
342
+ double p_t_28_0h, p_t_28_0m;
343
+ double p_t_29_0h, p_t_29_0m;
344
+ double p_t_30_0h, p_t_30_0m;
345
+ double p_t_31_0h, p_t_31_0m;
346
+ double p_t_32_0h, p_t_32_0m;
347
+ double p_t_33_0h, p_t_33_0m;
348
+ double p_t_34_0h, p_t_34_0m;
349
+ double p_t_35_0h, p_t_35_0m, p_t_35_0l;
350
+ double p_t_36_0h, p_t_36_0m, p_t_36_0l;
351
+ double p_t_37_0h, p_t_37_0m, p_t_37_0l;
352
+ double p_t_38_0h, p_t_38_0m, p_t_38_0l;
353
+ double p_t_39_0h, p_t_39_0m, p_t_39_0l;
354
+
355
+
356
+
357
+ p_t_1_0h = p_accu_coeff_22h;
358
+ p_t_2_0h = p_t_1_0h * x;
359
+ p_t_3_0h = p_accu_coeff_21h + p_t_2_0h;
360
+ p_t_4_0h = p_t_3_0h * x;
361
+ p_t_5_0h = p_accu_coeff_20h + p_t_4_0h;
362
+ p_t_6_0h = p_t_5_0h * x;
363
+ p_t_7_0h = p_accu_coeff_19h + p_t_6_0h;
364
+ p_t_8_0h = p_t_7_0h * x;
365
+ p_t_9_0h = p_accu_coeff_18h + p_t_8_0h;
366
+ p_t_10_0h = p_t_9_0h * x;
367
+ p_t_11_0h = p_accu_coeff_17h + p_t_10_0h;
368
+ p_t_12_0h = p_t_11_0h * x;
369
+ p_t_13_0h = p_accu_coeff_16h + p_t_12_0h;
370
+ p_t_14_0h = p_t_13_0h * x;
371
+ p_t_15_0h = p_accu_coeff_15h + p_t_14_0h;
372
+ p_t_16_0h = p_t_15_0h * x;
373
+ Add12(p_t_17_0h,p_t_17_0m,p_accu_coeff_14h,p_t_16_0h);
374
+ Mul122(&p_t_18_0h,&p_t_18_0m,x,p_t_17_0h,p_t_17_0m);
375
+ Add122(&p_t_19_0h,&p_t_19_0m,p_accu_coeff_13h,p_t_18_0h,p_t_18_0m);
376
+ Mul122(&p_t_20_0h,&p_t_20_0m,x,p_t_19_0h,p_t_19_0m);
377
+ Add122(&p_t_21_0h,&p_t_21_0m,p_accu_coeff_12h,p_t_20_0h,p_t_20_0m);
378
+ Mul122(&p_t_22_0h,&p_t_22_0m,x,p_t_21_0h,p_t_21_0m);
379
+ Add122(&p_t_23_0h,&p_t_23_0m,p_accu_coeff_11h,p_t_22_0h,p_t_22_0m);
380
+ Mul122(&p_t_24_0h,&p_t_24_0m,x,p_t_23_0h,p_t_23_0m);
381
+ Add122(&p_t_25_0h,&p_t_25_0m,p_accu_coeff_10h,p_t_24_0h,p_t_24_0m);
382
+ Mul122(&p_t_26_0h,&p_t_26_0m,x,p_t_25_0h,p_t_25_0m);
383
+ Add122(&p_t_27_0h,&p_t_27_0m,p_accu_coeff_9h,p_t_26_0h,p_t_26_0m);
384
+ MulAdd212(&p_t_28_0h,&p_t_28_0m,p_accu_coeff_8h,p_accu_coeff_8m,x,p_t_27_0h,p_t_27_0m);
385
+ MulAdd212(&p_t_29_0h,&p_t_29_0m,p_accu_coeff_7h,p_accu_coeff_7m,x,p_t_28_0h,p_t_28_0m);
386
+ MulAdd212(&p_t_30_0h,&p_t_30_0m,p_accu_coeff_6h,p_accu_coeff_6m,x,p_t_29_0h,p_t_29_0m);
387
+ MulAdd212(&p_t_31_0h,&p_t_31_0m,p_accu_coeff_5h,p_accu_coeff_5m,x,p_t_30_0h,p_t_30_0m);
388
+ MulAdd212(&p_t_32_0h,&p_t_32_0m,p_accu_coeff_4h,p_accu_coeff_4m,x,p_t_31_0h,p_t_31_0m);
389
+ MulAdd212(&p_t_33_0h,&p_t_33_0m,p_accu_coeff_3h,p_accu_coeff_3m,x,p_t_32_0h,p_t_32_0m);
390
+ Mul122(&p_t_34_0h,&p_t_34_0m,x,p_t_33_0h,p_t_33_0m);
391
+ Add23(&p_t_35_0h,&p_t_35_0m,&p_t_35_0l,p_accu_coeff_2h,p_accu_coeff_2m,p_t_34_0h,p_t_34_0m);
392
+ Mul133(&p_t_36_0h,&p_t_36_0m,&p_t_36_0l,x,p_t_35_0h,p_t_35_0m,p_t_35_0l);
393
+ Add233(&p_t_37_0h,&p_t_37_0m,&p_t_37_0l,p_accu_coeff_1h,p_accu_coeff_1m,p_t_36_0h,p_t_36_0m,p_t_36_0l);
394
+ Mul133(&p_t_38_0h,&p_t_38_0m,&p_t_38_0l,x,p_t_37_0h,p_t_37_0m,p_t_37_0l);
395
+ Add233(&p_t_39_0h,&p_t_39_0m,&p_t_39_0l,p_accu_coeff_0h,p_accu_coeff_0m,p_t_38_0h,p_t_38_0m,p_t_38_0l);
396
+ Renormalize3(p_resh,p_resm,p_resl,p_t_39_0h,p_t_39_0m,p_t_39_0l);
397
+
398
+
399
+ }
400
+
401
+
402
+ static void p9_accu(double *p_resh, double *p_resm, double *p_resl, double x) {
403
+
404
+
405
+
406
+
407
+ double p_t_1_0h;
408
+ double p_t_2_0h;
409
+ double p_t_3_0h;
410
+ double p_t_4_0h;
411
+ double p_t_5_0h;
412
+ double p_t_6_0h;
413
+ double p_t_7_0h;
414
+ double p_t_8_0h;
415
+ double p_t_9_0h;
416
+ double p_t_10_0h;
417
+ double p_t_11_0h;
418
+ double p_t_12_0h;
419
+ double p_t_13_0h;
420
+ double p_t_14_0h;
421
+ double p_t_15_0h;
422
+ double p_t_16_0h;
423
+ double p_t_17_0h, p_t_17_0m;
424
+ double p_t_18_0h, p_t_18_0m;
425
+ double p_t_19_0h, p_t_19_0m;
426
+ double p_t_20_0h, p_t_20_0m;
427
+ double p_t_21_0h, p_t_21_0m;
428
+ double p_t_22_0h, p_t_22_0m;
429
+ double p_t_23_0h, p_t_23_0m;
430
+ double p_t_24_0h, p_t_24_0m;
431
+ double p_t_25_0h, p_t_25_0m;
432
+ double p_t_26_0h, p_t_26_0m;
433
+ double p_t_27_0h, p_t_27_0m;
434
+ double p_t_28_0h, p_t_28_0m;
435
+ double p_t_29_0h, p_t_29_0m;
436
+ double p_t_30_0h, p_t_30_0m;
437
+ double p_t_31_0h, p_t_31_0m;
438
+ double p_t_32_0h, p_t_32_0m;
439
+ double p_t_33_0h, p_t_33_0m, p_t_33_0l;
440
+ double p_t_34_0h, p_t_34_0m, p_t_34_0l;
441
+ double p_t_35_0h, p_t_35_0m, p_t_35_0l;
442
+
443
+
444
+
445
+ p_t_1_0h = p9_accu_coeff_20h;
446
+ p_t_2_0h = p_t_1_0h * x;
447
+ p_t_3_0h = p9_accu_coeff_19h + p_t_2_0h;
448
+ p_t_4_0h = p_t_3_0h * x;
449
+ p_t_5_0h = p9_accu_coeff_18h + p_t_4_0h;
450
+ p_t_6_0h = p_t_5_0h * x;
451
+ p_t_7_0h = p9_accu_coeff_17h + p_t_6_0h;
452
+ p_t_8_0h = p_t_7_0h * x;
453
+ p_t_9_0h = p9_accu_coeff_16h + p_t_8_0h;
454
+ p_t_10_0h = p_t_9_0h * x;
455
+ p_t_11_0h = p9_accu_coeff_15h + p_t_10_0h;
456
+ p_t_12_0h = p_t_11_0h * x;
457
+ p_t_13_0h = p9_accu_coeff_14h + p_t_12_0h;
458
+ p_t_14_0h = p_t_13_0h * x;
459
+ p_t_15_0h = p9_accu_coeff_13h + p_t_14_0h;
460
+ p_t_16_0h = p_t_15_0h * x;
461
+ Add12(p_t_17_0h,p_t_17_0m,p9_accu_coeff_12h,p_t_16_0h);
462
+ Mul122(&p_t_18_0h,&p_t_18_0m,x,p_t_17_0h,p_t_17_0m);
463
+ Add122(&p_t_19_0h,&p_t_19_0m,p9_accu_coeff_11h,p_t_18_0h,p_t_18_0m);
464
+ Mul122(&p_t_20_0h,&p_t_20_0m,x,p_t_19_0h,p_t_19_0m);
465
+ Add122(&p_t_21_0h,&p_t_21_0m,p9_accu_coeff_10h,p_t_20_0h,p_t_20_0m);
466
+ Mul122(&p_t_22_0h,&p_t_22_0m,x,p_t_21_0h,p_t_21_0m);
467
+ Add122(&p_t_23_0h,&p_t_23_0m,p9_accu_coeff_9h,p_t_22_0h,p_t_22_0m);
468
+ Mul122(&p_t_24_0h,&p_t_24_0m,x,p_t_23_0h,p_t_23_0m);
469
+ Add122(&p_t_25_0h,&p_t_25_0m,p9_accu_coeff_8h,p_t_24_0h,p_t_24_0m);
470
+ MulAdd212(&p_t_26_0h,&p_t_26_0m,p9_accu_coeff_7h,p9_accu_coeff_7m,x,p_t_25_0h,p_t_25_0m);
471
+ MulAdd212(&p_t_27_0h,&p_t_27_0m,p9_accu_coeff_6h,p9_accu_coeff_6m,x,p_t_26_0h,p_t_26_0m);
472
+ MulAdd212(&p_t_28_0h,&p_t_28_0m,p9_accu_coeff_5h,p9_accu_coeff_5m,x,p_t_27_0h,p_t_27_0m);
473
+ MulAdd212(&p_t_29_0h,&p_t_29_0m,p9_accu_coeff_4h,p9_accu_coeff_4m,x,p_t_28_0h,p_t_28_0m);
474
+ MulAdd212(&p_t_30_0h,&p_t_30_0m,p9_accu_coeff_3h,p9_accu_coeff_3m,x,p_t_29_0h,p_t_29_0m);
475
+ MulAdd212(&p_t_31_0h,&p_t_31_0m,p9_accu_coeff_2h,p9_accu_coeff_2m,x,p_t_30_0h,p_t_30_0m);
476
+ Mul122(&p_t_32_0h,&p_t_32_0m,x,p_t_31_0h,p_t_31_0m);
477
+ Add23(&p_t_33_0h,&p_t_33_0m,&p_t_33_0l,p9_accu_coeff_1h,p9_accu_coeff_1m,p_t_32_0h,p_t_32_0m);
478
+ Mul133(&p_t_34_0h,&p_t_34_0m,&p_t_34_0l,x,p_t_33_0h,p_t_33_0m,p_t_33_0l);
479
+ Add233(&p_t_35_0h,&p_t_35_0m,&p_t_35_0l,p9_accu_coeff_0h,p9_accu_coeff_0m,p_t_34_0h,p_t_34_0m,p_t_34_0l);
480
+ Renormalize3(p_resh,p_resm,p_resl,p_t_35_0h,p_t_35_0m,p_t_35_0l);
481
+
482
+
483
+ }
484
+
485
+
486
+
487
+
488
+ double asin_rn(double x) {
489
+ db_number xdb, zdb;
490
+ double sign, z, zp;
491
+ int index;
492
+ double asinh, asinm, asinl;
493
+ double asinhover, asinmover, asinlover;
494
+ double p9h, p9m, p9l, sqrh, sqrm, sqrl;
495
+ double t1h, t1m, t1l;
496
+ double asin;
497
+ double xabs;
498
+
499
+ /* Start already computations for argument reduction */
500
+
501
+ zdb.d = 1.0 + x * x;
502
+
503
+ xdb.d = x;
504
+
505
+ /* Special case handling */
506
+
507
+ /* Remove sign of x in floating-point */
508
+ xabs = ABS(x);
509
+ xdb.i[HI] &= 0x7fffffff;
510
+
511
+ /* If |x| < 2^(-28) we have
512
+
513
+ arcsin(x) = x * ( 1 + xi )
514
+
515
+ with 0 <= xi < 2^(-55)
516
+
517
+ So we can decide the rounding without any computation
518
+ */
519
+ if (xdb.i[HI] < ASINSIMPLEBOUND) {
520
+ return x;
521
+ }
522
+
523
+ /* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
524
+ if (xdb.i[HI] >= 0x3ff00000) {
525
+ if (x == 1.0) {
526
+ return PIHALFH;
527
+ }
528
+ if (x == -1.0) {
529
+ return - PIHALFH;
530
+ }
531
+ return (x-x)/0.0; /* return NaN */
532
+ }
533
+
534
+ /* Argument reduction:
535
+
536
+ We have 10 intervals and 3 paths:
537
+
538
+ - interval 0 => path 1 using p0
539
+ - interval 1-8 => path 2 using p
540
+ - interval 9 => path 3 using p9
541
+
542
+ */
543
+
544
+ index = (0x000f0000 & zdb.i[HI]) >> 16;
545
+
546
+ /* 0 <= index <= 15
547
+
548
+ index approximates roughly x^2
549
+
550
+ Map indexes to intervals as follows:
551
+
552
+ 0 -> 0
553
+ 1 -> 1
554
+ ...
555
+ 8 -> 8
556
+ 9 -> 9
557
+ ...
558
+ 15 -> 9
559
+
560
+ For this mapping, filter first the case 0 -> 0
561
+ In consequence, 1 <= index <= 15, i.e.
562
+ 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
563
+
564
+ 0 -> 1
565
+ ...
566
+ 7 -> 8
567
+ 8 -> 9
568
+ ...
569
+ 15 -> 9
570
+
571
+ Thus it suffices to check the 3rd bit of index - 1 after the first filter.
572
+
573
+ */
574
+
575
+ if (index == 0) {
576
+ /* Path 1 using p0 */
577
+
578
+ p0_quick(&asinh, &asinm, x, xdb.i[HI]);
579
+
580
+ /* Rounding test */
581
+
582
+ if(asinh == (asinh + (asinm * RNROUNDCST)))
583
+ return asinh;
584
+
585
+ /* Rounding test failed, launch accurate phase */
586
+
587
+ #if EVAL_PERF
588
+ crlibm_second_step_taken++;
589
+ #endif
590
+
591
+ p0_accu(&asinh, &asinm, &asinl, x);
592
+
593
+ /* Final rounding */
594
+
595
+ RoundToNearest3(&asin,asinh,asinm,asinl);
596
+
597
+ return asin;
598
+ }
599
+
600
+ /* Strip off the sign of argument x */
601
+ sign = 1.0;
602
+ if (x < 0.0) sign = -sign;
603
+
604
+ index--;
605
+ if ((index & 0x8) != 0) {
606
+ /* Path 3 using p9 */
607
+
608
+ /* Do argument reduction using a MI_9 as a midpoint value
609
+ for the polynomial and compute exactly zp = 2 * (1 - x)
610
+ for the asymptotical approximation using a square root.
611
+ */
612
+
613
+ z = xabs - MI_9;
614
+ zp = 2.0 * (1.0 - xabs);
615
+
616
+ /* Polynomial approximation and square root extraction */
617
+
618
+ p9_quick(&p9h, &p9m, z);
619
+ p9h = -p9h;
620
+ p9m = -p9m;
621
+
622
+ sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
623
+
624
+ /* Reconstruction */
625
+
626
+ Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
627
+ Add22(&asinh,&asinm,PIHALFH,PIHALFM,t1h,t1m);
628
+
629
+ /* Rounding test */
630
+
631
+ if(asinh == (asinh + (asinm * RNROUNDCST)))
632
+ return sign * asinh;
633
+
634
+ /* Rounding test failed, launch accurate phase */
635
+
636
+ #if EVAL_PERF
637
+ crlibm_second_step_taken++;
638
+ #endif
639
+
640
+ p9_accu(&p9h, &p9m, &p9l, z);
641
+ p9h = -p9h;
642
+ p9m = -p9m;
643
+ p9l = -p9l;
644
+
645
+ Sqrt13(&sqrh,&sqrm,&sqrl,zp);
646
+
647
+ /* Reconstruction */
648
+
649
+ Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
650
+ Add33(&asinhover,&asinmover,&asinlover,PIHALFH,PIHALFM,PIHALFL,t1h,t1m,t1l);
651
+
652
+ Renormalize3(&asinh,&asinm,&asinl,asinhover,asinmover,asinlover);
653
+
654
+ /* Final rounding */
655
+
656
+ RoundToNearest3(&asin,asinh,asinm,asinl);
657
+
658
+ return sign * asin;
659
+
660
+ }
661
+
662
+ /* Path 2 using p */
663
+
664
+ /* Do argument reduction using a table value for
665
+ the midpoint value
666
+ */
667
+
668
+ z = xabs - mi_i;
669
+
670
+ p_quick(&asinh, &asinm, z, index);
671
+
672
+
673
+ /* Rounding test */
674
+
675
+ if(asinh == (asinh + (asinm * RNROUNDCST)))
676
+ return sign * asinh;
677
+
678
+ /* Rounding test failed, launch accurate phase */
679
+
680
+ #if EVAL_PERF
681
+ crlibm_second_step_taken++;
682
+ #endif
683
+
684
+ p_accu(&asinh, &asinm, &asinl, z, index);
685
+
686
+ /* Final rounding */
687
+
688
+ RoundToNearest3(&asin,asinh,asinm,asinl);
689
+
690
+ return sign * asin;
691
+
692
+ }
693
+
694
+
695
+ double asin_ru(double x) {
696
+ db_number xdb, zdb;
697
+ double sign, z, zp;
698
+ int index;
699
+ double asinh, asinm, asinl;
700
+ double asinhover, asinmover, asinlover;
701
+ double p9h, p9m, p9l, sqrh, sqrm, sqrl;
702
+ double t1h, t1m, t1l;
703
+ double xabs;
704
+
705
+ /* Start already computations for argument reduction */
706
+
707
+ zdb.d = 1.0 + x * x;
708
+
709
+ xdb.d = x;
710
+
711
+ /* Special case handling */
712
+
713
+ /* Remove sign of x in floating-point */
714
+ xabs = ABS(x);
715
+ xdb.i[HI] &= 0x7fffffff;
716
+
717
+ /* If |x| < 2^(-28) we have
718
+
719
+ arcsin(x) = x * ( 1 + xi )
720
+
721
+ with 0 <= xi < 2^(-55)
722
+
723
+ So we can decide the rounding without any computation
724
+ */
725
+ if (xdb.i[HI] < ASINSIMPLEBOUND) {
726
+ /* If x == 0 then we got the algebraic result arcsin(0) = 0
727
+ If x < 0 then the truncation rest is negative but less than
728
+ 1 ulp; we round upwards by returning x
729
+ */
730
+ if (x <= 0.0) return x;
731
+ /* Otherwise the rest is positive, less than 1 ulp and the
732
+ image is not algebraic
733
+ We return x + 1ulp
734
+ */
735
+ xdb.l++;
736
+ return xdb.d;
737
+ }
738
+
739
+ /* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
740
+ if (xdb.i[HI] >= 0x3ff00000) {
741
+ if (x == 1.0) {
742
+ return PIHALFRU;
743
+ }
744
+ if (x == -1.0) {
745
+ return - PIHALFH;
746
+ }
747
+ return (x-x)/0.0; /* return NaN */
748
+ }
749
+
750
+ /* Argument reduction:
751
+
752
+ We have 10 intervals and 3 paths:
753
+
754
+ - interval 0 => path 1 using p0
755
+ - interval 1-8 => path 2 using p
756
+ - interval 9 => path 3 using p9
757
+
758
+ */
759
+
760
+ index = (0x000f0000 & zdb.i[HI]) >> 16;
761
+
762
+ /* 0 <= index <= 15
763
+
764
+ index approximates roughly x^2
765
+
766
+ Map indexes to intervals as follows:
767
+
768
+ 0 -> 0
769
+ 1 -> 1
770
+ ...
771
+ 8 -> 8
772
+ 9 -> 9
773
+ ...
774
+ 15 -> 9
775
+
776
+ For this mapping, filter first the case 0 -> 0
777
+ In consequence, 1 <= index <= 15, i.e.
778
+ 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
779
+
780
+ 0 -> 1
781
+ ...
782
+ 7 -> 8
783
+ 8 -> 9
784
+ ...
785
+ 15 -> 9
786
+
787
+ Thus it suffices to check the 3rd bit of index - 1 after the first filter.
788
+
789
+ */
790
+
791
+ if (index == 0) {
792
+ /* Path 1 using p0 */
793
+
794
+ p0_quick(&asinh, &asinm, x, xdb.i[HI]);
795
+
796
+ /* Rounding test */
797
+
798
+ TEST_AND_RETURN_RU(asinh, asinm, RDROUNDCST);
799
+
800
+ /* Rounding test failed, launch accurate phase */
801
+
802
+ #if EVAL_PERF
803
+ crlibm_second_step_taken++;
804
+ #endif
805
+
806
+ p0_accu(&asinh, &asinm, &asinl, x);
807
+
808
+ /* Final rounding */
809
+
810
+ ReturnRoundUpwards3(asinh,asinm,asinl);
811
+
812
+ }
813
+
814
+ /* Strip off the sign of argument x */
815
+ sign = 1.0;
816
+ if (x < 0.0) sign = -sign;
817
+
818
+ index--;
819
+ if ((index & 0x8) != 0) {
820
+ /* Path 3 using p9 */
821
+
822
+ /* Do argument reduction using a MI_9 as a midpoint value
823
+ for the polynomial and compute exactly zp = 2 * (1 - x)
824
+ for the asymptotical approximation using a square root.
825
+ */
826
+
827
+ z = xabs - MI_9;
828
+ zp = 2.0 * (1.0 - xabs);
829
+
830
+ /* Polynomial approximation and square root extraction */
831
+
832
+ p9_quick(&p9h, &p9m, z);
833
+ p9h = -p9h;
834
+ p9m = -p9m;
835
+
836
+ sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
837
+
838
+ /* Reconstruction */
839
+
840
+ Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
841
+ Add22(&asinh,&asinm,PIHALFH,PIHALFM,t1h,t1m);
842
+
843
+ /* Rounding test */
844
+
845
+ asinh *= sign;
846
+ asinm *= sign;
847
+
848
+ TEST_AND_RETURN_RU(asinh, asinm, RDROUNDCST);
849
+
850
+ /* Rounding test failed, launch accurate phase */
851
+
852
+ #if EVAL_PERF
853
+ crlibm_second_step_taken++;
854
+ #endif
855
+
856
+ p9_accu(&p9h, &p9m, &p9l, z);
857
+ p9h = -p9h;
858
+ p9m = -p9m;
859
+ p9l = -p9l;
860
+
861
+ Sqrt13(&sqrh,&sqrm,&sqrl,zp);
862
+
863
+ /* Reconstruction */
864
+
865
+ Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
866
+ Add33(&asinhover,&asinmover,&asinlover,PIHALFH,PIHALFM,PIHALFL,t1h,t1m,t1l);
867
+
868
+ Renormalize3(&asinh,&asinm,&asinl,asinhover,asinmover,asinlover);
869
+
870
+ /* Final rounding */
871
+
872
+ asinh *= sign;
873
+ asinm *= sign;
874
+ asinl *= sign;
875
+
876
+ ReturnRoundUpwards3(asinh,asinm,asinl);
877
+
878
+ }
879
+
880
+ /* Path 2 using p */
881
+
882
+ /* Do argument reduction using a table value for
883
+ the midpoint value
884
+ */
885
+
886
+ z = xabs - mi_i;
887
+
888
+ p_quick(&asinh, &asinm, z, index);
889
+
890
+
891
+ /* Rounding test */
892
+
893
+ asinh *= sign;
894
+ asinm *= sign;
895
+
896
+ TEST_AND_RETURN_RU(asinh, asinm, RDROUNDCST);
897
+
898
+ /* Rounding test failed, launch accurate phase */
899
+
900
+ #if EVAL_PERF
901
+ crlibm_second_step_taken++;
902
+ #endif
903
+
904
+ if (x == ASINBADCASEX) return ASINBADCASEYRU;
905
+
906
+ p_accu(&asinh, &asinm, &asinl, z, index);
907
+
908
+ /* Final rounding */
909
+
910
+ asinh *= sign;
911
+ asinm *= sign;
912
+ asinl *= sign;
913
+
914
+ ReturnRoundUpwards3(asinh,asinm,asinl);
915
+
916
+ }
917
+
918
+ double asin_rd(double x) {
919
+ db_number xdb, zdb;
920
+ double sign, z, zp;
921
+ int index;
922
+ double asinh, asinm, asinl;
923
+ double asinhover, asinmover, asinlover;
924
+ double p9h, p9m, p9l, sqrh, sqrm, sqrl;
925
+ double t1h, t1m, t1l;
926
+ double xabs;
927
+
928
+ /* Start already computations for argument reduction */
929
+
930
+ zdb.d = 1.0 + x * x;
931
+
932
+ xdb.d = x;
933
+
934
+ /* Special case handling */
935
+
936
+ /* Remove sign of x in floating-point */
937
+ xabs = ABS(x);
938
+ xdb.i[HI] &= 0x7fffffff;
939
+
940
+ /* If |x| < 2^(-28) we have
941
+
942
+ arcsin(x) = x * ( 1 + xi )
943
+
944
+ with 0 <= xi < 2^(-55)
945
+
946
+ So we can decide the rounding without any computation
947
+ */
948
+ if (xdb.i[HI] < ASINSIMPLEBOUND) {
949
+ /* If x == 0 then we got the algebraic result arcsin(0) = 0
950
+ If x > 0 then the truncation rest is positive but less than
951
+ 1 ulp; we round downwards by returning x
952
+ */
953
+ if (x >= 0) return x;
954
+ /* Otherwise the rest is negative, less than 1 ulp and the
955
+ image is not algebraic
956
+ We return x - 1ulp
957
+ We stripped off the sign, so we add 1 ulp to -x (in xdb.d) and multiply by -1
958
+ */
959
+ xdb.l++;
960
+ return -1 * xdb.d;
961
+ }
962
+
963
+ /* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
964
+ if (xdb.i[HI] >= 0x3ff00000) {
965
+ if (x == 1.0) {
966
+ return PIHALFH;
967
+ }
968
+ if (x == -1.0) {
969
+ return - PIHALFRU;
970
+ }
971
+ return (x-x)/0.0; /* return NaN */
972
+ }
973
+
974
+ /* Argument reduction:
975
+
976
+ We have 10 intervals and 3 paths:
977
+
978
+ - interval 0 => path 1 using p0
979
+ - interval 1-8 => path 2 using p
980
+ - interval 9 => path 3 using p9
981
+
982
+ */
983
+
984
+ index = (0x000f0000 & zdb.i[HI]) >> 16;
985
+
986
+ /* 0 <= index <= 15
987
+
988
+ index approximates roughly x^2
989
+
990
+ Map indexes to intervals as follows:
991
+
992
+ 0 -> 0
993
+ 1 -> 1
994
+ ...
995
+ 8 -> 8
996
+ 9 -> 9
997
+ ...
998
+ 15 -> 9
999
+
1000
+ For this mapping, filter first the case 0 -> 0
1001
+ In consequence, 1 <= index <= 15, i.e.
1002
+ 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
1003
+
1004
+ 0 -> 1
1005
+ ...
1006
+ 7 -> 8
1007
+ 8 -> 9
1008
+ ...
1009
+ 15 -> 9
1010
+
1011
+ Thus it suffices to check the 3rd bit of index - 1 after the first filter.
1012
+
1013
+ */
1014
+
1015
+ if (index == 0) {
1016
+ /* Path 1 using p0 */
1017
+
1018
+ p0_quick(&asinh, &asinm, x, xdb.i[HI]);
1019
+
1020
+ /* Rounding test */
1021
+
1022
+ TEST_AND_RETURN_RD(asinh, asinm, RDROUNDCST);
1023
+
1024
+ /* Rounding test failed, launch accurate phase */
1025
+
1026
+ #if EVAL_PERF
1027
+ crlibm_second_step_taken++;
1028
+ #endif
1029
+
1030
+ p0_accu(&asinh, &asinm, &asinl, x);
1031
+
1032
+ /* Final rounding */
1033
+
1034
+ ReturnRoundDownwards3(asinh,asinm,asinl);
1035
+
1036
+ }
1037
+
1038
+ /* Strip off the sign of argument x */
1039
+ sign = 1.0;
1040
+ if (x < 0.0) sign = -sign;
1041
+
1042
+ index--;
1043
+ if ((index & 0x8) != 0) {
1044
+ /* Path 3 using p9 */
1045
+
1046
+ /* Do argument reduction using a MI_9 as a midpoint value
1047
+ for the polynomial and compute exactly zp = 2 * (1 - x)
1048
+ for the asymptotical approximation using a square root.
1049
+ */
1050
+
1051
+ z = xabs - MI_9;
1052
+ zp = 2.0 * (1.0 - xabs);
1053
+
1054
+ /* Polynomial approximation and square root extraction */
1055
+
1056
+ p9_quick(&p9h, &p9m, z);
1057
+ p9h = -p9h;
1058
+ p9m = -p9m;
1059
+
1060
+ sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
1061
+
1062
+ /* Reconstruction */
1063
+
1064
+ Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
1065
+ Add22(&asinh,&asinm,PIHALFH,PIHALFM,t1h,t1m);
1066
+
1067
+ /* Rounding test */
1068
+
1069
+ asinh *= sign;
1070
+ asinm *= sign;
1071
+
1072
+ TEST_AND_RETURN_RD(asinh, asinm, RDROUNDCST);
1073
+
1074
+ /* Rounding test failed, launch accurate phase */
1075
+
1076
+ #if EVAL_PERF
1077
+ crlibm_second_step_taken++;
1078
+ #endif
1079
+
1080
+ p9_accu(&p9h, &p9m, &p9l, z);
1081
+ p9h = -p9h;
1082
+ p9m = -p9m;
1083
+ p9l = -p9l;
1084
+
1085
+ Sqrt13(&sqrh,&sqrm,&sqrl,zp);
1086
+
1087
+ /* Reconstruction */
1088
+
1089
+ Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
1090
+ Add33(&asinhover,&asinmover,&asinlover,PIHALFH,PIHALFM,PIHALFL,t1h,t1m,t1l);
1091
+
1092
+ Renormalize3(&asinh,&asinm,&asinl,asinhover,asinmover,asinlover);
1093
+
1094
+ /* Final rounding */
1095
+
1096
+ asinh *= sign;
1097
+ asinm *= sign;
1098
+ asinl *= sign;
1099
+
1100
+ ReturnRoundDownwards3(asinh,asinm,asinl);
1101
+
1102
+ }
1103
+
1104
+ /* Path 2 using p */
1105
+
1106
+ /* Do argument reduction using a table value for
1107
+ the midpoint value
1108
+ */
1109
+
1110
+ z = xabs - mi_i;
1111
+
1112
+ p_quick(&asinh, &asinm, z, index);
1113
+
1114
+
1115
+ /* Rounding test */
1116
+
1117
+ asinh *= sign;
1118
+ asinm *= sign;
1119
+
1120
+ TEST_AND_RETURN_RD(asinh, asinm, RDROUNDCST);
1121
+
1122
+ /* Rounding test failed, launch accurate phase */
1123
+
1124
+ #if EVAL_PERF
1125
+ crlibm_second_step_taken++;
1126
+ #endif
1127
+
1128
+ if (x == ASINBADCASEX) return ASINBADCASEYRD;
1129
+
1130
+ p_accu(&asinh, &asinm, &asinl, z, index);
1131
+
1132
+ /* Final rounding */
1133
+
1134
+ asinh *= sign;
1135
+ asinm *= sign;
1136
+ asinl *= sign;
1137
+
1138
+ ReturnRoundDownwards3(asinh,asinm,asinl);
1139
+
1140
+ }
1141
+
1142
+ double asin_rz(double x) {
1143
+ db_number xdb, zdb;
1144
+ double sign, z, zp;
1145
+ int index;
1146
+ double asinh, asinm, asinl;
1147
+ double asinhover, asinmover, asinlover;
1148
+ double p9h, p9m, p9l, sqrh, sqrm, sqrl;
1149
+ double t1h, t1m, t1l;
1150
+ double xabs;
1151
+
1152
+ /* Start already computations for argument reduction */
1153
+
1154
+ zdb.d = 1.0 + x * x;
1155
+
1156
+ xdb.d = x;
1157
+
1158
+ /* Special case handling */
1159
+
1160
+ /* Remove sign of x in floating-point */
1161
+ xabs = ABS(x);
1162
+ xdb.i[HI] &= 0x7fffffff;
1163
+
1164
+ /* If |x| < 2^(-28) we have
1165
+
1166
+ arcsin(x) = x * ( 1 + xi )
1167
+
1168
+ with 0 <= xi < 2^(-55)
1169
+
1170
+ So we can decide the rounding without any computation
1171
+ */
1172
+ if (xdb.i[HI] < ASINSIMPLEBOUND) {
1173
+ /* If x == 0 the result is algebraic and equal to 0
1174
+ If x < 0 the truncation rest is negative and less than 1 ulp, we return x
1175
+ If x > 0 the truncation rest is positive and less than 1 ulp, we return x
1176
+ */
1177
+ return x;
1178
+ }
1179
+
1180
+ /* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
1181
+ if (xdb.i[HI] >= 0x3ff00000) {
1182
+ if (x == 1.0) {
1183
+ return PIHALFH;
1184
+ }
1185
+ if (x == -1.0) {
1186
+ return - PIHALFH;
1187
+ }
1188
+ return (x-x)/0.0; /* return NaN */
1189
+ }
1190
+
1191
+ /* Argument reduction:
1192
+
1193
+ We have 10 intervals and 3 paths:
1194
+
1195
+ - interval 0 => path 1 using p0
1196
+ - interval 1-8 => path 2 using p
1197
+ - interval 9 => path 3 using p9
1198
+
1199
+ */
1200
+
1201
+ index = (0x000f0000 & zdb.i[HI]) >> 16;
1202
+
1203
+ /* 0 <= index <= 15
1204
+
1205
+ index approximates roughly x^2
1206
+
1207
+ Map indexes to intervals as follows:
1208
+
1209
+ 0 -> 0
1210
+ 1 -> 1
1211
+ ...
1212
+ 8 -> 8
1213
+ 9 -> 9
1214
+ ...
1215
+ 15 -> 9
1216
+
1217
+ For this mapping, filter first the case 0 -> 0
1218
+ In consequence, 1 <= index <= 15, i.e.
1219
+ 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
1220
+
1221
+ 0 -> 1
1222
+ ...
1223
+ 7 -> 8
1224
+ 8 -> 9
1225
+ ...
1226
+ 15 -> 9
1227
+
1228
+ Thus it suffices to check the 3rd bit of index - 1 after the first filter.
1229
+
1230
+ */
1231
+
1232
+ if (index == 0) {
1233
+ /* Path 1 using p0 */
1234
+
1235
+ p0_quick(&asinh, &asinm, x, xdb.i[HI]);
1236
+
1237
+ /* Rounding test */
1238
+
1239
+ TEST_AND_RETURN_RZ(asinh, asinm, RDROUNDCST);
1240
+
1241
+ /* Rounding test failed, launch accurate phase */
1242
+
1243
+ #if EVAL_PERF
1244
+ crlibm_second_step_taken++;
1245
+ #endif
1246
+
1247
+ p0_accu(&asinh, &asinm, &asinl, x);
1248
+
1249
+ /* Final rounding */
1250
+
1251
+ ReturnRoundTowardsZero3(asinh,asinm,asinl);
1252
+
1253
+ }
1254
+
1255
+ /* Strip off the sign of argument x */
1256
+ sign = 1.0;
1257
+ if (x < 0.0) sign = -sign;
1258
+
1259
+ index--;
1260
+ if ((index & 0x8) != 0) {
1261
+ /* Path 3 using p9 */
1262
+
1263
+ /* Do argument reduction using a MI_9 as a midpoint value
1264
+ for the polynomial and compute exactly zp = 2 * (1 - x)
1265
+ for the asymptotical approximation using a square root.
1266
+ */
1267
+
1268
+ z = xabs - MI_9;
1269
+ zp = 2.0 * (1.0 - xabs);
1270
+
1271
+ /* Polynomial approximation and square root extraction */
1272
+
1273
+ p9_quick(&p9h, &p9m, z);
1274
+ p9h = -p9h;
1275
+ p9m = -p9m;
1276
+
1277
+ sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
1278
+
1279
+ /* Reconstruction */
1280
+
1281
+ Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
1282
+ Add22(&asinh,&asinm,PIHALFH,PIHALFM,t1h,t1m);
1283
+
1284
+ /* Rounding test */
1285
+
1286
+ asinh *= sign;
1287
+ asinm *= sign;
1288
+
1289
+ TEST_AND_RETURN_RZ(asinh, asinm, RDROUNDCST);
1290
+
1291
+ /* Rounding test failed, launch accurate phase */
1292
+
1293
+ #if EVAL_PERF
1294
+ crlibm_second_step_taken++;
1295
+ #endif
1296
+
1297
+ p9_accu(&p9h, &p9m, &p9l, z);
1298
+ p9h = -p9h;
1299
+ p9m = -p9m;
1300
+ p9l = -p9l;
1301
+
1302
+ Sqrt13(&sqrh,&sqrm,&sqrl,zp);
1303
+
1304
+ /* Reconstruction */
1305
+
1306
+ Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
1307
+ Add33(&asinhover,&asinmover,&asinlover,PIHALFH,PIHALFM,PIHALFL,t1h,t1m,t1l);
1308
+
1309
+ Renormalize3(&asinh,&asinm,&asinl,asinhover,asinmover,asinlover);
1310
+
1311
+ /* Final rounding */
1312
+
1313
+ asinh *= sign;
1314
+ asinm *= sign;
1315
+ asinl *= sign;
1316
+
1317
+ ReturnRoundTowardsZero3(asinh,asinm,asinl);
1318
+
1319
+ }
1320
+
1321
+ /* Path 2 using p */
1322
+
1323
+ /* Do argument reduction using a table value for
1324
+ the midpoint value
1325
+ */
1326
+
1327
+ z = xabs - mi_i;
1328
+
1329
+ p_quick(&asinh, &asinm, z, index);
1330
+
1331
+
1332
+ /* Rounding test */
1333
+
1334
+ asinh *= sign;
1335
+ asinm *= sign;
1336
+
1337
+ TEST_AND_RETURN_RZ(asinh, asinm, RDROUNDCST);
1338
+
1339
+ /* Rounding test failed, launch accurate phase */
1340
+
1341
+ #if EVAL_PERF
1342
+ crlibm_second_step_taken++;
1343
+ #endif
1344
+
1345
+ if (x == ASINBADCASEX) return ASINBADCASEYRD;
1346
+
1347
+ p_accu(&asinh, &asinm, &asinl, z, index);
1348
+
1349
+ /* Final rounding */
1350
+
1351
+ asinh *= sign;
1352
+ asinm *= sign;
1353
+ asinl *= sign;
1354
+
1355
+ ReturnRoundTowardsZero3(asinh,asinm,asinl);
1356
+
1357
+ }
1358
+
1359
+ double acos_rn(double x) {
1360
+ db_number xdb, zdb;
1361
+ double z, zp;
1362
+ int index;
1363
+ double asinh, asinm, asinl;
1364
+ double acosh, acosm, acosl;
1365
+ double acoshover, acosmover, acoslover;
1366
+ double p9h, p9m, p9l, sqrh, sqrm, sqrl;
1367
+ double t1h, t1m, t1l;
1368
+ double xabs;
1369
+
1370
+ /* Start already computations for argument reduction */
1371
+
1372
+ zdb.d = 1.0 + x * x;
1373
+
1374
+ xdb.d = x;
1375
+
1376
+ /* Special case handling */
1377
+
1378
+ /* Remove sign of x in floating-point */
1379
+ xabs = ABS(x);
1380
+ xdb.i[HI] &= 0x7fffffff;
1381
+
1382
+ /* If |x| < 2^(-28) we have
1383
+
1384
+ arccos(x) = (double-double(pi/2) - x) * ( 1 + xi )
1385
+
1386
+ with 0 <= xi < 2^(-87)
1387
+
1388
+ From 2^(-27) we have no bad rounding case longer than 5 bits
1389
+ more than the ulp of x, thus the approximation suffices.
1390
+
1391
+ */
1392
+ if (xdb.i[HI] < ACOSSIMPLEBOUND) {
1393
+ Add212(&acosh,&acosm,PIHALFH,PIHALFM,-x);
1394
+
1395
+ return acosh;
1396
+ }
1397
+
1398
+ /* acos is defined on -1 <= x <= 1, elsewhere it is NaN */
1399
+ if (xdb.i[HI] >= 0x3ff00000) {
1400
+ if (x == 1.0) {
1401
+ return 0.0;
1402
+ }
1403
+ if (x == -1.0) {
1404
+ return PIH;
1405
+ }
1406
+ return (x-x)/0.0; /* return NaN */
1407
+ }
1408
+
1409
+ /* Argument reduction:
1410
+
1411
+ We have 10 intervals and 3 paths:
1412
+
1413
+ - interval 0 => path 1 using p0
1414
+ - interval 1-8 => path 2 using p
1415
+ - interval 9 => path 3 using p9
1416
+
1417
+ */
1418
+
1419
+ index = (0x000f0000 & zdb.i[HI]) >> 16;
1420
+
1421
+ /* 0 <= index <= 15
1422
+
1423
+ index approximates roughly x^2
1424
+
1425
+ Map indexes to intervals as follows:
1426
+
1427
+ 0 -> 0
1428
+ 1 -> 1
1429
+ ...
1430
+ 8 -> 8
1431
+ 9 -> 9
1432
+ ...
1433
+ 15 -> 9
1434
+
1435
+ For this mapping, filter first the case 0 -> 0
1436
+ In consequence, 1 <= index <= 15, i.e.
1437
+ 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
1438
+
1439
+ 0 -> 1
1440
+ ...
1441
+ 7 -> 8
1442
+ 8 -> 9
1443
+ ...
1444
+ 15 -> 9
1445
+
1446
+ Thus it suffices to check the 3rd bit of index - 1 after the first filter.
1447
+
1448
+ */
1449
+
1450
+ if (index == 0) {
1451
+ /* Path 1 using p0 */
1452
+
1453
+ p0_quick(&asinh, &asinm, x, xdb.i[HI]);
1454
+
1455
+ /* Recompose acos
1456
+
1457
+ No cancellation possible because
1458
+
1459
+ |asin(x)| <= 0.264 for |x| <= 0.26
1460
+
1461
+ */
1462
+
1463
+ asinh = - asinh;
1464
+ asinm = - asinm;
1465
+
1466
+ Add22(&acosh,&acosm,PIHALFH,PIHALFM,asinh,asinm);
1467
+
1468
+ /* Rounding test */
1469
+
1470
+ if(acosh == (acosh + (acosm * RNROUNDCST)))
1471
+ return acosh;
1472
+
1473
+ /* Rounding test failed, launch accurate phase */
1474
+
1475
+ #if EVAL_PERF
1476
+ crlibm_second_step_taken++;
1477
+ #endif
1478
+
1479
+ p0_accu(&asinh, &asinm, &asinl, x);
1480
+
1481
+ /* Recompose acos */
1482
+
1483
+ asinh = -asinh;
1484
+ asinm = -asinm;
1485
+ asinl = -asinl;
1486
+
1487
+ Add33(&acoshover,&acosmover,&acoslover,PIHALFH,PIHALFM,PIHALFL,asinh,asinm,asinl);
1488
+
1489
+ Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
1490
+
1491
+ /* Final rounding */
1492
+
1493
+ ReturnRoundToNearest3(acosh,acosm,acosl);
1494
+
1495
+ }
1496
+
1497
+ index--;
1498
+ if ((index & 0x8) != 0) {
1499
+ /* Path 3 using p9 */
1500
+
1501
+ /* Do argument reduction using a MI_9 as a midpoint value
1502
+ for the polynomial and compute exactly zp = 2 * (1 - x)
1503
+ for the asymptotical approximation using a square root.
1504
+ */
1505
+
1506
+ z = xabs - MI_9;
1507
+ zp = 2.0 * (1.0 - xabs);
1508
+
1509
+ /* Polynomial approximation and square root extraction */
1510
+
1511
+ p9_quick(&p9h, &p9m, z);
1512
+
1513
+ sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
1514
+
1515
+ /* Reconstruction */
1516
+
1517
+ Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
1518
+
1519
+ if (x > 0.0) {
1520
+ acosh = t1h;
1521
+ acosm = t1m;
1522
+ } else {
1523
+ /* arccos(-x) = Pi - arccos(x) */
1524
+ t1h = - t1h;
1525
+ t1m = - t1m;
1526
+
1527
+ Add22(&acosh,&acosm,PIH,PIM,t1h,t1m);
1528
+ }
1529
+
1530
+ /* Rounding test */
1531
+
1532
+ if(acosh == (acosh + (acosm * RNROUNDCST)))
1533
+ return acosh;
1534
+
1535
+ /* Rounding test failed, launch accurate phase */
1536
+
1537
+ #if EVAL_PERF
1538
+ crlibm_second_step_taken++;
1539
+ #endif
1540
+
1541
+ p9_accu(&p9h, &p9m, &p9l, z);
1542
+
1543
+ Sqrt13(&sqrh,&sqrm,&sqrl,zp);
1544
+
1545
+ /* Reconstruction */
1546
+
1547
+ Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
1548
+
1549
+ if (x > 0.0) {
1550
+ Renormalize3(&acosh,&acosm,&acosl,t1h,t1m,t1l);
1551
+ } else {
1552
+ /* arccos(-x) = Pi - arccos(x) */
1553
+ t1h = - t1h;
1554
+ t1m = - t1m;
1555
+ t1l = - t1l;
1556
+
1557
+ Add33(&acoshover,&acosmover,&acoslover,PIH,PIM,PIL,t1h,t1m,t1l);
1558
+
1559
+ Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
1560
+ }
1561
+
1562
+ /* Final rounding */
1563
+
1564
+ ReturnRoundToNearest3(acosh,acosm,acosl);
1565
+
1566
+ }
1567
+
1568
+ /* Path 2 using p */
1569
+
1570
+ /* Do argument reduction using a table value for
1571
+ the midpoint value
1572
+ */
1573
+
1574
+ z = xabs - mi_i;
1575
+
1576
+ p_quick(&asinh, &asinm, z, index);
1577
+
1578
+ /* Recompose acos(x) out of asin(abs(x))
1579
+
1580
+ In the case of a substraction, we will cancel
1581
+ not more than 1 bit.
1582
+
1583
+ */
1584
+
1585
+ if (x > 0.0) {
1586
+ asinh = - asinh;
1587
+ asinm = - asinm;
1588
+ }
1589
+
1590
+ Add22Cond(&acosh,&acosm,PIHALFH,PIHALFM,asinh,asinm);
1591
+
1592
+ /* Rounding test */
1593
+
1594
+ if(acosh == (acosh + (acosm * RNROUNDCST)))
1595
+ return acosh;
1596
+
1597
+ /* Rounding test failed, launch accurate phase */
1598
+
1599
+ #if EVAL_PERF
1600
+ crlibm_second_step_taken++;
1601
+ #endif
1602
+
1603
+ p_accu(&asinh, &asinm, &asinl, z, index);
1604
+
1605
+ /* Recompose acos(x) out of asin(abs(x))
1606
+
1607
+ In the case of a substraction, we will cancel
1608
+ not more than 1 bit.
1609
+
1610
+ */
1611
+
1612
+ if (x > 0.0) {
1613
+ asinh = - asinh;
1614
+ asinm = - asinm;
1615
+ asinl = - asinl;
1616
+ }
1617
+
1618
+ Add33Cond(&acoshover,&acosmover,&acoslover,PIHALFH,PIHALFM,PIHALFL,asinh,asinm,asinl);
1619
+
1620
+ Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
1621
+
1622
+
1623
+ /* Final rounding */
1624
+
1625
+ ReturnRoundToNearest3(acosh,acosm,acosl);
1626
+
1627
+
1628
+ }
1629
+
1630
+ double acos_ru(double x) {
1631
+ db_number xdb, zdb, acoshdb;
1632
+ double z, zp;
1633
+ int index;
1634
+ double asinh, asinm, asinl;
1635
+ double acosh, acosm, acosl;
1636
+ double acoshover, acosmover, acoslover;
1637
+ double p9h, p9m, p9l, sqrh, sqrm, sqrl;
1638
+ double t1h, t1m, t1l;
1639
+ double xabs;
1640
+
1641
+ /* Start already computations for argument reduction */
1642
+
1643
+ zdb.d = 1.0 + x * x;
1644
+
1645
+ xdb.d = x;
1646
+
1647
+ /* Special case handling */
1648
+
1649
+ /* Remove sign of x in floating-point */
1650
+ xabs = ABS(x);
1651
+ xdb.i[HI] &= 0x7fffffff;
1652
+
1653
+ /* If |x| < 2^(-28) we have
1654
+
1655
+ arccos(x) = (double-double(pi/2) - x) * ( 1 + xi )
1656
+
1657
+ with 0 <= xi < 2^(-87)
1658
+
1659
+ From 2^(-27) we have no bad rounding case longer than 5 bits
1660
+ more than the ulp of x, thus the approximation suffices.
1661
+
1662
+ */
1663
+ if (xdb.i[HI] < ACOSSIMPLEBOUND) {
1664
+ Add212(&acosh,&acosm,PIHALFH,PIHALFM,-x);
1665
+
1666
+ /* acosh is the round-to-nearest of acos(x) in this domain
1667
+
1668
+ acosm is a at least 20 bit exact correction of the
1669
+ rounding error of this round-to-nearest rounding.
1670
+
1671
+ If acosh is the rounded up result of acos(x), the
1672
+ correction is negative and vice-versa.
1673
+
1674
+ */
1675
+
1676
+ if (acosm < 0.0)
1677
+ return acosh;
1678
+
1679
+ /* Here the correction acosm is positive, acosh is
1680
+ therefore the rounded down result of acos(x).
1681
+ We add thus one ulp.
1682
+ */
1683
+
1684
+ acoshdb.d = acosh;
1685
+ acoshdb.l++;
1686
+
1687
+ return acoshdb.d;
1688
+
1689
+ }
1690
+
1691
+ /* acos is defined on -1 <= x <= 1, elsewhere it is NaN */
1692
+ if (xdb.i[HI] >= 0x3ff00000) {
1693
+ if (x == 1.0) {
1694
+ return 0.0;
1695
+ }
1696
+ if (x == -1.0) {
1697
+ return PIRU;
1698
+ }
1699
+ return (x-x)/0.0; /* return NaN */
1700
+ }
1701
+
1702
+ /* Argument reduction:
1703
+
1704
+ We have 10 intervals and 3 paths:
1705
+
1706
+ - interval 0 => path 1 using p0
1707
+ - interval 1-8 => path 2 using p
1708
+ - interval 9 => path 3 using p9
1709
+
1710
+ */
1711
+
1712
+ index = (0x000f0000 & zdb.i[HI]) >> 16;
1713
+
1714
+ /* 0 <= index <= 15
1715
+
1716
+ index approximates roughly x^2
1717
+
1718
+ Map indexes to intervals as follows:
1719
+
1720
+ 0 -> 0
1721
+ 1 -> 1
1722
+ ...
1723
+ 8 -> 8
1724
+ 9 -> 9
1725
+ ...
1726
+ 15 -> 9
1727
+
1728
+ For this mapping, filter first the case 0 -> 0
1729
+ In consequence, 1 <= index <= 15, i.e.
1730
+ 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
1731
+
1732
+ 0 -> 1
1733
+ ...
1734
+ 7 -> 8
1735
+ 8 -> 9
1736
+ ...
1737
+ 15 -> 9
1738
+
1739
+ Thus it suffices to check the 3rd bit of index - 1 after the first filter.
1740
+
1741
+ */
1742
+
1743
+ if (index == 0) {
1744
+ /* Path 1 using p0 */
1745
+
1746
+ p0_quick(&asinh, &asinm, x, xdb.i[HI]);
1747
+
1748
+ /* Recompose acos
1749
+
1750
+ No cancellation possible because
1751
+
1752
+ |asin(x)| <= 0.264 for |x| <= 0.26
1753
+
1754
+ */
1755
+
1756
+ asinh = - asinh;
1757
+ asinm = - asinm;
1758
+
1759
+ Add22(&acosh,&acosm,PIHALFH,PIHALFM,asinh,asinm);
1760
+
1761
+ /* Rounding test */
1762
+
1763
+ TEST_AND_RETURN_RU(acosh, acosm, RDROUNDCST);
1764
+
1765
+ /* Rounding test failed, launch accurate phase */
1766
+
1767
+ #if EVAL_PERF
1768
+ crlibm_second_step_taken++;
1769
+ #endif
1770
+
1771
+ p0_accu(&asinh, &asinm, &asinl, x);
1772
+
1773
+ /* Recompose acos */
1774
+
1775
+ asinh = -asinh;
1776
+ asinm = -asinm;
1777
+ asinl = -asinl;
1778
+
1779
+ Add33(&acoshover,&acosmover,&acoslover,PIHALFH,PIHALFM,PIHALFL,asinh,asinm,asinl);
1780
+
1781
+ Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
1782
+
1783
+ /* Final rounding */
1784
+
1785
+ ReturnRoundUpwards3(acosh,acosm,acosl);
1786
+
1787
+ }
1788
+
1789
+ index--;
1790
+ if ((index & 0x8) != 0) {
1791
+ /* Path 3 using p9 */
1792
+
1793
+ /* Do argument reduction using a MI_9 as a midpoint value
1794
+ for the polynomial and compute exactly zp = 2 * (1 - x)
1795
+ for the asymptotical approximation using a square root.
1796
+ */
1797
+
1798
+ z = xabs - MI_9;
1799
+ zp = 2.0 * (1.0 - xabs);
1800
+
1801
+ /* Polynomial approximation and square root extraction */
1802
+
1803
+ p9_quick(&p9h, &p9m, z);
1804
+
1805
+ sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
1806
+
1807
+ /* Reconstruction */
1808
+
1809
+ Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
1810
+
1811
+ if (x > 0.0) {
1812
+ acosh = t1h;
1813
+ acosm = t1m;
1814
+ } else {
1815
+ /* arccos(-x) = Pi - arccos(x) */
1816
+ t1h = - t1h;
1817
+ t1m = - t1m;
1818
+
1819
+ Add22(&acosh,&acosm,PIH,PIM,t1h,t1m);
1820
+ }
1821
+
1822
+ /* Rounding test */
1823
+
1824
+ TEST_AND_RETURN_RU(acosh, acosm, RDROUNDCST);
1825
+
1826
+ /* Rounding test failed, launch accurate phase */
1827
+
1828
+ #if EVAL_PERF
1829
+ crlibm_second_step_taken++;
1830
+ #endif
1831
+
1832
+ p9_accu(&p9h, &p9m, &p9l, z);
1833
+
1834
+ Sqrt13(&sqrh,&sqrm,&sqrl,zp);
1835
+
1836
+ /* Reconstruction */
1837
+
1838
+ Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
1839
+
1840
+ if (x > 0.0) {
1841
+ Renormalize3(&acosh,&acosm,&acosl,t1h,t1m,t1l);
1842
+ } else {
1843
+ /* arccos(-x) = Pi - arccos(x) */
1844
+ t1h = - t1h;
1845
+ t1m = - t1m;
1846
+ t1l = - t1l;
1847
+
1848
+ Add33(&acoshover,&acosmover,&acoslover,PIH,PIM,PIL,t1h,t1m,t1l);
1849
+
1850
+ Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
1851
+ }
1852
+
1853
+ /* Final rounding */
1854
+
1855
+ ReturnRoundUpwards3(acosh,acosm,acosl);
1856
+
1857
+ }
1858
+
1859
+ /* Path 2 using p */
1860
+
1861
+ /* Do argument reduction using a table value for
1862
+ the midpoint value
1863
+ */
1864
+
1865
+ z = xabs - mi_i;
1866
+
1867
+ p_quick(&asinh, &asinm, z, index);
1868
+
1869
+ /* Recompose acos(x) out of asin(abs(x))
1870
+
1871
+ In the case of a substraction, we will cancel
1872
+ not more than 1 bit.
1873
+
1874
+ */
1875
+
1876
+ if (x > 0.0) {
1877
+ asinh = - asinh;
1878
+ asinm = - asinm;
1879
+ }
1880
+
1881
+ Add22Cond(&acosh,&acosm,PIHALFH,PIHALFM,asinh,asinm);
1882
+
1883
+ /* Rounding test */
1884
+
1885
+ TEST_AND_RETURN_RU(acosh, acosm, RDROUNDCST);
1886
+
1887
+ /* Rounding test failed, launch accurate phase */
1888
+
1889
+ #if EVAL_PERF
1890
+ crlibm_second_step_taken++;
1891
+ #endif
1892
+
1893
+ p_accu(&asinh, &asinm, &asinl, z, index);
1894
+
1895
+ /* Recompose acos(x) out of asin(abs(x))
1896
+
1897
+ In the case of a substraction, we will cancel
1898
+ not more than 1 bit.
1899
+
1900
+ */
1901
+
1902
+ if (x > 0.0) {
1903
+ asinh = - asinh;
1904
+ asinm = - asinm;
1905
+ asinl = - asinl;
1906
+ }
1907
+
1908
+ Add33Cond(&acoshover,&acosmover,&acoslover,PIHALFH,PIHALFM,PIHALFL,asinh,asinm,asinl);
1909
+
1910
+ Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
1911
+
1912
+
1913
+ /* Final rounding */
1914
+
1915
+ ReturnRoundUpwards3(acosh,acosm,acosl);
1916
+
1917
+ }
1918
+
1919
+
1920
+ double acos_rd(double x) {
1921
+ db_number xdb, zdb, acoshdb;
1922
+ double z, zp;
1923
+ int index;
1924
+ double asinh, asinm, asinl;
1925
+ double acosh, acosm, acosl;
1926
+ double acoshover, acosmover, acoslover;
1927
+ double p9h, p9m, p9l, sqrh, sqrm, sqrl;
1928
+ double t1h, t1m, t1l;
1929
+ double xabs;
1930
+
1931
+ /* Start already computations for argument reduction */
1932
+
1933
+ zdb.d = 1.0 + x * x;
1934
+
1935
+ xdb.d = x;
1936
+
1937
+ /* Special case handling */
1938
+
1939
+ /* Remove sign of x in floating-point */
1940
+ xabs = ABS(x);
1941
+ xdb.i[HI] &= 0x7fffffff;
1942
+
1943
+ /* If |x| < 2^(-28) we have
1944
+
1945
+ arccos(x) = (double-double(pi/2) - x) * ( 1 + xi )
1946
+
1947
+ with 0 <= xi < 2^(-87)
1948
+
1949
+ From 2^(-27) we have no bad rounding case longer than 5 bits
1950
+ more than the ulp of x, thus the approximation suffices.
1951
+
1952
+ */
1953
+ if (xdb.i[HI] < ACOSSIMPLEBOUND) {
1954
+ Add212(&acosh,&acosm,PIHALFH,PIHALFM,-x);
1955
+
1956
+ /* acosh is the round-to-nearest of acos(x) in this domain
1957
+
1958
+ acosm is a at least 20 bit exact correction of the
1959
+ rounding error of this round-to-nearest rounding.
1960
+
1961
+ If acosh is the rounded up result of acos(x), the
1962
+ correction is negative and vice-versa.
1963
+
1964
+ */
1965
+
1966
+ if (acosm > 0.0)
1967
+ return acosh;
1968
+
1969
+ /* Here the correction acosm is negative, acosh is
1970
+ therefore the rounded up result of acos(x).
1971
+ We subtract thus one ulp.
1972
+ */
1973
+
1974
+ acoshdb.d = acosh;
1975
+ acoshdb.l--;
1976
+
1977
+ return acoshdb.d;
1978
+
1979
+ }
1980
+
1981
+ /* acos is defined on -1 <= x <= 1, elsewhere it is NaN */
1982
+ if (xdb.i[HI] >= 0x3ff00000) {
1983
+ if (x == 1.0) {
1984
+ return 0.0;
1985
+ }
1986
+ if (x == -1.0) {
1987
+ return PIH;
1988
+ }
1989
+ return (x-x)/0.0; /* return NaN */
1990
+ }
1991
+
1992
+ /* Argument reduction:
1993
+
1994
+ We have 10 intervals and 3 paths:
1995
+
1996
+ - interval 0 => path 1 using p0
1997
+ - interval 1-8 => path 2 using p
1998
+ - interval 9 => path 3 using p9
1999
+
2000
+ */
2001
+
2002
+ index = (0x000f0000 & zdb.i[HI]) >> 16;
2003
+
2004
+ /* 0 <= index <= 15
2005
+
2006
+ index approximates roughly x^2
2007
+
2008
+ Map indexes to intervals as follows:
2009
+
2010
+ 0 -> 0
2011
+ 1 -> 1
2012
+ ...
2013
+ 8 -> 8
2014
+ 9 -> 9
2015
+ ...
2016
+ 15 -> 9
2017
+
2018
+ For this mapping, filter first the case 0 -> 0
2019
+ In consequence, 1 <= index <= 15, i.e.
2020
+ 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
2021
+
2022
+ 0 -> 1
2023
+ ...
2024
+ 7 -> 8
2025
+ 8 -> 9
2026
+ ...
2027
+ 15 -> 9
2028
+
2029
+ Thus it suffices to check the 3rd bit of index - 1 after the first filter.
2030
+
2031
+ */
2032
+
2033
+ if (index == 0) {
2034
+ /* Path 1 using p0 */
2035
+
2036
+ p0_quick(&asinh, &asinm, x, xdb.i[HI]);
2037
+
2038
+ /* Recompose acos
2039
+
2040
+ No cancellation possible because
2041
+
2042
+ |asin(x)| <= 0.264 for |x| <= 0.26
2043
+
2044
+ */
2045
+
2046
+ asinh = - asinh;
2047
+ asinm = - asinm;
2048
+
2049
+ Add22(&acosh,&acosm,PIHALFH,PIHALFM,asinh,asinm);
2050
+
2051
+ /* Rounding test */
2052
+
2053
+ TEST_AND_RETURN_RD(acosh, acosm, RDROUNDCST);
2054
+
2055
+ /* Rounding test failed, launch accurate phase */
2056
+
2057
+ #if EVAL_PERF
2058
+ crlibm_second_step_taken++;
2059
+ #endif
2060
+
2061
+ p0_accu(&asinh, &asinm, &asinl, x);
2062
+
2063
+ /* Recompose acos */
2064
+
2065
+ asinh = -asinh;
2066
+ asinm = -asinm;
2067
+ asinl = -asinl;
2068
+
2069
+ Add33(&acoshover,&acosmover,&acoslover,PIHALFH,PIHALFM,PIHALFL,asinh,asinm,asinl);
2070
+
2071
+ Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
2072
+
2073
+ /* Final rounding */
2074
+
2075
+ ReturnRoundDownwards3(acosh,acosm,acosl);
2076
+
2077
+ }
2078
+
2079
+ index--;
2080
+ if ((index & 0x8) != 0) {
2081
+ /* Path 3 using p9 */
2082
+
2083
+ /* Do argument reduction using a MI_9 as a midpoint value
2084
+ for the polynomial and compute exactly zp = 2 * (1 - x)
2085
+ for the asymptotical approximation using a square root.
2086
+ */
2087
+
2088
+ z = xabs - MI_9;
2089
+ zp = 2.0 * (1.0 - xabs);
2090
+
2091
+ /* Polynomial approximation and square root extraction */
2092
+
2093
+ p9_quick(&p9h, &p9m, z);
2094
+
2095
+ sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
2096
+
2097
+ /* Reconstruction */
2098
+
2099
+ Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
2100
+
2101
+ if (x > 0.0) {
2102
+ acosh = t1h;
2103
+ acosm = t1m;
2104
+ } else {
2105
+ /* arccos(-x) = Pi - arccos(x) */
2106
+ t1h = - t1h;
2107
+ t1m = - t1m;
2108
+
2109
+ Add22(&acosh,&acosm,PIH,PIM,t1h,t1m);
2110
+ }
2111
+
2112
+ /* Rounding test */
2113
+
2114
+ TEST_AND_RETURN_RD(acosh, acosm, RDROUNDCST);
2115
+
2116
+ /* Rounding test failed, launch accurate phase */
2117
+
2118
+ #if EVAL_PERF
2119
+ crlibm_second_step_taken++;
2120
+ #endif
2121
+
2122
+ p9_accu(&p9h, &p9m, &p9l, z);
2123
+
2124
+ Sqrt13(&sqrh,&sqrm,&sqrl,zp);
2125
+
2126
+ /* Reconstruction */
2127
+
2128
+ Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
2129
+
2130
+ if (x > 0.0) {
2131
+ Renormalize3(&acosh,&acosm,&acosl,t1h,t1m,t1l);
2132
+ } else {
2133
+ /* arccos(-x) = Pi - arccos(x) */
2134
+ t1h = - t1h;
2135
+ t1m = - t1m;
2136
+ t1l = - t1l;
2137
+
2138
+ Add33(&acoshover,&acosmover,&acoslover,PIH,PIM,PIL,t1h,t1m,t1l);
2139
+
2140
+ Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
2141
+ }
2142
+
2143
+ /* Final rounding */
2144
+
2145
+ ReturnRoundDownwards3(acosh,acosm,acosl);
2146
+
2147
+ }
2148
+
2149
+ /* Path 2 using p */
2150
+
2151
+ /* Do argument reduction using a table value for
2152
+ the midpoint value
2153
+ */
2154
+
2155
+ z = xabs - mi_i;
2156
+
2157
+ p_quick(&asinh, &asinm, z, index);
2158
+
2159
+ /* Recompose acos(x) out of asin(abs(x))
2160
+
2161
+ In the case of a substraction, we will cancel
2162
+ not more than 1 bit.
2163
+
2164
+ */
2165
+
2166
+ if (x > 0.0) {
2167
+ asinh = - asinh;
2168
+ asinm = - asinm;
2169
+ }
2170
+
2171
+ Add22Cond(&acosh,&acosm,PIHALFH,PIHALFM,asinh,asinm);
2172
+
2173
+ /* Rounding test */
2174
+
2175
+ TEST_AND_RETURN_RD(acosh, acosm, RDROUNDCST);
2176
+
2177
+ /* Rounding test failed, launch accurate phase */
2178
+
2179
+ #if EVAL_PERF
2180
+ crlibm_second_step_taken++;
2181
+ #endif
2182
+
2183
+ p_accu(&asinh, &asinm, &asinl, z, index);
2184
+
2185
+ /* Recompose acos(x) out of asin(abs(x))
2186
+
2187
+ In the case of a substraction, we will cancel
2188
+ not more than 1 bit.
2189
+
2190
+ */
2191
+
2192
+ if (x > 0.0) {
2193
+ asinh = - asinh;
2194
+ asinm = - asinm;
2195
+ asinl = - asinl;
2196
+ }
2197
+
2198
+ Add33Cond(&acoshover,&acosmover,&acoslover,PIHALFH,PIHALFM,PIHALFL,asinh,asinm,asinl);
2199
+
2200
+ Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
2201
+
2202
+ /* Final rounding */
2203
+
2204
+ ReturnRoundDownwards3(acosh,acosm,acosl);
2205
+
2206
+ }
2207
+
2208
+
2209
+
2210
+
2211
+ double acospi_rn(double x) {
2212
+ db_number xdb, zdb;
2213
+ double z, zp;
2214
+ int index;
2215
+ double asinh, asinm, asinl;
2216
+ double asinpih, asinpim, asinpil;
2217
+ double acosh, acosm, acosl;
2218
+ double acoshover, acosmover, acoslover;
2219
+ double p9h, p9m, p9l, sqrh, sqrm, sqrl;
2220
+ double t1h, t1m, t1l;
2221
+ double t2h, t2m, t2l;
2222
+ double xabs;
2223
+
2224
+ /* Start already computations for argument reduction */
2225
+
2226
+ zdb.d = 1.0 + x * x;
2227
+
2228
+ xdb.d = x;
2229
+
2230
+ /* Special case handling */
2231
+
2232
+ /* Remove sign of x in floating-point */
2233
+ xabs = ABS(x);
2234
+ xdb.i[HI] &= 0x7fffffff;
2235
+
2236
+ /* If |x| < 2^(-54) we have
2237
+
2238
+ arccos(x)/pi = 1/2 * (1 + xi)
2239
+
2240
+ with 0 <= xi < 2^(-54)
2241
+
2242
+ So arcospi(x) = 0.5 in this case.
2243
+
2244
+ */
2245
+ if (xdb.i[HI] < ACOSPISIMPLEBOUND) {
2246
+ return 0.5;
2247
+ }
2248
+
2249
+ /* acospi is defined on -1 <= x <= 1, elsewhere it is NaN */
2250
+ if (xdb.i[HI] >= 0x3ff00000) {
2251
+ if (x == 1.0) {
2252
+ return 0.0;
2253
+ }
2254
+ if (x == -1.0) {
2255
+ return 1.0;
2256
+ }
2257
+ return (x-x)/0.0; /* return NaN */
2258
+ }
2259
+
2260
+ /* Argument reduction:
2261
+
2262
+ We have 10 intervals and 3 paths:
2263
+
2264
+ - interval 0 => path 1 using p0
2265
+ - interval 1-8 => path 2 using p
2266
+ - interval 9 => path 3 using p9
2267
+
2268
+ */
2269
+
2270
+ index = (0x000f0000 & zdb.i[HI]) >> 16;
2271
+
2272
+ /* 0 <= index <= 15
2273
+
2274
+ index approximates roughly x^2
2275
+
2276
+ Map indexes to intervals as follows:
2277
+
2278
+ 0 -> 0
2279
+ 1 -> 1
2280
+ ...
2281
+ 8 -> 8
2282
+ 9 -> 9
2283
+ ...
2284
+ 15 -> 9
2285
+
2286
+ For this mapping, filter first the case 0 -> 0
2287
+ In consequence, 1 <= index <= 15, i.e.
2288
+ 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
2289
+
2290
+ 0 -> 1
2291
+ ...
2292
+ 7 -> 8
2293
+ 8 -> 9
2294
+ ...
2295
+ 15 -> 9
2296
+
2297
+ Thus it suffices to check the 3rd bit of index - 1 after the first filter.
2298
+
2299
+ */
2300
+
2301
+ if (index == 0) {
2302
+ /* Path 1 using p0 */
2303
+
2304
+ p0_quick(&asinh, &asinm, x, xdb.i[HI]);
2305
+
2306
+ /* Recompose acos/pi
2307
+
2308
+ We have
2309
+
2310
+ arccos(x)/pi = 1/2 + (-1/pi) * arcsin(x)
2311
+
2312
+ No cancellation possible because
2313
+
2314
+ |asin(x)/pi| <= 0.0837 for |x| <= 0.26
2315
+
2316
+ */
2317
+
2318
+ Mul22(&asinpih,&asinpim,MRECPRPIH,MRECPRPIM,asinh,asinm);
2319
+
2320
+ Add122(&acosh,&acosm,0.5,asinpih,asinpim);
2321
+
2322
+ /* Rounding test */
2323
+
2324
+ if(acosh == (acosh + (acosm * RNROUNDCST)))
2325
+ return acosh;
2326
+
2327
+ /* Rounding test failed, launch accurate phase */
2328
+
2329
+ #if EVAL_PERF
2330
+ crlibm_second_step_taken++;
2331
+ #endif
2332
+
2333
+ p0_accu(&asinh, &asinm, &asinl, x);
2334
+
2335
+ /* Recompose acos/pi */
2336
+
2337
+ Mul33(&asinpih,&asinpim,&asinpil,MRECPRPIH,MRECPRPIM,MRECPRPIL,asinh,asinm,asinl);
2338
+
2339
+ Add133(&acoshover,&acosmover,&acoslover,0.5,asinpih,asinpim,asinpil);
2340
+
2341
+ Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
2342
+
2343
+ /* Final rounding */
2344
+
2345
+ ReturnRoundToNearest3(acosh,acosm,acosl);
2346
+
2347
+ }
2348
+
2349
+ index--;
2350
+ if ((index & 0x8) != 0) {
2351
+ /* Path 3 using p9 */
2352
+
2353
+ /* Do argument reduction using a MI_9 as a midpoint value
2354
+ for the polynomial and compute exactly zp = 2 * (1 - x)
2355
+ for the asymptotical approximation using a square root.
2356
+ */
2357
+
2358
+ z = xabs - MI_9;
2359
+ zp = 2.0 * (1.0 - xabs);
2360
+
2361
+ /* Polynomial approximation and square root extraction */
2362
+
2363
+ p9_quick(&p9h, &p9m, z);
2364
+
2365
+ sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
2366
+
2367
+ /* Reconstruction */
2368
+
2369
+ Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
2370
+
2371
+ Mul22(&t2h,&t2m,RECPRPIH,RECPRPIM,t1h,t1m);
2372
+
2373
+ if (x > 0.0) {
2374
+ acosh = t2h;
2375
+ acosm = t2m;
2376
+ } else {
2377
+ /* arccos(-x)/pi = 1 - arccos(x)/pi */
2378
+ t2h = - t2h;
2379
+ t2m = - t2m;
2380
+
2381
+ Add122(&acosh,&acosm,1.0,t2h,t2m);
2382
+ }
2383
+
2384
+ /* Rounding test */
2385
+
2386
+ if(acosh == (acosh + (acosm * RNROUNDCST)))
2387
+ return acosh;
2388
+
2389
+ /* Rounding test failed, launch accurate phase */
2390
+
2391
+ #if EVAL_PERF
2392
+ crlibm_second_step_taken++;
2393
+ #endif
2394
+
2395
+ p9_accu(&p9h, &p9m, &p9l, z);
2396
+
2397
+ Sqrt13(&sqrh,&sqrm,&sqrl,zp);
2398
+
2399
+ /* Reconstruction */
2400
+
2401
+ Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
2402
+
2403
+ Mul33(&t2h,&t2m,&t2l,RECPRPIH,RECPRPIM,RECPRPIL,t1h,t1m,t1l);
2404
+
2405
+ if (x > 0.0) {
2406
+ Renormalize3(&acosh,&acosm,&acosl,t2h,t2m,t2l);
2407
+ } else {
2408
+ /* arccos(-x)/pi = 1 - arccos(x)/pi */
2409
+ t2h = - t2h;
2410
+ t2m = - t2m;
2411
+ t2l = - t2l;
2412
+
2413
+ Add133(&acoshover,&acosmover,&acoslover,1.0,t2h,t2m,t2l);
2414
+
2415
+ Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
2416
+ }
2417
+
2418
+ /* Final rounding */
2419
+
2420
+ ReturnRoundToNearest3(acosh,acosm,acosl);
2421
+
2422
+ }
2423
+
2424
+ /* Path 2 using p */
2425
+
2426
+ /* Do argument reduction using a table value for
2427
+ the midpoint value
2428
+ */
2429
+
2430
+ z = xabs - mi_i;
2431
+
2432
+ p_quick(&asinh, &asinm, z, index);
2433
+
2434
+ /* Recompose acos(x)/pi out of asin(abs(x))
2435
+
2436
+ In the case of a substraction, we will cancel
2437
+ not more than 1 bit.
2438
+
2439
+ */
2440
+
2441
+ if (x > 0.0) {
2442
+ asinh = - asinh;
2443
+ asinm = - asinm;
2444
+ }
2445
+
2446
+ Mul22(&asinpih,&asinpim,RECPRPIH,RECPRPIM,asinh,asinm);
2447
+
2448
+ Add122Cond(&acosh,&acosm,0.5,asinpih,asinpim);
2449
+
2450
+ /* Rounding test */
2451
+
2452
+ if(acosh == (acosh + (acosm * RNROUNDCST)))
2453
+ return acosh;
2454
+
2455
+ /* Rounding test failed, launch accurate phase */
2456
+
2457
+ #if EVAL_PERF
2458
+ crlibm_second_step_taken++;
2459
+ #endif
2460
+
2461
+ p_accu(&asinh, &asinm, &asinl, z, index);
2462
+
2463
+ /* Recompose acos(x) out of asin(abs(x))
2464
+
2465
+ In the case of a substraction, we will cancel
2466
+ not more than 1 bit.
2467
+
2468
+ */
2469
+
2470
+ if (x > 0.0) {
2471
+ asinh = - asinh;
2472
+ asinm = - asinm;
2473
+ asinl = - asinl;
2474
+ }
2475
+
2476
+ Mul33(&asinpih,&asinpim,&asinpil,RECPRPIH,RECPRPIM,RECPRPIL,asinh,asinm,asinl);
2477
+
2478
+ Add133Cond(&acoshover,&acosmover,&acoslover,0.5,asinpih,asinpim,asinpil);
2479
+
2480
+ Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
2481
+
2482
+ /* Final rounding */
2483
+
2484
+ ReturnRoundToNearest3(acosh,acosm,acosl);
2485
+
2486
+ }
2487
+
2488
+ double acospi_rd(double x) {
2489
+ db_number xdb, zdb;
2490
+ double z, zp;
2491
+ int index;
2492
+ double asinh, asinm, asinl;
2493
+ double asinpih, asinpim, asinpil;
2494
+ double acosh, acosm, acosl;
2495
+ double acoshover, acosmover, acoslover;
2496
+ double p9h, p9m, p9l, sqrh, sqrm, sqrl;
2497
+ double t1h, t1m, t1l;
2498
+ double t2h, t2m, t2l;
2499
+ double xabs;
2500
+
2501
+ /* Start already computations for argument reduction */
2502
+
2503
+ zdb.d = 1.0 + x * x;
2504
+
2505
+ xdb.d = x;
2506
+
2507
+ /* Special case handling */
2508
+
2509
+ /* Remove sign of x in floating-point */
2510
+ xabs = ABS(x);
2511
+ xdb.i[HI] &= 0x7fffffff;
2512
+
2513
+ /* If |x| < 2^(-54) we have
2514
+
2515
+ arccos(x)/pi = 1/2 * (1 + xi)
2516
+
2517
+ with 0 <= xi < 2^(-54)
2518
+
2519
+ Thus we have
2520
+
2521
+ acospi(x) =
2522
+
2523
+ (i) 0.5 if x <= 0
2524
+ (ii) 0.5 - 1/2 ulp(0.5) if x > 0
2525
+
2526
+ */
2527
+ if (xdb.i[HI] < ACOSPISIMPLEBOUND) {
2528
+ if (x <= 0.0)
2529
+ return 0.5;
2530
+
2531
+ return HALFMINUSHALFULP;
2532
+ }
2533
+
2534
+ /* acospi is defined on -1 <= x <= 1, elsewhere it is NaN */
2535
+ if (xdb.i[HI] >= 0x3ff00000) {
2536
+ if (x == 1.0) {
2537
+ return 0.0;
2538
+ }
2539
+ if (x == -1.0) {
2540
+ return 1.0;
2541
+ }
2542
+ return (x-x)/0.0; /* return NaN */
2543
+ }
2544
+
2545
+ /* Argument reduction:
2546
+
2547
+ We have 10 intervals and 3 paths:
2548
+
2549
+ - interval 0 => path 1 using p0
2550
+ - interval 1-8 => path 2 using p
2551
+ - interval 9 => path 3 using p9
2552
+
2553
+ */
2554
+
2555
+ index = (0x000f0000 & zdb.i[HI]) >> 16;
2556
+
2557
+ /* 0 <= index <= 15
2558
+
2559
+ index approximates roughly x^2
2560
+
2561
+ Map indexes to intervals as follows:
2562
+
2563
+ 0 -> 0
2564
+ 1 -> 1
2565
+ ...
2566
+ 8 -> 8
2567
+ 9 -> 9
2568
+ ...
2569
+ 15 -> 9
2570
+
2571
+ For this mapping, filter first the case 0 -> 0
2572
+ In consequence, 1 <= index <= 15, i.e.
2573
+ 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
2574
+
2575
+ 0 -> 1
2576
+ ...
2577
+ 7 -> 8
2578
+ 8 -> 9
2579
+ ...
2580
+ 15 -> 9
2581
+
2582
+ Thus it suffices to check the 3rd bit of index - 1 after the first filter.
2583
+
2584
+ */
2585
+
2586
+ if (index == 0) {
2587
+ /* Path 1 using p0 */
2588
+
2589
+ p0_quick(&asinh, &asinm, x, xdb.i[HI]);
2590
+
2591
+ /* Recompose acos/pi
2592
+
2593
+ We have
2594
+
2595
+ arccos(x)/pi = 1/2 + (-1/pi) * arcsin(x)
2596
+
2597
+ No cancellation possible because
2598
+
2599
+ |asin(x)/pi| <= 0.0837 for |x| <= 0.26
2600
+
2601
+ */
2602
+
2603
+ Mul22(&asinpih,&asinpim,MRECPRPIH,MRECPRPIM,asinh,asinm);
2604
+
2605
+ Add122(&acosh,&acosm,0.5,asinpih,asinpim);
2606
+
2607
+ /* Rounding test */
2608
+
2609
+ TEST_AND_RETURN_RD(acosh, acosm, RDROUNDCST);
2610
+
2611
+ /* Rounding test failed, launch accurate phase */
2612
+
2613
+ #if EVAL_PERF
2614
+ crlibm_second_step_taken++;
2615
+ #endif
2616
+
2617
+ p0_accu(&asinh, &asinm, &asinl, x);
2618
+
2619
+ /* Recompose acos/pi */
2620
+
2621
+ Mul33(&asinpih,&asinpim,&asinpil,MRECPRPIH,MRECPRPIM,MRECPRPIL,asinh,asinm,asinl);
2622
+
2623
+ Add133(&acoshover,&acosmover,&acoslover,0.5,asinpih,asinpim,asinpil);
2624
+
2625
+ Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
2626
+
2627
+ /* Final rounding */
2628
+
2629
+ ReturnRoundDownwards3(acosh,acosm,acosl);
2630
+
2631
+ }
2632
+
2633
+ index--;
2634
+ if ((index & 0x8) != 0) {
2635
+ /* Path 3 using p9 */
2636
+
2637
+ /* Do argument reduction using a MI_9 as a midpoint value
2638
+ for the polynomial and compute exactly zp = 2 * (1 - x)
2639
+ for the asymptotical approximation using a square root.
2640
+ */
2641
+
2642
+ z = xabs - MI_9;
2643
+ zp = 2.0 * (1.0 - xabs);
2644
+
2645
+ /* Polynomial approximation and square root extraction */
2646
+
2647
+ p9_quick(&p9h, &p9m, z);
2648
+
2649
+ sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
2650
+
2651
+ /* Reconstruction */
2652
+
2653
+ Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
2654
+
2655
+ Mul22(&t2h,&t2m,RECPRPIH,RECPRPIM,t1h,t1m);
2656
+
2657
+ if (x > 0.0) {
2658
+ acosh = t2h;
2659
+ acosm = t2m;
2660
+ } else {
2661
+ /* arccos(-x)/pi = 1 - arccos(x)/pi */
2662
+ t2h = - t2h;
2663
+ t2m = - t2m;
2664
+
2665
+ Add122(&acosh,&acosm,1.0,t2h,t2m);
2666
+ }
2667
+
2668
+ /* Rounding test */
2669
+
2670
+ TEST_AND_RETURN_RD(acosh, acosm, RDROUNDCST);
2671
+
2672
+ /* Rounding test failed, launch accurate phase */
2673
+
2674
+ #if EVAL_PERF
2675
+ crlibm_second_step_taken++;
2676
+ #endif
2677
+
2678
+ p9_accu(&p9h, &p9m, &p9l, z);
2679
+
2680
+ Sqrt13(&sqrh,&sqrm,&sqrl,zp);
2681
+
2682
+ /* Reconstruction */
2683
+
2684
+ Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
2685
+
2686
+ Mul33(&t2h,&t2m,&t2l,RECPRPIH,RECPRPIM,RECPRPIL,t1h,t1m,t1l);
2687
+
2688
+ if (x > 0.0) {
2689
+ Renormalize3(&acosh,&acosm,&acosl,t2h,t2m,t2l);
2690
+ } else {
2691
+ /* arccos(-x)/pi = 1 - arccos(x)/pi */
2692
+ t2h = - t2h;
2693
+ t2m = - t2m;
2694
+ t2l = - t2l;
2695
+
2696
+ Add133(&acoshover,&acosmover,&acoslover,1.0,t2h,t2m,t2l);
2697
+
2698
+ Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
2699
+ }
2700
+
2701
+ /* Final rounding */
2702
+
2703
+ ReturnRoundDownwards3(acosh,acosm,acosl);
2704
+
2705
+ }
2706
+
2707
+ /* Path 2 using p */
2708
+
2709
+ /* Do argument reduction using a table value for
2710
+ the midpoint value
2711
+ */
2712
+
2713
+ z = xabs - mi_i;
2714
+
2715
+ p_quick(&asinh, &asinm, z, index);
2716
+
2717
+ /* Recompose acos(x)/pi out of asin(abs(x))
2718
+
2719
+ In the case of a substraction, we will cancel
2720
+ not more than 1 bit.
2721
+
2722
+ */
2723
+
2724
+ if (x > 0.0) {
2725
+ asinh = - asinh;
2726
+ asinm = - asinm;
2727
+ }
2728
+
2729
+ Mul22(&asinpih,&asinpim,RECPRPIH,RECPRPIM,asinh,asinm);
2730
+
2731
+ Add122Cond(&acosh,&acosm,0.5,asinpih,asinpim);
2732
+
2733
+ /* Rounding test */
2734
+
2735
+ TEST_AND_RETURN_RD(acosh, acosm, RDROUNDCST);
2736
+
2737
+ /* Rounding test failed, launch accurate phase */
2738
+
2739
+ #if EVAL_PERF
2740
+ crlibm_second_step_taken++;
2741
+ #endif
2742
+
2743
+ p_accu(&asinh, &asinm, &asinl, z, index);
2744
+
2745
+ /* Recompose acos(x) out of asin(abs(x))
2746
+
2747
+ In the case of a substraction, we will cancel
2748
+ not more than 1 bit.
2749
+
2750
+ */
2751
+
2752
+ if (x > 0.0) {
2753
+ asinh = - asinh;
2754
+ asinm = - asinm;
2755
+ asinl = - asinl;
2756
+ }
2757
+
2758
+ Mul33(&asinpih,&asinpim,&asinpil,RECPRPIH,RECPRPIM,RECPRPIL,asinh,asinm,asinl);
2759
+
2760
+ Add133Cond(&acoshover,&acosmover,&acoslover,0.5,asinpih,asinpim,asinpil);
2761
+
2762
+ Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
2763
+
2764
+ /* Final rounding */
2765
+
2766
+ ReturnRoundDownwards3(acosh,acosm,acosl);
2767
+
2768
+ }
2769
+
2770
+ double acospi_ru(double x) {
2771
+ db_number xdb, zdb;
2772
+ double z, zp;
2773
+ int index;
2774
+ double asinh, asinm, asinl;
2775
+ double asinpih, asinpim, asinpil;
2776
+ double acosh, acosm, acosl;
2777
+ double acoshover, acosmover, acoslover;
2778
+ double p9h, p9m, p9l, sqrh, sqrm, sqrl;
2779
+ double t1h, t1m, t1l;
2780
+ double t2h, t2m, t2l;
2781
+ double xabs;
2782
+
2783
+ /* Start already computations for argument reduction */
2784
+
2785
+ zdb.d = 1.0 + x * x;
2786
+
2787
+ xdb.d = x;
2788
+
2789
+ /* Special case handling */
2790
+
2791
+ /* Remove sign of x in floating-point */
2792
+ xabs = ABS(x);
2793
+ xdb.i[HI] &= 0x7fffffff;
2794
+
2795
+ /* If |x| < 2^(-54) we have
2796
+
2797
+ arccos(x)/pi = 1/2 * (1 + xi)
2798
+
2799
+ with 0 <= xi < 2^(-54)
2800
+
2801
+ Thus we have
2802
+
2803
+ acospi(x) =
2804
+
2805
+ (i) 0.5 if x >= 0
2806
+ (ii) 0.5 + 1 ulp(0.5) if x < 0
2807
+
2808
+ */
2809
+ if (xdb.i[HI] < ACOSPISIMPLEBOUND) {
2810
+ if (x >= 0.0)
2811
+ return 0.5;
2812
+
2813
+ return 0.50000000000000011102230246251565404236316680908203125;
2814
+ }
2815
+
2816
+ /* acospi is defined on -1 <= x <= 1, elsewhere it is NaN */
2817
+ if (xdb.i[HI] >= 0x3ff00000) {
2818
+ if (x == 1.0) {
2819
+ return 0.0;
2820
+ }
2821
+ if (x == -1.0) {
2822
+ return 1.0;
2823
+ }
2824
+ return (x-x)/0.0; /* return NaN */
2825
+ }
2826
+
2827
+ /* Argument reduction:
2828
+
2829
+ We have 10 intervals and 3 paths:
2830
+
2831
+ - interval 0 => path 1 using p0
2832
+ - interval 1-8 => path 2 using p
2833
+ - interval 9 => path 3 using p9
2834
+
2835
+ */
2836
+
2837
+ index = (0x000f0000 & zdb.i[HI]) >> 16;
2838
+
2839
+ /* 0 <= index <= 15
2840
+
2841
+ index approximates roughly x^2
2842
+
2843
+ Map indexes to intervals as follows:
2844
+
2845
+ 0 -> 0
2846
+ 1 -> 1
2847
+ ...
2848
+ 8 -> 8
2849
+ 9 -> 9
2850
+ ...
2851
+ 15 -> 9
2852
+
2853
+ For this mapping, filter first the case 0 -> 0
2854
+ In consequence, 1 <= index <= 15, i.e.
2855
+ 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
2856
+
2857
+ 0 -> 1
2858
+ ...
2859
+ 7 -> 8
2860
+ 8 -> 9
2861
+ ...
2862
+ 15 -> 9
2863
+
2864
+ Thus it suffices to check the 3rd bit of index - 1 after the first filter.
2865
+
2866
+ */
2867
+
2868
+ if (index == 0) {
2869
+ /* Path 1 using p0 */
2870
+
2871
+ p0_quick(&asinh, &asinm, x, xdb.i[HI]);
2872
+
2873
+ /* Recompose acos/pi
2874
+
2875
+ We have
2876
+
2877
+ arccos(x)/pi = 1/2 + (-1/pi) * arcsin(x)
2878
+
2879
+ No cancellation possible because
2880
+
2881
+ |asin(x)/pi| <= 0.0837 for |x| <= 0.26
2882
+
2883
+ */
2884
+
2885
+ Mul22(&asinpih,&asinpim,MRECPRPIH,MRECPRPIM,asinh,asinm);
2886
+
2887
+ Add122(&acosh,&acosm,0.5,asinpih,asinpim);
2888
+
2889
+ /* Rounding test */
2890
+
2891
+ TEST_AND_RETURN_RU(acosh, acosm, RDROUNDCST);
2892
+
2893
+ /* Rounding test failed, launch accurate phase */
2894
+
2895
+ #if EVAL_PERF
2896
+ crlibm_second_step_taken++;
2897
+ #endif
2898
+
2899
+ p0_accu(&asinh, &asinm, &asinl, x);
2900
+
2901
+ /* Recompose acos/pi */
2902
+
2903
+ Mul33(&asinpih,&asinpim,&asinpil,MRECPRPIH,MRECPRPIM,MRECPRPIL,asinh,asinm,asinl);
2904
+
2905
+ Add133(&acoshover,&acosmover,&acoslover,0.5,asinpih,asinpim,asinpil);
2906
+
2907
+ Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
2908
+
2909
+ /* Final rounding */
2910
+
2911
+ ReturnRoundUpwards3(acosh,acosm,acosl);
2912
+
2913
+ }
2914
+
2915
+ index--;
2916
+ if ((index & 0x8) != 0) {
2917
+ /* Path 3 using p9 */
2918
+
2919
+ /* Do argument reduction using a MI_9 as a midpoint value
2920
+ for the polynomial and compute exactly zp = 2 * (1 - x)
2921
+ for the asymptotical approximation using a square root.
2922
+ */
2923
+
2924
+ z = xabs - MI_9;
2925
+ zp = 2.0 * (1.0 - xabs);
2926
+
2927
+ /* Polynomial approximation and square root extraction */
2928
+
2929
+ p9_quick(&p9h, &p9m, z);
2930
+
2931
+ sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
2932
+
2933
+ /* Reconstruction */
2934
+
2935
+ Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
2936
+
2937
+ Mul22(&t2h,&t2m,RECPRPIH,RECPRPIM,t1h,t1m);
2938
+
2939
+ if (x > 0.0) {
2940
+ acosh = t2h;
2941
+ acosm = t2m;
2942
+ } else {
2943
+ /* arccos(-x)/pi = 1 - arccos(x)/pi */
2944
+ t2h = - t2h;
2945
+ t2m = - t2m;
2946
+
2947
+ Add122(&acosh,&acosm,1.0,t2h,t2m);
2948
+ }
2949
+
2950
+ /* Rounding test */
2951
+
2952
+ TEST_AND_RETURN_RU(acosh, acosm, RDROUNDCST);
2953
+
2954
+ /* Rounding test failed, launch accurate phase */
2955
+
2956
+ #if EVAL_PERF
2957
+ crlibm_second_step_taken++;
2958
+ #endif
2959
+
2960
+ p9_accu(&p9h, &p9m, &p9l, z);
2961
+
2962
+ Sqrt13(&sqrh,&sqrm,&sqrl,zp);
2963
+
2964
+ /* Reconstruction */
2965
+
2966
+ Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
2967
+
2968
+ Mul33(&t2h,&t2m,&t2l,RECPRPIH,RECPRPIM,RECPRPIL,t1h,t1m,t1l);
2969
+
2970
+ if (x > 0.0) {
2971
+ Renormalize3(&acosh,&acosm,&acosl,t2h,t2m,t2l);
2972
+ } else {
2973
+ /* arccos(-x)/pi = 1 - arccos(x)/pi */
2974
+ t2h = - t2h;
2975
+ t2m = - t2m;
2976
+ t2l = - t2l;
2977
+
2978
+ Add133(&acoshover,&acosmover,&acoslover,1.0,t2h,t2m,t2l);
2979
+
2980
+ Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
2981
+ }
2982
+
2983
+ /* Final rounding */
2984
+
2985
+ ReturnRoundUpwards3(acosh,acosm,acosl);
2986
+
2987
+ }
2988
+
2989
+ /* Path 2 using p */
2990
+
2991
+ /* Do argument reduction using a table value for
2992
+ the midpoint value
2993
+ */
2994
+
2995
+ z = xabs - mi_i;
2996
+
2997
+ p_quick(&asinh, &asinm, z, index);
2998
+
2999
+ /* Recompose acos(x)/pi out of asin(abs(x))
3000
+
3001
+ In the case of a substraction, we will cancel
3002
+ not more than 1 bit.
3003
+
3004
+ */
3005
+
3006
+ if (x > 0.0) {
3007
+ asinh = - asinh;
3008
+ asinm = - asinm;
3009
+ }
3010
+
3011
+ Mul22(&asinpih,&asinpim,RECPRPIH,RECPRPIM,asinh,asinm);
3012
+
3013
+ Add122Cond(&acosh,&acosm,0.5,asinpih,asinpim);
3014
+
3015
+ /* Rounding test */
3016
+
3017
+ TEST_AND_RETURN_RU(acosh, acosm, RDROUNDCST);
3018
+
3019
+ /* Rounding test failed, launch accurate phase */
3020
+
3021
+ #if EVAL_PERF
3022
+ crlibm_second_step_taken++;
3023
+ #endif
3024
+
3025
+ p_accu(&asinh, &asinm, &asinl, z, index);
3026
+
3027
+ /* Recompose acos(x) out of asin(abs(x))
3028
+
3029
+ In the case of a substraction, we will cancel
3030
+ not more than 1 bit.
3031
+
3032
+ */
3033
+
3034
+ if (x > 0.0) {
3035
+ asinh = - asinh;
3036
+ asinm = - asinm;
3037
+ asinl = - asinl;
3038
+ }
3039
+
3040
+ Mul33(&asinpih,&asinpim,&asinpil,RECPRPIH,RECPRPIM,RECPRPIL,asinh,asinm,asinl);
3041
+
3042
+ Add133Cond(&acoshover,&acosmover,&acoslover,0.5,asinpih,asinpim,asinpil);
3043
+
3044
+ Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
3045
+
3046
+ /* Final rounding */
3047
+
3048
+ ReturnRoundUpwards3(acosh,acosm,acosl);
3049
+
3050
+ }
3051
+
3052
+
3053
+
3054
+ double asinpi_rn(double x) {
3055
+ db_number xdb, zdb, asinhdb, tempdb;
3056
+ double sign, z, zp;
3057
+ int index;
3058
+ double asinh, asinm, asinl;
3059
+ double asinpih, asinpim, asinpil;
3060
+ double asinpihover, asinpimover, asinpilover;
3061
+ double p9h, p9m, p9l, sqrh, sqrm, sqrl;
3062
+ double t1h, t1m, t1l;
3063
+ double t2h, t2m, t2l;
3064
+ double asinpi;
3065
+ double xabs;
3066
+ double xScaled;
3067
+ double xPih, xPim, xPil;
3068
+ double xPihover, xPimover, xPilover;
3069
+ double deltatemp, deltah, deltal;
3070
+ double temp1, temp2h, temp2l, temp3;
3071
+ double miulp;
3072
+
3073
+ /* Start already computations for argument reduction */
3074
+
3075
+ zdb.d = 1.0 + x * x;
3076
+
3077
+ xdb.d = x;
3078
+
3079
+ /* Special case handling */
3080
+
3081
+ /* Remove sign of x in floating-point */
3082
+ xabs = ABS(x);
3083
+ xdb.i[HI] &= 0x7fffffff;
3084
+
3085
+ /* If |x| < 2^(-60) we have
3086
+
3087
+ arcsin(x)/pi = x * triple-double(1/pi) * ( 1 + xi )
3088
+
3089
+ with 0 <= xi < 2^(-122)
3090
+
3091
+ We have no bad case worser than
3092
+ 112 bits for asinpi(x) for |x| > 2^(-58)
3093
+ and the order 3 term of asinpi is still more
3094
+ far away.
3095
+
3096
+ */
3097
+ if (xdb.i[HI] < ASINPISIMPLEBOUND) {
3098
+ /* For a faster path for the exact case,
3099
+ we check first if x = 0
3100
+ */
3101
+
3102
+ if (x == 0.0)
3103
+ return x;
3104
+
3105
+ /* We want a relatively fast path for values where
3106
+ neither the input nor the output is subnormal
3107
+ because subnormal rounding is expensive.
3108
+
3109
+ Since
3110
+
3111
+ abs(double(asin(x)/pi)) <= 2^(-860)
3112
+
3113
+ for abs(x) <= 2^(-858), we filter for this (normal) value.
3114
+
3115
+ */
3116
+
3117
+ if (xdb.i[HI] >= ASINPINOSUBNORMALBOUND) {
3118
+
3119
+ /* Here abs(x) >= 2^(-858).
3120
+ The result is therefore clearly normal and
3121
+ the double precision numbers in the triple-double
3122
+ representation of TD(1/pi) * x are all normal, too.
3123
+
3124
+ For speed, we use a two-step approach.
3125
+ We know that
3126
+
3127
+ || x*DD(1/pi)/(asin(x)/pi) - 1 ||_[-2^(-60);2^(-60)]^\infty <= 2^(-107.8) <= 2^(-80)
3128
+
3129
+ */
3130
+
3131
+ Mul122(&xPih,&xPim,x,RECPRPIH,RECPRPIM);
3132
+
3133
+ if(xPih == (xPih + (xPim * RNROUNDCSTASINPI)))
3134
+ return xPih;
3135
+
3136
+ Mul133(&xPihover,&xPimover,&xPilover,x,RECPRPIH,RECPRPIM,RECPRPIL);
3137
+
3138
+ Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
3139
+
3140
+ ReturnRoundToNearest3(xPih,xPim,xPil);
3141
+
3142
+ }
3143
+
3144
+ /* Here abs(x) < 2^(-858)
3145
+
3146
+ Because of subnormals and especially because
3147
+ of the fact that 1/pi < 1, we must scale x
3148
+ appropriately. We compute hence:
3149
+
3150
+ asinpi(x) = round( ((x * 2^(1000)) * triple-double(1/pi)) * 2^(-1000))
3151
+
3152
+ where the rounding procedure works temporarily on the scaled
3153
+ intermediate.
3154
+ */
3155
+
3156
+ xScaled = x * TWO1000;
3157
+
3158
+ Mul133(&xPihover,&xPimover,&xPilover,xScaled,RECPRPIH,RECPRPIM,RECPRPIL);
3159
+
3160
+ Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
3161
+
3162
+ /* Produce a (possibly) subnormal intermediate rounding */
3163
+
3164
+ asinhdb.d = xPih * TWOM1000;
3165
+
3166
+ #if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
3167
+ tempdb.i[HI] = asinhdb.i[HI];
3168
+ tempdb.i[LO] = asinhdb.i[LO];
3169
+ asinhdb.i[LO] = tempdb.i[LO];
3170
+ asinhdb.i[HI] = tempdb.i[HI];
3171
+ #else
3172
+ tempdb.d = asinhdb.d;
3173
+ #endif
3174
+
3175
+ /* Rescale the result */
3176
+
3177
+ temp1 = asinhdb.d * TWO1000;
3178
+
3179
+ /* Compute the scaled error, the operation is exact by Sterbenz' lemma */
3180
+
3181
+ deltatemp = xPih - temp1;
3182
+
3183
+ /* Sum up the errors, representing them on a double-double
3184
+
3185
+ This is exact for the normal rounding case and the error
3186
+ is neglectable in the subnormal rounding case.
3187
+ */
3188
+
3189
+ Add12Cond(temp2h,temp2l,deltatemp,xPim);
3190
+ temp3 = temp2l + xPil;
3191
+ Add12(deltah,deltal,temp2h,temp3);
3192
+
3193
+ /* Compute now a scaled 1/2 ulp of the intermediate result
3194
+ in the direction of delta
3195
+ */
3196
+
3197
+ if ((x >= 0.0) ^ (deltah >= 0.0))
3198
+ tempdb.l--;
3199
+ else
3200
+ tempdb.l++;
3201
+
3202
+ miulp = TWO999 * (tempdb.d - asinhdb.d);
3203
+
3204
+ /* We must correct the intermediate rounding
3205
+ if the error on deltah + deltal is greater
3206
+ in absolute value than miulp = 1/2 ulp in the
3207
+ right direction.
3208
+ */
3209
+
3210
+ if (ABS(deltah) < ABS(miulp)) {
3211
+ /* deltah is less than miulp, deltal is less than
3212
+ 1/2 the ulp of deltah. Thus deltah + deltal is
3213
+ less than miulp. We do not need to correct
3214
+ the intermediate rounding.
3215
+ */
3216
+ return asinhdb.d;
3217
+ }
3218
+
3219
+ if (ABS(deltah) > ABS(miulp)) {
3220
+ /* deltah is greater than miulp and deltal cannot correct it.
3221
+ We must correct the rounding.
3222
+
3223
+ tempdb.d (= asinhdb.d +/- 1 ulp) is the correct rounding.
3224
+ */
3225
+ return tempdb.d;
3226
+ }
3227
+
3228
+ /* Here deltah and miulp are equal in absolute value
3229
+
3230
+ We must correct the intermediate rounding iff the sign of deltal
3231
+ and deltah are the same.
3232
+
3233
+ */
3234
+
3235
+ if ((deltah >= 0.0) ^ (deltal >= 0.0)) {
3236
+ /* Here the sign is different, we return the
3237
+ intermediate rounding asinhdb.d
3238
+ */
3239
+ return asinhdb.d;
3240
+ }
3241
+
3242
+ /* Return the corrected result tempdb.d */
3243
+
3244
+ return tempdb.d;
3245
+ }
3246
+
3247
+ /* asinpi is defined on -1 <= x <= 1, elsewhere it is NaN */
3248
+ if (xdb.i[HI] >= 0x3ff00000) {
3249
+ if (x == 1.0) {
3250
+ return 0.5;
3251
+ }
3252
+ if (x == -1.0) {
3253
+ return - 0.5;
3254
+ }
3255
+ return (x-x)/0.0; /* return NaN */
3256
+ }
3257
+
3258
+ /* Argument reduction:
3259
+
3260
+ We have 10 intervals and 3 paths:
3261
+
3262
+ - interval 0 => path 1 using p0
3263
+ - interval 1-8 => path 2 using p
3264
+ - interval 9 => path 3 using p9
3265
+
3266
+ */
3267
+
3268
+ index = (0x000f0000 & zdb.i[HI]) >> 16;
3269
+
3270
+ /* 0 <= index <= 15
3271
+
3272
+ index approximates roughly x^2
3273
+
3274
+ Map indexes to intervals as follows:
3275
+
3276
+ 0 -> 0
3277
+ 1 -> 1
3278
+ ...
3279
+ 8 -> 8
3280
+ 9 -> 9
3281
+ ...
3282
+ 15 -> 9
3283
+
3284
+ For this mapping, filter first the case 0 -> 0
3285
+ In consequence, 1 <= index <= 15, i.e.
3286
+ 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
3287
+
3288
+ 0 -> 1
3289
+ ...
3290
+ 7 -> 8
3291
+ 8 -> 9
3292
+ ...
3293
+ 15 -> 9
3294
+
3295
+ Thus it suffices to check the 3rd bit of index - 1 after the first filter.
3296
+
3297
+ */
3298
+
3299
+ if (index == 0) {
3300
+ /* Path 1 using p0 */
3301
+
3302
+ p0_quick(&asinh, &asinm, x, xdb.i[HI]);
3303
+
3304
+ Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
3305
+
3306
+ /* Rounding test */
3307
+
3308
+ if(asinpih == (asinpih + (asinpim * RNROUNDCST)))
3309
+ return asinpih;
3310
+
3311
+ /* Rounding test failed, launch accurate phase */
3312
+
3313
+ #if EVAL_PERF
3314
+ crlibm_second_step_taken++;
3315
+ #endif
3316
+
3317
+ p0_accu(&asinh, &asinm, &asinl, x);
3318
+
3319
+ Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
3320
+
3321
+ Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
3322
+
3323
+ /* Final rounding */
3324
+
3325
+ ReturnRoundToNearest3(asinpih,asinpim,asinpil);
3326
+
3327
+ }
3328
+
3329
+ /* Strip off the sign of argument x */
3330
+ sign = 1.0;
3331
+ if (x < 0.0) sign = -sign;
3332
+
3333
+ index--;
3334
+ if ((index & 0x8) != 0) {
3335
+ /* Path 3 using p9 */
3336
+
3337
+ /* Do argument reduction using a MI_9 as a midpoint value
3338
+ for the polynomial and compute exactly zp = 2 * (1 - x)
3339
+ for the asymptotical approximation using a square root.
3340
+ */
3341
+
3342
+ z = xabs - MI_9;
3343
+ zp = 2.0 * (1.0 - xabs);
3344
+
3345
+ /* Polynomial approximation and square root extraction */
3346
+
3347
+ p9_quick(&p9h, &p9m, z);
3348
+ p9h = -p9h;
3349
+ p9m = -p9m;
3350
+
3351
+ sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
3352
+
3353
+ /* Reconstruction */
3354
+
3355
+ Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
3356
+ Mul22(&t2h,&t2m,t1h,t1m,RECPRPIH,RECPRPIM);
3357
+
3358
+ Add122(&asinpih,&asinpim,0.5,t2h,t2m);
3359
+
3360
+ /* Rounding test */
3361
+
3362
+ if(asinpih == (asinpih + (asinpim * RNROUNDCST)))
3363
+ return sign * asinpih;
3364
+
3365
+ /* Rounding test failed, launch accurate phase */
3366
+
3367
+ #if EVAL_PERF
3368
+ crlibm_second_step_taken++;
3369
+ #endif
3370
+
3371
+ p9_accu(&p9h, &p9m, &p9l, z);
3372
+ p9h = -p9h;
3373
+ p9m = -p9m;
3374
+ p9l = -p9l;
3375
+
3376
+ Sqrt13(&sqrh,&sqrm,&sqrl,zp);
3377
+
3378
+ /* Reconstruction */
3379
+
3380
+ Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
3381
+ Mul33(&t2h,&t2m,&t2l,t1h,t1m,t1l,RECPRPIH,RECPRPIM,RECPRPIL);
3382
+
3383
+ Add133(&asinpihover,&asinpimover,&asinpilover,0.5,t2h,t2m,t2l);
3384
+
3385
+ Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
3386
+
3387
+ /* Final rounding */
3388
+
3389
+ RoundToNearest3(&asinpi,asinpih,asinpim,asinpil);
3390
+
3391
+ return sign * asinpi;
3392
+
3393
+ }
3394
+
3395
+ /* Path 2 using p */
3396
+
3397
+ /* Do argument reduction using a table value for
3398
+ the midpoint value
3399
+ */
3400
+
3401
+ z = xabs - mi_i;
3402
+
3403
+ p_quick(&asinh, &asinm, z, index);
3404
+
3405
+ Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
3406
+
3407
+ /* Rounding test */
3408
+
3409
+ if(asinpih == (asinpih + (asinpim * RNROUNDCST)))
3410
+ return sign * asinpih;
3411
+
3412
+ /* Rounding test failed, launch accurate phase */
3413
+
3414
+ #if EVAL_PERF
3415
+ crlibm_second_step_taken++;
3416
+ #endif
3417
+
3418
+ p_accu(&asinh, &asinm, &asinl, z, index);
3419
+
3420
+ Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
3421
+
3422
+ Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
3423
+
3424
+ /* Final rounding */
3425
+
3426
+ RoundToNearest3(&asinpi,asinpih,asinpim,asinpil);
3427
+
3428
+ return sign * asinpi;
3429
+
3430
+ }
3431
+
3432
+ double asinpi_rd(double x) {
3433
+ db_number xdb, zdb, asinhdb;
3434
+ double sign, z, zp;
3435
+ int index;
3436
+ double asinh, asinm, asinl;
3437
+ double asinpih, asinpim, asinpil;
3438
+ double asinpihover, asinpimover, asinpilover;
3439
+ double p9h, p9m, p9l, sqrh, sqrm, sqrl;
3440
+ double t1h, t1m, t1l;
3441
+ double t2h, t2m, t2l;
3442
+ double asinpi;
3443
+ double xabs;
3444
+ double xScaled;
3445
+ double xPih, xPim, xPil;
3446
+ double xPihover, xPimover, xPilover;
3447
+ #if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
3448
+ db_number tempdb;
3449
+ #endif
3450
+ double deltatemp, deltah, deltal;
3451
+ double temp1, temp2h, temp2l, temp3;
3452
+
3453
+ /* Start already computations for argument reduction */
3454
+
3455
+ zdb.d = 1.0 + x * x;
3456
+
3457
+ xdb.d = x;
3458
+
3459
+ /* Special case handling */
3460
+
3461
+ /* Remove sign of x in floating-point */
3462
+ xabs = ABS(x);
3463
+ xdb.i[HI] &= 0x7fffffff;
3464
+
3465
+ /* If |x| < 2^(-60) we have
3466
+
3467
+ arcsin(x)/pi = x * triple-double(1/pi) * ( 1 + xi )
3468
+
3469
+ with 0 <= xi < 2^(-122)
3470
+
3471
+ We have no bad case worser than
3472
+ 112 bits for asinpi(x) for |x| > 2^(-58)
3473
+ and the order 3 term of asinpi is still more
3474
+ far away.
3475
+
3476
+ */
3477
+ if (xdb.i[HI] < ASINPISIMPLEBOUND) {
3478
+ /* For a faster path for the exact case,
3479
+ we check first if x = 0
3480
+ */
3481
+
3482
+ if (x == 0.0)
3483
+ return x;
3484
+
3485
+ /* We want a relatively fast path for values where
3486
+ neither the input nor the output is subnormal
3487
+ because subnormal rounding is expensive.
3488
+
3489
+ Since
3490
+
3491
+ abs(double(asin(x)/pi)) <= 2^(-860)
3492
+
3493
+ for abs(x) <= 2^(-858), we filter for this (normal) value.
3494
+
3495
+ */
3496
+
3497
+ if (xdb.i[HI] >= ASINPINOSUBNORMALBOUND) {
3498
+
3499
+ /* Here abs(x) >= 2^(-858).
3500
+ The result is therefore clearly normal and
3501
+ the double precision numbers in the triple-double
3502
+ representation of TD(1/pi) * x are all normal, too.
3503
+
3504
+ For speed, we use a two-step approach.
3505
+ We know that
3506
+
3507
+ || x*DD(1/pi)/(asin(x)/pi) - 1 ||_[-2^(-60);2^(-60)]^\infty <= 2^(-107.8) <= 2^(-80)
3508
+
3509
+ */
3510
+
3511
+ Mul122(&xPih,&xPim,x,RECPRPIH,RECPRPIM);
3512
+
3513
+ TEST_AND_RETURN_RD(xPih, xPim, RDROUNDCSTASINPI);
3514
+
3515
+ Mul133(&xPihover,&xPimover,&xPilover,x,RECPRPIH,RECPRPIM,RECPRPIL);
3516
+
3517
+ Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
3518
+
3519
+ ReturnRoundDownwards3(xPih,xPim,xPil);
3520
+
3521
+ }
3522
+
3523
+ /* Here abs(x) < 2^(-858)
3524
+
3525
+ Because of subnormals and especially because
3526
+ of the fact that 1/pi < 1, we must scale x
3527
+ appropriately. We compute hence:
3528
+
3529
+ asinpi(x) = round( ((x * 2^(1000)) * triple-double(1/pi)) * 2^(-1000))
3530
+
3531
+ where the rounding procedure works temporarily on the scaled
3532
+ intermediate.
3533
+ */
3534
+
3535
+ xScaled = x * TWO1000;
3536
+
3537
+ Mul133(&xPihover,&xPimover,&xPilover,xScaled,RECPRPIH,RECPRPIM,RECPRPIL);
3538
+
3539
+ Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
3540
+
3541
+ /* Produce a (possibly) subnormal intermediate rounding */
3542
+
3543
+ asinhdb.d = xPih * TWOM1000;
3544
+
3545
+ #if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
3546
+ tempdb.i[HI] = asinhdb.i[HI];
3547
+ tempdb.i[LO] = asinhdb.i[LO];
3548
+ asinhdb.i[LO] = tempdb.i[LO];
3549
+ asinhdb.i[HI] = tempdb.i[HI];
3550
+ #endif
3551
+
3552
+
3553
+ /* Rescale the result */
3554
+
3555
+ temp1 = asinhdb.d * TWO1000;
3556
+
3557
+ /* Compute the scaled error, the operation is exact by Sterbenz' lemma */
3558
+
3559
+ deltatemp = xPih - temp1;
3560
+
3561
+ /* Sum up the errors, representing them on a double-double
3562
+
3563
+ This is exact for the normal rounding case and the error
3564
+ is neglectable in the subnormal rounding case.
3565
+ */
3566
+
3567
+ Add12Cond(temp2h,temp2l,deltatemp,xPim);
3568
+ temp3 = temp2l + xPil;
3569
+ Add12(deltah,deltal,temp2h,temp3);
3570
+
3571
+ /* We are doing directed rounding. Thus we must correct the rounding
3572
+ if the sign of the error is not correct
3573
+ RD -> sign must be positive for correct rounding
3574
+ RU -> sign must be negative for correct rounding
3575
+ RZ -> sign must be positive for positive x and negative for negative x
3576
+ */
3577
+
3578
+ if (deltah >= 0.0) {
3579
+ /* The sign is correct, return the intermediate rounding */
3580
+ return asinhdb.d;
3581
+ }
3582
+
3583
+ /* The sign is not correct
3584
+
3585
+ RD -> subtract 1 ulp
3586
+ RU -> add 1 ulp
3587
+ RZ -> subtract 1 ulp if x positive, add 1 ulp if x negative
3588
+ */
3589
+
3590
+ if (x < 0.0)
3591
+ asinhdb.l++;
3592
+ else
3593
+ asinhdb.l--;
3594
+
3595
+ return asinhdb.d;
3596
+ }
3597
+
3598
+ /* asinpi is defined on -1 <= x <= 1, elsewhere it is NaN */
3599
+ if (xdb.i[HI] >= 0x3ff00000) {
3600
+ if (x == 1.0) {
3601
+ return 0.5;
3602
+ }
3603
+ if (x == -1.0) {
3604
+ return - 0.5;
3605
+ }
3606
+ return (x-x)/0.0; /* return NaN */
3607
+ }
3608
+
3609
+ /* Argument reduction:
3610
+
3611
+ We have 10 intervals and 3 paths:
3612
+
3613
+ - interval 0 => path 1 using p0
3614
+ - interval 1-8 => path 2 using p
3615
+ - interval 9 => path 3 using p9
3616
+
3617
+ */
3618
+
3619
+ index = (0x000f0000 & zdb.i[HI]) >> 16;
3620
+
3621
+ /* 0 <= index <= 15
3622
+
3623
+ index approximates roughly x^2
3624
+
3625
+ Map indexes to intervals as follows:
3626
+
3627
+ 0 -> 0
3628
+ 1 -> 1
3629
+ ...
3630
+ 8 -> 8
3631
+ 9 -> 9
3632
+ ...
3633
+ 15 -> 9
3634
+
3635
+ For this mapping, filter first the case 0 -> 0
3636
+ In consequence, 1 <= index <= 15, i.e.
3637
+ 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
3638
+
3639
+ 0 -> 1
3640
+ ...
3641
+ 7 -> 8
3642
+ 8 -> 9
3643
+ ...
3644
+ 15 -> 9
3645
+
3646
+ Thus it suffices to check the 3rd bit of index - 1 after the first filter.
3647
+
3648
+ */
3649
+
3650
+ if (index == 0) {
3651
+ /* Path 1 using p0 */
3652
+
3653
+ p0_quick(&asinh, &asinm, x, xdb.i[HI]);
3654
+
3655
+ Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
3656
+
3657
+ /* Rounding test */
3658
+
3659
+ TEST_AND_RETURN_RD(asinpih, asinpim, RDROUNDCST);
3660
+
3661
+ /* Rounding test failed, launch accurate phase */
3662
+
3663
+ #if EVAL_PERF
3664
+ crlibm_second_step_taken++;
3665
+ #endif
3666
+
3667
+ p0_accu(&asinh, &asinm, &asinl, x);
3668
+
3669
+ Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
3670
+
3671
+ Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
3672
+
3673
+ /* Final rounding */
3674
+
3675
+ ReturnRoundDownwards3(asinpih,asinpim,asinpil);
3676
+
3677
+ }
3678
+
3679
+ /* Strip off the sign of argument x */
3680
+ sign = 1.0;
3681
+ if (x < 0.0) sign = -sign;
3682
+
3683
+ index--;
3684
+ if ((index & 0x8) != 0) {
3685
+ /* Path 3 using p9 */
3686
+
3687
+ /* Do argument reduction using a MI_9 as a midpoint value
3688
+ for the polynomial and compute exactly zp = 2 * (1 - x)
3689
+ for the asymptotical approximation using a square root.
3690
+ */
3691
+
3692
+ z = xabs - MI_9;
3693
+ zp = 2.0 * (1.0 - xabs);
3694
+
3695
+ /* Polynomial approximation and square root extraction */
3696
+
3697
+ p9_quick(&p9h, &p9m, z);
3698
+ p9h = -p9h;
3699
+ p9m = -p9m;
3700
+
3701
+ sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
3702
+
3703
+ /* Reconstruction */
3704
+
3705
+ Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
3706
+ Mul22(&t2h,&t2m,t1h,t1m,RECPRPIH,RECPRPIM);
3707
+
3708
+ Add122(&asinpih,&asinpim,0.5,t2h,t2m);
3709
+
3710
+ /* Rounding test */
3711
+
3712
+ asinpih *= sign;
3713
+ asinpim *= sign;
3714
+
3715
+ TEST_AND_RETURN_RD(asinpih, asinpim, RDROUNDCST);
3716
+
3717
+ /* Rounding test failed, launch accurate phase */
3718
+
3719
+ #if EVAL_PERF
3720
+ crlibm_second_step_taken++;
3721
+ #endif
3722
+
3723
+ p9_accu(&p9h, &p9m, &p9l, z);
3724
+ p9h = -p9h;
3725
+ p9m = -p9m;
3726
+ p9l = -p9l;
3727
+
3728
+ Sqrt13(&sqrh,&sqrm,&sqrl,zp);
3729
+
3730
+ /* Reconstruction */
3731
+
3732
+ Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
3733
+ Mul33(&t2h,&t2m,&t2l,t1h,t1m,t1l,RECPRPIH,RECPRPIM,RECPRPIL);
3734
+
3735
+ Add133(&asinpihover,&asinpimover,&asinpilover,0.5,t2h,t2m,t2l);
3736
+
3737
+ Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
3738
+
3739
+ /* Final rounding */
3740
+
3741
+ RoundDownwards3(&asinpi,asinpih,asinpim,asinpil);
3742
+
3743
+ return sign * asinpi;
3744
+
3745
+ }
3746
+
3747
+ /* Path 2 using p */
3748
+
3749
+ /* Do argument reduction using a table value for
3750
+ the midpoint value
3751
+ */
3752
+
3753
+ z = xabs - mi_i;
3754
+
3755
+ p_quick(&asinh, &asinm, z, index);
3756
+
3757
+ Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
3758
+
3759
+ /* Rounding test */
3760
+
3761
+ asinpih *= sign;
3762
+ asinpim *= sign;
3763
+
3764
+ TEST_AND_RETURN_RD(asinpih, asinpim, RDROUNDCST);
3765
+
3766
+ /* Rounding test failed, launch accurate phase */
3767
+
3768
+ #if EVAL_PERF
3769
+ crlibm_second_step_taken++;
3770
+ #endif
3771
+
3772
+ p_accu(&asinh, &asinm, &asinl, z, index);
3773
+
3774
+ Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
3775
+
3776
+ Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
3777
+
3778
+ /* Final rounding */
3779
+
3780
+ RoundDownwards3(&asinpi,asinpih,asinpim,asinpil);
3781
+
3782
+ return sign * asinpi;
3783
+
3784
+ }
3785
+
3786
+ double asinpi_ru(double x) {
3787
+ db_number xdb, zdb, asinhdb;
3788
+ double sign, z, zp;
3789
+ int index;
3790
+ double asinh, asinm, asinl;
3791
+ double asinpih, asinpim, asinpil;
3792
+ double asinpihover, asinpimover, asinpilover;
3793
+ double p9h, p9m, p9l, sqrh, sqrm, sqrl;
3794
+ double t1h, t1m, t1l;
3795
+ double t2h, t2m, t2l;
3796
+ double asinpi;
3797
+ double xabs;
3798
+ double xScaled;
3799
+ double xPih, xPim, xPil;
3800
+ double xPihover, xPimover, xPilover;
3801
+ #if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
3802
+ db_number tempdb;
3803
+ #endif
3804
+ double deltatemp, deltah, deltal;
3805
+ double temp1, temp2h, temp2l, temp3;
3806
+
3807
+ /* Start already computations for argument reduction */
3808
+
3809
+ zdb.d = 1.0 + x * x;
3810
+
3811
+ xdb.d = x;
3812
+
3813
+ /* Special case handling */
3814
+
3815
+ /* Remove sign of x in floating-point */
3816
+ xabs = ABS(x);
3817
+ xdb.i[HI] &= 0x7fffffff;
3818
+
3819
+ /* If |x| < 2^(-60) we have
3820
+
3821
+ arcsin(x)/pi = x * triple-double(1/pi) * ( 1 + xi )
3822
+
3823
+ with 0 <= xi < 2^(-122)
3824
+
3825
+ We have no bad case worser than
3826
+ 112 bits for asinpi(x) for |x| > 2^(-58)
3827
+ and the order 3 term of asinpi is still more
3828
+ far away.
3829
+
3830
+ */
3831
+ if (xdb.i[HI] < ASINPISIMPLEBOUND) {
3832
+ /* For a faster path for the exact case,
3833
+ we check first if x = 0
3834
+ */
3835
+
3836
+ if (x == 0.0)
3837
+ return x;
3838
+
3839
+ /* We want a relatively fast path for values where
3840
+ neither the input nor the output is subnormal
3841
+ because subnormal rounding is expensive.
3842
+
3843
+ Since
3844
+
3845
+ abs(double(asin(x)/pi)) <= 2^(-860)
3846
+
3847
+ for abs(x) <= 2^(-858), we filter for this (normal) value.
3848
+
3849
+ */
3850
+
3851
+ if (xdb.i[HI] >= ASINPINOSUBNORMALBOUND) {
3852
+
3853
+ /* Here abs(x) >= 2^(-858).
3854
+ The result is therefore clearly normal and
3855
+ the double precision numbers in the triple-double
3856
+ representation of TD(1/pi) * x are all normal, too.
3857
+
3858
+ For speed, we use a two-step approach.
3859
+ We know that
3860
+
3861
+ || x*DD(1/pi)/(asin(x)/pi) - 1 ||_[-2^(-60);2^(-60)]^\infty <= 2^(-107.8) <= 2^(-80)
3862
+
3863
+ */
3864
+
3865
+ Mul122(&xPih,&xPim,x,RECPRPIH,RECPRPIM);
3866
+
3867
+ TEST_AND_RETURN_RU(xPih, xPim, RDROUNDCSTASINPI);
3868
+
3869
+ Mul133(&xPihover,&xPimover,&xPilover,x,RECPRPIH,RECPRPIM,RECPRPIL);
3870
+
3871
+ Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
3872
+
3873
+ ReturnRoundUpwards3(xPih,xPim,xPil);
3874
+
3875
+ }
3876
+
3877
+ /* Here abs(x) < 2^(-858)
3878
+
3879
+ Because of subnormals and especially because
3880
+ of the fact that 1/pi < 1, we must scale x
3881
+ appropriately. We compute hence:
3882
+
3883
+ asinpi(x) = round( ((x * 2^(1000)) * triple-double(1/pi)) * 2^(-1000))
3884
+
3885
+ where the rounding procedure works temporarily on the scaled
3886
+ intermediate.
3887
+ */
3888
+
3889
+ xScaled = x * TWO1000;
3890
+
3891
+ Mul133(&xPihover,&xPimover,&xPilover,xScaled,RECPRPIH,RECPRPIM,RECPRPIL);
3892
+
3893
+ Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
3894
+
3895
+ /* Produce a (possibly) subnormal intermediate rounding */
3896
+
3897
+ asinhdb.d = xPih * TWOM1000;
3898
+
3899
+ #if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
3900
+ tempdb.i[HI] = asinhdb.i[HI];
3901
+ tempdb.i[LO] = asinhdb.i[LO];
3902
+ asinhdb.i[LO] = tempdb.i[LO];
3903
+ asinhdb.i[HI] = tempdb.i[HI];
3904
+ #endif
3905
+
3906
+ /* Rescale the result */
3907
+
3908
+ temp1 = asinhdb.d * TWO1000;
3909
+
3910
+ /* Compute the scaled error, the operation is exact by Sterbenz' lemma */
3911
+
3912
+ deltatemp = xPih - temp1;
3913
+
3914
+ /* Sum up the errors, representing them on a double-double
3915
+
3916
+ This is exact for the normal rounding case and the error
3917
+ is neglectable in the subnormal rounding case.
3918
+ */
3919
+
3920
+ Add12Cond(temp2h,temp2l,deltatemp,xPim);
3921
+ temp3 = temp2l + xPil;
3922
+ Add12(deltah,deltal,temp2h,temp3);
3923
+
3924
+ /* We are doing directed rounding. Thus we must correct the rounding
3925
+ if the sign of the error is not correct
3926
+ RD -> sign must be positive for correct rounding
3927
+ RU -> sign must be negative for correct rounding
3928
+ RZ -> sign must be positive for positive x and negative for negative x
3929
+ */
3930
+
3931
+ if (deltah <= 0.0) {
3932
+ /* The sign is correct, return the intermediate rounding */
3933
+ return asinhdb.d;
3934
+ }
3935
+
3936
+ /* The sign is not correct
3937
+
3938
+ RD -> subtract 1 ulp
3939
+ RU -> add 1 ulp
3940
+ RZ -> subtract 1 ulp if x positive, add 1 ulp if x negative
3941
+ */
3942
+
3943
+ if (x < 0.0)
3944
+ asinhdb.l--;
3945
+ else
3946
+ asinhdb.l++;
3947
+
3948
+ return asinhdb.d;
3949
+ }
3950
+
3951
+ /* asinpi is defined on -1 <= x <= 1, elsewhere it is NaN */
3952
+ if (xdb.i[HI] >= 0x3ff00000) {
3953
+ if (x == 1.0) {
3954
+ return 0.5;
3955
+ }
3956
+ if (x == -1.0) {
3957
+ return - 0.5;
3958
+ }
3959
+ return (x-x)/0.0; /* return NaN */
3960
+ }
3961
+
3962
+ /* Argument reduction:
3963
+
3964
+ We have 10 intervals and 3 paths:
3965
+
3966
+ - interval 0 => path 1 using p0
3967
+ - interval 1-8 => path 2 using p
3968
+ - interval 9 => path 3 using p9
3969
+
3970
+ */
3971
+
3972
+ index = (0x000f0000 & zdb.i[HI]) >> 16;
3973
+
3974
+ /* 0 <= index <= 15
3975
+
3976
+ index approximates roughly x^2
3977
+
3978
+ Map indexes to intervals as follows:
3979
+
3980
+ 0 -> 0
3981
+ 1 -> 1
3982
+ ...
3983
+ 8 -> 8
3984
+ 9 -> 9
3985
+ ...
3986
+ 15 -> 9
3987
+
3988
+ For this mapping, filter first the case 0 -> 0
3989
+ In consequence, 1 <= index <= 15, i.e.
3990
+ 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
3991
+
3992
+ 0 -> 1
3993
+ ...
3994
+ 7 -> 8
3995
+ 8 -> 9
3996
+ ...
3997
+ 15 -> 9
3998
+
3999
+ Thus it suffices to check the 3rd bit of index - 1 after the first filter.
4000
+
4001
+ */
4002
+
4003
+ if (index == 0) {
4004
+ /* Path 1 using p0 */
4005
+
4006
+ p0_quick(&asinh, &asinm, x, xdb.i[HI]);
4007
+
4008
+ Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
4009
+
4010
+ /* Rounding test */
4011
+
4012
+ TEST_AND_RETURN_RU(asinpih, asinpim, RDROUNDCST);
4013
+
4014
+ /* Rounding test failed, launch accurate phase */
4015
+
4016
+ #if EVAL_PERF
4017
+ crlibm_second_step_taken++;
4018
+ #endif
4019
+
4020
+ p0_accu(&asinh, &asinm, &asinl, x);
4021
+
4022
+ Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
4023
+
4024
+ Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
4025
+
4026
+ /* Final rounding */
4027
+
4028
+ ReturnRoundUpwards3(asinpih,asinpim,asinpil);
4029
+
4030
+ }
4031
+
4032
+ /* Strip off the sign of argument x */
4033
+ sign = 1.0;
4034
+ if (x < 0.0) sign = -sign;
4035
+
4036
+ index--;
4037
+ if ((index & 0x8) != 0) {
4038
+ /* Path 3 using p9 */
4039
+
4040
+ /* Do argument reduction using a MI_9 as a midpoint value
4041
+ for the polynomial and compute exactly zp = 2 * (1 - x)
4042
+ for the asymptotical approximation using a square root.
4043
+ */
4044
+
4045
+ z = xabs - MI_9;
4046
+ zp = 2.0 * (1.0 - xabs);
4047
+
4048
+ /* Polynomial approximation and square root extraction */
4049
+
4050
+ p9_quick(&p9h, &p9m, z);
4051
+ p9h = -p9h;
4052
+ p9m = -p9m;
4053
+
4054
+ sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
4055
+
4056
+ /* Reconstruction */
4057
+
4058
+ Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
4059
+ Mul22(&t2h,&t2m,t1h,t1m,RECPRPIH,RECPRPIM);
4060
+
4061
+ Add122(&asinpih,&asinpim,0.5,t2h,t2m);
4062
+
4063
+ /* Rounding test */
4064
+
4065
+ asinpih *= sign;
4066
+ asinpim *= sign;
4067
+
4068
+ TEST_AND_RETURN_RU(asinpih, asinpim, RDROUNDCST);
4069
+
4070
+ /* Rounding test failed, launch accurate phase */
4071
+
4072
+ #if EVAL_PERF
4073
+ crlibm_second_step_taken++;
4074
+ #endif
4075
+
4076
+ p9_accu(&p9h, &p9m, &p9l, z);
4077
+ p9h = -p9h;
4078
+ p9m = -p9m;
4079
+ p9l = -p9l;
4080
+
4081
+ Sqrt13(&sqrh,&sqrm,&sqrl,zp);
4082
+
4083
+ /* Reconstruction */
4084
+
4085
+ Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
4086
+ Mul33(&t2h,&t2m,&t2l,t1h,t1m,t1l,RECPRPIH,RECPRPIM,RECPRPIL);
4087
+
4088
+ Add133(&asinpihover,&asinpimover,&asinpilover,0.5,t2h,t2m,t2l);
4089
+
4090
+ Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
4091
+
4092
+ /* Final rounding */
4093
+
4094
+ RoundUpwards3(&asinpi,asinpih,asinpim,asinpil);
4095
+
4096
+ return sign * asinpi;
4097
+
4098
+ }
4099
+
4100
+ /* Path 2 using p */
4101
+
4102
+ /* Do argument reduction using a table value for
4103
+ the midpoint value
4104
+ */
4105
+
4106
+ z = xabs - mi_i;
4107
+
4108
+ p_quick(&asinh, &asinm, z, index);
4109
+
4110
+ Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
4111
+
4112
+ /* Rounding test */
4113
+
4114
+ asinpih *= sign;
4115
+ asinpim *= sign;
4116
+
4117
+ TEST_AND_RETURN_RU(asinpih, asinpim, RDROUNDCST);
4118
+
4119
+ /* Rounding test failed, launch accurate phase */
4120
+
4121
+ #if EVAL_PERF
4122
+ crlibm_second_step_taken++;
4123
+ #endif
4124
+
4125
+ p_accu(&asinh, &asinm, &asinl, z, index);
4126
+
4127
+ Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
4128
+
4129
+ Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
4130
+
4131
+ /* Final rounding */
4132
+
4133
+ RoundUpwards3(&asinpi,asinpih,asinpim,asinpil);
4134
+
4135
+ return sign * asinpi;
4136
+
4137
+ }
4138
+
4139
+ double asinpi_rz(double x) {
4140
+ db_number xdb, zdb, asinhdb;
4141
+ double sign, z, zp;
4142
+ int index;
4143
+ double asinh, asinm, asinl;
4144
+ double asinpih, asinpim, asinpil;
4145
+ double asinpihover, asinpimover, asinpilover;
4146
+ double p9h, p9m, p9l, sqrh, sqrm, sqrl;
4147
+ double t1h, t1m, t1l;
4148
+ double t2h, t2m, t2l;
4149
+ double asinpi;
4150
+ double xabs;
4151
+ double xScaled;
4152
+ double xPih, xPim, xPil;
4153
+ double xPihover, xPimover, xPilover;
4154
+ #if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
4155
+ db_number tempdb;
4156
+ #endif
4157
+ double deltatemp, deltah, deltal;
4158
+ double temp1, temp2h, temp2l, temp3;
4159
+
4160
+ /* Start already computations for argument reduction */
4161
+
4162
+ zdb.d = 1.0 + x * x;
4163
+
4164
+ xdb.d = x;
4165
+
4166
+ /* Special case handling */
4167
+
4168
+ /* Remove sign of x in floating-point */
4169
+ xabs = ABS(x);
4170
+ xdb.i[HI] &= 0x7fffffff;
4171
+
4172
+ /* If |x| < 2^(-60) we have
4173
+
4174
+ arcsin(x)/pi = x * triple-double(1/pi) * ( 1 + xi )
4175
+
4176
+ with 0 <= xi < 2^(-122)
4177
+
4178
+ We have no bad case worser than
4179
+ 112 bits for asinpi(x) for |x| > 2^(-58)
4180
+ and the order 3 term of asinpi is still more
4181
+ far away.
4182
+
4183
+ */
4184
+ if (xdb.i[HI] < ASINPISIMPLEBOUND) {
4185
+ /* For a faster path for the exact case,
4186
+ we check first if x = 0
4187
+ */
4188
+
4189
+ if (x == 0.0)
4190
+ return x;
4191
+
4192
+ /* We want a relatively fast path for values where
4193
+ neither the input nor the output is subnormal
4194
+ because subnormal rounding is expensive.
4195
+
4196
+ Since
4197
+
4198
+ abs(double(asin(x)/pi)) <= 2^(-860)
4199
+
4200
+ for abs(x) <= 2^(-858), we filter for this (normal) value.
4201
+
4202
+ */
4203
+
4204
+ if (xdb.i[HI] >= ASINPINOSUBNORMALBOUND) {
4205
+
4206
+ /* Here abs(x) >= 2^(-858).
4207
+ The result is therefore clearly normal and
4208
+ the double precision numbers in the triple-double
4209
+ representation of TD(1/pi) * x are all normal, too.
4210
+
4211
+ For speed, we use a two-step approach.
4212
+ We know that
4213
+
4214
+ || x*DD(1/pi)/(asin(x)/pi) - 1 ||_[-2^(-60);2^(-60)]^\infty <= 2^(-107.8) <= 2^(-80)
4215
+
4216
+ */
4217
+
4218
+ Mul122(&xPih,&xPim,x,RECPRPIH,RECPRPIM);
4219
+
4220
+ TEST_AND_RETURN_RZ(xPih, xPim, RDROUNDCSTASINPI);
4221
+
4222
+ Mul133(&xPihover,&xPimover,&xPilover,x,RECPRPIH,RECPRPIM,RECPRPIL);
4223
+
4224
+ Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
4225
+
4226
+ ReturnRoundTowardsZero3(xPih,xPim,xPil);
4227
+
4228
+ }
4229
+
4230
+ /* Here abs(x) < 2^(-858)
4231
+
4232
+ Because of subnormals and especially because
4233
+ of the fact that 1/pi < 1, we must scale x
4234
+ appropriately. We compute hence:
4235
+
4236
+ asinpi(x) = round( ((x * 2^(1000)) * triple-double(1/pi)) * 2^(-1000))
4237
+
4238
+ where the rounding procedure works temporarily on the scaled
4239
+ intermediate.
4240
+ */
4241
+
4242
+ xScaled = x * TWO1000;
4243
+
4244
+ Mul133(&xPihover,&xPimover,&xPilover,xScaled,RECPRPIH,RECPRPIM,RECPRPIL);
4245
+
4246
+ Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
4247
+
4248
+ /* Produce a (possibly) subnormal intermediate rounding */
4249
+
4250
+ asinhdb.d = xPih * TWOM1000;
4251
+
4252
+ #if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
4253
+ tempdb.i[HI] = asinhdb.i[HI];
4254
+ tempdb.i[LO] = asinhdb.i[LO];
4255
+ asinhdb.i[LO] = tempdb.i[LO];
4256
+ asinhdb.i[HI] = tempdb.i[HI];
4257
+ #endif
4258
+
4259
+ /* Rescale the result */
4260
+
4261
+ temp1 = asinhdb.d * TWO1000;
4262
+
4263
+ /* Compute the scaled error, the operation is exact by Sterbenz' lemma */
4264
+
4265
+ deltatemp = xPih - temp1;
4266
+
4267
+ /* Sum up the errors, representing them on a double-double
4268
+
4269
+ This is exact for the normal rounding case and the error
4270
+ is neglectable in the subnormal rounding case.
4271
+ */
4272
+
4273
+ Add12Cond(temp2h,temp2l,deltatemp,xPim);
4274
+ temp3 = temp2l + xPil;
4275
+ Add12(deltah,deltal,temp2h,temp3);
4276
+
4277
+ /* We are doing directed rounding. Thus we must correct the rounding
4278
+ if the sign of the error is not correct
4279
+ RD -> sign must be positive for correct rounding
4280
+ RU -> sign must be negative for correct rounding
4281
+ RZ -> sign must be positive for positive x and negative for negative x
4282
+ */
4283
+
4284
+ if ((x > 0.0) ^ (deltah < 0.0)) {
4285
+ /* The sign is correct, return the intermediate rounding */
4286
+ return asinhdb.d;
4287
+ }
4288
+
4289
+ /* The sign is not correct
4290
+
4291
+ RD -> subtract 1 ulp
4292
+ RU -> add 1 ulp
4293
+ RZ -> subtract 1 ulp if x positive, add 1 ulp if x negative
4294
+ */
4295
+
4296
+ asinhdb.l--;
4297
+
4298
+ return asinhdb.d;
4299
+ }
4300
+
4301
+ /* asinpi is defined on -1 <= x <= 1, elsewhere it is NaN */
4302
+ if (xdb.i[HI] >= 0x3ff00000) {
4303
+ if (x == 1.0) {
4304
+ return 0.5;
4305
+ }
4306
+ if (x == -1.0) {
4307
+ return - 0.5;
4308
+ }
4309
+ return (x-x)/0.0; /* return NaN */
4310
+ }
4311
+
4312
+ /* Argument reduction:
4313
+
4314
+ We have 10 intervals and 3 paths:
4315
+
4316
+ - interval 0 => path 1 using p0
4317
+ - interval 1-8 => path 2 using p
4318
+ - interval 9 => path 3 using p9
4319
+
4320
+ */
4321
+
4322
+ index = (0x000f0000 & zdb.i[HI]) >> 16;
4323
+
4324
+ /* 0 <= index <= 15
4325
+
4326
+ index approximates roughly x^2
4327
+
4328
+ Map indexes to intervals as follows:
4329
+
4330
+ 0 -> 0
4331
+ 1 -> 1
4332
+ ...
4333
+ 8 -> 8
4334
+ 9 -> 9
4335
+ ...
4336
+ 15 -> 9
4337
+
4338
+ For this mapping, filter first the case 0 -> 0
4339
+ In consequence, 1 <= index <= 15, i.e.
4340
+ 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
4341
+
4342
+ 0 -> 1
4343
+ ...
4344
+ 7 -> 8
4345
+ 8 -> 9
4346
+ ...
4347
+ 15 -> 9
4348
+
4349
+ Thus it suffices to check the 3rd bit of index - 1 after the first filter.
4350
+
4351
+ */
4352
+
4353
+ if (index == 0) {
4354
+ /* Path 1 using p0 */
4355
+
4356
+ p0_quick(&asinh, &asinm, x, xdb.i[HI]);
4357
+
4358
+ Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
4359
+
4360
+ /* Rounding test */
4361
+
4362
+ TEST_AND_RETURN_RZ(asinpih, asinpim, RDROUNDCST);
4363
+
4364
+ /* Rounding test failed, launch accurate phase */
4365
+
4366
+ #if EVAL_PERF
4367
+ crlibm_second_step_taken++;
4368
+ #endif
4369
+
4370
+ p0_accu(&asinh, &asinm, &asinl, x);
4371
+
4372
+ Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
4373
+
4374
+ Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
4375
+
4376
+ /* Final rounding */
4377
+
4378
+ ReturnRoundTowardsZero3(asinpih,asinpim,asinpil);
4379
+
4380
+ }
4381
+
4382
+ /* Strip off the sign of argument x */
4383
+ sign = 1.0;
4384
+ if (x < 0.0) sign = -sign;
4385
+
4386
+ index--;
4387
+ if ((index & 0x8) != 0) {
4388
+ /* Path 3 using p9 */
4389
+
4390
+ /* Do argument reduction using a MI_9 as a midpoint value
4391
+ for the polynomial and compute exactly zp = 2 * (1 - x)
4392
+ for the asymptotical approximation using a square root.
4393
+ */
4394
+
4395
+ z = xabs - MI_9;
4396
+ zp = 2.0 * (1.0 - xabs);
4397
+
4398
+ /* Polynomial approximation and square root extraction */
4399
+
4400
+ p9_quick(&p9h, &p9m, z);
4401
+ p9h = -p9h;
4402
+ p9m = -p9m;
4403
+
4404
+ sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
4405
+
4406
+ /* Reconstruction */
4407
+
4408
+ Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
4409
+ Mul22(&t2h,&t2m,t1h,t1m,RECPRPIH,RECPRPIM);
4410
+
4411
+ Add122(&asinpih,&asinpim,0.5,t2h,t2m);
4412
+
4413
+ /* Rounding test */
4414
+
4415
+ asinpih *= sign;
4416
+ asinpim *= sign;
4417
+
4418
+ TEST_AND_RETURN_RZ(asinpih, asinpim, RDROUNDCST);
4419
+
4420
+ /* Rounding test failed, launch accurate phase */
4421
+
4422
+ #if EVAL_PERF
4423
+ crlibm_second_step_taken++;
4424
+ #endif
4425
+
4426
+ p9_accu(&p9h, &p9m, &p9l, z);
4427
+ p9h = -p9h;
4428
+ p9m = -p9m;
4429
+ p9l = -p9l;
4430
+
4431
+ Sqrt13(&sqrh,&sqrm,&sqrl,zp);
4432
+
4433
+ /* Reconstruction */
4434
+
4435
+ Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
4436
+ Mul33(&t2h,&t2m,&t2l,t1h,t1m,t1l,RECPRPIH,RECPRPIM,RECPRPIL);
4437
+
4438
+ Add133(&asinpihover,&asinpimover,&asinpilover,0.5,t2h,t2m,t2l);
4439
+
4440
+ Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
4441
+
4442
+ /* Final rounding */
4443
+
4444
+ RoundTowardsZero3(&asinpi,asinpih,asinpim,asinpil);
4445
+
4446
+ return sign * asinpi;
4447
+
4448
+ }
4449
+
4450
+ /* Path 2 using p */
4451
+
4452
+ /* Do argument reduction using a table value for
4453
+ the midpoint value
4454
+ */
4455
+
4456
+ z = xabs - mi_i;
4457
+
4458
+ p_quick(&asinh, &asinm, z, index);
4459
+
4460
+ Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
4461
+
4462
+ /* Rounding test */
4463
+
4464
+ asinpih *= sign;
4465
+ asinpim *= sign;
4466
+
4467
+ TEST_AND_RETURN_RZ(asinpih, asinpim, RDROUNDCST);
4468
+
4469
+ /* Rounding test failed, launch accurate phase */
4470
+
4471
+ #if EVAL_PERF
4472
+ crlibm_second_step_taken++;
4473
+ #endif
4474
+
4475
+ p_accu(&asinh, &asinm, &asinl, z, index);
4476
+
4477
+ Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
4478
+
4479
+ Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
4480
+
4481
+ /* Final rounding */
4482
+
4483
+ RoundTowardsZero3(&asinpi,asinpih,asinpim,asinpil);
4484
+
4485
+ return sign * asinpi;
4486
+
4487
+ }
4488
+