crmf 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +12 -0
  3. data/crmf.gemspec +105 -3
  4. data/ext/crlibm-1.0beta5/AUTHORS +2 -0
  5. data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
  6. data/ext/crlibm-1.0beta5/COPYING +340 -0
  7. data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
  8. data/ext/crlibm-1.0beta5/ChangeLog +125 -0
  9. data/ext/crlibm-1.0beta5/Makefile.am +134 -0
  10. data/ext/crlibm-1.0beta5/NEWS +0 -0
  11. data/ext/crlibm-1.0beta5/README +31 -0
  12. data/ext/crlibm-1.0beta5/README.DEV +23 -0
  13. data/ext/crlibm-1.0beta5/README.md +5 -0
  14. data/ext/crlibm-1.0beta5/TODO +66 -0
  15. data/ext/crlibm-1.0beta5/VERSION +1 -0
  16. data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
  17. data/ext/crlibm-1.0beta5/acos-td.h +629 -0
  18. data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
  19. data/ext/crlibm-1.0beta5/asin-td.h +620 -0
  20. data/ext/crlibm-1.0beta5/asincos.c +4488 -0
  21. data/ext/crlibm-1.0beta5/asincos.h +575 -0
  22. data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
  23. data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
  24. data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
  25. data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
  26. data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
  27. data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
  28. data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
  29. data/ext/crlibm-1.0beta5/configure.ac +419 -0
  30. data/ext/crlibm-1.0beta5/crlibm.h +204 -0
  31. data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
  32. data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
  33. data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
  34. data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
  35. data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
  36. data/ext/crlibm-1.0beta5/double-extended.h +496 -0
  37. data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
  38. data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
  39. data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
  40. data/ext/crlibm-1.0beta5/exp-td.h +685 -0
  41. data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
  42. data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
  43. data/ext/crlibm-1.0beta5/expm1.c +2515 -0
  44. data/ext/crlibm-1.0beta5/expm1.h +715 -0
  45. data/ext/crlibm-1.0beta5/interval.h +238 -0
  46. data/ext/crlibm-1.0beta5/log-de.c +480 -0
  47. data/ext/crlibm-1.0beta5/log-de.h +747 -0
  48. data/ext/crlibm-1.0beta5/log-de2.c +280 -0
  49. data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
  50. data/ext/crlibm-1.0beta5/log-td.c +1158 -0
  51. data/ext/crlibm-1.0beta5/log-td.h +819 -0
  52. data/ext/crlibm-1.0beta5/log.c +2244 -0
  53. data/ext/crlibm-1.0beta5/log.h +1592 -0
  54. data/ext/crlibm-1.0beta5/log10-td.c +906 -0
  55. data/ext/crlibm-1.0beta5/log10-td.h +823 -0
  56. data/ext/crlibm-1.0beta5/log1p.c +1295 -0
  57. data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
  58. data/ext/crlibm-1.0beta5/log2-td.h +821 -0
  59. data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
  60. data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
  61. data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
  62. data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
  63. data/ext/crlibm-1.0beta5/log_fast.c +360 -0
  64. data/ext/crlibm-1.0beta5/log_fast.h +440 -0
  65. data/ext/crlibm-1.0beta5/pow.c +1396 -0
  66. data/ext/crlibm-1.0beta5/pow.h +3101 -0
  67. data/ext/crlibm-1.0beta5/prepare +20 -0
  68. data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
  69. data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
  70. data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
  71. data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
  72. data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
  73. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
  74. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
  75. data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
  76. data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
  77. data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
  78. data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
  79. data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
  80. data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
  81. data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
  82. data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
  83. data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
  84. data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
  85. data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
  86. data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
  87. data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
  88. data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
  89. data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
  90. data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
  91. data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
  92. data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
  93. data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
  94. data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
  95. data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
  96. data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
  97. data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
  98. data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
  99. data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
  100. data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
  101. data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
  102. data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
  103. data/ext/crlibm-1.0beta5/trigpi.h +556 -0
  104. data/ext/crlibm-1.0beta5/triple-double.c +57 -0
  105. data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
  106. data/ext/crmf/crmf.c +117 -20
  107. data/ext/crmf/extconf.rb +12 -8
  108. data/lib/crmf/version.rb +1 -1
  109. data/tests/perf.rb +100 -219
  110. metadata +108 -10
  111. data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,1297 @@
1
+ /*
2
+ * Correctly rounded arcsine
3
+ *
4
+ * Author : Christoph Lauter (ENS Lyon)
5
+ *
6
+ * This file is part of the crlibm library developed by the Arenaire
7
+ * project at Ecole Normale Superieure de Lyon
8
+ *
9
+ * This program is free software; you can redistribute it and/or modify
10
+ * it under the terms of the GNU Lesser General Public License as published by
11
+ * the Free Software Foundation; either version 2 of the License, or
12
+ * (at your option) any later version.
13
+ *
14
+ * This program is distributed in the hope that it will be useful,
15
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ * GNU General Public License for more details.
18
+ *
19
+ * You should have received a copy of the GNU Lesser General Public License
20
+ * along with this program; if not, write to the Free Software
21
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22
+ */
23
+
24
+ #include <stdio.h>
25
+ #include <stdlib.h>
26
+ #include "crlibm.h"
27
+ #include "crlibm_private.h"
28
+ #include "triple-double.h"
29
+ #include "asin-td.h"
30
+
31
+ #define AVOID_FMA 1
32
+
33
+ void asin_accurate_lower(double *asinh, double *asinm, double *asinl, double x, double xSqh, double xSql, double sign) { /* Accurate (second) phase for the interval next to 0.
+ Evaluates p(x) = x + x^3 * q(x^2) with the coefficients tbl[0]..tbl[33]
+ and stores sign * p(x) through asinh, asinm and asinl as a renormalized
+ triple-double (high, middle and low part).
+ x is the argument as passed by the caller (asin_rn passes |x|),
+ xSqh + xSql must hold x * x, and sign is the factor applied to all
+ three result parts (asin_rn passes +1 or -1). */
34
+ double highPoly;
35
+ double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l;
36
+ double tt1h, tt1l;
37
+ double t8h, t8m, t8l, t9h, t9m, t9l, t10h, t10m, t10l, t11h, t11m, t11l, t12h, t12m, t12l;
38
+ double tt8h, tt8m, tt8l, tt9h, tt9m, tt9l, tt10h, tt10m, tt10l, tt11h, tt11m, tt11l, tt12h, tt12m, tt12l;
39
+ double xCubeh, xCubem, xCubel, tt13h, tt13m, tt13l, t13h, t13m, t13l, polyh, polym, polyl;
40
+ double tt11hover, tt11mover, tt11lover;
41
+
42
+ #if EVAL_PERF
43
+ crlibm_second_step_taken++;
44
+ #endif
45
+
46
+ /* Evaluate the polynomial of degree 37
47
+ Its coefficients start at tbl[0]
48
+
49
+ p(x) = x + x * x^2 * (c3 + x^2 * (c5 + ...
50
+
51
+ We receive x^2 as xSqh + xSql = x * x (exactly)
52
+ in argument
53
+
54
+ |x| <= 0.185 = 2^(-2.43)
55
+
56
+ Compute monomials 27 to 37 in double precision
57
+ monomials 13 to 25 in double-double and
58
+ 1 to 11 in triple-double precision in a
59
+ modified Horner form
60
+
61
+ */
62
+
63
+ /* Double computations: Horner scheme in xSqh over tbl[28]..tbl[33] */
64
+
65
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
66
+ highPoly = FMA(FMA(FMA(FMA(FMA(tbl[33],xSqh,tbl[32]),xSqh,tbl[31]),xSqh,tbl[30]),xSqh,tbl[29]),xSqh,tbl[28]);
67
+ #else
68
+ highPoly = tbl[28] + xSqh * (tbl[29] + xSqh * (tbl[30] + xSqh * (tbl[31] + xSqh * (tbl[32] + xSqh * tbl[33]))));
69
+ #endif
70
+
71
+ /* Double-double computations: tbl[27] is a single double (low part 0),
+ tbl[15]..tbl[26] are six (high, low) coefficient pairs */
72
+
73
+ Mul12(&tt1h,&tt1l,xSqh,highPoly);
74
+ Add22(&t1h,&t1l,tbl[27],0,tt1h,tt1l);
75
+
76
+ MulAdd22(&t2h,&t2l,tbl[25],tbl[26],xSqh,xSql,t1h,t1l);
77
+ MulAdd22(&t3h,&t3l,tbl[23],tbl[24],xSqh,xSql,t2h,t2l);
78
+ MulAdd22(&t4h,&t4l,tbl[21],tbl[22],xSqh,xSql,t3h,t3l);
79
+ MulAdd22(&t5h,&t5l,tbl[19],tbl[20],xSqh,xSql,t4h,t4l);
80
+ MulAdd22(&t6h,&t6l,tbl[17],tbl[18],xSqh,xSql,t5h,t5l);
81
+ MulAdd22(&t7h,&t7l,tbl[15],tbl[16],xSqh,xSql,t6h,t6l);
82
+
83
+ /* Triple-double computations: tbl[0]..tbl[14] are five (high, middle, low) triples.
+ NOTE(review): the trailing "a - b/53" annotations presumably record
+ accuracy and overlap bounds of each intermediate result; confirm
+ against the documentation of the macros in triple-double.h */
84
+
85
+ Mul23(&tt8h,&tt8m,&tt8l,xSqh,xSql,t7h,t7l); /* 149 - 48/53 */
86
+ Add33(&t8h,&t8m,&t8l,tbl[12],tbl[13],tbl[14],tt8h,tt8m,tt8l); /* 145 - 43/53 */
87
+ Mul233(&tt9h,&tt9m,&tt9l,xSqh,xSql,t8h,t8m,t8l); /* 139 - 39/53 */
88
+ Add33(&t9h,&t9m,&t9l,tbl[9],tbl[10],tbl[11],tt9h,tt9m,tt9l); /* 136 - 34/53 */
89
+ Mul233(&tt10h,&tt10m,&tt10l,xSqh,xSql,t9h,t9m,t9l); /* 130 - 30/53 */
90
+ Add33(&t10h,&t10m,&t10l,tbl[6],tbl[7],tbl[8],tt10h,tt10m,tt10l); /* 127 - 25/53 */
91
+ Mul233(&tt11hover,&tt11mover,&tt11lover,xSqh,xSql,t10h,t10m,t10l); /* 121 - 21/53 */
92
+
93
+ Renormalize3(&tt11h,&tt11m,&tt11l,tt11hover,tt11mover,tt11lover); /* infty - 52/53 */
94
+
95
+ Add33(&t11h,&t11m,&t11l,tbl[3],tbl[4],tbl[5],tt11h,tt11m,tt11l); /* 149 - 47/53 */
96
+ Mul233(&tt12h,&tt12m,&tt12l,xSqh,xSql,t11h,t11m,t11l); /* 143 - 43/53 */
97
+ Add33(&t12h,&t12m,&t12l,tbl[0],tbl[1],tbl[2],tt12h,tt12m,tt12l); /* 140 - 38/53 */
98
+
99
+ Mul123(&xCubeh,&xCubem,&xCubel,x,xSqh,xSql); /* 154 - 47/53 */
100
+
101
+ Mul33(&tt13h,&tt13m,&tt13l,xCubeh,xCubem,xCubel,t12h,t12m,t12l); /* 136 - 34/53 */
102
+ Add133(&t13h,&t13m,&t13l,x,tt13h,tt13m,tt13l); /* 138 - 32/53 */
103
+
104
+ Renormalize3(&polyh,&polym,&polyl,t13h,t13m,t13l); /* infty - 52/53 */
105
+ *asinh = sign * polyh;
106
+ *asinm = sign * polym;
107
+ *asinl = sign * polyl;
108
+ }
109
+
110
+
111
+ void asin_accurate_middle(double *asinh, double *asinm, double *asinl, double z, int i, double sign) { /* Accurate (second) phase for the general intervals.
+ Evaluates a polynomial in z whose coefficients are tbl[i+1]..tbl[i+58]
+ and stores sign * p(z) through asinh, asinm and asinl as a renormalized
+ triple-double (high, middle and low part).
+ z is the reduced argument (asin_rn passes x - tbl[i]), i is the table
+ offset of the selected interval, and sign is the factor applied to all
+ three result parts (asin_rn passes +1 or -1). */
112
+ double highPoly;
113
+ double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l, t8h, t8l, t9h, t9l;
114
+ double t10h, t10m, t10l, t11h, t11m, t11l, t12h, t12m, t12l, t13h, t13m, t13l, t14h, t14m, t14l;
115
+ double t15h, t15m, t15l, t16h, t16m, t16l;
116
+ double tt1h, tt1l;
117
+ double tt10h, tt10m, tt10l, tt11h, tt11m, tt11l, tt12h, tt12m, tt12l;
118
+ double tt13h, tt13m, tt13l, tt14h, tt14m, tt14l, tt15h, tt15m, tt15l, tt16h, tt16m, tt16l;
119
+ double polyh, polym, polyl, tt13hover, tt13mover, tt13lover;
120
+
121
+ #if EVAL_PERF
122
+ crlibm_second_step_taken++;
123
+ #endif
124
+
125
+ /* Evaluate the polynomial of degree 35
126
+ Its coefficients start at tbl[i+1]
127
+ Evaluate degrees 35 to 16 in double precision (20 coefficients),
128
+ degrees 15 to 7 in double-double precision (9 coefficients) and
129
+ finally degrees 6 to 1 in triple-double.
130
+ The constant coefficient is a double-double, the
131
+ computations are nevertheless in triple-double
132
+ */
133
+
134
+ /* Double computations: Horner scheme in z over tbl[i+39]..tbl[i+58] */
135
+
136
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
137
+ highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
138
+ tbl[i+58] ,z,tbl[i+57]),z,tbl[i+56]),z,tbl[i+55]),z,tbl[i+54]),z,
139
+ tbl[i+53]),z,tbl[i+52]),z,tbl[i+51]),z,tbl[i+50]),z,tbl[i+49]),z,
140
+ tbl[i+48]),z,tbl[i+47]),z,tbl[i+46]),z,tbl[i+45]),z,tbl[i+44]),z,
141
+ tbl[i+43]),z,tbl[i+42]),z,tbl[i+41]),z,tbl[i+40]),z,tbl[i+39]);
142
+
143
+ #else
144
+ highPoly = tbl[i+39] + z * (tbl[i+40] + z * (tbl[i+41] + z * (tbl[i+42] + z * (
145
+ tbl[i+43] + z * (tbl[i+44] + z * (tbl[i+45] + z * (tbl[i+46] + z * (
146
+ tbl[i+47] + z * (tbl[i+48] + z * (tbl[i+49] + z * (tbl[i+50] + z * (
147
+ tbl[i+51] + z * (tbl[i+52] + z * (tbl[i+53] + z * (tbl[i+54] + z * (
148
+ tbl[i+55] + z * (tbl[i+56] + z * (tbl[i+57] + z * tbl[i+58]))))))))))))))))));
149
+ #endif
150
+
151
+
152
+ /* Double-double computations: tbl[i+21]..tbl[i+38] are nine (high, low) coefficient pairs */
153
+
154
+ Mul12(&tt1h,&tt1l,z,highPoly);
155
+ Add22(&t1h,&t1l,tbl[i+37],tbl[i+38],tt1h,tt1l);
156
+
157
+ MulAdd212(&t2h,&t2l,tbl[i+35],tbl[i+36],z,t1h,t1l);
158
+ MulAdd212(&t3h,&t3l,tbl[i+33],tbl[i+34],z,t2h,t2l);
159
+ MulAdd212(&t4h,&t4l,tbl[i+31],tbl[i+32],z,t3h,t3l);
160
+ MulAdd212(&t5h,&t5l,tbl[i+29],tbl[i+30],z,t4h,t4l);
161
+ MulAdd212(&t6h,&t6l,tbl[i+27],tbl[i+28],z,t5h,t5l);
162
+ MulAdd212(&t7h,&t7l,tbl[i+25],tbl[i+26],z,t6h,t6l);
163
+ MulAdd212(&t8h,&t8l,tbl[i+23],tbl[i+24],z,t7h,t7l);
164
+ MulAdd212(&t9h,&t9l,tbl[i+21],tbl[i+22],z,t8h,t8l);
165
+
166
+ /* Triple-double computations: tbl[i+3]..tbl[i+20] are six (high, middle, low) triples,
+ the constant coefficient is the pair tbl[i+1], tbl[i+2].
+ NOTE(review): the trailing "a - b/53" annotations presumably record
+ accuracy and overlap bounds of each intermediate result; confirm
+ against the documentation of the macros in triple-double.h */
167
+
168
+ Mul123(&tt10h,&tt10m,&tt10l,z,t9h,t9l); /* 154 - 47/53 */
169
+ Add33(&t10h,&t10m,&t10l,tbl[i+18],tbl[i+19],tbl[i+20],tt10h,tt10m,tt10l); /* 144 - 42/53 */
170
+ Mul133(&tt11h,&tt11m,&tt11l,z,t10h,t10m,t10l); /* 142 - 38/53 */
171
+ Add33(&t11h,&t11m,&t11l,tbl[i+15],tbl[i+16],tbl[i+17],tt11h,tt11m,tt11l); /* 136 - 33/53 */
172
+ Mul133(&tt12h,&tt12m,&tt12l,z,t11h,t11m,t11l); /* 133 - 28/53 */
173
+ Add33(&t12h,&t12m,&t12l,tbl[i+12],tbl[i+13],tbl[i+14],tt12h,tt12m,tt12l); /* 125 - 23/53 */
174
+ Mul133(&tt13hover,&tt13mover,&tt13lover,z,t12h,t12m,t12l); /* 123 - 18/53 */
175
+
176
+ Renormalize3(&tt13h,&tt13m,&tt13l,tt13hover,tt13mover,tt13lover); /* infty - 52/53 */
177
+
178
+ Add33(&t13h,&t13m,&t13l,tbl[i+9],tbl[i+10],tbl[i+11],tt13h,tt13m,tt13l); /* 149 - 47/53 */
179
+ Mul133(&tt14h,&tt14m,&tt14l,z,t13h,t13m,t13l); /* 147 - 42/53 */
180
+ Add33(&t14h,&t14m,&t14l,tbl[i+6],tbl[i+7],tbl[i+8],tt14h,tt14m,tt14l); /* 139 - 37/53 */
181
+ Mul133(&tt15h,&tt15m,&tt15l,z,t14h,t14m,t14l); /* 137 - 32/53 */
182
+ Add33(&t15h,&t15m,&t15l,tbl[i+3],tbl[i+4],tbl[i+5],tt15h,tt15m,tt15l); /* 129 - 28/53 */
183
+ Mul133(&tt16h,&tt16m,&tt16l,z,t15h,t15m,t15l); /* 128 - 23/53 */
184
+ Add233(&t16h,&t16m,&t16l,tbl[i+1],tbl[i+2],tt16h,tt16m,tt16l); /* 126 - 19/53 */
185
+
186
+ Renormalize3(&polyh,&polym,&polyl,t16h,t16m,t16l); /* infty - 52/53 */
187
+ *asinh = sign * polyh;
188
+ *asinm = sign * polym;
189
+ *asinl = sign * polyl;
190
+ }
191
+
192
+
193
+ void asin_accurate_higher(double *asinh, double *asinm, double *asinl, double z, double sign) { /* Accurate (second) phase for the interval next to 1.
+ Evaluates p(z) * sqrt(2*z) + Pi/2 in triple-double, where p has the
+ coefficients tbl[TBLIDX10]..tbl[TBLIDX10+53] and an implicit constant
+ term -1, and stores sign times that value through asinh, asinm and
+ asinl as a renormalized triple-double (high, middle and low part).
+ z is the reduced argument (asin_rn passes 1 - |x|) and sign is the
+ factor applied to all three result parts (asin_rn passes +1 or -1). */
194
+ double highPoly;
195
+ double tt1h, tt1l;
196
+ double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l, t8h, t8l;
197
+ double tt10h, tt10m, tt10l, tt11h, tt11m, tt11l, tt12h, tt12m, tt12l, tt13h, tt13m, tt13l;
198
+ double tt14h, tt14m, tt14l, tt15h, tt15m, tt15l, tt16h, tt16m, tt16l, tt17h, tt17m, tt17l;
199
+ double t9h, t9l, t10h, t10m, t10l, t11h, t11m, t11l, t12h, t12m, t12l, t13h, t13m, t13l;
200
+ double t14h, t14m, t14l, t15h, t15m, t15l, t16h, t16m, t16l, t17h, t17m, t17l;
201
+ double tt18h, tt18m, tt18l, polyh, polym, polyl;
202
+ double sqrtzh, sqrtzm, sqrtzl, twoZ, pTimesSh, pTimesSm, pTimesSl;
203
+ double allhover, allmover, alllover, allh, allm, alll;
204
+ double tt13hover, tt13mover, tt13lover, tt16hover, tt16mover, tt16lover;
205
+
206
+ #if EVAL_PERF
207
+ crlibm_second_step_taken++;
208
+ #endif
209
+
210
+ /* We evaluate asin(x) as
211
+
212
+ asin(x) = f(z) * sqrt(2*z) + Pi/2
213
+
214
+ with z = 1 - x and
215
+
216
+ f(z) = (asin(1 - z) - Pi/2) / sqrt(2*z)
217
+
218
+ f(z) is approximated by p(z)
219
+
220
+ The polynomial p(z) is of degree 29
221
+ Its coefficients start at tbl[TBLIDX10]
222
+ Coefficients for degrees 29 to 18 are in double precision,
223
+ for degrees 17 to 9 in double-double precision and
224
+ finally for degrees 8 to 1 in triple-double.
225
+ The constant coefficient (-1) is not stored in the table,
226
+ the computations are nevertheless in triple-double
227
+ We evaluate the monomials in the precision in which
228
+ the corresponding coefficients are stored
229
+ The coefficients' values decrease very quickly
230
+ so even with |z| < 2^-2.18 we can compute degree 18
231
+ already in double precision
232
+
233
+ Then compute sqrt(2*z) as a triple-double,
234
+ multiply in triple-double and add Pi/2
235
+ We will cancel no bit in the addition since
236
+ f(z) < 0.5 * Pi/2
237
+
238
+ */
239
+
240
+ /* Double computations: Horner scheme in z over tbl[TBLIDX10+42]..tbl[TBLIDX10+53] */
241
+
242
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
243
+ highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
244
+ tbl[TBLIDX10+53] ,z,tbl[TBLIDX10+52]),z,tbl[TBLIDX10+51]),z,
245
+ tbl[TBLIDX10+50]),z,tbl[TBLIDX10+49]),z,tbl[TBLIDX10+48]),z,
246
+ tbl[TBLIDX10+47]),z,tbl[TBLIDX10+46]),z,tbl[TBLIDX10+45]),z,
247
+ tbl[TBLIDX10+44]),z,tbl[TBLIDX10+43]),z,tbl[TBLIDX10+42]);
248
+ #else
249
+ highPoly = tbl[TBLIDX10+42] + z * (tbl[TBLIDX10+43] + z * (tbl[TBLIDX10+44] + z * (
250
+ tbl[TBLIDX10+45] + z * (tbl[TBLIDX10+46] + z * (tbl[TBLIDX10+47] + z * (
251
+ tbl[TBLIDX10+48] + z * (tbl[TBLIDX10+49] + z * (tbl[TBLIDX10+50] + z * (
252
+ tbl[TBLIDX10+51] + z * (tbl[TBLIDX10+52] + z * tbl[TBLIDX10+53]))))))))));
253
+ #endif
254
+
255
+ /* Double-double computations: tbl[TBLIDX10+24]..tbl[TBLIDX10+41] are nine (high, low) coefficient pairs */
256
+
257
+ Mul12(&tt1h,&tt1l,z,highPoly);
258
+ Add22(&t1h,&t1l,tbl[TBLIDX10+40],tbl[TBLIDX10+41],tt1h,tt1l);
259
+
260
+ MulAdd212(&t2h,&t2l,tbl[TBLIDX10+38],tbl[TBLIDX10+39],z,t1h,t1l);
261
+ MulAdd212(&t3h,&t3l,tbl[TBLIDX10+36],tbl[TBLIDX10+37],z,t2h,t2l);
262
+ MulAdd212(&t4h,&t4l,tbl[TBLIDX10+34],tbl[TBLIDX10+35],z,t3h,t3l);
263
+ MulAdd212(&t5h,&t5l,tbl[TBLIDX10+32],tbl[TBLIDX10+33],z,t4h,t4l);
264
+ MulAdd212(&t6h,&t6l,tbl[TBLIDX10+30],tbl[TBLIDX10+31],z,t5h,t5l);
265
+ MulAdd212(&t7h,&t7l,tbl[TBLIDX10+28],tbl[TBLIDX10+29],z,t6h,t6l);
266
+ MulAdd212(&t8h,&t8l,tbl[TBLIDX10+26],tbl[TBLIDX10+27],z,t7h,t7l);
267
+ MulAdd212(&t9h,&t9l,tbl[TBLIDX10+24],tbl[TBLIDX10+25],z,t8h,t8l);
268
+
269
+ /* Triple-double computations: tbl[TBLIDX10+0]..tbl[TBLIDX10+23] are eight (high, middle, low) triples.
+ NOTE(review): the trailing "a - b/53" annotations presumably record
+ accuracy and overlap bounds of each intermediate result; confirm
+ against the documentation of the macros in triple-double.h */
270
+
271
+ Mul123(&tt10h,&tt10m,&tt10l,z,t9h,t9l); /* 154 - 47/53 */
272
+ Add33(&t10h,&t10m,&t10l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tbl[TBLIDX10+23],tt10h,tt10m,tt10l); /* 144 - 42/53 */
273
+ Mul133(&tt11h,&tt11m,&tt11l,z,t10h,t10m,t10l); /* 142 - 37/53 */
274
+ Add33(&t11h,&t11m,&t11l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],tbl[TBLIDX10+20],tt11h,tt11m,tt11l); /* 134 - 32/53 */
275
+ Mul133(&tt12h,&tt12m,&tt12l,z,t11h,t11m,t11l); /* 132 - 27/53 */
276
+ Add33(&t12h,&t12m,&t12l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],tbl[TBLIDX10+17],tt12h,tt12m,tt12l); /* 124 - 22/53 */
277
+ Mul133(&tt13hover,&tt13mover,&tt13lover,z,t12h,t12m,t12l); /* 122 - 17/53 */
278
+
279
+ Renormalize3(&tt13h,&tt13m,&tt13l,tt13hover,tt13mover,tt13lover); /* infty - 52/53 */
280
+
281
+ Add33(&t13h,&t13m,&t13l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],tbl[TBLIDX10+14],tt13h,tt13m,tt13l); /* 149 - 47/53 */
282
+ Mul133(&tt14h,&tt14m,&tt14l,z,t13h,t13m,t13l); /* 147 - 42/53 */
283
+ Add33(&t14h,&t14m,&t14l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],tbl[TBLIDX10+11],tt14h,tt14m,tt14l); /* 139 - 37/53 */
284
+ Mul133(&tt15h,&tt15m,&tt15l,z,t14h,t14m,t14l); /* 137 - 32/53 */
285
+ Add33(&t15h,&t15m,&t15l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],tbl[TBLIDX10+8],tt15h,tt15m,tt15l); /* 129 - 27/53 */
286
+ Mul133(&tt16hover,&tt16mover,&tt16lover,z,t15h,t15m,t15l); /* 127 - 22/53 */
287
+
288
+ Renormalize3(&tt16h,&tt16m,&tt16l,tt16hover,tt16mover,tt16lover); /* infty - 52/53 */
289
+
290
+ Add33(&t16h,&t16m,&t16l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],tbl[TBLIDX10+5],tt16h,tt16m,tt16l); /* 149 - 47/53 */
291
+ Mul133(&tt17h,&tt17m,&tt17l,z,t16h,t16m,t16l); /* 147 - 42/53 */
292
+ Add33(&t17h,&t17m,&t17l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],tbl[TBLIDX10+2],tt17h,tt17m,tt17l); /* 139 - 37/53 */
293
+ Mul133(&tt18h,&tt18m,&tt18l,z,t17h,t17m,t17l); /* 137 - 32/53 */
294
+ Add133(&polyh,&polym,&polyl,-1,tt18h,tt18m,tt18l); /* 136 - 30/53 */
295
+
296
+ /* Compute sqrt(2*z) as a triple-double */
297
+
298
+ twoZ = 2 * z;
299
+ Sqrt13(&sqrtzh,&sqrtzm,&sqrtzl,twoZ); /* 146 - 52/53 */
300
+
301
+ /* Multiply p(z) by sqrt(2*z) and add Pi/2 */
302
+
303
+ Mul33(&pTimesSh,&pTimesSm,&pTimesSl,polyh,polym,polyl,sqrtzh,sqrtzm,sqrtzl); /* 128 - 26/53 */
304
+ Add33(&allhover,&allmover,&alllover,PIHALFH,PIHALFM,PIHALFL,pTimesSh,pTimesSm,pTimesSl); /* 126 - 21/53 */
305
+
306
+ /* Renormalize and multiply by sign */
307
+ Renormalize3(&allh,&allm,&alll,allhover,allmover,alllover); /* infty - 52/53 */
308
+ *asinh = sign * allh;
309
+ *asinm = sign * allm;
310
+ *asinl = sign * alll;
311
+ }
312
+
313
+
314
+
315
+
316
+
317
+
318
+
319
+
320
+ double asin_rn(double x) { /* Arcsine of x, rounded to nearest.
+ Returns NaN when |x| > 1 (the high-word test also catches infinities
+ and NaNs) and returns x unchanged when |x| < 2^(-28).
+ Otherwise one of three paths is taken, selected on the high word of |x|:
+ below BOUND1, at or above BOUND9, or one of the eight intervals between.
+ Each path runs a quick double-double evaluation followed by a rounding
+ test; if the test fails, the matching asin_accurate_* routine is called
+ and its triple-double result is rounded by ReturnRoundToNearest3.
+ NOTE(review): HI and LO presumably index the high and low 32-bit
+ words of the double inside db_number; confirm in crlibm_private.h */
321
+ db_number xdb;
322
+ double sign, z, asinh, asinm, asinl;
323
+ int i;
324
+ double xSqh, xSql;
325
+ double tt1h, tt1l;
326
+ double tt6h, tt6l;
327
+ double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
328
+ double t7h, t7l, t8h, t8l, polyh, polyl, twoZ, sqrtzh, sqrtzl;
329
+ double pTimesSh, pTimesSl, allh, alll, highPoly, xCubeh, xCubel;
330
+ double tmp1, tmp2, tmp3, tmp4, tmp5;
331
+
332
+ /* Transform the argument into integer */
333
+ xdb.d = x;
334
+
335
+ /* Special case handling */
336
+
337
+ /* Strip off the sign of argument x; from here on xdb holds |x| */
338
+ if (xdb.i[HI] & 0x80000000) sign = -1; else sign = 1;
339
+ xdb.i[HI] &= 0x7fffffff;
340
+
341
+ /* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
342
+ if ((xdb.i[HI] > 0x3ff00000) || ((xdb.i[HI] == 0x3ff00000) && (xdb.i[LO] != 0x00000000))) {
343
+ return (x-x)/0.0; /* return NaN */
344
+ }
345
+
346
+ /* If |x| < 2^(-28) we have
347
+
348
+ arcsin(x) = x * ( 1 + xi )
349
+
350
+ with 0 <= xi < 2^(-55)
351
+
352
+ So we can decide the rounding without any computation
353
+ */
354
+ if (xdb.i[HI] < 0x3e300000) {
355
+ return x;
356
+ }
357
+
358
+ /* Recast x: x now holds |x|, the sign is reapplied on the results */
359
+ x = xdb.d;
360
+
361
+ /* Find the corresponding interval and compute the index to the table
362
+ We start by filtering the two special cases around 0 and 1
363
+ */
364
+
365
+ if (xdb.i[HI] < BOUND1) {
366
+ /* Special interval 0..BOUND1
367
+ The polynomial has no even monomials
368
+ We must prove extra accuracy in the interval 0..sin(2^(-18))
369
+ */
370
+
371
+ /* Quick phase starts */
372
+
373
+ /* Compute square of x for both quick and accurate phases */
374
+ Mul12(&xSqh,&xSql,x,x);
375
+
376
+ tmp4 = tbl[3];
377
+ tmp5 = tbl[4];
378
+ t4h = tmp4; /* default used when the higher-degree terms below are skipped */
379
+ t4l = tmp5;
380
+ if (xdb.i[HI] > EXTRABOUND) {
381
+ /* Double precision evaluation */
382
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
383
+ highPoly = FMA(FMA(FMA(FMA(tbl[23],xSqh,tbl[21]),xSqh,tbl[19]),xSqh,tbl[17]),xSqh,tbl[15]);
384
+ #else
385
+ highPoly = tbl[15] + xSqh * (tbl[17] + xSqh * (tbl[19] + xSqh * (tbl[21] + xSqh * tbl[23])));
386
+ #endif
387
+
388
+ /* Double-double precision evaluation */
389
+ Mul12(&tt1h,&tt1l,xSqh,highPoly);
390
+ Add22(&t1h,&t1l,tbl[12],tbl[13],tt1h,tt1l);
391
+
392
+ MulAdd212(&t2h,&t2l,tbl[9],tbl[10],xSqh,t1h,t1l);
393
+ MulAdd212(&t3h,&t3l,tbl[6],tbl[7],xSqh,t2h,t2l);
394
+ MulAdd22(&t4h,&t4l,tmp4,tmp5,xSqh,xSql,t3h,t3l);
395
+ }
396
+
397
+ MulAdd22(&t5h,&t5l,tbl[0],tbl[1],xSqh,xSql,t4h,t4l);
398
+
399
+ Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
400
+ Mul22(&tt6h,&tt6l,xCubeh,xCubel,t5h,t5l);
401
+
402
+ Add12(tmp1,tmp2,x,tt6h);
403
+ tmp3 = tmp2 + tt6l;
404
+ Add12(polyh,polyl,tmp1,tmp3);
405
+
406
+ /* Multiply by sign */
407
+ asinh = sign * polyh;
408
+ asinm = sign * polyl;
409
+
410
+ /* Rounding test (on polyh+polyl, equivalently to asinh+asinm)
411
+ The RN rounding constant is at tbl[34]
412
+ */
413
+ if(polyh == (polyh + (polyl * tbl[34])))
414
+ return asinh;
415
+
416
+ /* Launch accurate phase */
417
+
418
+ asin_accurate_lower(&asinh,&asinm,&asinl,x,xSqh,xSql,sign);
419
+
420
+ ReturnRoundToNearest3(asinh,asinm,asinl);
421
+ }
422
+
423
+ if (xdb.i[HI] >= BOUND9) {
424
+ /* Special interval BOUND9..1
425
+ We use an asymptotic development of arcsin in sqrt(1 - x)
426
+ */
427
+
428
+ /* Argument reduction for quick and accurate phase
429
+ z = 1 - x
430
+ The operation is exact as per Sterbenz' lemma
431
+ */
432
+
433
+ z = 1 - x;
434
+
435
+ /* Quick phase starts */
436
+
437
+ /* Double precision evaluation */
438
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
439
+ highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
440
+ tbl[TBLIDX10+42] ,z,tbl[TBLIDX10+40]),z,tbl[TBLIDX10+38]),z,
441
+ tbl[TBLIDX10+36]),z,tbl[TBLIDX10+34]),z,tbl[TBLIDX10+32]),z,
442
+ tbl[TBLIDX10+30]),z,tbl[TBLIDX10+28]),z,tbl[TBLIDX10+26]),z,
443
+ tbl[TBLIDX10+24]);
444
+ #else
445
+ highPoly = tbl[TBLIDX10+24] + z * (tbl[TBLIDX10+26] + z * (tbl[TBLIDX10+28] + z * (
446
+ tbl[TBLIDX10+30] + z * (tbl[TBLIDX10+32] + z * (tbl[TBLIDX10+34] + z * (
447
+ tbl[TBLIDX10+36] + z * (tbl[TBLIDX10+38] + z * (tbl[TBLIDX10+40] + z *
448
+ tbl[TBLIDX10+42]))))))));
449
+ #endif
450
+
451
+ /* Double-double precision evaluation */
452
+ Mul12(&tt1h,&tt1l,z,highPoly);
453
+ Add22(&t1h,&t1l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tt1h,tt1l);
454
+
455
+ MulAdd212(&t2h,&t2l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],z,t1h,t1l);
456
+ MulAdd212(&t3h,&t3l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],z,t2h,t2l);
457
+ MulAdd212(&t4h,&t4l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],z,t3h,t3l);
458
+ MulAdd212(&t5h,&t5l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],z,t4h,t4l);
459
+ MulAdd212(&t6h,&t6l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],z,t5h,t5l);
460
+ MulAdd212(&t7h,&t7l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],z,t6h,t6l);
461
+ MulAdd212(&t8h,&t8l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],z,t7h,t7l);
462
+ MulAdd212(&polyh,&polyl,-1,0,z,t8h,t8l);
463
+
464
+ /* Compute sqrt(2*z) as a double-double */
465
+
466
+ twoZ = 2 * z;
467
+ sqrt12(&sqrtzh,&sqrtzl,twoZ);
468
+
469
+ /* Multiply p(z) by sqrt(2*z) and add Pi/2 */
470
+
471
+ Mul22(&pTimesSh,&pTimesSl,polyh,polyl,sqrtzh,sqrtzl);
472
+ Add22(&allh,&alll,PIHALFH,PIHALFM,pTimesSh,pTimesSl);
473
+
474
+ /* Multiply by sign */
475
+ asinh = sign * allh;
476
+ asinm = sign * alll;
477
+
478
+ /* Rounding test
479
+ The RN rounding constant is at tbl[TBLIDX10+54]
480
+ */
481
+
482
+ if(allh == (allh + (alll * tbl[TBLIDX10+54])))
483
+ return asinh;
484
+
485
+ /* Launch accurate phase */
486
+
487
+ asin_accurate_higher(&asinh,&asinm,&asinl,z,sign);
488
+
489
+ ReturnRoundToNearest3(asinh,asinm,asinl);
490
+ }
491
+
492
+ /* General 8 main intervals
493
+ We can already suppose that BOUND1 <= xdb.i[HI] < BOUND9
494
+ */
495
+
496
+ if (xdb.i[HI] < BOUND5) {
497
+ if (xdb.i[HI] < BOUND3) {
498
+ if (xdb.i[HI] < BOUND2) i = TBLIDX2; else i = TBLIDX3;
499
+ } else {
500
+ if (xdb.i[HI] < BOUND4) i = TBLIDX4; else i = TBLIDX5;
501
+ }
502
+ } else {
503
+ if (xdb.i[HI] < BOUND7) {
504
+ if (xdb.i[HI] < BOUND6) i = TBLIDX6; else i = TBLIDX7;
505
+ } else {
506
+ if (xdb.i[HI] < BOUND8) i = TBLIDX8; else i = TBLIDX9;
507
+ }
508
+ }
509
+
510
+ /* Argument reduction
511
+ i points to the interval midpoint value in the table
512
+ */
513
+ z = x - tbl[i];
514
+
515
+ /* Quick phase starts */
516
+
517
+ /* Double precision evaluation */
518
+
519
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
520
+ highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(
521
+ tbl[i+35] ,z,tbl[i+33]),z,tbl[i+31]),z,tbl[i+29]),z,
522
+ tbl[i+27]),z,tbl[i+25]),z,tbl[i+23]),z,tbl[i+21]);
523
+ #else
524
+ highPoly = tbl[i+21] + z * (tbl[i+23] + z * (tbl[i+25] + z * (
525
+ tbl[i+27] + z * (tbl[i+29] + z * (tbl[i+31] + z * (
526
+ tbl[i+33] + z * tbl[i+35]))))));
527
+ #endif
528
+
529
+ /* Double-double precision evaluation */
530
+
531
+ Mul12(&tt1h,&tt1l,z,highPoly);
532
+ Add22(&t1h,&t1l,tbl[i+18],tbl[i+19],tt1h,tt1l);
533
+
534
+ MulAdd212(&t2h,&t2l,tbl[i+15],tbl[i+16],z,t1h,t1l);
535
+ MulAdd212(&t3h,&t3l,tbl[i+12],tbl[i+13],z,t2h,t2l);
536
+ MulAdd212(&t4h,&t4l,tbl[i+9],tbl[i+10],z,t3h,t3l);
537
+ MulAdd212(&t5h,&t5l,tbl[i+6],tbl[i+7],z,t4h,t4l);
538
+ MulAdd212(&t6h,&t6l,tbl[i+3],tbl[i+4],z,t5h,t5l);
539
+ MulAdd212(&polyh,&polyl,tbl[i+1],tbl[i+2],z,t6h,t6l);
540
+
541
+ /* Multiply by sign */
542
+ asinh = sign * polyh;
543
+ asinm = sign * polyl;
544
+
545
+ /* Rounding test
546
+ The RN rounding constant is at tbl[i+59]
547
+ */
548
+ if(polyh == (polyh + (polyl * tbl[i+59])))
549
+ return asinh;
550
+
551
+ /* Launch accurate phase */
552
+
553
+ asin_accurate_middle(&asinh,&asinm,&asinl,z,i,sign);
554
+
555
+ ReturnRoundToNearest3(asinh,asinm,asinl);
556
+ }
557
+
558
+
559
+
560
+
561
+
562
+
563
+ double asin_ru(double x) {
564
+ db_number xdb;
565
+ double sign, z, asinh, asinm, asinl;
566
+ int i;
567
+ double xSqh, xSql;
568
+ double tt1h, tt1l;
569
+ double tt6h, tt6l;
570
+ double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
571
+ double t7h, t7l, t8h, t8l, polyh, polyl, twoZ, sqrtzh, sqrtzl;
572
+ double pTimesSh, pTimesSl, allh, alll, highPoly, xCubeh, xCubel;
573
+ double tmp1, tmp2, tmp3, tmp4, tmp5;
574
+
575
+ /* Transform the argument into integer */
576
+ xdb.d = x;
577
+
578
+ /* Special case handling */
579
+
580
+ /* Strip off the sign of argument x */
581
+ if (xdb.i[HI] & 0x80000000) sign = -1; else sign = 1;
582
+ xdb.i[HI] &= 0x7fffffff;
583
+
584
+ /* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
585
+ if ((xdb.i[HI] > 0x3ff00000) || ((xdb.i[HI] == 0x3ff00000) && (xdb.i[LO] != 0x00000000))) {
586
+ return (x-x)/0.0; /* return NaN */
587
+ }
588
+
589
+ /* If |x| < 2^(-28) we have
590
+
591
+ arcsin(x) = x * ( 1 + xi )
592
+
593
+ with 0 <= xi < 2^(-55)
594
+
595
+ So we can decide the rounding without any computation
596
+ */
597
+ if (xdb.i[HI] < 0x3e300000) {
598
+ /* If x == 0 then we got the algebraic result arcsin(0) = 0
599
+ If x < 0 then the truncation rest is negative but less than
600
+ 1 ulp; we round upwards by returning x
601
+ */
602
+ if (x <= 0) return x;
603
+ /* Otherwise the rest is positive, less than 1 ulp and the
604
+ image is not algebraic
605
+ We return x + 1ulp
606
+ */
607
+ xdb.l++;
608
+ return xdb.d;
609
+ }
610
+
611
+ /* Recast x */
612
+ x = xdb.d;
613
+
614
+ /* Find correspondant interval and compute index to the table
615
+ We start by filtering the two special cases around 0 and 1
616
+ */
617
+
618
+ if (xdb.i[HI] < BOUND1) {
619
+ /* Special interval 0..BOUND1
620
+ The polynomial has no even monomials
621
+ We must prove extra accuracy in the interval 0..sin(2^(-18))
622
+ */
623
+
624
+ /* Quick phase starts */
625
+
626
+ /* Compute square of x for both quick and accurate phases */
627
+ Mul12(&xSqh,&xSql,x,x);
628
+
629
+ tmp4 = tbl[3];
630
+ tmp5 = tbl[4];
631
+ t4h = tmp4;
632
+ t4l = tmp5;
633
+ if (xdb.i[HI] > EXTRABOUND) {
634
+ /* Double precision evaluation */
635
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
636
+ highPoly = FMA(FMA(FMA(FMA(tbl[23],xSqh,tbl[21]),xSqh,tbl[19]),xSqh,tbl[17]),xSqh,tbl[15]);
637
+ #else
638
+ highPoly = tbl[15] + xSqh * (tbl[17] + xSqh * (tbl[19] + xSqh * (tbl[21] + xSqh * tbl[23])));
639
+ #endif
640
+
641
+ /* Double-double precision evaluation */
642
+ Mul12(&tt1h,&tt1l,xSqh,highPoly);
643
+ Add22(&t1h,&t1l,tbl[12],tbl[13],tt1h,tt1l);
644
+
645
+ MulAdd212(&t2h,&t2l,tbl[9],tbl[10],xSqh,t1h,t1l);
646
+ MulAdd212(&t3h,&t3l,tbl[6],tbl[7],xSqh,t2h,t2l);
647
+ MulAdd22(&t4h,&t4l,tmp4,tmp5,xSqh,xSql,t3h,t3l);
648
+ }
649
+
650
+ MulAdd22(&t5h,&t5l,tbl[0],tbl[1],xSqh,xSql,t4h,t4l);
651
+
652
+ Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
653
+ Mul22(&tt6h,&tt6l,xCubeh,xCubel,t5h,t5l);
654
+
655
+ Add12(tmp1,tmp2,x,tt6h);
656
+ tmp3 = tmp2 + tt6l;
657
+ Add12(polyh,polyl,tmp1,tmp3);
658
+
659
+ /* Multiply by sign */
660
+ asinh = sign * polyh;
661
+ asinm = sign * polyl;
662
+
663
+ /* Rounding test
664
+ The RU rounding constant is at tbl[35]
665
+ */
666
+ TEST_AND_RETURN_RU(asinh, asinm, tbl[35]);
667
+
668
+ /* Launch accurate phase */
669
+
670
+ asin_accurate_lower(&asinh,&asinm,&asinl,x,xSqh,xSql,sign);
671
+
672
+ ReturnRoundUpwards3(asinh,asinm,asinl);
673
+ }
674
+
675
+ if (xdb.i[HI] > BOUND9) {
676
+ /* Special interval BOUND9..1
677
+ We use an asymptotic development of arcsin in sqrt(1 - x)
678
+ */
679
+
680
+ /* Argument reduction for quick and accurate phase
681
+ z = 1 - x
682
+ The operation is exact as per Sterbenz' lemma
683
+ */
684
+
685
+ z = 1 - x;
686
+
687
+ /* Quick phase starts */
688
+
689
+ /* Double precision evaluation */
690
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
691
+ highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
692
+ tbl[TBLIDX10+42] ,z,tbl[TBLIDX10+40]),z,tbl[TBLIDX10+38]),z,
693
+ tbl[TBLIDX10+36]),z,tbl[TBLIDX10+34]),z,tbl[TBLIDX10+32]),z,
694
+ tbl[TBLIDX10+30]),z,tbl[TBLIDX10+28]),z,tbl[TBLIDX10+26]),z,
695
+ tbl[TBLIDX10+24]);
696
+ #else
697
+ highPoly = tbl[TBLIDX10+24] + z * (tbl[TBLIDX10+26] + z * (tbl[TBLIDX10+28] + z * (
698
+ tbl[TBLIDX10+30] + z * (tbl[TBLIDX10+32] + z * (tbl[TBLIDX10+34] + z * (
699
+ tbl[TBLIDX10+36] + z * (tbl[TBLIDX10+38] + z * (tbl[TBLIDX10+40] + z *
700
+ tbl[TBLIDX10+42]))))))));
701
+ #endif
702
+
703
+ /* Double-double precision evaluation */
704
+ Mul12(&tt1h,&tt1l,z,highPoly);
705
+ Add22(&t1h,&t1l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tt1h,tt1l);
706
+
707
+ MulAdd212(&t2h,&t2l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],z,t1h,t1l);
708
+ MulAdd212(&t3h,&t3l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],z,t2h,t2l);
709
+ MulAdd212(&t4h,&t4l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],z,t3h,t3l);
710
+ MulAdd212(&t5h,&t5l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],z,t4h,t4l);
711
+ MulAdd212(&t6h,&t6l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],z,t5h,t5l);
712
+ MulAdd212(&t7h,&t7l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],z,t6h,t6l);
713
+ MulAdd212(&t8h,&t8l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],z,t7h,t7l);
714
+ MulAdd212(&polyh,&polyl,-1,0,z,t8h,t8l);
715
+
716
+ /* Compute sqrt(2*z) as a double-double */
717
+
718
+ twoZ = 2 * z;
719
+ sqrt12(&sqrtzh,&sqrtzl,twoZ);
720
+
721
+ /* Multiply p(z) by sqrt(2*z) and add Pi/2 */
722
+
723
+ Mul22(&pTimesSh,&pTimesSl,polyh,polyl,sqrtzh,sqrtzl);
724
+ Add22(&allh,&alll,PIHALFH,PIHALFM,pTimesSh,pTimesSl);
725
+
726
+ /* Multiply by sign */
727
+ asinh = sign * allh;
728
+ asinm = sign * alll;
729
+
730
+ /* Rounding test
731
+ The RU rounding constant is at tbl[TBLIDX10+55]
732
+ */
733
+ TEST_AND_RETURN_RU(asinh, asinm, tbl[TBLIDX10+55]);
734
+
735
+ /* Launch accurate phase */
736
+
737
+ asin_accurate_higher(&asinh,&asinm,&asinl,z,sign);
738
+
739
+ ReturnRoundUpwards3(asinh,asinm,asinl);
740
+ }
741
+
742
+ /* General 8 main intervals
743
+ We can already suppose that BOUND1 <= x <= BOUND9
744
+ */
745
+
746
+ if (xdb.i[HI] < BOUND5) {
747
+ if (xdb.i[HI] < BOUND3) {
748
+ if (xdb.i[HI] < BOUND2) i = TBLIDX2; else i = TBLIDX3;
749
+ } else {
750
+ if (xdb.i[HI] < BOUND4) i = TBLIDX4; else i = TBLIDX5;
751
+ }
752
+ } else {
753
+ if (xdb.i[HI] < BOUND7) {
754
+ if (xdb.i[HI] < BOUND6) i = TBLIDX6; else i = TBLIDX7;
755
+ } else {
756
+ if (xdb.i[HI] < BOUND8) i = TBLIDX8; else i = TBLIDX9;
757
+ }
758
+ }
759
+
760
+ /* Argument reduction
761
+ i points to the interval midpoint value in the table
762
+ */
763
+ z = x - tbl[i];
764
+
765
+ /* Quick phase starts */
766
+
767
+ /* Double precision evaluation */
768
+
769
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
770
+ highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(
771
+ tbl[i+35] ,z,tbl[i+33]),z,tbl[i+31]),z,tbl[i+29]),z,
772
+ tbl[i+27]),z,tbl[i+25]),z,tbl[i+23]),z,tbl[i+21]);
773
+ #else
774
+ highPoly = tbl[i+21] + z * (tbl[i+23] + z * (tbl[i+25] + z * (
775
+ tbl[i+27] + z * (tbl[i+29] + z * (tbl[i+31] + z * (
776
+ tbl[i+33] + z * tbl[i+35]))))));
777
+ #endif
778
+
779
+ /* Double-double precision evaluation */
780
+
781
+ Mul12(&tt1h,&tt1l,z,highPoly);
782
+ Add22(&t1h,&t1l,tbl[i+18],tbl[i+19],tt1h,tt1l);
783
+
784
+ MulAdd212(&t2h,&t2l,tbl[i+15],tbl[i+16],z,t1h,t1l);
785
+ MulAdd212(&t3h,&t3l,tbl[i+12],tbl[i+13],z,t2h,t2l);
786
+ MulAdd212(&t4h,&t4l,tbl[i+9],tbl[i+10],z,t3h,t3l);
787
+ MulAdd212(&t5h,&t5l,tbl[i+6],tbl[i+7],z,t4h,t4l);
788
+ MulAdd212(&t6h,&t6l,tbl[i+3],tbl[i+4],z,t5h,t5l);
789
+ MulAdd212(&polyh,&polyl,tbl[i+1],tbl[i+2],z,t6h,t6l);
790
+
791
+ /* Multiply by sign */
792
+ asinh = sign * polyh;
793
+ asinm = sign * polyl;
794
+
795
+ /* Rounding test
796
+ The RU rounding constant is at tbl[i+60]
797
+ */
798
+ TEST_AND_RETURN_RU(asinh, asinm, tbl[i+60]);
799
+
800
+ /* Launch accurate phase */
801
+
802
+ asin_accurate_middle(&asinh,&asinm,&asinl,z,i,sign);
803
+
804
+ ReturnRoundUpwards3(asinh,asinm,asinl);
805
+ }
806
+
807
+ double asin_rd(double x) {
808
+ db_number xdb;
809
+ double sign, z, asinh, asinm, asinl;
810
+ int i;
811
+ double xSqh, xSql;
812
+ double tt1h, tt1l;
813
+ double tt6h, tt6l;
814
+ double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
815
+ double t7h, t7l, t8h, t8l, polyh, polyl, twoZ, sqrtzh, sqrtzl;
816
+ double pTimesSh, pTimesSl, allh, alll, highPoly, xCubeh, xCubel;
817
+ double tmp1, tmp2, tmp3, tmp4, tmp5;
818
+ /* asin_rd: arcsine of x, rounded downwards (RD variant).
+ Outline of the code below:
+ - the sign of x is split off and the work is done on |x|;
+ - arguments outside -1 <= x <= 1 yield NaN;
+ - for |x| < 2^(-28) the result is decided from x alone;
+ - otherwise one of three paths is taken, selected on the high word
+ of |x|: below BOUND1, above BOUND9, or one of eight intervals in
+ between. Each path runs a quick phase producing asinh + asinm,
+ then the rounding test TEST_AND_RETURN_RD and, when that test does
+ not return, an accurate phase producing asinh + asinm + asinl that
+ is handed to ReturnRoundDownwards3.
+ tbl, the BOUNDn / TBLIDXn / EXTRABOUND constants, db_number and all
+ helper macros and asin_accurate_* routines are declared elsewhere.
+ */
819
+ /* Transform the argument into integer */
820
+ xdb.d = x;
821
+
822
+ /* Special case handling */
823
+
824
+ /* Strip off the sign of argument x: sign becomes -1 or 1 and xdb holds |x| */
825
+ if (xdb.i[HI] & 0x80000000) sign = -1; else sign = 1;
826
+ xdb.i[HI] &= 0x7fffffff;
827
+
828
+ /* asin is defined on -1 <= x <= 1, elsewhere it is NaN.
+ The test is made on the words of |x|, so infinities and NaNs
+ (high word >= 0x7ff00000) also end up in this branch */
829
+ if ((xdb.i[HI] > 0x3ff00000) || ((xdb.i[HI] == 0x3ff00000) && (xdb.i[LO] != 0x00000000))) {
830
+ return (x-x)/0.0; /* return NaN */
831
+ }
832
+
833
+ /* If |x| < 2^(-28) we have
834
+
835
+ arcsin(x) = x * ( 1 + xi )
836
+
837
+ with 0 <= xi < 2^(-55)
838
+
839
+ So we can decide the rounding without any computation
+ (0x3e300000 is the high word of 2^(-28))
840
+ */
841
+ if (xdb.i[HI] < 0x3e300000) {
842
+ /* If x == 0 then we got the algebraic result arcsin(0) = 0
843
+ If x > 0 then the truncation rest is positive but less than
844
+ 1 ulp; we round downwards by returning x
845
+ */
846
+ if (x >= 0) return x;
847
+ /* Otherwise the rest is negative, less than 1 ulp and the
848
+ image is not algebraic
849
+ We return x - 1ulp
850
+ We stripped off the sign, so we add 1 ulp to -x (in xdb.d) and multiply by -1
851
+ */
852
+ xdb.l++;
853
+ return -1 * xdb.d;
854
+ }
855
+
856
+ /* Recast x: from here on x holds |x| */
857
+ x = xdb.d;
858
+
859
+ /* Find the corresponding interval and compute the index into the table
860
+ We start by filtering the two special cases around 0 and 1
861
+ */
862
+
863
+ if (xdb.i[HI] < BOUND1) {
864
+ /* Special interval 0..BOUND1
865
+ The polynomial has no even monomials
866
+ We must prove extra accuracy in the interval 0..sin(2^(-18))
867
+ */
868
+
869
+ /* Quick phase starts */
870
+
871
+ /* Compute square of x for both quick and accurate phases */
872
+ Mul12(&xSqh,&xSql,x,x);
873
+ /* Preload t4 with the pair tbl[3], tbl[4]; it is used unchanged when the
+ block below is skipped, i.e. when the high word is not above EXTRABOUND */
874
+ tmp4 = tbl[3];
875
+ tmp5 = tbl[4];
876
+ t4h = tmp4;
877
+ t4l = tmp5;
878
+ if (xdb.i[HI] > EXTRABOUND) {
879
+ /* Double precision evaluation */
880
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
881
+ highPoly = FMA(FMA(FMA(FMA(tbl[23],xSqh,tbl[21]),xSqh,tbl[19]),xSqh,tbl[17]),xSqh,tbl[15]);
882
+ #else
883
+ highPoly = tbl[15] + xSqh * (tbl[17] + xSqh * (tbl[19] + xSqh * (tbl[21] + xSqh * tbl[23])));
884
+ #endif
885
+
886
+ /* Double-double precision evaluation */
887
+ Mul12(&tt1h,&tt1l,xSqh,highPoly);
888
+ Add22(&t1h,&t1l,tbl[12],tbl[13],tt1h,tt1l);
889
+
890
+ MulAdd212(&t2h,&t2l,tbl[9],tbl[10],xSqh,t1h,t1l);
891
+ MulAdd212(&t3h,&t3l,tbl[6],tbl[7],xSqh,t2h,t2l);
892
+ MulAdd22(&t4h,&t4l,tmp4,tmp5,xSqh,xSql,t3h,t3l);
893
+ }
894
+
895
+ MulAdd22(&t5h,&t5l,tbl[0],tbl[1],xSqh,xSql,t4h,t4l);
896
+
897
+ Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
898
+ Mul22(&tt6h,&tt6l,xCubeh,xCubel,t5h,t5l);
899
+
900
+ Add12(tmp1,tmp2,x,tt6h);
901
+ tmp3 = tmp2 + tt6l;
902
+ Add12(polyh,polyl,tmp1,tmp3);
903
+
904
+ /* Multiply by sign */
905
+ asinh = sign * polyh;
906
+ asinm = sign * polyl;
907
+
908
+ /* Rounding test
909
+ The RD rounding constant is at tbl[35]
+ TEST_AND_RETURN_RD is defined elsewhere; presumably it returns from
+ this function when asinh + asinm suffices to decide the rounding
910
+ */
911
+ TEST_AND_RETURN_RD(asinh, asinm, tbl[35]);
912
+
913
+ /* Launch accurate phase */
914
+
915
+ asin_accurate_lower(&asinh,&asinm,&asinl,x,xSqh,xSql,sign);
916
+
917
+ ReturnRoundDownwards3(asinh,asinm,asinl);
918
+ }
919
+
920
+ if (xdb.i[HI] > BOUND9) {
921
+ /* Special interval BOUND9..1
922
+ We use an asymptotic development of arcsin in sqrt(1 - x)
923
+ */
924
+
925
+ /* Argument reduction for quick and accurate phase
926
+ z = 1 - x
927
+ The operation is exact as per Sterbenz' lemma
928
+ */
929
+
930
+ z = 1 - x;
931
+
932
+ /* Quick phase starts */
933
+
934
+ /* Double precision evaluation */
935
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
936
+ highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
937
+ tbl[TBLIDX10+42] ,z,tbl[TBLIDX10+40]),z,tbl[TBLIDX10+38]),z,
938
+ tbl[TBLIDX10+36]),z,tbl[TBLIDX10+34]),z,tbl[TBLIDX10+32]),z,
939
+ tbl[TBLIDX10+30]),z,tbl[TBLIDX10+28]),z,tbl[TBLIDX10+26]),z,
940
+ tbl[TBLIDX10+24]);
941
+ #else
942
+ highPoly = tbl[TBLIDX10+24] + z * (tbl[TBLIDX10+26] + z * (tbl[TBLIDX10+28] + z * (
943
+ tbl[TBLIDX10+30] + z * (tbl[TBLIDX10+32] + z * (tbl[TBLIDX10+34] + z * (
944
+ tbl[TBLIDX10+36] + z * (tbl[TBLIDX10+38] + z * (tbl[TBLIDX10+40] + z *
945
+ tbl[TBLIDX10+42]))))))));
946
+ #endif
947
+
948
+ /* Double-double precision evaluation */
949
+ Mul12(&tt1h,&tt1l,z,highPoly);
950
+ Add22(&t1h,&t1l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tt1h,tt1l);
951
+
952
+ MulAdd212(&t2h,&t2l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],z,t1h,t1l);
953
+ MulAdd212(&t3h,&t3l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],z,t2h,t2l);
954
+ MulAdd212(&t4h,&t4l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],z,t3h,t3l);
955
+ MulAdd212(&t5h,&t5l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],z,t4h,t4l);
956
+ MulAdd212(&t6h,&t6l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],z,t5h,t5l);
957
+ MulAdd212(&t7h,&t7l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],z,t6h,t6l);
958
+ MulAdd212(&t8h,&t8l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],z,t7h,t7l);
959
+ MulAdd212(&polyh,&polyl,-1,0,z,t8h,t8l);
960
+
961
+ /* Compute sqrt(2*z) as a double-double */
962
+
963
+ twoZ = 2 * z;
964
+ sqrt12(&sqrtzh,&sqrtzl,twoZ);
965
+
966
+ /* Multiply p(z) by sqrt(2*z) and add Pi/2 */
967
+
968
+ Mul22(&pTimesSh,&pTimesSl,polyh,polyl,sqrtzh,sqrtzl);
969
+ Add22(&allh,&alll,PIHALFH,PIHALFM,pTimesSh,pTimesSl);
970
+
971
+ /* Multiply by sign */
972
+ asinh = sign * allh;
973
+ asinm = sign * alll;
974
+
975
+ /* Rounding test
976
+ The RD rounding constant is at tbl[TBLIDX10+55]
977
+ */
978
+ TEST_AND_RETURN_RD(asinh, asinm, tbl[TBLIDX10+55]);
979
+
980
+ /* Launch accurate phase */
981
+
982
+ asin_accurate_higher(&asinh,&asinm,&asinl,z,sign);
983
+
984
+ ReturnRoundDownwards3(asinh,asinm,asinl);
985
+ }
986
+
987
+ /* General 8 main intervals
988
+ We can already suppose that BOUND1 <= x <= BOUND9
989
+ */
990
+ /* Nested comparisons on the high word pick one of the eight table offsets TBLIDX2..TBLIDX9 */
991
+ if (xdb.i[HI] < BOUND5) {
992
+ if (xdb.i[HI] < BOUND3) {
993
+ if (xdb.i[HI] < BOUND2) i = TBLIDX2; else i = TBLIDX3;
994
+ } else {
995
+ if (xdb.i[HI] < BOUND4) i = TBLIDX4; else i = TBLIDX5;
996
+ }
997
+ } else {
998
+ if (xdb.i[HI] < BOUND7) {
999
+ if (xdb.i[HI] < BOUND6) i = TBLIDX6; else i = TBLIDX7;
1000
+ } else {
1001
+ if (xdb.i[HI] < BOUND8) i = TBLIDX8; else i = TBLIDX9;
1002
+ }
1003
+ }
1004
+
1005
+ /* Argument reduction
1006
+ i points to the interval midpoint value in the table
1007
+ */
1008
+ z = x - tbl[i];
1009
+
1010
+ /* Quick phase starts */
1011
+
1012
+ /* Double precision evaluation */
1013
+
1014
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1015
+ highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(
1016
+ tbl[i+35] ,z,tbl[i+33]),z,tbl[i+31]),z,tbl[i+29]),z,
1017
+ tbl[i+27]),z,tbl[i+25]),z,tbl[i+23]),z,tbl[i+21]);
1018
+ #else
1019
+ highPoly = tbl[i+21] + z * (tbl[i+23] + z * (tbl[i+25] + z * (
1020
+ tbl[i+27] + z * (tbl[i+29] + z * (tbl[i+31] + z * (
1021
+ tbl[i+33] + z * tbl[i+35]))))));
1022
+ #endif
1023
+
1024
+ /* Double-double precision evaluation */
1025
+
1026
+ Mul12(&tt1h,&tt1l,z,highPoly);
1027
+ Add22(&t1h,&t1l,tbl[i+18],tbl[i+19],tt1h,tt1l);
1028
+
1029
+ MulAdd212(&t2h,&t2l,tbl[i+15],tbl[i+16],z,t1h,t1l);
1030
+ MulAdd212(&t3h,&t3l,tbl[i+12],tbl[i+13],z,t2h,t2l);
1031
+ MulAdd212(&t4h,&t4l,tbl[i+9],tbl[i+10],z,t3h,t3l);
1032
+ MulAdd212(&t5h,&t5l,tbl[i+6],tbl[i+7],z,t4h,t4l);
1033
+ MulAdd212(&t6h,&t6l,tbl[i+3],tbl[i+4],z,t5h,t5l);
1034
+ MulAdd212(&polyh,&polyl,tbl[i+1],tbl[i+2],z,t6h,t6l);
1035
+
1036
+ /* Multiply by sign */
1037
+ asinh = sign * polyh;
1038
+ asinm = sign * polyl;
1039
+
1040
+ /* Rounding test
1041
+ The RD rounding constant is at tbl[i+60]
1042
+ */
1043
+ TEST_AND_RETURN_RD(asinh, asinm, tbl[i+60]);
1044
+
1045
+ /* Launch accurate phase */
1046
+
1047
+ asin_accurate_middle(&asinh,&asinm,&asinl,z,i,sign);
1048
+
1049
+ ReturnRoundDownwards3(asinh,asinm,asinl);
1050
+ }
1051
+
1052
+ double asin_rz(double x) {
1053
+ db_number xdb;
1054
+ double sign, z, asinh, asinm, asinl;
1055
+ int i;
1056
+ double xSqh, xSql;
1057
+ double tt1h, tt1l;
1058
+ double tt6h, tt6l;
1059
+ double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
1060
+ double t7h, t7l, t8h, t8l, polyh, polyl, twoZ, sqrtzh, sqrtzl;
1061
+ double pTimesSh, pTimesSl, allh, alll, highPoly, xCubeh, xCubel;
1062
+ double tmp1, tmp2, tmp3, tmp4, tmp5;
1063
+ /* asin_rz: arcsine of x, rounded towards zero (RZ variant).
+ Outline of the code below:
+ - the sign of x is split off and the work is done on |x|;
+ - arguments outside -1 <= x <= 1 yield NaN;
+ - for |x| < 2^(-28) the argument x itself is returned;
+ - otherwise one of three paths is taken, selected on the high word
+ of |x|: below BOUND1, above BOUND9, or one of eight intervals in
+ between. Each path runs a quick phase producing asinh + asinm,
+ then the rounding test TEST_AND_RETURN_RZ and, when that test does
+ not return, an accurate phase producing asinh + asinm + asinl that
+ is handed to ReturnRoundTowardsZero3.
+ tbl, the BOUNDn / TBLIDXn / EXTRABOUND constants, db_number and all
+ helper macros and asin_accurate_* routines are declared elsewhere.
+ */
1064
+ /* Transform the argument into integer */
1065
+ xdb.d = x;
1066
+
1067
+ /* Special case handling */
1068
+
1069
+ /* Strip off the sign of argument x: sign becomes -1 or 1 and xdb holds |x| */
1070
+ if (xdb.i[HI] & 0x80000000) sign = -1; else sign = 1;
1071
+ xdb.i[HI] &= 0x7fffffff;
1072
+
1073
+ /* asin is defined on -1 <= x <= 1, elsewhere it is NaN.
+ The test is made on the words of |x|, so infinities and NaNs
+ (high word >= 0x7ff00000) also end up in this branch */
1074
+ if ((xdb.i[HI] > 0x3ff00000) || ((xdb.i[HI] == 0x3ff00000) && (xdb.i[LO] != 0x00000000))) {
1075
+ return (x-x)/0.0; /* return NaN */
1076
+ }
1077
+
1078
+ /* If |x| < 2^(-28) we have
1079
+
1080
+ arcsin(x) = x * ( 1 + xi )
1081
+
1082
+ with 0 <= xi < 2^(-55)
1083
+
1084
+ So we can decide the rounding without any computation
+ (0x3e300000 is the high word of 2^(-28))
1085
+ */
1086
+ if (xdb.i[HI] < 0x3e300000) {
1087
+ /* If x == 0 the result is algebraic and equal to 0
1088
+ If x < 0 the truncation rest is negative and less than 1 ulp, we return x
1089
+ If x > 0 the truncation rest is positive and less than 1 ulp, we return x
1090
+ */
1091
+ return x;
1092
+ }
1093
+
1094
+ /* Recast x: from here on x holds |x| */
1095
+ x = xdb.d;
1096
+
1097
+ /* Find the corresponding interval and compute the index into the table
1098
+ We start by filtering the two special cases around 0 and 1
1099
+ */
1100
+
1101
+ if (xdb.i[HI] < BOUND1) {
1102
+ /* Special interval 0..BOUND1
1103
+ The polynomial has no even monomials
1104
+ We must prove extra accuracy in the interval 0..sin(2^(-18))
1105
+ */
1106
+
1107
+ /* Quick phase starts */
1108
+
1109
+ /* Compute square of x for both quick and accurate phases */
1110
+ Mul12(&xSqh,&xSql,x,x);
1111
+ /* Preload t4 with the pair tbl[3], tbl[4]; it is used unchanged when the
+ block below is skipped, i.e. when the high word is not above EXTRABOUND */
1112
+ tmp4 = tbl[3];
1113
+ tmp5 = tbl[4];
1114
+ t4h = tmp4;
1115
+ t4l = tmp5;
1116
+ if (xdb.i[HI] > EXTRABOUND) {
1117
+ /* Double precision evaluation */
1118
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1119
+ highPoly = FMA(FMA(FMA(FMA(tbl[23],xSqh,tbl[21]),xSqh,tbl[19]),xSqh,tbl[17]),xSqh,tbl[15]);
1120
+ #else
1121
+ highPoly = tbl[15] + xSqh * (tbl[17] + xSqh * (tbl[19] + xSqh * (tbl[21] + xSqh * tbl[23])));
1122
+ #endif
1123
+
1124
+ /* Double-double precision evaluation */
1125
+ Mul12(&tt1h,&tt1l,xSqh,highPoly);
1126
+ Add22(&t1h,&t1l,tbl[12],tbl[13],tt1h,tt1l);
1127
+
1128
+ MulAdd212(&t2h,&t2l,tbl[9],tbl[10],xSqh,t1h,t1l);
1129
+ MulAdd212(&t3h,&t3l,tbl[6],tbl[7],xSqh,t2h,t2l);
1130
+ MulAdd22(&t4h,&t4l,tmp4,tmp5,xSqh,xSql,t3h,t3l);
1131
+ }
1132
+
1133
+ MulAdd22(&t5h,&t5l,tbl[0],tbl[1],xSqh,xSql,t4h,t4l);
1134
+
1135
+ Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
1136
+ Mul22(&tt6h,&tt6l,xCubeh,xCubel,t5h,t5l);
1137
+
1138
+ Add12(tmp1,tmp2,x,tt6h);
1139
+ tmp3 = tmp2 + tt6l;
1140
+ Add12(polyh,polyl,tmp1,tmp3);
1141
+
1142
+ /* Multiply by sign */
1143
+ asinh = sign * polyh;
1144
+ asinm = sign * polyl;
1145
+
1146
+ /* Rounding test
1147
+ The RZ rounding constant is at tbl[35]
+ TEST_AND_RETURN_RZ is defined elsewhere; presumably it returns from
+ this function when asinh + asinm suffices to decide the rounding
1148
+ */
1149
+ TEST_AND_RETURN_RZ(asinh, asinm, tbl[35]);
1150
+
1151
+ /* Launch accurate phase */
1152
+
1153
+ asin_accurate_lower(&asinh,&asinm,&asinl,x,xSqh,xSql,sign);
1154
+
1155
+ ReturnRoundTowardsZero3(asinh,asinm,asinl);
1156
+ }
1157
+
1158
+ if (xdb.i[HI] > BOUND9) {
1159
+ /* Special interval BOUND9..1
1160
+ We use an asymptotic development of arcsin in sqrt(1 - x)
1161
+ */
1162
+
1163
+ /* Argument reduction for quick and accurate phase
1164
+ z = 1 - x
1165
+ The operation is exact as per Sterbenz' lemma
1166
+ */
1167
+
1168
+ z = 1 - x;
1169
+
1170
+ /* Quick phase starts */
1171
+
1172
+ /* Double precision evaluation */
1173
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1174
+ highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
1175
+ tbl[TBLIDX10+42] ,z,tbl[TBLIDX10+40]),z,tbl[TBLIDX10+38]),z,
1176
+ tbl[TBLIDX10+36]),z,tbl[TBLIDX10+34]),z,tbl[TBLIDX10+32]),z,
1177
+ tbl[TBLIDX10+30]),z,tbl[TBLIDX10+28]),z,tbl[TBLIDX10+26]),z,
1178
+ tbl[TBLIDX10+24]);
1179
+ #else
1180
+ highPoly = tbl[TBLIDX10+24] + z * (tbl[TBLIDX10+26] + z * (tbl[TBLIDX10+28] + z * (
1181
+ tbl[TBLIDX10+30] + z * (tbl[TBLIDX10+32] + z * (tbl[TBLIDX10+34] + z * (
1182
+ tbl[TBLIDX10+36] + z * (tbl[TBLIDX10+38] + z * (tbl[TBLIDX10+40] + z *
1183
+ tbl[TBLIDX10+42]))))))));
1184
+ #endif
1185
+
1186
+ /* Double-double precision evaluation */
1187
+ Mul12(&tt1h,&tt1l,z,highPoly);
1188
+ Add22(&t1h,&t1l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tt1h,tt1l);
1189
+
1190
+ MulAdd212(&t2h,&t2l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],z,t1h,t1l);
1191
+ MulAdd212(&t3h,&t3l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],z,t2h,t2l);
1192
+ MulAdd212(&t4h,&t4l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],z,t3h,t3l);
1193
+ MulAdd212(&t5h,&t5l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],z,t4h,t4l);
1194
+ MulAdd212(&t6h,&t6l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],z,t5h,t5l);
1195
+ MulAdd212(&t7h,&t7l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],z,t6h,t6l);
1196
+ MulAdd212(&t8h,&t8l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],z,t7h,t7l);
1197
+ MulAdd212(&polyh,&polyl,-1,0,z,t8h,t8l);
1198
+
1199
+ /* Compute sqrt(2*z) as a double-double */
1200
+
1201
+ twoZ = 2 * z;
1202
+ sqrt12(&sqrtzh,&sqrtzl,twoZ);
1203
+
1204
+ /* Multiply p(z) by sqrt(2*z) and add Pi/2 */
1205
+
1206
+ Mul22(&pTimesSh,&pTimesSl,polyh,polyl,sqrtzh,sqrtzl);
1207
+ Add22(&allh,&alll,PIHALFH,PIHALFM,pTimesSh,pTimesSl);
1208
+
1209
+ /* Multiply by sign */
1210
+ asinh = sign * allh;
1211
+ asinm = sign * alll;
1212
+
1213
+ /* Rounding test
1214
+ The RZ rounding constant is at tbl[TBLIDX10+55]
1215
+ */
1216
+ TEST_AND_RETURN_RZ(asinh, asinm, tbl[TBLIDX10+55]);
1217
+
1218
+ /* Launch accurate phase */
1219
+
1220
+ asin_accurate_higher(&asinh,&asinm,&asinl,z,sign);
1221
+
1222
+ ReturnRoundTowardsZero3(asinh,asinm,asinl);
1223
+ }
1224
+
1225
+ /* General 8 main intervals
1226
+ We can already suppose that BOUND1 <= x <= BOUND9
1227
+ */
1228
+ /* Nested comparisons on the high word pick one of the eight table offsets TBLIDX2..TBLIDX9 */
1229
+ if (xdb.i[HI] < BOUND5) {
1230
+ if (xdb.i[HI] < BOUND3) {
1231
+ if (xdb.i[HI] < BOUND2) i = TBLIDX2; else i = TBLIDX3;
1232
+ } else {
1233
+ if (xdb.i[HI] < BOUND4) i = TBLIDX4; else i = TBLIDX5;
1234
+ }
1235
+ } else {
1236
+ if (xdb.i[HI] < BOUND7) {
1237
+ if (xdb.i[HI] < BOUND6) i = TBLIDX6; else i = TBLIDX7;
1238
+ } else {
1239
+ if (xdb.i[HI] < BOUND8) i = TBLIDX8; else i = TBLIDX9;
1240
+ }
1241
+ }
1242
+
1243
+ /* Argument reduction
1244
+ i points to the interval midpoint value in the table
1245
+ */
1246
+ z = x - tbl[i];
1247
+
1248
+ /* Quick phase starts */
1249
+
1250
+ /* Double precision evaluation */
1251
+
1252
+ #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1253
+ highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(
1254
+ tbl[i+35] ,z,tbl[i+33]),z,tbl[i+31]),z,tbl[i+29]),z,
1255
+ tbl[i+27]),z,tbl[i+25]),z,tbl[i+23]),z,tbl[i+21]);
1256
+ #else
1257
+ highPoly = tbl[i+21] + z * (tbl[i+23] + z * (tbl[i+25] + z * (
1258
+ tbl[i+27] + z * (tbl[i+29] + z * (tbl[i+31] + z * (
1259
+ tbl[i+33] + z * tbl[i+35]))))));
1260
+ #endif
1261
+
1262
+ /* Double-double precision evaluation */
1263
+
1264
+ Mul12(&tt1h,&tt1l,z,highPoly);
1265
+ Add22(&t1h,&t1l,tbl[i+18],tbl[i+19],tt1h,tt1l);
1266
+
1267
+ MulAdd212(&t2h,&t2l,tbl[i+15],tbl[i+16],z,t1h,t1l);
1268
+ MulAdd212(&t3h,&t3l,tbl[i+12],tbl[i+13],z,t2h,t2l);
1269
+ MulAdd212(&t4h,&t4l,tbl[i+9],tbl[i+10],z,t3h,t3l);
1270
+ MulAdd212(&t5h,&t5l,tbl[i+6],tbl[i+7],z,t4h,t4l);
1271
+ MulAdd212(&t6h,&t6l,tbl[i+3],tbl[i+4],z,t5h,t5l);
1272
+ MulAdd212(&polyh,&polyl,tbl[i+1],tbl[i+2],z,t6h,t6l);
1273
+
1274
+ /* Multiply by sign */
1275
+ asinh = sign * polyh;
1276
+ asinm = sign * polyl;
1277
+
1278
+ /* Rounding test
1279
+ The RZ rounding constant is at tbl[i+60]
1280
+ */
1281
+ TEST_AND_RETURN_RZ(asinh, asinm, tbl[i+60]);
1282
+
1283
+ /* Launch accurate phase */
1284
+
1285
+ asin_accurate_middle(&asinh,&asinm,&asinl,z,i,sign);
1286
+
1287
+ ReturnRoundTowardsZero3(asinh,asinm,asinl);
1288
+ }
1289
+
1290
+
1291
+
1292
+
1293
+
1294
+
1295
+
1296
+
1297
+