crmf 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +12 -0
  3. data/crmf.gemspec +102 -1
  4. data/ext/crlibm-1.0beta5/AUTHORS +2 -0
  5. data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
  6. data/ext/crlibm-1.0beta5/COPYING +340 -0
  7. data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
  8. data/ext/crlibm-1.0beta5/ChangeLog +125 -0
  9. data/ext/crlibm-1.0beta5/Makefile.am +134 -0
  10. data/ext/crlibm-1.0beta5/NEWS +0 -0
  11. data/ext/crlibm-1.0beta5/README +31 -0
  12. data/ext/crlibm-1.0beta5/README.DEV +23 -0
  13. data/ext/crlibm-1.0beta5/README.md +5 -0
  14. data/ext/crlibm-1.0beta5/TODO +66 -0
  15. data/ext/crlibm-1.0beta5/VERSION +1 -0
  16. data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
  17. data/ext/crlibm-1.0beta5/acos-td.h +629 -0
  18. data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
  19. data/ext/crlibm-1.0beta5/asin-td.h +620 -0
  20. data/ext/crlibm-1.0beta5/asincos.c +4488 -0
  21. data/ext/crlibm-1.0beta5/asincos.h +575 -0
  22. data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
  23. data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
  24. data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
  25. data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
  26. data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
  27. data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
  28. data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
  29. data/ext/crlibm-1.0beta5/configure.ac +419 -0
  30. data/ext/crlibm-1.0beta5/crlibm.h +204 -0
  31. data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
  32. data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
  33. data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
  34. data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
  35. data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
  36. data/ext/crlibm-1.0beta5/double-extended.h +496 -0
  37. data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
  38. data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
  39. data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
  40. data/ext/crlibm-1.0beta5/exp-td.h +685 -0
  41. data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
  42. data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
  43. data/ext/crlibm-1.0beta5/expm1.c +2515 -0
  44. data/ext/crlibm-1.0beta5/expm1.h +715 -0
  45. data/ext/crlibm-1.0beta5/interval.h +238 -0
  46. data/ext/crlibm-1.0beta5/log-de.c +480 -0
  47. data/ext/crlibm-1.0beta5/log-de.h +747 -0
  48. data/ext/crlibm-1.0beta5/log-de2.c +280 -0
  49. data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
  50. data/ext/crlibm-1.0beta5/log-td.c +1158 -0
  51. data/ext/crlibm-1.0beta5/log-td.h +819 -0
  52. data/ext/crlibm-1.0beta5/log.c +2244 -0
  53. data/ext/crlibm-1.0beta5/log.h +1592 -0
  54. data/ext/crlibm-1.0beta5/log10-td.c +906 -0
  55. data/ext/crlibm-1.0beta5/log10-td.h +823 -0
  56. data/ext/crlibm-1.0beta5/log1p.c +1295 -0
  57. data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
  58. data/ext/crlibm-1.0beta5/log2-td.h +821 -0
  59. data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
  60. data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
  61. data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
  62. data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
  63. data/ext/crlibm-1.0beta5/log_fast.c +360 -0
  64. data/ext/crlibm-1.0beta5/log_fast.h +440 -0
  65. data/ext/crlibm-1.0beta5/pow.c +1396 -0
  66. data/ext/crlibm-1.0beta5/pow.h +3101 -0
  67. data/ext/crlibm-1.0beta5/prepare +20 -0
  68. data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
  69. data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
  70. data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
  71. data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
  72. data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
  73. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
  74. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
  75. data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
  76. data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
  77. data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
  78. data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
  79. data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
  80. data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
  81. data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
  82. data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
  83. data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
  84. data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
  85. data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
  86. data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
  87. data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
  88. data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
  89. data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
  90. data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
  91. data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
  92. data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
  93. data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
  94. data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
  95. data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
  96. data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
  97. data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
  98. data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
  99. data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
  100. data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
  101. data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
  102. data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
  103. data/ext/crlibm-1.0beta5/trigpi.h +556 -0
  104. data/ext/crlibm-1.0beta5/triple-double.c +57 -0
  105. data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
  106. data/ext/crmf/crmf.c +16 -16
  107. data/ext/crmf/extconf.rb +12 -8
  108. data/lib/crmf/version.rb +1 -1
  109. data/tests/perf.rb +100 -219
  110. metadata +104 -3
  111. data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,1169 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include "crlibm.h"
4
+ /*
5
+ * Correctly rounded trigpi functions
6
+ *
7
+ * Authors : F. de Dinechin, S. Chevillard, C. Lauter (the latter two
8
+ * didn't write a line of this file, but wrote a tool that wrote a
9
+ * tool that wrote etc that wrote bits of code related to polynomial
10
+ * evaluation.)
11
+ *
12
+ * This file is part of the crlibm library developed by the Arenaire
13
+ * project at Ecole Normale Superieure de Lyon
14
+ *
15
+ * This program is free software; you can redistribute it and/or modify
16
+ * it under the terms of the GNU Lesser General Public License as published by
17
+ * the Free Software Foundation; either version 2 of the License, or
18
+ * (at your option) any later version.
19
+ *
20
+ * This program is distributed in the hope that it will be useful,
21
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23
+ * GNU General Public License for more details.
24
+ *
25
+ * You should have received a copy of the GNU Lesser General Public License
26
+ * along with this program; if not, write to the Free Software
27
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
28
+ */
29
+ #include "crlibm_private.h"
30
+ #include "triple-double.h"
31
+ #include "trigpi.h"
32
+
33
+
34
+ /* TODO
35
+
36
+
37
+ Use the symmetries of the tables
38
+
39
+ Actually, use the tables from the standard trigo.
40
+
41
+ Write decent quick steps. Or hammer Christoph and Sylvain to do so.
42
+
43
+ */
44
+
45
+
46
+
47
+
48
+
49
+ /* This ugly bits of code in the beginning are polynomial evaluations
50
+ automagically generated and proven by Chevillard and Lauter's
51
+ tools
52
+ */
53
+
54
+
55
+ #define sinpiacc_coeff_1h 3.14159265358979311599796346854418516159057617187500000000000000000000000000000000e+00
56
+ #define sinpiacc_coeff_1m 1.22464679914735320717376402945839660462569212467758006379625612680683843791484833e-16
57
+ #define sinpiacc_coeff_1l -2.87889731599645993207191707893463395148177292198731390393739579574603302514349608e-33
58
+ #define sinpiacc_coeff_3h -5.16771278004997025590228076907806098461151123046875000000000000000000000000000000e+00
59
+ #define sinpiacc_coeff_3m 2.26656228257550136196266687046492287115561324595258696490418515168130397796630859e-16
60
+ #define sinpiacc_coeff_5h 2.55016403987734552316624103696085512638092041015625000000000000000000000000000000e+00
61
+ #define sinpiacc_coeff_5m -7.93098961936403945684716222915171282926664203267314023904077657789457589387893677e-17
62
+ #define sinpiacc_coeff_7h -5.99264529320792105338000510528218001127243041992187500000000000000000000000000000e-01
63
+ #define sinpiacc_coeff_9h 8.21458866130424236740026344705256633460521697998046875000000000000000000000000000e-02
64
+ #define sinpiacc_coeff_11h -7.37046804820839888960914976223648409359157085418701171875000000000000000000000000e-03
65
+
66
+
67
+ #define cospiacc_coeff_0h 1.00000000000000000000000000000000000000000000000000000000000000000000000000000000e+00
68
+ #define cospiacc_coeff_2h -4.93480220054467899615247006295248866081237792968750000000000000000000000000000000e+00
69
+ #define cospiacc_coeff_2m -3.13264775437072047133490817894057799839785556899468543790021612949203699827194214e-16
70
+ #define cospiacc_coeff_4h 4.05871212641676848420502210501581430435180664062500000000000000000000000000000000e+00
71
+ #define cospiacc_coeff_4m -2.66019969731660223662555032718185048048635055542576743903282476821914315223693848e-16
72
+ #define cospiacc_coeff_6h -1.33526276885458949905682857206556946039199829101562500000000000000000000000000000e+00
73
+ #define cospiacc_coeff_8h 2.35330630358513925859398341344785876572132110595703125000000000000000000000000000e-01
74
+ #define cospiacc_coeff_10h -2.58068327360992909313974763563237502239644527435302734375000000000000000000000000e-02
75
+
76
+ static void sincospiacc(double *sinpiacc_resh, double *sinpiacc_resm, double *sinpiacc_resl,
77
+ double *cospiacc_resh, double *cospiacc_resm, double *cospiacc_resl,
78
+ double x) {
79
+ double x2h, x2m;
80
+ double sinpiacc_t_1_0h;
81
+ double sinpiacc_t_2_0h;
82
+ double sinpiacc_t_3_0h;
83
+ double sinpiacc_t_4_0h;
84
+ double sinpiacc_t_5_0h, sinpiacc_t_5_0m;
85
+ double sinpiacc_t_6_0h, sinpiacc_t_6_0m;
86
+ double sinpiacc_t_7_0h, sinpiacc_t_7_0m;
87
+ double sinpiacc_t_8_0h, sinpiacc_t_8_0m;
88
+ double sinpiacc_t_9_0h, sinpiacc_t_9_0m, sinpiacc_t_9_0l;
89
+ double sinpiacc_t_10_0h, sinpiacc_t_10_0m, sinpiacc_t_10_0l;
90
+
91
+ double cospiacc_t_1_0h;
92
+ double cospiacc_t_2_0h;
93
+ double cospiacc_t_3_0h;
94
+ double cospiacc_t_4_0h;
95
+ double cospiacc_t_5_0h, cospiacc_t_5_0m;
96
+ double cospiacc_t_6_0h, cospiacc_t_6_0m;
97
+ double cospiacc_t_7_0h, cospiacc_t_7_0m;
98
+ double cospiacc_t_8_0h, cospiacc_t_8_0m;
99
+ double cospiacc_t_9_0h, cospiacc_t_9_0m, cospiacc_t_9_0l;
100
+
101
+ Mul12(&x2h,&x2m,x,x);
102
+
103
+ sinpiacc_t_1_0h = sinpiacc_coeff_11h;
104
+ sinpiacc_t_2_0h = sinpiacc_t_1_0h * x2h;
105
+ sinpiacc_t_3_0h = sinpiacc_coeff_9h + sinpiacc_t_2_0h;
106
+ sinpiacc_t_4_0h = sinpiacc_t_3_0h * x2h;
107
+ Add12(sinpiacc_t_5_0h,sinpiacc_t_5_0m,sinpiacc_coeff_7h,sinpiacc_t_4_0h);
108
+ MulAdd22(&sinpiacc_t_6_0h,&sinpiacc_t_6_0m,sinpiacc_coeff_5h,sinpiacc_coeff_5m,x2h,x2m,sinpiacc_t_5_0h,sinpiacc_t_5_0m);
109
+ MulAdd22(&sinpiacc_t_7_0h,&sinpiacc_t_7_0m,sinpiacc_coeff_3h,sinpiacc_coeff_3m,x2h,x2m,sinpiacc_t_6_0h,sinpiacc_t_6_0m);
110
+ Mul22(&sinpiacc_t_8_0h,&sinpiacc_t_8_0m,sinpiacc_t_7_0h,sinpiacc_t_7_0m,x2h,x2m);
111
+ Add233Cond(&sinpiacc_t_9_0h,&sinpiacc_t_9_0m,&sinpiacc_t_9_0l,sinpiacc_t_8_0h,sinpiacc_t_8_0m,sinpiacc_coeff_1h,sinpiacc_coeff_1m,sinpiacc_coeff_1l);
112
+ Mul133(&sinpiacc_t_10_0h,&sinpiacc_t_10_0m,&sinpiacc_t_10_0l,x,sinpiacc_t_9_0h,sinpiacc_t_9_0m,sinpiacc_t_9_0l);
113
+ Renormalize3(sinpiacc_resh,sinpiacc_resm,sinpiacc_resl,sinpiacc_t_10_0h,sinpiacc_t_10_0m,sinpiacc_t_10_0l);
114
+
115
+
116
+ cospiacc_t_1_0h = cospiacc_coeff_10h;
117
+ cospiacc_t_2_0h = cospiacc_t_1_0h * x2h;
118
+ cospiacc_t_3_0h = cospiacc_coeff_8h + cospiacc_t_2_0h;
119
+ cospiacc_t_4_0h = cospiacc_t_3_0h * x2h;
120
+ Add12(cospiacc_t_5_0h,cospiacc_t_5_0m,cospiacc_coeff_6h,cospiacc_t_4_0h);
121
+ MulAdd22(&cospiacc_t_6_0h,&cospiacc_t_6_0m,cospiacc_coeff_4h,cospiacc_coeff_4m,x2h,x2m,cospiacc_t_5_0h,cospiacc_t_5_0m);
122
+ MulAdd22(&cospiacc_t_7_0h,&cospiacc_t_7_0m,cospiacc_coeff_2h,cospiacc_coeff_2m,x2h,x2m,cospiacc_t_6_0h,cospiacc_t_6_0m);
123
+ Mul22(&cospiacc_t_8_0h,&cospiacc_t_8_0m,cospiacc_t_7_0h,cospiacc_t_7_0m,x2h,x2m);
124
+ Add123(&cospiacc_t_9_0h,&cospiacc_t_9_0m,&cospiacc_t_9_0l,cospiacc_coeff_0h,cospiacc_t_8_0h,cospiacc_t_8_0m);
125
+ *cospiacc_resh = cospiacc_t_9_0h; *cospiacc_resm = cospiacc_t_9_0m; *cospiacc_resl = cospiacc_t_9_0l;
126
+
127
+ }
128
+
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
+
138
+
139
+ /* Comment on comparing sa, ca, sy and cy
140
+ either index=0, then sa=0 and ca=1, therefore t2=0, and the Add33 will be exact
141
+ or index !=0, and
142
+ -eps1 < sy < eps1 (but sy may be negative)
143
+ sa > eps1 (sa>0)
144
+ 1-eps2 < cy < 1
145
+ ca < 1-eps2
146
+ therefore
147
+ sacy = t2 >0
148
+ casy = t1 may be negative
149
+ abs(t1) <= abs(t2)
150
+ Unfortunately we need a stronger condition to use the Add33
151
+ */
152
+
153
+
154
+
155
+
156
+ static void sinpi_accurate(double *rh, double *rm, double *rl,
157
+ double y, int index, int quadrant)
158
+ {
159
+ double syh, sym, syl, cyh, cym, cyl, sah, sam, sal, cah, cam, cal;
160
+ double t1h, t1m, t1l, t2h, t2m, t2l;
161
+
162
+ sincospiacc(&syh, &sym, &syl, &cyh, &cym, &cyl, y);
163
+
164
+ sah=sincosTable[index].sh;
165
+ cah=sincosTable[index].ch;
166
+ sam=sincosTable[index].sm;
167
+ cam=sincosTable[index].cm;
168
+ sal=sincosTable[index].sl;
169
+ cal=sincosTable[index].cl;
170
+
171
+ if(quadrant==0 || quadrant==2) {
172
+ /* compute sy*ca+sa*cy : t1 = sy*ca, t2 = sa*cy*/
173
+ Mul33(&t1h,&t1m,&t1l, syh,sym,syl, cah,cam,cal);
174
+ Mul33(&t2h,&t2m,&t2l, sah,sam,sal, cyh,cym,cyl);
175
+ Add33Cond(rh, rm, rl, t2h,t2m,t2l, t1h,t1m,t1l);
176
+ }
177
+ else {
178
+ /* compute cy*ca - sa*sy : t1 = cy*ca, t2 = sa*sy */
179
+ Mul33(&t1h,&t1m,&t1l, cyh,cym,cyl, cah,cam,cal);
180
+ Mul33(&t2h,&t2m,&t2l, sah,sam,sal, syh,sym,syl);
181
+ Add33Cond(rh, rm, rl, t1h,t1m,t1l, -t2h,-t2m,-t2l);
182
+ }
183
+
184
+ if (quadrant>=2) {
185
+ *rh = -*rh;
186
+ *rm = -*rm;
187
+ *rl = -*rl;
188
+ }
189
+
190
+ };
191
+
192
+
193
+
194
+
195
+
196
+
197
+ #define sinpiquick_coeff_1h 3.14159265358979311599796346854418516159057617187500000000000000000000000000000000e+00
198
+ #define sinpiquick_coeff_1m 1.22464971683184787123862072310058851157814368464452070561776508839102461934089661e-16
199
+ #define sinpiquick_coeff_3h -5.16771278004997025590228076907806098461151123046875000000000000000000000000000000e+00
200
+ #define sinpiquick_coeff_5h 2.55016403989992213041659852024167776107788085937500000000000000000000000000000000e+00
201
+ #define sinpiquick_coeff_7h -5.99263913290728922333983064163476228713989257812500000000000000000000000000000000e-01
202
+ #define cospiquick_coeff_0h 1.00000000000000000000000000000000000000000000000000000000000000000000000000000000e+00
203
+ #define cospiquick_coeff_2h -4.93480220054467899615247006295248866081237792968750000000000000000000000000000000e+00
204
+ #define cospiquick_coeff_4h 4.05871212632582167856298838160000741481781005859375000000000000000000000000000000e+00
205
+ #define cospiquick_coeff_6h -1.33525456323720947970912220625905320048332214355468750000000000000000000000000000e+00
206
+
207
+
208
+
209
+ static void cospi_accurate(double *rh, double *rm, double *rl,
210
+ double y, int index, int quadrant)
211
+ {
212
+ double syh, sym, syl, cyh, cym, cyl, sah, sam, sal, cah, cam, cal;
213
+ double t1h, t1m, t1l, t2h, t2m, t2l;
214
+
215
+ sincospiacc(&syh, &sym, &syl, &cyh, &cym, &cyl, y);
216
+
217
+ sah=sincosTable[index].sh;
218
+ cah=sincosTable[index].ch;
219
+ sam=sincosTable[index].sm;
220
+ cam=sincosTable[index].cm;
221
+ sal=sincosTable[index].sl;
222
+ cal=sincosTable[index].cl;
223
+
224
+ if(quadrant==0 || quadrant==2) {
225
+ /* compute cy*ca - sa*sy : t1 = cy*ca, t2 = sa*sy */
226
+ Mul33(&t1h,&t1m,&t1l, cyh,cym,cyl, cah,cam,cal);
227
+ Mul33(&t2h,&t2m,&t2l, sah,sam,sal, syh,sym,syl);
228
+ Add33Cond(rh, rm, rl, t1h,t1m,t1l, -t2h,-t2m,-t2l);
229
+ }
230
+ else {
231
+ /* compute sy*ca+sa*cy : t1 = sy*ca, t2 = sa*cy*/
232
+ Mul33(&t1h,&t1m,&t1l, syh,sym,syl, cah,cam,cal);
233
+ Mul33(&t2h,&t2m,&t2l, sah,sam,sal, cyh,cym,cyl);
234
+ Add33Cond(rh, rm, rl, t2h,t2m,t2l, t1h,t1m,t1l);
235
+ }
236
+
237
+ if (quadrant==1 || quadrant==2) {
238
+ *rh = -*rh;
239
+ *rm = -*rm;
240
+ *rl = -*rl;
241
+ }
242
+
243
+ };
244
+
245
+
246
+
247
+
248
+
249
+
250
+ /* This one can clearly be improved. It was set up in less than one hour */
251
+ void sinpiquick(double *rh, double *rm, double x, int index, int quadrant) {
252
+ double x2h, x2m;
253
+ double sinpiquick_t_1_0h;
254
+ double sinpiquick_t_2_0h;
255
+ double sinpiquick_t_3_0h;
256
+ double sinpiquick_t_4_0h;
257
+ double sinpiquick_t_5_0h, sinpiquick_t_5_0m;
258
+ double sinpiquick_t_6_0h, sinpiquick_t_6_0m;
259
+ double sinpiquick_t_7_0h, sinpiquick_t_7_0m;
260
+ double syh, sym;
261
+ double cospiquick_t_1_0h;
262
+ double cospiquick_t_2_0h;
263
+ double cospiquick_t_3_0h;
264
+ double cospiquick_t_4_0h;
265
+ double cospiquick_t_5_0h, cospiquick_t_5_0m;
266
+ double cospiquick_t_6_0h, cospiquick_t_6_0m;
267
+ double cospiquick_t_7_0h, cospiquick_t_7_0m;
268
+ double cyh, cym;
269
+ double t1h, t1m, t2h, t2m, sah, sam, cah,cam;
270
+
271
+ Mul12(&x2h,&x2m,x,x);
272
+
273
+ sah=sincosTable[index].sh;
274
+ cah=sincosTable[index].ch;
275
+ sam=sincosTable[index].sm;
276
+ cam=sincosTable[index].cm;
277
+
278
+ sinpiquick_t_1_0h = sinpiquick_coeff_7h;
279
+ sinpiquick_t_2_0h = sinpiquick_t_1_0h * x2h;
280
+ sinpiquick_t_3_0h = sinpiquick_coeff_5h + sinpiquick_t_2_0h;
281
+ sinpiquick_t_4_0h = sinpiquick_t_3_0h * x2h;
282
+ Add12(sinpiquick_t_5_0h,sinpiquick_t_5_0m,sinpiquick_coeff_3h,sinpiquick_t_4_0h);
283
+ MulAdd22(&sinpiquick_t_6_0h,&sinpiquick_t_6_0m,sinpiquick_coeff_1h,sinpiquick_coeff_1m,x2h,x2m,sinpiquick_t_5_0h,sinpiquick_t_5_0m);
284
+ Mul122(&sinpiquick_t_7_0h,&sinpiquick_t_7_0m,x,sinpiquick_t_6_0h,sinpiquick_t_6_0m);
285
+ syh = sinpiquick_t_7_0h; sym = sinpiquick_t_7_0m;
286
+
287
+ cospiquick_t_1_0h = cospiquick_coeff_6h;
288
+ cospiquick_t_2_0h = cospiquick_t_1_0h * x2h;
289
+ cospiquick_t_3_0h = cospiquick_coeff_4h + cospiquick_t_2_0h;
290
+ cospiquick_t_4_0h = cospiquick_t_3_0h * x2h;
291
+ Add12(cospiquick_t_5_0h,cospiquick_t_5_0m,cospiquick_coeff_2h,cospiquick_t_4_0h);
292
+ Mul22(&cospiquick_t_6_0h,&cospiquick_t_6_0m,cospiquick_t_5_0h,cospiquick_t_5_0m,x2h,x2m);
293
+ Add122(&cospiquick_t_7_0h,&cospiquick_t_7_0m,cospiquick_coeff_0h,cospiquick_t_6_0h,cospiquick_t_6_0m);
294
+ cyh = cospiquick_t_7_0h; cym = cospiquick_t_7_0m;
295
+
296
+ /* Here comes the hand-written, unproven yet code */
297
+ if(quadrant==0 || quadrant==2) {
298
+ /* compute sy*ca+sa*cy : t1 = sy*ca, t2 = sa*cy*/
299
+ Mul22(&t1h,&t1m, syh,sym, cah,cam);
300
+ Mul22(&t2h,&t2m, sah,sam, cyh,cym);
301
+ Add22Cond(rh, rm, t2h,t2m, t1h,t1m);
302
+ }
303
+ else {
304
+ /* compute cy*ca - sa*sy : t1 = cy*ca, t2 = sa*sy */
305
+ Mul22(&t1h,&t1m, cyh,cym, cah,cam);
306
+ Mul22(&t2h,&t2m, sah,sam, syh,sym);
307
+ Add22Cond(rh, rm, t1h,t1m, -t2h,-t2m);
308
+ }
309
+
310
+ if (quadrant>=2) {
311
+ *rh = -*rh;
312
+ *rm = -*rm;
313
+ }
314
+
315
+ }
316
+
317
+
318
+
319
+
320
+
321
+
322
+
323
+ double sinpi_rn(double x){
324
+ double xs, y,u, rh, rm, rl, sign,absx;
325
+ db_number xdb, t;
326
+ int32_t xih, absxih, index, quadrant;
327
+
328
+ if (x<0) absx = -x; else absx = x;
329
+
330
+ xdb.d = x;
331
+
332
+ xs = x*128.0;
333
+
334
+ /* argument reduction */
335
+ if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
336
+ t.d = xs;
337
+ t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
338
+ So what remains in t is an FP integer almost as large as x */
339
+ xs = xs-t.d; /* we are going to throw away the int part anyway */
340
+ }
341
+
342
+ t.d = TWOTO5251 + xs;
343
+ u = t.d - TWOTO5251;
344
+ y = xs - u;
345
+ index = t.i[LO] & 0x3f;
346
+ quadrant = (t.i[LO] & 0xff) >>6;
347
+
348
+ /* Special case tests come late because the conversion FP to int is slow */
349
+ xih = xdb.i[HI];
350
+ absxih = xih & 0x7fffffff;
351
+ if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
352
+
353
+ if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
354
+
355
+ y = y * INV128;
356
+
357
+ /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
358
+ if (absxih>=0x7ff00000) {
359
+ xdb.l=0xfff8000000000000LL;
360
+ return xdb.d - xdb.d;
361
+ }
362
+
363
+ if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
364
+ return sign*0.0; /*signed */
365
+
366
+ if(absxih<=0x3E000000) /*2^{-31}*/ {
367
+ if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
368
+ /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
369
+ scs_t result;
370
+ scs_set_d(result, x );
371
+ scs_mul(result, PiSCS_ptr, result);
372
+ scs_get_d(&rh, result);
373
+ return rh;
374
+ }
375
+ /* First step for Pi*x. TODO: FMA-based optimisation */
376
+ const double DekkerConst = 134217729.; /* 2^27 +1 */
377
+ double tt, xh, xl;
378
+ /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
379
+ tt = x*DekkerConst;
380
+ xh = (x-tt)+tt;
381
+ xl = x-xh;
382
+ Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
383
+ if(rh == (rh + (rl * PIX_RNCST_SIN)))
384
+ return rh;
385
+ }
386
+ /* Fall here either if we have a large input, or if we have a small
387
+ input and the rounding test fails. */
388
+ sinpiquick(&rh, &rm, y, index, quadrant);
389
+ if (rh==rh+1.00001*rm) /* See trigpiquick.gappa. This first step is ridiculously too accurate */
390
+ return rh;
391
+ sinpi_accurate(&rh, &rm, &rl, y, index, quadrant);
392
+ ReturnRoundToNearest3(rh,rm,rl);
393
+ }
394
+
395
+
396
+
397
+
398
+
399
+
400
+
401
+
402
+
403
+ double sinpi_rd(double x){
404
+ double xs, y,u, rh, rm, rl, sign,absx;
405
+ db_number xdb, t;
406
+ int32_t xih, absxih, index, quadrant;
407
+
408
+ if (x<0) absx = -x; else absx = x;
409
+
410
+ xdb.d = x;
411
+
412
+ xs = x*128.0;
413
+
414
+ /* argument reduction */
415
+ if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
416
+ t.d = xs;
417
+ t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
418
+ So what remains in t is an FP integer almost as large as x */
419
+ xs = xs-t.d; /* we are going to throw away the int part anyway */
420
+ }
421
+
422
+ t.d = TWOTO5251 + xs;
423
+ u = t.d - TWOTO5251;
424
+ y = xs - u;
425
+ index = t.i[LO] & 0x3f;
426
+ quadrant = (t.i[LO] & 0xff) >>6;
427
+
428
+ /* Special case tests come late because the conversion FP to int is slow */
429
+ xih = xdb.i[HI];
430
+ absxih = xih & 0x7fffffff;
431
+ if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
432
+
433
+ if(index==0 && y==0.0 && ((quadrant&1)==0)) return -0.0; /*signed, inspired by LIA-2 */
434
+
435
+ y = y * INV128;
436
+
437
+ /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
438
+ if (absxih>=0x7ff00000) {
439
+ xdb.l=0xfff8000000000000LL;
440
+ return xdb.d - xdb.d;
441
+ }
442
+
443
+ if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
444
+ return sign*0.0; /*signed */
445
+
446
+ if(absxih<=0x3E000000) /*2^{-31}*/ {
447
+ if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
448
+ /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
449
+ scs_t result;
450
+ scs_set_d(result, x );
451
+ scs_mul(result, PiSCS_ptr, result);
452
+ scs_get_d_minf(&rh, result);
453
+ return rh;
454
+ }
455
+ /* First step for Pi*x. TODO: FMA-based optimisation */
456
+ const double DekkerConst = 134217729.; /* 2^27 +1 */
457
+ double tt, xh, xl;
458
+ /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
459
+ tt = x*DekkerConst;
460
+ xh = (x-tt)+tt;
461
+ xl = x-xh;
462
+ Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
463
+ TEST_AND_RETURN_RD(rh,rl,PIX_EPS_SIN);
464
+ }
465
+ /* Fall here either if we have a large input, or if we have a small
466
+ input and the rounding test fails. */
467
+ sinpi_accurate(&rh, &rm, &rl, y, index, quadrant);
468
+
469
+ ReturnRoundDownwards3(rh,rm,rl);
470
+ };
471
+
472
+
473
+
474
+ double sinpi_ru(double x){
475
+ double xs, y,u, rh, rm, rl, sign,absx;
476
+ db_number xdb, t;
477
+ int32_t xih, absxih, index, quadrant;
478
+
479
+ if (x<0) absx = -x; else absx = x;
480
+
481
+ xdb.d = x;
482
+
483
+ xs = x*128.0;
484
+
485
+ /* argument reduction */
486
+ if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
487
+ t.d = xs;
488
+ t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
489
+ So what remains in t is an FP integer almost as large as x */
490
+ xs = xs-t.d; /* we are going to throw away the int part anyway */
491
+ }
492
+
493
+ t.d = TWOTO5251 + xs;
494
+ u = t.d - TWOTO5251;
495
+ y = xs - u;
496
+ index = t.i[LO] & 0x3f;
497
+ quadrant = (t.i[LO] & 0xff) >>6;
498
+
499
+ /* Special case tests come late because the conversion FP to int is slow */
500
+ xih = xdb.i[HI];
501
+ absxih = xih & 0x7fffffff;
502
+ if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
503
+
504
+ if(index==0 && y==0.0 && ((quadrant&1)==0)) return +0.0; /*signed, inspired by LIA-2 */
505
+
506
+ y = y * INV128;
507
+
508
+ /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
509
+ if (absxih>=0x7ff00000) {
510
+ xdb.l=0xfff8000000000000LL;
511
+ return xdb.d - xdb.d;
512
+ }
513
+
514
+ if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
515
+ return sign*0.0; /*signed */
516
+
517
+ if(absxih<=0x3E000000) /*2^{-31}*/ {
518
+ if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
519
+ /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
520
+ scs_t result;
521
+ scs_set_d(result, x );
522
+ scs_mul(result, PiSCS_ptr, result);
523
+ scs_get_d_pinf(&rh, result);
524
+ return rh;
525
+ }
526
+ /* First step for Pi*x. TODO: FMA-based optimisation */
527
+ const double DekkerConst = 134217729.; /* 2^27 +1 */
528
+ double tt, xh, xl;
529
+ /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
530
+ tt = x*DekkerConst;
531
+ xh = (x-tt)+tt;
532
+ xl = x-xh;
533
+ Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
534
+ TEST_AND_RETURN_RU(rh,rl,PIX_EPS_SIN);
535
+ }
536
+ /* Fall here either if we have a large input, or if we have a small
537
+ input and the rounding test fails. */
538
+ sinpi_accurate(&rh, &rm, &rl, y, index, quadrant);
539
+
540
+ ReturnRoundUpwards3(rh,rm,rl);
541
+
542
+ };
543
+
544
+
545
+
546
+
547
+ double sinpi_rz(double x){
548
+ double xs, y,u, rh, rm, rl, sign,absx;
549
+ db_number xdb, t;
550
+ int32_t xih, absxih, index, quadrant;
551
+
552
+ if (x<0) absx = -x; else absx = x;
553
+
554
+ xdb.d = x;
555
+
556
+ xs = x*128.0;
557
+
558
+ /* argument reduction */
559
+ if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
560
+ t.d = xs;
561
+ t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
562
+ So what remains in t is an FP integer almost as large as x */
563
+ xs = xs-t.d; /* we are going to throw away the int part anyway */
564
+ }
565
+
566
+ t.d = TWOTO5251 + xs;
567
+ u = t.d - TWOTO5251;
568
+ y = xs - u;
569
+ index = t.i[LO] & 0x3f;
570
+ quadrant = (t.i[LO] & 0xff) >>6;
571
+
572
+ /* Special case tests come late because the conversion FP to int is slow */
573
+ xih = xdb.i[HI];
574
+ absxih = xih & 0x7fffffff;
575
+ if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
576
+
577
+ if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
578
+
579
+ y = y * INV128;
580
+
581
+ /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
582
+ if (absxih>=0x7ff00000) {
583
+ xdb.l=0xfff8000000000000LL;
584
+ return xdb.d - xdb.d;
585
+ }
586
+
587
+ if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
588
+ return sign*0.0; /*signed */
589
+
590
+ if(absxih<=0x3E000000) /*2^{-31}*/ {
591
+ if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
592
+ /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
593
+ scs_t result;
594
+ scs_set_d(result, x );
595
+ scs_mul(result, PiSCS_ptr, result);
596
+ scs_get_d_zero(&rh, result);
597
+ return rh;
598
+ }
599
+ /* First step for Pi*x. TODO: FMA-based optimisation */
600
+ const double DekkerConst = 134217729.; /* 2^27 +1 */
601
+ double tt, xh, xl;
602
+ /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
603
+ tt = x*DekkerConst;
604
+ xh = (x-tt)+tt;
605
+ xl = x-xh;
606
+ Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
607
+ TEST_AND_RETURN_RZ(rh,rl,PIX_EPS_SIN);
608
+ }
609
+ /* Fall here either if we have a large input, or if we have a small
610
+ input and the rounding test fails. */
611
+ sinpi_accurate(&rh, &rm, &rl, y, index, quadrant);
612
+
613
+ ReturnRoundTowardsZero3(rh,rm,rl);
614
+ };
615
+
616
+
617
+
618
+
619
+
620
+
621
+
622
+
623
+
624
+ /* to nearest */
625
+ double cospi_rn(double x){
626
+ double xs, y,u, rh, rm, rl, absx;
627
+ db_number xdb, t;
628
+ int32_t xih, absxih, index, quadrant;
629
+
630
+ if (x<0) absx =-x; else absx=x;
631
+
632
+ xdb.d=x;
633
+ xs = x*128.0;
634
+
635
+ /* argument reduction.
636
+ We do it before the special case tests for performance,
637
+ it might compute garbage for inf, very large inputs, etc */
638
+ if(absx> TWOTO42 ) { /* 2^42, x is very large, let us first subtract a large integer from it */
639
+ t.d = xs;
640
+ t.i[LO] =0; /* remove the low part, in which was the coma since x > 2^42.
641
+ So what remains in t is an FP integer almost as large as x */
642
+ xs = xs-t.d; /* we are going to throw away the int part anyway */
643
+ }
644
+
645
+ t.d = TWOTO5251 + xs;
646
+ u = t.d - TWOTO5251;
647
+ y = xs - u;
648
+ y = y * INV128;
649
+ index = t.i[LO] & 0x3f;
650
+ quadrant = (t.i[LO] & 0xff) >>6;
651
+
652
+ /* Special case tests come late because the conversion FP to int is slow */
653
+ xih = xdb.i[HI];
654
+ absxih = xih & 0x7fffffff;
655
+
656
+ /* SPECIAL CASES: x=(Nan, Inf) cos(pi*x)=Nan */
657
+
658
+ if (absxih>=0x7ff00000) {
659
+ xdb.l=0xfff8000000000000LL;
660
+ return xdb.d - xdb.d;
661
+ }
662
+
663
+ if(absxih>=0x43400000) /* 2^53, which entails that x is an even integer */
664
+ return 1.0;
665
+
666
+ if(index==0 && y==0. && ((quadrant&1)==1)) return +0.;
667
+ /* Always +0, inpired by LIA2; We do not have cos(x+pi) == - cos(x)
668
+ in this case */
669
+
670
+ if(index==0 && y==0. && quadrant==0) return 1.;
671
+ if(index==0 && y==0. && quadrant==2) return -1.;
672
+
673
+ if (absxih<0x3E26A09E) /* sqrt(2^-53)/4 */
674
+ return 1.0;
675
+ /* printf("\n\nint part = %f frac part = %f index=%d quadrant=%d \n", u, y, index, quadrant);
676
+ */
677
+
678
+ cospi_accurate(&rh, &rm, &rl, y, index, quadrant);
679
+ ReturnRoundToNearest3(rh,rm,rl);
680
+ };
681
+
682
+
683
+
684
+
685
+
686
+ double cospi_rd(double x){
687
+ double xs, y,u, rh, rm, rl, absx;
688
+ db_number xdb, t;
689
+ int32_t xih, absxih, index, quadrant;
690
+
691
+ if (x<0) absx =-x; else absx=x;
692
+
693
+ xdb.d=x;
694
+ xs = x*128.0;
695
+
696
+
697
+ /* argument reduction.
698
+ We do it before the special case tests for performance,
699
+ it might compute garbage for inf, very large inputs, etc */
700
+ if(absx> TWOTO42 ) { /* 2^42, x is very large, let us first subtract a large integer from it */
701
+ t.d = xs;
702
+ t.i[LO] =0; /* remove the low part, in which was the coma since x > 2^42.
703
+ So what remains in t is an FP integer almost as large as x */
704
+ xs = xs-t.d; /* we are going to throw away the int part anyway */
705
+ }
706
+
707
+ t.d = TWOTO5251 + xs;
708
+ u = t.d - TWOTO5251;
709
+ y = xs - u;
710
+ y = y * INV128;
711
+ index = t.i[LO] & 0x3f;
712
+ quadrant = (t.i[LO] & 0xff) >>6;
713
+
714
+
715
+ /* Special case tests come late because the conversion FP to int is slow */
716
+ xih = xdb.i[HI];
717
+ absxih = xih & 0x7fffffff;
718
+
719
+ /* SPECIAL CASES: x=(Nan, Inf) cos(pi*x)=Nan */
720
+
721
+ if (absxih>=0x7ff00000) {
722
+ xdb.l=0xfff8000000000000LL;
723
+ return xdb.d - xdb.d;
724
+ }
725
+
726
+ if(absxih>=0x43400000) /* 2^53, which entails that x is an even integer */
727
+ return 1.0; /*signed */
728
+
729
+ if(index==0 && y==0. && ((quadrant&1)==1)) return -0.;
730
+
731
+ if(index==0 && y==0. && quadrant==0) return 1.;
732
+ if(index==0 && y==0. && quadrant==2) return -1.;
733
+
734
+ if (absxih<0x3E200000) /* 2^-29 */
735
+ return 0.9999999999999998889776975374843459576368331909179687500; /* 1-2^-53 */
736
+ /* Always +0, inpired by LIA2; We do not have cos(x+pi) == - cos(x)
737
+ in this case */
738
+
739
+ cospi_accurate(&rh, &rm, &rl, y, index, quadrant);
740
+ ReturnRoundDownwards3(rh,rm,rl);
741
+ };
742
+
743
+
744
+
745
+
746
+ double cospi_ru(double x){
747
+ double xs, y,u, rh, rm, rl, absx;
748
+ db_number xdb, t;
749
+ int32_t xih, absxih, index, quadrant;
750
+
751
+ if (x<0) absx =-x; else absx=x;
752
+
753
+ xdb.d=x;
754
+ xs = x*128.0;
755
+
756
+
757
+ /* argument reduction.
758
+ We do it before the special case tests for performance,
759
+ it might compute garbage for inf, very large inputs, etc */
760
+ if(absx> TWOTO42 ) { /* 2^42, x is very large, let us first subtract a large integer from it */
761
+ t.d = xs;
762
+ t.i[LO] =0; /* remove the low part, in which was the coma since x > 2^42.
763
+ So what remains in t is an FP integer almost as large as x */
764
+ xs = xs-t.d; /* we are going to throw away the int part anyway */
765
+ }
766
+
767
+ t.d = TWOTO5251 + xs;
768
+ u = t.d - TWOTO5251;
769
+ y = xs - u;
770
+ y = y * INV128;
771
+ index = t.i[LO] & 0x3f;
772
+ quadrant = (t.i[LO] & 0xff) >>6;
773
+
774
+
775
+ /* Special case tests come late because the conversion FP to int is slow */
776
+ xih = xdb.i[HI];
777
+ absxih = xih & 0x7fffffff;
778
+
779
+ /* SPECIAL CASES: x=(Nan, Inf) cos(pi*x)=Nan */
780
+ if (absxih>=0x7ff00000) {
781
+ xdb.l=0xfff8000000000000LL;
782
+ return xdb.d - xdb.d;
783
+ }
784
+
785
+ if(absxih>=0x43400000) /* 2^53, which entails that x is an even integer */
786
+ return 1.0; /*signed */
787
+
788
+ if(index==0 && y==0. && quadrant==0) return 1.;
789
+ if(index==0 && y==0. && quadrant==2) return -1.;
790
+
791
+ if(index==0 && y==0. && ((quadrant&1)==1)) return +0.;
792
+ /* Always +0, inpired by LIA2; We do not have cos(x+pi) == - cos(x)
793
+ in this case */
794
+
795
+ if (absxih<0x3E200000) /* 2^-29 */
796
+ return 1;
797
+
798
+ cospi_accurate(&rh, &rm, &rl, y, index, quadrant);
799
+ ReturnRoundUpwards3(rh,rm,rl);
800
+ };
801
+
802
+
803
+
804
+
805
+ double cospi_rz(double x){
806
+ double xs, y,u, rh, rm, rl, absx;
807
+ db_number xdb, t;
808
+ int32_t xih, absxih, index, quadrant;
809
+
810
+ if (x<0) absx =-x; else absx=x;
811
+
812
+ xdb.d=x;
813
+ xs = x*128.0;
814
+
815
+
816
+ /* argument reduction.
817
+ We do it before the special case tests for performance,
818
+ it might compute garbage for inf, very large inputs, etc */
819
+ if(absx> TWOTO42 ) { /* 2^42, x is very large, let us first subtract a large integer from it */
820
+ t.d = xs;
821
+ t.i[LO] =0; /* remove the low part, in which was the coma since x > 2^42.
822
+ So what remains in t is an FP integer almost as large as x */
823
+ xs = xs-t.d; /* we are going to throw away the int part anyway */
824
+ }
825
+
826
+ t.d = TWOTO5251 + xs;
827
+ u = t.d - TWOTO5251;
828
+ y = xs - u;
829
+ y = y * INV128;
830
+ index = t.i[LO] & 0x3f;
831
+ quadrant = (t.i[LO] & 0xff) >>6;
832
+
833
+
834
+ /* Special case tests come late because the conversion FP to int is slow */
835
+ xih = xdb.i[HI];
836
+ absxih = xih & 0x7fffffff;
837
+
838
+ /* SPECIAL CASES: x=(Nan, Inf) cos(pi*x)=Nan */
839
+
840
+ if (absxih>=0x7ff00000) {
841
+ xdb.l=0xfff8000000000000LL;
842
+ return xdb.d - xdb.d;
843
+ }
844
+
845
+ if(absxih>=0x43400000) /* 2^53, which entails that x is an even integer */
846
+ return 1.0; /*signed */
847
+
848
+ if(index==0 && y==0. && ((quadrant&1)==1)) return +0.;
849
+ /* Always +0, inpired by LIA2; We do not have cos(x+pi) == - cos(x)
850
+ in this case */
851
+
852
+ if(index==0 && y==0. && quadrant==0) return 1.;
853
+ if(index==0 && y==0. && quadrant==2) return -1.;
854
+
855
+ if (absxih<0x3E200000) /* 2^-29 */
856
+ return 0.9999999999999998889776975374843459576368331909179687500; /* 1-2^-53 */
857
+
858
+ cospi_accurate(&rh, &rm, &rl, y, index, quadrant);
859
+ ReturnRoundTowardsZero3(rh,rm,rl);
860
+ };
861
+
862
+
863
+
864
+
865
+
866
+
867
+ /* tangent of pi times x */
868
+ double tanpi_rn(double x){
869
+ double xs, y,u, rh, rm, rl, ch,cm,cl, ich,icm,icl, sh,sm,sl, sign,absx;
870
+ db_number xdb, t;
871
+ int32_t xih, absxih, index, quadrant;
872
+
873
+ if (x<0) absx = -x; else absx = x;
874
+
875
+ xdb.d = x;
876
+
877
+ xs = x*128.0;
878
+
879
+ /* argument reduction */
880
+ if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
881
+ t.d = xs;
882
+ t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
883
+ So what remains in t is an FP integer almost as large as x */
884
+ xs = xs-t.d; /* we are going to throw away the int part anyway */
885
+ }
886
+
887
+ t.d = TWOTO5251 + xs;
888
+ u = t.d - TWOTO5251;
889
+ y = xs - u;
890
+ index = t.i[LO] & 0x3f;
891
+ quadrant = (t.i[LO] & 0xff) >>6;
892
+
893
+ /* Special case tests come late because the conversion FP to int is slow */
894
+ xih = xdb.i[HI];
895
+ absxih = xih & 0x7fffffff;
896
+ if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
897
+
898
+ if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
899
+ /* TODO ? No test for Pi/4. Such a value will lauch the accurate phase. */
900
+
901
+ y = y * INV128;
902
+
903
+ /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
904
+ if (absxih>=0x7ff00000) {
905
+ xdb.l=0xfff8000000000000LL;
906
+ return xdb.d - xdb.d;
907
+ }
908
+
909
+ if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
910
+ return sign*0.0; /*signed */
911
+
912
+ if(absxih<=0x3E000000) /*2^{-31}*/ {
913
+ if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
914
+ /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
915
+ scs_t result;
916
+ scs_set_d(result, x );
917
+ scs_mul(result, PiSCS_ptr, result);
918
+ scs_get_d(&rh, result);
919
+ return rh;
920
+ }
921
+ /* First step for Pi*x. TODO: FMA-based optimisation */
922
+ const double DekkerConst = 134217729.; /* 2^27 +1 */
923
+ double tt, xh, xl;
924
+ /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
925
+ tt = x*DekkerConst;
926
+ xh = (x-tt)+tt;
927
+ xl = x-xh;
928
+ Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
929
+ if(rh == (rh + (rl * PIX_RNCST_TAN)))
930
+ return rh;
931
+ }
932
+ /* Fall here either if we have a large input, or if we have a small
933
+ input and the rounding test fails. */
934
+ cospi_accurate(&ch, &cm, &cl, y, index, quadrant);
935
+ Recpr33(&ich, &icm, &icl, ch, cm, cl);
936
+ sinpi_accurate(&sh, &sm, &sl, y, index, quadrant);
937
+ Mul33(&rh,&rm,&rl, sh,sm,sl, ich,icm,icl);
938
+ ReturnRoundToNearest3(rh,rm,rl);
939
+
940
+ };
941
+
942
+
943
+
944
+
945
+ double tanpi_rd(double x){
946
+ double xs, y,u, rh, rm, rl, ch,cm,cl, ich,icm,icl, sh,sm,sl, sign,absx;
947
+ db_number xdb, t;
948
+ int32_t xih, absxih, index, quadrant;
949
+
950
+ if (x<0) absx = -x; else absx = x;
951
+
952
+ xdb.d = x;
953
+
954
+ xs = x*128.0;
955
+
956
+ /* argument reduction */
957
+ if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
958
+ t.d = xs;
959
+ t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
960
+ So what remains in t is an FP integer almost as large as x */
961
+ xs = xs-t.d; /* we are going to throw away the int part anyway */
962
+ }
963
+
964
+ t.d = TWOTO5251 + xs;
965
+ u = t.d - TWOTO5251;
966
+ y = xs - u;
967
+ index = t.i[LO] & 0x3f;
968
+ quadrant = (t.i[LO] & 0xff) >>6;
969
+
970
+ /* Special case tests come late because the conversion FP to int is slow */
971
+ xih = xdb.i[HI];
972
+ absxih = xih & 0x7fffffff;
973
+ if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
974
+
975
+ if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
976
+
977
+ y = y * INV128;
978
+
979
+ /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
980
+ if (absxih>=0x7ff00000) {
981
+ xdb.l=0xfff8000000000000LL;
982
+ return xdb.d - xdb.d;
983
+ }
984
+
985
+ if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
986
+ return sign*0.0; /*signed */
987
+
988
+ if(absxih<=0x3E000000) /*2^{-31}*/ {
989
+ if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
990
+ /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
991
+ scs_t result;
992
+ scs_set_d(result, x );
993
+ scs_mul(result, PiSCS_ptr, result);
994
+ scs_get_d_minf(&rh, result);
995
+ return rh;
996
+ }
997
+ /* First step for Pi*x. TODO: FMA-based optimisation */
998
+ const double DekkerConst = 134217729.; /* 2^27 +1 */
999
+ double tt, xh, xl;
1000
+ /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
1001
+ tt = x*DekkerConst;
1002
+ xh = (x-tt)+tt;
1003
+ xl = x-xh;
1004
+ Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
1005
+ TEST_AND_RETURN_RD(rh,rl,PIX_EPS_SIN);
1006
+ }
1007
+ /* Fall here either if we have a large input, or if we have a small
1008
+ input and the rounding test fails. */
1009
+ cospi_accurate(&ch, &cm, &cl, y, index, quadrant);
1010
+ Recpr33(&ich, &icm, &icl, ch, cm, cl);
1011
+ sinpi_accurate(&sh, &sm, &sl, y, index, quadrant);
1012
+ Mul33(&rh,&rm,&rl, sh,sm,sl, ich,icm,icl);
1013
+ ReturnRoundDownwards3(rh,rm,rl);
1014
+ };
1015
+
1016
+
1017
+
1018
+
1019
+
1020
+
1021
+
1022
+
1023
+
1024
+
1025
+ double tanpi_ru(double x){
1026
+ double xs, y,u, rh, rm, rl, ch,cm,cl, ich,icm,icl, sh,sm,sl, sign,absx;
1027
+ db_number xdb, t;
1028
+ int32_t xih, absxih, index, quadrant;
1029
+
1030
+ if (x<0) absx = -x; else absx = x;
1031
+
1032
+ xdb.d = x;
1033
+
1034
+ xs = x*128.0;
1035
+
1036
+ /* argument reduction */
1037
+ if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
1038
+ t.d = xs;
1039
+ t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
1040
+ So what remains in t is an FP integer almost as large as x */
1041
+ xs = xs-t.d; /* we are going to throw away the int part anyway */
1042
+ }
1043
+
1044
+ t.d = TWOTO5251 + xs;
1045
+ u = t.d - TWOTO5251;
1046
+ y = xs - u;
1047
+ index = t.i[LO] & 0x3f;
1048
+ quadrant = (t.i[LO] & 0xff) >>6;
1049
+
1050
+ /* Special case tests come late because the conversion FP to int is slow */
1051
+ xih = xdb.i[HI];
1052
+ absxih = xih & 0x7fffffff;
1053
+ if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
1054
+
1055
+ if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
1056
+
1057
+ y = y * INV128;
1058
+
1059
+ /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
1060
+ if (absxih>=0x7ff00000) {
1061
+ xdb.l=0xfff8000000000000LL;
1062
+ return xdb.d - xdb.d;
1063
+ }
1064
+
1065
+ if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
1066
+ return sign*0.0; /*signed */
1067
+
1068
+ if(absxih<=0x3E000000) /*2^{-31}*/ {
1069
+ if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
1070
+ /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
1071
+ scs_t result;
1072
+ scs_set_d(result, x );
1073
+ scs_mul(result, PiSCS_ptr, result);
1074
+ scs_get_d_pinf(&rh, result);
1075
+ return rh;
1076
+ }
1077
+ /* First step for Pi*x. TODO: FMA-based optimisation */
1078
+ const double DekkerConst = 134217729.; /* 2^27 +1 */
1079
+ double tt, xh, xl;
1080
+ /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
1081
+ tt = x*DekkerConst;
1082
+ xh = (x-tt)+tt;
1083
+ xl = x-xh;
1084
+ Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
1085
+ TEST_AND_RETURN_RU(rh,rl,PIX_EPS_TAN);
1086
+ }
1087
+ /* Fall here either if we have a large input, or if we have a small
1088
+ input and the rounding test fails. */
1089
+ cospi_accurate(&ch, &cm, &cl, y, index, quadrant);
1090
+ Recpr33(&ich, &icm, &icl, ch, cm, cl);
1091
+ sinpi_accurate(&sh, &sm, &sl, y, index, quadrant);
1092
+ Mul33(&rh,&rm,&rl, sh,sm,sl, ich,icm,icl);
1093
+ ReturnRoundUpwards3(rh,rm,rl);
1094
+ };
1095
+
1096
+
1097
+
1098
+ double tanpi_rz(double x){
1099
+ double xs, y,u, rh, rm, rl, ch,cm,cl, ich,icm,icl, sh,sm,sl, sign,absx;
1100
+ db_number xdb, t;
1101
+ int32_t xih, absxih, index, quadrant;
1102
+
1103
+ if (x<0) absx = -x; else absx = x;
1104
+
1105
+ xdb.d = x;
1106
+
1107
+ xs = x*128.0;
1108
+
1109
+ /* argument reduction */
1110
+ if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
1111
+ t.d = xs;
1112
+ t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
1113
+ So what remains in t is an FP integer almost as large as x */
1114
+ xs = xs-t.d; /* we are going to throw away the int part anyway */
1115
+ }
1116
+
1117
+ t.d = TWOTO5251 + xs;
1118
+ u = t.d - TWOTO5251;
1119
+ y = xs - u;
1120
+ index = t.i[LO] & 0x3f;
1121
+ quadrant = (t.i[LO] & 0xff) >>6;
1122
+
1123
+ /* Special case tests come late because the conversion FP to int is slow */
1124
+ xih = xdb.i[HI];
1125
+ absxih = xih & 0x7fffffff;
1126
+ if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
1127
+
1128
+ if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
1129
+
1130
+ y = y * INV128;
1131
+
1132
+ /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
1133
+ if (absxih>=0x7ff00000) {
1134
+ xdb.l=0xfff8000000000000LL;
1135
+ return xdb.d - xdb.d;
1136
+ }
1137
+
1138
+ if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
1139
+ return sign*0.0; /*signed */
1140
+
1141
+ if(absxih<=0x3E000000) /*2^{-31}*/ {
1142
+ if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
1143
+ /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
1144
+ scs_t result;
1145
+ scs_set_d(result, x );
1146
+ scs_mul(result, PiSCS_ptr, result);
1147
+ scs_get_d_zero(&rh, result);
1148
+ return rh;
1149
+ }
1150
+ /* First step for Pi*x. TODO: FMA-based optimisation */
1151
+ const double DekkerConst = 134217729.; /* 2^27 +1 */
1152
+ double tt, xh, xl;
1153
+ /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
1154
+ tt = x*DekkerConst;
1155
+ xh = (x-tt)+tt;
1156
+ xl = x-xh;
1157
+ Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
1158
+ TEST_AND_RETURN_RZ(rh,rl,PIX_EPS_SIN);
1159
+ }
1160
+ /* Fall here either if we have a large input, or if we have a small
1161
+ input and the rounding test fails. */
1162
+ cospi_accurate(&ch, &cm, &cl, y, index, quadrant);
1163
+ Recpr33(&ich, &icm, &icl, ch, cm, cl);
1164
+ sinpi_accurate(&sh, &sm, &sl, y, index, quadrant);
1165
+ Mul33(&rh,&rm,&rl, sh,sm,sl, ich,icm,icl);
1166
+ ReturnRoundTowardsZero3(rh,rm,rl);
1167
+ };
1168
+
1169
+