intervals 0.3.56

Sign up to get free protection for your applications and to get access to all the features.
Files changed (131) hide show
  1. data/VERSION.txt +1 -0
  2. data/ext/crlibm/AUTHORS +2 -0
  3. data/ext/crlibm/COPYING +504 -0
  4. data/ext/crlibm/ChangeLog +80 -0
  5. data/ext/crlibm/INSTALL +182 -0
  6. data/ext/crlibm/Makefile.am +84 -0
  7. data/ext/crlibm/Makefile.in +530 -0
  8. data/ext/crlibm/NEWS +0 -0
  9. data/ext/crlibm/README +31 -0
  10. data/ext/crlibm/TODO +47 -0
  11. data/ext/crlibm/VERSION +1 -0
  12. data/ext/crlibm/aclocal.m4 +989 -0
  13. data/ext/crlibm/atan-itanium.c +846 -0
  14. data/ext/crlibm/atan-pentium.c +261 -0
  15. data/ext/crlibm/atan_accurate.c +244 -0
  16. data/ext/crlibm/atan_accurate.h +191 -0
  17. data/ext/crlibm/atan_fast.c +324 -0
  18. data/ext/crlibm/atan_fast.h +678 -0
  19. data/ext/crlibm/config.guess +1461 -0
  20. data/ext/crlibm/config.sub +1566 -0
  21. data/ext/crlibm/configure +7517 -0
  22. data/ext/crlibm/configure.ac +364 -0
  23. data/ext/crlibm/crlibm.h +125 -0
  24. data/ext/crlibm/crlibm_config.h +149 -0
  25. data/ext/crlibm/crlibm_config.h.in +148 -0
  26. data/ext/crlibm/crlibm_private.c +293 -0
  27. data/ext/crlibm/crlibm_private.h +658 -0
  28. data/ext/crlibm/csh_fast.c +631 -0
  29. data/ext/crlibm/csh_fast.h +771 -0
  30. data/ext/crlibm/double-extended.h +496 -0
  31. data/ext/crlibm/exp-td.c +962 -0
  32. data/ext/crlibm/exp-td.h +685 -0
  33. data/ext/crlibm/exp_accurate.c +197 -0
  34. data/ext/crlibm/exp_accurate.h +85 -0
  35. data/ext/crlibm/gappa/log-de-E0-logir0.gappa +106 -0
  36. data/ext/crlibm/gappa/log-de-E0.gappa +79 -0
  37. data/ext/crlibm/gappa/log-de.gappa +81 -0
  38. data/ext/crlibm/gappa/log-td-E0-logir0.gappa +126 -0
  39. data/ext/crlibm/gappa/log-td-E0.gappa +143 -0
  40. data/ext/crlibm/gappa/log-td-accurate-E0-logir0.gappa +230 -0
  41. data/ext/crlibm/gappa/log-td-accurate-E0.gappa +213 -0
  42. data/ext/crlibm/gappa/log-td-accurate.gappa +217 -0
  43. data/ext/crlibm/gappa/log-td.gappa +156 -0
  44. data/ext/crlibm/gappa/trigoSinCosCase3.gappa +204 -0
  45. data/ext/crlibm/gappa/trigoTanCase2.gappa +73 -0
  46. data/ext/crlibm/install-sh +269 -0
  47. data/ext/crlibm/log-de.c +431 -0
  48. data/ext/crlibm/log-de.h +732 -0
  49. data/ext/crlibm/log-td.c +852 -0
  50. data/ext/crlibm/log-td.h +819 -0
  51. data/ext/crlibm/log10-td.c +906 -0
  52. data/ext/crlibm/log10-td.h +823 -0
  53. data/ext/crlibm/log2-td.c +935 -0
  54. data/ext/crlibm/log2-td.h +821 -0
  55. data/ext/crlibm/maple/atan.mpl +359 -0
  56. data/ext/crlibm/maple/common-procedures.mpl +997 -0
  57. data/ext/crlibm/maple/csh.mpl +446 -0
  58. data/ext/crlibm/maple/double-extended.mpl +151 -0
  59. data/ext/crlibm/maple/exp-td.mpl +195 -0
  60. data/ext/crlibm/maple/log-de.mpl +243 -0
  61. data/ext/crlibm/maple/log-td.mpl +316 -0
  62. data/ext/crlibm/maple/log10-td.mpl +345 -0
  63. data/ext/crlibm/maple/log2-td.mpl +334 -0
  64. data/ext/crlibm/maple/trigo.mpl +728 -0
  65. data/ext/crlibm/maple/triple-double.mpl +58 -0
  66. data/ext/crlibm/missing +198 -0
  67. data/ext/crlibm/mkinstalldirs +40 -0
  68. data/ext/crlibm/rem_pio2_accurate.c +219 -0
  69. data/ext/crlibm/rem_pio2_accurate.h +53 -0
  70. data/ext/crlibm/scs_lib/AUTHORS +3 -0
  71. data/ext/crlibm/scs_lib/COPYING +504 -0
  72. data/ext/crlibm/scs_lib/ChangeLog +16 -0
  73. data/ext/crlibm/scs_lib/INSTALL +215 -0
  74. data/ext/crlibm/scs_lib/Makefile.am +18 -0
  75. data/ext/crlibm/scs_lib/Makefile.in +328 -0
  76. data/ext/crlibm/scs_lib/NEWS +0 -0
  77. data/ext/crlibm/scs_lib/README +9 -0
  78. data/ext/crlibm/scs_lib/TODO +4 -0
  79. data/ext/crlibm/scs_lib/addition_scs.c +623 -0
  80. data/ext/crlibm/scs_lib/config.guess +1461 -0
  81. data/ext/crlibm/scs_lib/config.sub +1566 -0
  82. data/ext/crlibm/scs_lib/configure +6226 -0
  83. data/ext/crlibm/scs_lib/division_scs.c +110 -0
  84. data/ext/crlibm/scs_lib/double2scs.c +174 -0
  85. data/ext/crlibm/scs_lib/install-sh +269 -0
  86. data/ext/crlibm/scs_lib/missing +198 -0
  87. data/ext/crlibm/scs_lib/mkinstalldirs +40 -0
  88. data/ext/crlibm/scs_lib/multiplication_scs.c +456 -0
  89. data/ext/crlibm/scs_lib/poly_fct.c +112 -0
  90. data/ext/crlibm/scs_lib/print_scs.c +73 -0
  91. data/ext/crlibm/scs_lib/rand_scs.c +63 -0
  92. data/ext/crlibm/scs_lib/scs.h +353 -0
  93. data/ext/crlibm/scs_lib/scs2double.c +391 -0
  94. data/ext/crlibm/scs_lib/scs2mpf.c +58 -0
  95. data/ext/crlibm/scs_lib/scs2mpfr.c +61 -0
  96. data/ext/crlibm/scs_lib/scs_private.c +23 -0
  97. data/ext/crlibm/scs_lib/scs_private.h +133 -0
  98. data/ext/crlibm/scs_lib/tests/tbx_timing.h +102 -0
  99. data/ext/crlibm/scs_lib/wrapper_scs.h +486 -0
  100. data/ext/crlibm/scs_lib/zero_scs.c +52 -0
  101. data/ext/crlibm/stamp-h.in +1 -0
  102. data/ext/crlibm/tests/Makefile.am +43 -0
  103. data/ext/crlibm/tests/Makefile.in +396 -0
  104. data/ext/crlibm/tests/blind_test.c +148 -0
  105. data/ext/crlibm/tests/generate_test_vectors.c +258 -0
  106. data/ext/crlibm/tests/soak_test.c +334 -0
  107. data/ext/crlibm/tests/test_common.c +627 -0
  108. data/ext/crlibm/tests/test_common.h +28 -0
  109. data/ext/crlibm/tests/test_perf.c +570 -0
  110. data/ext/crlibm/tests/test_val.c +249 -0
  111. data/ext/crlibm/trigo_accurate.c +500 -0
  112. data/ext/crlibm/trigo_accurate.h +331 -0
  113. data/ext/crlibm/trigo_fast.c +1219 -0
  114. data/ext/crlibm/trigo_fast.h +639 -0
  115. data/ext/crlibm/triple-double.h +878 -0
  116. data/ext/extconf.rb +31 -0
  117. data/ext/fpu.c +107 -0
  118. data/ext/jamis-mod.rb +591 -0
  119. data/lib/fpu.rb +287 -0
  120. data/lib/interval.rb +1170 -0
  121. data/lib/intervals.rb +212 -0
  122. data/lib/struct_float.rb +133 -0
  123. data/test/data_atan.txt +360 -0
  124. data/test/data_cos.txt +346 -0
  125. data/test/data_cosh.txt +3322 -0
  126. data/test/data_exp.txt +3322 -0
  127. data/test/data_log.txt +141 -0
  128. data/test/data_sin.txt +140 -0
  129. data/test/data_sinh.txt +3322 -0
  130. data/test/data_tan.txt +342 -0
  131. metadata +186 -0
@@ -0,0 +1,728 @@
1
+
2
+ Digits := 100:
3
+ interface(quiet=true):
4
+ read "common-procedures.mpl":
5
+ with(orthopoly):
6
+
7
+
8
+ mkdir("TEMPTRIG"):
9
+
10
+
11
+
12
+ # - Evaluation scheme :
13
+ # case 1 : return x
14
+ # case 2 (or Fast) : compute a simple polynomial
15
+ # case 3 : do an argument reduction...
16
+
17
+
18
+
19
+ ########################################################
20
+ # Case 1 : Small arguments
21
+ # return x for sine and tan, return 1 for cos
22
+ ########################################################
23
+
24
+ xmax_return_x_for_sin := 2^(-26):
25
+ xmax_return_1_for_cos_RN := sqrt(2^(-53)):
26
+ xmax_return_1_for_cos_RDIR:=2^(-26):
27
+ one_rounded_down := evalf(1-ulp(1/2)):
28
+
29
+ xmax_return_x_for_tan := 2^(-27):
30
+
31
+
32
+
33
+
34
+ ########################################################
35
+ # Case 2 : simple polynomial approximation
36
+ ########################################################
37
+
38
+ # We want to use the same polynomial in case 2 and 3.
39
+ # So see after arg red
40
+
41
+ #################################################
42
+ # Case 3 : Argument reduction
43
+ #################################################
44
+
45
+
46
+
47
+ #################################################
48
+ # CODY and WAITE Argument reduction
49
+
50
+
51
+ C := Pi/256:
52
+ invC:= nearest(1/C):
53
+ reminvC := evalf(1/C - invC):
54
+ expC:=ieeedouble(C)[2]:
55
+ epsinvC := abs(reminvC*C):
56
+
57
+ # There are three sets of constants :
58
+ # - split redC into two constants, for small values when we are concerned with absolute error
59
+ # - split redC into three constants, for larger values
60
+ # - split redC into three doubles, for the cases when we need
61
+ # good relative precision on the result and fear cancellation
62
+
63
+
64
+
65
+
66
+ # Fastest reduction using two-part Cody and Waite (up to |k|=2^22)
67
+
68
+ bitsCh_0:=34:
69
+
70
+ # 1/2 <= C/2^(expC+1) <1
71
+ Ch:= round(evalf( C * 2^(bitsCh_0-expC-1))) / (2^(bitsCh_0-expC-1)):
72
+ # recompute bitsCh in case we are lucky (and we are for bitsCh_0=32)
73
+ bitsCh:=1+log2(op(2,ieeedouble(Ch)[3])) : # this means the log of the denominator
74
+
75
+ Cl:=nearest(C - Ch):
76
+ # Cody and Waite argument reduction will work for |k|<kmax_cw2
77
+ kmax_cw2:=2^(53-bitsCh):
78
+
79
+ # The constants to move to the .h file
80
+ RR_CW2_CH := Ch:
81
+ RR_CW2_MCL := -Cl:
82
+ XMAX_CODY_WAITE_2 := nearest(kmax_cw2*C):
83
+
84
+ # The error in this case (we need absolute error)
85
+ delta_repr_C_cw2 := abs(C-Ch-Cl):
86
+ delta_round_cw2 := kmax_cw2* 1/2 * ulp(Cl) :
87
+ delta_cody_waite_2 := kmax_cw2 * delta_repr_C_cw2 + delta_round_cw2:
88
+ # This is the delta on y, the reduced argument
89
+
90
+ #log2(%);
91
+
92
+
93
+
94
+ # Slower reduction using three-part Cody and Waite, up to |k|=2^31
95
+
96
+ bitsCh_0:=23: # 22 or 23
97
+ Ch:= round(evalf( C * 2^(bitsCh_0-expC-1))) / (2^(bitsCh_0-expC-1)):
98
+ # recompute bitsCh in case we are lucky
99
+ bitsCh:=1+log2(op(2,ieeedouble(Ch)[3])) : # this means the log of the denominator
100
+
101
+ r := C-Ch:
102
+ Cmed := round(evalf( r * 2^(2*bitsCh-expC-1))) / (2^(2*bitsCh-expC-1)):
103
+ bitsCmed:=1+log2(op(2,ieeedouble(Cmed)[3])) :
104
+
105
+ Cl:=nearest(C - Ch - Cmed):
106
+
107
+ kmax_cw3 := 2^min(53-bitsCh, 53-bitsCmed, 31):# Otherwise we have integer overflow
108
+
109
+
110
+
111
+ # The constants to move to the .h file
112
+ RR_CW3_CH := Ch:
113
+ RR_CW3_CM := Cmed:
114
+ RR_CW3_MCL := -Cl:
115
+ XMAX_CODY_WAITE_3 := nearest(kmax_cw3*C):
116
+
117
+ # The error in this case (we need absolute error)
118
+ delta_repr_C_cw3 := abs(C - Ch - Cmed - Cl):
119
+ delta_round_cw3 := kmax_cw3 * 1/2 * ulp(Cl) :
120
+ delta_cody_waite_3 := kmax_cw3 * delta_repr_C_cw3 + delta_round_cw3:
121
+ # This is the delta on y, the reduced argument
122
+
123
+ #log2(%);
124
+
125
+
126
+
127
+
128
+
129
+ # Third range reduction, using double-double arithmetic, for |k| up to kmax = 2^46-1
130
+
131
+ # This max int value can be produced by DOUBLE2LONGINT
132
+ kmax:=2^46-1:
133
+ XMAX_DDRR:=nearest(kmax*C):
134
+
135
+ #in this case we have C stored as 3 doubles
136
+ Ch := nearest(C):
137
+ Cmed := nearest(C-Ch):
138
+ Cl := nearest(C-Ch-Cmed):
139
+
140
+ RR_DD_MCH := -Ch:
141
+ RR_DD_MCM := -Cmed:
142
+ RR_DD_CL := Cl:
143
+
144
+ delta_repr_C := abs(C - Ch - Cmed - Cl):
145
+
146
+ # and we have only exact Add12 and Mul12 operations. The only place
147
+ # with possible rounding errors is:
148
+ # Add22 (pyh, pyl, (x + kch_h) , (kcm_l - kd*RR_DD_CL), th, tl) ;
149
+ # where (x + kch_h) is exact (Sterbenz) with up to kmax bits of cancellation
150
+ # and the error is simply the error in (kcm_l - kd*RR_DD_CL)
151
+ # At the very worst :
152
+ delta_round :=
153
+ kmax * 1/2 * ulp(Cl) # for kd*RR_DD_CL
154
+ + kmax*ulp(Cl) # for the subtraction
155
+ + 2^(-100) * Pi/512 : # for the Add22
156
+ delta_RR_DD := kmax * delta_repr_C + delta_round:
157
+
158
+ # the last case, Payne and Hanek reduction, gives a very small delta:
159
+ # red arg is on 9*30 bits, then rounded to a double-double (106 bits)
160
+ # This should, of course, be optimized some day
161
+ delta_PayneHanek := 2^(-100):
162
+
163
+ # Finally the max delta on the reduced argument is
164
+ delta_ArgRed := max(delta_cody_waite_2, delta_cody_waite_3,
165
+ delta_RR_DD, delta_PayneHanek):
166
+
167
+ #print("delta_ArgRed to move to the .gappa file = ", evalf(delta_ArgRed)):
168
+ #log2(delta_ArgRed);
169
+
170
+
171
+
172
+
173
+
174
+ # Now we use the above absolute error when k mod 256 <> 0
175
+ # otherwise we need to worry about relative accuracy of the result.
176
+
177
+ # First, what is the worst case for cancellation ?
178
+
179
+ emax := ieeedouble(XMAX_DDRR)[2] +1 :
180
+ # above emax, we will use Payne and Hanek so we do not worry
181
+
182
+ (wcn, wce, wceps) := WorstCaseForAdditiveRangeReduction(2,53,-8, emax, C):
183
+ wcx := wcn * 2^wce:
184
+ wck := round(wcx/C):
185
+ wcy := wcx - wck*C:
186
+
187
+ #log2(wcy); # y > 2^(-67);
188
+
189
+ # In these cases we use the double-double range reduction, for |k|<kmax_cw3
190
+ # and the relative precision in the worst case is for wcy
191
+
192
+ delta_round := kmax_cw3 * 1/2 * ulp(Cl) # for kd*RR_DD_CL
193
+ + kmax_cw3 * ulp(Cl) : # for the subtraction
194
+
195
+ delta_RR_DD := kmax_cw3 * delta_repr_C + delta_round:
196
+
197
+ eps_ArgRed := (1+delta_RR_DD/wcy)*(1+2^(-100)) -1:
198
+
199
+ #log2(eps_ArgRed);
200
+
201
+ # In all the other cases we use Payne and Hanek, and eps_ArgRed is
202
+ # much smaller, so this is the max.
203
+
204
+
205
+
206
+ ###########
207
+ # Polynomials for do_sine and do_cos, and for the case 2
208
+ degreeSin := 8:
209
+ degreeCos := 7:
210
+
211
+ maxepsk := (1+epsinvC)*(1+2^(-53))-1:
212
+
213
+ ymaxCase3 := evalf(Pi/512 + XMAX_DDRR*maxepsk):
214
+ #print("ymaxCase3 to move to the .gappa file = ", ymaxCase3):
215
+
216
+
217
+ y2maxCase3 := ymaxCase3^2:
218
+ # These are the parameters that can be varied to fine-tune performance
219
+ # (they should always be larger than Pi/512)
220
+ xmaxCosCase2 := Pi/256:
221
+ xmaxSinCase2 := Pi/256:
222
+
223
+
224
+ x2maxSinCase2:= xmaxSinCase2^2:
225
+ x2maxCosCase2:= xmaxCosCase2^2:
226
+
227
+ # We had the difficulty here to find minimax polynomials which are good for case 2
228
+ # as well as for case 3. A simple solution was to set xmaxCosCase2 =ymaxCase3...
229
+ # However we found another answer: in the future we intend to use these polynomials for second
230
+ # step, too. Therefore, no minimax, only Taylor.
231
+
232
+ polySin:= poly_exact(convert( series(sin(x), x=0, degreeSin+1), polynom)):
233
+ polyTs := expand(polySin/x-1):
234
+ polyTs2 := subs(x=sqrt(y), polyTs):
235
+
236
+ polyCos := poly_exact (convert( series(cos(x), x=0, degreeCos+1), polynom)):
237
+ polyTc2 := subs(x=sqrt(y), polyCos - 1):
238
+
239
+ epsApproxSinCase2 := numapprox[infnorm]((x*polyTs+x -sin(x))/sin(x), x=0..xmaxSinCase2):
240
+ epsApproxSinCase3 := numapprox[infnorm]((x*polyTs +x -sin(x))/sin(x), x=0..ymaxCase3):
241
+
242
+ deltaApproxSinCase2 := numapprox[infnorm]((x*polyTs+x -sin(x)), x=0..xmaxSinCase2):
243
+ deltaApproxSinCase3 := numapprox[infnorm]((x*polyTs +x -sin(x)), x=0..ymaxCase3):
244
+
245
+ deltaApproxCosCase2:= numapprox[infnorm](polyCos - cos(x), x=0..xmaxCosCase2):
246
+ deltaApproxCosCase3:= numapprox[infnorm](polyCos - cos(x), x=0..ymaxCase3):
247
+
248
+ #print("deltaApproxSinCase3 to move to the .gappa file = ", deltaApproxSinCase3):
249
+ #print("deltaApproxCosCase3 to move to the .gappa file = ", deltaApproxCosCase3):
250
+
251
+
252
+ ########################## Case 2 for sine ###########################
253
+
254
+
255
+
256
+ # evaluate this polynomial in double. The error on x*x is at most half an ulp
257
+ errlist:=errlist_quickphase_horner(degree(polyTs2),0,0,2^(-53), 0):
258
+ (eps_rounding_Ts, delta_rounding_Ts, minTs, maxTs):=
259
+ compute_horner_rounding_error(polyTs2,y,x2maxSinCase2, errlist, true):
260
+
261
+ eps_poly_TsCase2 := numapprox[infnorm]((x*polyTs)/(sin(x)-x) -1, x=0..xmaxSinCase2):
262
+ maxeps2 := (1+eps_poly_TsCase2)*(1+eps_rounding_Ts)*(1+2^(-53))-1:
263
+
264
+ maxepstotalSinCase2 := maxeps2 * numapprox[infnorm](1-x/sin(x), x=0..xmaxSinCase2):
265
+ rnconstantSinCase2 := evalf(compute_rn_constant(maxepstotalSinCase2)):
266
+
267
+
268
+
269
+
270
+
271
+ ##################################### Case2 cos ###########################
272
+
273
+ # evaluate this polynomial in double. The error on x*x is at most half an ulp
274
+ errlist := errlist_quickphase_horner(degree(polyTc2),0,0,2**(-53), 0):
275
+ (eps_rounding_Tc, delta_rounding_Tc, minTc, maxTc):=
276
+ compute_horner_rounding_error(polyTc2,y,x2maxCosCase2, errlist, true):
277
+
278
+ # Then we have an Add12 which is exact. The result is greater than cos(xmaxCosCase2):
279
+ miny := cos(xmaxCosCase2):
280
+ maxepstotalCosCase2 := (delta_rounding_Tc + deltaApproxCosCase2) / miny :
281
+ #log2(%);
282
+ rnconstantCosCase2 := evalf(compute_rn_constant(maxepstotalCosCase2)):
283
+
284
+
285
+
286
+
287
+ ######################## Case2 Tangent #########################
288
+ #
289
+ # Compute the Taylor series
290
+ degreeTanCase2 := 12:
291
+ xmaxTanCase2 := 2**(-4):
292
+ xminTanCase2 := 2**(-30):
293
+
294
+ Poly_P := convert(series(tan(sqrt(x))/(x^(3/2))-1/x, x=0, degreeTanCase2*4),polynom):
295
+ Poly_cheb := numapprox[chebpade](Poly_P, x=xminTanCase2..xmaxTanCase2^2, [degreeTanCase2/2-2,0]):
296
+ polyTanCase2 := poly_exact2(expand(x + x^3 * subs(x=x^2, Poly_cheb)), 4):
297
+
298
+ #polyTanCase2 := poly_exact2(convert(series(tan(x), x=0, degreeTanCase2), polynom), 4):
299
+
300
+ maxepsApproxTanCase2:=numapprox[infnorm](1 - polyTanCase2 / tan(x), x=xminTanCase2..xmaxTanCase2):
301
+
302
+ maxepsOverXTanCase2 :=numapprox[infnorm]((1 - polyTanCase2 / tan(x))/x, x=xminTanCase2..xmaxTanCase2):
303
+ # Now we pass these values to Gappa
304
+
305
+ filename:="TEMPTRIG/TanCase2.sed":
306
+ fd:=fopen(filename, WRITE, TEXT):
307
+ t3h, t3l := hi_lo(coeff(polyTanCase2,x,3)):
308
+ fprintf(fd, " s/_t3h/%1.40e/g\n", t3h):
309
+ fprintf(fd, " s/_t3l/%1.40e/g\n", t3l):
310
+ fprintf(fd, " s/_t5/%1.40e/g\n", coeff(polyTanCase2,x,5)):
311
+ fprintf(fd, " s/_t7/%1.40e/g\n", coeff(polyTanCase2,x,7)):
312
+ fprintf(fd, " s/_t9/%1.40e/g\n", coeff(polyTanCase2,x,9)):
313
+ fprintf(fd, " s/_t11/%1.40e/g\n", coeff(polyTanCase2,x,11)):
314
+ fprintf(fd, " s/_xmax/%1.40e/g\n", xmaxTanCase2):
315
+ fprintf(fd, " s/_maxEpsApproxOverX/%1.40e/g\n", maxepsOverXTanCase2*1.00001):
316
+ fprintf(fd, " s/_maxEpsApprox/%1.40e/g\n", maxepsApproxTanCase2*1.00001):
317
+ fclose(fd):
318
+
319
+ printf("\n\n************ DONE TEMPTRIG/TanCase2.sed ************\n");
320
+ printf("Now you should use \n sed -f TEMPTRIG/TanCase2.sed trigoTanCase2.gappa | gappa > /dev/null \n");
321
+
322
+
323
+
324
+ maxepstotalTanCase2:=4.59602e-19: # Cut from Gappa output
325
+
326
+ log2(maxepstotalTanCase2): # almost 61 bits
327
+
328
+
329
+
330
+
331
+
332
+
333
+ ###############################################################################
334
+ # Computing errors for Case3 : now we have an error due to arg red
335
+
336
+ # First DoSinZero. The notations are those of the paper proof
337
+
338
+ # Approximation error already computed above as epsApproxSinCase3;
339
+
340
+ # polynomial evaluation in double, with an error on y*y of epsilonArgRed
341
+ errlist:=errlist_quickphase_horner(degree(polyTs2),0,0,eps_ArgRed, 0):
342
+ (epsRoundingTsSinZero, deltaRoundingTsSinZero, minTs, maxTs):=
343
+ compute_horner_rounding_error(polyTs2,y,y2maxCase3, errlist, true):
344
+
345
+ # just as in the paper proof
346
+ maxepsSinZero1 := (1+epsApproxSinCase3)*(1+epsRoundingTsSinZero)*(1+2^(-53))*(1+2^(-53)) - 1:
347
+
348
+ # just as in the paper proof. For x>0 the absolute values are as given
349
+
350
+ epstotalSinZero := ( (x-sin(x))*maxepsSinZero1 + x*eps_ArgRed + 2^(-53)*x^3/3 ) / sin(x):
351
+ maxepstotalSinZero := numapprox[infnorm]( epstotalSinZero , x=2^(-30)..ymaxCase3):
352
+
353
+ printf("\nMax rel error for DoSinZero is %1.5e, if it's smaller than 2^(-66) (%1.5e) then the proof is OK\n\n", maxepstotalSinZero, 2^(-66)):
354
+
355
+
356
+
357
+
358
+
359
+ ##############################SinCosCase3############################
360
+ SinCosSize:= 128: # size of the table
361
+
362
+
363
+ # The Gappa files in TEMPTRIG
364
+ for i from 1 to SinCosSize/2 do
365
+ filename:=cat("TEMPTRIG/SinACosA_",i,".sed"):
366
+ fd:=fopen(filename, WRITE, TEXT):
367
+
368
+ # The table values
369
+ s:=hi_lo(sin(i*Pi/(2*SinCosSize))):
370
+ c:=hi_lo(cos(i*Pi/(2*SinCosSize))):
371
+ fprintf(fd, " s/_cah/%1.40e/g\n", c[1]):
372
+ fprintf(fd, " s/_cal/%1.40e/g\n", c[2]):
373
+ fprintf(fd, " s/_sah/%1.40e/g\n", s[1]):
374
+ fprintf(fd, " s/_sal/%1.40e/g\n", s[2]):
375
+
376
+ # The polynomial coefficients
377
+ fprintf(fd, " s/_s3/%1.40e/g\n", coeff(polySin,x,3)):
378
+ fprintf(fd, " s/_s5/%1.40e/g\n", coeff(polySin,x,5)):
379
+ fprintf(fd, " s/_s7/%1.40e/g\n", coeff(polySin,x,7)):
380
+ fprintf(fd, " s/_s9/%1.40e/g\n", coeff(polySin,x,9)):
381
+ fprintf(fd, " s/_c2/%1.40e/g\n", coeff(polyCos,x,2)):
382
+ fprintf(fd, " s/_c4/%1.40e/g\n", coeff(polyCos,x,4)):
383
+ fprintf(fd, " s/_c6/%1.40e/g\n", coeff(polyCos,x,6)):
384
+ fprintf(fd, " s/_c8/%1.40e/g\n", coeff(polyCos,x,8)):
385
+
386
+ # The approximation errors
387
+ fprintf(fd, " s/_ymaxCase3/%1.40e/g\n", ymaxCase3*1.00001):
388
+ fprintf(fd, " s/_delta_ArgRed/%1.40e/g\n", delta_ArgRed*1.00001):
389
+ fprintf(fd, " s/_delta_approx_Sin_Case3/%1.40e/g\n", deltaApproxSinCase3*1.00001):
390
+ fprintf(fd, " s/_delta_approx_Cos_Case3/%1.40e/g\n", deltaApproxCosCase3*1.00001):
391
+
392
+ fclose(fd):
393
+ od:
394
+
395
+
396
+ printf("************ DONE TEMPTRIG/*.sed ************\n"):
397
+
398
+ # A shell script to use them
399
+ filename:="trigo_test.sh":
400
+ fd:=fopen(filename, WRITE, TEXT):
401
+ fprintf(fd, "#!/bin/sh\n"):
402
+ fprintf(fd, "# You probably need to edit the path to the gappa executable\n"):
403
+ fprintf(fd, "for file in TEMPTRIG/SinACosA*.sed \n"):
404
+ fprintf(fd, "do\n"):
405
+ fprintf(fd, " echo $file:\n"):
406
+ fprintf(fd, " sed -f $file trigoSinCosCase3.gappa | ~/gappa/src/gappa > /dev/null\n"):
407
+ fprintf(fd, " echo\n"):
408
+ fprintf(fd, "done\n"):
409
+ fclose(fd):
410
+
411
+ printf("************ DONE trigo_test.sh ************\n"):
412
+ printf("Now you should run\n"):
413
+ printf(" sh trigo_test.sh 2>TEMPTRIG/Gappa.out\n"):
414
+
415
+ printf("Then look at TEMPTRIG/Gappa.out. It shouldn't contain 'No proof'.\n This means that everything is OK and the rounding constants in TEMPTRIG/trigo_fast.h are proven upper bounds.\n\n"):
416
+
417
+
418
+
419
+
420
+ # This value has been validated by Gappa (using all the previous)
421
+ maxepstotalSinCosCase3:=3*2^(-66):
422
+ rnconstantSinCosCase3 := evalf(compute_rn_constant(maxepstotalSinCosCase3)):
423
+
424
+
425
+ # The error of sin, the error of cos, then the error of Div22
426
+ maxepstotalTanCase3:= 2.1*maxepstotalSinCosCase3:
427
+ rnconstantTanCase3 := evalf(compute_rn_constant(maxepstotalTanCase3)):
428
+
429
+
430
+
431
+
432
+
433
+ ##############################################
434
+ ## Compute constants for SCS arg red
435
+ oldDigits:=Digits:
436
+ Digits:=1000:
437
+ # for 2/Pi:
438
+ n:=round(2^(30*48)*evalf(2/Pi)):
439
+ digitlist:=[]:
440
+ for i from 1 to 48 do
441
+ r:=n mod (2^30):
442
+ n:=floor(n/(2^30)):
443
+ hexstring:= convert(convert(r,hex),string):
444
+ digitlist:=[hexstring, op(digitlist)]:
445
+ end:
446
+ digitlist:
447
+
448
+ # for 256/Pi:
449
+ n:=round(2^(30*47)*evalf(256/Pi)):
450
+ digitlist:=[]:
451
+ for i from 1 to 48 do
452
+ r:=n mod (2^30):
453
+ n:=floor(n/(2^30)):
454
+ hexstring:= convert(convert(r,hex),string):
455
+ digitlist:=[hexstring, op(digitlist)]:
456
+ end:
457
+ digitlist:
458
+ Digits:=oldDigits:
459
+
460
+
461
+
462
+ # Auxiliary output function: returns the C line
463
+ # "#define <cvarname> 0x<hex> /* <decimal value> */" as a string.
464
+ # <hex> is ieeehexa(...)[1], presumably the high 32-bit word of the double (see common-procedures.mpl).
465
+ # These high parts are used in the C code in tests such as if(absxhi < XMAX_COS_CASE2),
466
+ # so we have to remove one LSB from the high part, i.e. divide var by
467
+ # (1+2^(-20)).
468
+ # Then absxhi < highpart(var/(1+2^(-20)))
469
+ # => absxhi*(1+2^(-20)) < var
470
+ # => x < var
471
+ outputHighPart:=proc(cvarname, var)
472
+ local varMinusLSB:
473
+ Digits:=8:
474
+ varMinusLSB:=var/(1+2^(-20)):
475
+ ("#define " || cvarname || " 0x" || (ieeehexa(varMinusLSB)[1])
476
+ || " /* " || (convert(evalf(varMinusLSB),string)) || " */" )
477
+ end proc:
478
+
479
+
480
+
481
+
482
+
483
+ # Output:
484
+
485
+ filename:="TEMPTRIG/trigo_fast.h":
486
+ fd:=fopen(filename, WRITE, TEXT):
487
+
488
+ fprintf(fd, "#include \"crlibm.h\"\n#include \"crlibm_private.h\"\n"):
489
+
490
+ fprintf(fd, "\n/*File generated by maple/trigo.mpl*/\n"): # banner must name this script (maple/trigo.mpl), as trigo_accurate.h does
491
+ fprintf(fd, "\n"):
492
+
493
+
494
+ fprintf(fd, "%s\n", outputHighPart("XMAX_RETURN_X_FOR_SIN", xmax_return_x_for_sin) ):
495
+ fprintf(fd, "%s\n", outputHighPart("XMAX_SIN_CASE2 ", xmaxSinCase2) ):
496
+
497
+ fprintf(fd, "%s\n", outputHighPart("XMAX_RETURN_1_FOR_COS_RN", xmax_return_1_for_cos_RN) ):
498
+ fprintf(fd, "%s\n", outputHighPart("XMAX_RETURN_1_FOR_COS_RDIR", xmax_return_1_for_cos_RDIR) ):
499
+ fprintf(fd, "%s\n", outputHighPart("XMAX_COS_CASE2 ", xmaxCosCase2) ):
500
+
501
+ fprintf(fd, "%s\n", outputHighPart("XMAX_RETURN_X_FOR_TAN", xmax_return_x_for_tan) ):
502
+ fprintf(fd, "%s\n", outputHighPart("XMAX_TAN_CASE2 ", xmaxTanCase2) ):
503
+
504
+ fprintf(fd, "\n"):
505
+ fprintf(fd, "#define ONE_ROUNDED_DOWN %1.25e \n", one_rounded_down):
506
+ fprintf(fd, "\n"):
507
+
508
+ fprintf(fd, "#define EPS_SIN_CASE2 %1.25e \n", maxepstotalSinCase2):
509
+ fprintf(fd, "#define RN_CST_SIN_CASE2 %1.25f \n", rnconstantSinCase2):
510
+ fprintf(fd, "\n"):
511
+ fprintf(fd, "#define EPS_COS_CASE2 %1.25e \n", maxepstotalCosCase2):
512
+ fprintf(fd, "#define RN_CST_COS_CASE2 %1.25f \n", rnconstantCosCase2):
513
+ fprintf(fd, "\n"):
514
+ fprintf(fd, "#define EPS_SINCOS_CASE3 %1.25e \n", maxepstotalSinCosCase3):
515
+ fprintf(fd, "#define RN_CST_SINCOS_CASE3 %1.25f \n", rnconstantSinCosCase3):
516
+ fprintf(fd, "\n"):
517
+ fprintf(fd, "#define EPS_TAN_CASE2 %1.25e \n", maxepstotalTanCase2):
518
+ fprintf(fd, "#define EPS_TAN_CASE3 %1.25e \n", maxepstotalTanCase3):
519
+ fprintf(fd, "#define RN_CST_TAN_CASE3 %1.25f \n", rnconstantTanCase3):
520
+
521
+ fprintf(fd, "\n"):
522
+
523
+ fprintf(fd, "#define INV_PIO256 %1.25f \n", 1/C):
524
+ fprintf(fd, "\n"):
525
+
526
+ fprintf(fd, "%s\n", outputHighPart("XMAX_CODY_WAITE_2", XMAX_CODY_WAITE_2) ):
527
+ fprintf(fd, "%s\n", outputHighPart("XMAX_CODY_WAITE_3", XMAX_CODY_WAITE_3) ):
528
+ fprintf(fd, "%s\n", outputHighPart("XMAX_DDRR ", XMAX_DDRR) ):
529
+ #fprintf(fd, "%s\n", outputHighPart("", ) ):
530
+ fprintf(fd, "\n"):
531
+
532
+ fprintf(fd, "#define RR_CW2_CH %1.25e\n", RR_CW2_CH):
533
+ fprintf(fd, "#define RR_CW2_MCL %1.25e\n", RR_CW2_MCL):
534
+ fprintf(fd, "\n"):
535
+
536
+ fprintf(fd, "#define RR_CW3_CH %1.25e\n", RR_CW3_CH):
537
+ fprintf(fd, "#define RR_CW3_CM %1.25e\n", RR_CW3_CM):
538
+ fprintf(fd, "#define RR_CW3_MCL %1.25e\n", RR_CW3_MCL):
539
+ fprintf(fd, "\n"):
540
+
541
+ fprintf(fd, "#define RR_DD_MCH %1.25e\n", RR_DD_MCH):
542
+ fprintf(fd, "#define RR_DD_MCM %1.25e\n", RR_DD_MCM):
543
+ fprintf(fd, "#define RR_DD_CL %1.25e\n", RR_DD_CL):
544
+ fprintf(fd, "\n"):
545
+
546
+ fprintf(fd, "\n"):
547
+
548
+ fprintf(fd, "\n\n"):
549
+
550
+ # The 256/Pi SCS array
551
+ fprintf(fd, "static const int digits_256_over_pi[] = \n{"):
552
+ for i from 0 to 11 do
553
+ for j from 1 to 4 do
554
+ fprintf(fd, " 0x%s, \t",digitlist[4*i+j]):
555
+ end:
556
+ fprintf(fd, "\n "):
557
+ end:
558
+ fprintf(fd, "};\n\n"):
559
+
560
+ # The Pi/256 SCS constant
561
+ fprintf(fd, "static const scs Pio256=\n"):
562
+ WriteSCS(fd, evalf(C)):
563
+ fprintf(fd, ";\n#define Pio256_ptr (scs_ptr)(& Pio256)\n\n"):
564
+
565
+ fprintf(fd,"#ifdef WORDS_BIGENDIAN\n"):
566
+ for isbig from 1 to 0 by -1 do
567
+
568
+ if(isbig=0) then
569
+ fprintf(fd,"#else\n"):
570
+ fi:
571
+
572
+ # The sine polynomial
573
+
574
+ fprintf(fd, "static db_number const s3 = "):
575
+ printendian(fd, coeff(polySin,x,3), isbig):
576
+ fprintf(fd, ";\n"):
577
+ fprintf(fd, "static db_number const s5 = "):
578
+ printendian(fd, coeff(polySin,x,5), isbig):
579
+ fprintf(fd, ";\n"):
580
+ fprintf(fd, "static db_number const s7 = "):
581
+ printendian(fd, coeff(polySin,x,7), isbig):
582
+ fprintf(fd, ";\n\n"):
583
+
584
+
585
+ # the cos polynomial
586
+
587
+ fprintf(fd, "static db_number const c2 = "):
588
+ printendian(fd, coeff(polyCos,x,2), isbig):
589
+ fprintf(fd, ";\n"):
590
+ fprintf(fd, "static db_number const c4 = "):
591
+ printendian(fd, coeff(polyCos,x,4), isbig):
592
+ fprintf(fd, ";\n"):
593
+ fprintf(fd, "static db_number const c6 = "):
594
+ printendian(fd, coeff(polyCos,x,6), isbig):
595
+ fprintf(fd, ";\n\n"):
596
+
597
+
598
+ # the tan polynomial
599
+
600
+ t3h, t3l := hi_lo(coeff(polyTanCase2,x,3)):
601
+ fprintf(fd, "static db_number const t3h = "):
602
+ printendian(fd, t3h, isbig):
603
+ fprintf(fd, ";\n"):
604
+ fprintf(fd, "static db_number const t3l = "):
605
+ printendian(fd, t3l, isbig):
606
+ fprintf(fd, ";\n"):
607
+ fprintf(fd, "static db_number const t5 = "):
608
+ printendian(fd, coeff(polyTanCase2,x,5), isbig):
609
+ fprintf(fd, ";\n"):
610
+ fprintf(fd, "static db_number const t7 = "):
611
+ printendian(fd, coeff(polyTanCase2,x,7), isbig):
612
+ fprintf(fd, ";\n"):
613
+ fprintf(fd, "static db_number const t9 = "):
614
+ printendian(fd, coeff(polyTanCase2,x,9), isbig):
615
+ fprintf(fd, ";\n"):
616
+ fprintf(fd, "static db_number const t11 = "):
617
+ printendian(fd, coeff(polyTanCase2,x,11), isbig):
618
+ fprintf(fd, ";\n\n"):
619
+
620
+
621
+ # The sincos table: (sin, cos) of i*Pi/(2*SinCosSize) for i = 0..SinCosSize/2, each split by hi_lo
622
+
623
+ fprintf(fd, "\n/* sine and cos of kPi/256 in double-double */\n"):
624
+ fprintf(fd, "static db_number const sincosTable[%d] =\n{\n", 4*(SinCosSize/2+1)):
625
+ for i from 0 to SinCosSize/2 do
626
+ s:=hi_lo(sin(i*Pi/(2*SinCosSize))):
627
+ c:=hi_lo(cos(i*Pi/(2*SinCosSize))):
628
+ printendian(fd,s[1],isbig):
629
+ fprintf(fd," ,\n"):
630
+ printendian(fd,s[2],isbig):
631
+ fprintf(fd," ,\n"):
632
+ printendian(fd,c[1],isbig):
633
+ fprintf(fd," ,\n"):
634
+ printendian(fd,c[2],isbig):
635
+ if i<SinCosSize/2 then fprintf(fd," ,\n"): fi: # separator between entries; none after the last one (i = SinCosSize/2)
636
+ od:
637
+ fprintf(fd, "\n};\n\n"):
638
+
639
+
640
+ od:
641
+ fprintf(fd,"#endif /* WORDS_BIGENDIAN */\n\n\n"):
642
+
643
+ fclose(fd):
644
+
645
+ printf("\n\n************ DONE TEMPTRIG/trigo_fast.h ************\n Copy it to the crlibm source directory.\n\n");
646
+
647
+
648
+ #################################################
649
+ # Stuff for the accurate phase,
650
+ #
651
+ # Should very bad cases be found in the future, only the degrees below
652
+ # should be increased, then this script rerun. Everything should
653
+ # compile OK with the newly generated trigo_accurate.h
654
+
655
+ printf("\n-------------------------------------------------------\n"):
656
+ printf("--------------------Accurate phase---------------------\n"):
657
+ printf("-------------------------------------------------------\n"):
658
+
659
+ xminSCS := 0:
660
+ xmaxSCS := Pi/4:
661
+
662
+
663
+ #-----------------Sine-----------------------
664
+ degreeSinSCS := 26:
665
+ Poly_P := convert(series(sin(sqrt(x))/(x^(3/2))-1/x, x=0, degreeSinSCS*4),polynom):
666
+ Poly_cheb := numapprox[chebpade](Poly_P, x=xminSCS..xmaxSCS^2, [degreeSinSCS/2-2,0]):
667
+ polySinSCS := poly_exact_SCS(expand(x + x^3 * subs(x=x^2, Poly_cheb))):
668
+
669
+ DEGREE_SIN_SCS := degree(polySinSCS):
670
+ maxepsApproxSinSCS:=numapprox[infnorm](1 - polySinSCS/sin(x), x=xminSCS..xmaxSCS):
671
+ printf("The sine polynomial for the second phase (degree %d) is accurate to %f bits on O..Pi/4\n", DEGREE_SIN_SCS, -log2(maxepsApproxSinSCS)):
672
+ # For the proof of the very bad cases, near zero
673
+ maxepsApproxSinSCSNearZero:=numapprox[infnorm](1 - polySinSCS/sin(x), x=0..2^(-17)):
674
+ printf(" ... and accurate to %f bits on O..2^(-17)\n", -log2(maxepsApproxSinSCSNearZero)):
675
+
676
+
677
+ #-----------------Cos-----------------------
678
+ degreeCosSCS := 28:
679
+ Poly_P := convert(series((cos(sqrt(x)) -1)/x, x=0, degreeCosSCS*4),polynom):
680
+ Poly_cheb := numapprox[chebpade](Poly_P, x=xminSCS..xmaxSCS^2, [degreeCosSCS/2-2,0]):
681
+ polyCosSCS := poly_exact_SCS(expand(1 + x^2 * subs(x=x^2, Poly_cheb))):
682
+ DEGREE_COS_SCS := degree(polyCosSCS):
683
+
684
+ maxepsApproxCosSCS:=numapprox[infnorm](1 - polyCosSCS/cos(x), x=xminSCS..xmaxSCS):
685
+ printf("The cos polynomial for the second phase (degree %d) is accurate to %f bits on O..Pi/4\n", degree(polyCosSCS), -log2(maxepsApproxCosSCS)):
686
+ # For the proof of the very bad cases, near zero
687
+ maxepsApproxCosSCSNearZero:=numapprox[infnorm](1 - polyCosSCS/cos(x), x=0..2^(-18)):
688
+ printf(" ... and accurate to %f bits on O..2^(-18)\n", -log2(maxepsApproxCosSCSNearZero)):
689
+
690
+
691
+
692
+ #-----------------Tan-----------------------
693
+ degreeTanSCS := 70:
694
+ Poly_P := convert(series(tan(sqrt(x))/(x^(3/2))-1/x, x=0, degreeTanSCS*4),polynom):
695
+ Poly_cheb := numapprox[chebpade](Poly_P, x=xminSCS..xmaxSCS^2, [degreeTanSCS/2-2,0]):
696
+ polyTanSCS := poly_exact_SCS(expand(x + x^3 * subs(x=x^2, Poly_cheb))):
697
+
698
+ DEGREE_TAN_SCS := degree(polyTanSCS):
699
+ maxepsApproxTanSCS:=numapprox[infnorm](1 - polyTanSCS/tan(x), x=xminSCS..xmaxSCS):
700
+ printf("The tan polynomial for the second phase (degree %d) is accurate to %f bits on O..Pi/4\n", DEGREE_TAN_SCS, -log2(maxepsApproxTanSCS)):
701
+ # For the proof of the very bad cases, near zero
702
+ maxepsApproxTanSCSNearZero:=numapprox[infnorm](1 - polyTanSCS/tan(x), x=0..2^(-17)):
703
+ printf(" ... and accurate to %f bits on O..2^(-17)\n", -log2(maxepsApproxTanSCSNearZero)):
704
+
705
+
706
+ filename:="TEMPTRIG/trigo_accurate.h":
707
+ fd:=fopen(filename, WRITE, TEXT):
708
+ fprintf(fd, "/*File generated by maple/trigo.mpl*/\n\n"):
709
+ fprintf(fd, "#include \"crlibm.h\"\n#include \"crlibm_private.h\"\n\n"):
710
+ fprintf(fd, "#define DEGREE_SIN_SCS %d \n", DEGREE_SIN_SCS):
711
+ fprintf(fd, "#define DEGREE_COS_SCS %d \n", DEGREE_COS_SCS):
712
+ fprintf(fd, "#define DEGREE_TAN_SCS %d \n", DEGREE_TAN_SCS):
713
+ fprintf(fd, "\n"):
714
+ fprintf(fd, "#define sin_scs_poly_ptr (scs_ptr)&sin_scs_poly \n"):
715
+ fprintf(fd, "#define cos_scs_poly_ptr (scs_ptr)&cos_scs_poly \n"):
716
+ fprintf(fd, "#define tan_scs_poly_ptr (scs_ptr)&tan_scs_poly \n"):
717
+ fprintf(fd, "\n"):
718
+ Write_SCS_poly(fd, sin_scs_poly, polySinSCS):
719
+ fprintf(fd, "\n\n"):
720
+ Write_SCS_poly(fd, cos_scs_poly, polyCosSCS):
721
+ fprintf(fd, "\n\n"):
722
+ Write_SCS_poly(fd, tan_scs_poly, polyTanSCS):
723
+ fprintf(fd, "\n"):
724
+ fclose(fd):
725
+
726
+ printf("\n\n************ DONE TEMPTRIG/trigo_accurate.h ************\n Copy it to the crlibm source directory.\n\n");
727
+
728
+