intervals 0.3.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. data/VERSION.txt +1 -0
  2. data/ext/crlibm/AUTHORS +2 -0
  3. data/ext/crlibm/COPYING +504 -0
  4. data/ext/crlibm/ChangeLog +80 -0
  5. data/ext/crlibm/INSTALL +182 -0
  6. data/ext/crlibm/Makefile.am +84 -0
  7. data/ext/crlibm/Makefile.in +530 -0
  8. data/ext/crlibm/NEWS +0 -0
  9. data/ext/crlibm/README +31 -0
  10. data/ext/crlibm/TODO +47 -0
  11. data/ext/crlibm/VERSION +1 -0
  12. data/ext/crlibm/aclocal.m4 +989 -0
  13. data/ext/crlibm/atan-itanium.c +846 -0
  14. data/ext/crlibm/atan-pentium.c +261 -0
  15. data/ext/crlibm/atan_accurate.c +244 -0
  16. data/ext/crlibm/atan_accurate.h +191 -0
  17. data/ext/crlibm/atan_fast.c +324 -0
  18. data/ext/crlibm/atan_fast.h +678 -0
  19. data/ext/crlibm/config.guess +1461 -0
  20. data/ext/crlibm/config.sub +1566 -0
  21. data/ext/crlibm/configure +7517 -0
  22. data/ext/crlibm/configure.ac +364 -0
  23. data/ext/crlibm/crlibm.h +125 -0
  24. data/ext/crlibm/crlibm_config.h +149 -0
  25. data/ext/crlibm/crlibm_config.h.in +148 -0
  26. data/ext/crlibm/crlibm_private.c +293 -0
  27. data/ext/crlibm/crlibm_private.h +658 -0
  28. data/ext/crlibm/csh_fast.c +631 -0
  29. data/ext/crlibm/csh_fast.h +771 -0
  30. data/ext/crlibm/double-extended.h +496 -0
  31. data/ext/crlibm/exp-td.c +962 -0
  32. data/ext/crlibm/exp-td.h +685 -0
  33. data/ext/crlibm/exp_accurate.c +197 -0
  34. data/ext/crlibm/exp_accurate.h +85 -0
  35. data/ext/crlibm/gappa/log-de-E0-logir0.gappa +106 -0
  36. data/ext/crlibm/gappa/log-de-E0.gappa +79 -0
  37. data/ext/crlibm/gappa/log-de.gappa +81 -0
  38. data/ext/crlibm/gappa/log-td-E0-logir0.gappa +126 -0
  39. data/ext/crlibm/gappa/log-td-E0.gappa +143 -0
  40. data/ext/crlibm/gappa/log-td-accurate-E0-logir0.gappa +230 -0
  41. data/ext/crlibm/gappa/log-td-accurate-E0.gappa +213 -0
  42. data/ext/crlibm/gappa/log-td-accurate.gappa +217 -0
  43. data/ext/crlibm/gappa/log-td.gappa +156 -0
  44. data/ext/crlibm/gappa/trigoSinCosCase3.gappa +204 -0
  45. data/ext/crlibm/gappa/trigoTanCase2.gappa +73 -0
  46. data/ext/crlibm/install-sh +269 -0
  47. data/ext/crlibm/log-de.c +431 -0
  48. data/ext/crlibm/log-de.h +732 -0
  49. data/ext/crlibm/log-td.c +852 -0
  50. data/ext/crlibm/log-td.h +819 -0
  51. data/ext/crlibm/log10-td.c +906 -0
  52. data/ext/crlibm/log10-td.h +823 -0
  53. data/ext/crlibm/log2-td.c +935 -0
  54. data/ext/crlibm/log2-td.h +821 -0
  55. data/ext/crlibm/maple/atan.mpl +359 -0
  56. data/ext/crlibm/maple/common-procedures.mpl +997 -0
  57. data/ext/crlibm/maple/csh.mpl +446 -0
  58. data/ext/crlibm/maple/double-extended.mpl +151 -0
  59. data/ext/crlibm/maple/exp-td.mpl +195 -0
  60. data/ext/crlibm/maple/log-de.mpl +243 -0
  61. data/ext/crlibm/maple/log-td.mpl +316 -0
  62. data/ext/crlibm/maple/log10-td.mpl +345 -0
  63. data/ext/crlibm/maple/log2-td.mpl +334 -0
  64. data/ext/crlibm/maple/trigo.mpl +728 -0
  65. data/ext/crlibm/maple/triple-double.mpl +58 -0
  66. data/ext/crlibm/missing +198 -0
  67. data/ext/crlibm/mkinstalldirs +40 -0
  68. data/ext/crlibm/rem_pio2_accurate.c +219 -0
  69. data/ext/crlibm/rem_pio2_accurate.h +53 -0
  70. data/ext/crlibm/scs_lib/AUTHORS +3 -0
  71. data/ext/crlibm/scs_lib/COPYING +504 -0
  72. data/ext/crlibm/scs_lib/ChangeLog +16 -0
  73. data/ext/crlibm/scs_lib/INSTALL +215 -0
  74. data/ext/crlibm/scs_lib/Makefile.am +18 -0
  75. data/ext/crlibm/scs_lib/Makefile.in +328 -0
  76. data/ext/crlibm/scs_lib/NEWS +0 -0
  77. data/ext/crlibm/scs_lib/README +9 -0
  78. data/ext/crlibm/scs_lib/TODO +4 -0
  79. data/ext/crlibm/scs_lib/addition_scs.c +623 -0
  80. data/ext/crlibm/scs_lib/config.guess +1461 -0
  81. data/ext/crlibm/scs_lib/config.sub +1566 -0
  82. data/ext/crlibm/scs_lib/configure +6226 -0
  83. data/ext/crlibm/scs_lib/division_scs.c +110 -0
  84. data/ext/crlibm/scs_lib/double2scs.c +174 -0
  85. data/ext/crlibm/scs_lib/install-sh +269 -0
  86. data/ext/crlibm/scs_lib/missing +198 -0
  87. data/ext/crlibm/scs_lib/mkinstalldirs +40 -0
  88. data/ext/crlibm/scs_lib/multiplication_scs.c +456 -0
  89. data/ext/crlibm/scs_lib/poly_fct.c +112 -0
  90. data/ext/crlibm/scs_lib/print_scs.c +73 -0
  91. data/ext/crlibm/scs_lib/rand_scs.c +63 -0
  92. data/ext/crlibm/scs_lib/scs.h +353 -0
  93. data/ext/crlibm/scs_lib/scs2double.c +391 -0
  94. data/ext/crlibm/scs_lib/scs2mpf.c +58 -0
  95. data/ext/crlibm/scs_lib/scs2mpfr.c +61 -0
  96. data/ext/crlibm/scs_lib/scs_private.c +23 -0
  97. data/ext/crlibm/scs_lib/scs_private.h +133 -0
  98. data/ext/crlibm/scs_lib/tests/tbx_timing.h +102 -0
  99. data/ext/crlibm/scs_lib/wrapper_scs.h +486 -0
  100. data/ext/crlibm/scs_lib/zero_scs.c +52 -0
  101. data/ext/crlibm/stamp-h.in +1 -0
  102. data/ext/crlibm/tests/Makefile.am +43 -0
  103. data/ext/crlibm/tests/Makefile.in +396 -0
  104. data/ext/crlibm/tests/blind_test.c +148 -0
  105. data/ext/crlibm/tests/generate_test_vectors.c +258 -0
  106. data/ext/crlibm/tests/soak_test.c +334 -0
  107. data/ext/crlibm/tests/test_common.c +627 -0
  108. data/ext/crlibm/tests/test_common.h +28 -0
  109. data/ext/crlibm/tests/test_perf.c +570 -0
  110. data/ext/crlibm/tests/test_val.c +249 -0
  111. data/ext/crlibm/trigo_accurate.c +500 -0
  112. data/ext/crlibm/trigo_accurate.h +331 -0
  113. data/ext/crlibm/trigo_fast.c +1219 -0
  114. data/ext/crlibm/trigo_fast.h +639 -0
  115. data/ext/crlibm/triple-double.h +878 -0
  116. data/ext/extconf.rb +31 -0
  117. data/ext/fpu.c +107 -0
  118. data/ext/jamis-mod.rb +591 -0
  119. data/lib/fpu.rb +287 -0
  120. data/lib/interval.rb +1170 -0
  121. data/lib/intervals.rb +212 -0
  122. data/lib/struct_float.rb +133 -0
  123. data/test/data_atan.txt +360 -0
  124. data/test/data_cos.txt +346 -0
  125. data/test/data_cosh.txt +3322 -0
  126. data/test/data_exp.txt +3322 -0
  127. data/test/data_log.txt +141 -0
  128. data/test/data_sin.txt +140 -0
  129. data/test/data_sinh.txt +3322 -0
  130. data/test/data_tan.txt +342 -0
  131. metadata +186 -0
@@ -0,0 +1,728 @@
1
+
2
+ Digits := 100:
3
+ interface(quiet=true):
4
+ read "common-procedures.mpl":
5
+ with(orthopoly):
6
+
7
+
8
+ mkdir("TEMPTRIG"):
9
+
10
+
11
+
12
+ # - Evaluation scheme :
13
+ # case 1 : return x
14
+ # case 2 (or Fast) : compute a simple polynomial
15
+ # case 3 : do an argument reduction...
16
+
17
+
18
+
19
+ ########################################################
20
+ # Case 1 : Small arguments
21
+ # return x for sine and tan, return 1 for cos
22
+ ########################################################
23
+
24
+ xmax_return_x_for_sin := 2^(-26):
25
+ xmax_return_1_for_cos_RN := sqrt(2^(-53)):
26
+ xmax_return_1_for_cos_RDIR:=2^(-26):
27
+ one_rounded_down := evalf(1-ulp(1/2)):
28
+
29
+ xmax_return_x_for_tan := 2^(-27):
30
+
31
+
32
+
33
+
34
+ ########################################################
35
+ # Case 2 : simple polynomial approximation
36
+ ########################################################
37
+
38
+ # We want to use the same polynomial in case 2 and 3.
39
+ # So see after arg red
40
+
41
+ #################################################
42
+ # Case 3 : Argument reduction
43
+ #################################################
44
+
45
+
46
+
47
+ #################################################
48
+ # CODY and WAITE Argument reduction
49
+
50
+
51
+ C := Pi/256:
52
+ invC:= nearest(1/C):
53
+ reminvC := evalf(1/C - invC):
54
+ expC:=ieeedouble(C)[2]:
55
+ epsinvC := abs(reminvC*C):
56
+
57
+ # There are three sets of constants :
58
+ # - split redC into two constants, for small values when we are concerned with absolute error
59
+ # - split redC into three constants, for larger values
60
+ # - split redC into three doubles, for the cases when we need
61
+ # good relative precision on the result and fear cancellation
62
+
63
+
64
+
65
+
66
+ # Fastest reduction using two-part Cody and Waite (up to |k|=2^22)
67
+
68
+ bitsCh_0:=34:
69
+
70
+ # 1/2 <= C/2^(expC+1) <1
71
+ Ch:= round(evalf( C * 2^(bitsCh_0-expC-1))) / (2^(bitsCh_0-expC-1)):
72
+ # recompute bitsCh in case we are lucky (and we are for bitsCh_0=32)
73
+ bitsCh:=1+log2(op(2,ieeedouble(Ch)[3])) : # this means the log of the denominator
74
+
75
+ Cl:=nearest(C - Ch):
76
+ # Cody and Waite argument reduction will work for |k|<kmax_cw2
77
+ kmax_cw2:=2^(53-bitsCh):
78
+
79
+ # The constants to move to the .h file
80
+ RR_CW2_CH := Ch:
81
+ RR_CW2_MCL := -Cl:
82
+ XMAX_CODY_WAITE_2 := nearest(kmax_cw2*C):
83
+
84
+ # The error in this case (we need absolute error)
85
+ delta_repr_C_cw2 := abs(C-Ch-Cl):
86
+ delta_round_cw2 := kmax_cw2* 1/2 * ulp(Cl) :
87
+ delta_cody_waite_2 := kmax_cw2 * delta_repr_C_cw2 + delta_round_cw2:
88
+ # This is the delta on y, the reduced argument
89
+
90
+ #log2(%);
91
+
92
+
93
+
94
+ # Slower reduction using three-part Cody and Waite, up to |k|=2^31
95
+
96
+ bitsCh_0:=23: # 22 or 23
97
+ Ch:= round(evalf( C * 2^(bitsCh_0-expC-1))) / (2^(bitsCh_0-expC-1)):
98
+ # recompute bitsCh in case we are lucky
99
+ bitsCh:=1+log2(op(2,ieeedouble(Ch)[3])) : # this means the log of the denominator
100
+
101
+ r := C-Ch:
102
+ Cmed := round(evalf( r * 2^(2*bitsCh-expC-1))) / (2^(2*bitsCh-expC-1)):
103
+ bitsCmed:=1+log2(op(2,ieeedouble(Cmed)[3])) :
104
+
105
+ Cl:=nearest(C - Ch - Cmed):
106
+
107
+ kmax_cw3 := 2^min(53-bitsCh, 53-bitsCmed, 31):# Otherwise we have integer overflow
108
+
109
+
110
+
111
+ # The constants to move to the .h file
112
+ RR_CW3_CH := Ch:
113
+ RR_CW3_CM := Cmed:
114
+ RR_CW3_MCL := -Cl:
115
+ XMAX_CODY_WAITE_3 := nearest(kmax_cw3*C):
116
+
117
+ # The error in this case (we need absolute error)
118
+ delta_repr_C_cw3 := abs(C - Ch - Cmed - Cl):
119
+ delta_round_cw3 := kmax_cw3 * 1/2 * ulp(Cl) :
120
+ delta_cody_waite_3 := kmax_cw3 * delta_repr_C_cw3 + delta_round_cw3:
121
+ # This is the delta on y, the reduced argument
122
+
123
+ #log2(%);
124
+
125
+
126
+
127
+
128
+
129
+ # Third range reduction, using double-double arithmetic, for |k| up to kmax = 2^46-1
130
+
131
+ # This max int value can be produced by DOUBLE2LONGINT
132
+ kmax:=2^46-1:
133
+ XMAX_DDRR:=nearest(kmax*C):
134
+
135
+ #in this case we have C stored as 3 doubles
136
+ Ch := nearest(C):
137
+ Cmed := nearest(C-Ch):
138
+ Cl := nearest(C-Ch-Cmed):
139
+
140
+ RR_DD_MCH := -Ch:
141
+ RR_DD_MCM := -Cmed:
142
+ RR_DD_CL := Cl:
143
+
144
+ delta_repr_C := abs(C - Ch - Cmed - Cl):
145
+
146
+ # and we have only exact Add12 and Mul12 operations. The only place
147
+ # with possible rounding errors is:
148
+ # Add22 (pyh, pyl, (x + kch_h) , (kcm_l - kd*RR_DD_CL), th, tl) ;
149
+ # where (x + kch_h) is exact (Sterbenz) with up to kmax bits of cancellation
150
+ # and the error is simply the error in (kcm_l - kd*RR_DD_CL)
151
+ # At the very worst :
152
+ delta_round :=
153
+ kmax * 1/2 * ulp(Cl) # for kd*RR_DD_CL
154
+ + kmax*ulp(Cl) # for the subtraction
155
+ + 2^(-100) * Pi/512 : # for the Add22
156
+ delta_RR_DD := kmax * delta_repr_C + delta_round:
157
+
158
+ # the last case, Payne and Hanek reduction, gives a very small delta:
159
+ # red arg is on 9*30 bits, then rounded to a double-double (106 bits)
160
+ # This should, of course, be optimized some day
161
+ delta_PayneHanek := 2^(-100):
162
+
163
+ # Finally the max delta on the reduced argument is
164
+ delta_ArgRed := max(delta_cody_waite_2, delta_cody_waite_3,
165
+ delta_RR_DD, delta_PayneHanek):
166
+
167
+ #print("delta_ArgRed to move to the .gappa file = ", evalf(delta_ArgRed)):
168
+ #log2(delta_ArgRed);
169
+
170
+
171
+
172
+
173
+
174
+ # Now we use the above absolute error when k mod 256 <> 0
175
+ # otherwise we need to worry about relative accuracy of the result.
176
+
177
+ # First, what is the worst case for cancellation ?
178
+
179
+ emax := ieeedouble(XMAX_DDRR)[2] +1 :
180
+ # above emax, we will use Payne and Hanek so we do not worry
181
+
182
+ (wcn, wce, wceps) := WorstCaseForAdditiveRangeReduction(2,53,-8, emax, C):
183
+ wcx := wcn * 2^wce:
184
+ wck := round(wcx/C):
185
+ wcy := wcx - wck*C:
186
+
187
+ #log2(wcy); # y > 2^(-67);
188
+
189
+ # In these cases we use the double-double range reduction, for |k|<kmax_cw3
190
+ # and the relative precision in the worst case is for wcy
191
+
192
+ delta_round := kmax_cw3 * 1/2 * ulp(Cl) # for kd*RR_DD_CL
193
+ + kmax_cw3 * ulp(Cl) : # for the subtraction
194
+
195
+ delta_RR_DD := kmax_cw3 * delta_repr_C + delta_round:
196
+
197
+ eps_ArgRed := (1+delta_RR_DD/wcy)*(1+2^(-100)) -1:
198
+
199
+ #log2(eps_ArgRed);
200
+
201
+ # In all the other cases we use Payne and Hanek, and eps_ArgRed is
202
+ # much smaller, so this is the max.
203
+
204
+
205
+
206
+ ###########
207
+ # Polynomials for do_sine and do_cos, and for the case 2
208
+ degreeSin := 8:
209
+ degreeCos := 7:
210
+
211
+ maxepsk := (1+epsinvC)*(1+2^(-53))-1:
212
+
213
+ ymaxCase3 := evalf(Pi/512 + XMAX_DDRR*maxepsk):
214
+ #print("ymaxCase3 to move to the .gappa file = ", ymaxCase3):
215
+
216
+
217
+ y2maxCase3 := ymaxCase3^2:
218
+ # These are the parameters that can be varied to fine-tune performance
219
+ # (they should always be larger than Pi/512)
220
+ xmaxCosCase2 := Pi/256:
221
+ xmaxSinCase2 := Pi/256:
222
+
223
+
224
+ x2maxSinCase2:= xmaxSinCase2^2:
225
+ x2maxCosCase2:= xmaxCosCase2^2:
226
+
227
+ # We had the difficulty here to find minimax polynomials which are good for case 2
228
+ # as well as for case 3. A simple solution was to set xmaxCosCase2 =ymaxCase3...
229
+ # However we found another answer: in the future we intend to use these polynomials for second
230
+ # step, too. Therefore, no minimax, only Taylor.
231
+
232
+ polySin:= poly_exact(convert( series(sin(x), x=0, degreeSin+1), polynom)):
233
+ polyTs := expand(polySin/x-1):
234
+ polyTs2 := subs(x=sqrt(y), polyTs):
235
+
236
+ polyCos := poly_exact (convert( series(cos(x), x=0, degreeCos+1), polynom)):
237
+ polyTc2 := subs(x=sqrt(y), polyCos - 1):
238
+
239
+ epsApproxSinCase2 := numapprox[infnorm]((x*polyTs+x -sin(x))/sin(x), x=0..xmaxSinCase2):
240
+ epsApproxSinCase3 := numapprox[infnorm]((x*polyTs +x -sin(x))/sin(x), x=0..ymaxCase3):
241
+
242
+ deltaApproxSinCase2 := numapprox[infnorm]((x*polyTs+x -sin(x)), x=0..xmaxSinCase2):
243
+ deltaApproxSinCase3 := numapprox[infnorm]((x*polyTs +x -sin(x)), x=0..ymaxCase3):
244
+
245
+ deltaApproxCosCase2:= numapprox[infnorm](polyCos - cos(x), x=0..xmaxCosCase2):
246
+ deltaApproxCosCase3:= numapprox[infnorm](polyCos - cos(x), x=0..ymaxCase3):
247
+
248
+ #print("deltaApproxSinCase3 to move to the .gappa file = ", deltaApproxSinCase3):
249
+ #print("deltaApproxCosCase3 to move to the .gappa file = ", deltaApproxCosCase3):
250
+
251
+
252
+ ########################## Case 2 for sine ###########################
253
+
254
+
255
+
256
+ # evaluate this polynomial in double. The error on x*x is at most half an ulp
257
+ errlist:=errlist_quickphase_horner(degree(polyTs2),0,0,2^(-53), 0):
258
+ (eps_rounding_Ts, delta_rounding_Ts, minTs, maxTs):=
259
+ compute_horner_rounding_error(polyTs2,y,x2maxSinCase2, errlist, true):
260
+
261
+ eps_poly_TsCase2 := numapprox[infnorm]((x*polyTs)/(sin(x)-x) -1, x=0..xmaxSinCase2):
262
+ maxeps2 := (1+eps_poly_TsCase2)*(1+eps_rounding_Ts)*(1+2^(-53))-1:
263
+
264
+ maxepstotalSinCase2 := maxeps2 * numapprox[infnorm](1-x/sin(x), x=0..xmaxSinCase2):
265
+ rnconstantSinCase2 := evalf(compute_rn_constant(maxepstotalSinCase2)):
266
+
267
+
268
+
269
+
270
+
271
+ ##################################### Case2 cos ###########################
272
+
273
+ # evaluate this polynomial in double. The error on x*x is at most half an ulp
274
+ errlist := errlist_quickphase_horner(degree(polyTc2),0,0,2**(-53), 0):
275
+ (eps_rounding_Tc, delta_rounding_Tc, minTc, maxTc):=
276
+ compute_horner_rounding_error(polyTc2,y,x2maxCosCase2, errlist, true):
277
+
278
+ # Then we have an Add12 which is exact. The result is greater than cos(xmaxCosCase2):
279
+ miny := cos(xmaxCosCase2):
280
+ maxepstotalCosCase2 := (delta_rounding_Tc + deltaApproxCosCase2) / miny :
281
+ #log2(%);
282
+ rnconstantCosCase2 := evalf(compute_rn_constant(maxepstotalCosCase2)):
283
+
284
+
285
+
286
+
287
+ ######################## Case2 Tangent #########################
288
+ #
289
+ # Compute the Taylor series
290
+ degreeTanCase2 := 12:
291
+ xmaxTanCase2 := 2**(-4):
292
+ xminTanCase2 := 2**(-30):
293
+
294
+ Poly_P := convert(series(tan(sqrt(x))/(x^(3/2))-1/x, x=0, degreeTanCase2*4),polynom):
295
+ Poly_cheb := numapprox[chebpade](Poly_P, x=xminTanCase2..xmaxTanCase2^2, [degreeTanCase2/2-2,0]):
296
+ polyTanCase2 := poly_exact2(expand(x + x^3 * subs(x=x^2, Poly_cheb)), 4):
297
+
298
+ #polyTanCase2 := poly_exact2(convert(series(tan(x), x=0, degreeTanCase2), polynom), 4):
299
+
300
+ maxepsApproxTanCase2:=numapprox[infnorm](1 - polyTanCase2 / tan(x), x=xminTanCase2..xmaxTanCase2):
301
+
302
+ maxepsOverXTanCase2 :=numapprox[infnorm]((1 - polyTanCase2 / tan(x))/x, x=xminTanCase2..xmaxTanCase2):
303
+ # Now we pass these values to Gappa
304
+
305
+ filename:="TEMPTRIG/TanCase2.sed":
306
+ fd:=fopen(filename, WRITE, TEXT):
307
+ t3h, t3l := hi_lo(coeff(polyTanCase2,x,3)):
308
+ fprintf(fd, " s/_t3h/%1.40e/g\n", t3h):
309
+ fprintf(fd, " s/_t3l/%1.40e/g\n", t3l):
310
+ fprintf(fd, " s/_t5/%1.40e/g\n", coeff(polyTanCase2,x,5)):
311
+ fprintf(fd, " s/_t7/%1.40e/g\n", coeff(polyTanCase2,x,7)):
312
+ fprintf(fd, " s/_t9/%1.40e/g\n", coeff(polyTanCase2,x,9)):
313
+ fprintf(fd, " s/_t11/%1.40e/g\n", coeff(polyTanCase2,x,11)):
314
+ fprintf(fd, " s/_xmax/%1.40e/g\n", xmaxTanCase2):
315
+ fprintf(fd, " s/_maxEpsApproxOverX/%1.40e/g\n", maxepsOverXTanCase2*1.00001):
316
+ fprintf(fd, " s/_maxEpsApprox/%1.40e/g\n", maxepsApproxTanCase2*1.00001):
317
+ fclose(fd):
318
+
319
+ printf("\n\n************ DONE TEMPTRIG/TanCase2.sed ************\n");
320
+ printf("Now you should use \n sed -f TEMPTRIG/TanCase2.sed trigoTanCase2.gappa | gappa > /dev/null \n");
321
+
322
+
323
+
324
+ maxepstotalTanCase2:=4.59602e-19: # Cut from Gappa output
325
+
326
+ log2(maxepstotalTanCase2): # almost 61 bits
327
+
328
+
329
+
330
+
331
+
332
+
333
+ ###############################################################################
334
+ # Computing errors for Case3 : now we have an error due to arg red
335
+
336
+ # First DoSinZero. The notations are those of the paper proof
337
+
338
+ # Approximation error already computed above as epsApproxSinCase3;
339
+
340
+ # polynomial evaluation in double, with an error on y*y of epsilonArgRed
341
+ errlist:=errlist_quickphase_horner(degree(polyTs2),0,0,eps_ArgRed, 0):
342
+ (epsRoundingTsSinZero, deltaRoundingTsSinZero, minTs, maxTs):=
343
+ compute_horner_rounding_error(polyTs2,y,y2maxCase3, errlist, true):
344
+
345
+ # just as in the paper proof
346
+ maxepsSinZero1 := (1+epsApproxSinCase3)*(1+epsRoundingTsSinZero)*(1+2^(-53))*(1+2^(-53)) - 1:
347
+
348
+ # just as in the paper proof. For x>0 the absolute values are as given
349
+
350
+ epstotalSinZero := ( (x-sin(x))*maxepsSinZero1 + x*eps_ArgRed + 2^(-53)*x^3/3 ) / sin(x):
351
+ maxepstotalSinZero := numapprox[infnorm]( epstotalSinZero , x=2^(-30)..ymaxCase3):
352
+
353
+ printf("\nMax rel error for DoSinZero is %1.5e, if it's smaller than 2^(-66) (%1.5e) then the proof is OK\n\n", maxepstotalSinZero, 2^(-66)):
354
+
355
+
356
+
357
+
358
+
359
+ ##############################SinCosCase3############################
360
+ SinCosSize:= 128: # size of the table
361
+
362
+
363
+ # The Gappa files in TEMPTRIG
364
+ for i from 1 to SinCosSize/2 do
365
+ filename:=cat("TEMPTRIG/SinACosA_",i,".sed"):
366
+ fd:=fopen(filename, WRITE, TEXT):
367
+
368
+ # The table values
369
+ s:=hi_lo(sin(i*Pi/(2*SinCosSize))):
370
+ c:=hi_lo(cos(i*Pi/(2*SinCosSize))):
371
+ fprintf(fd, " s/_cah/%1.40e/g\n", c[1]):
372
+ fprintf(fd, " s/_cal/%1.40e/g\n", c[2]):
373
+ fprintf(fd, " s/_sah/%1.40e/g\n", s[1]):
374
+ fprintf(fd, " s/_sal/%1.40e/g\n", s[2]):
375
+
376
+ # The polynomial coefficients
377
+ fprintf(fd, " s/_s3/%1.40e/g\n", coeff(polySin,x,3)):
378
+ fprintf(fd, " s/_s5/%1.40e/g\n", coeff(polySin,x,5)):
379
+ fprintf(fd, " s/_s7/%1.40e/g\n", coeff(polySin,x,7)):
380
+ fprintf(fd, " s/_s9/%1.40e/g\n", coeff(polySin,x,9)):
381
+ fprintf(fd, " s/_c2/%1.40e/g\n", coeff(polyCos,x,2)):
382
+ fprintf(fd, " s/_c4/%1.40e/g\n", coeff(polyCos,x,4)):
383
+ fprintf(fd, " s/_c6/%1.40e/g\n", coeff(polyCos,x,6)):
384
+ fprintf(fd, " s/_c8/%1.40e/g\n", coeff(polyCos,x,8)):
385
+
386
+ # The approximation errors
387
+ fprintf(fd, " s/_ymaxCase3/%1.40e/g\n", ymaxCase3*1.00001):
388
+ fprintf(fd, " s/_delta_ArgRed/%1.40e/g\n", delta_ArgRed*1.00001):
389
+ fprintf(fd, " s/_delta_approx_Sin_Case3/%1.40e/g\n", deltaApproxSinCase3*1.00001):
390
+ fprintf(fd, " s/_delta_approx_Cos_Case3/%1.40e/g\n", deltaApproxCosCase3*1.00001):
391
+
392
+ fclose(fd):
393
+ od:
394
+
395
+
396
+ printf("************ DONE TEMPTRIG/*.sed ************\n"):
397
+
398
+ # A shell script to use them
399
+ filename:="trigo_test.sh":
400
+ fd:=fopen(filename, WRITE, TEXT):
401
+ fprintf(fd, "#!/bin/sh\n"):
402
+ fprintf(fd, "# You probably need to edit the path to the gappa executable\n"):
403
+ fprintf(fd, "for file in TEMPTRIG/SinACosA*.sed \n"):
404
+ fprintf(fd, "do\n"):
405
+ fprintf(fd, " echo $file:\n"):
406
+ fprintf(fd, " sed -f $file trigoSinCosCase3.gappa | ~/gappa/src/gappa > /dev/null\n"):
407
+ fprintf(fd, " echo\n"):
408
+ fprintf(fd, "done\n"):
409
+ fclose(fd):
410
+
411
+ printf("************ DONE trigo_test.sh ************\n"):
412
+ printf("Now you should run\n"):
413
+ printf(" sh trigo_test.sh 2>TEMPTRIG/Gappa.out\n"):
414
+
415
+ printf("Then look at TEMPTRIG/Gappa.out. It shouldn't contain 'No proof'.\n This means that everything is OK and the rounding constants in TEMPTRIG/trigo_fast.h are proven upper bounds.\n\n"):
416
+
417
+
418
+
419
+
420
+ # This value has been validated by Gappa (using all the previous)
421
+ maxepstotalSinCosCase3:=3*2^(-66):
422
+ rnconstantSinCosCase3 := evalf(compute_rn_constant(maxepstotalSinCosCase3)):
423
+
424
+
425
+ # The error of sin, the error of cos, then the error of Div22
426
+ maxepstotalTanCase3:= 2.1*maxepstotalSinCosCase3:
427
+ rnconstantTanCase3 := evalf(compute_rn_constant(maxepstotalTanCase3)):
428
+
429
+
430
+
431
+
432
+
433
+ ##############################################
434
+ ## Compute constants for SCS arg red
435
+ oldDigits:=Digits:
436
+ Digits:=1000:
437
+ # for 2/Pi:
438
+ n:=round(2^(30*48)*evalf(2/Pi)):
439
+ digitlist:=[]:
440
+ for i from 1 to 48 do
441
+ r:=n mod (2^30):
442
+ n:=floor(n/(2^30)):
443
+ hexstring:= convert(convert(r,hex),string):
444
+ digitlist:=[hexstring, op(digitlist)]:
445
+ end:
446
+ digitlist:
447
+
448
+ # for 256/Pi:
449
+ n:=round(2^(30*47)*evalf(256/Pi)):
450
+ digitlist:=[]:
451
+ for i from 1 to 48 do
452
+ r:=n mod (2^30):
453
+ n:=floor(n/(2^30)):
454
+ hexstring:= convert(convert(r,hex),string):
455
+ digitlist:=[hexstring, op(digitlist)]:
456
+ end:
457
+ digitlist:
458
+ Digits:=oldDigits:
459
+
460
+
461
+
462
+ # an auxiliary output function:
463
+ # Outputs the high part of a double, and the double in comment.
464
+ # As all these high parts are used in code as
465
+ # if(absxhi < XMAX_COS_CASE2)
466
+ # we have to remove one LSB from the high part, or, divide var by
467
+ # (1+2^(-20))
468
+ # Now we have absxhi<highpart(var/(1+2^(-20)))
469
+ # => absxhi*(1+2^(-20))<var
470
+ # => x<var
471
+ outputHighPart:=proc(cvarname, var) # builds one C #define line (as a string) for the bound var under the macro name cvarname
472
+ local varMinusLSB:
473
+ Digits:=8: # work with 8 decimal digits from here on (Digits is a Maple environment variable; presumably restored on return, TODO confirm)
474
+ varMinusLSB:=var/(1+2^(-20)): # shrink the bound by the factor (1+2^(-20)) so that a comparison on the high part stays below var
475
+ ("#define " || cvarname || " 0x" || (ieeehexa(varMinusLSB)[1]) # first element of ieeehexa's result; presumably the high word in hex (ieeehexa is not defined in this script)
476
+ || " /* " || (convert(evalf(varMinusLSB),string)) || " */" ) # decimal value of the reduced bound, emitted inside a C comment
477
+ end proc:
478
+
479
+
480
+
481
+
482
+
483
+ # Output:
484
+
485
+ filename:="TEMPTRIG/trigo_fast.h":
486
+ fd:=fopen(filename, WRITE, TEXT):
487
+
488
+ fprintf(fd, "#include \"crlibm.h\"\n#include \"crlibm_private.h\"\n"):
489
+
490
+ fprintf(fd, "\n/*File generated by maple/trigo.mpl*/\n"): # banner names this script (trigo.mpl), matching the banner written to trigo_accurate.h
491
+ fprintf(fd, "\n"):
492
+
493
+
494
+ fprintf(fd, "%s\n", outputHighPart("XMAX_RETURN_X_FOR_SIN", xmax_return_x_for_sin) ):
495
+ fprintf(fd, "%s\n", outputHighPart("XMAX_SIN_CASE2 ", xmaxSinCase2) ):
496
+
497
+ fprintf(fd, "%s\n", outputHighPart("XMAX_RETURN_1_FOR_COS_RN", xmax_return_1_for_cos_RN) ):
498
+ fprintf(fd, "%s\n", outputHighPart("XMAX_RETURN_1_FOR_COS_RDIR", xmax_return_1_for_cos_RDIR) ):
499
+ fprintf(fd, "%s\n", outputHighPart("XMAX_COS_CASE2 ", xmaxCosCase2) ):
500
+
501
+ fprintf(fd, "%s\n", outputHighPart("XMAX_RETURN_X_FOR_TAN", xmax_return_x_for_tan) ):
502
+ fprintf(fd, "%s\n", outputHighPart("XMAX_TAN_CASE2 ", xmaxTanCase2) ):
503
+
504
+ fprintf(fd, "\n"):
505
+ fprintf(fd, "#define ONE_ROUNDED_DOWN %1.25e \n", one_rounded_down):
506
+ fprintf(fd, "\n"):
507
+
508
+ fprintf(fd, "#define EPS_SIN_CASE2 %1.25e \n", maxepstotalSinCase2):
509
+ fprintf(fd, "#define RN_CST_SIN_CASE2 %1.25f \n", rnconstantSinCase2):
510
+ fprintf(fd, "\n"):
511
+ fprintf(fd, "#define EPS_COS_CASE2 %1.25e \n", maxepstotalCosCase2):
512
+ fprintf(fd, "#define RN_CST_COS_CASE2 %1.25f \n", rnconstantCosCase2):
513
+ fprintf(fd, "\n"):
514
+ fprintf(fd, "#define EPS_SINCOS_CASE3 %1.25e \n", maxepstotalSinCosCase3):
515
+ fprintf(fd, "#define RN_CST_SINCOS_CASE3 %1.25f \n", rnconstantSinCosCase3):
516
+ fprintf(fd, "\n"):
517
+ fprintf(fd, "#define EPS_TAN_CASE2 %1.25e \n", maxepstotalTanCase2):
518
+ fprintf(fd, "#define EPS_TAN_CASE3 %1.25e \n", maxepstotalTanCase3):
519
+ fprintf(fd, "#define RN_CST_TAN_CASE3 %1.25f \n", rnconstantTanCase3):
520
+
521
+ fprintf(fd, "\n"):
522
+
523
+ fprintf(fd, "#define INV_PIO256 %1.25f \n", 1/C):
524
+ fprintf(fd, "\n"):
525
+
526
+ fprintf(fd, "%s\n", outputHighPart("XMAX_CODY_WAITE_2", XMAX_CODY_WAITE_2) ):
527
+ fprintf(fd, "%s\n", outputHighPart("XMAX_CODY_WAITE_3", XMAX_CODY_WAITE_3) ):
528
+ fprintf(fd, "%s\n", outputHighPart("XMAX_DDRR ", XMAX_DDRR) ):
529
+ #fprintf(fd, "%s\n", outputHighPart("", ) ):
530
+ fprintf(fd, "\n"):
531
+
532
+ fprintf(fd, "#define RR_CW2_CH %1.25e\n", RR_CW2_CH):
533
+ fprintf(fd, "#define RR_CW2_MCL %1.25e\n", RR_CW2_MCL):
534
+ fprintf(fd, "\n"):
535
+
536
+ fprintf(fd, "#define RR_CW3_CH %1.25e\n", RR_CW3_CH):
537
+ fprintf(fd, "#define RR_CW3_CM %1.25e\n", RR_CW3_CM):
538
+ fprintf(fd, "#define RR_CW3_MCL %1.25e\n", RR_CW3_MCL):
539
+ fprintf(fd, "\n"):
540
+
541
+ fprintf(fd, "#define RR_DD_MCH %1.25e\n", RR_DD_MCH):
542
+ fprintf(fd, "#define RR_DD_MCM %1.25e\n", RR_DD_MCM):
543
+ fprintf(fd, "#define RR_DD_CL %1.25e\n", RR_DD_CL):
544
+ fprintf(fd, "\n"):
545
+
546
+ fprintf(fd, "\n"):
547
+
548
+ fprintf(fd, "\n\n"):
549
+
550
+ # The 256/Pi SCS array
551
+ fprintf(fd, "static const int digits_256_over_pi[] = \n{"):
552
+ for i from 0 to 11 do
553
+ for j from 1 to 4 do
554
+ fprintf(fd, " 0x%s, \t",digitlist[4*i+j]):
555
+ end:
556
+ fprintf(fd, "\n "):
557
+ end:
558
+ fprintf(fd, "};\n\n"):
559
+
560
+ # The Pi/256 SCS constant
561
+ fprintf(fd, "static const scs Pio256=\n"):
562
+ WriteSCS(fd, evalf(C)):
563
+ fprintf(fd, ";\n#define Pio256_ptr (scs_ptr)(& Pio256)\n\n"):
564
+
565
+ fprintf(fd,"#ifdef WORDS_BIGENDIAN\n"):
566
+ for isbig from 1 to 0 by -1 do
567
+
568
+ if(isbig=0) then
569
+ fprintf(fd,"#else\n"):
570
+ fi:
571
+
572
+ # The sine polynomial
573
+
574
+ fprintf(fd, "static db_number const s3 = "):
575
+ printendian(fd, coeff(polySin,x,3), isbig):
576
+ fprintf(fd, ";\n"):
577
+ fprintf(fd, "static db_number const s5 = "):
578
+ printendian(fd, coeff(polySin,x,5), isbig):
579
+ fprintf(fd, ";\n"):
580
+ fprintf(fd, "static db_number const s7 = "):
581
+ printendian(fd, coeff(polySin,x,7), isbig):
582
+ fprintf(fd, ";\n\n"):
583
+
584
+
585
+ # the cos polynomial
586
+
587
+ fprintf(fd, "static db_number const c2 = "):
588
+ printendian(fd, coeff(polyCos,x,2), isbig):
589
+ fprintf(fd, ";\n"):
590
+ fprintf(fd, "static db_number const c4 = "):
591
+ printendian(fd, coeff(polyCos,x,4), isbig):
592
+ fprintf(fd, ";\n"):
593
+ fprintf(fd, "static db_number const c6 = "):
594
+ printendian(fd, coeff(polyCos,x,6), isbig):
595
+ fprintf(fd, ";\n\n"):
596
+
597
+
598
+ # the tan polynomial
599
+
600
+ t3h, t3l := hi_lo(coeff(polyTanCase2,x,3)):
601
+ fprintf(fd, "static db_number const t3h = "):
602
+ printendian(fd, t3h, isbig):
603
+ fprintf(fd, ";\n"):
604
+ fprintf(fd, "static db_number const t3l = "):
605
+ printendian(fd, t3l, isbig):
606
+ fprintf(fd, ";\n"):
607
+ fprintf(fd, "static db_number const t5 = "):
608
+ printendian(fd, coeff(polyTanCase2,x,5), isbig):
609
+ fprintf(fd, ";\n"):
610
+ fprintf(fd, "static db_number const t7 = "):
611
+ printendian(fd, coeff(polyTanCase2,x,7), isbig):
612
+ fprintf(fd, ";\n"):
613
+ fprintf(fd, "static db_number const t9 = "):
614
+ printendian(fd, coeff(polyTanCase2,x,9), isbig):
615
+ fprintf(fd, ";\n"):
616
+ fprintf(fd, "static db_number const t11 = "):
617
+ printendian(fd, coeff(polyTanCase2,x,11), isbig):
618
+ fprintf(fd, ";\n\n"):
619
+
620
+
621
+ # The sincos table
622
+
623
+ fprintf(fd, "\n/* sine and cos of kPi/256 in double-double */\n"):
624
+ fprintf(fd, "static db_number const sincosTable[%d] =\n{\n", 4*(SinCosSize/2+1)):
625
+ for i from 0 to SinCosSize/2 do
626
+ s:=hi_lo(sin(i*Pi/(2*SinCosSize))):
627
+ c:=hi_lo(cos(i*Pi/(2*SinCosSize))):
628
+ printendian(fd,s[1],isbig):
629
+ fprintf(fd," ,\n"):
630
+ printendian(fd,s[2],isbig):
631
+ fprintf(fd," ,\n"):
632
+ printendian(fd,c[1],isbig):
633
+ fprintf(fd," ,\n"):
634
+ printendian(fd,c[2],isbig):
635
+ if i<SinCosSize/2 then fprintf(fd," ,\n"): fi: # separator after every entry except the last one (the loop ends at i = SinCosSize/2)
636
+ od:
637
+ fprintf(fd, "\n};\n\n"):
638
+
639
+
640
+ od:
641
+ fprintf(fd,"#endif /* WORDS_BIGENDIAN */\n\n\n"):
642
+
643
+ fclose(fd):
644
+
645
+ printf("\n\n************ DONE TEMPTRIG/trigo_fast.h ************\n Copy it to the crlibm source directory.\n\n");
646
+
647
+
648
+ #################################################
649
+ # Stuff for the accurate phase,
650
+ #
651
+ # Should very bad cases be found in the future, only the degrees below
652
+ # should be increased, then this script rerun. Everything should
653
+ # compile OK with the newly generated trigo_accurate.h
654
+
655
+ printf("\n-------------------------------------------------------\n"):
656
+ printf("--------------------Accurate phase---------------------\n"):
657
+ printf("-------------------------------------------------------\n"):
658
+
659
+ xminSCS := 0:
660
+ xmaxSCS := Pi/4:
661
+
662
+
663
+ #-----------------Sine-----------------------
664
+ degreeSinSCS := 26:
665
+ Poly_P := convert(series(sin(sqrt(x))/(x^(3/2))-1/x, x=0, degreeSinSCS*4),polynom):
666
+ Poly_cheb := numapprox[chebpade](Poly_P, x=xminSCS..xmaxSCS^2, [degreeSinSCS/2-2,0]):
667
+ polySinSCS := poly_exact_SCS(expand(x + x^3 * subs(x=x^2, Poly_cheb))):
668
+
669
+ DEGREE_SIN_SCS := degree(polySinSCS):
670
+ maxepsApproxSinSCS:=numapprox[infnorm](1 - polySinSCS/sin(x), x=xminSCS..xmaxSCS):
671
+ printf("The sine polynomial for the second phase (degree %d) is accurate to %f bits on O..Pi/4\n", DEGREE_SIN_SCS, -log2(maxepsApproxSinSCS)):
672
+ # For the proof of the very bad cases, near zero
673
+ maxepsApproxSinSCSNearZero:=numapprox[infnorm](1 - polySinSCS/sin(x), x=0..2^(-17)):
674
+ printf(" ... and accurate to %f bits on O..2^(-17)\n", -log2(maxepsApproxSinSCSNearZero)):
675
+
676
+
677
+ #-----------------Cos-----------------------
678
+ degreeCosSCS := 28:
679
+ Poly_P := convert(series((cos(sqrt(x)) -1)/x, x=0, degreeCosSCS*4),polynom):
680
+ Poly_cheb := numapprox[chebpade](Poly_P, x=xminSCS..xmaxSCS^2, [degreeCosSCS/2-2,0]):
681
+ polyCosSCS := poly_exact_SCS(expand(1 + x^2 * subs(x=x^2, Poly_cheb))):
682
+ DEGREE_COS_SCS := degree(polyCosSCS):
683
+
684
+ maxepsApproxCosSCS:=numapprox[infnorm](1 - polyCosSCS/cos(x), x=xminSCS..xmaxSCS):
685
+ printf("The cos polynomial for the second phase (degree %d) is accurate to %f bits on O..Pi/4\n", degree(polyCosSCS), -log2(maxepsApproxCosSCS)):
686
+ # For the proof of the very bad cases, near zero
687
+ maxepsApproxCosSCSNearZero:=numapprox[infnorm](1 - polyCosSCS/cos(x), x=0..2^(-18)):
688
+ printf(" ... and accurate to %f bits on O..2^(-18)\n", -log2(maxepsApproxCosSCSNearZero)):
689
+
690
+
691
+
692
+ #-----------------Tan-----------------------
693
+ degreeTanSCS := 70:
694
+ Poly_P := convert(series(tan(sqrt(x))/(x^(3/2))-1/x, x=0, degreeTanSCS*4),polynom):
695
+ Poly_cheb := numapprox[chebpade](Poly_P, x=xminSCS..xmaxSCS^2, [degreeTanSCS/2-2,0]):
696
+ polyTanSCS := poly_exact_SCS(expand(x + x^3 * subs(x=x^2, Poly_cheb))):
697
+
698
+ DEGREE_TAN_SCS := degree(polyTanSCS):
699
+ maxepsApproxTanSCS:=numapprox[infnorm](1 - polyTanSCS/tan(x), x=xminSCS..xmaxSCS):
700
+ printf("The tan polynomial for the second phase (degree %d) is accurate to %f bits on O..Pi/4\n", DEGREE_TAN_SCS, -log2(maxepsApproxTanSCS)):
701
+ # For the proof of the very bad cases, near zero
702
+ maxepsApproxTanSCSNearZero:=numapprox[infnorm](1 - polyTanSCS/tan(x), x=0..2^(-17)):
703
+ printf(" ... and accurate to %f bits on O..2^(-17)\n", -log2(maxepsApproxTanSCSNearZero)):
704
+
705
+
706
+ filename:="TEMPTRIG/trigo_accurate.h":
707
+ fd:=fopen(filename, WRITE, TEXT):
708
+ fprintf(fd, "/*File generated by maple/trigo.mpl*/\n\n"):
709
+ fprintf(fd, "#include \"crlibm.h\"\n#include \"crlibm_private.h\"\n\n"):
710
+ fprintf(fd, "#define DEGREE_SIN_SCS %d \n", DEGREE_SIN_SCS):
711
+ fprintf(fd, "#define DEGREE_COS_SCS %d \n", DEGREE_COS_SCS):
712
+ fprintf(fd, "#define DEGREE_TAN_SCS %d \n", DEGREE_TAN_SCS):
713
+ fprintf(fd, "\n"):
714
+ fprintf(fd, "#define sin_scs_poly_ptr (scs_ptr)&sin_scs_poly \n"):
715
+ fprintf(fd, "#define cos_scs_poly_ptr (scs_ptr)&cos_scs_poly \n"):
716
+ fprintf(fd, "#define tan_scs_poly_ptr (scs_ptr)&tan_scs_poly \n"):
717
+ fprintf(fd, "\n"):
718
+ Write_SCS_poly(fd, sin_scs_poly, polySinSCS):
719
+ fprintf(fd, "\n\n"):
720
+ Write_SCS_poly(fd, cos_scs_poly, polyCosSCS):
721
+ fprintf(fd, "\n\n"):
722
+ Write_SCS_poly(fd, tan_scs_poly, polyTanSCS):
723
+ fprintf(fd, "\n"):
724
+ fclose(fd):
725
+
726
+ printf("\n\n************ DONE TEMPTRIG/trigo_accurate.h ************\n Copy it to the crlibm source directory.\n\n");
727
+
728
+