intervals 0.3.56
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION.txt +1 -0
- data/ext/crlibm/AUTHORS +2 -0
- data/ext/crlibm/COPYING +504 -0
- data/ext/crlibm/ChangeLog +80 -0
- data/ext/crlibm/INSTALL +182 -0
- data/ext/crlibm/Makefile.am +84 -0
- data/ext/crlibm/Makefile.in +530 -0
- data/ext/crlibm/NEWS +0 -0
- data/ext/crlibm/README +31 -0
- data/ext/crlibm/TODO +47 -0
- data/ext/crlibm/VERSION +1 -0
- data/ext/crlibm/aclocal.m4 +989 -0
- data/ext/crlibm/atan-itanium.c +846 -0
- data/ext/crlibm/atan-pentium.c +261 -0
- data/ext/crlibm/atan_accurate.c +244 -0
- data/ext/crlibm/atan_accurate.h +191 -0
- data/ext/crlibm/atan_fast.c +324 -0
- data/ext/crlibm/atan_fast.h +678 -0
- data/ext/crlibm/config.guess +1461 -0
- data/ext/crlibm/config.sub +1566 -0
- data/ext/crlibm/configure +7517 -0
- data/ext/crlibm/configure.ac +364 -0
- data/ext/crlibm/crlibm.h +125 -0
- data/ext/crlibm/crlibm_config.h +149 -0
- data/ext/crlibm/crlibm_config.h.in +148 -0
- data/ext/crlibm/crlibm_private.c +293 -0
- data/ext/crlibm/crlibm_private.h +658 -0
- data/ext/crlibm/csh_fast.c +631 -0
- data/ext/crlibm/csh_fast.h +771 -0
- data/ext/crlibm/double-extended.h +496 -0
- data/ext/crlibm/exp-td.c +962 -0
- data/ext/crlibm/exp-td.h +685 -0
- data/ext/crlibm/exp_accurate.c +197 -0
- data/ext/crlibm/exp_accurate.h +85 -0
- data/ext/crlibm/gappa/log-de-E0-logir0.gappa +106 -0
- data/ext/crlibm/gappa/log-de-E0.gappa +79 -0
- data/ext/crlibm/gappa/log-de.gappa +81 -0
- data/ext/crlibm/gappa/log-td-E0-logir0.gappa +126 -0
- data/ext/crlibm/gappa/log-td-E0.gappa +143 -0
- data/ext/crlibm/gappa/log-td-accurate-E0-logir0.gappa +230 -0
- data/ext/crlibm/gappa/log-td-accurate-E0.gappa +213 -0
- data/ext/crlibm/gappa/log-td-accurate.gappa +217 -0
- data/ext/crlibm/gappa/log-td.gappa +156 -0
- data/ext/crlibm/gappa/trigoSinCosCase3.gappa +204 -0
- data/ext/crlibm/gappa/trigoTanCase2.gappa +73 -0
- data/ext/crlibm/install-sh +269 -0
- data/ext/crlibm/log-de.c +431 -0
- data/ext/crlibm/log-de.h +732 -0
- data/ext/crlibm/log-td.c +852 -0
- data/ext/crlibm/log-td.h +819 -0
- data/ext/crlibm/log10-td.c +906 -0
- data/ext/crlibm/log10-td.h +823 -0
- data/ext/crlibm/log2-td.c +935 -0
- data/ext/crlibm/log2-td.h +821 -0
- data/ext/crlibm/maple/atan.mpl +359 -0
- data/ext/crlibm/maple/common-procedures.mpl +997 -0
- data/ext/crlibm/maple/csh.mpl +446 -0
- data/ext/crlibm/maple/double-extended.mpl +151 -0
- data/ext/crlibm/maple/exp-td.mpl +195 -0
- data/ext/crlibm/maple/log-de.mpl +243 -0
- data/ext/crlibm/maple/log-td.mpl +316 -0
- data/ext/crlibm/maple/log10-td.mpl +345 -0
- data/ext/crlibm/maple/log2-td.mpl +334 -0
- data/ext/crlibm/maple/trigo.mpl +728 -0
- data/ext/crlibm/maple/triple-double.mpl +58 -0
- data/ext/crlibm/missing +198 -0
- data/ext/crlibm/mkinstalldirs +40 -0
- data/ext/crlibm/rem_pio2_accurate.c +219 -0
- data/ext/crlibm/rem_pio2_accurate.h +53 -0
- data/ext/crlibm/scs_lib/AUTHORS +3 -0
- data/ext/crlibm/scs_lib/COPYING +504 -0
- data/ext/crlibm/scs_lib/ChangeLog +16 -0
- data/ext/crlibm/scs_lib/INSTALL +215 -0
- data/ext/crlibm/scs_lib/Makefile.am +18 -0
- data/ext/crlibm/scs_lib/Makefile.in +328 -0
- data/ext/crlibm/scs_lib/NEWS +0 -0
- data/ext/crlibm/scs_lib/README +9 -0
- data/ext/crlibm/scs_lib/TODO +4 -0
- data/ext/crlibm/scs_lib/addition_scs.c +623 -0
- data/ext/crlibm/scs_lib/config.guess +1461 -0
- data/ext/crlibm/scs_lib/config.sub +1566 -0
- data/ext/crlibm/scs_lib/configure +6226 -0
- data/ext/crlibm/scs_lib/division_scs.c +110 -0
- data/ext/crlibm/scs_lib/double2scs.c +174 -0
- data/ext/crlibm/scs_lib/install-sh +269 -0
- data/ext/crlibm/scs_lib/missing +198 -0
- data/ext/crlibm/scs_lib/mkinstalldirs +40 -0
- data/ext/crlibm/scs_lib/multiplication_scs.c +456 -0
- data/ext/crlibm/scs_lib/poly_fct.c +112 -0
- data/ext/crlibm/scs_lib/print_scs.c +73 -0
- data/ext/crlibm/scs_lib/rand_scs.c +63 -0
- data/ext/crlibm/scs_lib/scs.h +353 -0
- data/ext/crlibm/scs_lib/scs2double.c +391 -0
- data/ext/crlibm/scs_lib/scs2mpf.c +58 -0
- data/ext/crlibm/scs_lib/scs2mpfr.c +61 -0
- data/ext/crlibm/scs_lib/scs_private.c +23 -0
- data/ext/crlibm/scs_lib/scs_private.h +133 -0
- data/ext/crlibm/scs_lib/tests/tbx_timing.h +102 -0
- data/ext/crlibm/scs_lib/wrapper_scs.h +486 -0
- data/ext/crlibm/scs_lib/zero_scs.c +52 -0
- data/ext/crlibm/stamp-h.in +1 -0
- data/ext/crlibm/tests/Makefile.am +43 -0
- data/ext/crlibm/tests/Makefile.in +396 -0
- data/ext/crlibm/tests/blind_test.c +148 -0
- data/ext/crlibm/tests/generate_test_vectors.c +258 -0
- data/ext/crlibm/tests/soak_test.c +334 -0
- data/ext/crlibm/tests/test_common.c +627 -0
- data/ext/crlibm/tests/test_common.h +28 -0
- data/ext/crlibm/tests/test_perf.c +570 -0
- data/ext/crlibm/tests/test_val.c +249 -0
- data/ext/crlibm/trigo_accurate.c +500 -0
- data/ext/crlibm/trigo_accurate.h +331 -0
- data/ext/crlibm/trigo_fast.c +1219 -0
- data/ext/crlibm/trigo_fast.h +639 -0
- data/ext/crlibm/triple-double.h +878 -0
- data/ext/extconf.rb +31 -0
- data/ext/fpu.c +107 -0
- data/ext/jamis-mod.rb +591 -0
- data/lib/fpu.rb +287 -0
- data/lib/interval.rb +1170 -0
- data/lib/intervals.rb +212 -0
- data/lib/struct_float.rb +133 -0
- data/test/data_atan.txt +360 -0
- data/test/data_cos.txt +346 -0
- data/test/data_cosh.txt +3322 -0
- data/test/data_exp.txt +3322 -0
- data/test/data_log.txt +141 -0
- data/test/data_sin.txt +140 -0
- data/test/data_sinh.txt +3322 -0
- data/test/data_tan.txt +342 -0
- metadata +186 -0
@@ -0,0 +1,997 @@
|
|
1
|
+
|
2
|
+
#####################################################################
|
3
|
+
# Useful procedures for IEEE doubles
|
4
|
+
|
5
|
+
|
6
|
+
#---------------------------------------------------------------------
|
7
|
+
|
8
|
+
log2:=proc(x) evalf( log[2](x)) end proc:
|
9
|
+
|
10
|
+
|
11
|
+
#---------------------------------------------------------------------
|
12
|
+
# ieeedouble converts a number to IEEE double format.
|
13
|
+
# returns sign (-1 or 1), exponent between -1022 and 1023, mantissa as a fraction between 0.5 and 1.
|
14
|
+
ieeedouble:=proc(xx)
|
15
|
+
local x, sgn, logabsx, exponent, mantissa, infmantissa,powermin,powermax,expmin,expmax,expmiddle,powermiddle;
|
16
|
+
Digits := 100;
|
17
|
+
x := evalf(xx);
|
18
|
+
if (x=0) then sgn, exponent, mantissa := 1, -1022, 0
|
19
|
+
else
|
20
|
+
if (x < 0) then sgn := -1
|
21
|
+
else sgn := 1
|
22
|
+
fi:
|
23
|
+
x := abs(x);
|
24
|
+
if x >= 2^(1023)*(2-2^(-53)) then mantissa := infinity; exponent := 1023
|
25
|
+
else if x <= 2^(-1075) then mantissa := 0; exponent := -1022
|
26
|
+
else
|
27
|
+
if x <= 2^(-1022) then exponent := -1022
|
28
|
+
else
|
29
|
+
# x is between 2^(-1022) and 2^(1024)
|
30
|
+
powermin := 2^(-1022); expmin := -1022;
|
31
|
+
powermax := 2^1024; expmax := 1024;
|
32
|
+
while (expmax-expmin > 1) do
|
33
|
+
expmiddle := round((expmax+expmin)/2);
|
34
|
+
powermiddle := 2^expmiddle;
|
35
|
+
if x >= powermiddle then
|
36
|
+
powermin := powermiddle;
|
37
|
+
expmin := expmiddle
|
38
|
+
else
|
39
|
+
powermax := powermiddle;
|
40
|
+
expmax := expmiddle
|
41
|
+
fi
|
42
|
+
od;
|
43
|
+
# now, expmax - expmin = 1 and powermin <= x < powermax,
|
44
|
+
# powermin = 2^expmin and powermax = 2^expmax, so expmin is the exponent of x
|
45
|
+
exponent := expmin;
|
46
|
+
fi;
|
47
|
+
infmantissa := x*2^(52-exponent);
|
48
|
+
if frac(infmantissa) <> 0.5 then mantissa := round(infmantissa)
|
49
|
+
else
|
50
|
+
mantissa := floor(infmantissa);
|
51
|
+
if type(mantissa,odd) then mantissa := mantissa+1 fi
|
52
|
+
fi;
|
53
|
+
mantissa := mantissa*2^(-52);
|
54
|
+
fi;
|
55
|
+
fi;
|
56
|
+
fi;
|
57
|
+
sgn,exponent,mantissa;
|
58
|
+
end:
|
59
|
+
|
60
|
+
#---------------------------------------------------------------------
|
61
|
+
# ieeedoubleRU converts a number to IEEE double format rounding upwards.
|
62
|
+
# returns sign (-1 or 1), exponent between -1022 and 1023, mantissa as a fraction between 0.5 and 1.
|
63
|
+
ieeedoubleRU:=proc(xx)
|
64
|
+
local x, sgn, logabsx, exponent, mantissa, infmantissa,powermin,powermax,expmin,expmax,expmiddle,powermiddle;
|
65
|
+
Digits := 100;
|
66
|
+
x := evalf(xx);
|
67
|
+
if (x=0) then sgn, exponent, mantissa := 1, -1022, 0
|
68
|
+
else
|
69
|
+
if (x < 0) then sgn := -1
|
70
|
+
else sgn := 1
|
71
|
+
fi:
|
72
|
+
x := abs(x);
|
73
|
+
if x >= 2^(1023)*(2-2^(-53)) then mantissa := infinity; exponent := 1023
|
74
|
+
else if x <= 2^(-1075) then mantissa := 0; exponent := -1022
|
75
|
+
else
|
76
|
+
if x <= 2^(-1022) then exponent := -1022
|
77
|
+
else
|
78
|
+
# x is between 2^(-1022) and 2^(1024)
|
79
|
+
powermin := 2^(-1022); expmin := -1022;
|
80
|
+
powermax := 2^1024; expmax := 1024;
|
81
|
+
while (expmax-expmin > 1) do
|
82
|
+
expmiddle := round((expmax+expmin)/2);
|
83
|
+
powermiddle := 2^expmiddle;
|
84
|
+
if x >= powermiddle then
|
85
|
+
powermin := powermiddle;
|
86
|
+
expmin := expmiddle
|
87
|
+
else
|
88
|
+
powermax := powermiddle;
|
89
|
+
expmax := expmiddle
|
90
|
+
fi
|
91
|
+
od;
|
92
|
+
# now, expmax - expmin = 1 and powermin <= x < powermax,
|
93
|
+
# powermin = 2^expmin and powermax = 2^expmax, so expmin is the exponent of x
|
94
|
+
exponent := expmin;
|
95
|
+
fi;
|
96
|
+
infmantissa := x*2^(52-exponent);
|
97
|
+
if frac(infmantissa) <> 0 then mantissa := ceil(infmantissa)
|
98
|
+
else
|
99
|
+
mantissa := infmantissa;
|
100
|
+
fi;
|
101
|
+
mantissa := mantissa*2^(-52);
|
102
|
+
fi;
|
103
|
+
fi;
|
104
|
+
fi;
|
105
|
+
sgn,exponent,mantissa;
|
106
|
+
end:
|
107
|
+
|
108
|
+
#---------------------------------------------------------------------
|
109
|
+
# ieeedoubleRD converts a number to IEEE double format rounding upwards.
|
110
|
+
# returns sign (-1 or 1), exponent between -1022 and 1023, mantissa as a fraction between 0.5 and 1.
|
111
|
+
ieeedoubleRD:=proc(xx)
|
112
|
+
local x, sgn, logabsx, exponent, mantissa, infmantissa,powermin,powermax,expmin,expmax,expmiddle,powermiddle;
|
113
|
+
Digits := 100;
|
114
|
+
x := evalf(xx);
|
115
|
+
if (x=0) then sgn, exponent, mantissa := 1, -1022, 0
|
116
|
+
else
|
117
|
+
if (x < 0) then sgn := -1
|
118
|
+
else sgn := 1
|
119
|
+
fi:
|
120
|
+
x := abs(x);
|
121
|
+
if x >= 2^(1023)*(2-2^(-53)) then mantissa := infinity; exponent := 1023
|
122
|
+
else if x <= 2^(-1075) then mantissa := 0; exponent := -1022
|
123
|
+
else
|
124
|
+
if x <= 2^(-1022) then exponent := -1022
|
125
|
+
else
|
126
|
+
# x is between 2^(-1022) and 2^(1024)
|
127
|
+
powermin := 2^(-1022); expmin := -1022;
|
128
|
+
powermax := 2^1024; expmax := 1024;
|
129
|
+
while (expmax-expmin > 1) do
|
130
|
+
expmiddle := round((expmax+expmin)/2);
|
131
|
+
powermiddle := 2^expmiddle;
|
132
|
+
if x >= powermiddle then
|
133
|
+
powermin := powermiddle;
|
134
|
+
expmin := expmiddle
|
135
|
+
else
|
136
|
+
powermax := powermiddle;
|
137
|
+
expmax := expmiddle
|
138
|
+
fi
|
139
|
+
od;
|
140
|
+
# now, expmax - expmin = 1 and powermin <= x < powermax,
|
141
|
+
# powermin = 2^expmin and powermax = 2^expmax, so expmin is the exponent of x
|
142
|
+
exponent := expmin;
|
143
|
+
fi;
|
144
|
+
infmantissa := x*2^(52-exponent);
|
145
|
+
if frac(infmantissa) <> 0 then mantissa := floor(infmantissa)
|
146
|
+
else
|
147
|
+
mantissa := infmantissa;
|
148
|
+
fi;
|
149
|
+
mantissa := mantissa*2^(-52);
|
150
|
+
fi;
|
151
|
+
fi;
|
152
|
+
fi;
|
153
|
+
sgn,exponent,mantissa;
|
154
|
+
end:
|
155
|
+
|
156
|
+
|
157
|
+
|
158
|
+
|
159
|
+
#---------------------------------------------------------------------
|
160
|
+
# pulp returns the precision of the ulp of x
|
161
|
+
|
162
|
+
pulp:=proc(x)
|
163
|
+
local flt, ulpy:
|
164
|
+
flt:=ieeedouble(x):
|
165
|
+
ulpy:=-52+flt[2]:
|
166
|
+
end proc:
|
167
|
+
|
168
|
+
|
169
|
+
|
170
|
+
#---------------------------------------------------------------------
|
171
|
+
# ulp returns the absolute value of the ulp of x
|
172
|
+
ulp:=proc(x)
|
173
|
+
2**(pulp(x)):
|
174
|
+
end proc:
|
175
|
+
|
176
|
+
|
177
|
+
|
178
|
+
|
179
|
+
#---------------------------------------------------------------------
|
180
|
+
# Returns nearest IEEE double:
|
181
|
+
nearest := proc(x)
|
182
|
+
local sgn, exponent, mantissa:
|
183
|
+
|
184
|
+
sgn,exponent,mantissa := ieeedouble(x):
|
185
|
+
sgn*mantissa*2^(exponent):
|
186
|
+
|
187
|
+
end:
|
188
|
+
|
189
|
+
#---------------------------------------------------------------------
|
190
|
+
# Returns RU IEEE double:
|
191
|
+
roundUp := proc(x)
|
192
|
+
local sgn, exponent, mantissa:
|
193
|
+
|
194
|
+
sgn,exponent,mantissa := ieeedoubleRU(x):
|
195
|
+
sgn*mantissa*2^(exponent):
|
196
|
+
|
197
|
+
end:
|
198
|
+
|
199
|
+
#---------------------------------------------------------------------
|
200
|
+
# Returns RD IEEE double:
|
201
|
+
roundDown := proc(x)
|
202
|
+
local sgn, exponent, mantissa:
|
203
|
+
|
204
|
+
sgn,exponent,mantissa := ieeedoubleRD(x):
|
205
|
+
sgn*mantissa*2^(exponent):
|
206
|
+
|
207
|
+
end:
|
208
|
+
|
209
|
+
#---------------------------------------------------------------------
|
210
|
+
# Returns RZ IEEE double:
|
211
|
+
roundToZero := proc(x)
|
212
|
+
if evalf(x) > 0 then roundDown(x) else roundUp(x) fi:
|
213
|
+
end:
|
214
|
+
|
215
|
+
|
216
|
+
|
217
|
+
|
218
|
+
#---------------------------------------------------------------------
|
219
|
+
# ieehexa returns a string containing the hexadecimal representation of the double nearest to input x.
|
220
|
+
|
221
|
+
ieeehexa:= proc(x)
|
222
|
+
local hex2, xx, longint, expo, sgn, frac, resultat:
|
223
|
+
if(x=0) then resultat:=["00000000","00000000"]:
|
224
|
+
elif(x=-0) then resultat:=["80000000","00000000"]: # nice try
|
225
|
+
else
|
226
|
+
xx:=ieeedouble(x):
|
227
|
+
sgn:=xx[1]:
|
228
|
+
expo:=xx[2]:
|
229
|
+
frac:=xx[3]:
|
230
|
+
if (expo = -1023) then
|
231
|
+
longint := (frac)*2^51 : # subnormal
|
232
|
+
else
|
233
|
+
longint := (frac-1)*2^52 + (expo+1023)*2^52:
|
234
|
+
fi:
|
235
|
+
if (sgn=-1) then
|
236
|
+
longint := longint + 2^63:
|
237
|
+
fi:
|
238
|
+
longint := longint + 2^64: # to get all the hexadecimal digits when we'll convert to string
|
239
|
+
hex2:=convert(longint, hex):
|
240
|
+
hex2:=convert(hex2, string):
|
241
|
+
|
242
|
+
resultat:=[substring(hex2,2..9), substring(hex2,10..18)]:
|
243
|
+
fi:
|
244
|
+
resultat:
|
245
|
+
end proc:
|
246
|
+
|
247
|
+
|
248
|
+
|
249
|
+
#---------------------------------------------------------------------
|
250
|
+
# reciprocal of the previous
|
251
|
+
hexa2ieee:= proc(hexa)
|
252
|
+
local dec, bin, expo, mantis, sgn, hex1, hex2, hexcat, res:
|
253
|
+
|
254
|
+
hex1:= op(1, hexa):
|
255
|
+
hex2:= op(2, hexa):
|
256
|
+
hexcat:= cat(hex1, hex2):
|
257
|
+
dec:= convert(hexcat, decimal, hex):
|
258
|
+
|
259
|
+
if(dec >= 2^63) then
|
260
|
+
dec := dec - 2^63:
|
261
|
+
sgn:= -1:
|
262
|
+
else
|
263
|
+
sgn:= 1:
|
264
|
+
fi:
|
265
|
+
expo:= trunc(dec/(2^52)) - 1023:
|
266
|
+
if(expo=-1023) then
|
267
|
+
mantis:= frac(dec/(2^51)): # denormal
|
268
|
+
else
|
269
|
+
mantis:= 1+frac(dec/(2^52)):
|
270
|
+
fi:
|
271
|
+
res:= evalf(sgn*2^(expo)*mantis):
|
272
|
+
res:
|
273
|
+
end proc:
|
274
|
+
|
275
|
+
#---------------------------------------------------------------------
|
276
|
+
|
277
|
+
|
278
|
+
|
279
|
+
# Print a number x in Low or Big Endian representation in opened file "fd":
|
280
|
+
printendian:=proc(fd,x,isbig)
|
281
|
+
local xhl:
|
282
|
+
xhl:=ieeehexa(x):
|
283
|
+
|
284
|
+
if(isbig=0 or isbig=1) then
|
285
|
+
fprintf(fd,"{{0x%+0.8s,0x%+0.8s}} /* %+0.10e */", xhl[2-isbig], xhl[isbig+1], x):
|
286
|
+
else
|
287
|
+
print("ERROR, isbig must be equal to 0 or 1"):
|
288
|
+
end if:
|
289
|
+
end proc:
|
290
|
+
|
291
|
+
|
292
|
+
|
293
|
+
#---------------------------------------------------------------------
|
294
|
+
# hi_lo takes an arbitrary precision number x and returns two doubles such that:
|
295
|
+
# x ~ x_hi + x_lo
|
296
|
+
hi_lo:= proc(x)
|
297
|
+
local x_hi, x_lo, res:
|
298
|
+
x_hi:= nearest(evalf(x)):
|
299
|
+
res:=x-x_hi:
|
300
|
+
if (res = 0) then
|
301
|
+
x_lo:=0:
|
302
|
+
else
|
303
|
+
x_lo:=nearest(evalf(res)):
|
304
|
+
end if:
|
305
|
+
x_hi,x_lo:
|
306
|
+
end:
|
307
|
+
|
308
|
+
|
309
|
+
|
310
|
+
#---------------------------------------------------------------------
|
311
|
+
# same as hi_lo, but returns hexadecimal strings
|
312
|
+
ieeehexa2:=proc(x)
|
313
|
+
local reshi, reslo, hexhi, hexlo:
|
314
|
+
reshi:=nearest(x):
|
315
|
+
hexhi:=ieee2hexa(reshi):
|
316
|
+
reslo:= nearest(x-reshi):
|
317
|
+
hexlo:=ieee2hexa(reslo):
|
318
|
+
reshi, reslo:
|
319
|
+
end proc:
|
320
|
+
|
321
|
+
|
322
|
+
|
323
|
+
|
324
|
+
#---------------------------------------------------------------------
|
325
|
+
# Computes the constant for the round-to-nearest test.
|
326
|
+
# delta is the overall relative error of the approximation scheme
|
327
|
+
compute_rn_constant := proc(delta)
|
328
|
+
local k:
|
329
|
+
k := trunc(-log2(delta)) - 53:
|
330
|
+
nearest( 1+ 2**(-52) + (2**(54+k)*delta) / ( (2**k-1) * (1-2**(-53)) ) ):
|
331
|
+
end proc:
|
332
|
+
|
333
|
+
|
334
|
+
|
335
|
+
#---------------------------------------------------------------------
|
336
|
+
# Takes a real number, and prints the bits after the 53th of its nearest IEEE floating-point number
|
337
|
+
|
338
|
+
showHowDifficultToRound:=proc(x)
|
339
|
+
local xb,xs,s,e,m:
|
340
|
+
Digits:=200:
|
341
|
+
s,e,m := ieeedouble(x):
|
342
|
+
xb:=convert(evalf(x*2^(-e)),binary):
|
343
|
+
xs:=convert(xb, string):
|
344
|
+
substring(xs,55..153)
|
345
|
+
end proc:
|
346
|
+
|
347
|
+
|
348
|
+
|
349
|
+
#####################################################################
|
350
|
+
|
351
|
+
# Stuff about truncated polynomials
|
352
|
+
|
353
|
+
# Truncate a polynomial
|
354
|
+
|
355
|
+
#---------------------------------------------------------------------
|
356
|
+
#poly_exact takes a polynomial in x with arbitrary precision
|
357
|
+
# coefficients, and returns a truncated polynomial where coefficients
|
358
|
+
# are IEEE doubles.
|
359
|
+
|
360
|
+
poly_exact:=proc(P)
|
361
|
+
local deg,i, coef, coef_t, Q:
|
362
|
+
Q:= 0:
|
363
|
+
convert(Q, polynom):
|
364
|
+
deg:=degree(P,x):
|
365
|
+
for i from 0 to deg do
|
366
|
+
coef:=coeff(P,x,i):
|
367
|
+
coef_t:=nearest(coef):
|
368
|
+
Q:= Q + coef_t*x^i:
|
369
|
+
od:
|
370
|
+
return(Q):
|
371
|
+
end:
|
372
|
+
|
373
|
+
|
374
|
+
#---------------------------------------------------------------------
|
375
|
+
#Like poly_exact, but the n first coefficients are exactly
|
376
|
+
# representable as the sum of two doubles. (to actually get the two
|
377
|
+
# doubles, use procedure hi_lo)
|
378
|
+
|
379
|
+
poly_exact2:=proc(P,n)
|
380
|
+
local deg,i, coef, coef_hi, coef_lo, Q:
|
381
|
+
Q:= 0:
|
382
|
+
convert(Q, polynom):
|
383
|
+
deg:=degree(P,x):
|
384
|
+
for i from 0 to deg do
|
385
|
+
coef :=coeff(P,x,i):
|
386
|
+
coef_hi, coef_lo:=hi_lo(coef):
|
387
|
+
Q:= Q + coef_hi*x^i:
|
388
|
+
if(i<n) then
|
389
|
+
Q := Q + coef_lo*x^i:
|
390
|
+
fi:
|
391
|
+
od:
|
392
|
+
return(Q):
|
393
|
+
end:
|
394
|
+
|
395
|
+
|
396
|
+
#---------------------------------------------------------------------
|
397
|
+
# OBSOLETE use compute_horner_rounding_error below
|
398
|
+
# Compute a bound on the accumulated rounding error caused by the Horner evaluation of a truncated polynomial
|
399
|
+
# P is the polynomial.
|
400
|
+
# xmax is the max value of |x|.
|
401
|
+
# n is the degree when P is computed in double double. The first double-double operation is an addition.
|
402
|
+
|
403
|
+
# returns max absolute error, min of the function, max of the function.
|
404
|
+
|
405
|
+
# This procedure also checks on the fly that the fast (test-free) versions of the double-double addition can be used, i.e. that for all x, at each Horner step i computing ci+x*Si, we have |ci|>|x*Si|. It prints warnings if it not the case.
|
406
|
+
|
407
|
+
compute_abs_rounding_error:=proc(poly,xmax, nn)
|
408
|
+
local n, deg, delta, deltap, i, S, P, Snorm, Smin, Smax, prec:
|
409
|
+
deltap:=0:
|
410
|
+
delta:=0:
|
411
|
+
deg:=degree(poly):
|
412
|
+
|
413
|
+
prec:=53: # precision of the first iterations
|
414
|
+
|
415
|
+
S:=coeff(poly, x, deg):
|
416
|
+
Smax:=abs(S):
|
417
|
+
Smin:=Smax:
|
418
|
+
|
419
|
+
if nn<0 then n:=0: else n:=nn: fi:# sometimes called by compute_rel_rounding_error with n=-1
|
420
|
+
|
421
|
+
for i from (deg-1) to 0 by -1 do
|
422
|
+
P:= convert(S*x, polynom):
|
423
|
+
Smin := abs(coeff(poly,x,i)) - xmax*Smax :
|
424
|
+
if(Smin<=0) then
|
425
|
+
printf("Warning! in compute_abs_rounding_error, Smin<=0 at iteration %d, consider decreasing xmax\n",i):
|
426
|
+
fi:
|
427
|
+
delta:= evalf(xmax*deltap + 2**(-prec)*xmax*Smax):
|
428
|
+
if i<n then
|
429
|
+
# fast Add22 ?
|
430
|
+
if abs(coeff(poly,x,i)) < xmax*Smax # may be improved to xmax*Smax/2
|
431
|
+
then printf("WARNING Add22 cannot be used at step %d, use Add22Cond\n" , i ):
|
432
|
+
printf(" coeff=%1.20e, xmax*Smax=%1.20e" , abs(coeff(poly,x,i)), xmax*Smax ):
|
433
|
+
fi:
|
434
|
+
fi:
|
435
|
+
S:=convert(P+coeff(poly,x,i), polynom):
|
436
|
+
Snorm:=evalf(numapprox[infnorm](S, x=-xmax..xmax)):
|
437
|
+
if i=n-1 then prec:=100: fi: # from the addition of the n-1-th iteration
|
438
|
+
deltap:= evalf(delta + 2**(-prec)*(delta + Snorm)):
|
439
|
+
Smax := Snorm + deltap:
|
440
|
+
od:
|
441
|
+
deltap, Smin, Smax:
|
442
|
+
end proc:
|
443
|
+
|
444
|
+
|
445
|
+
|
446
|
+
#---------------------------------------------------------------------
|
447
|
+
# OBSOLETE use compute_horner_rounding_error below
|
448
|
+
# Computes the total relative rounding error
|
449
|
+
compute_rel_rounding_error:=proc(poly,xmax, n)
|
450
|
+
local deg, p, rho, deltap, Smin, Smax:
|
451
|
+
|
452
|
+
deg:=degree(poly):
|
453
|
+
if(n>0) then p:=100: else p:=53: fi:
|
454
|
+
|
455
|
+
if coeff(poly,x, 0) = 0 then
|
456
|
+
deltap, Smin, Smax := compute_abs_rounding_error(poly/x,xmax, n-1):
|
457
|
+
rho := (2^(-p)*(Smax+deltap) +deltap ) / Smin :
|
458
|
+
else
|
459
|
+
deltap, Smin, Smax := compute_abs_rounding_error(poly,xmax, n):
|
460
|
+
rho := deltap / Smin:
|
461
|
+
fi:
|
462
|
+
rho:
|
463
|
+
end proc:
|
464
|
+
|
465
|
+
|
466
|
+
|
467
|
+
|
468
|
+
#---------------------------------------------------------------------
|
469
|
+
# OBSOLETE use compute_horner_rounding_error below
|
470
|
+
# Computes the accumulated rounding error during the polynomial evaluation.
|
471
|
+
# P is the polynomial.
|
472
|
+
# xmax is the max value of |x|.
|
473
|
+
# n is the degree when P is computed in double double. The first double-double operation is a multiplication (probably less useful).
|
474
|
+
|
475
|
+
# returns max absolute error, min of the function, max of the function.
|
476
|
+
|
477
|
+
# This procedure also checks on the fly that the fast (test-free) versions of the double-double addition can be used, i.e. that for all x, at each Horner step i computing ci+x*Si, we have |ci|>|x*Si|. It prints warnings if it not the case.
|
478
|
+
|
479
|
+
compute_abs_rounding_error_firstmult:=proc(poly,xmax, nn)
|
480
|
+
local n, deg, delta, deltap, i, S, P, Snorm, Smin, Smax, prec:
|
481
|
+
deltap:=0:
|
482
|
+
delta:=0:
|
483
|
+
deg:=degree(poly):
|
484
|
+
|
485
|
+
prec:=53: # precision of the first iterations
|
486
|
+
|
487
|
+
S:=coeff(poly, x, deg):
|
488
|
+
Smax:=abs(S):
|
489
|
+
Smin:=Smax:
|
490
|
+
|
491
|
+
if nn<0 then n:=0: else n:=nn: fi:# sometimes called by compute_rel_rounding_error with n=-1
|
492
|
+
|
493
|
+
for i from (deg-1) to 0 by -1 do
|
494
|
+
if i=n-1 then prec:=100: fi: # from the mult of the n-1-th iteration
|
495
|
+
P:= convert(S*x, polynom):
|
496
|
+
Smin := abs(coeff(poly,x,i)) - xmax*Smax :
|
497
|
+
if(Smin<=0) then
|
498
|
+
printf("Warning! in compute_abs_rounding_error, Smin<=0 at iteration %d, consider decreasing xmax\n",i):
|
499
|
+
fi:
|
500
|
+
delta:= evalf(xmax*deltap + 2**(-prec)*xmax*Smax):
|
501
|
+
if i<n then
|
502
|
+
# fast Add22 ?
|
503
|
+
if abs(coeff(poly,x,i)) < xmax*Smax # may be improved to xmax*Smax/2
|
504
|
+
then printf("WARNING Add22 cannot be used at step %d, use Add22Cond\n" , i ):
|
505
|
+
printf(" coeff=%1.20e, xmax*Smax=%1.20e" , abs(coeff(poly,x,i)), xmax*Smax ):
|
506
|
+
fi:
|
507
|
+
fi:
|
508
|
+
S:=convert(P+coeff(poly,x,i), polynom):
|
509
|
+
Snorm:=evalf(numapprox[infnorm](S, x=-xmax..xmax)):
|
510
|
+
deltap:= evalf(delta + 2**(-prec)*(delta + Snorm)):
|
511
|
+
Smax := Snorm + deltap:
|
512
|
+
od:
|
513
|
+
deltap, Smin, Smax:
|
514
|
+
end proc:
|
515
|
+
|
516
|
+
|
517
|
+
|
518
|
+
|
519
|
+
#---------------------------------------------------------------------
|
520
|
+
# OBSOLETE use compute_horner_rounding_error below
|
521
|
+
# Computes the total relative rounding error
|
522
|
+
compute_rel_rounding_error_firstmult:=proc(poly,xmax, n)
|
523
|
+
local deg, p, rho, deltap, Smin, Smax:
|
524
|
+
|
525
|
+
deg:=degree(poly):
|
526
|
+
if(n>0) then p:=100: else p:=53: fi:
|
527
|
+
|
528
|
+
if coeff(poly,x, 0) = 0 then
|
529
|
+
deltap, Smin, Smax := compute_abs_rounding_error_firstmult(poly/x,xmax, n-1):
|
530
|
+
rho := (2^(-p)*(Smax+deltap) +deltap ) / Smin :
|
531
|
+
else
|
532
|
+
deltap, Smin, Smax := compute_abs_rounding_error_firstmult(poly,xmax, n):
|
533
|
+
rho := deltap / Smin:
|
534
|
+
fi:
|
535
|
+
rho:
|
536
|
+
end proc:
|
537
|
+
|
538
|
+
|
539
|
+
|
540
|
+
|
541
|
+
# Compute a good truncated polynomial approximation for a function
|
542
|
+
# Computes an approximation to a function of x f, as a truncated polynomial of deegree deg with the n first coefficients exactly representable as double-double.
|
543
|
+
# The function f(x) must have as input interval xmin..xmax
|
544
|
+
# returns [ truncated polynomial, relative approx error of trunc. poly. , infinite precision polynomial, rel. error of inf. prec. poly ]
|
545
|
+
poly_trunc_classic:=proc(f,deg,xmin,xmax,n)
|
546
|
+
local pe, repe, pt, ppe, rept, maxpt:
|
547
|
+
pe:=numapprox[minimax]( f, x=xmin..xmax, [deg,0], 1, 'err'):
|
548
|
+
pt := poly_exact2(pe,n):
|
549
|
+
rept := numapprox[infnorm]( 1-pt/f, x=xmin..xmax) :
|
550
|
+
maxpt := numapprox[infnorm]( pt, x=xmin..xmax) :
|
551
|
+
pt,rept, maxpt:
|
552
|
+
end proc:
|
553
|
+
|
554
|
+
|
555
|
+
|
556
|
+
#---------------------------------------------------------------------
|
557
|
+
# Computes a truncated polynomial of degree deg with the two first coefficients stored as double-doubles.
|
558
|
+
# The function f(x) must have as input interval xmin..xmax
|
559
|
+
# returns [ truncated polynomial, relative error of trunc. poly. , infinite precision polynomial, rel. error of inf. prec. poly ]
|
560
|
+
poly_trunc_f2d_2:=proc(f,deg,xmin,xmax)
|
561
|
+
local pe, repe, pt, c0, c1, c2, ppe, abserr, relerr, maxpt, err:
|
562
|
+
pe:=numapprox[minimax]( f, x=xmin..xmax, [deg,0], 1, 'err'):
|
563
|
+
pt := poly_exact2(pe,2):
|
564
|
+
c0:=coeff(pt,x,0):
|
565
|
+
c1:=coeff(pt,x,1):
|
566
|
+
c2:=coeff(pt,x,2):
|
567
|
+
ppe:=numapprox[minimax]( (f - c0 - c1 * x - c2*x*x) , x=xmin..xmax, [deg,0], 1, 'err'):
|
568
|
+
ppe:=expand(ppe) - coeff(ppe,x,0) - coeff(ppe,x,1)*x- coeff(ppe,x,2)*x*x:
|
569
|
+
pt := poly_exact2(ppe,2) + c0 + c1*x + c2*x*x:
|
570
|
+
abserr := numapprox[infnorm]( pt-f, x=xmin..xmax) :
|
571
|
+
relerr := numapprox[infnorm]( 1-pt/f, x=xmin..xmax) :
|
572
|
+
maxpt := numapprox[infnorm]( pt, x=xmin..xmax) :
|
573
|
+
pt,abserr,relerr,maxpt:
|
574
|
+
end proc:
|
575
|
+
|
576
|
+
|
577
|
+
|
578
|
+
|
579
|
+
#---------------------------------------------------------------------
|
580
|
+
# Computes a truncated polynomial of degree deg with the first coefficient stored as double-double.
|
581
|
+
# The function f(x) must have as input interval xmin..xmax
|
582
|
+
# returns [ truncated polynomial, relative error of trunc. poly. , infinite precision polynomial, rel. error of inf. prec. poly ]
|
583
|
+
poly_trunc_f2d_1:=proc(f,deg,xmin,xmax)
|
584
|
+
local pe, repe, pt, c0, c1, ppe, relerr, abserr, maxpt, err:
|
585
|
+
pe:=numapprox[minimax]( f , x=xmin..xmax, [deg,0], 1, 'err'):
|
586
|
+
pt := poly_exact2(pe,1):
|
587
|
+
c0:=coeff(pt,x,0):
|
588
|
+
c1:=coeff(pt,x,1):
|
589
|
+
ppe:=numapprox[minimax]( (f - c0 - c1 * x) , x=xmin..xmax, [deg,0], 1, 'err'):
|
590
|
+
ppe:=ppe - coeff(ppe,x,0) - coeff(ppe,x,1)*x:
|
591
|
+
pt := poly_exact2(ppe,1) + c0 + c1*x:
|
592
|
+
abserr := numapprox[infnorm]( pt-f, x=xmin..xmax) :
|
593
|
+
relerr := numapprox[infnorm]( 1-pt/f, x=xmin..xmax) :
|
594
|
+
maxpt := numapprox[infnorm]( pt, x=xmin..xmax) :
|
595
|
+
pt,abserr,relerr,maxpt:
|
596
|
+
end proc:
|
597
|
+
|
598
|
+
|
599
|
+
|
600
|
+
#---------------------------------------------------------------------
|
601
|
+
# compute_horner_rounding_error
|
602
|
+
|
603
|
+
# Computes a bound on the accumulated rounding error caused by the Horner evaluation of a truncated polynomial
|
604
|
+
# It is designed to allow evaluating the error for various schemes:
|
605
|
+
# - with or without an error on x
|
606
|
+
# - using SCS operators
|
607
|
+
# - using double and double-double operators.
|
608
|
+
# Arguments:
|
609
|
+
# P is the polynomial.
|
610
|
+
# xmax is the max value of |x|.
|
611
|
+
# errors is a list of size n where n is the degree of the polynomial.
|
612
|
+
# Each element of this list is a triple (epsx,epsmul,epsadd) where
|
613
|
+
# epsx is the relative error on x at each step,
|
614
|
+
# epsadd is the max relative error on the addition
|
615
|
+
# epsmul is the max relative error on the multiplication.
|
616
|
+
# This allows to handle SCS Horner, as well as evaluation starting in double and ending in double-double.
|
617
|
+
# check_dd is a flag, if set to 1 the procedure also checks on the fly that the fast (test-free) versions
|
618
|
+
# of the double-double addition can be used, i.e. that for all x, at each Horner step i computing ci+x*Si,
|
619
|
+
# we have |ci|>|x*Si|. It prints warnings if it not the case.
|
620
|
+
# returns (epsprimek, deltak, minP, maxP) where (see the doc)
|
621
|
+
# epsprimek is the max rel error of the last multiplication (useful if the coeff of degree 0 is 0)
|
622
|
+
# deltak is the max absolute error of the last addition (useful if the reconstruction adds something to result of the polynomial)
|
623
|
+
# minP min of the evaluated polynomial (useful to compute a relative error out of deltak)
|
624
|
+
# maxP max of the evaluated polynomial.
|
625
|
+
|
626
|
+
compute_horner_rounding_error:=proc(poly, x, xmax, errors, check_dd)
|
627
|
+
local deg, Sk, maxSk, minSk, epsaddk, epsmulk, deltaaddk, k, ck, epsx, epsmul, epsadd, deltaadd, Pk, maxPk:
|
628
|
+
|
629
|
+
if assigned(x) then
|
630
|
+
printf("Error in compute_horner_rounding_error, polynomial variable is assigned\n"):
|
631
|
+
return 'procname(args)':
|
632
|
+
fi:
|
633
|
+
|
634
|
+
deg:=degree(poly,x):
|
635
|
+
if(deg<0) then printf("ERROR: negative degree in compute_abs_rounding_error"): return 'procname(args)': fi:
|
636
|
+
|
637
|
+
Sk:=coeff(poly, x, deg):
|
638
|
+
maxSk:=abs(Sk):
|
639
|
+
minSk:=maxSk:
|
640
|
+
epsmulk:=0:
|
641
|
+
deltaaddk:=0:
|
642
|
+
epsaddk:=0:
|
643
|
+
|
644
|
+
for k from (deg) to 1 by -1 do
|
645
|
+
|
646
|
+
# the errors to consider for this step
|
647
|
+
epsx := errors[k][1]:
|
648
|
+
epsadd := errors[k][2]:
|
649
|
+
epsmul := errors[k][3]:
|
650
|
+
|
651
|
+
# multiplication operation
|
652
|
+
Pk:= convert(Sk*x, polynom):
|
653
|
+
maxPk:=numapprox[infnorm](Pk, x=-xmax..xmax):
|
654
|
+
|
655
|
+
ck:=coeff(poly,x,k-1):
|
656
|
+
epsmulk:=evalf( (1+epsx)*(1+epsaddk)*(1+epsmul)-1 + 10^(-Digits+2) ):
|
657
|
+
|
658
|
+
#addition
|
659
|
+
if(ck=0) then
|
660
|
+
Sk:=Pk:
|
661
|
+
maxSk := maxPk:
|
662
|
+
minSk:=0:
|
663
|
+
deltaaddk:= evalf(epsmulk*maxPk):
|
664
|
+
epsaddk:=epsmulk:
|
665
|
+
else
|
666
|
+
Sk:=convert(Pk+ck , polynom):
|
667
|
+
maxSk:=numapprox[infnorm](Sk, x=-xmax..xmax):
|
668
|
+
minSk:=minimize(abs(Sk), x=-xmax..xmax):
|
669
|
+
if(epsadd=2^(-53)) then # compute deltadd exactly as the max half ulp of the result
|
670
|
+
deltaadd := 0.5*ulp(maxSk+epsmulk*maxSk):
|
671
|
+
else # compute deltaadd out of the relative error
|
672
|
+
deltaadd := epsadd * (maxSk+epsmulk*maxSk):
|
673
|
+
fi:
|
674
|
+
deltaaddk := evalf( epsmulk*maxPk + deltaadd + 10^(-Digits+2) ):
|
675
|
+
epsaddk := deltaaddk/minSk + 10^(-Digits+2) :
|
676
|
+
# warnings
|
677
|
+
if (minSk=0) then
|
678
|
+
printf("Warning! in compute_abs_rounding_error, minSk=0 at iteration %d, consider decreasing xmax\n",k):
|
679
|
+
fi:
|
680
|
+
fi:
|
681
|
+
printf("step %d epsmulk=%1.4e deltaaddk=%1.4e minSk=%1.4e maxSk=%1.4e\n", k, epsmulk, deltaaddk, minSk, maxSk):
|
682
|
+
|
683
|
+
|
684
|
+
# if (epsadd=2**(-103)) then
|
685
|
+
# # fast Add22 ?
|
686
|
+
# if abs(coeff(poly,x,k)) < xmax*maxSk # may be improved to xmax*Smax/2
|
687
|
+
# then printf("WARNING Add22 cannot be used at step %d, use Add22Cond\n" , k ):
|
688
|
+
# printf(" coeff=%1.20e, xmax*Smax=%1.20e" , abs(coeff(poly,x,i)), xmax*maxSk ):
|
689
|
+
# fi:
|
690
|
+
# fi:
|
691
|
+
|
692
|
+
od:
|
693
|
+
|
694
|
+
return (epsmulk, deltaaddk, minSk, maxSk)
|
695
|
+
end proc:
|
696
|
+
|
697
|
+
|
698
|
+
#---------------------------------------------------------------------
|
699
|
+
# An helper function to build an errlist for the previous procedure.
|
700
|
+
# Arguments:
|
701
|
+
# n is the degree of the polynomial,
|
702
|
+
# ddadd, ddmul number of (final) double-double operations
|
703
|
+
# depsx error on x in the double steps
|
704
|
+
# ddepsx error on x in the double-double steps (when x is represented by a double-double)
|
705
|
+
|
706
|
+
errlist_quickphase_horner := proc(n,ddadd, ddmul,depsx, ddepsx)
|
707
|
+
local nddadd, epsadd, nddmul, epsmul, epsx:
|
708
|
+
if n=0
|
709
|
+
then []
|
710
|
+
else
|
711
|
+
if ddadd>0 then
|
712
|
+
nddadd:=ddadd-1:
|
713
|
+
epsadd:=2**(-103):
|
714
|
+
else
|
715
|
+
nddadd:=ddadd:
|
716
|
+
epsadd:=2**(-53):
|
717
|
+
fi:
|
718
|
+
if ddmul>0 then
|
719
|
+
nddmul:=ddmul-1:
|
720
|
+
epsmul:=2**(-102):
|
721
|
+
epsx:=ddepsx:
|
722
|
+
else
|
723
|
+
nddmul:=ddmul:
|
724
|
+
epsmul:=2**(-53):
|
725
|
+
epsx:=depsx:
|
726
|
+
fi:
|
727
|
+
[ [epsx,epsadd,epsmul] , op(errlist_quickphase_horner(n-1, nddadd, nddmul, depsx,ddepsx))]
|
728
|
+
fi:
|
729
|
+
end proc:
|
730
|
+
|
731
|
+
|
732
|
+
|
733
|
+
#---------------------------------------------------------------------
|
734
|
+
# Finding the worst cases for additive range reduction
|
735
|
+
# cut and paste from the web page of Muller's book
|
736
|
+
# Much faster if called with Digits very large and an evalf() on C
|
737
|
+
# but then check
|
738
|
+
|
739
|
+
WorstCaseForAdditiveRangeReduction:=proc(B,n,emin,emax,C)
|
740
|
+
local epsilonmin,powerofBoverC,e,a,Plast,r,Qlast, Q,P,NewQ,NewP,epsilon, numbermin,expmin,l:
|
741
|
+
epsilonmin := 12345.0 :
|
742
|
+
powerofBoverC := B^(emin-n)/C:
|
743
|
+
for e from emin-n+1 to emax-n+1 do
|
744
|
+
powerofBoverC := B*powerofBoverC:
|
745
|
+
a := floor(powerofBoverC):
|
746
|
+
Plast := a:
|
747
|
+
r := 1/(powerofBoverC-a):
|
748
|
+
a := floor(r):
|
749
|
+
Qlast := 1:
|
750
|
+
Q := a:
|
751
|
+
P := Plast*a+1:
|
752
|
+
while Q < B^n-1 do
|
753
|
+
r := 1/(r-a):
|
754
|
+
a := floor(r):
|
755
|
+
NewQ := Q*a+Qlast:
|
756
|
+
NewP := P*a+Plast:
|
757
|
+
Qlast := Q:
|
758
|
+
Plast := P:
|
759
|
+
Q := NewQ:
|
760
|
+
P := NewP
|
761
|
+
od:
|
762
|
+
epsilon := evalf(C*abs(Plast-Qlast*powerofBoverC)):
|
763
|
+
if epsilon < epsilonmin then
|
764
|
+
epsilonmin := epsilon: numbermin := Qlast:
|
765
|
+
expmin := e
|
766
|
+
fi
|
767
|
+
od:
|
768
|
+
print('mantissa',numbermin):
|
769
|
+
print('exponent',expmin):
|
770
|
+
print('epsilon',epsilonmin):
|
771
|
+
l := evalf(log(epsilonmin)/log(B),10):
|
772
|
+
print(numberofdigits,l):
|
773
|
+
(numbermin, expmin, epsilonmin)
|
774
|
+
end proc:
|
775
|
+
|
776
|
+
|
777
|
+
|
778
|
+
|
779
|
+
|
780
|
+
|
781
|
+
|
782
|
+
#####################################################################
|
783
|
+
|
784
|
+
# Stuff for SCS
|
785
|
+
#####################################################################
|
786
|
+
|
787
|
+
# Global parameters
|
788
|
+
# Don�t forget to set all the parameters here
|
789
|
+
SCS_NB_WORDS := 8:
|
790
|
+
SCS_NB_BITS := 30:
|
791
|
+
|
792
|
+
|
793
|
+
|
794
|
+
|
795
|
+
|
796
|
+
#---------------------------------------------------------------------
|
797
|
+
# This procedure convert a decimal number into it SCS representation.
|
798
|
+
# x : input number to convert into it SCS representation
|
799
|
+
real_to_SCS := proc(x)
|
800
|
+
local exception, index, sgn, mantissa, nb, i:
|
801
|
+
|
802
|
+
if x <> 0 then
|
803
|
+
exception := 1:
|
804
|
+
if x > 0 then
|
805
|
+
sgn := 1:
|
806
|
+
nb := x:
|
807
|
+
elif x < 0 then
|
808
|
+
sgn := -1:
|
809
|
+
nb := -x:
|
810
|
+
end if:
|
811
|
+
|
812
|
+
index := 0:
|
813
|
+
|
814
|
+
if nb >= 1 then
|
815
|
+
for i from 0 while nb > (2^(SCS_NB_BITS+1)-1) do
|
816
|
+
index := index+1:
|
817
|
+
nb := nb * 2^(-SCS_NB_BITS):
|
818
|
+
end do:
|
819
|
+
else
|
820
|
+
for i from 0 while nb < 1 do
|
821
|
+
index := index-1:
|
822
|
+
nb := nb * 2^(SCS_NB_BITS):
|
823
|
+
end do:
|
824
|
+
end if:
|
825
|
+
|
826
|
+
for i from 0 by 1 to (SCS_NB_WORDS-1) do
|
827
|
+
mantissa[i] := trunc(nb):
|
828
|
+
nb := (nb - mantissa[i]) * 2^(SCS_NB_BITS):
|
829
|
+
end do:
|
830
|
+
else
|
831
|
+
for i from 0 by 1 to (SCS_NB_WORDS-1) do
|
832
|
+
mantissa[i] := 0:
|
833
|
+
end do:
|
834
|
+
|
835
|
+
index := 1:
|
836
|
+
exception := x:
|
837
|
+
sgn := 1:
|
838
|
+
end if:
|
839
|
+
mantissa[SCS_NB_WORDS] := exception:
|
840
|
+
mantissa[SCS_NB_WORDS+1] := index:
|
841
|
+
mantissa[SCS_NB_WORDS+2] := sgn:
|
842
|
+
|
843
|
+
return mantissa:
|
844
|
+
end proc:
|
845
|
+
|
846
|
+
|
847
|
+
|
848
|
+
|
849
|
+
#---------------------------------------------------------------------
|
850
|
+
# Convert an SCS number into a rational number
|
851
|
+
|
852
|
+
SCS_to_real := proc(tab)
|
853
|
+
local res, i:
|
854
|
+
|
855
|
+
if (tab[SCS_NB_WORDS] <> 1) then
|
856
|
+
return tab[SCS_NB_WORDS]:
|
857
|
+
end if:
|
858
|
+
|
859
|
+
res := 0:
|
860
|
+
for i from (SCS_NB_WORDS-1) by -1 while i>=0 do
|
861
|
+
res := 2^(-SCS_NB_BITS)*res + tab[i]
|
862
|
+
end do:
|
863
|
+
|
864
|
+
res := tab[SCS_NB_WORDS+2]*(res * 2.^(SCS_NB_BITS * tab[SCS_NB_WORDS+1])):
|
865
|
+
|
866
|
+
return res:
|
867
|
+
|
868
|
+
end proc:
|
869
|
+
|
870
|
+
|
871
|
+
|
872
|
+
#---------------------------------------------------------------------
|
873
|
+
# This procedure truncates the coefficients of a polynomial to SCS
|
874
|
+
# numbers, so that we can then evaluate its approximation error
|
875
|
+
# (equivalent to poly_exact for the doubles)
|
876
|
+
poly_exact_SCS:=proc(P)
|
877
|
+
local deg,i, coef, coef_t, Q:
|
878
|
+
Q:= 0:
|
879
|
+
convert(Q, polynom):
|
880
|
+
deg:=degree(P,x):
|
881
|
+
for i from 0 to deg do
|
882
|
+
coef:=coeff(P,x,i):
|
883
|
+
coef_t:=SCS_to_real(real_to_SCS(evalf(coef))):
|
884
|
+
Q:= Q + coef_t*x^i:
|
885
|
+
od:
|
886
|
+
return(Q):
|
887
|
+
end:
|
888
|
+
|
889
|
+
|
890
|
+
|
891
|
+
#---------------------------------------------------------------------
|
892
|
+
# Write Into file fd the SCSS number stored into the table tab where
|
893
|
+
# tab[0..(SCS_NB_WORDS-1)] store the mantissa
|
894
|
+
# tab[SCS_NB_WORDS] store the exception
|
895
|
+
# tab[SCS_NB_WORDS+1] store the index
|
896
|
+
# tab[SCS_NB_WORDS+2] store the sign
|
897
|
+
# You probably want to use WriteSCS below !
|
898
|
+
|
899
|
+
WriteSCS_from_table := proc(fd, tab)
|
900
|
+
local i:
|
901
|
+
|
902
|
+
fprintf(fd,"{{"):
|
903
|
+
|
904
|
+
fprintf(fd,"0x%+0.8x, ", tab[0]):
|
905
|
+
for i from 1 by 1 to (SCS_NB_WORDS-2) do
|
906
|
+
fprintf(fd,"0x%+0.8x, ", tab[i]):
|
907
|
+
if (i mod 4 = 3) then
|
908
|
+
fprintf(fd,"\n"):
|
909
|
+
fi:
|
910
|
+
end do:
|
911
|
+
fprintf(fd,"0x%+0.8x},\n", tab[SCS_NB_WORDS-1]):
|
912
|
+
if (tab[SCS_NB_WORDS]=1) then
|
913
|
+
fprintf(fd,"DB_ONE, %3d, %3d ", tab[SCS_NB_WORDS+1], tab[SCS_NB_WORDS+2]):
|
914
|
+
else
|
915
|
+
# the only other possible value is 0 so ...
|
916
|
+
fprintf(fd,"{0x00000000, 0x00000000}, %3d, %3d ", tab[SCS_NB_WORDS+1], tab[SCS_NB_WORDS+2]):
|
917
|
+
end if:
|
918
|
+
|
919
|
+
fprintf(fd, "} \n"):
|
920
|
+
end proc:
|
921
|
+
|
922
|
+
|
923
|
+
#---------------------------------------------------------------------
|
924
|
+
# Write a real number as an SCS array to a file
|
925
|
+
|
926
|
+
WriteSCS := proc (fd,x)
|
927
|
+
WriteSCS_from_table(fd , real_to_SCS (x)):
|
928
|
+
end:
|
929
|
+
|
930
|
+
|
931
|
+
|
932
|
+
#---------------------------------------------------------------------
|
933
|
+
# A procedure to count the non-zero coefficients of a polynomial (to store it)
|
934
|
+
|
935
|
+
get_nb_terms := proc(poly)
|
936
|
+
local i, deg_poly:
|
937
|
+
|
938
|
+
deg_poly := degree(poly):
|
939
|
+
for i from deg_poly by -1 while i>=0 do
|
940
|
+
if coeff(poly, x, i)=0 then
|
941
|
+
deg_poly := deg_poly-1:
|
942
|
+
end if:
|
943
|
+
end do:
|
944
|
+
|
945
|
+
return deg_poly:
|
946
|
+
end proc:
|
947
|
+
|
948
|
+
|
949
|
+
|
950
|
+
|
951
|
+
|
952
|
+
|
953
|
+
|
954
|
+
|
955
|
+
#---------------------------------------------------------------------
|
956
|
+
# Write a polynomial to an array of SCS coefficients
|
957
|
+
|
958
|
+
# fd : file where to put the result
|
959
|
+
# poly : input polynom
|
960
|
+
# name : name of the array
|
961
|
+
|
962
|
+
Write_SCS_poly := proc(fd, name, poly)
|
963
|
+
local i, deg:
|
964
|
+
#fclose(fd):
|
965
|
+
try
|
966
|
+
finally
|
967
|
+
fprintf(fd,"static const scs %s [%d]=\n", name, get_nb_terms(poly)+1):
|
968
|
+
deg := degree(poly):
|
969
|
+
|
970
|
+
fprintf(fd,"/* ~%1.50e */ \n{", coeff(poly, x, deg)):
|
971
|
+
WriteSCS(fd, coeff(poly, x, deg)):
|
972
|
+
for i from (deg-1) by (-1) while i>=0 do
|
973
|
+
if (coeff(poly, x, i)<>0) then
|
974
|
+
fprintf(fd,",\n/* ~%1.50e */ \n", coeff(poly, x, i)):
|
975
|
+
WriteSCS(fd, coeff(poly, x, i), 0):
|
976
|
+
end if:
|
977
|
+
end do:
|
978
|
+
fprintf(fd,"};\n"):
|
979
|
+
end try:
|
980
|
+
end proc:
|
981
|
+
|
982
|
+
|
983
|
+
|
984
|
+
|
985
|
+
|
986
|
+
|
987
|
+
|
988
|
+
|
989
|
+
|
990
|
+
|
991
|
+
|
992
|
+
|
993
|
+
|
994
|
+
|
995
|
+
|
996
|
+
|
997
|
+
|