intervals 0.3.56
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION.txt +1 -0
- data/ext/crlibm/AUTHORS +2 -0
- data/ext/crlibm/COPYING +504 -0
- data/ext/crlibm/ChangeLog +80 -0
- data/ext/crlibm/INSTALL +182 -0
- data/ext/crlibm/Makefile.am +84 -0
- data/ext/crlibm/Makefile.in +530 -0
- data/ext/crlibm/NEWS +0 -0
- data/ext/crlibm/README +31 -0
- data/ext/crlibm/TODO +47 -0
- data/ext/crlibm/VERSION +1 -0
- data/ext/crlibm/aclocal.m4 +989 -0
- data/ext/crlibm/atan-itanium.c +846 -0
- data/ext/crlibm/atan-pentium.c +261 -0
- data/ext/crlibm/atan_accurate.c +244 -0
- data/ext/crlibm/atan_accurate.h +191 -0
- data/ext/crlibm/atan_fast.c +324 -0
- data/ext/crlibm/atan_fast.h +678 -0
- data/ext/crlibm/config.guess +1461 -0
- data/ext/crlibm/config.sub +1566 -0
- data/ext/crlibm/configure +7517 -0
- data/ext/crlibm/configure.ac +364 -0
- data/ext/crlibm/crlibm.h +125 -0
- data/ext/crlibm/crlibm_config.h +149 -0
- data/ext/crlibm/crlibm_config.h.in +148 -0
- data/ext/crlibm/crlibm_private.c +293 -0
- data/ext/crlibm/crlibm_private.h +658 -0
- data/ext/crlibm/csh_fast.c +631 -0
- data/ext/crlibm/csh_fast.h +771 -0
- data/ext/crlibm/double-extended.h +496 -0
- data/ext/crlibm/exp-td.c +962 -0
- data/ext/crlibm/exp-td.h +685 -0
- data/ext/crlibm/exp_accurate.c +197 -0
- data/ext/crlibm/exp_accurate.h +85 -0
- data/ext/crlibm/gappa/log-de-E0-logir0.gappa +106 -0
- data/ext/crlibm/gappa/log-de-E0.gappa +79 -0
- data/ext/crlibm/gappa/log-de.gappa +81 -0
- data/ext/crlibm/gappa/log-td-E0-logir0.gappa +126 -0
- data/ext/crlibm/gappa/log-td-E0.gappa +143 -0
- data/ext/crlibm/gappa/log-td-accurate-E0-logir0.gappa +230 -0
- data/ext/crlibm/gappa/log-td-accurate-E0.gappa +213 -0
- data/ext/crlibm/gappa/log-td-accurate.gappa +217 -0
- data/ext/crlibm/gappa/log-td.gappa +156 -0
- data/ext/crlibm/gappa/trigoSinCosCase3.gappa +204 -0
- data/ext/crlibm/gappa/trigoTanCase2.gappa +73 -0
- data/ext/crlibm/install-sh +269 -0
- data/ext/crlibm/log-de.c +431 -0
- data/ext/crlibm/log-de.h +732 -0
- data/ext/crlibm/log-td.c +852 -0
- data/ext/crlibm/log-td.h +819 -0
- data/ext/crlibm/log10-td.c +906 -0
- data/ext/crlibm/log10-td.h +823 -0
- data/ext/crlibm/log2-td.c +935 -0
- data/ext/crlibm/log2-td.h +821 -0
- data/ext/crlibm/maple/atan.mpl +359 -0
- data/ext/crlibm/maple/common-procedures.mpl +997 -0
- data/ext/crlibm/maple/csh.mpl +446 -0
- data/ext/crlibm/maple/double-extended.mpl +151 -0
- data/ext/crlibm/maple/exp-td.mpl +195 -0
- data/ext/crlibm/maple/log-de.mpl +243 -0
- data/ext/crlibm/maple/log-td.mpl +316 -0
- data/ext/crlibm/maple/log10-td.mpl +345 -0
- data/ext/crlibm/maple/log2-td.mpl +334 -0
- data/ext/crlibm/maple/trigo.mpl +728 -0
- data/ext/crlibm/maple/triple-double.mpl +58 -0
- data/ext/crlibm/missing +198 -0
- data/ext/crlibm/mkinstalldirs +40 -0
- data/ext/crlibm/rem_pio2_accurate.c +219 -0
- data/ext/crlibm/rem_pio2_accurate.h +53 -0
- data/ext/crlibm/scs_lib/AUTHORS +3 -0
- data/ext/crlibm/scs_lib/COPYING +504 -0
- data/ext/crlibm/scs_lib/ChangeLog +16 -0
- data/ext/crlibm/scs_lib/INSTALL +215 -0
- data/ext/crlibm/scs_lib/Makefile.am +18 -0
- data/ext/crlibm/scs_lib/Makefile.in +328 -0
- data/ext/crlibm/scs_lib/NEWS +0 -0
- data/ext/crlibm/scs_lib/README +9 -0
- data/ext/crlibm/scs_lib/TODO +4 -0
- data/ext/crlibm/scs_lib/addition_scs.c +623 -0
- data/ext/crlibm/scs_lib/config.guess +1461 -0
- data/ext/crlibm/scs_lib/config.sub +1566 -0
- data/ext/crlibm/scs_lib/configure +6226 -0
- data/ext/crlibm/scs_lib/division_scs.c +110 -0
- data/ext/crlibm/scs_lib/double2scs.c +174 -0
- data/ext/crlibm/scs_lib/install-sh +269 -0
- data/ext/crlibm/scs_lib/missing +198 -0
- data/ext/crlibm/scs_lib/mkinstalldirs +40 -0
- data/ext/crlibm/scs_lib/multiplication_scs.c +456 -0
- data/ext/crlibm/scs_lib/poly_fct.c +112 -0
- data/ext/crlibm/scs_lib/print_scs.c +73 -0
- data/ext/crlibm/scs_lib/rand_scs.c +63 -0
- data/ext/crlibm/scs_lib/scs.h +353 -0
- data/ext/crlibm/scs_lib/scs2double.c +391 -0
- data/ext/crlibm/scs_lib/scs2mpf.c +58 -0
- data/ext/crlibm/scs_lib/scs2mpfr.c +61 -0
- data/ext/crlibm/scs_lib/scs_private.c +23 -0
- data/ext/crlibm/scs_lib/scs_private.h +133 -0
- data/ext/crlibm/scs_lib/tests/tbx_timing.h +102 -0
- data/ext/crlibm/scs_lib/wrapper_scs.h +486 -0
- data/ext/crlibm/scs_lib/zero_scs.c +52 -0
- data/ext/crlibm/stamp-h.in +1 -0
- data/ext/crlibm/tests/Makefile.am +43 -0
- data/ext/crlibm/tests/Makefile.in +396 -0
- data/ext/crlibm/tests/blind_test.c +148 -0
- data/ext/crlibm/tests/generate_test_vectors.c +258 -0
- data/ext/crlibm/tests/soak_test.c +334 -0
- data/ext/crlibm/tests/test_common.c +627 -0
- data/ext/crlibm/tests/test_common.h +28 -0
- data/ext/crlibm/tests/test_perf.c +570 -0
- data/ext/crlibm/tests/test_val.c +249 -0
- data/ext/crlibm/trigo_accurate.c +500 -0
- data/ext/crlibm/trigo_accurate.h +331 -0
- data/ext/crlibm/trigo_fast.c +1219 -0
- data/ext/crlibm/trigo_fast.h +639 -0
- data/ext/crlibm/triple-double.h +878 -0
- data/ext/extconf.rb +31 -0
- data/ext/fpu.c +107 -0
- data/ext/jamis-mod.rb +591 -0
- data/lib/fpu.rb +287 -0
- data/lib/interval.rb +1170 -0
- data/lib/intervals.rb +212 -0
- data/lib/struct_float.rb +133 -0
- data/test/data_atan.txt +360 -0
- data/test/data_cos.txt +346 -0
- data/test/data_cosh.txt +3322 -0
- data/test/data_exp.txt +3322 -0
- data/test/data_log.txt +141 -0
- data/test/data_sin.txt +140 -0
- data/test/data_sinh.txt +3322 -0
- data/test/data_tan.txt +342 -0
- metadata +186 -0
@@ -0,0 +1,997 @@
|
|
1
|
+
|
2
|
+
#####################################################################
|
3
|
+
# Useful procedures for IEEE doubles
|
4
|
+
|
5
|
+
|
6
|
+
#---------------------------------------------------------------------
|
7
|
+
|
8
|
+
log2:=proc(x) evalf( log[2](x)) end proc:
|
9
|
+
|
10
|
+
|
11
|
+
#---------------------------------------------------------------------
|
12
|
+
# ieeedouble converts a number to IEEE double format.
|
13
|
+
# returns sign (-1 or 1), exponent between -1022 and 1023, mantissa as a fraction between 0.5 and 1.
|
14
|
+
ieeedouble:=proc(xx)
|
15
|
+
local x, sgn, logabsx, exponent, mantissa, infmantissa,powermin,powermax,expmin,expmax,expmiddle,powermiddle;
|
16
|
+
Digits := 100;
|
17
|
+
x := evalf(xx);
|
18
|
+
if (x=0) then sgn, exponent, mantissa := 1, -1022, 0
|
19
|
+
else
|
20
|
+
if (x < 0) then sgn := -1
|
21
|
+
else sgn := 1
|
22
|
+
fi:
|
23
|
+
x := abs(x);
|
24
|
+
if x >= 2^(1023)*(2-2^(-53)) then mantissa := infinity; exponent := 1023
|
25
|
+
else if x <= 2^(-1075) then mantissa := 0; exponent := -1022
|
26
|
+
else
|
27
|
+
if x <= 2^(-1022) then exponent := -1022
|
28
|
+
else
|
29
|
+
# x is between 2^(-1022) and 2^(1024)
|
30
|
+
powermin := 2^(-1022); expmin := -1022;
|
31
|
+
powermax := 2^1024; expmax := 1024;
|
32
|
+
while (expmax-expmin > 1) do
|
33
|
+
expmiddle := round((expmax+expmin)/2);
|
34
|
+
powermiddle := 2^expmiddle;
|
35
|
+
if x >= powermiddle then
|
36
|
+
powermin := powermiddle;
|
37
|
+
expmin := expmiddle
|
38
|
+
else
|
39
|
+
powermax := powermiddle;
|
40
|
+
expmax := expmiddle
|
41
|
+
fi
|
42
|
+
od;
|
43
|
+
# now, expmax - expmin = 1 and powermin <= x < powermax,
|
44
|
+
# powermin = 2^expmin and powermax = 2^expmax, so expmin is the exponent of x
|
45
|
+
exponent := expmin;
|
46
|
+
fi;
|
47
|
+
infmantissa := x*2^(52-exponent);
|
48
|
+
if frac(infmantissa) <> 0.5 then mantissa := round(infmantissa)
|
49
|
+
else
|
50
|
+
mantissa := floor(infmantissa);
|
51
|
+
if type(mantissa,odd) then mantissa := mantissa+1 fi
|
52
|
+
fi;
|
53
|
+
mantissa := mantissa*2^(-52);
|
54
|
+
fi;
|
55
|
+
fi;
|
56
|
+
fi;
|
57
|
+
sgn,exponent,mantissa;
|
58
|
+
end:
|
59
|
+
|
60
|
+
#---------------------------------------------------------------------
|
61
|
+
# ieeedoubleRU converts a number to IEEE double format rounding upwards.
|
62
|
+
# returns sign (-1 or 1), exponent between -1022 and 1023, mantissa as a fraction between 0.5 and 1.
|
63
|
+
ieeedoubleRU:=proc(xx)
|
64
|
+
local x, sgn, logabsx, exponent, mantissa, infmantissa,powermin,powermax,expmin,expmax,expmiddle,powermiddle;
|
65
|
+
Digits := 100;
|
66
|
+
x := evalf(xx);
|
67
|
+
if (x=0) then sgn, exponent, mantissa := 1, -1022, 0
|
68
|
+
else
|
69
|
+
if (x < 0) then sgn := -1
|
70
|
+
else sgn := 1
|
71
|
+
fi:
|
72
|
+
x := abs(x);
|
73
|
+
if x >= 2^(1023)*(2-2^(-53)) then mantissa := infinity; exponent := 1023
|
74
|
+
else if x <= 2^(-1075) then mantissa := 0; exponent := -1022
|
75
|
+
else
|
76
|
+
if x <= 2^(-1022) then exponent := -1022
|
77
|
+
else
|
78
|
+
# x is between 2^(-1022) and 2^(1024)
|
79
|
+
powermin := 2^(-1022); expmin := -1022;
|
80
|
+
powermax := 2^1024; expmax := 1024;
|
81
|
+
while (expmax-expmin > 1) do
|
82
|
+
expmiddle := round((expmax+expmin)/2);
|
83
|
+
powermiddle := 2^expmiddle;
|
84
|
+
if x >= powermiddle then
|
85
|
+
powermin := powermiddle;
|
86
|
+
expmin := expmiddle
|
87
|
+
else
|
88
|
+
powermax := powermiddle;
|
89
|
+
expmax := expmiddle
|
90
|
+
fi
|
91
|
+
od;
|
92
|
+
# now, expmax - expmin = 1 and powermin <= x < powermax,
|
93
|
+
# powermin = 2^expmin and powermax = 2^expmax, so expmin is the exponent of x
|
94
|
+
exponent := expmin;
|
95
|
+
fi;
|
96
|
+
infmantissa := x*2^(52-exponent);
|
97
|
+
if frac(infmantissa) <> 0 then mantissa := ceil(infmantissa)
|
98
|
+
else
|
99
|
+
mantissa := infmantissa;
|
100
|
+
fi;
|
101
|
+
mantissa := mantissa*2^(-52);
|
102
|
+
fi;
|
103
|
+
fi;
|
104
|
+
fi;
|
105
|
+
sgn,exponent,mantissa;
|
106
|
+
end:
|
107
|
+
|
108
|
+
#---------------------------------------------------------------------
|
109
|
+
# ieeedoubleRD converts a number to IEEE double format rounding upwards.
|
110
|
+
# returns sign (-1 or 1), exponent between -1022 and 1023, mantissa as a fraction between 0.5 and 1.
|
111
|
+
ieeedoubleRD:=proc(xx)
|
112
|
+
local x, sgn, logabsx, exponent, mantissa, infmantissa,powermin,powermax,expmin,expmax,expmiddle,powermiddle;
|
113
|
+
Digits := 100;
|
114
|
+
x := evalf(xx);
|
115
|
+
if (x=0) then sgn, exponent, mantissa := 1, -1022, 0
|
116
|
+
else
|
117
|
+
if (x < 0) then sgn := -1
|
118
|
+
else sgn := 1
|
119
|
+
fi:
|
120
|
+
x := abs(x);
|
121
|
+
if x >= 2^(1023)*(2-2^(-53)) then mantissa := infinity; exponent := 1023
|
122
|
+
else if x <= 2^(-1075) then mantissa := 0; exponent := -1022
|
123
|
+
else
|
124
|
+
if x <= 2^(-1022) then exponent := -1022
|
125
|
+
else
|
126
|
+
# x is between 2^(-1022) and 2^(1024)
|
127
|
+
powermin := 2^(-1022); expmin := -1022;
|
128
|
+
powermax := 2^1024; expmax := 1024;
|
129
|
+
while (expmax-expmin > 1) do
|
130
|
+
expmiddle := round((expmax+expmin)/2);
|
131
|
+
powermiddle := 2^expmiddle;
|
132
|
+
if x >= powermiddle then
|
133
|
+
powermin := powermiddle;
|
134
|
+
expmin := expmiddle
|
135
|
+
else
|
136
|
+
powermax := powermiddle;
|
137
|
+
expmax := expmiddle
|
138
|
+
fi
|
139
|
+
od;
|
140
|
+
# now, expmax - expmin = 1 and powermin <= x < powermax,
|
141
|
+
# powermin = 2^expmin and powermax = 2^expmax, so expmin is the exponent of x
|
142
|
+
exponent := expmin;
|
143
|
+
fi;
|
144
|
+
infmantissa := x*2^(52-exponent);
|
145
|
+
if frac(infmantissa) <> 0 then mantissa := floor(infmantissa)
|
146
|
+
else
|
147
|
+
mantissa := infmantissa;
|
148
|
+
fi;
|
149
|
+
mantissa := mantissa*2^(-52);
|
150
|
+
fi;
|
151
|
+
fi;
|
152
|
+
fi;
|
153
|
+
sgn,exponent,mantissa;
|
154
|
+
end:
|
155
|
+
|
156
|
+
|
157
|
+
|
158
|
+
|
159
|
+
#---------------------------------------------------------------------
|
160
|
+
# pulp returns the precision of the ulp of x
|
161
|
+
|
162
|
+
pulp:=proc(x)
|
163
|
+
local flt, ulpy:
|
164
|
+
flt:=ieeedouble(x):
|
165
|
+
ulpy:=-52+flt[2]:
|
166
|
+
end proc:
|
167
|
+
|
168
|
+
|
169
|
+
|
170
|
+
#---------------------------------------------------------------------
|
171
|
+
# ulp returns the absolute value of the ulp of x
|
172
|
+
ulp:=proc(x)
|
173
|
+
2**(pulp(x)):
|
174
|
+
end proc:
|
175
|
+
|
176
|
+
|
177
|
+
|
178
|
+
|
179
|
+
#---------------------------------------------------------------------
|
180
|
+
# Returns nearest IEEE double:
|
181
|
+
nearest := proc(x)
|
182
|
+
local sgn, exponent, mantissa:
|
183
|
+
|
184
|
+
sgn,exponent,mantissa := ieeedouble(x):
|
185
|
+
sgn*mantissa*2^(exponent):
|
186
|
+
|
187
|
+
end:
|
188
|
+
|
189
|
+
#---------------------------------------------------------------------
|
190
|
+
# Returns RU IEEE double:
|
191
|
+
roundUp := proc(x)
|
192
|
+
local sgn, exponent, mantissa:
|
193
|
+
|
194
|
+
sgn,exponent,mantissa := ieeedoubleRU(x):
|
195
|
+
sgn*mantissa*2^(exponent):
|
196
|
+
|
197
|
+
end:
|
198
|
+
|
199
|
+
#---------------------------------------------------------------------
|
200
|
+
# Returns RD IEEE double:
|
201
|
+
roundDown := proc(x)
|
202
|
+
local sgn, exponent, mantissa:
|
203
|
+
|
204
|
+
sgn,exponent,mantissa := ieeedoubleRD(x):
|
205
|
+
sgn*mantissa*2^(exponent):
|
206
|
+
|
207
|
+
end:
|
208
|
+
|
209
|
+
#---------------------------------------------------------------------
|
210
|
+
# Returns RZ IEEE double:
|
211
|
+
roundToZero := proc(x)
|
212
|
+
if evalf(x) > 0 then roundDown(x) else roundUp(x) fi:
|
213
|
+
end:
|
214
|
+
|
215
|
+
|
216
|
+
|
217
|
+
|
218
|
+
#---------------------------------------------------------------------
|
219
|
+
# ieehexa returns a string containing the hexadecimal representation of the double nearest to input x.
|
220
|
+
|
221
|
+
ieeehexa:= proc(x)
|
222
|
+
local hex2, xx, longint, expo, sgn, frac, resultat:
|
223
|
+
if(x=0) then resultat:=["00000000","00000000"]:
|
224
|
+
elif(x=-0) then resultat:=["80000000","00000000"]: # nice try
|
225
|
+
else
|
226
|
+
xx:=ieeedouble(x):
|
227
|
+
sgn:=xx[1]:
|
228
|
+
expo:=xx[2]:
|
229
|
+
frac:=xx[3]:
|
230
|
+
if (expo = -1023) then
|
231
|
+
longint := (frac)*2^51 : # subnormal
|
232
|
+
else
|
233
|
+
longint := (frac-1)*2^52 + (expo+1023)*2^52:
|
234
|
+
fi:
|
235
|
+
if (sgn=-1) then
|
236
|
+
longint := longint + 2^63:
|
237
|
+
fi:
|
238
|
+
longint := longint + 2^64: # to get all the hexadecimal digits when we'll convert to string
|
239
|
+
hex2:=convert(longint, hex):
|
240
|
+
hex2:=convert(hex2, string):
|
241
|
+
|
242
|
+
resultat:=[substring(hex2,2..9), substring(hex2,10..18)]:
|
243
|
+
fi:
|
244
|
+
resultat:
|
245
|
+
end proc:
|
246
|
+
|
247
|
+
|
248
|
+
|
249
|
+
#---------------------------------------------------------------------
|
250
|
+
# reciprocal of the previous
|
251
|
+
hexa2ieee:= proc(hexa)
|
252
|
+
local dec, bin, expo, mantis, sgn, hex1, hex2, hexcat, res:
|
253
|
+
|
254
|
+
hex1:= op(1, hexa):
|
255
|
+
hex2:= op(2, hexa):
|
256
|
+
hexcat:= cat(hex1, hex2):
|
257
|
+
dec:= convert(hexcat, decimal, hex):
|
258
|
+
|
259
|
+
if(dec >= 2^63) then
|
260
|
+
dec := dec - 2^63:
|
261
|
+
sgn:= -1:
|
262
|
+
else
|
263
|
+
sgn:= 1:
|
264
|
+
fi:
|
265
|
+
expo:= trunc(dec/(2^52)) - 1023:
|
266
|
+
if(expo=-1023) then
|
267
|
+
mantis:= frac(dec/(2^51)): # denormal
|
268
|
+
else
|
269
|
+
mantis:= 1+frac(dec/(2^52)):
|
270
|
+
fi:
|
271
|
+
res:= evalf(sgn*2^(expo)*mantis):
|
272
|
+
res:
|
273
|
+
end proc:
|
274
|
+
|
275
|
+
#---------------------------------------------------------------------
|
276
|
+
|
277
|
+
|
278
|
+
|
279
|
+
# Print a number x in Low or Big Endian representation in opened file "fd":
|
280
|
+
printendian:=proc(fd,x,isbig)
|
281
|
+
local xhl:
|
282
|
+
xhl:=ieeehexa(x):
|
283
|
+
|
284
|
+
if(isbig=0 or isbig=1) then
|
285
|
+
fprintf(fd,"{{0x%+0.8s,0x%+0.8s}} /* %+0.10e */", xhl[2-isbig], xhl[isbig+1], x):
|
286
|
+
else
|
287
|
+
print("ERROR, isbig must be equal to 0 or 1"):
|
288
|
+
end if:
|
289
|
+
end proc:
|
290
|
+
|
291
|
+
|
292
|
+
|
293
|
+
#---------------------------------------------------------------------
|
294
|
+
# hi_lo takes an arbitrary precision number x and returns two doubles such that:
|
295
|
+
# x ~ x_hi + x_lo
|
296
|
+
hi_lo:= proc(x)
|
297
|
+
local x_hi, x_lo, res:
|
298
|
+
x_hi:= nearest(evalf(x)):
|
299
|
+
res:=x-x_hi:
|
300
|
+
if (res = 0) then
|
301
|
+
x_lo:=0:
|
302
|
+
else
|
303
|
+
x_lo:=nearest(evalf(res)):
|
304
|
+
end if:
|
305
|
+
x_hi,x_lo:
|
306
|
+
end:
|
307
|
+
|
308
|
+
|
309
|
+
|
310
|
+
#---------------------------------------------------------------------
|
311
|
+
# same as hi_lo, but returns hexadecimal strings
|
312
|
+
ieeehexa2:=proc(x)
|
313
|
+
local reshi, reslo, hexhi, hexlo:
|
314
|
+
reshi:=nearest(x):
|
315
|
+
hexhi:=ieee2hexa(reshi):
|
316
|
+
reslo:= nearest(x-reshi):
|
317
|
+
hexlo:=ieee2hexa(reslo):
|
318
|
+
reshi, reslo:
|
319
|
+
end proc:
|
320
|
+
|
321
|
+
|
322
|
+
|
323
|
+
|
324
|
+
#---------------------------------------------------------------------
|
325
|
+
# Computes the constant for the round-to-nearest test.
|
326
|
+
# delta is the overall relative error of the approximation scheme
|
327
|
+
compute_rn_constant := proc(delta)
|
328
|
+
local k:
|
329
|
+
k := trunc(-log2(delta)) - 53:
|
330
|
+
nearest( 1+ 2**(-52) + (2**(54+k)*delta) / ( (2**k-1) * (1-2**(-53)) ) ):
|
331
|
+
end proc:
|
332
|
+
|
333
|
+
|
334
|
+
|
335
|
+
#---------------------------------------------------------------------
|
336
|
+
# Takes a real number, and prints the bits after the 53th of its nearest IEEE floating-point number
|
337
|
+
|
338
|
+
showHowDifficultToRound:=proc(x)
|
339
|
+
local xb,xs,s,e,m:
|
340
|
+
Digits:=200:
|
341
|
+
s,e,m := ieeedouble(x):
|
342
|
+
xb:=convert(evalf(x*2^(-e)),binary):
|
343
|
+
xs:=convert(xb, string):
|
344
|
+
substring(xs,55..153)
|
345
|
+
end proc:
|
346
|
+
|
347
|
+
|
348
|
+
|
349
|
+
#####################################################################
|
350
|
+
|
351
|
+
# Stuff about truncated polynomials
|
352
|
+
|
353
|
+
# Truncate a polynomial
|
354
|
+
|
355
|
+
#---------------------------------------------------------------------
|
356
|
+
#poly_exact takes a polynomial in x with arbitrary precision
|
357
|
+
# coefficients, and returns a truncated polynomial where coefficients
|
358
|
+
# are IEEE doubles.
|
359
|
+
|
360
|
+
poly_exact:=proc(P)
|
361
|
+
local deg,i, coef, coef_t, Q:
|
362
|
+
Q:= 0:
|
363
|
+
convert(Q, polynom):
|
364
|
+
deg:=degree(P,x):
|
365
|
+
for i from 0 to deg do
|
366
|
+
coef:=coeff(P,x,i):
|
367
|
+
coef_t:=nearest(coef):
|
368
|
+
Q:= Q + coef_t*x^i:
|
369
|
+
od:
|
370
|
+
return(Q):
|
371
|
+
end:
|
372
|
+
|
373
|
+
|
374
|
+
#---------------------------------------------------------------------
|
375
|
+
#Like poly_exact, but the n first coefficients are exactly
|
376
|
+
# representable as the sum of two doubles. (to actually get the two
|
377
|
+
# doubles, use procedure hi_lo)
|
378
|
+
|
379
|
+
poly_exact2:=proc(P,n)
|
380
|
+
local deg,i, coef, coef_hi, coef_lo, Q:
|
381
|
+
Q:= 0:
|
382
|
+
convert(Q, polynom):
|
383
|
+
deg:=degree(P,x):
|
384
|
+
for i from 0 to deg do
|
385
|
+
coef :=coeff(P,x,i):
|
386
|
+
coef_hi, coef_lo:=hi_lo(coef):
|
387
|
+
Q:= Q + coef_hi*x^i:
|
388
|
+
if(i<n) then
|
389
|
+
Q := Q + coef_lo*x^i:
|
390
|
+
fi:
|
391
|
+
od:
|
392
|
+
return(Q):
|
393
|
+
end:
|
394
|
+
|
395
|
+
|
396
|
+
#---------------------------------------------------------------------
|
397
|
+
# OBSOLETE use compute_horner_rounding_error below
|
398
|
+
# Compute a bound on the accumulated rounding error caused by the Horner evaluation of a truncated polynomial
|
399
|
+
# P is the polynomial.
|
400
|
+
# xmax is the max value of |x|.
|
401
|
+
# n is the degree when P is computed in double double. The first double-double operation is an addition.
|
402
|
+
|
403
|
+
# returns max absolute error, min of the function, max of the function.
|
404
|
+
|
405
|
+
# This procedure also checks on the fly that the fast (test-free) versions of the double-double addition can be used, i.e. that for all x, at each Horner step i computing ci+x*Si, we have |ci|>|x*Si|. It prints warnings if it not the case.
|
406
|
+
|
407
|
+
compute_abs_rounding_error:=proc(poly,xmax, nn)
|
408
|
+
local n, deg, delta, deltap, i, S, P, Snorm, Smin, Smax, prec:
|
409
|
+
deltap:=0:
|
410
|
+
delta:=0:
|
411
|
+
deg:=degree(poly):
|
412
|
+
|
413
|
+
prec:=53: # precision of the first iterations
|
414
|
+
|
415
|
+
S:=coeff(poly, x, deg):
|
416
|
+
Smax:=abs(S):
|
417
|
+
Smin:=Smax:
|
418
|
+
|
419
|
+
if nn<0 then n:=0: else n:=nn: fi:# sometimes called by compute_rel_rounding_error with n=-1
|
420
|
+
|
421
|
+
for i from (deg-1) to 0 by -1 do
|
422
|
+
P:= convert(S*x, polynom):
|
423
|
+
Smin := abs(coeff(poly,x,i)) - xmax*Smax :
|
424
|
+
if(Smin<=0) then
|
425
|
+
printf("Warning! in compute_abs_rounding_error, Smin<=0 at iteration %d, consider decreasing xmax\n",i):
|
426
|
+
fi:
|
427
|
+
delta:= evalf(xmax*deltap + 2**(-prec)*xmax*Smax):
|
428
|
+
if i<n then
|
429
|
+
# fast Add22 ?
|
430
|
+
if abs(coeff(poly,x,i)) < xmax*Smax # may be improved to xmax*Smax/2
|
431
|
+
then printf("WARNING Add22 cannot be used at step %d, use Add22Cond\n" , i ):
|
432
|
+
printf(" coeff=%1.20e, xmax*Smax=%1.20e" , abs(coeff(poly,x,i)), xmax*Smax ):
|
433
|
+
fi:
|
434
|
+
fi:
|
435
|
+
S:=convert(P+coeff(poly,x,i), polynom):
|
436
|
+
Snorm:=evalf(numapprox[infnorm](S, x=-xmax..xmax)):
|
437
|
+
if i=n-1 then prec:=100: fi: # from the addition of the n-1-th iteration
|
438
|
+
deltap:= evalf(delta + 2**(-prec)*(delta + Snorm)):
|
439
|
+
Smax := Snorm + deltap:
|
440
|
+
od:
|
441
|
+
deltap, Smin, Smax:
|
442
|
+
end proc:
|
443
|
+
|
444
|
+
|
445
|
+
|
446
|
+
#---------------------------------------------------------------------
|
447
|
+
# OBSOLETE use compute_horner_rounding_error below
|
448
|
+
# Computes the total relative rounding error
|
449
|
+
compute_rel_rounding_error:=proc(poly,xmax, n)
|
450
|
+
local deg, p, rho, deltap, Smin, Smax:
|
451
|
+
|
452
|
+
deg:=degree(poly):
|
453
|
+
if(n>0) then p:=100: else p:=53: fi:
|
454
|
+
|
455
|
+
if coeff(poly,x, 0) = 0 then
|
456
|
+
deltap, Smin, Smax := compute_abs_rounding_error(poly/x,xmax, n-1):
|
457
|
+
rho := (2^(-p)*(Smax+deltap) +deltap ) / Smin :
|
458
|
+
else
|
459
|
+
deltap, Smin, Smax := compute_abs_rounding_error(poly,xmax, n):
|
460
|
+
rho := deltap / Smin:
|
461
|
+
fi:
|
462
|
+
rho:
|
463
|
+
end proc:
|
464
|
+
|
465
|
+
|
466
|
+
|
467
|
+
|
468
|
+
#---------------------------------------------------------------------
|
469
|
+
# OBSOLETE use compute_horner_rounding_error below
|
470
|
+
# Computes the accumulated rounding error during the polynomial evaluation.
|
471
|
+
# P is the polynomial.
|
472
|
+
# xmax is the max value of |x|.
|
473
|
+
# n is the degree when P is computed in double double. The first double-double operation is a multiplication (probably less useful).
|
474
|
+
|
475
|
+
# returns max absolute error, min of the function, max of the function.
|
476
|
+
|
477
|
+
# This procedure also checks on the fly that the fast (test-free) versions of the double-double addition can be used, i.e. that for all x, at each Horner step i computing ci+x*Si, we have |ci|>|x*Si|. It prints warnings if it not the case.
|
478
|
+
|
479
|
+
compute_abs_rounding_error_firstmult:=proc(poly,xmax, nn)
|
480
|
+
local n, deg, delta, deltap, i, S, P, Snorm, Smin, Smax, prec:
|
481
|
+
deltap:=0:
|
482
|
+
delta:=0:
|
483
|
+
deg:=degree(poly):
|
484
|
+
|
485
|
+
prec:=53: # precision of the first iterations
|
486
|
+
|
487
|
+
S:=coeff(poly, x, deg):
|
488
|
+
Smax:=abs(S):
|
489
|
+
Smin:=Smax:
|
490
|
+
|
491
|
+
if nn<0 then n:=0: else n:=nn: fi:# sometimes called by compute_rel_rounding_error with n=-1
|
492
|
+
|
493
|
+
for i from (deg-1) to 0 by -1 do
|
494
|
+
if i=n-1 then prec:=100: fi: # from the mult of the n-1-th iteration
|
495
|
+
P:= convert(S*x, polynom):
|
496
|
+
Smin := abs(coeff(poly,x,i)) - xmax*Smax :
|
497
|
+
if(Smin<=0) then
|
498
|
+
printf("Warning! in compute_abs_rounding_error, Smin<=0 at iteration %d, consider decreasing xmax\n",i):
|
499
|
+
fi:
|
500
|
+
delta:= evalf(xmax*deltap + 2**(-prec)*xmax*Smax):
|
501
|
+
if i<n then
|
502
|
+
# fast Add22 ?
|
503
|
+
if abs(coeff(poly,x,i)) < xmax*Smax # may be improved to xmax*Smax/2
|
504
|
+
then printf("WARNING Add22 cannot be used at step %d, use Add22Cond\n" , i ):
|
505
|
+
printf(" coeff=%1.20e, xmax*Smax=%1.20e" , abs(coeff(poly,x,i)), xmax*Smax ):
|
506
|
+
fi:
|
507
|
+
fi:
|
508
|
+
S:=convert(P+coeff(poly,x,i), polynom):
|
509
|
+
Snorm:=evalf(numapprox[infnorm](S, x=-xmax..xmax)):
|
510
|
+
deltap:= evalf(delta + 2**(-prec)*(delta + Snorm)):
|
511
|
+
Smax := Snorm + deltap:
|
512
|
+
od:
|
513
|
+
deltap, Smin, Smax:
|
514
|
+
end proc:
|
515
|
+
|
516
|
+
|
517
|
+
|
518
|
+
|
519
|
+
#---------------------------------------------------------------------
|
520
|
+
# OBSOLETE use compute_horner_rounding_error below
|
521
|
+
# Computes the total relative rounding error
|
522
|
+
compute_rel_rounding_error_firstmult:=proc(poly,xmax, n)
|
523
|
+
local deg, p, rho, deltap, Smin, Smax:
|
524
|
+
|
525
|
+
deg:=degree(poly):
|
526
|
+
if(n>0) then p:=100: else p:=53: fi:
|
527
|
+
|
528
|
+
if coeff(poly,x, 0) = 0 then
|
529
|
+
deltap, Smin, Smax := compute_abs_rounding_error_firstmult(poly/x,xmax, n-1):
|
530
|
+
rho := (2^(-p)*(Smax+deltap) +deltap ) / Smin :
|
531
|
+
else
|
532
|
+
deltap, Smin, Smax := compute_abs_rounding_error_firstmult(poly,xmax, n):
|
533
|
+
rho := deltap / Smin:
|
534
|
+
fi:
|
535
|
+
rho:
|
536
|
+
end proc:
|
537
|
+
|
538
|
+
|
539
|
+
|
540
|
+
|
541
|
+
# Compute a good truncated polynomial approximation for a function
|
542
|
+
# Computes an approximation to a function of x f, as a truncated polynomial of deegree deg with the n first coefficients exactly representable as double-double.
|
543
|
+
# The function f(x) must have as input interval xmin..xmax
|
544
|
+
# returns [ truncated polynomial, relative approx error of trunc. poly. , infinite precision polynomial, rel. error of inf. prec. poly ]
|
545
|
+
poly_trunc_classic:=proc(f,deg,xmin,xmax,n)
|
546
|
+
local pe, repe, pt, ppe, rept, maxpt:
|
547
|
+
pe:=numapprox[minimax]( f, x=xmin..xmax, [deg,0], 1, 'err'):
|
548
|
+
pt := poly_exact2(pe,n):
|
549
|
+
rept := numapprox[infnorm]( 1-pt/f, x=xmin..xmax) :
|
550
|
+
maxpt := numapprox[infnorm]( pt, x=xmin..xmax) :
|
551
|
+
pt,rept, maxpt:
|
552
|
+
end proc:
|
553
|
+
|
554
|
+
|
555
|
+
|
556
|
+
#---------------------------------------------------------------------
|
557
|
+
# Computes a truncated polynomial of degree deg with the two first coefficients stored as double-doubles.
|
558
|
+
# The function f(x) must have as input interval xmin..xmax
|
559
|
+
# returns [ truncated polynomial, relative error of trunc. poly. , infinite precision polynomial, rel. error of inf. prec. poly ]
|
560
|
+
poly_trunc_f2d_2:=proc(f,deg,xmin,xmax)
|
561
|
+
local pe, repe, pt, c0, c1, c2, ppe, abserr, relerr, maxpt, err:
|
562
|
+
pe:=numapprox[minimax]( f, x=xmin..xmax, [deg,0], 1, 'err'):
|
563
|
+
pt := poly_exact2(pe,2):
|
564
|
+
c0:=coeff(pt,x,0):
|
565
|
+
c1:=coeff(pt,x,1):
|
566
|
+
c2:=coeff(pt,x,2):
|
567
|
+
ppe:=numapprox[minimax]( (f - c0 - c1 * x - c2*x*x) , x=xmin..xmax, [deg,0], 1, 'err'):
|
568
|
+
ppe:=expand(ppe) - coeff(ppe,x,0) - coeff(ppe,x,1)*x- coeff(ppe,x,2)*x*x:
|
569
|
+
pt := poly_exact2(ppe,2) + c0 + c1*x + c2*x*x:
|
570
|
+
abserr := numapprox[infnorm]( pt-f, x=xmin..xmax) :
|
571
|
+
relerr := numapprox[infnorm]( 1-pt/f, x=xmin..xmax) :
|
572
|
+
maxpt := numapprox[infnorm]( pt, x=xmin..xmax) :
|
573
|
+
pt,abserr,relerr,maxpt:
|
574
|
+
end proc:
|
575
|
+
|
576
|
+
|
577
|
+
|
578
|
+
|
579
|
+
#---------------------------------------------------------------------
|
580
|
+
# Computes a truncated polynomial of degree deg with the first coefficient stored as double-double.
|
581
|
+
# The function f(x) must have as input interval xmin..xmax
|
582
|
+
# returns [ truncated polynomial, relative error of trunc. poly. , infinite precision polynomial, rel. error of inf. prec. poly ]
|
583
|
+
poly_trunc_f2d_1:=proc(f,deg,xmin,xmax)
|
584
|
+
local pe, repe, pt, c0, c1, ppe, relerr, abserr, maxpt, err:
|
585
|
+
pe:=numapprox[minimax]( f , x=xmin..xmax, [deg,0], 1, 'err'):
|
586
|
+
pt := poly_exact2(pe,1):
|
587
|
+
c0:=coeff(pt,x,0):
|
588
|
+
c1:=coeff(pt,x,1):
|
589
|
+
ppe:=numapprox[minimax]( (f - c0 - c1 * x) , x=xmin..xmax, [deg,0], 1, 'err'):
|
590
|
+
ppe:=ppe - coeff(ppe,x,0) - coeff(ppe,x,1)*x:
|
591
|
+
pt := poly_exact2(ppe,1) + c0 + c1*x:
|
592
|
+
abserr := numapprox[infnorm]( pt-f, x=xmin..xmax) :
|
593
|
+
relerr := numapprox[infnorm]( 1-pt/f, x=xmin..xmax) :
|
594
|
+
maxpt := numapprox[infnorm]( pt, x=xmin..xmax) :
|
595
|
+
pt,abserr,relerr,maxpt:
|
596
|
+
end proc:
|
597
|
+
|
598
|
+
|
599
|
+
|
600
|
+
#---------------------------------------------------------------------
|
601
|
+
# compute_horner_rounding_error
|
602
|
+
|
603
|
+
# Computes a bound on the accumulated rounding error caused by the Horner evaluation of a truncated polynomial
|
604
|
+
# It is designed to allow evaluating the error for various schemes:
|
605
|
+
# - with or without an error on x
|
606
|
+
# - using SCS operators
|
607
|
+
# - using double and double-double operators.
|
608
|
+
# Arguments:
|
609
|
+
# P is the polynomial.
|
610
|
+
# xmax is the max value of |x|.
|
611
|
+
# errors is a list of size n where n is the degree of the polynomial.
|
612
|
+
# Each element of this list is a triple (epsx,epsmul,epsadd) where
|
613
|
+
# epsx is the relative error on x at each step,
|
614
|
+
# epsadd is the max relative error on the addition
|
615
|
+
# epsmul is the max relative error on the multiplication.
|
616
|
+
# This allows to handle SCS Horner, as well as evaluation starting in double and ending in double-double.
|
617
|
+
# check_dd is a flag, if set to 1 the procedure also checks on the fly that the fast (test-free) versions
|
618
|
+
# of the double-double addition can be used, i.e. that for all x, at each Horner step i computing ci+x*Si,
|
619
|
+
# we have |ci|>|x*Si|. It prints warnings if it not the case.
|
620
|
+
# returns (epsprimek, deltak, minP, maxP) where (see the doc)
|
621
|
+
# epsprimek is the max rel error of the last multiplication (useful if the coeff of degree 0 is 0)
|
622
|
+
# deltak is the max absolute error of the last addition (useful if the reconstruction adds something to result of the polynomial)
|
623
|
+
# minP min of the evaluated polynomial (useful to compute a relative error out of deltak)
|
624
|
+
# maxP max of the evaluated polynomial.
|
625
|
+
|
626
|
+
compute_horner_rounding_error:=proc(poly, x, xmax, errors, check_dd)
|
627
|
+
local deg, Sk, maxSk, minSk, epsaddk, epsmulk, deltaaddk, k, ck, epsx, epsmul, epsadd, deltaadd, Pk, maxPk:
|
628
|
+
|
629
|
+
if assigned(x) then
|
630
|
+
printf("Error in compute_horner_rounding_error, polynomial variable is assigned\n"):
|
631
|
+
return 'procname(args)':
|
632
|
+
fi:
|
633
|
+
|
634
|
+
deg:=degree(poly,x):
|
635
|
+
if(deg<0) then printf("ERROR: negative degree in compute_abs_rounding_error"): return 'procname(args)': fi:
|
636
|
+
|
637
|
+
Sk:=coeff(poly, x, deg):
|
638
|
+
maxSk:=abs(Sk):
|
639
|
+
minSk:=maxSk:
|
640
|
+
epsmulk:=0:
|
641
|
+
deltaaddk:=0:
|
642
|
+
epsaddk:=0:
|
643
|
+
|
644
|
+
for k from (deg) to 1 by -1 do
|
645
|
+
|
646
|
+
# the errors to consider for this step
|
647
|
+
epsx := errors[k][1]:
|
648
|
+
epsadd := errors[k][2]:
|
649
|
+
epsmul := errors[k][3]:
|
650
|
+
|
651
|
+
# multiplication operation
|
652
|
+
Pk:= convert(Sk*x, polynom):
|
653
|
+
maxPk:=numapprox[infnorm](Pk, x=-xmax..xmax):
|
654
|
+
|
655
|
+
ck:=coeff(poly,x,k-1):
|
656
|
+
epsmulk:=evalf( (1+epsx)*(1+epsaddk)*(1+epsmul)-1 + 10^(-Digits+2) ):
|
657
|
+
|
658
|
+
#addition
|
659
|
+
if(ck=0) then
|
660
|
+
Sk:=Pk:
|
661
|
+
maxSk := maxPk:
|
662
|
+
minSk:=0:
|
663
|
+
deltaaddk:= evalf(epsmulk*maxPk):
|
664
|
+
epsaddk:=epsmulk:
|
665
|
+
else
|
666
|
+
Sk:=convert(Pk+ck , polynom):
|
667
|
+
maxSk:=numapprox[infnorm](Sk, x=-xmax..xmax):
|
668
|
+
minSk:=minimize(abs(Sk), x=-xmax..xmax):
|
669
|
+
if(epsadd=2^(-53)) then # compute deltadd exactly as the max half ulp of the result
|
670
|
+
deltaadd := 0.5*ulp(maxSk+epsmulk*maxSk):
|
671
|
+
else # compute deltaadd out of the relative error
|
672
|
+
deltaadd := epsadd * (maxSk+epsmulk*maxSk):
|
673
|
+
fi:
|
674
|
+
deltaaddk := evalf( epsmulk*maxPk + deltaadd + 10^(-Digits+2) ):
|
675
|
+
epsaddk := deltaaddk/minSk + 10^(-Digits+2) :
|
676
|
+
# warnings
|
677
|
+
if (minSk=0) then
|
678
|
+
printf("Warning! in compute_abs_rounding_error, minSk=0 at iteration %d, consider decreasing xmax\n",k):
|
679
|
+
fi:
|
680
|
+
fi:
|
681
|
+
printf("step %d epsmulk=%1.4e deltaaddk=%1.4e minSk=%1.4e maxSk=%1.4e\n", k, epsmulk, deltaaddk, minSk, maxSk):
|
682
|
+
|
683
|
+
|
684
|
+
# if (epsadd=2**(-103)) then
|
685
|
+
# # fast Add22 ?
|
686
|
+
# if abs(coeff(poly,x,k)) < xmax*maxSk # may be improved to xmax*Smax/2
|
687
|
+
# then printf("WARNING Add22 cannot be used at step %d, use Add22Cond\n" , k ):
|
688
|
+
# printf(" coeff=%1.20e, xmax*Smax=%1.20e" , abs(coeff(poly,x,i)), xmax*maxSk ):
|
689
|
+
# fi:
|
690
|
+
# fi:
|
691
|
+
|
692
|
+
od:
|
693
|
+
|
694
|
+
return (epsmulk, deltaaddk, minSk, maxSk)
|
695
|
+
end proc:
|
696
|
+
|
697
|
+
|
698
|
+
#---------------------------------------------------------------------
|
699
|
+
# An helper function to build an errlist for the previous procedure.
|
700
|
+
# Arguments:
|
701
|
+
# n is the degree of the polynomial,
|
702
|
+
# ddadd, ddmul number of (final) double-double operations
|
703
|
+
# depsx error on x in the double steps
|
704
|
+
# ddepsx error on x in the double-double steps (when x is represented by a double-double)
|
705
|
+
|
706
|
+
errlist_quickphase_horner := proc(n,ddadd, ddmul,depsx, ddepsx)
|
707
|
+
local nddadd, epsadd, nddmul, epsmul, epsx:
|
708
|
+
if n=0
|
709
|
+
then []
|
710
|
+
else
|
711
|
+
if ddadd>0 then
|
712
|
+
nddadd:=ddadd-1:
|
713
|
+
epsadd:=2**(-103):
|
714
|
+
else
|
715
|
+
nddadd:=ddadd:
|
716
|
+
epsadd:=2**(-53):
|
717
|
+
fi:
|
718
|
+
if ddmul>0 then
|
719
|
+
nddmul:=ddmul-1:
|
720
|
+
epsmul:=2**(-102):
|
721
|
+
epsx:=ddepsx:
|
722
|
+
else
|
723
|
+
nddmul:=ddmul:
|
724
|
+
epsmul:=2**(-53):
|
725
|
+
epsx:=depsx:
|
726
|
+
fi:
|
727
|
+
[ [epsx,epsadd,epsmul] , op(errlist_quickphase_horner(n-1, nddadd, nddmul, depsx,ddepsx))]
|
728
|
+
fi:
|
729
|
+
end proc:
|
730
|
+
|
731
|
+
|
732
|
+
|
733
|
+
#---------------------------------------------------------------------
|
734
|
+
# Finding the worst cases for additive range reduction
|
735
|
+
# cut and paste from the web page of Muller's book
|
736
|
+
# Much faster if called with Digits very large and an evalf() on C
|
737
|
+
# but then check
|
738
|
+
|
739
|
+
WorstCaseForAdditiveRangeReduction:=proc(B,n,emin,emax,C)
|
740
|
+
local epsilonmin,powerofBoverC,e,a,Plast,r,Qlast, Q,P,NewQ,NewP,epsilon, numbermin,expmin,l:
|
741
|
+
epsilonmin := 12345.0 :
|
742
|
+
powerofBoverC := B^(emin-n)/C:
|
743
|
+
for e from emin-n+1 to emax-n+1 do
|
744
|
+
powerofBoverC := B*powerofBoverC:
|
745
|
+
a := floor(powerofBoverC):
|
746
|
+
Plast := a:
|
747
|
+
r := 1/(powerofBoverC-a):
|
748
|
+
a := floor(r):
|
749
|
+
Qlast := 1:
|
750
|
+
Q := a:
|
751
|
+
P := Plast*a+1:
|
752
|
+
while Q < B^n-1 do
|
753
|
+
r := 1/(r-a):
|
754
|
+
a := floor(r):
|
755
|
+
NewQ := Q*a+Qlast:
|
756
|
+
NewP := P*a+Plast:
|
757
|
+
Qlast := Q:
|
758
|
+
Plast := P:
|
759
|
+
Q := NewQ:
|
760
|
+
P := NewP
|
761
|
+
od:
|
762
|
+
epsilon := evalf(C*abs(Plast-Qlast*powerofBoverC)):
|
763
|
+
if epsilon < epsilonmin then
|
764
|
+
epsilonmin := epsilon: numbermin := Qlast:
|
765
|
+
expmin := e
|
766
|
+
fi
|
767
|
+
od:
|
768
|
+
print('mantissa',numbermin):
|
769
|
+
print('exponent',expmin):
|
770
|
+
print('epsilon',epsilonmin):
|
771
|
+
l := evalf(log(epsilonmin)/log(B),10):
|
772
|
+
print(numberofdigits,l):
|
773
|
+
(numbermin, expmin, epsilonmin)
|
774
|
+
end proc:
|
775
|
+
|
776
|
+
|
777
|
+
|
778
|
+
|
779
|
+
|
780
|
+
|
781
|
+
|
782
|
+
#####################################################################
|
783
|
+
|
784
|
+
# Stuff for SCS
|
785
|
+
#####################################################################
|
786
|
+
|
787
|
+
# Global parameters
|
788
|
+
# Don�t forget to set all the parameters here
|
789
|
+
SCS_NB_WORDS := 8:
|
790
|
+
SCS_NB_BITS := 30:
|
791
|
+
|
792
|
+
|
793
|
+
|
794
|
+
|
795
|
+
|
796
|
+
#---------------------------------------------------------------------
|
797
|
+
# This procedure convert a decimal number into it SCS representation.
|
798
|
+
# x : input number to convert into it SCS representation
|
799
|
+
real_to_SCS := proc(x)
|
800
|
+
local exception, index, sgn, mantissa, nb, i:
|
801
|
+
|
802
|
+
if x <> 0 then
|
803
|
+
exception := 1:
|
804
|
+
if x > 0 then
|
805
|
+
sgn := 1:
|
806
|
+
nb := x:
|
807
|
+
elif x < 0 then
|
808
|
+
sgn := -1:
|
809
|
+
nb := -x:
|
810
|
+
end if:
|
811
|
+
|
812
|
+
index := 0:
|
813
|
+
|
814
|
+
if nb >= 1 then
|
815
|
+
for i from 0 while nb > (2^(SCS_NB_BITS+1)-1) do
|
816
|
+
index := index+1:
|
817
|
+
nb := nb * 2^(-SCS_NB_BITS):
|
818
|
+
end do:
|
819
|
+
else
|
820
|
+
for i from 0 while nb < 1 do
|
821
|
+
index := index-1:
|
822
|
+
nb := nb * 2^(SCS_NB_BITS):
|
823
|
+
end do:
|
824
|
+
end if:
|
825
|
+
|
826
|
+
for i from 0 by 1 to (SCS_NB_WORDS-1) do
|
827
|
+
mantissa[i] := trunc(nb):
|
828
|
+
nb := (nb - mantissa[i]) * 2^(SCS_NB_BITS):
|
829
|
+
end do:
|
830
|
+
else
|
831
|
+
for i from 0 by 1 to (SCS_NB_WORDS-1) do
|
832
|
+
mantissa[i] := 0:
|
833
|
+
end do:
|
834
|
+
|
835
|
+
index := 1:
|
836
|
+
exception := x:
|
837
|
+
sgn := 1:
|
838
|
+
end if:
|
839
|
+
mantissa[SCS_NB_WORDS] := exception:
|
840
|
+
mantissa[SCS_NB_WORDS+1] := index:
|
841
|
+
mantissa[SCS_NB_WORDS+2] := sgn:
|
842
|
+
|
843
|
+
return mantissa:
|
844
|
+
end proc:
|
845
|
+
|
846
|
+
|
847
|
+
|
848
|
+
|
849
|
+
#---------------------------------------------------------------------
|
850
|
+
# Convert an SCS number into a rational number
|
851
|
+
|
852
|
+
SCS_to_real := proc(tab)
|
853
|
+
local res, i:
|
854
|
+
|
855
|
+
if (tab[SCS_NB_WORDS] <> 1) then
|
856
|
+
return tab[SCS_NB_WORDS]:
|
857
|
+
end if:
|
858
|
+
|
859
|
+
res := 0:
|
860
|
+
for i from (SCS_NB_WORDS-1) by -1 while i>=0 do
|
861
|
+
res := 2^(-SCS_NB_BITS)*res + tab[i]
|
862
|
+
end do:
|
863
|
+
|
864
|
+
res := tab[SCS_NB_WORDS+2]*(res * 2.^(SCS_NB_BITS * tab[SCS_NB_WORDS+1])):
|
865
|
+
|
866
|
+
return res:
|
867
|
+
|
868
|
+
end proc:
|
869
|
+
|
870
|
+
|
871
|
+
|
872
|
+
#---------------------------------------------------------------------
|
873
|
+
# This procedure truncates the coefficients of a polynomial to SCS
|
874
|
+
# numbers, so that we can then evaluate its approximation error
|
875
|
+
# (equivalent to poly_exact for the doubles)
|
876
|
+
poly_exact_SCS:=proc(P)
|
877
|
+
local deg,i, coef, coef_t, Q:
|
878
|
+
Q:= 0:
|
879
|
+
convert(Q, polynom):
|
880
|
+
deg:=degree(P,x):
|
881
|
+
for i from 0 to deg do
|
882
|
+
coef:=coeff(P,x,i):
|
883
|
+
coef_t:=SCS_to_real(real_to_SCS(evalf(coef))):
|
884
|
+
Q:= Q + coef_t*x^i:
|
885
|
+
od:
|
886
|
+
return(Q):
|
887
|
+
end:
|
888
|
+
|
889
|
+
|
890
|
+
|
891
|
+
#---------------------------------------------------------------------
|
892
|
+
# Write Into file fd the SCSS number stored into the table tab where
|
893
|
+
# tab[0..(SCS_NB_WORDS-1)] store the mantissa
|
894
|
+
# tab[SCS_NB_WORDS] store the exception
|
895
|
+
# tab[SCS_NB_WORDS+1] store the index
|
896
|
+
# tab[SCS_NB_WORDS+2] store the sign
|
897
|
+
# You probably want to use WriteSCS below !
|
898
|
+
|
899
|
+
WriteSCS_from_table := proc(fd, tab)
|
900
|
+
local i:
|
901
|
+
|
902
|
+
fprintf(fd,"{{"):
|
903
|
+
|
904
|
+
fprintf(fd,"0x%+0.8x, ", tab[0]):
|
905
|
+
for i from 1 by 1 to (SCS_NB_WORDS-2) do
|
906
|
+
fprintf(fd,"0x%+0.8x, ", tab[i]):
|
907
|
+
if (i mod 4 = 3) then
|
908
|
+
fprintf(fd,"\n"):
|
909
|
+
fi:
|
910
|
+
end do:
|
911
|
+
fprintf(fd,"0x%+0.8x},\n", tab[SCS_NB_WORDS-1]):
|
912
|
+
if (tab[SCS_NB_WORDS]=1) then
|
913
|
+
fprintf(fd,"DB_ONE, %3d, %3d ", tab[SCS_NB_WORDS+1], tab[SCS_NB_WORDS+2]):
|
914
|
+
else
|
915
|
+
# the only other possible value is 0 so ...
|
916
|
+
fprintf(fd,"{0x00000000, 0x00000000}, %3d, %3d ", tab[SCS_NB_WORDS+1], tab[SCS_NB_WORDS+2]):
|
917
|
+
end if:
|
918
|
+
|
919
|
+
fprintf(fd, "} \n"):
|
920
|
+
end proc:
|
921
|
+
|
922
|
+
|
923
|
+
#---------------------------------------------------------------------
|
924
|
+
# Write a real number as an SCS array to a file
|
925
|
+
|
926
|
+
WriteSCS := proc (fd,x)
|
927
|
+
WriteSCS_from_table(fd , real_to_SCS (x)):
|
928
|
+
end:
|
929
|
+
|
930
|
+
|
931
|
+
|
932
|
+
#---------------------------------------------------------------------
|
933
|
+
# A procedure to count the non-zero coefficients of a polynomial (to store it)
|
934
|
+
|
935
|
+
get_nb_terms := proc(poly)
|
936
|
+
local i, deg_poly:
|
937
|
+
|
938
|
+
deg_poly := degree(poly):
|
939
|
+
for i from deg_poly by -1 while i>=0 do
|
940
|
+
if coeff(poly, x, i)=0 then
|
941
|
+
deg_poly := deg_poly-1:
|
942
|
+
end if:
|
943
|
+
end do:
|
944
|
+
|
945
|
+
return deg_poly:
|
946
|
+
end proc:
|
947
|
+
|
948
|
+
|
949
|
+
|
950
|
+
|
951
|
+
|
952
|
+
|
953
|
+
|
954
|
+
|
955
|
+
#---------------------------------------------------------------------
|
956
|
+
# Write a polynomial to an array of SCS coefficients
|
957
|
+
|
958
|
+
# fd : file where to put the result
|
959
|
+
# poly : input polynom
|
960
|
+
# name : name of the array
|
961
|
+
|
962
|
+
Write_SCS_poly := proc(fd, name, poly)
|
963
|
+
local i, deg:
|
964
|
+
#fclose(fd):
|
965
|
+
try
|
966
|
+
finally
|
967
|
+
fprintf(fd,"static const scs %s [%d]=\n", name, get_nb_terms(poly)+1):
|
968
|
+
deg := degree(poly):
|
969
|
+
|
970
|
+
fprintf(fd,"/* ~%1.50e */ \n{", coeff(poly, x, deg)):
|
971
|
+
WriteSCS(fd, coeff(poly, x, deg)):
|
972
|
+
for i from (deg-1) by (-1) while i>=0 do
|
973
|
+
if (coeff(poly, x, i)<>0) then
|
974
|
+
fprintf(fd,",\n/* ~%1.50e */ \n", coeff(poly, x, i)):
|
975
|
+
WriteSCS(fd, coeff(poly, x, i), 0):
|
976
|
+
end if:
|
977
|
+
end do:
|
978
|
+
fprintf(fd,"};\n"):
|
979
|
+
end try:
|
980
|
+
end proc:
|
981
|
+
|
982
|
+
|
983
|
+
|
984
|
+
|
985
|
+
|
986
|
+
|
987
|
+
|
988
|
+
|
989
|
+
|
990
|
+
|
991
|
+
|
992
|
+
|
993
|
+
|
994
|
+
|
995
|
+
|
996
|
+
|
997
|
+
|