flt 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +41 -0
- data/License.txt +20 -0
- data/Manifest.txt +42 -0
- data/README.txt +557 -0
- data/Rakefile +34 -0
- data/lib/flt.rb +9 -0
- data/lib/flt/b.rb +6 -0
- data/lib/flt/bigdecimal.rb +151 -0
- data/lib/flt/bin_num.rb +250 -0
- data/lib/flt/d.rb +6 -0
- data/lib/flt/dec_num.rb +1239 -0
- data/lib/flt/float.rb +458 -0
- data/lib/flt/math.rb +66 -0
- data/lib/flt/num.rb +4211 -0
- data/lib/flt/sugar.rb +102 -0
- data/lib/flt/support.rb +1335 -0
- data/lib/flt/tolerance.rb +561 -0
- data/lib/flt/tolerance/sugar.rb +77 -0
- data/lib/flt/version.rb +9 -0
- data/setup.rb +1585 -0
- data/tasks/ann.rake +80 -0
- data/tasks/bones.rake +20 -0
- data/tasks/gem.rake +192 -0
- data/tasks/git.rake +40 -0
- data/tasks/manifest.rake +48 -0
- data/tasks/notes.rake +27 -0
- data/tasks/post_load.rake +39 -0
- data/tasks/rdoc.rake +50 -0
- data/tasks/rubyforge.rake +55 -0
- data/tasks/setup.rb +279 -0
- data/tasks/spec.rake +54 -0
- data/tasks/svn.rake +47 -0
- data/tasks/test.rake +40 -0
- data/test/all_tests.rb +23 -0
- data/test/helper.rb +101 -0
- data/test/reader.rb +68 -0
- data/test/test_basic.rb +396 -0
- data/test/test_bin.rb +245 -0
- data/test/test_bin_arithmetic.rb +94 -0
- data/test/test_binfloat_conversion.rb +24 -0
- data/test/test_coercion.rb +22 -0
- data/test/test_comparisons.rb +53 -0
- data/test/test_dectest.rb +216 -0
- data/test/test_define_conversions.rb +144 -0
- data/test/test_epsilon.rb +55 -0
- data/test/test_exact.rb +147 -0
- data/test/test_flags.rb +34 -0
- data/test/test_multithreading.rb +32 -0
- data/test/test_num_constructor.rb +133 -0
- data/test/test_odd_even.rb +78 -0
- data/test/test_round.rb +104 -0
- data/test/test_to_int.rb +104 -0
- data/test/test_to_rf.rb +36 -0
- data/test/test_tol.rb +102 -0
- data/test/test_ulp.rb +127 -0
- metadata +147 -0
data/lib/flt/float.rb
ADDED
@@ -0,0 +1,458 @@
|
|
1
|
+
# Support classes for homogeneous treatment of Float and Num values by defining Float.context
|
2
|
+
#
|
3
|
+
# The set of constants with Float metadata is also augmented.
|
4
|
+
|
5
|
+
require 'flt'
|
6
|
+
|
7
|
+
# Float constants.
|
8
|
+
#
|
9
|
+
# Note that this uses the "fractional significand" interpretation,
|
10
|
+
# i.e. the significand has the radix point before its first digit.
|
11
|
+
#
|
12
|
+
# Float::RADIX : b = Radix of exponent representation,2
|
13
|
+
#
|
14
|
+
# Float::MANT_DIG : p = bits (base-RADIX digits) in the significand
|
15
|
+
#
|
16
|
+
# Float::DIG : q = Number of decimal digits such that any floating-point number with q
|
17
|
+
# decimal digits can be rounded into a floating-point number with p radix b
|
18
|
+
# digits and back again without change to the q decimal digits,
|
19
|
+
# q = p * log10(b) if b is a power of 10
|
20
|
+
# q = floor((p - 1) * log10(b)) otherwise
|
21
|
+
# ((Float::MANT_DIG-1)*Math.log(Float::RADIX)/Math.log(10)).floor
|
22
|
+
#
|
23
|
+
# Float::MIN_EXP : emin = Minimum int x such that Float::RADIX**(x-1) is a normalized float
|
24
|
+
#
|
25
|
+
# Float::MIN_10_EXP : Minimum negative integer such that 10 raised to that power is in the
|
26
|
+
# range of normalized floating-point numbers,
|
27
|
+
# ceil(log10(b) * (emin - 1))
|
28
|
+
#
|
29
|
+
# Float::MAX_EXP : emax = Maximum int x such that Float::RADIX**(x-1) is a representable float
|
30
|
+
#
|
31
|
+
# Float::MAX_10_EXP : Maximum integer such that 10 raised to that power is in the range of
|
32
|
+
# representable finite floating-point numbers,
|
33
|
+
# floor(log10((1 - b**-p) * b**emax))
|
34
|
+
#
|
35
|
+
# Float::MAX : Maximum representable finite floating-point number
|
36
|
+
# (1 - b**-p) * b**emax
|
37
|
+
#
|
38
|
+
# Float::EPSILON : The difference between 1 and the least value greater than 1 that is
|
39
|
+
# representable in the given floating point type
|
40
|
+
# b**(1-p)
|
41
|
+
# Math.ldexp(*Math.frexp(1).collect{|e| e.kind_of?(Integer) ? e-(Float::MANT_DIG-1) : e})
|
42
|
+
#
|
43
|
+
# Float::MIN : Minimum normalized positive floating-point number
|
44
|
+
# b**(emin - 1).
|
45
|
+
#
|
46
|
+
# Float::ROUNDS : Addition rounds to 0: zero, 1: nearest, 2: +inf, 3: -inf, -1: unknown.
|
47
|
+
#
|
48
|
+
# Additional constants defined here:
|
49
|
+
#
|
50
|
+
# Float::DECIMAL_DIG : Number of decimal digits, n, such that any floating-point number can be rounded
|
51
|
+
# to a floating-point number with n decimal digits and back again without
|
52
|
+
# change to the value,
|
53
|
+
# pmax * log10(b) if b is a power of 10
|
54
|
+
# ceil(1 + pmax * log10(b)) otherwise
|
55
|
+
# DECIMAL_DIG = (MANT_DIG*Math.log(RADIX)/Math.log(10)).ceil+1
|
56
|
+
#
|
57
|
+
# Float::MIN_N : Minimum normalized number == MAX_D.next == MIN
|
58
|
+
#
|
59
|
+
# Float::MAX_D : Maximum denormal number == MIN_N.prev
|
60
|
+
#
|
61
|
+
# Float::MIN_D : Minimum non zero positive denormal number == 0.0.next
|
62
|
+
#
|
63
|
+
# Float::MAX_F : Maximum significand
|
64
|
+
class Float

  # Number of decimal digits, n, such that any Float can be rounded to n decimal
  # digits and back again without change to the value.
  DECIMAL_DIG = (MANT_DIG*Math.log(RADIX)/Math.log(10)).ceil+1

  # Minimum normalized number == MAX_D.next == Float::MIN
  MIN_N = Math.ldexp(0.5,Float::MIN_EXP)

  # Maximum denormal number == MIN_N.prev
  MAX_D = Math.ldexp(Math.ldexp(1,Float::MANT_DIG-1)-1,Float::MIN_EXP-Float::MANT_DIG)

  # Minimum non zero positive denormal number == 0.0.next
  MIN_D = Math.ldexp(1,Float::MIN_EXP-Float::MANT_DIG)

  # Maximum significand == Math.ldexp(Math.ldexp(1,Float::MANT_DIG)-1,-Float::MANT_DIG)
  # (the constant holds the significand itself, not the result of comparing both expressions)
  MAX_F = Math.frexp(Float::MAX)[0]

end
|
81
|
+
|
82
|
+
require 'singleton'
|
83
|
+
|
84
|
+
# Context class with some of the Flt::Num context functionality, to allow the use of Float numbers
# similarly to other Num values; this eases the implementation of functions compatible with either
# Num or Float values.
#
# The class is a Singleton: use Flt::FloatContext.instance to obtain the context object.
class Flt::FloatContext

  include Singleton

  # Class of the numbers handled by this context
  def num_class
    Float
  end

  # Build a Float from one of these argument forms (which may also be passed as a single Array):
  # * sign, coefficient, exponent : sign*coefficient*2**exponent
  # * coefficient, exponent : coefficient*2**exponent
  # * value : converted with Kernel#Float
  # Any other number of arguments yields nil.
  def Num(*args)
    # args = *args would leave a nested array untouched in Ruby >= 1.9, so unwrap it explicitly
    args = args.first if args.size==1 && args.first.is_a?(Array)
    case args.size
    when 3
      Math.ldexp(args[0]*args[1],args[2])
    when 2
      Math.ldexp(args[0],args[1])
    when 1
      Float(*args)
    end
  end

  # Radix of the Float representation
  def radix
    Float::RADIX
  end

  # NaN (not a number value)
  def nan
    0.0/0.0
  end

  # zero value with specified sign
  def zero(sign=+1)
    (sign < 0) ? -0.0 : 0.0
  end

  # infinity value with specified sign
  def infinity(sign=+1)
    (sign < 0) ? -1.0/0.0 : 1.0/0.0
  end

  # Integer power of two, 2**n, computed as a bit shift
  def int_radix_power(n)
    1 << n
  end

  # This is the difference between 1.0 and the smallest floating-point
  # value greater than 1.0, radix_power(1-significand_precision)
  #
  # We have:
  #   Float.epsilon == (1.0.next-1.0)
  def epsilon(sign=+1)
    (sign < 0) ? -Float::EPSILON : Float::EPSILON
  end

  # The strict epsilon is the smallest value that produces something different from 1.0
  # when added to 1.0. It may be smaller than the general epsilon, because
  # of the particular rounding rules used with the floating point format.
  # This is only meaningful when well-defined rules are used for rounding the result
  # of floating-point addition.
  #
  # We have:
  #   (Float.strict_epsilon+1.0) == 1.0.next
  #   (Float.strict_epsilon.prev+1.0) == 1.0
  #
  # The result carries the requested sign; the round parameter is accepted but not used.
  def strict_epsilon(sign=+1, round=nil)
    # We don't rely on Float::ROUNDS
    eps = minimum_nonzero
    unless (1.0+eps) > 1.0
      f,e = Math.frexp(1)
      # first candidate: the value just above half an epsilon
      # (next_plus is used because Float has no core method to get the next value)
      eps = Math.ldexp(next_plus(f),e-Float::MANT_DIG)
      unless (1.0+eps) > 1.0
        # second candidate: exactly half an epsilon
        eps = Math.ldexp(f,e-Float::MANT_DIG)
        # if that is lost too, only a full epsilon changes 1.0
        eps = Math.ldexp(f,e-Float::MANT_DIG+1) unless (1.0+eps) > 1.0
      end
    end
    (sign < 0) ? -eps : eps
  end

  # This is the maximum relative error corresponding to 1/2 ulp:
  #   (radix/2)*radix_power(-significand_precision) == epsilon/2
  # This is called "machine epsilon" in [Goldberg]
  # We have:
  #
  #   Float.half_epsilon == 0.5*Float.epsilon
  def half_epsilon(sign=+1)
    f,e = Math.frexp(1)
    half = Math.ldexp(f, e-Float::MANT_DIG)
    (sign < 0) ? -half : half
  end

  # minimum normal Float value (with specified sign)
  def minimum_normal(sign=+1)
    (sign < 0) ? -Float::MIN_N : Float::MIN_N
  end

  # maximum subnormal (denormalized) Float value (with specified sign)
  def maximum_subnormal(sign=+1)
    (sign < 0) ? -Float::MAX_D : Float::MAX_D
  end

  # minimum (subnormal) nonzero Float value, with specified sign
  def minimum_nonzero(sign=+1)
    (sign < 0) ? -Float::MIN_D : Float::MIN_D
  end

  # maximum finite Float value, with specified sign
  def maximum_finite(sign=+1)
    (sign < 0) ? -Float::MAX : Float::MAX
  end

  # Number of radix digits (bits) in the significand
  def precision
    Float::MANT_DIG
  end

  # Maximum integral significand: 2**precision - 1
  def maximum_coefficient
    int_radix_power(precision)-1
  end

  # Minimum integral significand of a normalized number: 2**(precision-1)
  def minimum_normalized_coefficient
    # int_radix_power is a method of the context, not of the Float class
    int_radix_power(precision-1)
  end

  # Float arithmetic is never exact
  def exact?
    false
  end

  # detect actual rounding mode
  def rounding
    Flt::Support::AuxiliarFunctions.detect_float_rounding
  end

  # Float::MIN_EXP-1 (Float::MIN_EXP is defined for a fractional significand)
  def emin
    Float::MIN_EXP-1
  end

  # Float::MAX_EXP-1 (Float::MAX_EXP is defined for a fractional significand)
  def emax
    Float::MAX_EXP-1
  end

  # Exponent of the minimum nonzero value when the significand is taken as an integer
  def etiny
    Float::MIN_EXP - Float::MANT_DIG
  end

  # Exponent of the maximum finite value when the significand is taken as an integer
  def etop
    Float::MAX_EXP - Float::MANT_DIG
  end

  # Adjacent Float value above x
  def next_plus(x)
    Flt::FloatContext.neighbours(x).last
  end

  # Adjacent Float value below x
  def next_minus(x)
    Flt::FloatContext.neighbours(x).first
  end

  # Adjacent Float value to x in the direction of y.
  # If both values are equal the result is x with the sign of y;
  # if the values cannot be compared (NaN) the result is NaN.
  def next_toward(x, y)
    x, y = x.to_f, y.to_f
    comparison = x <=> y
    return nan if comparison.nil?
    return copy_sign(x, y) if comparison == 0
    (comparison < 0) ? next_plus(x) : next_minus(x)
  end

  # Sign: -1 for minus, +1 for plus, nil for nan (note that Float zero is signed)
  def sign(x)
    x = x.to_f
    if x.nan?
      nil
    elsif x.zero?
      # Note that (x.to_s[0,1] == "-" ? -1 : +1) fails under mswin32
      # because in that platform (-0.0).to_s == '0.0'
      (1/x < 0) ? -1 : +1
    else
      x < 0 ? -1 : +1
    end
  end

  # Return copy of x with the sign of y (y may be an Integer, of which only the sign is used).
  # The result is NaN if the sign of either value is undefined.
  def copy_sign(x, y)
    self_sign = sign(x)
    other_sign = y.is_a?(Integer) ? (y < 0 ? -1 : +1) : sign(y)
    return nan unless self_sign && other_sign
    (self_sign == other_sign) ? x.to_f : -x.to_f
  end

  # Returns the internal representation of the number, composed of:
  # * a sign which is +1 for plus and -1 for minus (nil for NaN)
  # * a coefficient (significand) which is a nonnegative integer (nil for special values)
  # * an exponent (an integer) or :inf, :nan for special values
  # The value of non-special numbers is sign*coefficient*2^exponent
  def split(x)
    x = x.to_f
    sgn = sign(x)
    if x.nan?
      [sgn, nil, :nan]
    elsif x.infinite?
      [sgn, nil, :inf]
    else
      coeff, exp = to_int_scale(x)
      [sgn, coeff.abs, exp]
    end
  end

  # Return the value of the number as a signed integer and a scale (a power-of-two exponent),
  # so that x == integer*2**scale. Returns nil for special values (NaN, infinities).
  def to_int_scale(x)
    x = x.to_f
    return nil if special?(x)
    coeff,exp = Math.frexp(x)
    if exp < Float::MIN_EXP
      # denormalized number
      coeff = Math.ldexp(coeff, exp-Float::MIN_EXP+Float::MANT_DIG).to_i
      exp = Float::MIN_EXP-Float::MANT_DIG
    else
      # normalized number
      coeff = Math.ldexp(coeff, Float::MANT_DIG).to_i
      exp -= Float::MANT_DIG
    end
    [coeff, exp]
  end

  # ulp (unit in the last place) according to the definition proposed by J.M. Muller in
  # "On the definition of ulp(x)" INRIA No. 5504
  #
  # With mode :low (the default) powers of the radix are assigned the smaller of their
  # two possible ulps. NaN is returned unchanged.
  def ulp(x, mode=:low)
    x = x.to_f
    return x if x.nan?
    x = x.abs
    if x < Math.ldexp(1,Float::MIN_EXP) # x < Float::RADIX*Float::MIN_N
      Math.ldexp(1,Float::MIN_EXP-Float::MANT_DIG) # res = Float::MIN_D
    elsif x > Float::MAX # x > Math.ldexp(1-Math.ldexp(1,-Float::MANT_DIG),Float::MAX_EXP)
      Math.ldexp(1,Float::MAX_EXP-Float::MANT_DIG) # res = Float::MAX - Float::MAX.prev
    else
      f,e = Math.frexp(x)
      e -= 1 if mode==:low && f==Math.ldexp(1,-1) # assign the smaller ulp to radix powers
      Math.ldexp(1,e-Float::MANT_DIG)
    end
  end

  # Is x a special value (NaN or infinity)?
  def special?(x)
    x.nan? || x.infinite?
  end

  # Is x a normal number (finite, nonzero and not subnormal)?
  def normal?(x)
    if special?(x) || x.zero?
      false
    else
      x.abs >= Float::MIN_N
    end
  end

  # Is x a subnormal (denormalized) number?
  def subnormal?(x)
    if special?(x) || x.zero?
      false
    else
      x.abs < Float::MIN_N
    end
  end

  # Unary plus: x converted to Float
  def plus(x)
    x.to_f
  end

  # Unary minus: x converted to Float and negated
  def minus(x)
    -x.to_f
  end

  class << self
    # Compute the adjacent floating point values: the nearest value below x and
    # the nearest value above x, returned as [low, high].
    def neighbours(x)
      f,e = Math.frexp(x.to_f)
      e = Float::MIN_EXP if f==0
      e = [Float::MIN_EXP,e].max
      dx = Math.ldexp(1,e-Float::MANT_DIG) #Math.ldexp(Math.ldexp(1.0,-Float::MANT_DIG),e)

      min_f = 0.5 #0.5==Math.ldexp(2**(bits-1),-Float::MANT_DIG)

      # The spacing is halved on the side of a radix power that is nearer to zero,
      # except at the minimum exponent, where subnormal numbers keep the same spacing.
      # In any other case x+dx and x-dx are exactly representable (or overflow to infinity).
      halved = (e != Float::MIN_EXP)
      high = (halved && f==-min_f) ? x + dx/2 : x + dx
      low = (halved && f==min_f) ? x - dx/2 : x - dx
      [low, high]
    end

    # Define context methods that apply a Float method to their argument converted to Float.
    # Each argument is either a method name or a [float_method, context_method] pair.
    def float_method(*methods) #:nodoc:
      methods.each do |method|
        float_name, context_name = method.is_a?(Array) ? method : [method, method]
        define_method(context_name) do |x|
          x.to_f.send float_name
        end
      end
    end

    # Define a context method that applies a binary Float operator to x (converted to Float) and y.
    def float_binary_operator(method, op) #:nodoc:
      define_method(method) do |x,y|
        x.to_f.send(op,y)
      end
    end

    # Define context methods that call the Math function of the same name on the argument
    # converted to Float.
    def math_function(*methods) #:nodoc:
      methods.each do |method|
        define_method(method) do |x|
          Math.send(method, x.to_f)
        end
      end
    end

  end

  float_method :nan?, :infinite?, :zero?, :abs
  float_binary_operator :add, :+
  float_binary_operator :subtract, :-
  float_binary_operator :multiply, :*
  float_binary_operator :divide, :/
  float_binary_operator :power, :**

  math_function :log, :log10, :exp, :sqrt,
                :sin, :cos, :tan, :asin, :acos, :atan,
                :sinh, :cosh, :tanh, :asinh, :acosh, :atanh
end
|
447
|
+
|
448
|
+
# Context object for Float values: the single Flt::FloatContext instance.
# It offers a limited subset of the Num context interface, so that functions
# can be written to work with either Num or Float values.
class Float
  def self.context
    Flt::FloatContext.instance
  end
end
|
453
|
+
|
454
|
+
# Is Float('...') correctly rounded, even for subnormal numbers?
#
# The answer is false when RUBY_PLATFORM matches mswin32 and true otherwise;
# it is computed once and then cached.
def Flt.float_correctly_rounded?
  # That doesn't seem to be the case for mswin32
  # (the cache is tested against nil because ||= would recompute a false result on every call)
  if @float_correctly_rounded.nil?
    @float_correctly_rounded = (RUBY_PLATFORM =~ /mswin32/).nil?
  end
  @float_correctly_rounded
end
|