flt 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. data/History.txt +41 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +42 -0
  4. data/README.txt +557 -0
  5. data/Rakefile +34 -0
  6. data/lib/flt.rb +9 -0
  7. data/lib/flt/b.rb +6 -0
  8. data/lib/flt/bigdecimal.rb +151 -0
  9. data/lib/flt/bin_num.rb +250 -0
  10. data/lib/flt/d.rb +6 -0
  11. data/lib/flt/dec_num.rb +1239 -0
  12. data/lib/flt/float.rb +458 -0
  13. data/lib/flt/math.rb +66 -0
  14. data/lib/flt/num.rb +4211 -0
  15. data/lib/flt/sugar.rb +102 -0
  16. data/lib/flt/support.rb +1335 -0
  17. data/lib/flt/tolerance.rb +561 -0
  18. data/lib/flt/tolerance/sugar.rb +77 -0
  19. data/lib/flt/version.rb +9 -0
  20. data/setup.rb +1585 -0
  21. data/tasks/ann.rake +80 -0
  22. data/tasks/bones.rake +20 -0
  23. data/tasks/gem.rake +192 -0
  24. data/tasks/git.rake +40 -0
  25. data/tasks/manifest.rake +48 -0
  26. data/tasks/notes.rake +27 -0
  27. data/tasks/post_load.rake +39 -0
  28. data/tasks/rdoc.rake +50 -0
  29. data/tasks/rubyforge.rake +55 -0
  30. data/tasks/setup.rb +279 -0
  31. data/tasks/spec.rake +54 -0
  32. data/tasks/svn.rake +47 -0
  33. data/tasks/test.rake +40 -0
  34. data/test/all_tests.rb +23 -0
  35. data/test/helper.rb +101 -0
  36. data/test/reader.rb +68 -0
  37. data/test/test_basic.rb +396 -0
  38. data/test/test_bin.rb +245 -0
  39. data/test/test_bin_arithmetic.rb +94 -0
  40. data/test/test_binfloat_conversion.rb +24 -0
  41. data/test/test_coercion.rb +22 -0
  42. data/test/test_comparisons.rb +53 -0
  43. data/test/test_dectest.rb +216 -0
  44. data/test/test_define_conversions.rb +144 -0
  45. data/test/test_epsilon.rb +55 -0
  46. data/test/test_exact.rb +147 -0
  47. data/test/test_flags.rb +34 -0
  48. data/test/test_multithreading.rb +32 -0
  49. data/test/test_num_constructor.rb +133 -0
  50. data/test/test_odd_even.rb +78 -0
  51. data/test/test_round.rb +104 -0
  52. data/test/test_to_int.rb +104 -0
  53. data/test/test_to_rf.rb +36 -0
  54. data/test/test_tol.rb +102 -0
  55. data/test/test_ulp.rb +127 -0
  56. metadata +147 -0
data/lib/flt/float.rb ADDED
@@ -0,0 +1,458 @@
1
+ # Support classes for homogeneous treatment of Float and Num values by defining Float.context
2
+ #
3
+ # The set of constants with Float metadata is also augmented.
4
+
5
+ require 'flt'
6
+
7
+ # Float constants.
8
+ #
9
+ # Note that this uses the "fractional significand" interpretation,
10
+ # i.e. the significand has the radix point before its first digit.
11
+ #
12
+ # Float::RADIX : b = Radix of exponent representation,2
13
+ #
14
+ # Float::MANT_DIG : p = bits (base-RADIX digits) in the significand
15
+ #
16
+ # Float::DIG : q = Number of decimal digits such that any floating-point number with q
17
+ # decimal digits can be rounded into a floating-point number with p radix b
18
+ # digits and back again without change to the q decimal digits,
19
+ # q = p * log10(b) if b is a power of 10
20
+ # q = floor((p - 1) * log10(b)) otherwise
21
+ # ((Float::MANT_DIG-1)*Math.log(Float::RADIX)/Math.log(10)).floor
22
+ #
23
+ # Float::MIN_EXP : emin = Minimum int x such that Float::RADIX**(x-1) is a normalized float
24
+ #
25
+ # Float::MIN_10_EXP : Minimum negative integer such that 10 raised to that power is in the
26
+ # range of normalized floating-point numbers,
27
+ # ceil(log10(b) * (emin - 1))
28
+ #
29
+ # Float::MAX_EXP : emax = Maximum int x such that Float::RADIX**(x-1) is a representable float
30
+ #
31
+ # Float::MAX_10_EXP : Maximum integer such that 10 raised to that power is in the range of
32
+ # representable finite floating-point numbers,
33
+ # floor(log10((1 - b**-p) * b**emax))
34
+ #
35
+ # Float::MAX : Maximum representable finite floating-point number
36
+ # (1 - b**-p) * b**emax
37
+ #
38
+ # Float::EPSILON : The difference between 1 and the least value greater than 1 that is
39
+ # representable in the given floating point type
40
+ # b**(1-p)
41
+ # Math.ldexp(*Math.frexp(1).collect{|e| e.kind_of?(Integer) ? e-(Float::MANT_DIG-1) : e})
42
+ #
43
+ # Float::MIN : Minimum normalized positive floating-point number
44
+ # b**(emin - 1).
45
+ #
46
+ # Float::ROUNDS : Addition rounds to 0: zero, 1: nearest, 2: +inf, 3: -inf, -1: unknown.
47
+ #
48
+ # Additional constants defined here:
49
+ #
50
+ # Float::DECIMAL_DIG : Number of decimal digits, n, such that any floating-point number can be rounded
51
+ # to a floating-point number with n decimal digits and back again without
52
+ # change to the value,
53
+ # pmax * log10(b) if b is a power of 10
54
+ # ceil(1 + pmax * log10(b)) otherwise
55
+ # DECIMAL_DIG = (MANT_DIG*Math.log(RADIX)/Math.log(10)).ceil+1
56
+ #
57
+ # Float::MIN_N : Minimum normalized number == MAX_D.next == MIN
58
+ #
59
+ # Float::MAX_D : Maximum denormal number == MIN_N.prev
60
+ #
61
+ # Float::MIN_D : Minimum non zero positive denormal number == 0.0.next
62
+ #
63
+ # Float::MAX_F : Maximum significand
64
class Float

  # Decimal digits computed as ceil(MANT_DIG*log10(RADIX)) + 1
  DECIMAL_DIG = (MANT_DIG*Math.log(RADIX)/Math.log(10)).ceil+1

  # Minimum normalized number == MAX_D.next == Float::MIN
  MIN_N = Math.ldexp(0.5,Float::MIN_EXP)

  # Maximum denormal number == MIN_N.prev
  MAX_D = Math.ldexp(Math.ldexp(1,Float::MANT_DIG-1)-1,Float::MIN_EXP-Float::MANT_DIG)

  # Minimum non zero positive denormal number == 0.0.next
  MIN_D = Math.ldexp(1,Float::MIN_EXP-Float::MANT_DIG)

  # Maximum significand == Math.ldexp(Math.ldexp(1,Float::MANT_DIG)-1,-Float::MANT_DIG)
  # (the equality above is documentation only; it must not be part of the
  # assignment, otherwise the constant would hold a boolean)
  MAX_F = Math.frexp(Float::MAX)[0]

end
81
+
82
+ require 'singleton'
83
+
84
+ # Context class with some of the Flt::Num context functionality, to allow the use of Float numbers
85
+ # similarly to other Num values; this eases the implementation of functions compatible with either
86
+ # Num or Float values.
87
class Flt::FloatContext

  include Singleton

  # Class of the numbers handled by this context.
  def num_class
    Float
  end

  # Build a Float from its components. Accepted forms (the arguments may also be
  # passed packed in a single Array):
  # * Num(sign, coefficient, exponent) => Math.ldexp(sign*coefficient, exponent)
  # * Num(coefficient, exponent)       => Math.ldexp(coefficient, exponent)
  # * Num(value)                       => Float(value)
  # Any other number of arguments yields nil.
  def Num(*args)
    # Unwrap a single Array argument explicitly: <tt>args = *args</tt> only
    # flattened it in Ruby 1.8 and is a no-op in later versions.
    args = args.first if args.size==1 && args.first.is_a?(Array)
    case args.size
    when 3
      Math.ldexp(args[0]*args[1],args[2])
    when 2
      Math.ldexp(args[0],args[1])
    when 1
      Float(*args)
    end
  end

  def radix
    Float::RADIX
  end

  # NaN (not a number value)
  def nan
    0.0/0.0
  end

  # zero value with specified sign
  def zero(sign=+1)
    (sign < 0) ? -0.0 : 0.0
  end

  # infinity value with specified sign
  def infinity(sign=+1)
    (sign < 0) ? -1.0/0.0 : 1.0/0.0
  end

  # Integer power of two (computed with a bit shift, so this assumes radix 2)
  def int_radix_power(n)
    1 << n
  end

  # This is the difference between 1.0 and the smallest floating-point
  # value greater than 1.0, radix_power(1-significand_precision)
  #
  # We have:
  #   Float.epsilon == (1.0.next-1.0)
  def epsilon(sign=+1)
    (sign < 0) ? -Float::EPSILON : Float::EPSILON
  end

  # The strict epsilon is the smallest value that produces something different from 1.0
  # when added to 1.0. It may be smaller than the general epsilon, because
  # of the particular rounding rules used with the floating point format.
  # This is only meaningful when well-defined rules are used for rounding the result
  # of floating-point addition.
  #
  # We have:
  #   (Float.strict_epsilon+1.0) == 1.0.next
  #   (Float.strict_epsilon.prev+1.0) == 1.0
  #
  # The result carries the requested sign; the +round+ argument is accepted
  # but not used.
  def strict_epsilon(sign=+1, round=nil)
    # We don't rely on Float::ROUNDS: candidates are probed from the smallest
    # to the largest and the first one that changes 1.0 is kept.
    eps = minimum_nonzero
    unless (1.0+eps) > 1.0
      f,e = Math.frexp(1)
      half = Math.ldexp(f,e-Float::MANT_DIG)                  # epsilon/2
      above_half = Math.ldexp(next_plus(f),e-Float::MANT_DIG) # just over epsilon/2
      if (1.0+half) > 1.0
        eps = half
      elsif (1.0+above_half) > 1.0
        eps = above_half
      else
        eps = Math.ldexp(f,e-Float::MANT_DIG+1)               # epsilon
      end
    end
    (sign < 0) ? -eps : eps
  end

  # This is the maximum relative error corresponding to 1/2 ulp:
  #  (radix/2)*radix_power(-significand_precision) == epsilon/2
  # This is called "machine epsilon" in [Goldberg]
  # We have:
  #
  #  Float.half_epsilon == 0.5*Float.epsilon
  def half_epsilon(sign=+1)
    f,e = Math.frexp(1)
    half = Math.ldexp(f, e-Float::MANT_DIG)
    (sign < 0) ? -half : half
  end

  # minimum normal Float value (with specified sign)
  def minimum_normal(sign=+1)
    (sign < 0) ? -Float::MIN_N : Float::MIN_N
  end

  # maximum subnormal (denormalized) Float value (with specified sign)
  def maximum_subnormal(sign=+1)
    (sign < 0) ? -Float::MAX_D : Float::MAX_D
  end

  # minimum (subnormal) nonzero Float value, with specified sign
  def minimum_nonzero(sign=+1)
    (sign < 0) ? -Float::MIN_D : Float::MIN_D
  end

  # maximum finite Float value, with specified sign
  def maximum_finite(sign=+1)
    (sign < 0) ? -Float::MAX : Float::MAX
  end

  # Number of radix digits in the significand
  def precision
    Float::MANT_DIG
  end

  # Largest integral significand: int_radix_power(precision)-1
  def maximum_coefficient
    int_radix_power(precision)-1
  end

  # Smallest integral significand of a normalized number: int_radix_power(precision-1)
  def minimum_normalized_coefficient
    # int_radix_power is a method of this context, not of Float (num_class)
    int_radix_power(precision-1)
  end

  def exact?
    false
  end

  # detect actual rounding mode
  def rounding
    Flt::Support::AuxiliarFunctions.detect_float_rounding
  end

  # Float::MIN_EXP-1 (note that Float::MIN_N == 2**(Float::MIN_EXP-1))
  def emin
    Float::MIN_EXP-1
  end

  # Float::MAX_EXP-1
  def emax
    Float::MAX_EXP-1
  end

  # Exponent of the minimum nonzero value: Float::MIN_D == Math.ldexp(1, etiny)
  def etiny
    Float::MIN_EXP - Float::MANT_DIG
  end

  # Float::MAX_EXP - Float::MANT_DIG
  def etop
    Float::MAX_EXP - Float::MANT_DIG
  end

  # Smallest Float neighbour above x (see FloatContext.neighbours)
  def next_plus(x)
    Flt::FloatContext.neighbours(x).last
  end

  # Largest Float neighbour below x (see FloatContext.neighbours)
  def next_minus(x)
    Flt::FloatContext.neighbours(x).first
  end

  # Neighbour of x in the direction of y. If both compare equal, x is returned
  # with the sign of y; if they cannot be ordered (NaN), NaN is returned.
  def next_toward(x, y)
    x, y = x.to_f, y.to_f
    comparison = x <=> y
    return nan if comparison.nil?
    return copy_sign(x, y) if comparison == 0
    (comparison < 0) ? next_plus(x) : next_minus(x)
  end

  # Sign: -1 for minus, +1 for plus, nil for nan (note that Float zero is signed)
  def sign(x)
    x = x.to_f
    if x.nan?
      nil
    elsif x.zero?
      # Note that (x.to_s[0,1] == "-" ? -1 : +1) fails under mswin32
      # because in that platform (-0.0).to_s == '0.0'
      (1/x < 0) ? -1 : +1
    else
      x < 0 ? -1 : +1
    end
  end

  # Return copy of x with the sign of y (y may also be an Integer, whose
  # own sign is then used). The result is NaN if either sign is undefined.
  def copy_sign(x, y)
    self_sign = sign(x)
    other_sign = y.is_a?(Integer) ? (y < 0 ? -1 : +1) : sign(y)
    return nan unless self_sign && other_sign
    (self_sign == other_sign) ? x.to_f : -x.to_f
  end

  # Returns the internal representation of the number, composed of:
  # * a sign which is +1 for plus and -1 for minus (nil for NaN)
  # * a coefficient (significand) which is a nonnegative integer
  #   (nil for special values)
  # * an exponent (an integer) or :inf, :nan for special values
  # The value of non-special numbers is sign*coefficient*2^exponent
  def split(x)
    x = x.to_f
    sgn = sign(x)
    if x.nan?
      coeff, exp = nil, :nan
    elsif x.infinite?
      coeff, exp = nil, :inf
    else
      coeff, exp = to_int_scale(x.abs)
    end
    [sgn, coeff, exp]
  end

  # Return the value of the number as a signed integer and a scale
  # (x == integer*2^scale); nil is returned for NaN and infinities.
  def to_int_scale(x)
    x = x.to_f
    return nil if special?(x)
    coeff,exp = Math.frexp(x)
    if exp < Float::MIN_EXP
      # denormalized number: the exponent is pinned to the minimum one
      coeff = Math.ldexp(coeff, exp-Float::MIN_EXP+Float::MANT_DIG).to_i
      exp = Float::MIN_EXP-Float::MANT_DIG
    else
      # normalized number: scale the fraction to a MANT_DIG-bit integer
      coeff = Math.ldexp(coeff, Float::MANT_DIG).to_i
      exp -= Float::MANT_DIG
    end
    [coeff, exp]
  end

  # ulp (unit in the last place) according to the definition proposed by J.M. Muller in
  # "On the definition of ulp(x)" INRIA No. 5504
  #
  # With mode :low (the default) exact powers of the radix get the smaller of
  # the two possible ulps. NaN is returned unchanged.
  def ulp(x, mode=:low)
    x = x.to_f
    return x if x.nan?
    x = x.abs
    if x < Math.ldexp(1,Float::MIN_EXP) # x < Float::RADIX*Float::MIN_N
      Math.ldexp(1,Float::MIN_EXP-Float::MANT_DIG) # Float::MIN_D
    elsif x > Float::MAX # x > Math.ldexp(1-Math.ldexp(1,-Float::MANT_DIG),Float::MAX_EXP)
      Math.ldexp(1,Float::MAX_EXP-Float::MANT_DIG) # Float::MAX - Float::MAX.prev
    else
      f,e = Math.frexp(x)
      e -= 1 if mode==:low && f==Math.ldexp(1,-1) # assign the smaller ulp to radix powers
      Math.ldexp(1,e-Float::MANT_DIG)
    end
  end

  # Is x a NaN or an infinity?
  def special?(x)
    x = x.to_f
    x.nan? || x.infinite?
  end

  # Is x a normal number (finite, nonzero, magnitude not below Float::MIN_N)?
  def normal?(x)
    x = x.to_f
    if special?(x) || x.zero?
      false
    else
      x.abs >= Float::MIN_N
    end
  end

  # Is x a subnormal number (finite, nonzero, magnitude below Float::MIN_N)?
  def subnormal?(x)
    x = x.to_f
    if special?(x) || x.zero?
      false
    else
      x.abs < Float::MIN_N
    end
  end

  def plus(x)
    x.to_f
  end

  def minus(x)
    -x.to_f
  end

  class << self
    # Compute the adjacent floating point values: largest value not larger than
    # this and smallest not smaller.
    # Returns the pair [low, high].
    def neighbours(x)
      x = x.to_f
      f,e = Math.frexp(x)
      e = Float::MIN_EXP if f==0
      e = [Float::MIN_EXP,e].max
      dx = Math.ldexp(1,e-Float::MANT_DIG) #Math.ldexp(Math.ldexp(1.0,-Float::MANT_DIG),e)

      min_f = 0.5 #0.5==Math.ldexp(2**(bits-1),-Float::MANT_DIG)
      at_min_exp = (e==Float::MIN_EXP)

      # The spacing is dx everywhere except when stepping towards zero from a
      # power of the radix (fraction +/-0.5) above the minimum exponent, where
      # it is halved. Stepping away from zero from the largest fraction needs
      # no special case: x +/- dx is then exactly the next radix power.
      high = (f==-min_f && !at_min_exp) ? x + dx/2 : x + dx
      low  = (f==min_f && !at_min_exp) ? x - dx/2 : x - dx
      [low, high]
    end

    # Define context methods that forward to a Float method of the (converted)
    # argument; each item is either a method name or a pair
    # [float_method, context_method].
    def float_method(*methods) #:nodoc:
      methods.each do |method|
        if method.is_a?(Array)
          float_method, context_method = method
        else
          float_method = context_method = method
        end
        define_method(context_method) do |x|
          x.to_f.send float_method
        end
      end
    end

    # Define a two-argument context method that applies the operator +op+
    # to the first argument converted to Float.
    def float_binary_operator(method, op) #:nodoc:
      define_method(method) do |x,y|
        x.to_f.send(op,y)
      end
    end

    # Define context methods that forward to the Math function of the same name.
    def math_function(*methods) #:nodoc:
      methods.each do |method|
        define_method(method) do |x|
          Math.send(method, x.to_f)
        end
      end
    end

  end

  float_method :nan?, :infinite?, :zero?, :abs
  float_binary_operator :add, :+
  float_binary_operator :subtract, :-
  float_binary_operator :multiply, :*
  float_binary_operator :divide, :/
  float_binary_operator :power, :**

  math_function :log, :log10, :exp, :sqrt,
                :sin, :cos, :tan, :asin, :acos, :atan,
                :sinh, :cosh, :tanh, :asinh, :acosh, :atanh
end
447
+
448
+ # Return a (limited) context object for Float.
449
+ # This eases the implementation of functions compatible with either Num or Float values.
450
class << Float
  # Context object (the Flt::FloatContext singleton instance) for Float values.
  def context
    Flt::FloatContext.instance
  end
end
453
+
454
+ # Is Float('...') correctly rounded, even for subnormal numbers?
455
def Flt.float_correctly_rounded?
  # That doesn't seem to be the case for mswin32.
  # The answer is computed once; defined? is used instead of ||= so that
  # a false result is cached too.
  unless defined?(@float_correctly_rounded)
    @float_correctly_rounded = RUBY_PLATFORM.match(/mswin32/).nil?
  end
  @float_correctly_rounded
end