distribution 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. data.tar.gz.sig +0 -0
  2. data/History.txt +9 -0
  3. data/Manifest.txt +12 -0
  4. data/README.txt +24 -7
  5. data/Rakefile +2 -0
  6. data/benchmark/binomial_coefficient.rb +55 -0
  7. data/benchmark/factorial_method.rb +4 -1
  8. data/benchmark/odd.rb +22 -0
  9. data/lib/distribution.rb +11 -7
  10. data/lib/distribution/binomial.rb +22 -20
  11. data/lib/distribution/binomial/gsl.rb +14 -0
  12. data/lib/distribution/binomial/java.rb +9 -0
  13. data/lib/distribution/binomial/ruby.rb +26 -0
  14. data/lib/distribution/bivariatenormal.rb +7 -5
  15. data/lib/distribution/bivariatenormal/java.rb +9 -0
  16. data/lib/distribution/chisquare.rb +1 -0
  17. data/lib/distribution/chisquare/gsl.rb +3 -3
  18. data/lib/distribution/chisquare/java.rb +9 -0
  19. data/lib/distribution/f.rb +1 -0
  20. data/lib/distribution/f/gsl.rb +3 -3
  21. data/lib/distribution/f/java.rb +9 -0
  22. data/lib/distribution/hypergeometric.rb +2 -0
  23. data/lib/distribution/hypergeometric/gsl.rb +2 -2
  24. data/lib/distribution/hypergeometric/java.rb +9 -0
  25. data/lib/distribution/hypergeometric/ruby.rb +24 -27
  26. data/lib/distribution/math_extension.rb +90 -8
  27. data/lib/distribution/normal.rb +1 -0
  28. data/lib/distribution/normal/java.rb +9 -0
  29. data/lib/distribution/t.rb +2 -0
  30. data/lib/distribution/t/java.rb +9 -0
  31. data/lib/distribution/t/statistics2.rb +1 -1
  32. data/spec/binomial_spec.rb +118 -0
  33. data/spec/distribution_spec.rb +1 -1
  34. data/spec/f_spec.rb +1 -1
  35. data/spec/hypergeometric_spec.rb +49 -13
  36. data/spec/math_extension_spec.rb +57 -9
  37. data/spec/normal_spec.rb +7 -3
  38. data/spec/shorthand_spec.rb +16 -2
  39. data/spec/t_spec.rb +1 -1
  40. metadata +44 -5
  41. metadata.gz.sig +0 -0
@@ -0,0 +1,9 @@
1
+ module Distribution
2
+ module ChiSquare
3
+ # TODO
4
+ module Java_
5
+ class << self
6
+ end
7
+ end
8
+ end
9
+ end
@@ -1,6 +1,7 @@
1
1
  require 'distribution/f/ruby'
2
2
  require 'distribution/f/gsl'
3
3
  require 'distribution/f/statistics2'
4
+ require 'distribution/f/java'
4
5
  module Distribution
5
6
  # Calculate cdf and inverse cdf for F Distribution.
6
7
  #
@@ -3,14 +3,14 @@ module Distribution
3
3
  module GSL_
4
4
  class << self
5
5
  def pdf(x,k1,k2)
6
- GSL::Ran.fdist_pdf(x,k1,k2)
6
+ GSL::Ran.fdist_pdf(x.to_f,k1,k2)
7
7
  end
8
8
  # Return the P-value of the corresponding integral with
9
9
  # k degrees of freedom
10
10
  #
11
11
  # Distribution::F.p_value(0.95,1,2)
12
12
  def p_value(pr,k1,k2)
13
- GSL::Cdf.fdist_Pinv(pr,k1,k2)
13
+ GSL::Cdf.fdist_Pinv(pr.to_f,k1,k2)
14
14
  end
15
15
  # F cumulative distribution function (cdf).
16
16
  #
@@ -20,7 +20,7 @@ module Distribution
20
20
  # Distribution::F.cdf(20,3,2)
21
21
  #
22
22
  def cdf(x, k1, k2)
23
- GSL::Cdf.fdist_P(x.to_f,k1,k2)
23
+ GSL::Cdf.fdist_P(x.to_f.to_f,k1,k2)
24
24
  end
25
25
  end
26
26
  end
@@ -0,0 +1,9 @@
1
+ module Distribution
2
+ module F
3
+ # TODO
4
+ module Java_
5
+ class << self
6
+ end
7
+ end
8
+ end
9
+ end
@@ -1,5 +1,7 @@
1
1
  require 'distribution/hypergeometric/ruby'
2
2
  require 'distribution/hypergeometric/gsl'
3
+ require 'distribution/hypergeometric/java'
4
+
3
5
 
4
6
  module Distribution
5
7
  # From Wikipedia:
@@ -5,9 +5,9 @@ module Distribution
5
5
  def pdf(k, m, n, total) # :nodoc:
6
6
  GSL::Ran::hypergeometric_pdf(k, m, total-m, n)
7
7
  end
8
- # The GSL::Cdf function for hypergeometric is broken:
8
+ # The GSL::Cdf function for hypergeometric
9
9
  #
10
- def cdf_broken(k, m, n, total) # :nodoc:
10
+ def cdf(k, m, n, total) # :nodoc:
11
11
  GSL::Cdf::hypergeometric_P(k, m, total-m, n)
12
12
  end
13
13
 
@@ -0,0 +1,9 @@
1
+ module Distribution
2
+ module Hypergeometric
3
+ # TODO
4
+ module Java_
5
+ class << self
6
+ end
7
+ end
8
+ end
9
+ end
@@ -1,32 +1,13 @@
1
- # Added by John O. Woods, SciRuby project.
2
-
3
- # Extensions to Fixnum for Hypergeometric calculations in pure Ruby.
4
- # ==Reference
5
- # * http://bluebones.net/2007/09/combinatorics-in-ruby/
6
- # * http://mathworld.wolfram.com/StirlingsApproximation.html
7
- class Fixnum
8
- # +k+-combination of a set of size +self+
9
- def choose(k)
10
- Math.factorial(self) / (Math.factorial(k) * Math.factorial(self - k))
11
- end
12
-
13
- # Fast combination calculation using Gosper's approximation of factorials.
14
- def fast_choose(k)
15
- Math.fast_factorial(self).quo(Math.fast_factorial(self - k) * Math.fast_factorial(k))
16
- end
17
- end
1
+ # Added by John O. Woods, SciRuby project.
2
+ # Optimized by Claudio Bustos
18
3
 
19
4
  module Distribution
20
5
  module Hypergeometric
21
6
  module Ruby_
22
7
  class << self
23
- # Calculates PDF quickly. Not guaranteed to produce any accuracy, since it uses Stirling's approximation.
24
- # This can be improved, most likely, by writing specific cases of when to use fast_choose and when to use
25
- # choose.
26
- def pdf_aprox(k, m, n, total)
27
- m.fast_choose(k) * (total-m).fast_choose(n-k).quo( total.fast_choose(n))
8
+ def bc(n,k)
9
+ Math.binomial_coefficient(n,k)
28
10
  end
29
-
30
11
  # Hypergeometric probability density function
31
12
  #
32
13
  # Probability p(+k+, +m+, +n+, +total+) of drawing sets of size +m+ and +n+ with an intersection of size +k+
@@ -39,19 +20,26 @@ module Distribution
39
20
  min_m_n=m<n ? m : n
40
21
  max_t=[0,m+n-total].max
41
22
  return 0 if k>min_m_n or k<max_t
42
- m.choose(k) * (total-m).choose(n-k) / total.choose(n).to_f
23
+ (bc(m,k) * bc(total-m,n-k)).quo(bc(total,n))
24
+ end
25
+
26
+
27
+
28
+ def pdf_with_den(k,m,n,total,den)
29
+ (bc(m,k) * bc(total-m,n-k)).quo(den)
43
30
  end
44
31
 
45
32
  # p-value:
46
33
 
47
34
  def p_value(pr, m, n, total)
48
35
  ac=0
36
+ den=bc(total,n)
37
+
49
38
  (0..total).each do |i|
50
- ac+=pdf(i,m,n,total)
39
+ ac+=pdf_with_den(i,m,n,total,den)
51
40
  return i if ac>=pr
52
41
  end
53
42
  end
54
-
55
43
  # Cumulative distribution function.
56
44
  # The probability of obtaining, from a sample of
57
45
  # size +n+, +k+ or fewer elements
@@ -61,8 +49,17 @@ module Distribution
61
49
  def cdf(k, m, n, total)
62
50
  raise "k>m" if k>m
63
51
  raise "k>n" if k>n
64
- (0..k).collect { |ki| pdf(ki,m,n,total) }.inject { |sum,v| sum+v}
52
+ # Store the denominator, so it is computed only once
53
+ den=bc(total,n)
54
+ (0..k).collect { |ki| pdf_with_den(ki,m,n,total,den) }.inject { |sum,v| sum+v}
65
55
  end
56
+
57
+
58
+ alias :exact_pdf :pdf
59
+ alias :exact_p_value :p_value
60
+ alias :exact_cdf :cdf
61
+
62
+
66
63
  end
67
64
  end
68
65
  end
@@ -1,4 +1,16 @@
1
- require 'prime'
1
+ if RUBY_VERSION<"1.9"
2
+ require 'mathn'
3
+ def Prime.each(upper,&block)
4
+ @primes=Prime.new
5
+ @primes.each do |prime|
6
+ break if prime > upper.to_i
7
+ block.call(prime)
8
+ end
9
+ end
10
+ else
11
+ require 'prime'
12
+ end
13
+
2
14
  require 'bigdecimal'
3
15
  require 'bigdecimal/math'
4
16
 
@@ -38,30 +50,31 @@ module Distribution
38
50
  return SmallOddSwing[n] if (n<33)
39
51
  sqrtN = Math.sqrt(n).floor
40
52
  count=0
53
+
41
54
  Prime.each(n/3) do |prime|
42
55
  next if prime<3
43
56
  if (prime<=sqrtN)
44
57
  q=n
45
58
  _p=1
46
- while((q/=prime)>0) do
47
- if ((q&1)==1)
59
+
60
+ while((q=(q/prime).truncate)>0) do
61
+ if((q%2)==1)
48
62
  _p*=prime
49
63
  end
50
64
  end
51
-
52
65
  if _p>1
53
66
  @prime_list[count]=_p
54
67
  count+=1
55
68
  end
56
69
 
57
70
  else
58
- if ((n/prime)&1==1)
71
+ if ((n/prime).truncate%2==1)
59
72
  @prime_list[count]=prime
60
73
  count+=1
61
74
  end
62
75
  end
63
76
  end
64
- prod=get_primorial(n/2+1,n)
77
+ prod=get_primorial((n/2).truncate+1,n)
65
78
  prod * @prime_list[0,count].inject(1) {|ac,v| ac*v}
66
79
  end
67
80
  def get_primorial(low,up)
@@ -152,6 +165,37 @@ module Distribution
152
165
  def beta(x,y)
153
166
  (gamma(x)*gamma(y)).quo(gamma(x+y))
154
167
  end
168
+ # I_x(a,b): Regularized incomplete beta function
169
+ #
170
+ # Source:
171
+ #
172
+ def regularized_beta_function(x,a,b)
173
+ return 1 if x==1
174
+ #incomplete_beta(x,a,b).quo(beta(a,b))
175
+ m=a
176
+ n=b+a-1
177
+ (m..n).inject(0) {|sum,j|
178
+ sum+(binomial_coefficient(n,j)* x**j * (1-x)**(n-j))
179
+ }
180
+
181
+ end
182
+ # B_x(a,b) : Incomplete beta function
183
+ # Should be replaced by
184
+ # http://lib.stat.cmu.edu/apstat/63
185
+ def incomplete_beta(x,a,b)
186
+ raise "Not work"
187
+ return beta(a,b) if x==1
188
+
189
+ ((x**a * (1-x)**b).quo(a)) * hyper_f(a+b,1,a+1,x)
190
+ end
191
+ def permutations(x,n)
192
+ factorial(x).quo(factorial(x-n))
193
+ end
194
+
195
+ def rising_factorial(x,n)
196
+ factorial(x+n-1).quo(factorial(x-1))
197
+ end
198
+
155
199
 
156
200
  LOG_2PI = Math.log(2 * Math::PI)# log(2PI)
157
201
  N = 8
@@ -192,17 +236,55 @@ module Distribution
192
236
  end
193
237
  Math.exp(loggamma(x))
194
238
  end
239
+ # Binomial coefficients, or:
240
+ # ( n )
241
+ # ( k )
242
+ # Gives the number of different k size subsets of a set size n
243
+ #
244
+ # Replaces (n,k) with (n, n-k) if k>n-k
245
+ #
246
+ # (n) n^k' (n)..(n-k+1)
247
+ # ( ) = ---- = ------------
248
+ # (k) k! k!
249
+ #
250
+ def binomial_coefficient(n,k)
251
+ return 1 if (k==0 or k==n)
252
+ k=[k, n-k].min
253
+ (((n-k+1)..n).inject(1) {|ac,v| ac * v}).quo(factorial(k))
254
+ # Another way to calculate the binomial coefficient is this:
255
+ # (1..k).inject(1) {|ac, i| (ac*(n-k+i).quo(i))}
256
+ end
257
+ # Approximate binomial coefficient, using gamma function.
258
+ # The fastest method, until we fall back on BigDecimal!
259
+ def binomial_coefficient_gamma(n,k)
260
+ return 1 if (k==0 or k==n)
261
+ k=[k, n-k].min
262
+
263
+ val=gamma(n+1) / (gamma(k+1)*gamma(n-k+1))
264
+ if (val.nan?)
265
+ lg=lgamma(n+1) - (lgamma(k+1)+lgamma(n-k+1))
266
+ val=Math.exp(lg)
267
+ # Crash again! We require BigDecimals
268
+ if val.infinite?
269
+ val=BigMath.exp(BigDecimal(lg.to_s),16)
270
+ end
271
+ end
272
+
273
+ val
274
+ end
195
275
  end
196
276
  end
197
277
 
198
278
  module Math
199
279
  include Distribution::MathExtension
200
- module_function :factorial, :beta, :gamma, :gosper, :loggamma, :fast_factorial
280
+ alias :lgamma :loggamma
281
+
282
+ module_function :factorial, :beta, :gamma, :gosper, :loggamma, :lgamma, :binomial_coefficient, :binomial_coefficient_gamma, :regularized_beta_function, :incomplete_beta, :permutations, :rising_factorial , :fast_factorial
201
283
  end
202
284
 
203
285
  # Necessary on Ruby 1.9
204
286
  module CMath # :nodoc:
205
287
  include Distribution::MathExtension
206
- module_function :factorial, :beta, :gamma, :gosper, :loggamma, :fast_factorial
288
+ module_function :factorial, :beta, :gosper, :loggamma, :binomial_coefficient, :binomial_coefficient_gamma, :regularized_beta_function, :incomplete_beta, :permutations, :rising_factorial, :fast_factorial
207
289
  end
208
290
 
@@ -1,6 +1,7 @@
1
1
  require 'distribution/normal/ruby'
2
2
  require 'distribution/normal/gsl'
3
3
  require 'distribution/normal/statistics2'
4
+ require 'distribution/normal/java'
4
5
 
5
6
  module Distribution
6
7
  # From Wikipedia:
@@ -0,0 +1,9 @@
1
+ module Distribution
2
+ module Normal
3
+ # TODO
4
+ module Java_
5
+ class << self
6
+ end
7
+ end
8
+ end
9
+ end
@@ -1,6 +1,8 @@
1
1
  require 'distribution/t/ruby'
2
2
  require 'distribution/t/gsl'
3
3
  require 'distribution/t/statistics2'
4
+ require 'distribution/t/java'
5
+
4
6
  module Distribution
5
7
 
6
8
  # Calculate statistics for T Distribution.
@@ -0,0 +1,9 @@
1
+ module Distribution
2
+ module T
3
+ # TODO
4
+ module Java_
5
+ class << self
6
+ end
7
+ end
8
+ end
9
+ end
@@ -11,7 +11,7 @@ module Distribution
11
11
 
12
12
 
13
13
  # There are some problems on i686 with t on statistics2
14
- if !RbConfig::CONFIG['arch']=~/i686/
14
+ if true or !RbConfig::CONFIG['arch']=~/i686/
15
15
  # T cumulative distribution function (cdf).
16
16
  #
17
17
  # Returns the integral of t-distribution
@@ -0,0 +1,118 @@
1
+ require File.expand_path(File.dirname(__FILE__)+"/spec_helper.rb")
2
+
3
+ describe Distribution::Binomial do
4
+
5
+ shared_examples_for "binomial engine" do
6
+ it "should return correct pdf" do
7
+ if @engine.respond_to? :pdf
8
+ [10,100,1000].each do |n|
9
+ [1.quo(4),1.quo(2),3.quo(4)].each do |pr|
10
+ [0, 1,n/2,n-1].each do |x|
11
+ exp=Math.binomial_coefficient(n,x)*pr**x*(1-pr)**(n-x)
12
+ obs=@engine.pdf(x,n,pr)
13
+ obs.should be_within(1e-5).of(exp), "For pdf(#{x},#{n},#{pr}) expected #{exp}, obtained #{obs}"
14
+
15
+ end
16
+ end
17
+ end
18
+ else
19
+ pending("No #{@engine}.pdf")
20
+ end
21
+ end
22
+ it_only_with_gsl "should return correct cdf for n<=100" do
23
+ if @engine.respond_to? :pdf
24
+ [10,100].each do |n|
25
+ [0.25,0.5,0.75].each do |pr|
26
+ [1,n/2,n-1].each do |x|
27
+ exp=GSL::Cdf.binomial_P(x,pr,n)
28
+ obs=@engine.cdf(x,n,pr)
29
+ exp.should be_within(1e-5).of(obs), "For cdf(#{x},#{n},#{pr}) expected #{exp}, obtained #{obs}"
30
+ end
31
+ end
32
+ end
33
+ else
34
+ pending("No #{@engine}.cdf")
35
+ end
36
+ end
37
+
38
+
39
+
40
+ end
41
+
42
+ describe "singleton" do
43
+ before do
44
+ @engine=Distribution::Binomial
45
+ end
46
+ it_should_behave_like "binomial engine"
47
+
48
+
49
+ it {@engine.should respond_to(:exact_pdf) }
50
+ it {
51
+ pending("No exact_p_value")
52
+ @engine.should respond_to(:exact_p_value)
53
+ }
54
+
55
+ it "exact_pdf should not return a Float if not float is used as parameter" do
56
+ @engine.exact_pdf(1,1,1).should_not be_a(Float)
57
+ @engine.exact_pdf(16, 80, 1.quo(2)).should_not be_a(Float)
58
+ end
59
+
60
+
61
+
62
+
63
+ end
64
+
65
+ describe Distribution::Binomial::Ruby_ do
66
+ before do
67
+ @engine=Distribution::Binomial::Ruby_
68
+ end
69
+ it_should_behave_like "binomial engine"
70
+
71
+ it "should return correct cdf for n>100" do
72
+ pending("incomplete beta function is slow. Should be replaced for a faster one")
73
+ end
74
+
75
+ it "should return correct p_value for n<=100" do
76
+ pending("Can't calculate with precision x using p")
77
+ [10,100].each do |n|
78
+ [0.25,0.5,0.75].each do |pr|
79
+ [n/2].each do |x|
80
+ cdf=@engine.cdf(x,n,pr)
81
+ p_value=@engine.p_value(cdf,n,pr)
82
+
83
+ p_value.should eq(x), "For p_value(#{cdf},#{n},#{pr}) expected #{x}, obtained #{p_value}"
84
+ end
85
+ end
86
+ end
87
+ end
88
+
89
+ end
90
+ if Distribution.has_gsl?
91
+ describe Distribution::Binomial::GSL_ do
92
+ before do
93
+ @engine=Distribution::Binomial::GSL_
94
+ end
95
+ it_should_behave_like "binomial engine"
96
+ end
97
+ end
98
+ #if Distribution.has_statistics2?
99
+ # describe Distribution::Binomial::Statistics2_ do
100
+ #
101
+ # before do
102
+ # @engine=Distribution::Binomial::Statistics2_
103
+ # end
104
+ #it_should_behave_like "binomial engine"
105
+ # end
106
+ #end
107
+
108
+ if Distribution.has_java?
109
+ describe Distribution::Binomial::Java_ do
110
+ before do
111
+ @engine=Distribution::Binomial::Java_
112
+ end
113
+ it_should_behave_like "binomial engine"
114
+
115
+ end
116
+ end
117
+
118
+ end