distribution 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. data.tar.gz.sig +0 -0
  2. data/History.txt +9 -0
  3. data/Manifest.txt +12 -0
  4. data/README.txt +24 -7
  5. data/Rakefile +2 -0
  6. data/benchmark/binomial_coefficient.rb +55 -0
  7. data/benchmark/factorial_method.rb +4 -1
  8. data/benchmark/odd.rb +22 -0
  9. data/lib/distribution.rb +11 -7
  10. data/lib/distribution/binomial.rb +22 -20
  11. data/lib/distribution/binomial/gsl.rb +14 -0
  12. data/lib/distribution/binomial/java.rb +9 -0
  13. data/lib/distribution/binomial/ruby.rb +26 -0
  14. data/lib/distribution/bivariatenormal.rb +7 -5
  15. data/lib/distribution/bivariatenormal/java.rb +9 -0
  16. data/lib/distribution/chisquare.rb +1 -0
  17. data/lib/distribution/chisquare/gsl.rb +3 -3
  18. data/lib/distribution/chisquare/java.rb +9 -0
  19. data/lib/distribution/f.rb +1 -0
  20. data/lib/distribution/f/gsl.rb +3 -3
  21. data/lib/distribution/f/java.rb +9 -0
  22. data/lib/distribution/hypergeometric.rb +2 -0
  23. data/lib/distribution/hypergeometric/gsl.rb +2 -2
  24. data/lib/distribution/hypergeometric/java.rb +9 -0
  25. data/lib/distribution/hypergeometric/ruby.rb +24 -27
  26. data/lib/distribution/math_extension.rb +90 -8
  27. data/lib/distribution/normal.rb +1 -0
  28. data/lib/distribution/normal/java.rb +9 -0
  29. data/lib/distribution/t.rb +2 -0
  30. data/lib/distribution/t/java.rb +9 -0
  31. data/lib/distribution/t/statistics2.rb +1 -1
  32. data/spec/binomial_spec.rb +118 -0
  33. data/spec/distribution_spec.rb +1 -1
  34. data/spec/f_spec.rb +1 -1
  35. data/spec/hypergeometric_spec.rb +49 -13
  36. data/spec/math_extension_spec.rb +57 -9
  37. data/spec/normal_spec.rb +7 -3
  38. data/spec/shorthand_spec.rb +16 -2
  39. data/spec/t_spec.rb +1 -1
  40. metadata +44 -5
  41. metadata.gz.sig +0 -0
@@ -0,0 +1,9 @@
1
+ module Distribution
2
+ module ChiSquare
3
+ # TODO
4
+ module Java_
5
+ class << self
6
+ end
7
+ end
8
+ end
9
+ end
@@ -1,6 +1,7 @@
1
1
  require 'distribution/f/ruby'
2
2
  require 'distribution/f/gsl'
3
3
  require 'distribution/f/statistics2'
4
+ require 'distribution/f/java'
4
5
  module Distribution
5
6
  # Calculate cdf and inverse cdf for Chi Square Distribution.
6
7
  #
@@ -3,14 +3,14 @@ module Distribution
3
3
  module GSL_
4
4
  class << self
5
5
  def pdf(x,k1,k2)
6
- GSL::Ran.fdist_pdf(x,k1,k2)
6
+ GSL::Ran.fdist_pdf(x.to_f,k1,k2)
7
7
  end
8
8
  # Return the P-value of the corresponding integral with
9
9
  # k degrees of freedom
10
10
  #
11
11
  # Distribution::F.p_value(0.95,1,2)
12
12
  def p_value(pr,k1,k2)
13
- GSL::Cdf.fdist_Pinv(pr,k1,k2)
13
+ GSL::Cdf.fdist_Pinv(pr.to_f,k1,k2)
14
14
  end
15
15
  # F cumulative distribution function (cdf).
16
16
  #
@@ -20,7 +20,7 @@ module Distribution
20
20
  # Distribution::F.cdf(20,3,2)
21
21
  #
22
22
  def cdf(x, k1, k2)
23
- GSL::Cdf.fdist_P(x.to_f,k1,k2)
23
+ GSL::Cdf.fdist_P(x.to_f.to_f,k1,k2)
24
24
  end
25
25
  end
26
26
  end
@@ -0,0 +1,9 @@
1
+ module Distribution
2
+ module F
3
+ # TODO
4
+ module Java_
5
+ class << self
6
+ end
7
+ end
8
+ end
9
+ end
@@ -1,5 +1,7 @@
1
1
  require 'distribution/hypergeometric/ruby'
2
2
  require 'distribution/hypergeometric/gsl'
3
+ require 'distribution/hypergeometric/java'
4
+
3
5
 
4
6
  module Distribution
5
7
  # From Wikipedia:
@@ -5,9 +5,9 @@ module Distribution
5
5
  def pdf(k, m, n, total) # :nodoc:
6
6
  GSL::Ran::hypergeometric_pdf(k, m, total-m, n)
7
7
  end
8
- # The GSL::Cdf function for hypergeometric is broken:
8
+ # The GSL::Cdf function for hypergeometric
9
9
  #
10
- def cdf_broken(k, m, n, total) # :nodoc:
10
+ def cdf(k, m, n, total) # :nodoc:
11
11
  GSL::Cdf::hypergeometric_P(k, m, total-m, n)
12
12
  end
13
13
 
@@ -0,0 +1,9 @@
1
+ module Distribution
2
+ module Hypergeometric
3
+ # TODO
4
+ module Java_
5
+ class << self
6
+ end
7
+ end
8
+ end
9
+ end
@@ -1,32 +1,13 @@
1
- # Added by John O. Woods, SciRuby project.
2
-
3
- # Extensions to Fixnum for Hypergeometric calculations in pure Ruby.
4
- # ==Reference
5
- # * http://bluebones.net/2007/09/combinatorics-in-ruby/
6
- # * http://mathworld.wolfram.com/StirlingsApproximation.html
7
- class Fixnum
8
- # +k+-combination of a set of size +self+
9
- def choose(k)
10
- Math.factorial(self) / (Math.factorial(k) * Math.factorial(self - k))
11
- end
12
-
13
- # Fast combination calculation using Gosper's approximation of factorials.
14
- def fast_choose(k)
15
- Math.fast_factorial(self).quo(Math.fast_factorial(self - k) * Math.fast_factorial(k))
16
- end
17
- end
1
+ # Added by John O. Woods, SciRuby project.
2
+ # Optimized by Claudio Bustos
18
3
 
19
4
  module Distribution
20
5
  module Hypergeometric
21
6
  module Ruby_
22
7
  class << self
23
- # Calculates PDF quickly. Not guaranteed to produce any accuracy, since it uses Stirling's approximation.
24
- # This can be improved, most likely, by writing specific cases of when to use fast_choose and when to use
25
- # choose.
26
- def pdf_aprox(k, m, n, total)
27
- m.fast_choose(k) * (total-m).fast_choose(n-k).quo( total.fast_choose(n))
8
+ def bc(n,k)
9
+ Math.binomial_coefficient(n,k)
28
10
  end
29
-
30
11
  # Hypergeometric probability density function
31
12
  #
32
13
  # Probability p(+k+, +m+, +n+, +total+) of drawing sets of size +m+ and +n+ with an intersection of size +k+
@@ -39,19 +20,26 @@ module Distribution
39
20
  min_m_n=m<n ? m : n
40
21
  max_t=[0,m+n-total].max
41
22
  return 0 if k>min_m_n or k<max_t
42
- m.choose(k) * (total-m).choose(n-k) / total.choose(n).to_f
23
+ (bc(m,k) * bc(total-m,n-k)).quo(bc(total,n))
24
+ end
25
+
26
+
27
+
28
+ def pdf_with_den(k,m,n,total,den)
29
+ (bc(m,k) * bc(total-m,n-k)).quo(den)
43
30
  end
44
31
 
45
32
  # p-value:
46
33
 
47
34
  def p_value(pr, m, n, total)
48
35
  ac=0
36
+ den=bc(total,n)
37
+
49
38
  (0..total).each do |i|
50
- ac+=pdf(i,m,n,total)
39
+ ac+=pdf_with_den(i,m,n,total,den)
51
40
  return i if ac>=pr
52
41
  end
53
42
  end
54
-
55
43
  # Cumulative distribution function.
56
44
  # The probability of obtain, from a sample of
57
45
  # size +n+, +k+ or less elements
@@ -61,8 +49,17 @@ module Distribution
61
49
  def cdf(k, m, n, total)
62
50
  raise "k>m" if k>m
63
51
  raise "k>n" if k>n
64
- (0..k).collect { |ki| pdf(ki,m,n,total) }.inject { |sum,v| sum+v}
52
+ # Store the den
53
+ den=bc(total,n)
54
+ (0..k).collect { |ki| pdf_with_den(ki,m,n,total,den) }.inject { |sum,v| sum+v}
65
55
  end
56
+
57
+
58
+ alias :exact_pdf :pdf
59
+ alias :exact_p_value :p_value
60
+ alias :exact_cdf :cdf
61
+
62
+
66
63
  end
67
64
  end
68
65
  end
@@ -1,4 +1,16 @@
1
- require 'prime'
1
+ if RUBY_VERSION<"1.9"
2
+ require 'mathn'
3
+ def Prime.each(upper,&block)
4
+ @primes=Prime.new
5
+ @primes.each do |prime|
6
+ break if prime > upper.to_i
7
+ block.call(prime)
8
+ end
9
+ end
10
+ else
11
+ require 'prime'
12
+ end
13
+
2
14
  require 'bigdecimal'
3
15
  require 'bigdecimal/math'
4
16
 
@@ -38,30 +50,31 @@ module Distribution
38
50
  return SmallOddSwing[n] if (n<33)
39
51
  sqrtN = Math.sqrt(n).floor
40
52
  count=0
53
+
41
54
  Prime.each(n/3) do |prime|
42
55
  next if prime<3
43
56
  if (prime<=sqrtN)
44
57
  q=n
45
58
  _p=1
46
- while((q/=prime)>0) do
47
- if ((q&1)==1)
59
+
60
+ while((q=(q/prime).truncate)>0) do
61
+ if((q%2)==1)
48
62
  _p*=prime
49
63
  end
50
64
  end
51
-
52
65
  if _p>1
53
66
  @prime_list[count]=_p
54
67
  count+=1
55
68
  end
56
69
 
57
70
  else
58
- if ((n/prime)&1==1)
71
+ if ((n/prime).truncate%2==1)
59
72
  @prime_list[count]=prime
60
73
  count+=1
61
74
  end
62
75
  end
63
76
  end
64
- prod=get_primorial(n/2+1,n)
77
+ prod=get_primorial((n/2).truncate+1,n)
65
78
  prod * @prime_list[0,count].inject(1) {|ac,v| ac*v}
66
79
  end
67
80
  def get_primorial(low,up)
@@ -152,6 +165,37 @@ module Distribution
152
165
  def beta(x,y)
153
166
  (gamma(x)*gamma(y)).quo(gamma(x+y))
154
167
  end
168
+ # I_x(a,b): Regularized incomplete beta function
169
+ #
170
+ # Source:
171
+ #
172
+ def regularized_beta_function(x,a,b)
173
+ return 1 if x==1
174
+ #incomplete_beta(x,a,b).quo(beta(a,b))
175
+ m=a
176
+ n=b+a-1
177
+ (m..n).inject(0) {|sum,j|
178
+ sum+(binomial_coefficient(n,j)* x**j * (1-x)**(n-j))
179
+ }
180
+
181
+ end
182
+ # B_x(a,b) : Incomplete beta function
183
+ # Should be replaced by
184
+ # http://lib.stat.cmu.edu/apstat/63
185
+ def incomplete_beta(x,a,b)
186
+ raise "Not work"
187
+ return beta(a,b) if x==1
188
+
189
+ ((x**a * (1-x)**b).quo(a)) * hyper_f(a+b,1,a+1,x)
190
+ end
191
+ def permutations(x,n)
192
+ factorial(x).quo(factorial(x-n))
193
+ end
194
+
195
+ def rising_factorial(x,n)
196
+ factorial(x+n-1).quo(factorial(x-1))
197
+ end
198
+
155
199
 
156
200
  LOG_2PI = Math.log(2 * Math::PI)# log(2PI)
157
201
  N = 8
@@ -192,17 +236,55 @@ module Distribution
192
236
  end
193
237
  Math.exp(loggamma(x))
194
238
  end
239
+ # Binomial coeffients, or:
240
+ # ( n )
241
+ # ( k )
242
+ # Gives the number of different k size subsets of a set size n
243
+ #
244
+ # Replaces (n,k) for (n, n-k) if k>n-k
245
+ #
246
+ # (n) n^k' (n)..(n-k+1)
247
+ # ( ) = ---- = ------------
248
+ # (k) k! k!
249
+ #
250
+ def binomial_coefficient(n,k)
251
+ return 1 if (k==0 or k==n)
252
+ k=[k, n-k].min
253
+ (((n-k+1)..n).inject(1) {|ac,v| ac * v}).quo(factorial(k))
254
+ # Other way to calcule binomial is this:
255
+ # (1..k).inject(1) {|ac, i| (ac*(n-k+i).quo(i))}
256
+ end
257
+ # Approximate binomial coefficient, using gamma function.
258
+ # The fastest method, until we fall on BigDecimal!
259
+ def binomial_coefficient_gamma(n,k)
260
+ return 1 if (k==0 or k==n)
261
+ k=[k, n-k].min
262
+
263
+ val=gamma(n+1) / (gamma(k+1)*gamma(n-k+1))
264
+ if (val.nan?)
265
+ lg=lgamma(n+1) - (lgamma(k+1)+lgamma(n-k+1))
266
+ val=Math.exp(lg)
267
+ # Crash again! We require BigDecimals
268
+ if val.infinite?
269
+ val=BigMath.exp(BigDecimal(lg.to_s),16)
270
+ end
271
+ end
272
+
273
+ val
274
+ end
195
275
  end
196
276
  end
197
277
 
198
278
  module Math
199
279
  include Distribution::MathExtension
200
- module_function :factorial, :beta, :gamma, :gosper, :loggamma, :fast_factorial
280
+ alias :lgamma :loggamma
281
+
282
+ module_function :factorial, :beta, :gamma, :gosper, :loggamma, :lgamma, :binomial_coefficient, :binomial_coefficient_gamma, :regularized_beta_function, :incomplete_beta, :permutations, :rising_factorial , :fast_factorial
201
283
  end
202
284
 
203
285
  # Necessary on Ruby 1.9
204
286
  module CMath # :nodoc:
205
287
  include Distribution::MathExtension
206
- module_function :factorial, :beta, :gamma, :gosper, :loggamma, :fast_factorial
288
+ module_function :factorial, :beta, :gosper, :loggamma, :binomial_coefficient, :binomial_coefficient_gamma, :regularized_beta_function, :incomplete_beta, :permutations, :rising_factorial, :fast_factorial
207
289
  end
208
290
 
@@ -1,6 +1,7 @@
1
1
  require 'distribution/normal/ruby'
2
2
  require 'distribution/normal/gsl'
3
3
  require 'distribution/normal/statistics2'
4
+ require 'distribution/normal/java'
4
5
 
5
6
  module Distribution
6
7
  # From Wikipedia:
@@ -0,0 +1,9 @@
1
+ module Distribution
2
+ module Normal
3
+ # TODO
4
+ module Java_
5
+ class << self
6
+ end
7
+ end
8
+ end
9
+ end
@@ -1,6 +1,8 @@
1
1
  require 'distribution/t/ruby'
2
2
  require 'distribution/t/gsl'
3
3
  require 'distribution/t/statistics2'
4
+ require 'distribution/t/java'
5
+
4
6
  module Distribution
5
7
 
6
8
  # Calculate statisticals for T Distribution.
@@ -0,0 +1,9 @@
1
+ module Distribution
2
+ module T
3
+ # TODO
4
+ module Java_
5
+ class << self
6
+ end
7
+ end
8
+ end
9
+ end
@@ -11,7 +11,7 @@ module Distribution
11
11
 
12
12
 
13
13
  # There are some problem on i686 with t on statistics2
14
- if !RbConfig::CONFIG['arch']=~/i686/
14
+ if true or !RbConfig::CONFIG['arch']=~/i686/
15
15
  # T cumulative distribution function (cdf).
16
16
  #
17
17
  # Returns the integral of t-distribution
@@ -0,0 +1,118 @@
1
+ require File.expand_path(File.dirname(__FILE__)+"/spec_helper.rb")
2
+
3
+ describe Distribution::Binomial do
4
+
5
+ shared_examples_for "binomial engine" do
6
+ it "should return correct pdf" do
7
+ if @engine.respond_to? :pdf
8
+ [10,100,1000].each do |n|
9
+ [1.quo(4),1.quo(2),3.quo(4)].each do |pr|
10
+ [0, 1,n/2,n-1].each do |x|
11
+ exp=Math.binomial_coefficient(n,x)*pr**x*(1-pr)**(n-x)
12
+ obs=@engine.pdf(x,n,pr)
13
+ obs.should be_within(1e-5).of(exp), "For pdf(#{x},#{n},#{pr}) expected #{exp}, obtained #{obs}"
14
+
15
+ end
16
+ end
17
+ end
18
+ else
19
+ pending("No #{@engine}.pdf")
20
+ end
21
+ end
22
+ it_only_with_gsl "should return correct cdf for n<=100" do
23
+ if @engine.respond_to? :pdf
24
+ [10,100].each do |n|
25
+ [0.25,0.5,0.75].each do |pr|
26
+ [1,n/2,n-1].each do |x|
27
+ exp=GSL::Cdf.binomial_P(x,pr,n)
28
+ obs=@engine.cdf(x,n,pr)
29
+ exp.should be_within(1e-5).of(obs), "For cdf(#{x},#{n},#{pr}) expected #{exp}, obtained #{obs}"
30
+ end
31
+ end
32
+ end
33
+ else
34
+ pending("No #{@engine}.cdf")
35
+ end
36
+ end
37
+
38
+
39
+
40
+ end
41
+
42
+ describe "singleton" do
43
+ before do
44
+ @engine=Distribution::Binomial
45
+ end
46
+ it_should_behave_like "binomial engine"
47
+
48
+
49
+ it {@engine.should respond_to(:exact_pdf) }
50
+ it {
51
+ pending("No exact_p_value")
52
+ @engine.should respond_to(:exact_p_value)
53
+ }
54
+
55
+ it "exact_pdf should not return a Float if not float is used as parameter" do
56
+ @engine.exact_pdf(1,1,1).should_not be_a(Float)
57
+ @engine.exact_pdf(16, 80, 1.quo(2)).should_not be_a(Float)
58
+ end
59
+
60
+
61
+
62
+
63
+ end
64
+
65
+ describe Distribution::Binomial::Ruby_ do
66
+ before do
67
+ @engine=Distribution::Binomial::Ruby_
68
+ end
69
+ it_should_behave_like "binomial engine"
70
+
71
+ it "should return correct cdf for n>100" do
72
+ pending("incomplete beta function is slow. Should be replaced for a faster one")
73
+ end
74
+
75
+ it "should return correct p_value for n<=100" do
76
+ pending("Can't calculate with precision x using p")
77
+ [10,100].each do |n|
78
+ [0.25,0.5,0.75].each do |pr|
79
+ [n/2].each do |x|
80
+ cdf=@engine.cdf(x,n,pr)
81
+ p_value=@engine.p_value(cdf,n,pr)
82
+
83
+ p_value.should eq(x), "For p_value(#{cdf},#{n},#{pr}) expected #{x}, obtained #{p_value}"
84
+ end
85
+ end
86
+ end
87
+ end
88
+
89
+ end
90
+ if Distribution.has_gsl?
91
+ describe Distribution::Binomial::GSL_ do
92
+ before do
93
+ @engine=Distribution::Binomial::GSL_
94
+ end
95
+ it_should_behave_like "binomial engine"
96
+ end
97
+ end
98
+ #if Distribution.has_statistics2?
99
+ # describe Distribution::Binomial::Statistics2_ do
100
+ #
101
+ # before do
102
+ # @engine=Distribution::Binomial::Statistics2_
103
+ # end
104
+ #it_should_behave_like "binomial engine"
105
+ # end
106
+ #end
107
+
108
+ if Distribution.has_java?
109
+ describe Distribution::Binomial::Java_ do
110
+ before do
111
+ @engine=Distribution::Binomial::Java_
112
+ end
113
+ it_should_behave_like "binomial engine"
114
+
115
+ end
116
+ end
117
+
118
+ end