distribution 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +0 -0
- data/History.txt +9 -0
- data/Manifest.txt +12 -0
- data/README.txt +24 -7
- data/Rakefile +2 -0
- data/benchmark/binomial_coefficient.rb +55 -0
- data/benchmark/factorial_method.rb +4 -1
- data/benchmark/odd.rb +22 -0
- data/lib/distribution.rb +11 -7
- data/lib/distribution/binomial.rb +22 -20
- data/lib/distribution/binomial/gsl.rb +14 -0
- data/lib/distribution/binomial/java.rb +9 -0
- data/lib/distribution/binomial/ruby.rb +26 -0
- data/lib/distribution/bivariatenormal.rb +7 -5
- data/lib/distribution/bivariatenormal/java.rb +9 -0
- data/lib/distribution/chisquare.rb +1 -0
- data/lib/distribution/chisquare/gsl.rb +3 -3
- data/lib/distribution/chisquare/java.rb +9 -0
- data/lib/distribution/f.rb +1 -0
- data/lib/distribution/f/gsl.rb +3 -3
- data/lib/distribution/f/java.rb +9 -0
- data/lib/distribution/hypergeometric.rb +2 -0
- data/lib/distribution/hypergeometric/gsl.rb +2 -2
- data/lib/distribution/hypergeometric/java.rb +9 -0
- data/lib/distribution/hypergeometric/ruby.rb +24 -27
- data/lib/distribution/math_extension.rb +90 -8
- data/lib/distribution/normal.rb +1 -0
- data/lib/distribution/normal/java.rb +9 -0
- data/lib/distribution/t.rb +2 -0
- data/lib/distribution/t/java.rb +9 -0
- data/lib/distribution/t/statistics2.rb +1 -1
- data/spec/binomial_spec.rb +118 -0
- data/spec/distribution_spec.rb +1 -1
- data/spec/f_spec.rb +1 -1
- data/spec/hypergeometric_spec.rb +49 -13
- data/spec/math_extension_spec.rb +57 -9
- data/spec/normal_spec.rb +7 -3
- data/spec/shorthand_spec.rb +16 -2
- data/spec/t_spec.rb +1 -1
- metadata +44 -5
- metadata.gz.sig +0 -0
data/lib/distribution/f.rb
CHANGED
data/lib/distribution/f/gsl.rb
CHANGED
@@ -3,14 +3,14 @@ module Distribution
|
|
3
3
|
module GSL_
|
4
4
|
class << self
|
5
5
|
# F probability density function at +x+ with +k1+ and +k2+ degrees of
# freedom. Delegates to GSL::Ran.fdist_pdf.
#
# +x+ is converted with +to_f+ before the call, so Integer and Rational
# arguments reach GSL as Float.
def pdf(x, k1, k2)
  GSL::Ran.fdist_pdf(x.to_f, k1, k2)
end
|
8
8
|
# Return the P-value of the corresponding integral with
# +k1+ and +k2+ degrees of freedom.
#
#   Distribution::F.p_value(0.95,1,2)
#
# Delegates to GSL::Cdf.fdist_Pinv. +pr+ is converted with +to_f+ before
# the call, so Integer and Rational probabilities reach GSL as Float.
def p_value(pr, k1, k2)
  GSL::Cdf.fdist_Pinv(pr.to_f, k1, k2)
end
|
15
15
|
# F cumulative distribution function (cdf).
|
16
16
|
#
|
@@ -20,7 +20,7 @@ module Distribution
|
|
20
20
|
# Distribution::F.cdf(20,3,2)
|
21
21
|
#
|
22
22
|
# F cumulative distribution function at +x+ with +k1+ and +k2+ degrees
# of freedom. Delegates to GSL::Cdf.fdist_P.
#
# +x+ is converted with +to_f+ exactly once; a second +to_f+ on a Float
# is a no-op and was dropped.
def cdf(x, k1, k2)
  GSL::Cdf.fdist_P(x.to_f, k1, k2)
end
|
25
25
|
end
|
26
26
|
end
|
@@ -5,9 +5,9 @@ module Distribution
|
|
5
5
|
# Hypergeometric probability of +k+, delegated to
# GSL::Ran::hypergeometric_pdf.
#
# This module's (k, m, n, total) arguments are passed to GSL as
# (k, m, total-m, n).
def pdf(k, m, n, total) # :nodoc:
  GSL::Ran::hypergeometric_pdf(k, m, total - m, n)
end
|
8
|
-
# The GSL::Cdf function for hypergeometric
#
# Delegates to GSL::Cdf::hypergeometric_P, passing this module's
# (k, m, n, total) arguments as (k, m, total-m, n).
def cdf(k, m, n, total) # :nodoc:
  GSL::Cdf::hypergeometric_P(k, m, total - m, n)
end
|
13
13
|
|
@@ -1,32 +1,13 @@
|
|
1
|
-
# Added by John O. Woods, SciRuby project.
|
2
|
-
|
3
|
-
# Extensions to Fixnum for Hypergeometric calculations in pure Ruby.
|
4
|
-
# ==Reference
|
5
|
-
# * http://bluebones.net/2007/09/combinatorics-in-ruby/
|
6
|
-
# * http://mathworld.wolfram.com/StirlingsApproximation.html
|
7
|
-
class Fixnum
|
8
|
-
# +k+-combination of a set of size +self+
|
9
|
-
def choose(k)
|
10
|
-
Math.factorial(self) / (Math.factorial(k) * Math.factorial(self - k))
|
11
|
-
end
|
12
|
-
|
13
|
-
# Fast combination calculation using Gosper's approximation of factorials.
|
14
|
-
def fast_choose(k)
|
15
|
-
Math.fast_factorial(self).quo(Math.fast_factorial(self - k) * Math.fast_factorial(k))
|
16
|
-
end
|
17
|
-
end
|
1
|
+
# Added by John O. Woods, SciRuby project.
|
2
|
+
# Optimized by Claudio Bustos
|
18
3
|
|
19
4
|
module Distribution
|
20
5
|
module Hypergeometric
|
21
6
|
module Ruby_
|
22
7
|
class << self
|
23
|
-
|
24
|
-
|
25
|
-
# choose.
|
26
|
-
def pdf_aprox(k, m, n, total)
|
27
|
-
m.fast_choose(k) * (total-m).fast_choose(n-k).quo( total.fast_choose(n))
|
8
|
+
# Shorthand for Math.binomial_coefficient(n, k), used by the
# hypergeometric formulas in this module.
def bc(n, k)
  Math.binomial_coefficient(n, k)
end
|
29
|
-
|
30
11
|
# Hypergeometric probability density function
|
31
12
|
#
|
32
13
|
# Probability p(+k+, +m+, +n+, +total+) of drawing sets of size +m+ and +n+ with an intersection of size +k+
|
@@ -39,19 +20,26 @@ module Distribution
|
|
39
20
|
min_m_n=m<n ? m : n
|
40
21
|
max_t=[0,m+n-total].max
|
41
22
|
return 0 if k>min_m_n or k<max_t
|
42
|
-
m
|
23
|
+
(bc(m,k) * bc(total-m,n-k)).quo(bc(total,n))
|
24
|
+
end
|
25
|
+
|
26
|
+
|
27
|
+
|
28
|
+
# Hypergeometric probability of +k+ computed with a precomputed
# denominator +den+ (callers in this module pass bc(total, n)), so that
# loops over many +k+ values do not recompute it.
#
# No support check is made here: +k+ is used as given.
def pdf_with_den(k, m, n, total, den)
  (bc(m, k) * bc(total - m, n - k)).quo(den)
end
|
44
31
|
|
45
32
|
# p-value:
#
# Returns the smallest +k+ whose cumulative probability reaches +pr+.
#
# Only the support of the distribution, max(0, m+n-total)..min(m, n), is
# scanned, so binomial coefficients are never requested for impossible
# values of +k+. If the accumulated probability never reaches +pr+
# (e.g. pr > 1 or floating point rounding), the upper bound of the
# support is returned instead of falling out of the loop.
def p_value(pr, m, n, total)
  den = bc(total, n) # shared denominator, computed once
  lower = [0, m + n - total].max
  upper = [m, n].min
  ac = 0
  (lower..upper).each do |i|
    ac += pdf_with_den(i, m, n, total, den)
    return i if ac >= pr
  end
  upper
end
|
54
|
-
|
55
43
|
# Cumulative distribution function.
|
56
44
|
# The probability of obtain, from a sample of
|
57
45
|
# size +n+, +k+ or less elements
|
@@ -61,8 +49,17 @@ module Distribution
|
|
61
49
|
# Cumulative probability of obtaining +k+ or fewer elements.
#
# Raises RuntimeError ("k>m" / "k>n") when +k+ exceeds +m+ or +n+.
# The sum starts at the lower bound of the support, max(0, m+n-total),
# so no term is evaluated for an impossible +k+; values of +k+ below
# that bound yield 0.
def cdf(k, m, n, total)
  raise "k>m" if k>m
  raise "k>n" if k>n
  lower = [0, m + n - total].max
  return 0 if k < lower
  # Store the den
  den = bc(total, n)
  (lower..k).inject(0) { |sum, ki| sum + pdf_with_den(ki, m, n, total, den) }
end
|
56
|
+
|
57
|
+
|
58
|
+
alias :exact_pdf :pdf
|
59
|
+
alias :exact_p_value :p_value
|
60
|
+
alias :exact_cdf :cdf
|
61
|
+
|
62
|
+
|
66
63
|
end
|
67
64
|
end
|
68
65
|
end
|
@@ -1,4 +1,16 @@
|
|
1
|
-
|
1
|
+
# Make Prime.each(upper) { |prime| ... } available on every supported Ruby.
#
# On Ruby < 1.9 Prime is loaded through 'mathn' and a class-level
# Prime.each is defined here; it walks a Prime instance and stops at the
# first prime greater than upper.to_i. On newer Rubies the 'prime'
# library is required instead.
if RUBY_VERSION < "1.9"
  require 'mathn'
  def Prime.each(upper, &block)
    @primes = Prime.new
    @primes.each do |prime|
      break if prime > upper.to_i
      block.call(prime)
    end
  end
else
  require 'prime'
end
|
13
|
+
|
2
14
|
require 'bigdecimal'
|
3
15
|
require 'bigdecimal/math'
|
4
16
|
|
@@ -38,30 +50,31 @@ module Distribution
|
|
38
50
|
return SmallOddSwing[n] if (n<33)
|
39
51
|
sqrtN = Math.sqrt(n).floor
|
40
52
|
count=0
|
53
|
+
|
41
54
|
Prime.each(n/3) do |prime|
|
42
55
|
next if prime<3
|
43
56
|
if (prime<=sqrtN)
|
44
57
|
q=n
|
45
58
|
_p=1
|
46
|
-
|
47
|
-
|
59
|
+
|
60
|
+
while((q=(q/prime).truncate)>0) do
|
61
|
+
if((q%2)==1)
|
48
62
|
_p*=prime
|
49
63
|
end
|
50
64
|
end
|
51
|
-
|
52
65
|
if _p>1
|
53
66
|
@prime_list[count]=_p
|
54
67
|
count+=1
|
55
68
|
end
|
56
69
|
|
57
70
|
else
|
58
|
-
if ((n/prime)
|
71
|
+
if ((n/prime).truncate%2==1)
|
59
72
|
@prime_list[count]=prime
|
60
73
|
count+=1
|
61
74
|
end
|
62
75
|
end
|
63
76
|
end
|
64
|
-
prod=get_primorial(n/2+1,n)
|
77
|
+
prod=get_primorial((n/2).truncate+1,n)
|
65
78
|
prod * @prime_list[0,count].inject(1) {|ac,v| ac*v}
|
66
79
|
end
|
67
80
|
def get_primorial(low,up)
|
@@ -152,6 +165,37 @@ module Distribution
|
|
152
165
|
# Beta function, computed from the gamma function as
#   gamma(x) * gamma(y) / gamma(x + y)
def beta(x, y)
  (gamma(x) * gamma(y)).quo(gamma(x + y))
end
|
168
|
+
# I_x(a,b): Regularized incomplete beta function
#
# Evaluated as the binomial sum
#
#   sum for j in a..(a+b-1) of C(a+b-1, j) * x**j * (1-x)**(a+b-1-j)
#
# The sum iterates the range a..(a+b-1), so +a+ and +b+ are expected to
# be Integers. Returns 1 immediately when x == 1.
def regularized_beta_function(x, a, b)
  return 1 if x == 1
  n = a + b - 1
  (a..n).inject(0) do |sum, j|
    sum + (binomial_coefficient(n, j) * x**j * (1 - x)**(n - j))
  end
end
|
182
|
+
# B_x(a,b) : Incomplete beta function
# Should be replaced by
# http://lib.stat.cmu.edu/apstat/63
#
# Not implemented: every call raises RuntimeError ("Not work").
def incomplete_beta(x, a, b)
  raise "Not work"
  # Draft formula, kept for reference only. It sat after the raise and
  # could never run:
  #   return beta(a,b) if x==1
  #   ((x**a * (1-x)**b).quo(a)) * hyper_f(a+b,1,a+1,x)
end
|
191
|
+
# Number of ordered selections of +n+ elements out of +x+:
#   x! / (x-n)!
def permutations(x, n)
  factorial(x).quo(factorial(x - n))
end
|
194
|
+
|
195
|
+
# Rising factorial x * (x+1) * ... * (x+n-1), computed as
#   (x+n-1)! / (x-1)!
def rising_factorial(x, n)
  factorial(x + n - 1).quo(factorial(x - 1))
end
|
198
|
+
|
155
199
|
|
156
200
|
LOG_2PI = Math.log(2 * Math::PI)# log(2PI)
|
157
201
|
N = 8
|
@@ -192,17 +236,55 @@ module Distribution
|
|
192
236
|
end
|
193
237
|
Math.exp(loggamma(x))
|
194
238
|
end
|
239
|
+
# Binomial coefficients, or:
#   ( n )
#   ( k )
# Gives the number of different k size subsets of a set size n
#
# Replaces (n,k) for (n, n-k) if k>n-k
#
#   (n)   n^k'    (n)..(n-k+1)
#   ( ) = ---- =  ------------
#   (k)    k!          k!
#
# Returns 1 when k is 0 or n, and 0 when k is negative or greater than n
# (no k-subset exists), instead of evaluating the formula with a
# negative k.
def binomial_coefficient(n, k)
  return 1 if k == 0 || k == n
  return 0 if k < 0 || k > n
  k = [k, n - k].min
  # Falling product n * (n-1) * ... * (n-k+1) divided by k!
  (((n - k + 1)..n).inject(1) { |ac, v| ac * v }).quo(factorial(k))
  # Other way to calculate binomial is this:
  # (1..k).inject(1) {|ac, i| (ac*(n-k+i).quo(i))}
end
|
257
|
+
# Approximate binomial coefficient, using gamma function.
# The fastest method, until we fall on BigDecimal!
#
# Strategy:
# 1. gamma(n+1) / (gamma(k+1) * gamma(n-k+1)) in floating point.
# 2. If that overflows (the result is NaN *or* infinite), redo the
#    computation with log-gammas and exponentiate.
# 3. If even the exponential overflows a Float, return a BigDecimal
#    computed with BigMath.exp.
#
# +loggamma+ is called directly rather than through an +lgamma+ alias, so
# the method does not depend on which module it is mixed into.
def binomial_coefficient_gamma(n, k)
  return 1 if k == 0 || k == n
  k = [k, n - k].min

  val = gamma(n + 1) / (gamma(k + 1) * gamma(n - k + 1))
  # Inf/Inf gives NaN, Inf/finite gives Inf: both mean the gammas overflowed.
  if val.nan? || val.infinite?
    lg = loggamma(n + 1) - (loggamma(k + 1) + loggamma(n - k + 1))
    val = Math.exp(lg)
    # Crash again! We require BigDecimals
    if val.infinite?
      val = BigMath.exp(BigDecimal(lg.to_s), 16)
    end
  end

  val
end
|
195
275
|
end
|
196
276
|
end
|
197
277
|
|
198
278
|
# Mixes Distribution::MathExtension into Ruby's Math module and exposes
# the listed methods as module functions (Math.factorial, Math.beta, ...).
module Math
  include Distribution::MathExtension
  # NOTE(review): this makes Math.lgamma an alias of loggamma. Ruby's
  # built-in Math.lgamma returns a [value, sign] pair, so the alias changes
  # its return shape for every caller in the process. Confirm this is
  # intended.
  alias :lgamma :loggamma

  module_function :factorial, :beta, :gamma, :gosper, :loggamma, :lgamma, :binomial_coefficient, :binomial_coefficient_gamma, :regularized_beta_function, :incomplete_beta, :permutations, :rising_factorial , :fast_factorial
end
|
202
284
|
|
203
285
|
# Necessary on Ruby 1.9
#
# Mixes Distribution::MathExtension into CMath and exposes the listed
# methods as module functions. Unlike the Math list, :gamma and :lgamma
# are not exported here.
module CMath # :nodoc:
  include Distribution::MathExtension
  module_function :factorial, :beta, :gosper, :loggamma, :binomial_coefficient, :binomial_coefficient_gamma, :regularized_beta_function, :incomplete_beta, :permutations, :rising_factorial, :fast_factorial
end
|
208
290
|
|
data/lib/distribution/normal.rb
CHANGED
data/lib/distribution/t.rb
CHANGED
@@ -11,7 +11,7 @@ module Distribution
|
|
11
11
|
|
12
12
|
|
13
13
|
# There are some problem on i686 with t on statistics2
|
14
|
-
if !RbConfig::CONFIG['arch']=~/i686/
|
14
|
+
if true or !RbConfig::CONFIG['arch']=~/i686/
|
15
15
|
# T cumulative distribution function (cdf).
|
16
16
|
#
|
17
17
|
# Returns the integral of t-distribution
|
@@ -0,0 +1,118 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__)+"/spec_helper.rb")

# Specs for Distribution::Binomial and each of its engines.
describe Distribution::Binomial do

  # Examples shared by every engine; the including group sets @engine.
  shared_examples_for "binomial engine" do
    it "should return correct pdf" do
      if @engine.respond_to? :pdf
        [10,100,1000].each do |n|
          [1.quo(4),1.quo(2),3.quo(4)].each do |pr|
            [0, 1,n/2,n-1].each do |x|
              # Reference value straight from the binomial formula
              exp=Math.binomial_coefficient(n,x)*pr**x*(1-pr)**(n-x)
              obs=@engine.pdf(x,n,pr)
              obs.should be_within(1e-5).of(exp), "For pdf(#{x},#{n},#{pr}) expected #{exp}, obtained #{obs}"
            end
          end
        end
      else
        pending("No #{@engine}.pdf")
      end
    end

    it_only_with_gsl "should return correct cdf for n<=100" do
      # Guard on :cdf (the method under test), matching the pending message
      if @engine.respond_to? :cdf
        [10,100].each do |n|
          [0.25,0.5,0.75].each do |pr|
            [1,n/2,n-1].each do |x|
              exp=GSL::Cdf.binomial_P(x,pr,n)
              obs=@engine.cdf(x,n,pr)
              obs.should be_within(1e-5).of(exp), "For cdf(#{x},#{n},#{pr}) expected #{exp}, obtained #{obs}"
            end
          end
        end
      else
        pending("No #{@engine}.cdf")
      end
    end
  end

  describe "singleton" do
    before do
      @engine=Distribution::Binomial
    end
    it_should_behave_like "binomial engine"

    it {@engine.should respond_to(:exact_pdf) }
    it {
      pending("No exact_p_value")
      @engine.should respond_to(:exact_p_value)
    }

    it "exact_pdf should not return a Float if not float is used as parameter" do
      @engine.exact_pdf(1,1,1).should_not be_a(Float)
      @engine.exact_pdf(16, 80, 1.quo(2)).should_not be_a(Float)
    end
  end

  describe Distribution::Binomial::Ruby_ do
    before do
      @engine=Distribution::Binomial::Ruby_
    end
    it_should_behave_like "binomial engine"

    it "should return correct cdf for n>100" do
      pending("incomplete beta function is slow. Should be replaced for a faster one")
    end

    it "should return correct p_value for n<=100" do
      pending("Can't calculate with precision x using p")
      [10,100].each do |n|
        [0.25,0.5,0.75].each do |pr|
          [n/2].each do |x|
            cdf=@engine.cdf(x,n,pr)
            p_value=@engine.p_value(cdf,n,pr)

            p_value.should eq(x), "For p_value(#{cdf},#{n},#{pr}) expected #{x}, obtained #{p_value}"
          end
        end
      end
    end
  end

  if Distribution.has_gsl?
    describe Distribution::Binomial::GSL_ do
      before do
        @engine=Distribution::Binomial::GSL_
      end
      it_should_behave_like "binomial engine"
    end
  end

  #if Distribution.has_statistics2?
  #  describe Distribution::Binomial::Statistics2_ do
  #
  #    before do
  #      @engine=Distribution::Binomial::Statistics2_
  #    end
  #    it_should_behave_like "binomial engine"
  #  end
  #end

  if Distribution.has_java?
    describe Distribution::Binomial::Java_ do
      before do
        @engine=Distribution::Binomial::Java_
      end
      it_should_behave_like "binomial engine"
    end
  end

end
|