distribution 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +0 -0
- data/History.txt +9 -0
- data/Manifest.txt +12 -0
- data/README.txt +24 -7
- data/Rakefile +2 -0
- data/benchmark/binomial_coefficient.rb +55 -0
- data/benchmark/factorial_method.rb +4 -1
- data/benchmark/odd.rb +22 -0
- data/lib/distribution.rb +11 -7
- data/lib/distribution/binomial.rb +22 -20
- data/lib/distribution/binomial/gsl.rb +14 -0
- data/lib/distribution/binomial/java.rb +9 -0
- data/lib/distribution/binomial/ruby.rb +26 -0
- data/lib/distribution/bivariatenormal.rb +7 -5
- data/lib/distribution/bivariatenormal/java.rb +9 -0
- data/lib/distribution/chisquare.rb +1 -0
- data/lib/distribution/chisquare/gsl.rb +3 -3
- data/lib/distribution/chisquare/java.rb +9 -0
- data/lib/distribution/f.rb +1 -0
- data/lib/distribution/f/gsl.rb +3 -3
- data/lib/distribution/f/java.rb +9 -0
- data/lib/distribution/hypergeometric.rb +2 -0
- data/lib/distribution/hypergeometric/gsl.rb +2 -2
- data/lib/distribution/hypergeometric/java.rb +9 -0
- data/lib/distribution/hypergeometric/ruby.rb +24 -27
- data/lib/distribution/math_extension.rb +90 -8
- data/lib/distribution/normal.rb +1 -0
- data/lib/distribution/normal/java.rb +9 -0
- data/lib/distribution/t.rb +2 -0
- data/lib/distribution/t/java.rb +9 -0
- data/lib/distribution/t/statistics2.rb +1 -1
- data/spec/binomial_spec.rb +118 -0
- data/spec/distribution_spec.rb +1 -1
- data/spec/f_spec.rb +1 -1
- data/spec/hypergeometric_spec.rb +49 -13
- data/spec/math_extension_spec.rb +57 -9
- data/spec/normal_spec.rb +7 -3
- data/spec/shorthand_spec.rb +16 -2
- data/spec/t_spec.rb +1 -1
- metadata +44 -5
- metadata.gz.sig +0 -0
data/lib/distribution/f.rb
CHANGED
data/lib/distribution/f/gsl.rb
CHANGED
@@ -3,14 +3,14 @@ module Distribution
|
|
3
3
|
module GSL_
|
4
4
|
class << self
|
5
5
|
def pdf(x,k1,k2)
|
6
|
-
GSL::Ran.fdist_pdf(x,k1,k2)
|
6
|
+
GSL::Ran.fdist_pdf(x.to_f,k1,k2)
|
7
7
|
end
|
8
8
|
# Return the P-value of the corresponding integral with
|
9
9
|
# k degrees of freedom
|
10
10
|
#
|
11
11
|
# Distribution::F.p_value(0.95,1,2)
|
12
12
|
def p_value(pr,k1,k2)
|
13
|
-
GSL::Cdf.fdist_Pinv(pr,k1,k2)
|
13
|
+
GSL::Cdf.fdist_Pinv(pr.to_f,k1,k2)
|
14
14
|
end
|
15
15
|
# F cumulative distribution function (cdf).
|
16
16
|
#
|
@@ -20,7 +20,7 @@ module Distribution
|
|
20
20
|
# Distribution::F.cdf(20,3,2)
|
21
21
|
#
|
22
22
|
def cdf(x, k1, k2)
|
23
|
-
GSL::Cdf.fdist_P(x.to_f,k1,k2)
|
23
|
+
GSL::Cdf.fdist_P(x.to_f.to_f,k1,k2)
|
24
24
|
end
|
25
25
|
end
|
26
26
|
end
|
@@ -5,9 +5,9 @@ module Distribution
|
|
5
5
|
def pdf(k, m, n, total) # :nodoc:
|
6
6
|
GSL::Ran::hypergeometric_pdf(k, m, total-m, n)
|
7
7
|
end
|
8
|
-
# The GSL::Cdf function for hypergeometric
|
8
|
+
# The GSL::Cdf function for hypergeometric
|
9
9
|
#
|
10
|
-
def
|
10
|
+
def cdf(k, m, n, total) # :nodoc:
|
11
11
|
GSL::Cdf::hypergeometric_P(k, m, total-m, n)
|
12
12
|
end
|
13
13
|
|
@@ -1,32 +1,13 @@
|
|
1
|
-
# Added by John O. Woods, SciRuby project.
|
2
|
-
|
3
|
-
# Extensions to Fixnum for Hypergeometric calculations in pure Ruby.
|
4
|
-
# ==Reference
|
5
|
-
# * http://bluebones.net/2007/09/combinatorics-in-ruby/
|
6
|
-
# * http://mathworld.wolfram.com/StirlingsApproximation.html
|
7
|
-
class Fixnum
|
8
|
-
# +k+-combination of a set of size +self+
|
9
|
-
def choose(k)
|
10
|
-
Math.factorial(self) / (Math.factorial(k) * Math.factorial(self - k))
|
11
|
-
end
|
12
|
-
|
13
|
-
# Fast combination calculation using Gosper's approximation of factorials.
|
14
|
-
def fast_choose(k)
|
15
|
-
Math.fast_factorial(self).quo(Math.fast_factorial(self - k) * Math.fast_factorial(k))
|
16
|
-
end
|
17
|
-
end
|
1
|
+
# Added by John O. Woods, SciRuby project.
|
2
|
+
# Optimized by Claudio Bustos
|
18
3
|
|
19
4
|
module Distribution
|
20
5
|
module Hypergeometric
|
21
6
|
module Ruby_
|
22
7
|
class << self
|
23
|
-
|
24
|
-
|
25
|
-
# choose.
|
26
|
-
def pdf_aprox(k, m, n, total)
|
27
|
-
m.fast_choose(k) * (total-m).fast_choose(n-k).quo( total.fast_choose(n))
|
8
|
+
def bc(n,k)
|
9
|
+
Math.binomial_coefficient(n,k)
|
28
10
|
end
|
29
|
-
|
30
11
|
# Hypergeometric probability density function
|
31
12
|
#
|
32
13
|
# Probability p(+k+, +m+, +n+, +total+) of drawing sets of size +m+ and +n+ with an intersection of size +k+
|
@@ -39,19 +20,26 @@ module Distribution
|
|
39
20
|
min_m_n=m<n ? m : n
|
40
21
|
max_t=[0,m+n-total].max
|
41
22
|
return 0 if k>min_m_n or k<max_t
|
42
|
-
m
|
23
|
+
(bc(m,k) * bc(total-m,n-k)).quo(bc(total,n))
|
24
|
+
end
|
25
|
+
|
26
|
+
|
27
|
+
|
28
|
+
def pdf_with_den(k,m,n,total,den)
|
29
|
+
(bc(m,k) * bc(total-m,n-k)).quo(den)
|
43
30
|
end
|
44
31
|
|
45
32
|
# p-value:
|
46
33
|
|
47
34
|
def p_value(pr, m, n, total)
|
48
35
|
ac=0
|
36
|
+
den=bc(total,n)
|
37
|
+
|
49
38
|
(0..total).each do |i|
|
50
|
-
ac+=
|
39
|
+
ac+=pdf_with_den(i,m,n,total,den)
|
51
40
|
return i if ac>=pr
|
52
41
|
end
|
53
42
|
end
|
54
|
-
|
55
43
|
# Cumulative distribution function.
|
56
44
|
# The probability of obtain, from a sample of
|
57
45
|
# size +n+, +k+ or less elements
|
@@ -61,8 +49,17 @@ module Distribution
|
|
61
49
|
def cdf(k, m, n, total)
|
62
50
|
raise "k>m" if k>m
|
63
51
|
raise "k>n" if k>n
|
64
|
-
|
52
|
+
# Store the den
|
53
|
+
den=bc(total,n)
|
54
|
+
(0..k).collect { |ki| pdf_with_den(ki,m,n,total,den) }.inject { |sum,v| sum+v}
|
65
55
|
end
|
56
|
+
|
57
|
+
|
58
|
+
alias :exact_pdf :pdf
|
59
|
+
alias :exact_p_value :p_value
|
60
|
+
alias :exact_cdf :cdf
|
61
|
+
|
62
|
+
|
66
63
|
end
|
67
64
|
end
|
68
65
|
end
|
@@ -1,4 +1,16 @@
|
|
1
|
-
|
1
|
+
if RUBY_VERSION<"1.9"
|
2
|
+
require 'mathn'
|
3
|
+
def Prime.each(upper,&block)
|
4
|
+
@primes=Prime.new
|
5
|
+
@primes.each do |prime|
|
6
|
+
break if prime > upper.to_i
|
7
|
+
block.call(prime)
|
8
|
+
end
|
9
|
+
end
|
10
|
+
else
|
11
|
+
require 'prime'
|
12
|
+
end
|
13
|
+
|
2
14
|
require 'bigdecimal'
|
3
15
|
require 'bigdecimal/math'
|
4
16
|
|
@@ -38,30 +50,31 @@ module Distribution
|
|
38
50
|
return SmallOddSwing[n] if (n<33)
|
39
51
|
sqrtN = Math.sqrt(n).floor
|
40
52
|
count=0
|
53
|
+
|
41
54
|
Prime.each(n/3) do |prime|
|
42
55
|
next if prime<3
|
43
56
|
if (prime<=sqrtN)
|
44
57
|
q=n
|
45
58
|
_p=1
|
46
|
-
|
47
|
-
|
59
|
+
|
60
|
+
while((q=(q/prime).truncate)>0) do
|
61
|
+
if((q%2)==1)
|
48
62
|
_p*=prime
|
49
63
|
end
|
50
64
|
end
|
51
|
-
|
52
65
|
if _p>1
|
53
66
|
@prime_list[count]=_p
|
54
67
|
count+=1
|
55
68
|
end
|
56
69
|
|
57
70
|
else
|
58
|
-
if ((n/prime)
|
71
|
+
if ((n/prime).truncate%2==1)
|
59
72
|
@prime_list[count]=prime
|
60
73
|
count+=1
|
61
74
|
end
|
62
75
|
end
|
63
76
|
end
|
64
|
-
prod=get_primorial(n/2+1,n)
|
77
|
+
prod=get_primorial((n/2).truncate+1,n)
|
65
78
|
prod * @prime_list[0,count].inject(1) {|ac,v| ac*v}
|
66
79
|
end
|
67
80
|
def get_primorial(low,up)
|
@@ -152,6 +165,37 @@ module Distribution
|
|
152
165
|
def beta(x,y)
|
153
166
|
(gamma(x)*gamma(y)).quo(gamma(x+y))
|
154
167
|
end
|
168
|
+
# I_x(a,b): Regularized incomplete beta function
|
169
|
+
#
|
170
|
+
# Source:
|
171
|
+
#
|
172
|
+
def regularized_beta_function(x,a,b)
|
173
|
+
return 1 if x==1
|
174
|
+
#incomplete_beta(x,a,b).quo(beta(a,b))
|
175
|
+
m=a
|
176
|
+
n=b+a-1
|
177
|
+
(m..n).inject(0) {|sum,j|
|
178
|
+
sum+(binomial_coefficient(n,j)* x**j * (1-x)**(n-j))
|
179
|
+
}
|
180
|
+
|
181
|
+
end
|
182
|
+
# B_x(a,b) : Incomplete beta function
|
183
|
+
# Should be replaced by
|
184
|
+
# http://lib.stat.cmu.edu/apstat/63
|
185
|
+
def incomplete_beta(x,a,b)
|
186
|
+
raise "Not work"
|
187
|
+
return beta(a,b) if x==1
|
188
|
+
|
189
|
+
((x**a * (1-x)**b).quo(a)) * hyper_f(a+b,1,a+1,x)
|
190
|
+
end
|
191
|
+
def permutations(x,n)
|
192
|
+
factorial(x).quo(factorial(x-n))
|
193
|
+
end
|
194
|
+
|
195
|
+
def rising_factorial(x,n)
|
196
|
+
factorial(x+n-1).quo(factorial(x-1))
|
197
|
+
end
|
198
|
+
|
155
199
|
|
156
200
|
LOG_2PI = Math.log(2 * Math::PI)# log(2PI)
|
157
201
|
N = 8
|
@@ -192,17 +236,55 @@ module Distribution
|
|
192
236
|
end
|
193
237
|
Math.exp(loggamma(x))
|
194
238
|
end
|
239
|
+
# Binomial coeffients, or:
|
240
|
+
# ( n )
|
241
|
+
# ( k )
|
242
|
+
# Gives the number of different k size subsets of a set size n
|
243
|
+
#
|
244
|
+
# Replaces (n,k) for (n, n-k) if k>n-k
|
245
|
+
#
|
246
|
+
# (n) n^k' (n)..(n-k+1)
|
247
|
+
# ( ) = ---- = ------------
|
248
|
+
# (k) k! k!
|
249
|
+
#
|
250
|
+
def binomial_coefficient(n,k)
|
251
|
+
return 1 if (k==0 or k==n)
|
252
|
+
k=[k, n-k].min
|
253
|
+
(((n-k+1)..n).inject(1) {|ac,v| ac * v}).quo(factorial(k))
|
254
|
+
# Other way to calcule binomial is this:
|
255
|
+
# (1..k).inject(1) {|ac, i| (ac*(n-k+i).quo(i))}
|
256
|
+
end
|
257
|
+
# Approximate binomial coefficient, using gamma function.
|
258
|
+
# The fastest method, until we fall on BigDecimal!
|
259
|
+
def binomial_coefficient_gamma(n,k)
|
260
|
+
return 1 if (k==0 or k==n)
|
261
|
+
k=[k, n-k].min
|
262
|
+
|
263
|
+
val=gamma(n+1) / (gamma(k+1)*gamma(n-k+1))
|
264
|
+
if (val.nan?)
|
265
|
+
lg=lgamma(n+1) - (lgamma(k+1)+lgamma(n-k+1))
|
266
|
+
val=Math.exp(lg)
|
267
|
+
# Crash again! We require BigDecimals
|
268
|
+
if val.infinite?
|
269
|
+
val=BigMath.exp(BigDecimal(lg.to_s),16)
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
273
|
+
val
|
274
|
+
end
|
195
275
|
end
|
196
276
|
end
|
197
277
|
|
198
278
|
module Math
|
199
279
|
include Distribution::MathExtension
|
200
|
-
|
280
|
+
alias :lgamma :loggamma
|
281
|
+
|
282
|
+
module_function :factorial, :beta, :gamma, :gosper, :loggamma, :lgamma, :binomial_coefficient, :binomial_coefficient_gamma, :regularized_beta_function, :incomplete_beta, :permutations, :rising_factorial , :fast_factorial
|
201
283
|
end
|
202
284
|
|
203
285
|
# Necessary on Ruby 1.9
|
204
286
|
module CMath # :nodoc:
|
205
287
|
include Distribution::MathExtension
|
206
|
-
module_function :factorial, :beta, :
|
288
|
+
module_function :factorial, :beta, :gosper, :loggamma, :binomial_coefficient, :binomial_coefficient_gamma, :regularized_beta_function, :incomplete_beta, :permutations, :rising_factorial, :fast_factorial
|
207
289
|
end
|
208
290
|
|
data/lib/distribution/normal.rb
CHANGED
data/lib/distribution/t.rb
CHANGED
@@ -11,7 +11,7 @@ module Distribution
|
|
11
11
|
|
12
12
|
|
13
13
|
# There are some problem on i686 with t on statistics2
|
14
|
-
if !RbConfig::CONFIG['arch']=~/i686/
|
14
|
+
if true or !RbConfig::CONFIG['arch']=~/i686/
|
15
15
|
# T cumulative distribution function (cdf).
|
16
16
|
#
|
17
17
|
# Returns the integral of t-distribution
|
@@ -0,0 +1,118 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__)+"/spec_helper.rb")
|
2
|
+
|
3
|
+
describe Distribution::Binomial do
|
4
|
+
|
5
|
+
shared_examples_for "binomial engine" do
|
6
|
+
it "should return correct pdf" do
|
7
|
+
if @engine.respond_to? :pdf
|
8
|
+
[10,100,1000].each do |n|
|
9
|
+
[1.quo(4),1.quo(2),3.quo(4)].each do |pr|
|
10
|
+
[0, 1,n/2,n-1].each do |x|
|
11
|
+
exp=Math.binomial_coefficient(n,x)*pr**x*(1-pr)**(n-x)
|
12
|
+
obs=@engine.pdf(x,n,pr)
|
13
|
+
obs.should be_within(1e-5).of(exp), "For pdf(#{x},#{n},#{pr}) expected #{exp}, obtained #{obs}"
|
14
|
+
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
else
|
19
|
+
pending("No #{@engine}.pdf")
|
20
|
+
end
|
21
|
+
end
|
22
|
+
it_only_with_gsl "should return correct cdf for n<=100" do
|
23
|
+
if @engine.respond_to? :pdf
|
24
|
+
[10,100].each do |n|
|
25
|
+
[0.25,0.5,0.75].each do |pr|
|
26
|
+
[1,n/2,n-1].each do |x|
|
27
|
+
exp=GSL::Cdf.binomial_P(x,pr,n)
|
28
|
+
obs=@engine.cdf(x,n,pr)
|
29
|
+
exp.should be_within(1e-5).of(obs), "For cdf(#{x},#{n},#{pr}) expected #{exp}, obtained #{obs}"
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
else
|
34
|
+
pending("No #{@engine}.cdf")
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
|
39
|
+
|
40
|
+
end
|
41
|
+
|
42
|
+
describe "singleton" do
|
43
|
+
before do
|
44
|
+
@engine=Distribution::Binomial
|
45
|
+
end
|
46
|
+
it_should_behave_like "binomial engine"
|
47
|
+
|
48
|
+
|
49
|
+
it {@engine.should respond_to(:exact_pdf) }
|
50
|
+
it {
|
51
|
+
pending("No exact_p_value")
|
52
|
+
@engine.should respond_to(:exact_p_value)
|
53
|
+
}
|
54
|
+
|
55
|
+
it "exact_pdf should not return a Float if not float is used as parameter" do
|
56
|
+
@engine.exact_pdf(1,1,1).should_not be_a(Float)
|
57
|
+
@engine.exact_pdf(16, 80, 1.quo(2)).should_not be_a(Float)
|
58
|
+
end
|
59
|
+
|
60
|
+
|
61
|
+
|
62
|
+
|
63
|
+
end
|
64
|
+
|
65
|
+
describe Distribution::Binomial::Ruby_ do
|
66
|
+
before do
|
67
|
+
@engine=Distribution::Binomial::Ruby_
|
68
|
+
end
|
69
|
+
it_should_behave_like "binomial engine"
|
70
|
+
|
71
|
+
it "should return correct cdf for n>100" do
|
72
|
+
pending("incomplete beta function is slow. Should be replaced for a faster one")
|
73
|
+
end
|
74
|
+
|
75
|
+
it "should return correct p_value for n<=100" do
|
76
|
+
pending("Can't calculate with precision x using p")
|
77
|
+
[10,100].each do |n|
|
78
|
+
[0.25,0.5,0.75].each do |pr|
|
79
|
+
[n/2].each do |x|
|
80
|
+
cdf=@engine.cdf(x,n,pr)
|
81
|
+
p_value=@engine.p_value(cdf,n,pr)
|
82
|
+
|
83
|
+
p_value.should eq(x), "For p_value(#{cdf},#{n},#{pr}) expected #{x}, obtained #{p_value}"
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
end
|
90
|
+
if Distribution.has_gsl?
|
91
|
+
describe Distribution::Binomial::GSL_ do
|
92
|
+
before do
|
93
|
+
@engine=Distribution::Binomial::GSL_
|
94
|
+
end
|
95
|
+
it_should_behave_like "binomial engine"
|
96
|
+
end
|
97
|
+
end
|
98
|
+
#if Distribution.has_statistics2?
|
99
|
+
# describe Distribution::Binomial::Statistics2_ do
|
100
|
+
#
|
101
|
+
# before do
|
102
|
+
# @engine=Distribution::Binomial::Statistics2_
|
103
|
+
# end
|
104
|
+
#it_should_behave_like "binomial engine"
|
105
|
+
# end
|
106
|
+
#end
|
107
|
+
|
108
|
+
if Distribution.has_java?
|
109
|
+
describe Distribution::Binomial::Java_ do
|
110
|
+
before do
|
111
|
+
@engine=Distribution::Binomial::Java_
|
112
|
+
end
|
113
|
+
it_should_behave_like "binomial engine"
|
114
|
+
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
end
|