distribution 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +0 -0
- data/.autotest +23 -0
- data/History.txt +3 -0
- data/Manifest.txt +39 -0
- data/README.txt +71 -0
- data/Rakefile +19 -0
- data/bin/distribution +3 -0
- data/lib/distribution.rb +148 -0
- data/lib/distribution/bivariatenormal.rb +25 -0
- data/lib/distribution/bivariatenormal/gsl.rb +11 -0
- data/lib/distribution/bivariatenormal/ruby.rb +281 -0
- data/lib/distribution/bivariatenormal/statistics2.rb +0 -0
- data/lib/distribution/chisquare.rb +29 -0
- data/lib/distribution/chisquare/gsl.rb +27 -0
- data/lib/distribution/chisquare/ruby.rb +85 -0
- data/lib/distribution/chisquare/statistics2.rb +21 -0
- data/lib/distribution/f.rb +28 -0
- data/lib/distribution/f/gsl.rb +28 -0
- data/lib/distribution/f/ruby.rb +117 -0
- data/lib/distribution/f/statistics2.rb +26 -0
- data/lib/distribution/math_extension.rb +72 -0
- data/lib/distribution/normal.rb +36 -0
- data/lib/distribution/normal/gsl.rb +24 -0
- data/lib/distribution/normal/ruby.rb +99 -0
- data/lib/distribution/normal/statistics2.rb +14 -0
- data/lib/distribution/normalmultivariate.rb +73 -0
- data/lib/distribution/t.rb +27 -0
- data/lib/distribution/t/gsl.rb +29 -0
- data/lib/distribution/t/ruby.rb +105 -0
- data/lib/distribution/t/statistics2.rb +28 -0
- data/spec/bivariatenormal_spec.rb +63 -0
- data/spec/chisquare_spec.rb +89 -0
- data/spec/distribution_spec.rb +19 -0
- data/spec/f_spec.rb +107 -0
- data/spec/normal_spec.rb +105 -0
- data/spec/shorthand_function.rb +6 -0
- data/spec/shorthand_spec.rb +14 -0
- data/spec/spec.opts +3 -0
- data/spec/spec_helper.rb +23 -0
- data/spec/t_spec.rb +98 -0
- metadata +160 -0
- metadata.gz.sig +1 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'distribution/normal/ruby'
|
2
|
+
require 'distribution/normal/gsl'
|
3
|
+
require 'distribution/normal/statistics2'
|
4
|
+
|
5
|
+
module Distribution
  # The Normal distribution. From Wikipedia:
  #
  #   Continuous probability distribution that is often used as
  #   a first approximation to describe real-valued random variables
  #   that tend to cluster around a single mean value.
  #   The graph of the associated probability density function is “bell”-shaped
  #
  # No method bodies are written in this module. The singleton methods
  # documented below are presumably generated by +create_distribution_methods+
  # (made available through Distributable, which is defined outside this
  # file) -- confirm against that module.
  module Normal
    SHORTHAND = 'norm'
    extend Distributable

    create_distribution_methods

    ##
    # :singleton-method: pdf(x)
    # Returns the value of the Normal probability density function at +x+

    ##
    # :singleton-method: p_value(qn)
    # Returns the inverse CDF (P-value) for the cumulative probability +qn+

    ##
    # :singleton-method: cdf(x)
    # Returns the integral of Normal distribution over (-Infty, +x+]

    ##
    # :singleton-method: rng
    # Returns a lambda which returns a random number from
    # X ~ N(0,1)
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Distribution
  module Normal
    # Normal distribution backend that delegates every computation to the
    # GSL bindings. The +GSL+ constant is not loaded here; it must already be
    # available when these methods are called.
    module GSL_
      class << self
        # Builds a generator of normally distributed random numbers.
        #
        # * +mean+ is added to every draw
        # * +sigma+ is passed to the generator's +gaussian+ method
        # * +seed+ seeds the MT19937 generator; when omitted, a seed is
        #   picked with <tt>rand(10e8)</tt>
        #
        # Returns a lambda; each call yields one random number.
        def rng(mean = 0, sigma = 1, seed = nil)
          seed = rand(10e8) unless seed
          generator = GSL::Rng.alloc(GSL::Rng::MT19937, seed)
          lambda { mean + generator.gaussian(sigma) }
        end

        # Delegates to <tt>GSL::Cdf.ugaussian_P</tt> for +x+.
        def cdf(x) # :nodoc:
          GSL::Cdf.ugaussian_P(x)
        end

        # Delegates to <tt>GSL::Ran.gaussian_pdf</tt> for +x+.
        def pdf(x) # :nodoc:
          GSL::Ran.gaussian_pdf(x)
        end

        # Delegates to <tt>GSL::Cdf.ugaussian_Pinv</tt> for the
        # probability +qn+.
        def p_value(qn)
          GSL::Cdf.ugaussian_Pinv(qn)
        end

        # Takes no arguments and returns nil. Its purpose is not evident
        # from this file.
        def gsl
        end
      end
    end
  end
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
module Distribution
  module Normal
    # Pure-Ruby implementation of the standard Normal distribution.
    #
    # +cdf+ and +pdf+ read the constant +SQ2PI+, which is not defined in this
    # module; presumably it is sqrt(2*pi) defined elsewhere in the library --
    # confirm.
    module Ruby_
      class << self
        # Returns a lambda producing random numbers within a gaussian
        # distribution X ~ N(0,1). Same as <tt>rng(0, 1, nil)</tt>.
        def rngu
          rng(0, 1, nil)
        end

        # Return a lambda which returns a random number within a
        # gaussian distribution X ~ N(+mean+,+sigma+^2).
        #
        # * +mean+ is added to every draw
        # * +sigma+ multiplies every standard draw
        # * +seed+, when given, feeds a private <tt>Random.new(seed)</tt>,
        #   so two generators built with the same seed produce the same
        #   sequence. When nil, uniform numbers come from <tt>Kernel.rand</tt>.
        #
        # Each accepted pair of uniform numbers yields two gaussian values;
        # the second one is kept and handed out on the following call.
        #
        # == Reference:
        # * http://www.taygeta.com/random/gaussian.html
        def rng(mean = 0, sigma = 1, seed = nil)
          uniform =
            if seed.nil?
              lambda { Kernel.rand }
            else
              prng = Random.new(seed)
              lambda { prng.rand }
            end
          spare = nil
          lambda do
            if spare
              value = spare
              spare = nil
            else
              x1 = x2 = w = nil
              loop do
                x1 = 2.0 * uniform.call - 1.0
                x2 = 2.0 * uniform.call - 1.0
                w = x1 * x1 + x2 * x2
                # w == 0 must be rejected too: log(0) / 0 would yield NaN.
                break if w < 1.0 && w > 0.0
              end
              scale = Math.sqrt((-2.0 * Math.log(w)) / w)
              spare = x2 * scale
              value = x1 * scale
            end
            value * sigma + mean
          end
        end

        # Return the inverse CDF or P-value of the corresponding integral.
        #
        # * +qn+ is a cumulative probability in [0, 1]
        #
        # Returns -Infinity for 0, +Infinity for 1 and 0.0 for 0.5.
        # For +qn+ outside [0, 1] a message is written to $stderr and
        # 0.0 is returned.
        def p_value(qn)
          b = [1.570796288, 0.03706987906, -0.8364353589e-3,
               -0.2250947176e-3, 0.6841218299e-5, 0.5824238515e-5,
               -0.104527497e-5, 0.8360937017e-7, -0.3231081277e-8,
               0.3657763036e-10, 0.6936233982e-12]

          if qn < 0.0 || qn > 1.0
            $stderr.printf("Error : qn < 0 or qn > 1 in pnorm()!\n")
            return 0.0
          end
          # At the endpoints the polynomial below would evaluate to NaN
          # (log(0) makes w3 infinite), so answer them directly.
          return -Float::INFINITY if qn == 0.0
          return Float::INFINITY if qn == 1.0
          return 0.0 if qn == 0.5

          # Work on the lower tail; the sign is restored at the end.
          tail = qn > 0.5 ? 1.0 - qn : qn
          w3 = -Math.log(4.0 * tail * (1.0 - tail))
          w1 = b[0]
          1.upto(10) do |i|
            w1 += b[i] * w3**i
          end
          root = Math.sqrt(w1 * w3)
          qn > 0.5 ? root : -root
        end

        # Normal cumulative distribution function (cdf).
        #
        # Returns the integral of normal distribution
        # over (-Infty, z].
        #
        # Values below -12 return 0.0, values above 12 return 1.0 and
        # 0 returns 0.5 without evaluating the series.
        def cdf(z)
          return 0.0 if z < -12
          return 1.0 if z > 12
          return 0.5 if z == 0.0

          upper = z > 0.0
          z = z.abs.to_f
          z2 = z * z
          t = q = z * Math.exp(-0.5 * z2) / SQ2PI

          3.step(199, 2) do |i|
            prev = q
            t *= z2 / i
            q += t
            # Stop as soon as another term no longer increases the sum.
            return(upper ? 0.5 + q : 0.5 - q) if q <= prev
          end
          upper ? 1.0 : 0.0
        end

        # Normal probability density function (pdf)
        # with mean 0 and sigma 1, evaluated at +x+.
        def pdf(x)
          (1.0 / SQ2PI) * Math.exp(-(x**2 / 2.0))
        end
      end
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module Distribution
  # Calculate cdf and inverse cdf for Multivariate Distribution.
  module NormalMultivariate
    class << self
      # Intended to return the multivariate normal cdf.
      #
      # NOT USABLE YET: the first statement raises RuntimeError
      # ("Doesn't work yet"). Everything after it is unreachable work in
      # progress and has never been exercised.
      #
      # * +aa+ is the array of lower values (a nil entry fixes that lower
      #   probability at 0.0)
      # * +bb+ is the array of higher values (a nil entry fixes that upper
      #   probability at 1.0)
      # * +sigma+ is a symmetric positive definite covariance matrix;
      #   converted with +to_gsl+ when it responds to it
      # * +epsilon+ is the error level below which sampling stops
      # * +alpha+ multiplies the standard error of the running estimate
      # * +max_iterations+ caps the number of sampling rounds
      def cdf(aa, bb, sigma, epsilon = 0.0001, alpha = 2.5, max_iterations = 100) # :nodoc:
        raise "Doesn't work yet"

        # ---- unreachable from here on ----
        # Prepend nil so that a, b, d, e and f are indexed from 1,
        # while the Cholesky factor c stays 0-based.
        a = [nil] + aa
        b = [nil] + bb
        m = aa.size
        sigma = sigma.to_gsl if sigma.respond_to? :to_gsl

        cc = GSL::Linalg::Cholesky.decomp(sigma)
        c = cc.lower
        intsum = 0
        varsum = 0
        n = 0
        d = Array.new(m + 1, nil)
        e = Array.new(m + 1, nil)
        f = Array.new(m + 1, nil)
        (1..m).each do |i|
          d[i] = 0.0 if a[i].nil?
          e[i] = 1.0 if b[i].nil?
        end
        d[1] = uPhi(a[1].quo(c[0, 0])) unless d[1] == 0
        e[1] = uPhi(b[1].quo(c[0, 0])) unless e[1] == 1
        f[1] = e[1] - d[1]

        error = 1000
        loop do
          # NOTE(review): the draws are scaled by epsilon, which keeps them
          # in [0, epsilon); plain uniform draws look more plausible here --
          # confirm against the reference algorithm before enabling.
          w = Array.new(m + 1) { rand * epsilon }
          y = []
          (2..m).each do |i|
            y[i - 1] = iPhi(d[i - 1] + w[i - 1] * (e[i - 1] - d[i - 1]))
            sumc = 0
            (1..(i - 1)).each do |j|
              sumc += c[i - 1, j - 1] * y[j]
            end

            d[i] = uPhi((a[i] - sumc).quo(c[i - 1, i - 1])) unless a[i].nil?
            e[i] = uPhi((b[i] - sumc).quo(c[i - 1, i - 1])) unless b[i].nil?
            f[i] = (e[i] - d[i]) * f[i - 1]
          end
          # Plain running sums: both are divided by n below, so each sample
          # must be added exactly once.
          intsum += f[m]
          varsum += f[m]**2
          n += 1
          error = alpha * Math.sqrt((varsum.quo(n) - (intsum.quo(n))**2).quo(n))
          break unless error > epsilon && n < max_iterations
        end

        intsum.quo(n)
      end

      # Inverse of the standard Normal cdf for the probability +pr+;
      # delegates to Distribution::Normal.p_value.
      def iPhi(pr)
        Distribution::Normal.p_value(pr)
      end

      # Standard Normal cdf at +x+; delegates to Distribution::Normal.cdf.
      def uPhi(x)
        Distribution::Normal.cdf(x)
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'distribution/t/ruby'
|
2
|
+
require 'distribution/t/gsl'
|
3
|
+
require 'distribution/t/statistics2'
|
4
|
+
module Distribution
  # Calculate statisticals for T Distribution.
  #
  # No method bodies are written in this module. The singleton methods
  # documented below are presumably generated by +create_distribution_methods+
  # (made available through Distributable, which is defined outside this
  # file) -- confirm against that module.
  module T
    # NOTE(review): 'f' looks carried over from the F distribution module.
    # If the F module declares the same shorthand, names derived from it
    # would clash -- verify before relying on T shortcuts.
    SHORTHAND = 'f'
    extend Distributable

    create_distribution_methods

    ##
    # :singleton-method: pdf(x,k)
    # Returns the value at +x+ of the T probability density function
    # with +k+ degrees of freedom

    ##
    # :singleton-method: p_value(qn, k)
    # Returns the inverse CDF (P-value) for the cumulative probability
    # +qn+ with +k+ degrees of freedom

    ##
    # :singleton-method: cdf(x,k)
    # Returns the integral of T distribution
    # with +k+ degrees of freedom over (-Infty, +x+]
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Distribution
  module T
    # T distribution backend that delegates every computation to the GSL
    # bindings. The +GSL+ constant is not loaded here; it must already be
    # available when these methods are called.
    module GSL_
      class << self
        # T probability density function (pdf).
        #
        # Delegates to <tt>GSL::Ran.tdist_pdf</tt> for the point +x+
        # and +k+ degrees of freedom.
        def pdf(x, k)
          GSL::Ran.tdist_pdf(x, k)
        end

        # Return the P-value of the corresponding integral with
        # k degrees of freedom.
        #
        # Delegates to <tt>GSL::Cdf.tdist_Pinv</tt> for the probability
        # +pr+.
        def p_value(pr, k)
          GSL::Cdf.tdist_Pinv(pr, k)
        end

        # T cumulative distribution function (cdf).
        #
        # Delegates to <tt>GSL::Cdf.tdist_P</tt> with +k+ degrees of
        # freedom; +x+ is converted to Float before the call.
        def cdf(x, k)
          point = x.to_f
          GSL::Cdf.tdist_P(point, k)
        end
      end
    end
  end
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
module Distribution
  module T
    # Pure-Ruby implementation of the T distribution.
    module Ruby_
      class << self
        # T probability density function (pdf) at +t+ with +v+ degrees
        # of freedom.
        def pdf(t, v)
          normalizer = Math.gamma((v + 1) / 2.0) /
                       (Math.sqrt(v * Math::PI) * Math.gamma(v / 2.0))
          normalizer * ((1 + (t**2 / v.to_f))**(-(v + 1) / 2.0))
        end

        # Returns the integral of t-distribution with n degrees of freedom over (-Infty, x].
        def cdf(t, n)
          p_t(n, t)
        end

        # Integral of the t-distribution with +df+ degrees of freedom
        # over (-Infty, +t+].
        #
        # The recurrence starts at 2 for even +df+ and at 3 for odd +df+,
        # and each parity has its own closing formula. A Float holding a
        # whole number (e.g. 4.0) is converted to Integer first, so that it
        # gets the same result as the Integer it represents.
        #
        # NOTE(review): a non-integral Float +df+ still goes through the
        # odd closing formula, as before; whether that is a meaningful
        # approximation is not established here.
        def p_t(df, t)
          df = df.to_i if df.is_a?(Float) && df.finite? && df == df.floor
          c2 = df.to_f / (df + t * t)
          s = Math.sqrt(1.0 - c2)
          s = -s if t < 0.0
          p = 0.0
          i = df % 2 + 2
          while i <= df
            p += s
            s *= (i - 1) * c2 / i
            i += 2
          end
          if df.is_a?(Float) || (df & 1) != 0
            0.5 + (p * Math.sqrt(c2) + Math.atan(t / Math.sqrt(df))) / Math::PI
          else
            (1.0 + p) / 2.0
          end
        end

        # Inverse of the t-distribution for the two-sided tail probability
        # +q+, i.e. over (-\infty, -w] + [w, \infty), with +n+ degrees of
        # freedom.
        #
        # Walks +w+ upwards in steps of +s+; whenever a step overshoots
        # (the tail probability falls below +q+), the step is undone and
        # shrunk by a factor of 10. Returns once the step is no larger than
        # the tolerance chosen for the given +n+ and +q+.
        def ptsub(q, n)
          q = q.to_f
          eps =
            if n == 1 && 0.001 < q && q < 0.01
              1.0e-4
            elsif n == 2 && q < 0.0001
              1.0e-4
            elsif n == 1 && q < 0.001
              1.0e-2
            else
              1.0e-5
            end
          s = 10000.0
          w = 0.0
          loop do
            w += s
            return w if s <= eps

            qe = 2.0 - p_t(n, w) * 2.0 - q
            return w if qe == 0.0

            if qe < 0.0
              w -= s
              s /= 10.0
            end
          end
        end

        # Returns the positive point whose two-sided tail probability is
        # +q+ for +n+ degrees of freedom.
        #
        # For +q+ outside [1.0e-5, 1.0] or +n+ below 1 a message is
        # written to $stderr and 0.0 is returned. Small +n+ is handled by
        # the search in +ptsub+; otherwise the Normal quantile +u+ is
        # corrected by terms in increasing powers of 1/n.
        def pt(q, n)
          q = q.to_f
          if q < 1.0e-5 || q > 1.0 || n < 1
            $stderr.printf("Error : Illigal parameter in pt()!\n")
            return 0.0
          end

          return ptsub(q, n) if n <= 5
          return ptsub(q, n) if q <= 5.0e-3 && n <= 13

          f = n.to_f
          f_sq = f * f
          f_cu = f_sq * f
          f_4 = f_cu * f
          f_5 = f_4 * f
          f1 = 4.0 * f
          f2 = f_sq * 96.0
          f3 = f_cu * 384.0
          f4 = f_4 * 92160.0
          f5 = f_5 * 368640.0
          u = Normal.p_value(1.0 - q / 2.0)

          u2 = u * u
          w0 = u2 * u
          w1 = w0 * u2
          w2 = w1 * u2
          w3 = w2 * u2
          w4 = w3 * u2
          w = (w0 + u) / f1
          w += (5.0 * w1 + 16.0 * w0 + 3.0 * u) / f2
          w += (3.0 * w2 + 19.0 * w1 + 17.0 * w0 - 15.0 * u) / f3
          w += (79.0 * w3 + 776.0 * w2 + 1482.0 * w1 - 1920.0 * w0 - 9450.0 * u) / f4
          w += (27.0 * w4 + 339.0 * w3 + 930.0 * w2 - 1782.0 * w1 - 765.0 * w0 + 17955.0 * u) / f5
          u + w
        end

        # Returns the P-value (inverse cdf) for the cumulative probability
        # +y+ with +n+ degrees of freedom. The upper half is computed
        # through +pt+ directly and the lower half as its negation.
        def p_value(y, n)
          if y > 0.5
            pt(2.0 - y * 2.0, n)
          else
            -pt(y * 2.0, n)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'rbconfig'
|
2
|
+
module Distribution
  module T
    # T distribution backend that delegates to the Statistics2 library.
    # The +Statistics2+ constant is not loaded here; it must already be
    # available when these methods are called.
    module Statistics2_
      class << self
        # Return the P-value of the corresponding integral with
        # k degrees of freedom.
        #
        # Delegates to <tt>Statistics2.ptdist</tt>, which takes the degrees
        # of freedom first.
        def p_value(pr, k)
          Statistics2.ptdist(k, pr)
        end

        # There are some problem on i686 with t on statistics2, so cdf is
        # only defined on other architectures.
        #
        # The test must not be written as `!arch =~ /i686/`: unary `!`
        # binds tighter than `=~`, so that form matches the regexp against
        # true/false instead of the architecture string.
        unless RbConfig::CONFIG['arch'] =~ /i686/
          # T cumulative distribution function (cdf).
          #
          # Returns the integral of t-distribution
          # with k degrees of freedom over (-Infty, x].
          #
          # Delegates to <tt>Statistics2.tdist</tt>, which takes the degrees
          # of freedom first.
          def cdf(x, k)
            Statistics2.tdist(k, x)
          end
        end
      end
    end
  end
end
|