distribution 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +0 -0
- data/.autotest +23 -0
- data/History.txt +3 -0
- data/Manifest.txt +39 -0
- data/README.txt +71 -0
- data/Rakefile +19 -0
- data/bin/distribution +3 -0
- data/lib/distribution.rb +148 -0
- data/lib/distribution/bivariatenormal.rb +25 -0
- data/lib/distribution/bivariatenormal/gsl.rb +11 -0
- data/lib/distribution/bivariatenormal/ruby.rb +281 -0
- data/lib/distribution/bivariatenormal/statistics2.rb +0 -0
- data/lib/distribution/chisquare.rb +29 -0
- data/lib/distribution/chisquare/gsl.rb +27 -0
- data/lib/distribution/chisquare/ruby.rb +85 -0
- data/lib/distribution/chisquare/statistics2.rb +21 -0
- data/lib/distribution/f.rb +28 -0
- data/lib/distribution/f/gsl.rb +28 -0
- data/lib/distribution/f/ruby.rb +117 -0
- data/lib/distribution/f/statistics2.rb +26 -0
- data/lib/distribution/math_extension.rb +72 -0
- data/lib/distribution/normal.rb +36 -0
- data/lib/distribution/normal/gsl.rb +24 -0
- data/lib/distribution/normal/ruby.rb +99 -0
- data/lib/distribution/normal/statistics2.rb +14 -0
- data/lib/distribution/normalmultivariate.rb +73 -0
- data/lib/distribution/t.rb +27 -0
- data/lib/distribution/t/gsl.rb +29 -0
- data/lib/distribution/t/ruby.rb +105 -0
- data/lib/distribution/t/statistics2.rb +28 -0
- data/spec/bivariatenormal_spec.rb +63 -0
- data/spec/chisquare_spec.rb +89 -0
- data/spec/distribution_spec.rb +19 -0
- data/spec/f_spec.rb +107 -0
- data/spec/normal_spec.rb +105 -0
- data/spec/shorthand_function.rb +6 -0
- data/spec/shorthand_spec.rb +14 -0
- data/spec/spec.opts +3 -0
- data/spec/spec_helper.rb +23 -0
- data/spec/t_spec.rb +98 -0
- metadata +160 -0
- metadata.gz.sig +1 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'distribution/normal/ruby'
|
2
|
+
require 'distribution/normal/gsl'
|
3
|
+
require 'distribution/normal/statistics2'
|
4
|
+
|
5
|
+
module Distribution
  # From Wikipedia:
  # Continuous probability distribution that is often used as
  # a first approximation to describe real-valued random variables
  # that tend to cluster around a single mean value.
  # The graph of the associated probability density function is "bell"-shaped.
  #
  # The singleton methods documented below are not written out here; they
  # are expected to be generated by +create_distribution_methods+
  # (NOTE(review): presumably supplied by Distributable -- confirm).
  module Normal
    SHORTHAND = 'norm'
    extend(Distributable)

    create_distribution_methods()

    ##
    # :singleton-method: pdf(x)
    # Returns the probability density of the standard Normal
    # distribution at +x+

    ##
    # :singleton-method: p_value(qn)
    # Returns the inverse CDF (P-value) for the cumulative
    # probability +qn+

    ##
    # :singleton-method: cdf(x)
    # Returns the integral of the Normal distribution over (-Infty, +x+]

    ##
    # :singleton-method: rng
    # Returns a lambda which returns a random number from
    # X ~ N(0,1)

  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Distribution
  module Normal
    # Normal distribution backend that forwards every call to the GSL
    # bindings (the GSL constant must already be loaded by the caller).
    module GSL_
      # Returns a lambda; each call yields +mean+ plus a gaussian deviate
      # with standard deviation +sigma+ drawn from a GSL MT19937 generator.
      # When +seed+ is nil a seed is picked with rand(10e8).
      def self.rng(mean=0,sigma=1,seed=nil)
        seed ||= rand(10e8)
        generator = GSL::Rng.alloc(GSL::Rng::MT19937, seed)
        -> { mean + generator.gaussian(sigma) }
      end

      # Forwards +x+ to GSL::Cdf::ugaussian_P.
      def self.cdf(x) # :nodoc:
        GSL::Cdf::ugaussian_P(x)
      end

      # Forwards +x+ to GSL::Ran::gaussian_pdf.
      def self.pdf(x) # :nodoc:
        GSL::Ran::gaussian_pdf(x)
      end

      # Forwards +qn+ to GSL::Cdf::ugaussian_Pinv (inverse of cdf).
      def self.p_value(qn)
        GSL::Cdf::ugaussian_Pinv(qn)
      end

      # Intentionally empty; always returns nil.
      # NOTE(review): looks like a placeholder/marker -- confirm its purpose.
      def self.gsl
      end
    end
  end
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
module Distribution
  module Normal
    # Pure-Ruby backend for the standard normal distribution.
    #
    # +cdf+ and +pdf+ read the constant SQ2PI, which is not defined in this
    # file and must be reachable from the Distribution namespace.
    module Ruby_
      class << self

        # Returns a lambda producing random numbers from a gaussian
        # distribution X ~ N(0,1). Shortcut for rng(0, 1, nil).
        def rngu
          rng(0, 1, nil)
        end

        # Return a lambda which returns a random number within a
        # gaussian distribution X ~ N(+mean+,+sigma+^2) on every call.
        #
        # +seed+, when given, seeds a private generator so the sequence is
        # reproducible. With +seed+ nil the interpreter's default generator
        # is used (the same source as Kernel#rand).
        #
        # Implements the polar (rejection) form of the Box-Muller transform:
        # each accepted point yields two deviates, the second of which is
        # kept and handed out by the next call.
        # == Reference:
        # * http://www.taygeta.com/random/gaussian.html
        def rng(mean=0,sigma=1,seed=nil)
          source = seed.nil? ? Random : Random.new(seed)
          spare = nil
          lambda {
            if spare
              deviate = spare
              spare = nil
            else
              x1 = x2 = w = 0.0
              loop do
                x1 = 2.0 * source.rand - 1.0
                x2 = 2.0 * source.rand - 1.0
                w = x1 * x1 + x2 * x2
                # Reject points outside the unit circle and the origin
                # (w == 0 would make the logarithm below blow up).
                break if w > 0.0 && w < 1.0
              end
              scale = Math.sqrt((-2.0 * Math.log(w)) / w)
              deviate = x1 * scale
              spare = x2 * scale
            end
            deviate * sigma + mean
          }
        end

        # Return the inverse CDF or P-value of the corresponding integral.
        #
        # +qn+ is a cumulative probability in [0, 1]. Returns -Infinity for
        # 0.0, +Infinity for 1.0 and 0.0 for 0.5. For +qn+ outside [0, 1] an
        # error line is written to $stderr and 0.0 is returned.
        def p_value(qn)
          if qn < 0.0 || 1.0 < qn
            $stderr.printf("Error : qn < 0 or qn > 1 in p_value()!\n")
            return 0.0
          end
          return 0.0 if qn == 0.5
          # The polynomial below yields NaN at the end points.
          return -Float::INFINITY if qn == 0.0
          return Float::INFINITY if qn == 1.0

          # Coefficients of the polynomial approximation in w3.
          b = [1.570796288, 0.03706987906, -0.8364353589e-3,
               -0.2250947176e-3, 0.6841218299e-5, 0.5824238515e-5,
               -0.104527497e-5, 0.8360937017e-7, -0.3231081277e-8,
               0.3657763036e-10, 0.6936233982e-12]

          # Work on the lower tail; the sign is restored at the end.
          tail = qn > 0.5 ? 1.0 - qn : qn
          w3 = -Math.log(4.0 * tail * (1.0 - tail))
          w1 = b[0]
          1.upto(10) { |i| w1 += b[i] * w3**i }

          root = Math.sqrt(w1 * w3)
          qn > 0.5 ? root : -root
        end

        # Normal cumulative distribution function (cdf).
        #
        # Returns the integral of normal distribution
        # over (-Infty, z].
        #
        # Values beyond +/-12 are answered directly with 1.0 / 0.0; without
        # that short-circuit the series underflows for large |z| and
        # wrongly reports 0.5.
        def cdf(z)
          return 0.0 if z < -12
          return 1.0 if z > 12
          return 0.5 if z == 0.0

          upper = z > 0.0
          z = z.abs.to_f
          z2 = z * z
          t = q = z * Math.exp(-0.5 * z2) / SQ2PI

          # Sum the series until adding a term no longer increases q.
          3.step(199, 2) do |i|
            prev = q
            t *= z2 / i
            q += t
            return(upper ? 0.5 + q : 0.5 - q) if q <= prev
          end
          # No convergence within the iteration budget: saturate.
          upper ? 1.0 : 0.0
        end

        # Normal probability density function (pdf)
        # With mean=0 and sigma=1
        def pdf(x)
          (1.0 / SQ2PI) * Math.exp(-(x**2 / 2.0))
        end
      end
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module Distribution
  # Calculate cdf and inverse cdf for Multivariate Distribution.
  module NormalMultivariate
    class << self
      # Intended to return the multivariate normal cdf. It is NOT usable
      # yet: the first statement raises, so everything after it is
      # unreachable work-in-progress code kept for reference.
      #
      # * aa is the array of lower values (a nil entry fixes the matching
      #   lower probability at 0.0)
      # * bb is the array of higher values (a nil entry fixes the matching
      #   upper probability at 1.0)
      # * sigma is a symmetric positive definite covariance matrix
      #   (converted with +to_gsl+ when it responds to it)
      # * epsilon is the error level below which sampling stops
      # * alpha scales the error estimate
      # * max_iterations caps the number of sampling rounds
      #
      # Raises RuntimeError ("Doesn't work yet") unconditionally.
      def cdf(aa,bb,sigma, epsilon=0.0001, alpha=2.5, max_iterations=100) # :nodoc:
        raise "Doesn't work yet"
        # Prepend nil so bounds can be addressed with 1-based indices.
        a = [nil] + aa
        b = [nil] + bb
        m = aa.size
        sigma = sigma.to_gsl if sigma.respond_to? :to_gsl

        cc = GSL::Linalg::Cholesky.decomp(sigma)
        c = cc.lower
        intsum = 0
        varsum = 0
        n = 0
        d = Array.new(m + 1, nil)
        e = Array.new(m + 1, nil)
        f = Array.new(m + 1, nil)
        (1..m).each { |i|
          d[i] = 0.0 if a[i].nil?
          e[i] = 1.0 if b[i].nil?
        }
        d[1] = uPhi(a[1].quo(c[0, 0])) unless d[1] == 0
        e[1] = uPhi(b[1].quo(c[0, 0])) unless e[1] == 1
        f[1] = e[1] - d[1]

        error = 1000
        begin
          # NOTE(review): the weights are scaled by epsilon here; this may
          # be one reason the routine is disabled -- confirm against the
          # reference algorithm before enabling.
          w = (m + 1).times.collect { |i| rand * epsilon }
          y = []
          (2..m).each do |i|
            y[i - 1] = iPhi(d[i - 1] + w[i - 1] * (e[i - 1] - d[i - 1]))
            sumc = 0
            (1..(i - 1)).each do |j|
              sumc += c[i - 1, j - 1] * y[j]
            end

            if a[i] != nil
              d[i] = uPhi((a[i] - sumc).quo(c[i - 1, i - 1]))
            end
            # puts "sumc:#{sumc}"

            if b[i] != nil
              #puts "e[#{i}] :#{c[i-1,i-1]}"
              e[i] = uPhi((b[i] - sumc).quo(c[i - 1, i - 1]))
            end
            f[i] = (e[i] - d[i]) * f[i - 1]
          end
          # Accumulate the sample once (the earlier form added intsum to
          # itself as well, doubling the running total every round).
          intsum += f[m]
          varsum = varsum + f[m]**2
          n += 1
          error = alpha * Math::sqrt((varsum.quo(n) - (intsum.quo(n))**2).quo(n))
        end while(error > epsilon and n < max_iterations)

        f = intsum.quo(n)
        #p intsum
        #puts "f:#{f}, n:#{n}, error:#{error}"
        f
      end

      # Inverse of the standard normal cdf; delegates to
      # Distribution::Normal.p_value.
      def iPhi(pr)
        Distribution::Normal.p_value(pr)
      end

      # Standard normal cdf; delegates to Distribution::Normal.cdf.
      def uPhi(x)
        Distribution::Normal.cdf(x)
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'distribution/t/ruby'
|
2
|
+
require 'distribution/t/gsl'
|
3
|
+
require 'distribution/t/statistics2'
|
4
|
+
module Distribution

  # Calculate statisticals for T Distribution.
  #
  # The singleton methods documented below are not written out here; they
  # are expected to be generated by +create_distribution_methods+
  # (NOTE(review): presumably supplied by Distributable -- confirm).
  module T
    # Shorthand prefix for this distribution. It was 'f', which is the
    # F distribution's name and not T's.
    SHORTHAND='t'
    extend Distributable
    create_distribution_methods

    ##
    # :singleton-method: pdf(x,k)
    # Returns the probability density of the T distribution
    # with +k+ degrees of freedom at +x+

    ##
    # :singleton-method: p_value(qn, k)
    # Return the P-value of the corresponding integral +qn+ with
    # +k+ degrees of freedom

    ##
    # :singleton-method: cdf(x,k)
    # Returns the integral of T distribution
    # with +k+ degrees of freedom over (-Infty, +x+]
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Distribution
  module T
    # T distribution backend that forwards every call to the GSL
    # bindings (the GSL constant must already be loaded by the caller).
    module GSL_
      # Density of the T distribution with +k+ degrees of freedom at +x+;
      # forwards to GSL::Ran.tdist_pdf.
      def self.pdf(x,k)
        GSL::Ran.tdist_pdf(x, k)
      end

      # Return the P-value of the corresponding integral with
      # k degrees of freedom; forwards to GSL::Cdf.tdist_Pinv.
      #
      #   Distribution::T.p_value(0.95,2)
      def self.p_value(pr,k)
        GSL::Cdf.tdist_Pinv(pr, k)
      end

      # T cumulative distribution function (cdf).
      #
      # Converts +x+ to Float and forwards to GSL::Cdf.tdist_P
      # with +k+ degrees of freedom.
      #   Distribution::T.cdf(2,3)
      #
      def self.cdf(x, k)
        GSL::Cdf.tdist_P(x.to_f, k)
      end
    end
  end
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
module Distribution
  module T
    # Pure-Ruby backend for Student's t distribution.
    module Ruby_
      class << self
        # Probability density of the t-distribution with +v+ degrees of
        # freedom evaluated at +t+.
        def pdf(t,v)
          half_next = (v + 1) / 2.0
          norm = Math.gamma(half_next) / (Math.sqrt(v * Math::PI) * Math.gamma(v / 2.0))
          norm * (1 + (t**2 / v.to_f))**(-half_next)
        end

        # Returns the integral of t-distribution with n degrees of freedom over (-Infty, x].
        def cdf(t, n)
          p_t(n, t)
        end

        # t-distribution ([1])
        # (-\infty, x]
        #
        # +df+ is the degrees of freedom and +t+ the upper integration
        # limit. The odd/even formula is chosen from the value of +df+, so
        # an integral Float (4.0) gives the same result as the Integer (4).
        # NOTE(review): the series presumably assumes an integral +df+;
        # non-integral values take the odd-df branch -- confirm.
        def p_t(df, t)
          c2 = df.to_f / (df + t * t)
          s = Math.sqrt(1.0 - c2)
          s = -s if t < 0.0
          p = 0.0
          i = df % 2 + 2
          while i <= df
            p += s
            s *= (i - 1) * c2 / i
            i += 2
          end
          if (df % 2) != 0
            0.5 + (p * Math.sqrt(c2) + Math.atan(t / Math.sqrt(df))) / Math::PI
          else
            (1.0 + p) / 2.0
          end
        end

        # inverse of t-distribution ([2])
        # (-\infty, -q/2] + [q/2, \infty)
        #
        # Finds, by a decimal step search on p_t, the point w at which the
        # two-sided tail probability equals +q+ for +n+ degrees of freedom.
        def ptsub(q, n)
          q = q.to_f
          # Step size at which the search stops; coarser for the listed
          # small-n / small-q combinations.
          eps =
            if n == 1 && 0.001 < q && q < 0.01
              1.0e-4
            elsif n == 2 && q < 0.0001
              1.0e-4
            elsif n == 1 && q < 0.001
              1.0e-2
            else
              1.0e-5
            end
          s = 10000.0
          w = 0.0
          loop do
            w += s
            return w if s <= eps
            qe = 2.0 - p_t(n, w) * 2.0 - q
            return w if qe == 0.0
            if qe < 0.0
              # Overshot: step back and continue with a 10x finer step.
              w -= s
              s /= 10.0
            end
          end
        end

        # Two-sided inverse: returns the point whose two-sided tail
        # probability is +q+ for +n+ degrees of freedom.
        #
        # For +q+ < 1.0e-5, +q+ > 1.0 or +n+ < 1 an error line is written
        # to $stderr and 0.0 is returned. Small +n+ (and small +q+ with
        # +n+ <= 13) use the search in ptsub; otherwise a series expansion
        # around the normal quantile from Normal.p_value is used.
        def pt(q, n)
          q = q.to_f
          if q < 1.0e-5 || q > 1.0 || n < 1
            $stderr.printf("Error : Illegal parameter in pt()!\n")
            return 0.0
          end

          return ptsub(q, n) if n <= 5
          return ptsub(q, n) if q <= 5.0e-3 && n <= 13

          # Denominators of the successive correction terms.
          f1 = 4.0 * (f = n.to_f)
          f5 = (f4 = (f3 = (f2 = f * f) * f) * f) * f
          f2 *= 96.0
          f3 *= 384.0
          f4 *= 92160.0
          f5 *= 368640.0
          u = Normal.p_value(1.0 - q / 2.0)

          # Odd powers of u: w0 = u^3, w1 = u^5, ... w4 = u^11.
          w0 = (u2 = u * u) * u
          w1 = w0 * u2
          w2 = w1 * u2
          w3 = w2 * u2
          w4 = w3 * u2
          w = (w0 + u) / f1
          w += (5.0 * w1 + 16.0 * w0 + 3.0 * u) / f2
          w += (3.0 * w2 + 19.0 * w1 + 17.0 * w0 - 15.0 * u) / f3
          w += (79.0 * w3 + 776.0 * w2 + 1482.0 * w1 - 1920.0 * w0 - 9450.0 * u) / f4
          w += (27.0 * w4 + 339.0 * w3 + 930.0 * w2 - 1782.0 * w1 - 765.0 * w0 + 17955.0 * u) / f5
          u + w
        end

        # Returns the P-value of tdist().
        #
        # +y+ is a cumulative probability and +n+ the degrees of freedom;
        # the lower half is mapped onto pt by symmetry.
        def p_value(y,n)
          if y > 0.5
            pt(2.0 - y * 2.0, n)
          else
            - pt(y * 2.0, n)
          end
        end

      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'rbconfig'
|
2
|
+
module Distribution
  module T
    # T distribution backend that forwards to the Statistics2 library
    # (the Statistics2 constant must already be loaded by the caller).
    module Statistics2_
      class << self
        # Return the P-value of the corresponding integral with
        # k degrees of freedom
        def p_value(pr,k)
          Statistics2.ptdist(k, pr)
        end

        # There are some problem on i686 with t on statistics2, so cdf is
        # only defined on other architectures.
        #
        # The guard used to read `!RbConfig::CONFIG['arch']=~/i686/`, which
        # negates the string before matching, so it was never true and cdf
        # was never defined anywhere.
        unless RbConfig::CONFIG['arch'] =~ /i686/
          # T cumulative distribution function (cdf).
          #
          # Returns the integral of t-distribution
          # with n degrees of freedom over (-Infty, x].
          #
          def cdf(x,k)
            Statistics2.tdist(k, x)
          end
        end

      end
    end
  end
end
|