distribution 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. data.tar.gz.sig +0 -0
  2. data/.autotest +23 -0
  3. data/History.txt +3 -0
  4. data/Manifest.txt +39 -0
  5. data/README.txt +71 -0
  6. data/Rakefile +19 -0
  7. data/bin/distribution +3 -0
  8. data/lib/distribution.rb +148 -0
  9. data/lib/distribution/bivariatenormal.rb +25 -0
  10. data/lib/distribution/bivariatenormal/gsl.rb +11 -0
  11. data/lib/distribution/bivariatenormal/ruby.rb +281 -0
  12. data/lib/distribution/bivariatenormal/statistics2.rb +0 -0
  13. data/lib/distribution/chisquare.rb +29 -0
  14. data/lib/distribution/chisquare/gsl.rb +27 -0
  15. data/lib/distribution/chisquare/ruby.rb +85 -0
  16. data/lib/distribution/chisquare/statistics2.rb +21 -0
  17. data/lib/distribution/f.rb +28 -0
  18. data/lib/distribution/f/gsl.rb +28 -0
  19. data/lib/distribution/f/ruby.rb +117 -0
  20. data/lib/distribution/f/statistics2.rb +26 -0
  21. data/lib/distribution/math_extension.rb +72 -0
  22. data/lib/distribution/normal.rb +36 -0
  23. data/lib/distribution/normal/gsl.rb +24 -0
  24. data/lib/distribution/normal/ruby.rb +99 -0
  25. data/lib/distribution/normal/statistics2.rb +14 -0
  26. data/lib/distribution/normalmultivariate.rb +73 -0
  27. data/lib/distribution/t.rb +27 -0
  28. data/lib/distribution/t/gsl.rb +29 -0
  29. data/lib/distribution/t/ruby.rb +105 -0
  30. data/lib/distribution/t/statistics2.rb +28 -0
  31. data/spec/bivariatenormal_spec.rb +63 -0
  32. data/spec/chisquare_spec.rb +89 -0
  33. data/spec/distribution_spec.rb +19 -0
  34. data/spec/f_spec.rb +107 -0
  35. data/spec/normal_spec.rb +105 -0
  36. data/spec/shorthand_function.rb +6 -0
  37. data/spec/shorthand_spec.rb +14 -0
  38. data/spec/spec.opts +3 -0
  39. data/spec/spec_helper.rb +23 -0
  40. data/spec/t_spec.rb +98 -0
  41. metadata +160 -0
  42. metadata.gz.sig +1 -0
@@ -0,0 +1,36 @@
1
+ require 'distribution/normal/ruby'
2
+ require 'distribution/normal/gsl'
3
+ require 'distribution/normal/statistics2'
4
+
5
+ module Distribution
6
+ # From Wikipedia:
7
+ # Continuous probability distribution that is often used as
8
+ # a first approximation to describe real-valued random variables
9
+ # that tend to cluster around a single mean value.
10
+ # The graph of the associated probability density function is “bell”-shaped.
11
+ module Normal
12
+ SHORTHAND='norm'
13
+ extend Distributable
14
+
15
+ create_distribution_methods
16
+
17
+ ##
18
+ # :singleton-method: pdf(x)
19
+ # Returns the probability density function (PDF) of the Normal distribution evaluated at +x+
20
+
21
+ ##
22
+ # :singleton-method: p_value(qn)
23
+ # Returns the inverse CDF (P-value) for the cumulative probability +qn+
24
+
25
+ ##
26
+ # :singleton-method: cdf(x)
27
+ # Returns the integral of Normal distribution over (-Infty, +x+]
28
+
29
+ ##
30
+ # :singleton-method: rng
31
+ # Returns a lambda which returns a random number from
32
+ # X ~ N(0,1)
33
+
34
+
35
+ end
36
+ end
@@ -0,0 +1,24 @@
1
module Distribution
  module Normal
    # Normal distribution backend that delegates to the GSL bindings.
    module GSL_
      # Returns a lambda producing random numbers: each call yields +mean+
      # plus a Gaussian deviate drawn with +sigma+ from an MT19937 generator.
      # When no +seed+ is given, one is picked with Kernel#rand.
      def self.rng(mean=0, sigma=1, seed=nil)
        seed = rand(10e8) unless seed
        generator = GSL::Rng.alloc(GSL::Rng::MT19937, seed)
        lambda { mean + generator.gaussian(sigma) }
      end

      # Delegates to GSL::Cdf.ugaussian_P.
      def self.cdf(x) # :nodoc:
        GSL::Cdf.ugaussian_P(x)
      end

      # Delegates to GSL::Ran.gaussian_pdf.
      def self.pdf(x) # :nodoc:
        GSL::Ran.gaussian_pdf(x)
      end

      # Inverse CDF; delegates to GSL::Cdf.ugaussian_Pinv.
      def self.p_value(qn)
        GSL::Cdf.ugaussian_Pinv(qn)
      end

      # Intentionally empty; always returns nil.
      # NOTE(review): purpose is not visible from this file -- possibly a
      # marker method; confirm before removing.
      def self.gsl
      end
    end
  end
end
@@ -0,0 +1,99 @@
1
module Distribution
  module Normal
    # Pure-Ruby implementation of the standard normal distribution.
    module Ruby_
      class << self
        # Returns a lambda which returns random numbers from X ~ N(0,1).
        def rngu
          rng(0, 1, nil)
        end

        # Returns a lambda which returns a random number within a
        # gaussian distribution X ~ N(+mean+,+sigma+^2), generated with the
        # polar form of the Box-Muller transform.
        #
        # +seed+, when given, feeds a private Random instance so the
        # sequence is reproducible; when nil, Kernel#rand is used.
        #
        # == Reference:
        # * http://www.taygeta.com/random/gaussian.html
        def rng(mean=0, sigma=1, seed=nil)
          source = seed.nil? ? Kernel : Random.new(seed)
          spare = nil # second deviate of the last generated pair, if unused
          lambda {
            if spare.nil?
              x1 = x2 = w = nil
              loop do
                x1 = 2.0 * source.rand - 1.0
                x2 = 2.0 * source.rand - 1.0
                w = x1 * x1 + x2 * x2
                # Reject points outside the unit circle, and the origin
                # (w == 0 would feed log(0) below).
                break if w > 0.0 && w < 1.0
              end
              w = Math.sqrt((-2.0 * Math.log(w)) / w)
              spare = x2 * w
              x1 * w * sigma + mean
            else
              y2 = spare
              spare = nil
              y2 * sigma + mean
            end
          }
        end

        # Return the inverse CDF or P-value of the corresponding integral.
        #
        # +qn+ is a cumulative probability in [0, 1]. Returns -Infinity for
        # 0 and +Infinity for 1. For values outside [0, 1] an error is
        # printed to $stderr and 0.0 is returned.
        def p_value(qn)
          b = [1.570796288, 0.03706987906, -0.8364353589e-3,
               -0.2250947176e-3, 0.6841218299e-5, 0.5824238515e-5,
               -0.104527497e-5, 0.8360937017e-7, -0.3231081277e-8,
               0.3657763036e-10, 0.6936233982e-12]

          if qn < 0.0 || 1.0 < qn
            $stderr.printf("Error : qn <= 0 or qn >= 1 in pnorm()!\n")
            return 0.0
          end
          return 0.0 if qn == 0.5
          # The polynomial below degenerates to NaN at the boundaries.
          return -Float::INFINITY if qn == 0.0
          return Float::INFINITY if qn == 1.0

          # Work on the lower tail and restore the sign at the end.
          w1 = qn
          w1 = 1.0 - w1 if qn > 0.5
          w3 = -Math.log(4.0 * w1 * (1.0 - w1))
          w1 = b[0]
          1.upto(10) do |i|
            w1 += b[i] * w3**i
          end
          return Math.sqrt(w1 * w3) if qn > 0.5
          -Math.sqrt(w1 * w3)
        end

        # Normal cumulative distribution function (cdf).
        #
        # Returns the integral of normal distribution
        # over (-Infty, z].
        #
        def cdf(z)
          return 0.0 if z < -12
          return 1.0 if z > 12
          return 0.5 if z == 0.0

          upper = z > 0.0
          z = -z unless upper
          z = z.to_f
          z2 = z * z
          t = q = z * Math.exp(-0.5 * z2) / SQ2PI

          # Sum the series until adding a term no longer increases it.
          3.step(199, 2) do |i|
            prev = q
            t *= z2 / i
            q += t
            if q <= prev
              return(upper ? 0.5 + q : 0.5 - q)
            end
          end
          upper ? 1.0 : 0.0
        end

        # Normal probability density function (pdf)
        # with mean 0 and sigma 1, evaluated at +x+.
        def pdf(x)
          (1.0 / SQ2PI) * Math.exp(-(x**2 / 2.0))
        end
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
module Distribution
  module Normal
    # Normal distribution backend that delegates to the Statistics2 library.
    module Statistics2_
      # Cumulative distribution function; delegates to Statistics2.normaldist.
      def self.cdf(x)
        Statistics2.normaldist(x)
      end

      # Inverse CDF for probability +pr+; delegates to Statistics2.pnormaldist.
      def self.p_value(pr)
        Statistics2.pnormaldist(pr)
      end
    end
  end
end
@@ -0,0 +1,73 @@
1
module Distribution
  # Calculate cdf and inverse cdf for Multivariate Distribution.
  module NormalMultivariate
    # Returns multivariate cdf distribution
    # * +aa+ is the array of lower values
    # * +bb+ is the array of higher values
    # * +sigma+ is a symmetric positive definite covariance matrix
    #
    # Currently always raises RuntimeError ("Doesn't work yet").
    def self.cdf(aa, bb, sigma, epsilon=0.0001, alpha=2.5, max_iterations=100) # :nodoc:
      raise "Doesn't work yet"

      # NOTE(review): everything below is unreachable because of the raise
      # above; it is kept as the work-in-progress implementation.
      a = [nil] + aa
      b = [nil] + bb
      m = aa.size
      sigma = sigma.to_gsl if sigma.respond_to? :to_gsl

      c = GSL::Linalg::Cholesky.decomp(sigma).lower
      intsum = 0
      varsum = 0
      n = 0
      d = Array.new(m + 1)
      e = Array.new(m + 1)
      f = Array.new(m + 1)
      1.upto(m) do |i|
        d[i] = 0.0 if a[i].nil?
        e[i] = 1.0 if b[i].nil?
      end
      d[1] = uPhi(a[1].quo(c[0, 0])) unless d[1] == 0
      e[1] = uPhi(b[1].quo(c[0, 0])) unless e[1] == 1
      f[1] = e[1] - d[1]

      error = 1000
      loop do
        # NOTE(review): scaling the random draws by epsilon looks suspicious
        # -- confirm against the intended algorithm.
        w = Array.new(m + 1) { rand * epsilon }
        y = []
        2.upto(m) do |i|
          y[i - 1] = iPhi(d[i - 1] + w[i - 1] * (e[i - 1] - d[i - 1]))
          sumc = 0
          1.upto(i - 1) { |j| sumc += c[i - 1, j - 1] * y[j] }

          d[i] = uPhi((a[i] - sumc).quo(c[i - 1, i - 1])) unless a[i].nil?
          e[i] = uPhi((b[i] - sumc).quo(c[i - 1, i - 1])) unless b[i].nil?
          f[i] = (e[i] - d[i]) * f[i - 1]
        end
        # NOTE(review): this doubles intsum before adding f[m]; it probably
        # should be a plain accumulation -- left untouched here.
        intsum += intsum + f[m]
        varsum = varsum + f[m]**2
        n += 1
        error = alpha * Math.sqrt((varsum.quo(n) - (intsum.quo(n))**2).quo(n))
        break unless error > epsilon && n < max_iterations
      end

      intsum.quo(n)
    end

    # Inverse normal CDF; delegates to Distribution::Normal.p_value.
    def self.iPhi(pr)
      Distribution::Normal.p_value(pr)
    end

    # Normal CDF; delegates to Distribution::Normal.cdf.
    def self.uPhi(x)
      Distribution::Normal.cdf(x)
    end
  end
end
@@ -0,0 +1,27 @@
1
+ require 'distribution/t/ruby'
2
+ require 'distribution/t/gsl'
3
+ require 'distribution/t/statistics2'
4
module Distribution

  # Calculate statistics for the T distribution.
  module T
    # Short name identifying this distribution.
    # NOTE(review): this was 'f', which looks copied from the F
    # distribution and would clash with it wherever SHORTHAND is used to
    # build names -- confirm 't' against the code that consumes SHORTHAND.
    SHORTHAND='t'
    extend Distributable
    create_distribution_methods

    ##
    # :singleton-method: pdf(x,k)
    # Returns the probability density of the T distribution
    # with +k+ degrees of freedom at +x+

    ##
    # :singleton-method: p_value(qn, k)
    # Return the P-value of the corresponding integral +qn+ with
    # +k+ degrees of freedom

    ##
    # :singleton-method: cdf(x,k)
    # Returns the integral of T distribution
    # with +k+ degrees of freedom over (-Infty, +x+]
  end
end
@@ -0,0 +1,29 @@
1
module Distribution
  module T
    # T distribution backend that delegates to the GSL bindings.
    module GSL_
      # Probability density at +x+ with +k+ degrees of freedom;
      # delegates to GSL::Ran.tdist_pdf.
      def self.pdf(x, k)
        GSL::Ran.tdist_pdf(x, k)
      end

      # Return the P-value of the corresponding integral with
      # k degrees of freedom; delegates to GSL::Cdf.tdist_Pinv.
      #
      #   Distribution::T.p_value(0.95,2)
      def self.p_value(pr, k)
        GSL::Cdf.tdist_Pinv(pr, k)
      end

      # T cumulative distribution function (cdf) with k degrees of
      # freedom; delegates to GSL::Cdf.tdist_P after coercing +x+ to Float.
      #
      #   Distribution::T.cdf(20,3)
      #
      def self.cdf(x, k)
        GSL::Cdf.tdist_P(x.to_f, k)
      end
    end
  end
end
@@ -0,0 +1,105 @@
1
module Distribution
  module T
    # Pure-Ruby implementation of the T distribution.
    module Ruby_
      # Probability density at +t+ with +v+ degrees of freedom.
      def self.pdf(t, v)
        scale = (Math.gamma((v+1) / 2.0)) / (Math.sqrt(v*Math::PI)*Math.gamma(v/2.0))
        shape = (1+(t**2 / v.to_f))**(-(v+1) / 2.0)
        scale * shape
      end

      # Returns the integral of t-distribution with n degrees of freedom over (-Infty, x].
      def self.cdf(t, n)
        p_t(n, t)
      end

      # t-distribution ([1])
      # (-\infty, x]
      def self.p_t(df, t)
        c2 = df.to_f / (df + t * t)
        term = Math.sqrt(1.0 - c2)
        term = -term if t < 0.0
        acc = 0.0
        k = df % 2 + 2
        while k <= df
          acc += term
          term *= (k - 1) * c2 / k
          k += 2
        end
        # Float and odd integer degrees of freedom share the arctan form;
        # the Float check comes first because Float does not respond to &.
        use_atan_form = df.is_a?(Float) || (df & 1) != 0
        return (1.0 + acc) / 2.0 unless use_atan_form
        0.5+(acc*Math.sqrt(c2)+Math.atan(t/Math.sqrt(df))) / Math::PI
      end

      # inverse of t-distribution ([2])
      # (-\infty, -q/2] + [q/2, \infty)
      #
      # Found by a decimal step search: advance +w+ by the current step and,
      # on overshoot, step back and shrink the step by a factor of ten.
      def self.ptsub(q, n)
        q = q.to_f
        eps =
          if n == 1 && 0.001 < q && q < 0.01
            1.0e-4
          elsif n == 2 && q < 0.0001
            1.0e-4
          elsif n == 1 && q < 0.001
            1.0e-2
          else
            1.0e-5
          end
        step = 10000.0
        w = 0.0
        loop do
          w += step
          return w if step <= eps
          qe = 2.0 - p_t(n, w)*2.0 - q
          return w if qe == 0.0
          next unless qe < 0.0
          w -= step
          step /= 10.0
        end
      end

      # Two-tailed inverse: small cases go through ptsub, the rest use a
      # series expansion around the normal quantile. Invalid parameters
      # print an error to $stderr and return 0.0.
      def self.pt(q, n)
        q = q.to_f
        if q < 1.0e-5 || q > 1.0 || n < 1
          $stderr.printf("Error : Illigal parameter in pt()!\n")
          return 0.0
        end

        return ptsub(q, n) if n <= 5
        return ptsub(q, n) if q <= 5.0e-3 && n <= 13

        f = n.to_f
        f1 = 4.0 * f
        f2 = f * f
        f3 = f2 * f
        f4 = f3 * f
        f5 = f4 * f
        f2 *= 96.0
        f3 *= 384.0
        f4 *= 92160.0
        f5 *= 368640.0
        u = Normal.p_value(1.0 - q / 2.0)

        # Odd powers of u: w0 = u^3, w1 = u^5, ... w4 = u^11.
        u2 = u * u
        w0 = u2 * u
        w1 = w0 * u2
        w2 = w1 * u2
        w3 = w2 * u2
        w4 = w3 * u2
        w = (w0 + u) / f1
        w += (5.0 * w1 + 16.0 * w0 + 3.0 * u) / f2
        w += (3.0 * w2 + 19.0 * w1 + 17.0 * w0 - 15.0 * u) / f3
        w += (79.0 * w3 + 776.0 * w2 + 1482.0 * w1 - 1920.0 * w0 - 9450.0 * u) / f4
        w += (27.0 * w4 + 339.0 * w3 + 930.0 * w2 - 1782.0 * w1 - 765.0 * w0 + 17955.0 * u) / f5
        u + w
      end

      # Returns the P-value of tdist().
      def self.p_value(y, n)
        return pt(2.0 - y*2.0, n) if y > 0.5
        - pt(y*2.0, n)
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require 'rbconfig'
2
module Distribution
  module T
    # T distribution backend that delegates to the Statistics2 library.
    module Statistics2_
      class << self
        # Return the P-value of the corresponding integral with
        # k degrees of freedom; delegates to Statistics2.ptdist.
        def p_value(pr, k)
          Statistics2.ptdist(k, pr)
        end

        # There are some problem on i686 with t on statistics2, so cdf is
        # only defined on other architectures.
        # The previous test, `!RbConfig::CONFIG['arch']=~/i686/`, negated
        # the string before matching and therefore never defined cdf.
        unless RbConfig::CONFIG['arch'] =~ /i686/
          # T cumulative distribution function (cdf).
          #
          # Returns the integral of t-distribution
          # with n degrees of freedom over (-Infty, x].
          #
          def cdf(x, k)
            Statistics2.tdist(k, x)
          end
        end
      end
    end
  end
end