distribution 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. data.tar.gz.sig +0 -0
  2. data/.autotest +23 -0
  3. data/History.txt +3 -0
  4. data/Manifest.txt +39 -0
  5. data/README.txt +71 -0
  6. data/Rakefile +19 -0
  7. data/bin/distribution +3 -0
  8. data/lib/distribution.rb +148 -0
  9. data/lib/distribution/bivariatenormal.rb +25 -0
  10. data/lib/distribution/bivariatenormal/gsl.rb +11 -0
  11. data/lib/distribution/bivariatenormal/ruby.rb +281 -0
  12. data/lib/distribution/bivariatenormal/statistics2.rb +0 -0
  13. data/lib/distribution/chisquare.rb +29 -0
  14. data/lib/distribution/chisquare/gsl.rb +27 -0
  15. data/lib/distribution/chisquare/ruby.rb +85 -0
  16. data/lib/distribution/chisquare/statistics2.rb +21 -0
  17. data/lib/distribution/f.rb +28 -0
  18. data/lib/distribution/f/gsl.rb +28 -0
  19. data/lib/distribution/f/ruby.rb +117 -0
  20. data/lib/distribution/f/statistics2.rb +26 -0
  21. data/lib/distribution/math_extension.rb +72 -0
  22. data/lib/distribution/normal.rb +36 -0
  23. data/lib/distribution/normal/gsl.rb +24 -0
  24. data/lib/distribution/normal/ruby.rb +99 -0
  25. data/lib/distribution/normal/statistics2.rb +14 -0
  26. data/lib/distribution/normalmultivariate.rb +73 -0
  27. data/lib/distribution/t.rb +27 -0
  28. data/lib/distribution/t/gsl.rb +29 -0
  29. data/lib/distribution/t/ruby.rb +105 -0
  30. data/lib/distribution/t/statistics2.rb +28 -0
  31. data/spec/bivariatenormal_spec.rb +63 -0
  32. data/spec/chisquare_spec.rb +89 -0
  33. data/spec/distribution_spec.rb +19 -0
  34. data/spec/f_spec.rb +107 -0
  35. data/spec/normal_spec.rb +105 -0
  36. data/spec/shorthand_function.rb +6 -0
  37. data/spec/shorthand_spec.rb +14 -0
  38. data/spec/spec.opts +3 -0
  39. data/spec/spec_helper.rb +23 -0
  40. data/spec/t_spec.rb +98 -0
  41. metadata +160 -0
  42. metadata.gz.sig +1 -0
@@ -0,0 +1,36 @@
1
+ require 'distribution/normal/ruby'
2
+ require 'distribution/normal/gsl'
3
+ require 'distribution/normal/statistics2'
4
+
5
+ module Distribution
6
+ # From Wikipedia:
7
+ # Continuous probability distribution that is often used as
8
+ # a first approximation to describe real-valued random variables
9
+ # that tend to cluster around a single mean value.
10
+ # The graph of the associated probability density function is “bell”-shaped.
11
+ module Normal
12
+ SHORTHAND='norm' # short name of this distribution; presumably read by Distributable - confirm there
13
+ extend Distributable
14
+ # create_distribution_methods is provided by Distributable (not defined in this file); presumably it builds the singleton methods documented below from the Ruby_, GSL_ and Statistics2_ backends - TODO confirm
15
+ create_distribution_methods
16
+
17
+ ##
18
+ # :singleton-method: pdf(x)
19
+ # Returns the probability density of the standard Normal distribution at +x+
20
+
21
+ ##
22
+ # :singleton-method: p_value(qn)
23
+ # Returns the inverse CDF (P-value) for the cumulative probability +qn+
24
+
25
+ ##
26
+ # :singleton-method: cdf(x)
27
+ # Returns the integral of Normal distribution over (-Infty, +x+]
28
+
29
+ ##
30
+ # :singleton-method: rng
31
+ # Returns a lambda which returns a random number from
32
+ # X ~ N(0,1)
33
+
34
+
35
+ end
36
+ end
@@ -0,0 +1,24 @@
1
+ module Distribution
2
+ module Normal
3
+ module GSL_ # Normal-distribution backend whose methods forward to the GSL:: bindings
4
+ class << self
5
+ def rng(mean=0,sigma=1,seed=nil) # returns a lambda; each call yields mean + a gaussian(sigma) draw
6
+ seed||=rand(10e8) # no seed supplied: draw a random one below 10e8 (i.e. 1e9)
7
+ rng=GSL::Rng.alloc(GSL::Rng::MT19937,seed) # allocate an MT19937 generator initialised with seed
8
+ lambda { mean+rng.gaussian(sigma)} # the closure keeps the generator, so successive calls advance the same stream
9
+ end
10
+ def cdf(x) # :nodoc:
11
+ GSL::Cdf::ugaussian_P(x) # unit-Gaussian CDF from GSL (lower tail per the GSL manual)
12
+ end
13
+ def pdf(x) # :nodoc:
14
+ GSL::Ran::gaussian_pdf(x) # only x is passed; assumes the binding defaults sigma to 1 - TODO confirm
15
+ end
16
+ def p_value(qn) # inverse CDF: quantile for cumulative probability qn
17
+ GSL::Cdf::ugaussian_Pinv(qn) # inverse of ugaussian_P
18
+ end
19
+ def gsl # NOTE(review): empty method, returns nil; looks like a marker that this backend is GSL-based - confirm against callers
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,99 @@
1
module Distribution
  module Normal
    # Pure-Ruby backend for the standard normal distribution.
    #
    # Relies on the constant SQ2PI (sqrt(2*PI)), which is defined outside
    # this file and resolved lexically through Distribution.
    module Ruby_
      # Polynomial coefficients used by p_value; indexed by the power of
      # the transformed variable w3.
      P_VALUE_COEFFICIENTS = [
        1.570796288, 0.03706987906, -0.8364353589e-3,
        -0.2250947176e-3, 0.6841218299e-5, 0.5824238515e-5,
        -0.104527497e-5, 0.8360937017e-7, -0.3231081277e-8,
        0.3657763036e-10, 0.6936233982e-12
      ].freeze

      class << self
        # Returns a lambda generating random numbers from X ~ N(0,1).
        # Shorthand for rng(0, 1, nil).
        def rngu
          rng(0, 1, nil)
        end

        # Returns a lambda which returns a random number within a
        # gaussian distribution X ~ N(+mean+,+sigma+^2) on every call.
        #
        # +seed+, when given, seeds a private Random instance so the
        # sequence is reproducible; when nil the global Kernel generator
        # is used (the previous behaviour, which silently ignored +seed+).
        #
        # Uses the polar form of the Box-Muller transform: each accepted
        # point yields two deviates, the second is kept for the next call.
        # == Reference:
        # * http://www.taygeta.com/random/gaussian.html
        def rng(mean=0,sigma=1,seed=nil)
          source = seed.nil? ? Kernel : Random.new(seed)
          spare = nil
          lambda do
            if spare.nil?
              x1 = x2 = w = 0.0
              loop do
                x1 = 2.0 * source.rand - 1.0
                x2 = 2.0 * source.rand - 1.0
                w = x1 * x1 + x2 * x2
                # Reject points outside the unit circle, and the origin
                # itself (w == 0 would make log(w)/w produce NaN).
                break if w < 1.0 && w > 0.0
              end
              w = Math.sqrt((-2.0 * Math.log(w)) / w)
              spare = x2 * w
              x1 * w * sigma + mean
            else
              deviate = spare
              spare = nil
              deviate * sigma + mean
            end
          end
        end

        # Return the inverse CDF or P-value of the corresponding integral.
        #
        # +qn+ is a cumulative probability in [0, 1]. Values outside that
        # range print an error on $stderr and return 0.0 (unchanged
        # best-effort behaviour). The boundaries 0 and 1 map to -Infinity
        # and +Infinity instead of the NaN the raw formula would give.
        def p_value(qn)
          if qn < 0.0 || 1.0 < qn
            $stderr.printf("Error : qn <= 0 or qn >= 1 in pnorm()!\n")
            return 0.0
          end
          return -Float::INFINITY if qn == 0.0
          return Float::INFINITY if qn == 1.0
          return 0.0 if qn == 0.5

          # Work on the lower tail and restore the sign at the end.
          tail = qn > 0.5 ? 1.0 - qn : qn
          w3 = -Math.log(4.0 * tail * (1.0 - tail))
          w1 = P_VALUE_COEFFICIENTS[0]
          1.upto(10) do |i|
            w1 += P_VALUE_COEFFICIENTS[i] * w3**i
          end
          root = Math.sqrt(w1 * w3)
          qn > 0.5 ? root : -root
        end

        # Normal cumulative distribution function (cdf).
        #
        # Returns the integral of normal distribution
        # over (-Infty, z].
        #
        def cdf(z)
          # These guards previously lacked `return` and were dead code.
          return 0.0 if z < -12
          return 1.0 if z > 12
          return 0.5 if z == 0.0

          upper = z > 0.0
          z = -z unless upper
          z = z.to_f
          z2 = z * z
          # Series for Phi(z) - 0.5: pdf(z) * (z + z^3/3 + z^5/(3*5) + ...)
          t = q = z * Math.exp(-0.5 * z2) / SQ2PI

          3.step(199, 2) do |i|
            prev = q
            t *= z2 / i
            q += t
            # Stop once the partial sum no longer grows in floating point.
            return(upper ? 0.5 + q : 0.5 - q) if q <= prev
          end
          upper ? 1.0 : 0.0
        end

        # Normal probability density function (pdf)
        # With mean=0 and sigma=1, evaluated at +x+
        def pdf(x)
          (1.0 / SQ2PI) * Math.exp(-(x**2 / 2.0))
        end
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ module Distribution
2
+ module Normal
3
+ module Statistics2_ # Normal-distribution backend that forwards to the Statistics2 module (defined outside this file)
4
+ class << self
5
+ def cdf(x) # cumulative distribution function at x
6
+ Statistics2.normaldist(x) # plain delegation; no argument conversion is done here
7
+ end
8
+ def p_value(pr) # inverse CDF for the cumulative probability pr
9
+ Statistics2.pnormaldist(pr) # plain delegation; range checking, if any, is left to Statistics2
10
+ end
11
+ end
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,73 @@
1
module Distribution
  # Calculate cdf and inverse cdf for Multivariate Distribution.
  module NormalMultivariate
    class << self
      # Returns multivariate cdf distribution
      # * +aa+ is the array of lower values (nil entries mean "no lower bound")
      # * +bb+ is the array of higher values (nil entries mean "no upper bound")
      # * +sigma+ is a symmetric positive definite covariance matrix
      #   (converted with +to_gsl+ when it responds to it)
      # * +epsilon+ is the target error, +alpha+ the multiplier applied to
      #   the standard error, +max_iterations+ the sampling cap
      #
      # The method is explicitly disabled: it raises RuntimeError
      # ("Doesn't work yet") before doing any work. The body below is kept
      # for future completion; the running-sum update has been corrected
      # (it used to double +intsum+ on every iteration).
      #
      # NOTE(review): the sampling weights are drawn as rand*epsilon;
      # that looks suspicious for a Monte Carlo integrator - confirm
      # against the reference algorithm before enabling.
      def cdf(aa,bb,sigma, epsilon=0.0001, alpha=2.5, max_iterations=100) # :nodoc:
        raise "Doesn't work yet"
        # 1-based working copies: index 0 is a placeholder.
        a = [nil] + aa
        b = [nil] + bb
        m = aa.size
        sigma = sigma.to_gsl if sigma.respond_to? :to_gsl

        cc = GSL::Linalg::Cholesky.decomp(sigma)
        c = cc.lower
        intsum = 0
        varsum = 0
        n = 0
        d = Array.new(m + 1, nil)
        e = Array.new(m + 1, nil)
        f = Array.new(m + 1, nil)
        (1..m).each do |i|
          d[i] = 0.0 if a[i].nil?
          e[i] = 1.0 if b[i].nil?
        end
        d[1] = uPhi(a[1].quo(c[0, 0])) unless d[1] == 0
        e[1] = uPhi(b[1].quo(c[0, 0])) unless e[1] == 1
        f[1] = e[1] - d[1]

        error = 1000
        loop do
          w = Array.new(m + 1) { rand * epsilon }
          y = []
          (2..m).each do |i|
            y[i - 1] = iPhi(d[i - 1] + w[i - 1] * (e[i - 1] - d[i - 1]))
            sumc = 0
            (1..(i - 1)).each do |j|
              sumc += c[i - 1, j - 1] * y[j]
            end

            d[i] = uPhi((a[i] - sumc).quo(c[i - 1, i - 1])) unless a[i].nil?
            e[i] = uPhi((b[i] - sumc).quo(c[i - 1, i - 1])) unless b[i].nil?
            f[i] = (e[i] - d[i]) * f[i - 1]
          end
          # Accumulate the sample and its square for the mean/variance.
          intsum += f[m]
          varsum += f[m]**2
          n += 1
          error = alpha * Math.sqrt((varsum.quo(n) - intsum.quo(n)**2).quo(n))
          break unless error > epsilon && n < max_iterations
        end

        intsum.quo(n)
      end

      # Inverse CDF of the standard normal, via Distribution::Normal.p_value.
      def iPhi(pr)
        Distribution::Normal.p_value(pr)
      end

      # CDF of the standard normal, via Distribution::Normal.cdf.
      def uPhi(x)
        Distribution::Normal.cdf(x)
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ require 'distribution/t/ruby'
2
+ require 'distribution/t/gsl'
3
+ require 'distribution/t/statistics2'
4
module Distribution

  # Calculate statisticals for T Distribution.
  module T
    # Short name of the distribution. It was 'f', which is the F
    # distribution's letter and looks like a copy-paste slip.
    SHORTHAND = 't'
    extend Distributable
    # Provided by Distributable (defined outside this file); presumably
    # generates the singleton methods documented below - confirm there.
    create_distribution_methods

    ##
    # :singleton-method: pdf(x,k)
    # Returns the probability density of the T distribution
    # with +k+ degrees of freedom at +x+

    ##
    # :singleton-method: p_value(qn, k)
    # Return the P-value of the corresponding integral +qn+ with
    # +k+ degrees of freedom

    ##
    # :singleton-method: cdf(x,k)
    # Returns the integral of T distribution
    # with +k+ degrees of freedom over (-Infty, +x+]
  end
end
@@ -0,0 +1,29 @@
1
+ module Distribution
2
+ module T
3
+ module GSL_
4
+ class << self
5
+ # Student's t backend whose methods forward to the GSL:: bindings.
6
+ def pdf(x,k) # probability density at x with k degrees of freedom
7
+ GSL::Ran.tdist_pdf(x,k)
8
+ end
9
+ # Return the P-value of the corresponding integral with
10
+ # k degrees of freedom
11
+ #
12
+ # Distribution::T.p_value(0.95,2)
13
+ def p_value(pr,k)
14
+ GSL::Cdf.tdist_Pinv(pr,k)
15
+ end
16
+ # T cumulative distribution function (cdf).
17
+ #
18
+ # Returns the integral of t-distribution
19
+ # with k degrees of freedom
20
+ # over (-Infty, x] (lower tail, per the GSL manual for tdist_P).
21
+ # Distribution::T.cdf(2.0,3)
22
+ #
23
+ def cdf(x, k)
24
+ GSL::Cdf.tdist_P(x.to_f, k) # x is coerced to Float before the call
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,105 @@
1
module Distribution
  module T
    # Pure-Ruby backend for Student's t distribution.
    module Ruby_
      class << self
        # Probability density of the t-distribution with +v+ degrees of
        # freedom evaluated at +t+.
        def pdf(t,v)
          ((Math.gamma((v+1) / 2.0)) / (Math.sqrt(v*Math::PI)*Math.gamma(v/2.0))) * ((1+(t**2 / v.to_f))**(-(v+1) / 2.0))
        end

        # Returns the integral of t-distribution with n degrees of freedom over (-Infty, x].
        def cdf(t, n)
          p_t(n, t)
        end

        # t-distribution ([1])
        # (-\infty, x]
        #
        # Note the argument order: degrees of freedom first, then the
        # point +t+. A finite sum is accumulated in steps of two up to
        # +df+; odd (or Float) +df+ adds an arctangent term.
        def p_t(df, t)
          c2 = df.to_f / (df + t * t)
          s = Math.sqrt(1.0 - c2)
          s = -s if t < 0.0
          p = 0.0
          i = df % 2 + 2
          while i <= df
            p += s
            s *= (i - 1) * c2 / i
            i += 2
          end
          if df.is_a?(Float) || (df & 1) != 0
            0.5 + (p * Math.sqrt(c2) + Math.atan(t / Math.sqrt(df))) / Math::PI
          else
            (1.0 + p) / 2.0
          end
        end

        # inverse of t-distribution ([2])
        # (-\infty, -q/2] + [q/2, \infty)
        #
        # Finds the non-negative w whose two-sided tail probability is
        # +q+ by a decimal search: step forward by s, and on overshoot
        # step back and divide s by ten, until s <= eps.
        def ptsub(q, n)
          q = q.to_f
          # Coarser tolerances for the heavy-tailed small-n, small-q cases.
          eps =
            if n == 1 && 0.001 < q && q < 0.01
              1.0e-4
            elsif n == 2 && q < 0.0001
              1.0e-4
            elsif n == 1 && q < 0.001
              1.0e-2
            else
              1.0e-5
            end
          s = 10000.0
          w = 0.0
          loop do
            w += s
            return w if s <= eps
            qe = 2.0 - p_t(n, w) * 2.0 - q
            return w if qe == 0.0
            if qe < 0.0
              w -= s
              s /= 10.0
            end
          end
        end

        # Two-sided inverse: returns w >= 0 such that the probability
        # outside [-w, w] is +q+, for +n+ degrees of freedom.
        #
        # Out-of-range arguments (q < 1e-5, q > 1 or n < 1) print an
        # error on $stderr and return 0.0. Small n (or small q with
        # n <= 13) use the search in ptsub; otherwise a series in powers
        # of 1/n around the normal quantile is used.
        def pt(q, n)
          q = q.to_f
          if q < 1.0e-5 || q > 1.0 || n < 1
            $stderr.printf("Error : Illigal parameter in pt()!\n")
            return 0.0
          end

          return ptsub(q, n) if n <= 5
          return ptsub(q, n) if q <= 5.0e-3 && n <= 13

          # Denominators: 4n, 96n^2, 384n^3, 92160n^4, 368640n^5.
          f = n.to_f
          f1 = 4.0 * f
          f2 = f * f
          f3 = f2 * f
          f4 = f3 * f
          f5 = f4 * f
          f2 *= 96.0
          f3 *= 384.0
          f4 *= 92160.0
          f5 *= 368640.0
          u = Normal.p_value(1.0 - q / 2.0)

          # Odd powers of u: w0 = u^3, w1 = u^5, ... w4 = u^11.
          u2 = u * u
          w0 = u2 * u
          w1 = w0 * u2
          w2 = w1 * u2
          w3 = w2 * u2
          w4 = w3 * u2
          w = (w0 + u) / f1
          w += (5.0 * w1 + 16.0 * w0 + 3.0 * u) / f2
          w += (3.0 * w2 + 19.0 * w1 + 17.0 * w0 - 15.0 * u) / f3
          w += (79.0 * w3 + 776.0 * w2 + 1482.0 * w1 - 1920.0 * w0 - 9450.0 * u) / f4
          w += (27.0 * w4 + 339.0 * w3 + 930.0 * w2 - 1782.0 * w1 - 765.0 * w0 + 17955.0 * u) / f5
          u + w
        end

        # Returns the P-value of tdist(): the quantile for cumulative
        # probability +y+ with +n+ degrees of freedom.
        #
        # The median is returned exactly: without the guard the search in
        # ptsub stops at its tolerance and yields about -1e-5 for small n.
        def p_value(y,n)
          return 0.0 if y == 0.5

          if y > 0.5
            pt(2.0 - y * 2.0, n)
          else
            -pt(y * 2.0, n)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require 'rbconfig'
2
module Distribution
  module T
    # Student's t backend that forwards to the Statistics2 module
    # (defined outside this file).
    module Statistics2_
      class << self
        # Return the P-value of the corresponding integral with
        # k degrees of freedom
        def p_value(pr,k)
          Statistics2.ptdist(k, pr)
        end

        # There are some problem on i686 with t on statistics2, so cdf is
        # only defined on other architectures.
        #
        # The previous test, `!RbConfig::CONFIG['arch']=~/i686/`, negated
        # the string before matching, so it was never true and cdf was
        # never defined on any platform.
        unless RbConfig::CONFIG['arch'] =~ /i686/
          # T cumulative distribution function (cdf).
          #
          # Returns the integral of t-distribution
          # with n degrees of freedom over (-Infty, x].
          #
          def cdf(x,k)
            Statistics2.tdist(k, x)
          end
        end
      end
    end
  end
end