rubystats 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,9 @@
1
+ # This is a set of Ruby statistics libraries ported from the PHPMath libraries.
2
+ # PHPMath libraries created by Paul Meagher (many of which were ported from
3
+ # various sources).
4
+ # See http://www.phpmath.com/ for PHPMath libraries.
5
+ #
6
+ # See examples and tests for usage.
7
+ # Author:: Bryan Donovan
8
+ #
9
+ # License:: MIT http://www.opensource.org/licenses/mit-license.php
@@ -0,0 +1,36 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
+ require 'beta_distribution'
3
+
4
# Lower confidence limit for a binomial proportion.
#
# trials:: number of trials
# alpha::  probability passed to the beta inverse CDF
# p::      number of observed successes
#
# Returns 0 when no successes were observed; otherwise the alpha-quantile of
# a Beta(p, trials - p + 1) distribution (the form used by the exact,
# Clopper-Pearson style, binomial lower bound).
def get_lower_limit(trials, alpha, p)
  return 0 if p == 0

  BetaDistribution.new(p, trials - p + 1).inverse_cdf(alpha)
end
14
+
15
# Upper confidence limit for a binomial proportion.
#
# trials:: number of trials
# alpha::  tail probability; the (1 - alpha) quantile is returned
# p::      number of observed successes
#
# Returns 1 when every trial succeeded; otherwise the (1 - alpha)-quantile of
# a Beta(p + 1, trials - p) distribution.
def get_upper_limit(trials, alpha, p)
  # With p == trials the second beta parameter would be 0, which
  # BetaDistribution does not accept. The limit is 1 in that case, mirroring
  # the p == 0 shortcut in get_lower_limit.
  return 1 if p == trials

  BetaDistribution.new(p + 1, trials - p).inverse_cdf(1 - alpha)
end
22
+
23
+
24
# Example driver: print both confidence limits for 10 successes in 50 trials.
sample_size = 50
significance = 0.05
successes = 10 # deliberately not named `p`, which would shadow Kernel#p below

lower = get_lower_limit(sample_size, significance, successes)
upper = get_upper_limit(sample_size, significance, successes)

[["lcl= ", lower], ["ucl= ", upper]].each do |label, limit|
  puts label
  p limit
end
35
+
36
+
@@ -0,0 +1,20 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
+ require 'binomial_distribution'
3
+
4
# Example driver: print the mean of a binomial distribution, then for
# x = 1..5 the inverse CDF of cdf(x) followed by "x: pdf : cdf".
trial_count = 100
success_prob = 0.05

dist = BinomialDistribution.new(trial_count, success_prob)
puts dist.mean

1.upto(5) do |k|
  density = dist.pdf(k)
  cumulative = dist.cdf(k)
  puts dist.inverse_cdf(cumulative)
  puts "#{k}: #{density} : #{cumulative}"
end
20
+
@@ -0,0 +1,25 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
+ require 'fishers_exact_test'
3
+ require 'pp'
4
+
5
# Example driver: run the Fisher calculation on two groups
# (20 tested / 10 failed and 30 tested / 10 failed), print the result hash,
# then print 100 * (1 - two-tailed value).
group_sizes = [20, 30]
failures = [10, 10]
passes = group_sizes.zip(failures).map { |size, failed| size - failed }

result = FishersExactTest.new.calculate(passes[0], passes[1], failures[0], failures[1])
pp result

pp 100 * (1.0 - result[:twotail])
22
+
23
+
24
+
25
+
@@ -0,0 +1,8 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
+ require 'normal_distribution'
3
+
4
# Example driver: print cdf(11) and pdf(11), one per line, for
# NormalDistribution.new(10, 2).
dist = NormalDistribution.new(10, 2)
cumulative = dist.cdf(11)
density = dist.pdf(11)
puts cumulative, density
@@ -0,0 +1,87 @@
1
+ require 'probability_distribution'
2
+
3
# Beta distribution with two positive parameters, p and q.
#
# pdf, cdf and icdf each accept either a single value or an Array of values
# and return a result of the same shape.
#
# NOTE(review): check_range, find_root, log_beta and incomplete_beta are not
# defined in this class; presumably they come from ProbabilityDistribution /
# SpecialMath -- confirm.
class BetaDistribution < ProbabilityDistribution
  include SpecialMath

  attr_reader :p, :q

  # dgr_p = degrees of freedom p
  # dgr_q = degrees of freedom q
  #
  # Raises ArgumentError unless both parameters are strictly positive.
  # The previous "return nil" had no effect on what +new+ returned, so invalid
  # parameters produced an object with unset @p/@q that failed later.
  def initialize(dgr_p, dgr_q)
    if dgr_p <= 0 || dgr_q <= 0
      raise ArgumentError, "Error: dgr_p and dgr_q must be greater than 0"
    end
    @p = dgr_p.to_f
    @q = dgr_q.to_f
  end

  # Returns p / (p + q).
  def mean
    @p / (@p + @q)
  end

  # Returns sqrt(p * q / ((p + q)^2 * (p + q + 1))).
  def standard_deviation
    Math.sqrt(@p * @q / ((@p + @q)**2 * (@p + @q + 1)))
  end

  # Density at x (single value or Array). Each value is passed through
  # check_range first; the end points 0.0 and 1.0 yield 0.0.
  def pdf(x)
    map_values(x) do |value|
      check_range(value)
      if value == 0.0 || value == 1.0
        0.0
      else
        Math.exp(-log_beta(@p, @q) +
                 (@p - 1.0) * Math.log(value) +
                 (@q - 1.0) * Math.log(1.0 - value))
      end
    end
  end

  # Cumulative probability at x (single value or Array), computed with
  # incomplete_beta after check_range.
  def cdf(x)
    map_values(x) do |value|
      check_range(value)
      incomplete_beta(value, @p, @q)
    end
  end

  # Inverse of cdf for prob (single value or Array). Probabilities of exactly
  # 0.0 and 1.0 map to 0.0 and 1.0; everything else is solved with find_root
  # starting from 0.5 on the interval [0.0, 1.0].
  def icdf(prob)
    map_values(prob) do |value|
      check_range(value)
      # The array path used to overwrite these two shortcuts with find_root;
      # the elsif chain keeps it in line with the scalar path.
      if value == 0.0
        0.0
      elsif value == 1.0
        1.0
      else
        find_root(value, 0.5, 0.0, 1.0)
      end
    end
  end

  private

  # Yields each element of an Array input (returning the mapped Array), or
  # yields a non-Array input once and returns the block's result.
  def map_values(input)
    if input.is_a?(Array)
      input.map { |value| yield value }
    else
      yield input
    end
  end
end
87
+
@@ -0,0 +1,194 @@
1
+ require 'probability_distribution'
2
+ # This class provides an object for encapsulating binomial distributions
3
+ # Ported to Ruby from PHPMath class by Bryan Donovan
4
+ # Author:: Mark Hale
5
+ # Author:: Paul Meagher
6
+ # Author:: Bryan Donovan (mailto:bryandonovan@myrealbox.com)
7
# Binomial distribution for @n trials with success probability @p.
#
# NOTE(review): check_range, find_root, binomial and log_gamma are not defined
# in this class; presumably they come from ProbabilityDistribution and the
# included modules -- confirm.
class BinomialDistribution < ProbabilityDistribution
  include NumericalConstants
  include SpecialMath
  include ExtraMath

  # Constructs a binomial distribution.
  # trials:: number of trials, must be greater than 0
  # prob::   success probability of one trial, must lie in 0..1
  # Raises RuntimeError when either argument is out of range.
  def initialize(trials, prob)
    raise "Error: trials must be greater than 0" if trials <= 0
    raise "Error: prob must be between 0 and 1" if prob < 0.0 || prob > 1.0
    @n = trials
    @p = prob
  end

  # returns the number of trials
  def get_trials_parameter
    @n
  end

  # returns the probability
  def get_probability_parameter
    @p
  end

  # returns the mean (trials * probability)
  def get_mean
    @n * @p
  end

  # returns the variance (trials * probability * (1 - probability))
  def variance
    @n * @p * (1.0 - @p)
  end

  # Probability mass function of a binomial distribution (equivalent
  # to R dbinom function).
  # _x should be an integer, or an Array of integers.
  # Returns P(x = _x) for a single value, or an Array of such probabilities.
  def pdf(_x)
    if _x.is_a?(Array)
      # The array path previously dropped the @p**x factor.
      _x.map { |value| point_probability(value) }
    else
      point_probability(_x)
    end
  end

  # Cumulative binomial distribution function (equivalent to R pbinom function).
  # _x should be integer-valued and can be a single integer or an Array.
  # Returns P(x <= _x) (the sum of pdf(0) .. pdf(_x)), as a single value or
  # an Array.
  def cdf(_x)
    if _x.is_a?(Array)
      # The array path previously assigned into an undefined variable.
      _x.map { |value| get_cdf(value) }
    else
      get_cdf(_x)
    end
  end

  # Inverse of the cumulative binomial distribution function
  # (equivalent to R qbinom function).
  # prob is a single probability or an Array of probabilities.
  # Returns the floor of the root found by find_root for each probability.
  def get_icdf(prob)
    if prob.is_a?(Array)
      prob.map { |value| quantile(value) }
    else
      quantile(prob)
    end
  end

  # Wrapper for binomial RNG function (equivalent to R rbinom function).
  # Returns one random deviate when num_vals is 1, otherwise an Array of
  # num_vals deviates. Raises RuntimeError when num_vals < 1.
  def rng(num_vals = 1)
    raise "Error num_vals must be greater than or equal to 1" if num_vals < 1
    return get_rng if num_vals == 1

    (0...num_vals).map { get_rng }
  end

  # Private methods below

  private

  # P(x = value) after range-checking value against 0..@n.
  def point_probability(value)
    check_range(value, 0.0, @n)
    binomial(@n, value) * @p**value * (1 - @p)**(@n - value)
  end

  # Floor of the root of the CDF at probability value.
  def quantile(value)
    check_range(value)
    find_root(value, @n / 2, 0.0, @n).floor
  end

  # Private shared function for getting cumulant for particular x
  # param _x should be integer-valued
  # returns the probability that a stochastic variable x is at most _x,
  # i.e. P(x <= _x)
  def get_cdf(_x)
    check_range(_x, 0.0, @n)
    (0.._x).inject(0.0) { |sum, i| sum + pdf(i) }
  end

  # Private binomial RNG function
  # Original version of this function from Press et al.
  #
  # see http://www.library.cornell.edu/nr/bookcpdf/c7-3.pdf
  #
  # Works with the smaller of @p and 1 - @p and mirrors the result at the
  # end. Three strategies are used:
  # * fewer than 25 trials: simulate every trial directly;
  # * expected count below 1: multiply uniforms until the product drops
  #   below exp(-mean);
  # * otherwise: rejection sampling against a Lorentzian-shaped envelope.
  #
  # Returns an Integer count of successes.
  def get_rng
    p = @p <= 0.5 ? @p : 1.0 - @p
    am = @n * p # expected number of successes for the reduced probability

    if @n < 25
      # Run all @n trials (the old 1...@n range skipped one) and count with
      # an Integer (Float has no #next).
      bnl = 0
      (1..@n).each { bnl += 1 if Kernel.rand < p }
    elsif am < 1.0
      g = Math.exp(-am)
      t = 1.0
      j = 0
      # Runs j = 0..@n inclusive; j ends at @n + 1 if the product never
      # falls below g, and is then capped at @n.
      while j <= @n
        t *= Kernel.rand
        break if t < g
        j += 1
      end
      bnl = j <= @n ? j : @n
    else
      en = @n
      oldg = log_gamma(en + 1.0)
      pc = 1.0 - p
      plog = Math.log(p)
      pclog = Math.log(pc)
      sq = Math.sqrt(2.0 * am * pc)
      em = nil
      loop do
        y = nil
        # Draw from the envelope until the candidate lies in [0, en + 1).
        loop do
          y = Math.tan(Math::PI * Kernel.rand)
          em = sq * y + am
          break if em >= 0.0 && em < (en + 1.0)
        end
        em = em.floor
        # Ratio of the target probability to the envelope at em.
        t = 1.2 * sq * (1.0 + y * y) *
            Math.exp(oldg - log_gamma(em + 1.0) -
                     log_gamma(en - em + 1.0) + em * plog + (en - em) * pclog)
        # Both loops are post-test: a candidate must exist before it is judged.
        break if Kernel.rand <= t
      end
      bnl = em
    end

    # Undo the p -> 1 - p reduction.
    bnl = @n - bnl if p != @p
    bnl
  end
end
194
+
@@ -0,0 +1,171 @@
1
+ #! /usr/local/bin/ruby
2
+
3
+ # Fisher's Exact Test Function Library
4
+ #
5
+ # Based on JavaScript version created by: Oyvind Langsrud
6
+ # Ported to Ruby by Bryan Donovan
7
+
8
# Fisher's exact test for a 2x2 table.
#
# The instance caches the margins and the most recent hypergeometric term
# (@sn11, @sn1_, @sn_1, @sn, @sprob) so that neighbouring terms can be derived
# by a recurrence instead of being recomputed from log-factorials.
class FishersExactTest

  # Signed Lanczos series terms as [coefficient, shift]; each contributes
  # coefficient / (z + shift). The 1/z term and the constant term are added
  # separately in lngamm.
  LANCZOS_TERMS = [
    [0.0000001659470187408462, 7],
    [0.000009934937113930748, 6],
    [-0.1385710331296526, 5],
    [12.50734324009056, 4],
    [-176.6150291498386, 3],
    [771.3234287757674, 2],
    [-1259.139216722289, 1]
  ].freeze

  # Zeroes the cached table state and the tail accumulators.
  def initialize
    @sn11 = @sn1_ = @sn_1 = @sn = @sprob = 0.0
    @sleft = @sright = @sless = @slarg = 0.0
    @left = @right = @twotail = 0.0
  end

  # Log-gamma approximation.
  # Reference: "Lanczos, C. 'A precision approximation
  # of the gamma function', J. SIAM Numer. Anal., B, 1, 86-96, 1964."
  # Translation of Alan Miller's FORTRAN-implementation
  # See http://lib.stat.cmu.edu/apstat/245
  def lngamm(z)
    series = LANCZOS_TERMS.inject(0) do |acc, term|
      acc + term[0] / (z + term[1])
    end
    series += 676.5203681218835 / z
    series += 0.9999999999995183

    Math.log(series) - 5.58106146679532777 - z + (z - 0.5) * Math.log(z + 6.5)
  end

  # Log of n!; 0 for n <= 1.
  def lnfact(n)
    n <= 1 ? 0 : lngamm(n + 1)
  end

  # Log of the binomial coefficient "n choose k".
  def lnbico(n, k)
    lnfact(n) - lnfact(k) - lnfact(n - k)
  end

  # Hypergeometric probability of the cell value n11 given row margin n1_,
  # column margin n_1 and grand total n, computed from log-factorials.
  def hyper_323(n11, n1_, n_1, n)
    Math.exp(lnbico(n1_, n11) + lnbico(n - n1_, n_1 - n11) - lnbico(n, n_1))
  end

  # Hypergeometric probability for n11 using the cached margins.
  def hyper(n11)
    hyper0(n11, 0, 0, 0)
  end

  # Hypergeometric probability for n11i.
  #
  # When all three margin arguments are 0 the cached margins are reused and,
  # unless n11i is a multiple of 10, a value adjacent to the cached @sn11 is
  # derived from @sprob by a one-step recurrence. In every other case the
  # value is recomputed with hyper_323. Non-zero margins replace the cache.
  def hyper0(n11i, n1_i, n_1i, ni)
    if n1_i == 0 && n_1i == 0 && ni == 0
      factor = (n11i % 10 == 0) ? nil : neighbour_factor(n11i)
      if factor
        @sprob *= factor
        @sn11 = n11i
        return @sprob
      end
      @sn11 = n11i
    else
      @sn11 = n11i
      @sn1_ = n1_i
      @sn_1 = n_1i
      @sn = ni
    end
    @sprob = hyper_323(@sn11, @sn1_, @sn_1, @sn)
  end

  # Fills @sleft, @sright, @sless and @slarg for the observed cell n11 with
  # margins n1_, n_1 and total n, and returns the probability of the observed
  # table. Returns 1 (with all four accumulators set to 1) when only one
  # table is possible.
  def exact(n11, n1_, n_1, n)
    highest = n_1 < n1_ ? n_1 : n1_
    lowest = n1_ + n_1 - n
    lowest = 0 if lowest < 0

    if lowest == highest
      @sless = @sright = @sleft = @slarg = 1
      return 1
    end

    observed = hyper0(n11, n1_, n_1, n)
    # Tolerances absorb rounding differences between the recurrence and the
    # directly computed probability.
    below = 0.99999999 * observed
    above = 1.00000001 * observed

    @sleft, left_stop = tail_walk(lowest, 1, below, above)
    @sright, right_stop = tail_walk(highest, -1, below, above)

    if (left_stop - n11).abs < (right_stop - n11).abs
      @sless = @sleft
      @slarg = 1 - @sleft + observed
    else
      @sless = 1 - @sright + observed
      @slarg = @sright
    end
    observed
  end

  # Runs the test on the four cell counts of a 2x2 table (negative counts are
  # negated first) and returns a hash with :left, :right and :twotail, where
  # :twotail is capped at 1.
  def calculate(n11_, n12_, n21_, n22_)
    n11_ = -n11_ if n11_ < 0
    n12_ = -n12_ if n12_ < 0
    n21_ = -n21_ if n21_ < 0
    n22_ = -n22_ if n22_ < 0

    row_total = n11_ + n12_
    col_total = n11_ + n21_
    exact(n11_, row_total, col_total, n11_ + n12_ + n21_ + n22_)

    both_tails = @sleft + @sright
    both_tails = 1 if both_tails > 1
    { :left => @sless, :right => @slarg, :twotail => both_tails }
  end

  private

  # Multiplier that turns the cached @sprob into the probability for n11i when
  # n11i is one above or one below the cached @sn11; nil otherwise.
  def neighbour_factor(n11i)
    if n11i == @sn11 + 1
      ((@sn1_ - @sn11) / (n11i.to_f)) *
        ((@sn_1 - @sn11) / (n11i.to_f + @sn - @sn1_ - @sn_1))
    elsif n11i == @sn11 - 1
      ((@sn11) / (@sn1_ - n11i.to_f)) *
        ((@sn11 + @sn - @sn1_ - @sn_1) / (@sn_1 - n11i.to_f))
    end
  end

  # Sums terms from `start` moving by `step` while they stay below `below`,
  # then adds the stopping term when it is under `above`.
  # Returns [sum, index at which the walk ended].
  def tail_walk(start, step, below, above)
    total = 0
    term = hyper(start)
    index = start + step
    while term < below
      total += term
      term = hyper(index)
      index += step
    end
    index -= step

    if term < above
      total += term
    else
      index -= step
    end
    [total, index]
  end

end