rubystats 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README DELETED
@@ -1,9 +0,0 @@
1
- # This is a set of Ruby statistics libraries ported from the PHPMath libraries.
2
- # The PHPMath libraries were created by Paul Meagher (many of which were ported from
3
- # various sources).
4
- # See http://www.phpmath.com/ for PHPMath libraries.
5
- #
6
- # See examples and tests for usage.
7
- # Author:: Bryan Donovan
8
- #
9
- # License:: LGPL http://www.gnu.org/copyleft/lesser.html
@@ -1,87 +0,0 @@
1
- require 'probability_distribution'
2
-
3
# Beta distribution parameterised by two positive shape values, +p+ and +q+.
#
# This class relies on helpers it does not define itself: +check_range+ and
# +find_root+ (presumably inherited from ProbabilityDistribution) and
# +log_beta+ / +incomplete_beta+ (presumably mixed in from SpecialMath).
#
# +pdf+, +cdf+ and +icdf+ accept either a single number or an Array of
# numbers; for an Array they return an Array of results in the same order.
class BetaDistribution < ProbabilityDistribution
  include SpecialMath

  attr_reader :p, :q

  # dgr_p:: degrees of freedom p (must be > 0)
  # dgr_q:: degrees of freedom q (must be > 0)
  #
  # Raises ArgumentError when either parameter is not positive. (Returning
  # nil from +initialize+ does not stop +new+ from handing back an object, so
  # invalid parameters used to produce an instance with unset @p and @q.)
  def initialize(dgr_p, dgr_q)
    if dgr_p <= 0 || dgr_q <= 0
      raise ArgumentError, 'Error: dgr_p and dgr_q must be greater than 0'
    end

    @p = dgr_p.to_f
    @q = dgr_q.to_f
  end

  # Mean of the distribution: p / (p + q).
  def mean
    @p / (@p + @q)
  end

  # Standard deviation: sqrt(p * q / ((p + q)^2 * (p + q + 1))).
  def standard_deviation
    total = @p + @q
    Math.sqrt(@p * @q / (total**2 * (total + 1)))
  end

  # Probability density at x (or at each element when x is an Array).
  # Each value is passed through +check_range+ first.
  def pdf(x)
    return x.map { |value| density_at(value) } if x.is_a?(Array)

    density_at(x)
  end

  # Cumulative distribution at x (or at each element when x is an Array),
  # computed with +incomplete_beta+. Each value is passed through
  # +check_range+ first.
  def cdf(x)
    return x.map { |value| cumulative_at(value) } if x.is_a?(Array)

    cumulative_at(x)
  end

  # Inverse cumulative distribution for prob (or for each element when prob
  # is an Array). Probabilities of exactly 0.0 and 1.0 map to 0.0 and 1.0;
  # everything else is solved with +find_root+.
  def icdf(prob)
    return prob.map { |value| quantile_for(value) } if prob.is_a?(Array)

    quantile_for(prob)
  end

  private

  # Density for a single x. The endpoints 0.0 and 1.0 are reported as 0.0,
  # which also avoids evaluating Math.log(0).
  def density_at(x)
    check_range(x)
    return 0.0 if x == 0.0 || x == 1.0

    Math.exp(-log_beta(@p, @q) +
             (@p - 1.0) * Math.log(x) +
             (@q - 1.0) * Math.log(1.0 - x))
  end

  # Cumulative probability for a single x.
  def cumulative_at(x)
    check_range(x)
    incomplete_beta(x, @p, @q)
  end

  # Quantile for a single probability. The endpoint shortcuts are applied
  # before the root search so that they are not overwritten by it.
  def quantile_for(prob)
    check_range(prob)
    return 0.0 if prob == 0.0
    return 1.0 if prob == 1.0

    find_root(prob, 0.5, 0.0, 1.0)
  end
end
87
-
@@ -1,194 +0,0 @@
1
- require 'probability_distribution'
2
- # This class provides an object for encapsulating binomial distributions
3
- # Ported to Ruby from PHPMath class by Bryan Donovan
4
- # Author:: Mark Hale
5
- # Author:: Paul Meagher
6
- # Author:: Bryan Donovan (mailto:bryandonovan@myrealbox.com)
7
# Binomial distribution for a fixed number of trials and a success
# probability.
#
# This class relies on helpers it does not define itself: +check_range+ and
# +find_root+ (presumably inherited from ProbabilityDistribution),
# +log_gamma+ (presumably from SpecialMath) and +binomial+ (presumably the
# binomial coefficient from ExtraMath).
class BinomialDistribution < ProbabilityDistribution
  include NumericalConstants
  include SpecialMath
  include ExtraMath

  # Constructs a binomial distribution.
  # trials:: number of trials, must be greater than 0
  # prob::   success probability, must lie in [0, 1]
  # Raises RuntimeError when either argument is out of range.
  def initialize(trials, prob)
    raise "Error: trials must be greater than 0" if trials <= 0
    raise "Error: prob must be between 0 and 1" if prob < 0.0 || prob > 1.0

    @n = trials
    @p = prob
  end

  # returns the number of trials
  def get_trials_parameter
    @n
  end

  # returns the probability
  def get_probability_parameter
    @p
  end

  # returns the mean, n * p
  def get_mean
    @n * @p
  end

  # returns the variance, n * p * (1 - p)
  def variance
    @n * @p * (1.0 - @p)
  end

  # Probability function of a binomial distribution (equivalent to the R
  # dbinom function).
  # _x should be an integer, or an Array of integers.
  # Returns P(x = _x), or an Array of such probabilities.
  def pdf(_x)
    return _x.map { |k| point_probability(k) } if _x.is_a?(Array)

    point_probability(_x)
  end

  # Cumulative binomial distribution function (equivalent to the R pbinom
  # function).
  # _x should be integer-valued and can be a single integer or an Array.
  # Returns P(x <= _x), or an Array of such probabilities.
  def cdf(_x)
    return _x.map { |k| get_cdf(k) } if _x.is_a?(Array)

    get_cdf(_x)
  end

  # Inverse of the cumulative binomial distribution function (equivalent to
  # the R qbinom function).
  # prob may be a single probability or an Array of probabilities; each is
  # passed through +check_range+ and solved with +find_root+, then floored.
  def get_icdf(prob)
    return prob.map { |pr| quantile_for(pr) } if prob.is_a?(Array)

    quantile_for(prob)
  end

  # Wrapper for the binomial RNG (equivalent to the R rbinom function).
  # Returns one random deviate when num_vals == 1, otherwise an Array of
  # num_vals deviates. Raises RuntimeError when num_vals < 1.
  def rng(num_vals = 1)
    raise "Error num_vals must be greater than or equal to 1" if num_vals < 1
    return get_rng if num_vals == 1

    (0...num_vals).map { get_rng }
  end

  # Private methods below

  private

  # P(x = k) for a single k: C(n, k) * p^k * (1 - p)^(n - k).
  def point_probability(k)
    check_range(k, 0.0, @n)
    binomial(@n, k) * @p**k * (1 - @p)**(@n - k)
  end

  # Floored root of the cdf for a single probability.
  def quantile_for(prob)
    check_range(prob)
    find_root(prob, @n / 2, 0.0, @n).floor
  end

  # Cumulative probability for a single, integer-valued _x.
  # Sums the point probabilities for 0 through _x inclusive, i.e. P(x <= _x).
  def get_cdf(_x)
    check_range(_x, 0.0, @n)
    (0.._x).inject(0.0) { |sum, k| sum + point_probability(k) }
  end

  # Private binomial RNG function.
  # Original version of this function from Press et al.
  #
  # see http://www.library.cornell.edu/nr/bookcpdf/c7-3.pdf
  #
  # Differences from the original: uniform deviates come from Kernel.rand,
  # n and p are read from instance variables, and +log_gamma+ replaces the
  # Press et al. routine. The original cached values between calls in static
  # variables; here they are simply recomputed on every call.
  #
  # Returns an Integer count of successes in the range 0..n.
  def get_rng
    # Sample with the smaller of p and 1 - p, then mirror the result.
    p = @p <= 0.5 ? @p : 1.0 - @p
    am = @n * p

    if @n < 25
      # Direct method: one Bernoulli draw per trial (n draws in total).
      bnl = 0
      (1..@n).each { bnl += 1 if Kernel.rand < p }
    elsif am < 1.0
      # Fewer than one expected success in 25+ trials: approximately
      # Poisson. Multiply uniforms until the product drops below exp(-am).
      g = Math.exp(-am)
      t = 1.0
      bnl = @n # used when the product never drops below g
      (0..@n).each do |j|
        t *= Kernel.rand
        if t < g
          bnl = j
          break
        end
      end
    else
      # Rejection method with a Lorentzian comparison function.
      en = @n
      oldg = log_gamma(en + 1.0)
      pc = 1.0 - p
      plog = Math.log(p)
      pclog = Math.log(pc)
      sq = Math.sqrt(2.0 * am * pc)
      em = nil
      y = nil
      loop do
        # Redraw until the candidate lies inside [0, n + 1).
        loop do
          y = Math.tan(Math::PI * Kernel.rand)
          em = sq * y + am
          break if em >= 0.0 && em < en + 1.0
        end
        em = em.floor
        # Ratio of the target probability to the comparison function.
        t = 1.2 * sq * (1.0 + y * y) *
            Math.exp(oldg - log_gamma(em + 1.0) -
                     log_gamma(en - em + 1.0) + em * plog + (en - em) * pclog)
        break if Kernel.rand <= t
      end
      bnl = em
    end

    # Undo the p -> 1 - p symmetry transformation.
    bnl = @n - bnl if p != @p
    bnl
  end
end
194
-
@@ -1,171 +0,0 @@
1
- #! /usr/local/bin/ruby
2
-
3
- # Fisher's Exact Test Function Library
4
- #
5
- # Based on JavaScript version created by: Oyvind Langsrud
6
- # Ported to Ruby by Bryan Donovan
7
-
8
# Fisher's exact test for a 2x2 contingency table.
#
# The public entry point is +calculate+. The remaining methods are the
# building blocks it uses; +hyper0+, +hyper+ and +exact+ share state through
# instance variables, so a single instance should not be used for two
# calculations at the same time.
class FishersExactTest

  def initialize
    # Cached table description and probability used by hyper0 / hyper.
    @sn11 = 0.0
    @sn1_ = 0.0
    @sn_1 = 0.0
    @sn = 0.0
    @sprob = 0.0

    # Tail sums written by exact.
    @sleft = 0.0
    @sright = 0.0
    @sless = 0.0
    @slarg = 0.0

    # Not read by any method in this class; kept so instance state is unchanged.
    @left = 0.0
    @right = 0.0
    @twotail = 0.0
  end

  # Natural log of the gamma function at z.
  #
  # Reference: "Lanczos, C. 'A precision approximation
  # of the gamma function', J. SIAM Numer. Anal., B, 1, 86-96, 1964."
  # Translation of Alan Miller's FORTRAN-implementation
  # See http://lib.stat.cmu.edu/apstat/245
  def lngamm(z)
    x = 0
    x += 0.0000001659470187408462 / (z + 7)
    x += 0.000009934937113930748 / (z + 6)
    x -= 0.1385710331296526 / (z + 5)
    x += 12.50734324009056 / (z + 4)
    x -= 176.6150291498386 / (z + 3)
    x += 771.3234287757674 / (z + 2)
    x -= 1259.139216722289 / (z + 1)
    x += 676.5203681218835 / z
    x += 0.9999999999995183

    Math.log(x) - 5.58106146679532777 - z + (z - 0.5) * Math.log(z + 6.5)
  end

  # Natural log of n!; returns 0 for n <= 1.
  def lnfact(n)
    return 0 if n <= 1

    lngamm(n + 1)
  end

  # Natural log of the binomial coefficient C(n, k).
  def lnbico(n, k)
    lnfact(n) - lnfact(k) - lnfact(n - k)
  end

  # Hypergeometric probability of cell count n11 given row total n1_,
  # column total n_1 and grand total n, computed from log factorials.
  def hyper_323(n11, n1_, n_1, n)
    Math.exp(lnbico(n1_, n11) + lnbico(n - n1_, n_1 - n11) - lnbico(n, n_1))
  end

  # Hypergeometric probability of cell count n11 for the margins cached by
  # the most recent hyper0 call that supplied them.
  def hyper(n11)
    hyper0(n11, 0, 0, 0)
  end

  # Hypergeometric probability of n11i; also updates the cached state.
  #
  # When n1_i, n_1i and ni are all 0 the cached margins are reused. In that
  # case, if n11i is one above or one below the cached cell count and is not
  # a multiple of 10, the cached probability is updated by a ratio
  # recurrence; otherwise it is recomputed with hyper_323. When any margin
  # argument is non-zero, the cache is replaced and the probability is
  # recomputed.
  def hyper0(n11i, n1_i, n_1i, ni)
    if n1_i == 0 && n_1i == 0 && ni == 0
      unless n11i % 10 == 0
        if n11i == @sn11 + 1
          @sprob *= ((@sn1_ - @sn11) / n11i.to_f) *
                    ((@sn_1 - @sn11) / (n11i.to_f + @sn - @sn1_ - @sn_1))
          @sn11 = n11i
          return @sprob
        end
        if n11i == @sn11 - 1
          @sprob *= (@sn11 / (@sn1_ - n11i.to_f)) *
                    ((@sn11 + @sn - @sn1_ - @sn_1) / (@sn_1 - n11i.to_f))
          @sn11 = n11i
          return @sprob
        end
      end
      @sn11 = n11i
    else
      @sn11 = n11i
      @sn1_ = n1_i
      @sn_1 = n_1i
      @sn = ni
    end
    @sprob = hyper_323(@sn11, @sn1_, @sn_1, @sn)
  end

  # Probability of the observed table, with the tail sums stored in
  # @sleft, @sright, @sless and @slarg as a side effect.
  #
  # n11:: observed count in the first cell
  # n1_:: first row total
  # n_1:: first column total
  # n::   grand total
  #
  # Returns 1 (and sets every tail sum to 1) when the margins allow only one
  # possible value for the first cell.
  def exact(n11, n1_, n_1, n)
    # Largest and smallest cell counts compatible with the margins.
    max = n1_
    max = n_1 if n_1 < max
    min = n1_ + n_1 - n
    min = 0 if min < 0

    if min == max
      @sless = 1
      @sright = 1
      @sleft = 1
      @slarg = 1
      return 1
    end

    prob = hyper0(n11, n1_, n_1, n)

    # Left tail: walk up from min, adding outcomes less likely than observed.
    @sleft = 0
    p = hyper(min)
    i = min + 1
    while p < (0.99999999 * prob)
      @sleft += p
      p = hyper(i)
      i += 1
    end
    i -= 1
    # Include the stopping outcome when it ties with the observed one.
    if p < (1.00000001 * prob)
      @sleft += p
    else
      i -= 1
    end

    # Right tail: walk down from max in the same way.
    @sright = 0
    p = hyper(max)
    j = max - 1
    while p < (0.99999999 * prob)
      @sright += p
      p = hyper(j)
      j -= 1
    end
    j += 1
    if p < (1.00000001 * prob)
      @sright += p
    else
      j += 1
    end

    # Derive the one-sided sums from whichever tail ended nearer to n11.
    if (i - n11).abs < (j - n11).abs
      @sless = @sleft
      @slarg = 1 - @sleft + prob
    else
      @sless = 1 - @sright + prob
      @slarg = @sright
    end
    prob
  end

  # Runs the test on the 2x2 table [[n11_, n12_], [n21_, n22_]].
  # Negative counts are replaced by their absolute value.
  #
  # Returns a Hash with the keys :left, :right and :twotail; :twotail is
  # capped at 1.
  def calculate(n11_, n12_, n21_, n22_)
    n11_, n12_, n21_, n22_ = [n11_, n12_, n21_, n22_].map { |count| count.abs }

    row_total = n11_ + n12_
    col_total = n11_ + n21_
    grand_total = n11_ + n12_ + n21_ + n22_
    exact(n11_, row_total, col_total, grand_total)

    twotail = @sleft + @sright
    twotail = 1 if twotail > 1
    { :left => @sless, :right => @slarg, :twotail => twotail }
  end

end