rubystats 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +7 -0
- data/Manifest.txt +22 -0
- data/README.txt +109 -0
- data/Rakefile +19 -0
- data/examples/beta.rb +10 -12
- data/examples/binomial.rb +12 -10
- data/examples/failrate_vs_goal.rb +28 -0
- data/examples/fisher.rb +2 -6
- data/examples/norm.rb +10 -4
- data/lib/rubystats.rb +9 -0
- data/lib/rubystats/beta_distribution.rb +88 -0
- data/lib/rubystats/binomial_distribution.rb +195 -0
- data/lib/rubystats/fishers_exact_test.rb +171 -0
- data/lib/rubystats/modules.rb +742 -0
- data/lib/rubystats/normal_distribution.rb +114 -0
- data/lib/rubystats/probability_distribution.rb +131 -0
- data/{tests → test}/tc_beta.rb +4 -4
- data/{tests → test}/tc_binomial.rb +4 -4
- data/{tests → test}/tc_fisher.rb +2 -2
- data/test/tc_norm.rb +14 -0
- data/test/tc_require_all.rb +18 -0
- data/{tests → test}/ts_stats.rb +0 -0
- metadata +72 -51
- data/README +0 -9
- data/lib/beta_distribution.rb +0 -87
- data/lib/binomial_distribution.rb +0 -194
- data/lib/fishers_exact_test.rb +0 -171
- data/lib/modules/extra_math.rb +0 -7
- data/lib/modules/numerical_constants.rb +0 -17
- data/lib/modules/special_math.rb +0 -721
- data/lib/normal_distribution.rb +0 -114
- data/lib/probability_distribution.rb +0 -132
- data/tests/tc_norm.rb +0 -13
data/README
DELETED
@@ -1,9 +0,0 @@
|
|
1
|
-
# This is a set of Ruby statistics libraries ported from the PHPMath libraries.
|
2
|
-
# PHPMath libraries created by Paul Meagher (many of which were ported from
|
3
|
-
# various sources).
|
4
|
-
# See http://www.phpmath.com/ for PHPMath libraries.
|
5
|
-
#
|
6
|
-
# See examples and tests for usage.
|
7
|
-
# Author:: Bryan Donovan
|
8
|
-
#
|
9
|
-
# License:: LGPL http://www.gnu.org/copyleft/lesser.html
|
data/lib/beta_distribution.rb
DELETED
@@ -1,87 +0,0 @@
|
|
1
|
-
require 'probability_distribution'
|
2
|
-
|
3
|
-
# Beta distribution with shape parameters p and q.
#
# Range checking (+check_range+) and root finding (+find_root+) are inherited
# from ProbabilityDistribution; +log_beta+ and +incomplete_beta+ come from the
# SpecialMath mixin.
#
# Every public evaluation method accepts either a single number or an Array of
# numbers; an Array argument yields an Array of results in the same order.
class BetaDistribution < ProbabilityDistribution
  include SpecialMath

  attr_reader :p, :q

  # dgr_p = degrees of freedom p
  # dgr_q = degrees of freedom q
  #
  # NOTE: when either parameter is not positive this constructor returns
  # early and leaves @p and @q unset. +new+ ignores the return value of
  # +initialize+, so an object is still created in that case.
  def initialize(dgr_p, dgr_q)
    return if dgr_p <= 0 || dgr_q <= 0

    @p = dgr_p.to_f
    @q = dgr_q.to_f
  end

  # Mean of the distribution: p / (p + q).
  def mean
    @p.to_f / (@p.to_f + @q.to_f)
  end

  # Standard deviation: sqrt(p * q / ((p + q)^2 * (p + q + 1))).
  def standard_deviation
    Math.sqrt(@p * @q / ((@p + @q)**2 * (@p + @q + 1)))
  end

  # Probability density at x (number or Array of numbers).
  # Each value is passed through +check_range+ first. The density is reported
  # as 0.0 at exactly 0.0 and 1.0, which also avoids evaluating log(0).
  def pdf(x)
    return x.map { |value| pdf(value) } if x.is_a?(Array)

    check_range(x)
    return 0.0 if x == 0.0 || x == 1.0

    Math.exp(-log_beta(@p, @q) + (@p - 1.0) * Math.log(x) + (@q - 1.0) * Math.log(1.0 - x))
  end

  # Cumulative distribution at x (number or Array of numbers), evaluated with
  # +incomplete_beta+ after +check_range+.
  def cdf(x)
    return x.map { |value| cdf(value) } if x.is_a?(Array)

    check_range(x)
    incomplete_beta(x, @p, @q)
  end

  # Inverse cumulative distribution for prob (number or Array of numbers).
  #
  # The endpoints 0.0 and 1.0 map directly to 0.0 and 1.0; anything else is
  # solved with +find_root+, starting from 0.5 and bracketed by [0.0, 1.0].
  # Array elements go through exactly the same path as a scalar argument, so
  # the endpoint shortcuts apply to them as well.
  def icdf(prob)
    return prob.map { |value| icdf(value) } if prob.is_a?(Array)

    check_range(prob)
    return 0.0 if prob == 0.0
    return 1.0 if prob == 1.0

    find_root(prob, 0.5, 0.0, 1.0)
  end
end
|
87
|
-
|
@@ -1,194 +0,0 @@
|
|
1
|
-
require 'probability_distribution'
|
2
|
-
# This class provides an object for encapsulating binomial distributions
|
3
|
-
# Ported to Ruby from PHPMath class by Bryan Donovan
|
4
|
-
# Author:: Mark Hale
|
5
|
-
# Author:: Paul Meagher
|
6
|
-
# Author:: Bryan Donovan (mailto:bryandonovan@myrealbox.com)
|
7
|
-
# Binomial distribution with @n trials and per-trial success probability @p.
#
# +check_range+ and +find_root+ are inherited from ProbabilityDistribution.
# +binomial+ and +log_gamma+ are expected to come from the included math
# modules (NOTE(review): their definitions are not part of this file).
#
# The public evaluation methods accept either a single value or an Array of
# values; an Array argument yields an Array of results in the same order.
class BinomialDistribution < ProbabilityDistribution
  include NumericalConstants
  include SpecialMath
  include ExtraMath

  # Constructs a binomial distribution.
  # Raises a RuntimeError when trials is not positive or prob is outside
  # the interval [0, 1].
  def initialize(trials, prob)
    raise "Error: trials must be greater than 0" if trials <= 0
    raise "Error: prob must be between 0 and 1" if prob < 0.0 || prob > 1.0

    @n = trials
    @p = prob
  end

  # returns the number of trials
  def get_trials_parameter
    @n
  end

  # returns the probability
  def get_probability_parameter
    @p
  end

  # returns the mean (n * p)
  def get_mean
    @n * @p
  end

  # returns the variance (n * p * (1 - p))
  def variance
    @n * @p * (1.0 - @p)
  end

  # Probability mass function of a binomial distribution (equivalent
  # to R dbinom function).
  # _x should be an integer, or an Array of integers.
  # Returns the probability that a stochastic variable x has the value _x,
  # i.e. P(x = _x). Array elements are evaluated with the same formula as a
  # scalar argument.
  def pdf(_x)
    return _x.map { |value| pdf(value) } if _x.is_a?(Array)

    check_range(_x, 0.0, @n)
    binomial(@n, _x) * @p**_x * (1 - @p)**(@n - _x)
  end

  # Cumulative binomial distribution function (equivalent to R pbinom function).
  # _x should be integer-valued and can be a single integer or an Array of
  # integers. Returns a single value or an Array containing the probability
  # that a stochastic variable x is less than or equal to _x, i.e. P(x <= _x).
  def cdf(_x)
    return _x.map { |value| get_cdf(value) } if _x.is_a?(Array)

    get_cdf(_x)
  end

  # Inverse of the cumulative binomial distribution function
  # (equivalent to R qbinom function).
  # prob can be a single probability or an Array of probabilities. Each one
  # is range-checked, solved with +find_root+ (initial guess n / 2, bracket
  # [0, n]) and the root is rounded down to an integer.
  def get_icdf(prob)
    return prob.map { |value| get_icdf(value) } if prob.is_a?(Array)

    check_range(prob)
    find_root(prob, @n / 2, 0.0, @n).floor
  end

  # Wrapper for binomial RNG function (equivalent to R rbinom function).
  # Returns one random deviate when num_vals is 1, otherwise an Array of
  # num_vals deviates. Raises a RuntimeError when num_vals is below 1.
  def rng(num_vals = 1)
    raise "Error num_vals must be greater than or equal to 1" if num_vals < 1
    return get_rng if num_vals == 1

    (0...num_vals).map { get_rng }
  end

  # Private methods below

  private

  # Private shared function for getting cumulant for particular x
  # param _x should be integer-valued
  # returns the probability that a stochastic variable x is less than or
  # equal to _x, i.e. P(x <= _x), by summing pdf(0) through pdf(_x).
  def get_cdf(_x)
    check_range(_x, 0.0, @n)
    (0.._x).inject(0.0) { |sum, k| sum + pdf(k) }
  end

  # Private binomial RNG function
  # Original version of this function from Press et al.
  #
  # see http://www.library.cornell.edu/nr/bookcpdf/c7-3.pdf
  #
  # Changed parts having to do with generating a uniformly distributed
  # number in the 0 to 1 range. Also using instance variables, instead
  # of supplying function with p and n values. Finally calling port
  # of JSci's log gamma routine instead of Press et al.
  #
  # There are enough non-trivial changes to this function that the
  # port conforms to the Press et al. copyright.
  #
  # Three strategies are used, chosen from @n and the mean n * p:
  # * fewer than 25 trials: simulate every trial directly;
  # * mean below 1: multiply uniforms until the product drops below exp(-mean);
  # * otherwise: rejection sampling with a Lorentzian comparison function.
  def get_rng
    # Work with the smaller of p and 1 - p; the result is mirrored at the end.
    pr = @p <= 0.5 ? @p : 1.0 - @p
    am = @n * pr

    if @n < 25
      # One uniform draw per trial, for all n trials.
      bnl = 0.0
      (1..@n).each { bnl += 1.0 if Kernel.rand < pr }
    elsif am < 1.0
      g = Math.exp(-am)
      t = 1.0
      # If the product never drops below g the deviate is capped at n.
      bnl = @n
      (0..@n).each do |j|
        t *= Kernel.rand
        if t < g
          bnl = j
          break
        end
      end
    else
      en = @n
      oldg = log_gamma(en + 1.0)
      pc = 1.0 - pr
      plog = Math.log(pr)
      pclog = Math.log(pc)
      sq = Math.sqrt(2.0 * am * pc)
      em = nil
      loop do
        y = nil
        # Draw from the comparison function until the candidate is in [0, n + 1).
        loop do
          y = Math.tan(Math::PI * Kernel.rand)
          em = sq * y + am
          break if em >= 0.0 && em < (en + 1.0)
        end
        em = em.floor
        # Ratio of the binomial probability to the comparison function at em.
        t = 1.2 * sq * (1.0 + y * y) *
            Math.exp(oldg - log_gamma(em + 1.0) -
                     log_gamma(en - em + 1.0) + em * plog + (en - em) * pclog)
        break if Kernel.rand <= t
      end
      bnl = em
    end

    # Undo the p -> 1 - p symmetry transformation.
    bnl = @n - bnl if pr != @p
    bnl
  end
end
|
194
|
-
|
data/lib/fishers_exact_test.rb
DELETED
@@ -1,171 +0,0 @@
|
|
1
|
-
#! /usr/local/bin/ruby
|
2
|
-
|
3
|
-
# Fisher's Exact Test Function Library
|
4
|
-
#
|
5
|
-
# Based on JavaScript version created by: Oyvind Langsrud
|
6
|
-
# Ported to Ruby by Bryan Donovan
|
7
|
-
|
8
|
-
# Fisher's exact test for a 2x2 contingency table.
#
# Usage: FishersExactTest.new.calculate(n11, n12, n21, n22) returns a Hash
# with the keys :left, :right and :twotail.
#
# The object keeps the most recently evaluated table (@sn11, @sn1_, @sn_1,
# @sn) and its probability (@sprob) so that neighbouring tables can be
# evaluated with a cheap recurrence instead of a full log-gamma evaluation.
class FishersExactTest
  # Terms [coefficient, shift] of the series used by lngamm; each one
  # contributes coefficient / (z + shift), accumulated in this order.
  LANCZOS_TERMS = [
    [0.0000001659470187408462, 7],
    [0.000009934937113930748, 6],
    [-0.1385710331296526, 5],
    [12.50734324009056, 4],
    [-176.6150291498386, 3],
    [771.3234287757674, 2],
    [-1259.139216722289, 1],
    [676.5203681218835, 0]
  ].freeze

  def initialize
    # Cached table (cell n11, first row total, first column total, grand
    # total) and the probability computed for it.
    @sn11 = 0.0
    @sn1_ = 0.0
    @sn_1 = 0.0
    @sn = 0.0
    @sprob = 0.0

    # Tail sums produced by exact.
    @sleft = 0.0
    @sright = 0.0
    @sless = 0.0
    @slarg = 0.0

    # Initialised here but not read by any method of this class.
    @left = 0.0
    @right = 0.0
    @twotail = 0.0
  end

  # Natural logarithm of the gamma function at z.
  #
  # Reference: "Lanczos, C. 'A precision approximation
  # of the gamma function', J. SIAM Numer. Anal., B, 1, 86-96, 1964."
  # Translation of Alan Miller's FORTRAN-implementation
  # See http://lib.stat.cmu.edu/apstat/245
  def lngamm(z)
    series = LANCZOS_TERMS.inject(0) { |acc, (coefficient, shift)| acc + coefficient / (z + shift) }
    series += 0.9999999999995183

    Math.log(series) - 5.58106146679532777 - z + (z - 0.5) * Math.log(z + 6.5)
  end

  # Natural logarithm of n!; returns 0 for every n <= 1.
  def lnfact(n)
    n <= 1 ? 0 : lngamm(n + 1)
  end

  # Natural logarithm of the binomial coefficient "n choose k".
  def lnbico(n, k)
    lnfact(n) - lnfact(k) - lnfact(n - k)
  end

  # Hypergeometric probability of a table with cell n11, first row total n1_,
  # first column total n_1 and grand total n, evaluated through log-gamma.
  def hyper_323(n11, n1_, n_1, n)
    Math.exp(lnbico(n1_, n11) + lnbico(n - n1_, n_1 - n11) - lnbico(n, n_1))
  end

  # Probability of cell value n11 using the margins cached by the last
  # hyper0 call that supplied them.
  def hyper(n11)
    hyper0(n11, 0, 0, 0)
  end

  # Probability of the table with cell value n11i.
  #
  # When the three margin arguments are all zero the cached margins are
  # reused. In that mode a cell value adjacent to the cached one is derived
  # from the cached probability by a multiplicative recurrence, except when
  # n11i is a multiple of 10, where the value is recomputed from scratch.
  # When any margin argument is non-zero the cache is replaced first.
  def hyper0(n11i, n1_i, n_1i, ni)
    if n1_i == 0 && n_1i == 0 && ni == 0
      unless n11i % 10 == 0
        if n11i == @sn11 + 1
          @sprob *= ((@sn1_ - @sn11) / n11i.to_f) * ((@sn_1 - @sn11) / (n11i.to_f + @sn - @sn1_ - @sn_1))
          @sn11 = n11i
          return @sprob
        end
        if n11i == @sn11 - 1
          @sprob *= (@sn11 / (@sn1_ - n11i.to_f)) * ((@sn11 + @sn - @sn1_ - @sn_1) / (@sn_1 - n11i.to_f))
          @sn11 = n11i
          return @sprob
        end
      end
      @sn11 = n11i
    else
      @sn11 = n11i
      @sn1_ = n1_i
      @sn_1 = n_1i
      @sn = ni
    end
    @sprob = hyper_323(@sn11, @sn1_, @sn_1, @sn)
  end

  # Computes the probability of the observed table and fills in the tail
  # sums @sleft, @sright, @sless and @slarg.
  #
  # n11 is the observed cell, n1_ the first row total, n_1 the first column
  # total and n the grand total. Returns the probability of the observed
  # table, or 1 when the margins allow only one table.
  def exact(n11, n1_, n_1, n)
    max = n_1 < n1_ ? n_1 : n1_
    min = n1_ + n_1 - n
    min = 0 if min < 0

    if min == max
      @sless = 1
      @sright = 1
      @sleft = 1
      @slarg = 1
      return 1
    end

    # This call also primes the margin cache used by hyper below.
    prob = hyper0(n11, n1_, n_1, n)

    # Left tail first, then right tail; the order matters because both
    # scans go through the shared probability cache.
    @sleft, i = accumulate_tail(min, 1, prob)
    @sright, j = accumulate_tail(max, -1, prob)

    if (i - n11).abs < (j - n11).abs
      @sless = @sleft
      @slarg = 1 - @sleft + prob
    else
      @sless = 1 - @sright + prob
      @slarg = @sright
    end
    prob
  end

  # Runs the test on the table [[n11_, n12_], [n21_, n22_]].
  # Negative counts are replaced by their absolute value.
  # Returns { :left => ..., :right => ..., :twotail => ... }, with the
  # two-tailed value capped at 1.
  def calculate(n11_, n12_, n21_, n22_)
    n11_ = n11_.abs
    n12_ = n12_.abs
    n21_ = n21_.abs
    n22_ = n22_.abs
    n1_ = n11_ + n12_
    n_1 = n11_ + n21_
    n = n11_ + n12_ + n21_ + n22_
    exact(n11_, n1_, n_1, n)

    twotail = @sleft + @sright
    twotail = 1 if twotail > 1
    { :left => @sless, :right => @slarg, :twotail => twotail }
  end

  private

  # Sums table probabilities starting at cell value +start+ and moving by
  # +step+ (+1 or -1) for as long as each probability is clearly smaller than
  # +prob+. A final probability that equals +prob+ within a relative 1e-8 is
  # included as well. Returns [tail sum, last cell value included].
  def accumulate_tail(start, step, prob)
    total = 0
    current = hyper(start)
    cell = start + step
    while current < (0.99999999 * prob)
      total += current
      current = hyper(cell)
      cell += step
    end
    cell -= step

    if current < (1.00000001 * prob)
      total += current
    else
      cell -= step
    end
    [total, cell]
  end
end
|