rubystats 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +7 -0
- data/Manifest.txt +22 -0
- data/README.txt +109 -0
- data/Rakefile +19 -0
- data/examples/beta.rb +10 -12
- data/examples/binomial.rb +12 -10
- data/examples/failrate_vs_goal.rb +28 -0
- data/examples/fisher.rb +2 -6
- data/examples/norm.rb +10 -4
- data/lib/rubystats.rb +9 -0
- data/lib/rubystats/beta_distribution.rb +88 -0
- data/lib/rubystats/binomial_distribution.rb +195 -0
- data/lib/rubystats/fishers_exact_test.rb +171 -0
- data/lib/rubystats/modules.rb +742 -0
- data/lib/rubystats/normal_distribution.rb +114 -0
- data/lib/rubystats/probability_distribution.rb +131 -0
- data/{tests → test}/tc_beta.rb +4 -4
- data/{tests → test}/tc_binomial.rb +4 -4
- data/{tests → test}/tc_fisher.rb +2 -2
- data/test/tc_norm.rb +14 -0
- data/test/tc_require_all.rb +18 -0
- data/{tests → test}/ts_stats.rb +0 -0
- metadata +72 -51
- data/README +0 -9
- data/lib/beta_distribution.rb +0 -87
- data/lib/binomial_distribution.rb +0 -194
- data/lib/fishers_exact_test.rb +0 -171
- data/lib/modules/extra_math.rb +0 -7
- data/lib/modules/numerical_constants.rb +0 -17
- data/lib/modules/special_math.rb +0 -721
- data/lib/normal_distribution.rb +0 -114
- data/lib/probability_distribution.rb +0 -132
- data/tests/tc_norm.rb +0 -13
data/README
DELETED
@@ -1,9 +0,0 @@
|
|
1
|
-
# This is a set of Ruby statistics libraries ported from the PHPMath libraries.
|
2
|
-
# PHPMath libraries created by Paul Meagher (many of which were ported from
|
3
|
-
# various sources).
|
4
|
-
# See http://www.phpmath.com/ for PHPMath libraries.
|
5
|
-
#
|
6
|
-
# See examples and tests for usage.
|
7
|
-
# Author:: Bryan Donovan
|
8
|
-
#
|
9
|
-
# License:: LGPL http://www.gnu.org/copyleft/lesser.html
|
data/lib/beta_distribution.rb
DELETED
@@ -1,87 +0,0 @@
|
|
1
|
-
require 'probability_distribution'
|
2
|
-
|
3
|
-
# Beta distribution parameterised by two positive shape values, p and q.
#
# This class relies on helpers it does not define itself: +check_range+ and
# +find_root+ (presumably inherited from ProbabilityDistribution) and
# +log_beta+ / +incomplete_beta+ (presumably supplied by SpecialMath).
#
# +pdf+, +cdf+ and +icdf+ each accept either a single number or an Array of
# numbers; an Array argument yields an Array of results in the same order.
class BetaDistribution < ProbabilityDistribution
  include SpecialMath

  attr_reader :p, :q

  # dgr_p:: degrees of freedom p
  # dgr_q:: degrees of freedom q
  #
  # NOTE(review): a non-positive parameter does NOT abort construction -- the
  # early return only skips the assignments, so the caller still receives an
  # object whose +p+ and +q+ are nil. Kept as-is for backward compatibility.
  def initialize(dgr_p, dgr_q)
    return nil if dgr_p <= 0 || dgr_q <= 0

    @p = dgr_p.to_f
    @q = dgr_q.to_f
  end

  # Returns p / (p + q).
  def mean
    @p.to_f / (@p.to_f + @q.to_f)
  end

  # Returns sqrt(p * q / ((p + q)^2 * (p + q + 1))).
  def standard_deviation
    total = @p + @q
    Math.sqrt(@p * @q / (total**2 * (total + 1)))
  end

  # Density at +x+ (a number, or an Array of numbers).
  #
  # Each value is first passed to +check_range+. The end points 0.0 and 1.0
  # are reported as 0.0 without evaluating the logarithms; any other value is
  # computed in log space and exponentiated.
  def pdf(x)
    return x.map { |value| pdf(value) } if x.is_a?(Array)

    check_range(x)
    return 0.0 if x == 0.0 || x == 1.0

    Math.exp(-log_beta(@p, @q) + (@p - 1.0) * Math.log(x) + (@q - 1.0) * Math.log(1.0 - x))
  end

  # Cumulative probability at +x+ (a number, or an Array of numbers),
  # obtained from +incomplete_beta+ after +check_range+ has accepted the value.
  def cdf(x)
    return x.map { |value| cdf(value) } if x.is_a?(Array)

    check_range(x)
    incomplete_beta(x, @p, @q)
  end

  # Inverse of +cdf+ for +prob+ (a number, or an Array of numbers).
  #
  # The probabilities 0.0 and 1.0 map directly to 0.0 and 1.0; everything
  # else is solved with +find_root+, starting from 0.5 on the interval
  # [0.0, 1.0]. Array elements go through exactly the same scalar path, so
  # the boundary shortcuts apply to them as well.
  def icdf(prob)
    return prob.map { |value| icdf(value) } if prob.is_a?(Array)

    check_range(prob)
    return 0.0 if prob == 0.0
    return 1.0 if prob == 1.0

    find_root(prob, 0.5, 0.0, 1.0)
  end
end
|
87
|
-
|
data/lib/binomial_distribution.rb
DELETED
@@ -1,194 +0,0 @@
|
|
1
|
-
require 'probability_distribution'
|
2
|
-
# This class provides an object for encapsulating binomial distributions
|
3
|
-
# Ported to Ruby from PHPMath class by Bryan Donovan
|
4
|
-
# Author:: Mark Hale
|
5
|
-
# Author:: Paul Meagher
|
6
|
-
# Author:: Bryan Donovan (mailto:bryandonovan@myrealbox.com)
|
7
|
-
# Binomial distribution for a fixed number of trials and a fixed per-trial
# success probability.
#
# This class relies on helpers it does not define itself: +check_range+ and
# +find_root+ (presumably inherited from ProbabilityDistribution), and
# +binomial+ / +log_gamma+ (presumably supplied by the included ExtraMath and
# SpecialMath modules).
class BinomialDistribution < ProbabilityDistribution
  include NumericalConstants
  include SpecialMath
  include ExtraMath

  # Constructs a binomial distribution.
  #
  # trials:: number of trials; must be greater than 0
  # prob::   success probability; must lie in [0, 1]
  #
  # Raises RuntimeError when either argument is out of range.
  def initialize(trials, prob)
    raise "Error: trials must be greater than 0" if trials <= 0
    @n = trials

    raise "Error: prob must be between 0 and 1" if prob < 0.0 || prob > 1.0
    @p = prob
  end

  # Returns the number of trials.
  def get_trials_parameter
    @n
  end

  # Returns the success probability.
  def get_probability_parameter
    @p
  end

  # Returns the mean, n * p.
  def get_mean
    @n * @p
  end

  # Returns the variance, n * p * (1 - p).
  def variance
    @n * @p * (1.0 - @p)
  end

  # Probability mass function (equivalent to R's dbinom).
  #
  # _x:: an integer count, or an Array of integer counts
  #
  # Returns P(x = _x) = binomial(n, _x) * p^_x * (1 - p)^(n - _x) for a
  # single value, or an Array of such probabilities for an Array argument.
  # Every value is validated with check_range(value, 0.0, n) first.
  def pdf(_x)
    return _x.map { |value| pdf(value) } if _x.is_a?(Array)

    check_range(_x, 0.0, @n)
    binomial(@n, _x) * @p**_x * (1 - @p)**(@n - _x)
  end

  # Cumulative distribution function (equivalent to R's pbinom).
  #
  # _x:: an integer count, or an Array of integer counts
  #
  # Returns P(x <= _x) for a single value, or an Array of such probabilities
  # for an Array argument.
  def cdf(_x)
    return _x.map { |value| get_cdf(value) } if _x.is_a?(Array)

    get_cdf(_x)
  end

  # Inverse of the cumulative distribution function (equivalent to R's
  # qbinom).
  #
  # prob:: a probability, or an Array of probabilities
  #
  # Each probability is validated with +check_range+ and then solved with
  # +find_root+ (initial guess n/2, interval [0.0, n]); the root is floored.
  def get_icdf(prob)
    return prob.map { |value| get_icdf(value) } if prob.is_a?(Array)

    check_range(prob)
    find_root(prob, @n / 2, 0.0, @n).floor
  end

  # Draws random deviates (equivalent to R's rbinom).
  #
  # num_vals:: how many deviates to draw (default 1)
  #
  # Returns a single deviate when num_vals is 1, otherwise an Array of them.
  # Raises RuntimeError when num_vals is below 1.
  def rng(num_vals = 1)
    raise "Error num_vals must be greater than or equal to 1" if num_vals < 1
    return get_rng if num_vals == 1

    (0...num_vals).map { get_rng }
  end

  # Private methods below

  private

  # Sums pdf(0) through pdf(_x) inclusive, i.e. P(x <= _x).
  # _x should be integer-valued; it is validated with
  # check_range(_x, 0.0, n) before summing.
  def get_cdf(_x)
    check_range(_x, 0.0, @n)
    (0.._x).inject(0.0) { |sum, k| sum + pdf(k) }
  end

  # Draws one binomial deviate, following the three-regime scheme of the
  # bnldev routine from Press et al. (Numerical Recipes, section 7.3):
  #
  # * n < 25: run the n Bernoulli trials directly and count successes;
  # * expected count below 1: multiply uniforms until the product drops
  #   under exp(-n*p), which counts events of a near-Poisson process;
  # * otherwise: rejection sampling with a Lorentzian comparison function.
  #
  # Works with min(p, 1 - p) and mirrors the result at the end so the
  # sampled probability never exceeds 0.5.
  def get_rng
    p = @p <= 0.5 ? @p : 1.0 - @p
    am = @n * p # expected number of successes under the reduced p

    if @n < 25
      # Inclusive range: exactly n trials.
      bnl = (1..@n).count { Kernel.rand < p }
    elsif am < 1.0
      g = Math.exp(-am)
      t = 1.0
      bnl = @n # used when the product never falls below g
      (0..@n).each do |j|
        t *= Kernel.rand
        if t < g
          bnl = j
          break
        end
      end
    else
      en = @n
      oldg = log_gamma(en + 1.0)
      pc = 1.0 - p
      plog = Math.log(p)
      pclog = Math.log(pc)
      sq = Math.sqrt(2.0 * am * pc)
      em = nil

      # Both loops are post-tested (do/while in the reference routine): a
      # candidate must be generated before it can be accepted or rejected.
      loop do
        y = nil
        loop do
          y = Math.tan(Math::PI * Kernel.rand)
          em = sq * y + am
          # Accept only candidates inside [0, n + 1).
          break if em >= 0.0 && em < en + 1.0
        end
        em = em.floor
        # Ratio of the target probability to the comparison function.
        t = 1.2 * sq * (1.0 + y * y) *
            Math.exp(oldg - log_gamma(em + 1.0) -
                     log_gamma(en - em + 1.0) + em * plog + (en - em) * pclog)
        break if Kernel.rand <= t
      end
      bnl = em
    end

    # Undo the p -> 1 - p reduction applied at the top.
    bnl = @n - bnl if p != @p
    bnl
  end
end
|
194
|
-
|
data/lib/fishers_exact_test.rb
DELETED
@@ -1,171 +0,0 @@
|
|
1
|
-
#! /usr/local/bin/ruby
|
2
|
-
|
3
|
-
# Fisher's Exact Test Function Library
|
4
|
-
#
|
5
|
-
# Based on JavaScript version created by: Oyvind Langsrud
|
6
|
-
# Ported to Ruby by Bryan Donovan
|
7
|
-
|
8
|
-
# Fisher's exact test for a 2x2 contingency table.
#
# The public entry point is +calculate+; the remaining methods are the
# numerical building blocks it uses. An instance caches the most recently
# evaluated table and probability so that neighbouring hypergeometric terms
# can be derived by a cheap multiplicative update instead of a full
# recomputation.
class FishersExactTest
  # [coefficient, offset] pairs of the Lanczos series, in summation order;
  # each contributes coefficient / (z + offset).
  LANCZOS_TERMS = [
    [0.0000001659470187408462, 7],
    [0.000009934937113930748, 6],
    [-0.1385710331296526, 5],
    [12.50734324009056, 4],
    [-176.6150291498386, 3],
    [771.3234287757674, 2],
    [-1259.139216722289, 1],
    [676.5203681218835, 0]
  ].freeze

  def initialize
    # Last table seen by hyper0 (cell n11, row-1 total, column-1 total,
    # grand total) and the probability computed for it.
    @sn11 = 0.0
    @sn1_ = 0.0
    @sn_1 = 0.0
    @sn = 0.0
    @sprob = 0.0

    # Tail sums filled in by exact.
    @sleft = 0.0
    @sright = 0.0
    @sless = 0.0
    @slarg = 0.0
  end

  # Natural log of the gamma function at +z+.
  #
  # Reference: "Lanczos, C. 'A precision approximation
  # of the gamma function', J. SIAM Numer. Anal., B, 1, 86-96, 1964."
  # Translation of Alan Miller's FORTRAN-implementation
  # See http://lib.stat.cmu.edu/apstat/245
  def lngamm(z)
    series = 0
    LANCZOS_TERMS.each { |coefficient, offset| series += coefficient / (z + offset) }
    series += 0.9999999999995183

    Math.log(series) - 5.58106146679532777 - z + (z - 0.5) * Math.log(z + 6.5)
  end

  # Natural log of n!; returns 0 for n <= 1.
  def lnfact(n)
    n <= 1 ? 0 : lngamm(n + 1)
  end

  # Natural log of the binomial coefficient "n choose k".
  def lnbico(n, k)
    lnfact(n) - lnfact(k) - lnfact(n - k)
  end

  # Hypergeometric probability of a table with upper-left cell +n11+, first
  # row total +n1_+, first column total +n_1+ and grand total +n+, computed
  # from log binomial coefficients.
  def hyper_323(n11, n1_, n_1, n)
    Math.exp(lnbico(n1_, n11) + lnbico(n - n1_, n_1 - n11) - lnbico(n, n_1))
  end

  # Probability of upper-left cell +n11+ using the margins remembered from
  # the previous hyper0 call.
  def hyper(n11)
    hyper0(n11, 0, 0, 0)
  end

  # Probability of upper-left cell +n11i+.
  #
  # When all three margins are passed as 0 the cached margins are reused. In
  # that mode, if +n11i+ is one step above or below the cached cell and is
  # not a multiple of 10, the cached probability is updated by the ratio of
  # adjacent terms; otherwise (including every multiple of 10) it is
  # recomputed from scratch with hyper_323. Non-zero margins replace the
  # cache and always trigger a full computation.
  def hyper0(n11i, n1_i, n_1i, ni)
    if n1_i == 0 && n_1i == 0 && ni == 0
      unless n11i % 10 == 0
        if n11i == @sn11 + 1
          @sprob *= ((@sn1_ - @sn11) / n11i.to_f) *
                    ((@sn_1 - @sn11) / (n11i.to_f + @sn - @sn1_ - @sn_1))
          @sn11 = n11i
          return @sprob
        end
        if n11i == @sn11 - 1
          @sprob *= (@sn11 / (@sn1_ - n11i.to_f)) *
                    ((@sn11 + @sn - @sn1_ - @sn_1) / (@sn_1 - n11i.to_f))
          @sn11 = n11i
          return @sprob
        end
      end
      @sn11 = n11i
    else
      @sn11 = n11i
      @sn1_ = n1_i
      @sn_1 = n_1i
      @sn = ni
    end
    @sprob = hyper_323(@sn11, @sn1_, @sn_1, @sn)
  end

  # Computes the probability of the observed table and, as a side effect,
  # the tail sums @sleft, @sright, @sless and @slarg.
  #
  # n11:: observed upper-left cell
  # n1_:: first row total
  # n_1:: first column total
  # n::   grand total
  #
  # Returns the probability of the observed table, or 1 when the margins
  # allow only a single table.
  def exact(n11, n1_, n_1, n)
    # Range of values the upper-left cell can take under these margins.
    max = n_1 < n1_ ? n_1 : n1_
    min = n1_ + n_1 - n
    min = 0 if min < 0

    if min == max
      @sless = 1
      @sright = 1
      @sleft = 1
      @slarg = 1
      return 1
    end

    prob = hyper0(n11, n1_, n_1, n)

    # Walk up from the smallest cell value, accumulating terms that are
    # smaller than the observed probability (with a relative tolerance).
    @sleft = 0
    p = hyper(min)
    i = min + 1
    while p < (0.99999999 * prob)
      @sleft += p
      p = hyper(i)
      i += 1
    end
    i -= 1
    # The stopping term is included only if it ties the observed probability.
    if p < (1.00000001 * prob)
      @sleft += p
    else
      i -= 1
    end

    # Same walk, downwards from the largest cell value.
    @sright = 0
    p = hyper(max)
    j = max - 1
    while p < (0.99999999 * prob)
      @sright += p
      p = hyper(j)
      j -= 1
    end
    j += 1
    if p < (1.00000001 * prob)
      @sright += p
    else
      j += 1
    end

    # Whichever walk stopped closer to the observed cell determines which
    # accumulated sum serves directly as a one-sided tail.
    if (i - n11).abs < (j - n11).abs
      @sless = @sleft
      @slarg = 1 - @sleft + prob
    else
      @sless = 1 - @sright + prob
      @slarg = @sright
    end
    prob
  end

  # Runs the test on the 2x2 table
  #
  #   n11_  n12_
  #   n21_  n22_
  #
  # Negative counts are replaced by their absolute value.
  #
  # Returns a Hash with the keys :left, :right and :twotail; :twotail is
  # capped at 1.
  def calculate(n11_, n12_, n21_, n22_)
    n11_ = -n11_ if n11_ < 0
    n12_ = -n12_ if n12_ < 0
    n21_ = -n21_ if n21_ < 0
    n22_ = -n22_ if n22_ < 0

    row_total = n11_ + n12_
    column_total = n11_ + n21_
    grand_total = n11_ + n12_ + n21_ + n22_
    exact(n11_, row_total, column_total, grand_total)

    twotail = @sleft + @sright
    twotail = 1 if twotail > 1
    { :left => @sless, :right => @slarg, :twotail => twotail }
  end
end
|