rubystats 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +9 -0
- data/examples/beta.rb +36 -0
- data/examples/binomial.rb +20 -0
- data/examples/fisher.rb +25 -0
- data/examples/norm.rb +8 -0
- data/lib/beta_distribution.rb +87 -0
- data/lib/binomial_distribution.rb +194 -0
- data/lib/fishers_exact_test.rb +171 -0
- data/lib/modules/extra_math.rb +7 -0
- data/lib/modules/numerical_constants.rb +17 -0
- data/lib/modules/special_math.rb +721 -0
- data/lib/normal_distribution.rb +114 -0
- data/lib/probability_distribution.rb +132 -0
- data/tests/tc_beta.rb +78 -0
- data/tests/tc_binomial.rb +22 -0
- data/tests/tc_fisher.rb +20 -0
- data/tests/tc_norm.rb +13 -0
- data/tests/ts_stats.rb +5 -0
- metadata +61 -0
@@ -0,0 +1,114 @@
|
|
1
|
+
require 'probability_distribution'
|
2
|
+
require 'modules/special_math'
|
3
|
+
# This class provides an object for encapsulating normal distributions
|
4
|
+
# Ported to Ruby from PHPMath class by Bryan Donovan
|
5
|
+
# Author:: Jaco van Kooten
|
6
|
+
# Author:: Paul Meagher
|
7
|
+
# Author:: Bryan Donovan (mailto:bryandonovan@myrealbox.com)
|
8
|
+
|
9
|
+
class NormalDistribution < ProbabilityDistribution
|
10
|
+
include SpecialMath
|
11
|
+
|
12
|
+
# Constructs a normal distribution (defaults to zero mean and
# unit variance).
#
# mu::    mean of the distribution
# sigma:: standard deviation; must be strictly positive
#
# Raises ArgumentError when sigma is not positive. The previous code ran
# <tt>return "error"</tt>, which a constructor silently discards, leaving a
# half-initialised object behind.
def initialize(mu=0.0, sigma=1.0)
  raise ArgumentError, "sigma must be greater than zero" if sigma <= 0.0

  @mean = mu
  @stdev = sigma
  @variance = sigma**2
  # Normalising constants cached for get_pdf and get_cdf.
  @pdf_denominator = Sqrt2pi * Math.sqrt(@variance)
  @cdf_denominator = Sqrt2 * Math.sqrt(@variance)
end
|
24
|
+
|
25
|
+
# Returns the mean of the distribution (the mu given to the constructor).
def get_mean
  @mean
end
|
29
|
+
|
30
|
+
# Returns the standard deviation of the distribution (the sigma given to
# the constructor).
def get_standard_deviation
  @stdev
end
|
34
|
+
|
35
|
+
# Returns the variance of the distribution (sigma squared).
def get_variance
  @variance
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
# Obtain a single PDF value.
# Returns the probability density of the distribution at x. For this
# continuous distribution the value is a density, not the probability
# P(x=X).
def get_pdf(x)
  deviation = x - @mean
  # 2.0 forces floating-point division: with Integer x, mean and variance
  # the exponent was previously floored by integer division (-1 / 8 => -1).
  Math.exp(-(deviation**2) / (2.0 * @variance)) / @pdf_denominator
end
|
48
|
+
|
49
|
+
# Obtain a single CDF value.
# Returns the probability that a stochastic variable x is less than X,
# i.e. P(x<X), computed as complementary_error(-z) / 2 where
# z = (x - mean) / @cdf_denominator.
def get_cdf(x)
  standardised = (x - @mean) / @cdf_denominator
  complementary_error(-standardised) / 2.0
end
|
55
|
+
|
56
|
+
# Obtain a single inverse CDF (quantile) value.
# Returns the value X for which P(x<X) equals p.
#
# p must lie in [0, 1]; p == 0 maps to -Max_value, p == 1 to Max_value and
# p == 0.5 to the mean. Raises ArgumentError for p outside [0, 1] (the
# result of check_range used to be ignored, so such values went on to the
# root finder).
def get_icdf(p)
  raise ArgumentError, "p must be between 0.0 and 1.0" if p < 0.0 || p > 1.0
  return -Max_value if p == 0.0
  return Max_value if p == 1.0
  return @mean if p == 0.5

  # find_root evaluates cdf/pdf on this object, so temporarily turn it into
  # a standard normal, solve there, and rescale the root afterwards.
  saved = [@mean, @variance, @pdf_denominator, @cdf_denominator]
  begin
    @mean = 0.0
    @variance = 1.0
    @pdf_denominator = Math.sqrt(Two_pi)
    @cdf_denominator = Sqrt2
    z = find_root(p, 0.0, -100.0, 100.0)
  ensure
    # Restore the real parameters even when find_root raises.
    @mean, @variance, @pdf_denominator, @cdf_denominator = saved
  end
  # scale back
  z * Math.sqrt(@variance) + @mean
end
|
86
|
+
|
87
|
+
# Uses the polar form of the Box-Muller transformation, which is both
# faster and more robust numerically than the basic Box-Muller transform.
# Each pass through the sampling loop yields two deviates: the first is
# returned, the second is stored in @last and handed out by the next call.
# see http://www.taygeta.com/random/gaussian.html
# Returns a single normal deviate scaled to this distribution's mean and
# variance.
def get_rng
  if @use_last
    # Hand out the deviate left over from the previous call.
    y1 = @last
    @use_last = false
  else
    # Rejection-sample a point (x1, x2) strictly inside the unit circle.
    # w is the squared radius (a sum of squares, not a product) and must be
    # non-zero so that Math.log(w) / w stays finite. w is initialised before
    # the loop because the condition reads it on the first pass.
    w = 0.0
    until w > 0.0 && w < 1.0
      x1 = 2.0 * Kernel.rand - 1.0
      x2 = 2.0 * Kernel.rand - 1.0
      w = x1 * x1 + x2 * x2
    end
    scale = Math.sqrt((-2.0 * Math.log(w)) / w)
    y1 = x1 * scale
    @last = x2 * scale
    @use_last = true
  end
  @mean + y1 * Math.sqrt(@variance)
end
|
113
|
+
end
|
114
|
+
|
@@ -0,0 +1,132 @@
|
|
1
|
+
require 'modules/numerical_constants'
|
2
|
+
require 'modules/extra_math'
|
3
|
+
require 'modules/special_math'
|
4
|
+
|
5
|
+
class ProbabilityDistribution
|
6
|
+
include NumericalConstants
|
7
|
+
include SpecialMath
|
8
|
+
include ExtraMath
|
9
|
+
|
10
|
+
# Base constructor: takes no arguments and sets no state.
def initialize
end
|
12
|
+
|
13
|
+
# Public accessor for the distribution mean; delegates to get_mean.
def mean
  get_mean
end
|
16
|
+
|
17
|
+
# Hook behind #mean. This base version does nothing and returns nil;
# NormalDistribution overrides it.
def get_mean
  nil
end
|
19
|
+
|
20
|
+
|
21
|
+
# Evaluates the PDF at x.
# Given an Array, returns a new Array holding get_pdf of each element (an
# empty Array yields an empty Array); any other argument is passed straight
# to get_pdf.
def pdf(x)
  # The former index loop ran over 0..x.length (inclusive) and therefore
  # also evaluated get_pdf(nil) for the slot one past the end.
  return x.map { |value| get_pdf(value) } if x.is_a?(Array)

  get_pdf(x)
end
|
32
|
+
|
33
|
+
# Hook behind #pdf for a single value. This base version does nothing and
# returns nil; NormalDistribution overrides it.
def get_pdf(x)
  nil
end
|
35
|
+
|
36
|
+
# Evaluates the CDF at x.
# Given an Array, returns a new Array holding get_cdf of each element;
# any other argument is passed straight to get_cdf.
def cdf(x)
  return x.map { |value| get_cdf(value) } if x.is_a?(Array)

  get_cdf(x)
end
|
47
|
+
|
48
|
+
# Hook behind #cdf for a single value. This base version does nothing and
# returns nil; NormalDistribution overrides it.
def get_cdf(x)
  nil
end
|
50
|
+
|
51
|
+
# Evaluates the inverse CDF at p.
# Given an Array, returns a new Array holding get_icdf of each element (an
# empty Array yields an empty Array); any other argument is passed straight
# to get_icdf.
def icdf(p)
  # The former index loop ran over 0..p.length (inclusive) and therefore
  # also evaluated get_icdf(nil) for the slot one past the end.
  return p.map { |prob| get_icdf(prob) } if p.is_a?(Array)

  get_icdf(p)
end
|
62
|
+
|
63
|
+
# Hook behind #icdf for a single probability. This base version does
# nothing and returns nil; NormalDistribution overrides it.
def get_icdf(p)
  nil
end
|
65
|
+
|
66
|
+
# Draws random deviates via get_rng.
#
# n:: how many deviates to draw (default 1)
#
# Returns a single value when n is 1 and an Array of exactly n values when
# n is greater than 1 (the former 0..n loop produced n + 1 values).
# Raises ArgumentError when n is below 1; the message used to be returned
# as a String in place of the random numbers.
def rng(n=1)
  raise ArgumentError, "Number of random numbers to return must be 1 or greater" if n < 1
  return get_rng unless n > 1

  Array.new(n) { get_rng }
end
|
80
|
+
|
81
|
+
# Hook behind #rng for a single deviate. This base version does nothing and
# returns nil; NormalDistribution overrides it.
def get_rng
  nil
end
|
83
|
+
|
84
|
+
# Validates that x lies in the closed interval [lo, hi] (default [0, 1]).
#
# Returns nil when x is in range. Raises ArgumentError otherwise; the
# former return value, the String "error", was discarded by callers such as
# NormalDistribution#get_icdf, so out-of-range input passed unnoticed.
def check_range(x, lo=0.0, hi=1.0)
  return nil unless x < lo || x > hi

  raise ArgumentError, "argument #{x} must be between #{lo} and #{hi}"
end
|
89
|
+
|
90
|
+
# Returns n * (n - 1) * (n - 2) * ... for as long as the factor is greater
# than 1, i.e. n! for non-negative Integers. Returns 1 for n <= 1.
#
# Iterative so that large n cannot exhaust the call stack. The factors are
# multiplied smallest-first, in the same order as the recursive definition
# n * get_factorial(n - 1).
def get_factorial(n)
  factors = []
  while n > 1
    factors << n
    n -= 1
  end
  factors.reverse_each.inject(1) { |product, factor| factor * product }
end
|
97
|
+
|
98
|
+
# Solves cdf(x) == prob for x using Newton-Raphson steps with a bisection
# fallback.
#
# prob::  target cumulative probability
# guess:: starting value for x
# x_lo::  lower end of the bracket around the root
# x_hi::  upper end of the bracket around the root
#
# Iterates until the last step is no larger than 1.0e-10 in magnitude or
# the iteration cap of 150 is reached, then returns the current x.
def find_root(prob, guess, x_lo, x_hi)
  accuracy = 1.0e-10
  max_iteration = 150
  x = guess
  x_new = guess
  dx = 1000.0
  i = 0
  while dx.abs > accuracy && (i += 1) < max_iteration
    # Tighten the bracket on whichever side of the root x lies.
    error = cdf(x) - prob
    if error < 0.0
      x_lo = x
    else
      x_hi = x
    end
    # Apply the Newton-Raphson step (skipped where the density is zero).
    density = pdf(x)
    if density != 0.0
      dx = error / density
      x_new = x - dx
    end
    # If the Newton-Raphson step leaves the bracket (which may happen when
    # the initial guess is too rough) or could not be taken, apply a
    # bisection step to narrow the interval around the root instead.
    if x_new < x_lo || x_new > x_hi || density == 0.0
      x_new = (x_lo + x_hi) / 2.0
      dx = x_new - x
    end
    x = x_new
  end
  x
end
|
131
|
+
end
|
132
|
+
|
data/tests/tc_beta.rb
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
2
|
+
require 'test/unit'
|
3
|
+
require 'beta_distribution'
|
4
|
+
|
5
|
+
# Regression tests for BetaDistribution: mean, standard deviation, pdf, cdf
# and icdf against reference values, plus control limits derived from icdf.
#
# Values are compared numerically with assert_in_delta. Comparing the
# Float#to_s / Array#to_s output against string literals, as before, makes
# the result depend on how the running Ruby formats floats and arrays.
class TestBeta < Test::Unit::TestCase
  # Absolute tolerance; the reference values carry 15 significant digits.
  DELTA = 1.0e-12

  def test_simple
    p = 12
    q = 59
    beta = BetaDistribution.new(p, q)
    assert_in_delta(0.169014084507042, beta.mean, DELTA)
    assert_in_delta(0.0441664031038187, beta.standard_deviation, DELTA)
    assert_in_delta(6.26075815849967, beta.pdf(0.2), DELTA)
    assert_in_delta(0.999999997766913, beta.cdf(0.50), DELTA)
    assert_in_delta(0.102003194113565, beta.icdf(0.05), DELTA)

    x_vals = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35]
    expected_pvals = [2.83232625227534,
                      8.89978000366836,
                      6.26075815849967,
                      1.72572305993386,
                      0.234475706454223,
                      0.0173700433944934]
    expected_cvals = [0.0440640755091473,
                      0.356009606171447,
                      0.768319101921981,
                      0.956058132801147,
                      0.995358286711105,
                      0.99971672771575]
    assert_all_in_delta(expected_pvals, beta.pdf(x_vals))
    assert_all_in_delta(expected_cvals, beta.cdf(x_vals))

    # icdf must invert cdf.
    x_vals.each do |x|
      assert_in_delta(x, beta.icdf(beta.cdf(x)), 0.00000001)
    end
  end

  def test_control_limits
    trials = 50
    alpha = 0.05
    p = 10
    assert_in_delta(0.112721613414076, get_lower_limit(trials, alpha, p), DELTA)
    assert_in_delta(0.315596061420013, get_upper_limit(trials, alpha, p), DELTA)

    trials = 210
    alpha = 0.10
    p = 47
    assert_in_delta(0.186679485269901, get_lower_limit(trials, alpha, p), DELTA)
    assert_in_delta(0.264957601783544, get_upper_limit(trials, alpha, p), DELTA)
  end

  # Lower control limit: 0 when p is 0, otherwise the alpha quantile of
  # Beta(p, trials - p + 1).
  def get_lower_limit(trials, alpha, p)
    return 0 if p == 0

    BetaDistribution.new(p, trials - p + 1).icdf(alpha)
  end

  # Upper control limit: the (1 - alpha) quantile of Beta(p + 1, trials - p).
  def get_upper_limit(trials, alpha, p)
    BetaDistribution.new(p + 1, trials - p).icdf(1 - alpha)
  end

  private

  # Asserts that both arrays have the same length and agree element by
  # element within DELTA.
  def assert_all_in_delta(expected, actual)
    assert_equal(expected.size, actual.size)
    expected.zip(actual).each { |want, got| assert_in_delta(want, got, DELTA) }
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
2
|
+
require 'test/unit'
|
3
|
+
require 'binomial_distribution'
|
4
|
+
|
5
|
+
# Regression test for BinomialDistribution (100 trials, p = 0.05): pdf and
# cdf at 7, the mean, and icdf as the inverse of cdf.
#
# Floats are compared numerically with assert_in_delta; comparing their
# to_s output against string literals depends on how the running Ruby
# formats floats.
class TestBinomial < Test::Unit::TestCase
  # Absolute tolerance for the reference values.
  DELTA = 1.0e-12

  def test_simple
    t = 100
    f = 7
    p = 0.05

    bin = BinomialDistribution.new(t, p)
    cdf = bin.cdf(f)

    assert_in_delta(0.10602553736479, bin.pdf(f), DELTA)
    assert_in_delta(0.872039521379601, cdf, DELTA)
    assert_in_delta(5.0, bin.mean, DELTA)
    assert_equal(f, bin.icdf(cdf))
  end
end
|
data/tests/tc_fisher.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
2
|
+
require 'test/unit'
|
3
|
+
require 'fishers_exact_test'
|
4
|
+
|
5
|
+
# Regression test for FishersExactTest#calculate: left, right and two-tail
# results for one 2x2 table.
#
# Floats are compared numerically with assert_in_delta; comparing their
# to_s output against string literals depends on how the running Ruby
# formats floats.
class TestFisher < Test::Unit::TestCase
  # Absolute tolerance for the reference values.
  DELTA = 1.0e-12

  def test_simple
    tested1 = 20
    tested2 = 30
    f1 = 10
    f2 = 10
    t1 = tested1 - f1
    t2 = tested2 - f2
    fisher = FishersExactTest.new.calculate(t1, t2, f1, f2)

    assert_in_delta(0.188301375769922, fisher[:left], DELTA)
    assert_in_delta(0.929481131661052, fisher[:right], DELTA)
    assert_in_delta(0.257549242810992, fisher[:twotail], DELTA)
  end
end
|
data/tests/tc_norm.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
2
|
+
require 'test/unit'
|
3
|
+
require 'normal_distribution'
|
4
|
+
|
5
|
+
# Regression test for NormalDistribution: cdf of N(mean 10, sigma 2) at 11.
#
# The float is compared numerically with assert_in_delta; comparing its
# to_s output against a string literal depends on how the running Ruby
# formats floats.
class TestNormal < Test::Unit::TestCase
  # Absolute tolerance for the reference value.
  DELTA = 1.0e-12

  def test_simple
    norm = NormalDistribution.new(10, 2)

    assert_in_delta(0.691462461274013, norm.cdf(11), DELTA)
  end
end
|
data/tests/ts_stats.rb
ADDED
metadata
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.8.11
|
3
|
+
specification_version: 1
|
4
|
+
name: rubystats
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 0.1.0
|
7
|
+
date: 2006-01-04 00:00:00 -08:00
|
8
|
+
summary: "Classes for statistical calculations, e.g., binomial, beta, and normal
|
9
|
+
distributions with PDF, CDF and inverse CDF (all ported from PHPMath) as well as
|
10
|
+
Fisher's Exact Test"
|
11
|
+
require_paths:
|
12
|
+
- lib
|
13
|
+
email: Bryandonovan@myrealbox.com
|
14
|
+
homepage: http://www.bryandonovan.com
|
15
|
+
rubyforge_project:
|
16
|
+
description:
|
17
|
+
autorequire:
|
18
|
+
default_executable:
|
19
|
+
bindir: bin
|
20
|
+
has_rdoc: true
|
21
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
22
|
+
requirements:
|
23
|
+
-
|
24
|
+
- ">"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.0.0
|
27
|
+
version:
|
28
|
+
platform: ruby
|
29
|
+
signing_key:
|
30
|
+
cert_chain:
|
31
|
+
authors:
|
32
|
+
- Bryan Donovan
|
33
|
+
files:
|
34
|
+
- lib/beta_distribution.rb
|
35
|
+
- lib/binomial_distribution.rb
|
36
|
+
- lib/fishers_exact_test.rb
|
37
|
+
- lib/normal_distribution.rb
|
38
|
+
- lib/probability_distribution.rb
|
39
|
+
- lib/modules
|
40
|
+
- lib/modules/special_math.rb
|
41
|
+
- lib/modules/extra_math.rb
|
42
|
+
- lib/modules/numerical_constants.rb
|
43
|
+
- tests/tc_beta.rb
|
44
|
+
- tests/tc_fisher.rb
|
45
|
+
- tests/tc_binomial.rb
|
46
|
+
- tests/ts_stats.rb
|
47
|
+
- tests/tc_norm.rb
|
48
|
+
- examples/fisher.rb
|
49
|
+
- examples/binomial.rb
|
50
|
+
- examples/norm.rb
|
51
|
+
- examples/beta.rb
|
52
|
+
- README
|
53
|
+
test_files:
|
54
|
+
- tests/ts_stats.rb
|
55
|
+
rdoc_options: []
|
56
|
+
extra_rdoc_files:
|
57
|
+
- README
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
requirements: []
|
61
|
+
dependencies: []
|