rubystats 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ === 0.2.0 / 2008-04-14
2
+
3
+ * Major reorganization of code.
4
+ * Added lib/rubystats subdirectory and namespaced all classes under the Rubystats module.
5
+ * Added another example or two and fixed bug #16827.
6
+ * Should not break the old API.
7
+ * Now using Hoe to manage the gem.
@@ -0,0 +1,22 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ examples/beta.rb
6
+ examples/binomial.rb
7
+ examples/failrate_vs_goal.rb
8
+ examples/fisher.rb
9
+ examples/norm.rb
10
+ lib/rubystats.rb
11
+ lib/rubystats/beta_distribution.rb
12
+ lib/rubystats/binomial_distribution.rb
13
+ lib/rubystats/fishers_exact_test.rb
14
+ lib/rubystats/modules.rb
15
+ lib/rubystats/normal_distribution.rb
16
+ lib/rubystats/probability_distribution.rb
17
+ test/tc_beta.rb
18
+ test/tc_binomial.rb
19
+ test/tc_fisher.rb
20
+ test/tc_norm.rb
21
+ test/tc_require_all.rb
22
+ test/ts_stats.rb
@@ -0,0 +1,109 @@
1
+ = rubystats
2
+
3
+ * http://rubyforge.org/projects/rubystats/
4
+
5
+ == DESCRIPTION:
6
+
7
+ This is a set of Ruby statistics libraries ported from the PHPMath libraries.
8
+ The PHPMath libraries were created by Paul Meagher (many of which were ported from
9
+ various sources).
10
+ See http://www.phpmath.com/ for PHPMath libraries.
11
+
12
+ See examples and tests for usage.
13
+
14
+ == NOTE for version 0.2.0:
15
+
16
+ The API has changed somewhat in version 0.2. All tests pass with the old API, but
17
+ now, if you want to load just one of the distributions, you must require it like this:
18
+
19
+ require 'rubystats/normal_distribution'
20
+
21
+ Then prefix the class name with the Rubystats module name:
22
+ norm = Rubystats::NormalDistribution.new(10, 2)
23
+
24
+ Alternatively, you can simply require 'rubystats' and have all the classes loaded, and
25
+ have the Rubystats module included.
26
+
27
+
28
+ == Author:
29
+ Bryan Donovan
30
+ 2006-2008
31
+
32
+
33
+ == WARNING
34
+ This is beta-quality software. It works well according to my tests, but the API may change and other features may be added.
35
+
36
+ == FEATURES:
37
+ Classes for distributions:
38
+
39
+ * Normal
40
+ * Binomial
41
+ * Beta
42
+
43
+ Also includes Fisher's Exact Test
44
+
45
+ == SYNOPSIS:
46
+ === Example: normal distribution with mean of 10 and standard deviation of 2
47
+
48
+ norm = Rubystats::NormalDistribution.new(10, 2)
49
+ cdf = norm.cdf(11)
50
+ pdf = norm.pdf(11)
51
+ puts "CDF(11): #{cdf}"
52
+ puts "PDF(11): #{pdf}"
53
+
54
+ Output:
55
+ CDF(11): 0.691462461274013
56
+ PDF(11): 0.0733813315868699
57
+
58
+ === Example: get some random numbers from a normal distribution
59
+
60
+ puts "Random numbers from normal distribution:"
61
+ 10.times do
62
+ puts norm.rng
63
+ end
64
+
65
+ (sample) Output:
66
+
67
+ 18.8877297946427
68
+ -15.4463065628574
69
+ 4.55538065315298
70
+ 17.0281528150355
71
+ 3.16543873165151
72
+ 2.48599492216993
73
+ 14.3947330544886
74
+ -3.47989062859462
75
+ 5.05832591294848
76
+ 31.2952983108343
77
+
78
+ == REQUIREMENTS:
79
+
80
+ * Ruby > 1.8.2 (may work with earlier versions)
81
+
82
+ == INSTALL:
83
+
84
+ * sudo gem install rubystats
85
+
86
+ == LICENSE:
87
+
88
+ (The MIT License)
89
+
90
+ Copyright (c) 2008
91
+
92
+ Permission is hereby granted, free of charge, to any person obtaining
93
+ a copy of this software and associated documentation files (the
94
+ 'Software'), to deal in the Software without restriction, including
95
+ without limitation the rights to use, copy, modify, merge, publish,
96
+ distribute, sublicense, and/or sell copies of the Software, and to
97
+ permit persons to whom the Software is furnished to do so, subject to
98
+ the following conditions:
99
+
100
+ The above copyright notice and this permission notice shall be
101
+ included in all copies or substantial portions of the Software.
102
+
103
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
104
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
105
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
106
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
107
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
108
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
109
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,19 @@
1
+ require 'rubygems'
2
+ require 'hoe'
3
+ $:.unshift(File.dirname(__FILE__) + "/lib")
4
+ require 'rubystats'
5
+
6
+ Hoe.new('rubystats', Rubystats::VERSION) do |p|
7
+ p.name = "rubystats"
8
+ p.author = "Bryan Donovan - http://www.bryandonovan.com"
9
+ p.email = "b.dondo+rubyforge@gmail.com"
10
+ p.description = "Ruby Stats is a port of the statistics libraries from PHPMath. Probability distributions include binomial, beta, and normal distributions with PDF, CDF and inverse CDF as well as Fisher's Exact Test."
11
+ p.summary = "Port of PHPMath to Ruby"
12
+ p.url = "http://rubyforge.org/projects/rubystats/"
13
+ p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
14
+ p.remote_rdoc_dir = '' # Release to root
15
+ end
16
+
17
+ rule '' do |t|
18
+ system "cd test && ruby ts_stats.rb"
19
+ end
@@ -1,22 +1,22 @@
1
1
  $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
- require 'beta_distribution'
2
+ require 'rubystats/beta_distribution'
3
3
 
4
4
  def get_lower_limit(trials,alpha,p)
5
- if p==0
6
- lcl=0
5
+ if p == 0
6
+ lcl = 0
7
7
  else
8
- q=trials-p+1
9
- bet= BetaDistribution.new(p,q)
10
- lcl=bet.icdf(alpha)
8
+ q = trials - p + 1
9
+ bet = Rubystats::BetaDistribution.new(p,q)
10
+ lcl = bet.icdf(alpha)
11
11
  end
12
12
  return lcl
13
13
  end
14
14
 
15
15
  def get_upper_limit(trials,alpha,p)
16
- q=trials-p
17
- p=p+1
18
- bet= BetaDistribution.new(p,q)
19
- ucl=bet.icdf(1-alpha)
16
+ q = trials - p
17
+ p = p + 1
18
+ bet = Rubystats::BetaDistribution.new(p,q)
19
+ ucl = bet.icdf(1-alpha)
20
20
  return ucl
21
21
  end
22
22
 
@@ -32,5 +32,3 @@ puts "lcl= "
32
32
  p lcl
33
33
  puts "ucl= "
34
34
  p ucl
35
-
36
-
@@ -1,20 +1,22 @@
1
1
  $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
- require 'binomial_distribution'
2
+ require 'rubystats/binomial_distribution'
3
3
 
4
4
  t = 100
5
5
  f = 7
6
6
  p = 0.05
7
7
 
8
8
 
9
- bin = BinomialDistribution.new(t,p)
9
+ bin = Rubystats::BinomialDistribution.new(t,p)
10
10
  f = f - 1
11
- mean = bin.mean
12
- puts mean
13
- for i in 1..5
14
- pdf = bin.pdf(i)
15
- cdf = bin.cdf(i)
16
- inv = bin.icdf(cdf)
17
- puts inv
18
- puts "#{i}: #{pdf} : #{cdf}"
11
+ mean = bin.mean
12
+ puts mean
13
+
14
+ for i in 1..12
15
+ pdf = bin.pdf(i)
16
+ cdf = bin.cdf(i)
17
+ inv = bin.icdf(cdf)
18
+ pval = 1 - cdf
19
+ # puts inv
20
+ puts "#{i}: #{pdf} : #{cdf}: pval: #{pval}"
19
21
  end
20
22
 
@@ -0,0 +1,28 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
+ require 'rubystats/binomial_distribution'
3
+
4
+ # Manufacturing example.
5
+ # We have 10 different-sized batches of units that
6
+ # get tested in our process. We want to see if,
7
+ # at > 95% confidence, the fail rate for any of
8
+ # those batches is worse than our goal fail rate
9
+ # of 10%
10
+
11
+ tested = [100, 68, 67, 96, 46, 2, 13, 33, 88, 71]
12
+ failed = [12, 9, 12, 7, 7, 0, 6, 4, 5, 5]
13
+
14
+ bad_fail_rate = 0.10
15
+ alpha = 0.05
16
+
17
+
18
+ for i in 0..9
19
+ t = tested[i]
20
+ f = failed[i]
21
+ bin = Rubystats::BinomialDistribution.new(t,bad_fail_rate)
22
+ pdf = bin.pdf(f-1)
23
+ cdf = bin.cdf(f-1)
24
+ pval = 1 - cdf
25
+ pval = sprintf("%.3f",pval).to_f
26
+ status = pval <= alpha ? "RED ALERT" : "OK"
27
+ puts "Tested: #{t}\tFailed: #{f}\tpval: #{pval}\tStatus:#{status}"
28
+ end
@@ -1,5 +1,5 @@
1
1
  $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
- require 'fishers_exact_test'
2
+ require 'rubystats/fishers_exact_test'
3
3
  require 'pp'
4
4
 
5
5
  tested1 = 20
@@ -10,7 +10,7 @@ f2 = 10
10
10
  t1 = tested1 - f1
11
11
  t2 = tested2 - f2
12
12
 
13
- fet = FishersExactTest.new
13
+ fet = Rubystats::FishersExactTest.new
14
14
 
15
15
  fisher = fet.calculate(t1,t2,f1,f2)
16
16
 
@@ -19,7 +19,3 @@ pp fisher
19
19
  perc = 100 * (1.0 - fisher[:twotail])
20
20
 
21
21
  pp perc
22
-
23
-
24
-
25
-
@@ -1,8 +1,14 @@
1
1
  $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
- require 'normal_distribution'
2
+ require 'rubystats/normal_distribution'
3
3
 
4
- norm = NormalDistribution.new(10, 2)
4
+ #normal distribution with mean of 10 and standard deviation of 2
5
+ norm = Rubystats::NormalDistribution.new(10, 2)
5
6
  cdf = norm.cdf(11)
6
7
  pdf = norm.pdf(11)
7
- puts cdf
8
- puts pdf
8
+ puts "CDF(11): #{cdf}"
9
+ puts "PDF(11): #{pdf}"
10
+
11
+ puts "Random numbers from normal distribution:"
12
+ 10.times do
13
+ puts norm.rng
14
+ end
@@ -0,0 +1,9 @@
1
+ module Rubystats
2
+ VERSION = '0.2.0'
3
+ end
4
+
5
+ require 'rubystats/normal_distribution'
6
+ require 'rubystats/binomial_distribution'
7
+ require 'rubystats/beta_distribution'
8
+ require 'rubystats/fishers_exact_test'
9
+ include Rubystats
@@ -0,0 +1,88 @@
1
+ require 'rubystats/probability_distribution'
2
+
3
+ module Rubystats
4
+ class BetaDistribution < Rubystats::ProbabilityDistribution
5
+ include Rubystats::SpecialMath
6
+
7
+ attr_reader :p, :q
8
+
9
+ #dgr_p = degrees of freedom p
10
+ #dgr_q = degrees of freedom q
11
+ def initialize(dgr_p, dgr_q)
12
+ if dgr_p <= 0 || dgr_q <= 0
13
+ return nil
14
+ end
15
+ @p = dgr_p.to_f
16
+ @q = dgr_q.to_f
17
+ end
18
+
19
+ def mean
20
+ @p.to_f / (@p.to_f + @q.to_f)
21
+ end
22
+
23
+ def standard_deviation
24
+ Math.sqrt(@p * @q / ((@p + @q)**2 * (@p + @q + 1)))
25
+ end
26
+
27
+ def pdf(x)
28
+ if x.class == Array
29
+ pdf_vals = []
30
+ for i in (0 ... x.size)
31
+ check_range(x[i])
32
+ if x[i] == 0.0 || x[i] == 1.0
33
+ pdf_vals[i] = 0.0
34
+ else
35
+ pdf_vals[i] = Math.exp( - log_beta(@p, @q) + (@p - 1.0) * Math.log(x[i]) + (@q - 1.0) * Math.log(1.0 - x[i]))
36
+ end
37
+ end
38
+ return pdf_vals
39
+ else
40
+ check_range(x)
41
+ if (x == 0.0) || (x == 1.0)
42
+ return 0.0
43
+ else
44
+ return Math.exp( - log_beta(@p, @q) + (@p - 1.0) * Math.log(x) + (@q - 1.0) * Math.log(1.0 - x)
45
+ )
46
+ end
47
+ end
48
+ end
49
+
50
+ def cdf(x)
51
+ if x.class == Array
52
+ cdf_vals = Array.new
53
+ for i in 0 ... x.size
54
+ check_range(x[i])
55
+ cdf_vals[i] = incomplete_beta(x[i], @p, @q)
56
+ end
57
+ return cdf_vals
58
+ else
59
+ check_range(x)
60
+ cdf_val = incomplete_beta(x, @p, @q)
61
+ return cdf_val
62
+ end
63
+ end
64
+
65
+ def icdf(prob)
66
+ if prob.class == Array
67
+ inv_vals = Array.new
68
+ for i in 0 ... prob.size
69
+ check_range(prob[i])
70
+ if prob[i] == 0.0
71
+ inv_vals[i] = 0.0
72
+ end
73
+ if prob[i] == 1.0
74
+ inv_vals[i] = 1.0
75
+ end
76
+ inv_vals[i] = find_root(prob[i], 0.5, 0.0, 1.0)
77
+ end
78
+ return inv_vals
79
+ else
80
+ check_range(prob)
81
+ return 0.0 if prob == 0.0
82
+ return 1.0 if prob == 1.0
83
+ return find_root(prob, 0.5, 0.0, 1.0)
84
+ end
85
+ end
86
+
87
+ end
88
+ end
@@ -0,0 +1,195 @@
1
+ require 'rubystats/probability_distribution'
2
+ # This class provides an object for encapsulating binomial distributions
3
+ # Ported to Ruby from PHPMath class by Bryan Donovan
4
+ # Author:: Mark Hale
5
+ # Author:: Paul Meagher
6
+ # Author:: Bryan Donovan (http://www.bryandonovan.com)
7
+ module Rubystats
8
+ class BinomialDistribution < Rubystats::ProbabilityDistribution
9
+ include Rubystats::NumericalConstants
10
+ include Rubystats::SpecialMath
11
+ include Rubystats::ExtraMath
12
+
13
+ # Constructs a binomial distribution
14
+ def initialize (trials, prob)
15
+ if trials <= 0
16
+ raise "Error: trials must be greater than 0"
17
+ end
18
+ @n = trials
19
+ if prob < 0.0 || prob > 1.0
20
+ raise "Error: prob must be between 0 and 1"
21
+ end
22
+ @p = prob
23
+ end
24
+
25
+ #returns the number of trials
26
+ def get_trials_parameter
27
+ return @n
28
+ end
29
+
30
+ #returns the probability
31
+ def get_probability_parameter
32
+ return @p
33
+ end
34
+
35
+ #returns the mean
36
+ def get_mean
37
+ return @n * @p
38
+ end
39
+
40
+ #returns the variance
41
+ def variance
42
+ return @n * @p * (1.0 - @p)
43
+ end
44
+
45
+ # Probability density function of a binomial distribution (equivalent
46
+ # to R dbinom function).
47
+ # _x should be an integer
48
+ # returns the probability that a stochastic variable x has the value _x,
49
+ # i.e. P(x = _x)
50
+ def pdf(_x)
51
+ if _x.class == Array
52
+ pdf_vals = []
53
+ for i in (0 ... _x.length)
54
+ check_range(_x[i], 0.0, @n)
55
+ pdf_vals[i] = binomial(@n, _x[i]) * (1-@p)**(@n-_x[i])
56
+ end
57
+ return pdf_vals
58
+ else
59
+ check_range(_x, 0.0, @n)
60
+ return binomial(@n, _x) * @p**_x * (1-@p)**(@n-_x)
61
+ end
62
+ end
63
+
64
+ # Cumulative binomial distribution function (equivalent to R pbinom function).
65
+ # _x should be integer-valued and can be single integer or array of integers
66
+ # returns single value or array containing probability that a stochastic
67
+ # variable x is less than X, i.e. P(x < _x).
68
+ def cdf(_x)
69
+ if _x.class == Array
70
+ inv_vals = []
71
+ for i in (0 ..._x.length)
72
+ pdf_vals[i] = get_cdf(_x[i])
73
+ end
74
+ return pdf_vals
75
+ else
76
+ return get_cdf(_x)
77
+ end
78
+ end
79
+
80
+ # Inverse of the cumulative binomial distribution function
81
+ # (equivalent to R qbinom function).
82
+ # returns the value X for which P(x < _x).
83
+ def get_icdf(prob)
84
+ if prob.class == Array
85
+ inv_vals = []
86
+ for i in (0 ...prob.length)
87
+ check_range(prob[i])
88
+ inv_vals[i] = (find_root(prob[i], @n/2, 0.0, @n)).floor
89
+ end
90
+ return inv_vals
91
+ else
92
+ check_range(prob)
93
+ return (find_root(prob, @n/2, 0.0, @n)).floor
94
+ end
95
+ end
96
+
97
+ # Wrapper for binomial RNG function (equivalent to R rbinom function).
98
+ # returns random deviate given trials and p
99
+ def rng(num_vals = 1)
100
+ if num_vals < 1
101
+ raise "Error num_vals must be greater than or equal to 1"
102
+ end
103
+ if num_vals == 1
104
+ return get_rng
105
+ else
106
+ rand_vals = []
107
+ for i in (0 ...num_vals)
108
+ rand_vals[i] = get_rng
109
+ end
110
+ return rand_vals
111
+ end
112
+ end
113
+
114
+ # Private methods below
115
+
116
+ private
117
+
118
+ # Private shared function for getting cumulant for particular x
119
+ # param _x should be integer-valued
120
+ # returns the probability that a stochastic variable x is less than _x
121
+ # i.e P(x < _x)
122
+ def get_cdf(_x)
123
+ check_range(_x, 0.0, @n)
124
+ sum = 0.0
125
+ for i in (0 .. _x)
126
+ sum = sum + pdf(i)
127
+ end
128
+ return sum
129
+ end
130
+
131
+ # Private binomial RNG function
132
+ # Original version of this function from Press et al.
133
+ #
134
+ # see http://www.library.cornell.edu/nr/bookcpdf/c7-3.pdf
135
+ #
136
+ # Changed parts having to do with generating a uniformly distributed
137
+ # number in the 0 to 1 range. Also using instance variables, instead
138
+ # of supplying function with p and n values. Finally calling port
139
+ # of JSci's log gamma routine instead of Press et al.
140
+ #
141
+ # There are enough non-trivial changes to this function that the
142
+ # port conforms to the Press et al. copyright.
143
+ def get_rng
144
+ nold = -1
145
+ pold = -1
146
+ p = (if @p <= 0.5 then @p else 1.0 - @p end)
147
+ am = @n * p
148
+ if @n < 25
149
+ bnl = 0.0
150
+ for i in (1...@n)
151
+ if Kernel.rand < p
152
+ bnl = bnl.next
153
+ end
154
+ end
155
+ elsif am < 1.0
156
+ g = Math.exp(-am)
157
+ t = 1.0
158
+ for j in (0 ... @n)
159
+ t = t * Kernel.rand
160
+ break if t < g
161
+ end
162
+ bnl = (if j <= @n then j else @n end)
163
+ else
164
+ if n != nold
165
+ en = @n
166
+ oldg = log_gamma(en + 1.0)
167
+ nold = n
168
+ end
169
+ if p != pold
170
+ pc = 1.0 - p
171
+ plog = Math.log(p)
172
+ pclog = Math.log(pc)
173
+ pold = p
174
+ end
175
+ sq = Math.sqrt(2.0 * am * pc)
176
+ until Kernel.rand <= t do
177
+ until (em >= 0.0 || em < (en + 1.0)) do
178
+ angle = Pi * Kernel.rand
179
+ y = Math.tan(angle)
180
+ em = sq * y + am
181
+ end
182
+ em = em.floor
183
+ t = 1.2 * sq * (1.0 + y * y) *
184
+ Math.exp(oldg - log_gamma(em + 1.0) -
185
+ log_gamma(en - em + 1.0) + em * plog + (en - em) * pclog)
186
+ end
187
+ bnl = em
188
+ end
189
+ if p != @p
190
+ bnl = @n - bnl
191
+ end
192
+ return bnl
193
+ end
194
+ end
195
+ end