RubyGems - rubystats - Versions diffs - 0.1.0 - Mend

rubystats 0.1.0

Files changed (19) hide show

data/README +9 -0
data/examples/beta.rb +36 -0
data/examples/binomial.rb +20 -0
data/examples/fisher.rb +25 -0
data/examples/norm.rb +8 -0
data/lib/beta_distribution.rb +87 -0
data/lib/binomial_distribution.rb +194 -0
data/lib/fishers_exact_test.rb +171 -0
data/lib/modules/extra_math.rb +7 -0
data/lib/modules/numerical_constants.rb +17 -0
data/lib/modules/special_math.rb +721 -0
data/lib/normal_distribution.rb +114 -0
data/lib/probability_distribution.rb +132 -0
data/tests/tc_beta.rb +78 -0
data/tests/tc_binomial.rb +22 -0
data/tests/tc_fisher.rb +20 -0
data/tests/tc_norm.rb +13 -0
data/tests/ts_stats.rb +5 -0
metadata +61 -0

data/README ADDED

@@ -0,0 +1,9 @@
+# This is a set of Ruby statistics libraries ported from the PHPMath libraries.
+# The PHPMath libraries were created by Paul Meagher (many of them were ported
+# from various other sources).
+# See http://www.phpmath.com/ for PHPMath libraries.
+#
+# See examples and tests for usage.
+# Author:: Bryan Donovan
+#
+# License:: MIT http://www.opensource.org/licenses/mit-license.php

data/examples/beta.rb ADDED

@@ -0,0 +1,36 @@
+$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
+require 'beta_distribution'
+def get_lower_limit(trials,alpha,p)
+	if p==0
+		lcl=0
+	else
+		q=trials-p+1
+		bin= BetaDistribution.new(p,q)
+		lcl=bin.inverse_cdf(alpha)
+	end
+	return lcl
+end
+def get_upper_limit(trials,alpha,p)
+	q=trials-p
+	p=p+1
+	bin= BetaDistribution.new(p,q)
+	ucl=bin.inverse_cdf(1-alpha)
+	return ucl
+end
+trials = 50
+alpha = 0.05
+p = 10
+lcl = get_lower_limit(trials, alpha, p)
+ucl = get_upper_limit(trials, alpha, p)
+puts "lcl= "
+p lcl
+puts "ucl= "
+p ucl

data/examples/binomial.rb ADDED

@@ -0,0 +1,20 @@
+$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
+require 'binomial_distribution'
+t = 100
+f = 7
+p = 0.05
+bin = BinomialDistribution.new(t,p)
+f = f - 1
+	mean = bin.mean
+	puts mean
+for i in 1..5
+	pdf = bin.pdf(i)
+	cdf = bin.cdf(i)
+	inv = bin.inverse_cdf(cdf)
+	puts inv
+	puts "#{i}: #{pdf} : #{cdf}"
+end

data/examples/fisher.rb ADDED

@@ -0,0 +1,25 @@
+$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
+require 'fishers_exact_test'
+require 'pp'
+tested1 = 20
+tested2 = 30
+f1 = 10
+f2 = 10
+t1 = tested1 - f1
+t2 = tested2 - f2
+fet = FishersExactTest.new
+fisher = fet.calculate(t1,t2,f1,f2)
+pp fisher
+perc = 100 * (1.0 - fisher[:twotail])
+pp perc

data/examples/norm.rb ADDED

@@ -0,0 +1,8 @@
+$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
+require 'normal_distribution'
+norm = NormalDistribution.new(10, 2)
+cdf = norm.cdf(11)
+pdf = norm.pdf(11)
+puts cdf
+puts pdf

data/lib/beta_distribution.rb ADDED

@@ -0,0 +1,87 @@
+require 'probability_distribution'
+class BetaDistribution < ProbabilityDistribution
+	include SpecialMath
+	attr_reader :p, :q
+	#dgr_p = degrees of freedom p
+	#dgr_q = degrees of freedom q
+	def initialize(dgr_p, dgr_q)
+		if dgr_p <= 0 || dgr_q <= 0
+			return nil
+		end
+		@p = dgr_p.to_f
+		@q = dgr_q.to_f
+	end
+	def mean
+		@p.to_f / (@p.to_f + @q.to_f)
+	end
+	def standard_deviation
+		Math.sqrt(@p * @q / ((@p + @q)**2 * (@p + @q + 1)))
+	end
+	def pdf(x)
+		if x.class == Array
+			pdf_vals = []
+			for i in (0 ... x.size)
+				check_range(x[i])
+				if x[i] == 0.0 || x[i] == 1.0
+					pdf_vals[i] = 0.0
+				else
+					pdf_vals[i] = Math.exp( - log_beta(@p, @q) + (@p - 1.0) * Math.log(x[i]) + (@q - 1.0) * Math.log(1.0 - x[i]))
+				end
+			end
+			return pdf_vals
+		else
+			check_range(x)
+			if  (x == 0.0) || (x == 1.0)
+				return 0.0
+			else
+				return Math.exp( - log_beta(@p, @q) + (@p - 1.0) * Math.log(x) + (@q - 1.0) * Math.log(1.0 - x)
+											 )
+			end
+		end
+	end
+	def cdf(x)
+		if x.class == Array
+			cdf_vals = Array.new
+			for i in 0 ... x.size
+				check_range(x[i])
+				cdf_vals[i] = incomplete_beta(x[i], @p, @q)
+			end
+			return cdf_vals
+		else
+			check_range(x)
+			cdf_val = incomplete_beta(x, @p, @q)
+			return cdf_val
+		end
+	end
+	def icdf(prob)
+		if prob.class == Array
+			inv_vals = Array.new
+			for i in 0 ... prob.size
+				check_range(prob[i])
+				if prob[i] == 0.0
+					inv_vals[i] = 0.0
+				end
+				if prob[i] == 1.0
+					inv_vals[i] = 1.0
+				end
+				inv_vals[i] = find_root(prob[i], 0.5, 0.0, 1.0)
+			end
+			return inv_vals
+		else
+			check_range(prob)
+			return 0.0 if prob == 0.0
+			return 1.0 if prob == 1.0
+			return find_root(prob, 0.5, 0.0, 1.0)
+		end
+	end
+end

data/lib/binomial_distribution.rb ADDED

@@ -0,0 +1,194 @@
+require 'probability_distribution'
+# This class provides an object for encapsulating binomial distributions
+# Ported to Ruby from PHPMath class by Bryan Donovan
+# Author:: Mark Hale
+# Author:: Paul Meagher
+# Author:: Bryan Donovan (mailto:bryandonovan@myrealbox.com)
+class BinomialDistribution < ProbabilityDistribution
+	include NumericalConstants
+	include SpecialMath
+	include ExtraMath
+	# Constructs a binomial distribution
+	# trials:: number of trials, must be greater than 0
+	# prob::   probability of success in one trial, must be within 0..1
+	# Raises a RuntimeError when either argument is out of range.
+	def initialize(trials, prob)
+		if trials <= 0
+			raise "Error: trials must be greater than 0"
+		end
+		if prob < 0.0 || prob > 1.0
+			raise "Error: prob must be between 0 and 1"
+		end
+		@n = trials
+		@p = prob
+	end
+	#returns the number of trials
+	def get_trials_parameter
+		@n
+	end
+	#returns the probability
+	def get_probability_parameter
+		@p
+	end
+	#returns the mean (trials * probability)
+	def get_mean
+		@n * @p
+	end
+	#returns the variance (trials * probability * (1 - probability))
+	def variance
+		@n * @p * (1.0 - @p)
+	end
+	# Probability density function of a binomial distribution (equivalent
+	# to R dbinom function).
+	# _x should be an integer or an Array of integers, each within 0..trials
+	# returns the probability that a stochastic variable x has the value _x,
+	# i.e. P(x = _x); an Array argument yields an Array of probabilities.
+	# The Array branch previously omitted the p**x factor.
+	def pdf(_x)
+		if _x.class == Array
+			_x.map { |value| pdf(value) }
+		else
+			check_range(_x, 0.0, @n)
+			binomial(@n, _x) * @p**_x * (1-@p)**(@n-_x)
+		end
+	end
+	# Cumulative binomial distribution function (equivalent to R pbinom function).
+	# _x should be integer-valued and can be single integer or array of integers
+	# returns single value or array containing probability that a stochastic
+	# variable x is less than or equal to _x, i.e. P(x <= _x).
+	# The Array branch previously wrote to an undefined local and raised.
+	def cdf(_x)
+		if _x.class == Array
+			_x.map { |value| get_cdf(value) }
+		else
+			get_cdf(_x)
+		end
+	end
+	# Inverse of the cumulative binomial distribution function
+	# (equivalent to R qbinom function).
+	# prob can be a single probability or an array of probabilities.
+	# returns the floored result of find_root for each probability, searching
+	# between 0 and the number of trials from a starting guess of trials/2.
+	def get_icdf(prob)
+		if prob.class == Array
+			prob.map { |value| get_icdf(value) }
+		else
+			check_range(prob)
+			(find_root(prob, @n/2, 0.0, @n)).floor
+		end
+	end
+	# Wrapper for binomial RNG function (equivalent to R rbinom function).
+	# returns one random deviate when num_vals is 1, otherwise an array of
+	# num_vals deviates. Raises a RuntimeError when num_vals is below 1.
+	def rng(num_vals = 1)
+		if num_vals < 1
+			raise "Error num_vals must be greater than or equal to 1"
+		end
+		return get_rng if num_vals == 1
+		Array.new(num_vals) { get_rng }
+	end
+	# Private methods below
+	private
+	# Private shared function for getting cumulant for particular x
+	# param _x should be integer-valued
+	# returns the probability that a stochastic variable x is less than or
+	# equal to _x, i.e. P(x <= _x): the pdf is summed over 0.._x inclusive.
+	def get_cdf(_x)
+		check_range(_x, 0.0, @n)
+		sum = 0.0
+		(0 .. _x).each do |i|
+			sum = sum + pdf(i)
+		end
+		sum
+	end
+	# Private binomial RNG function
+	# Original version of this function from Press et al.
+	#
+	# see http://www.library.cornell.edu/nr/bookcpdf/c7-3.pdf
+	#
+	# Changed parts having to do with generating a uniformly distributed
+	# number in the 0 to 1 range.  Also using instance variables, instead
+	# of supplying function with p and n values.  Finally calling port
+	# of JSci's log gamma routine instead of Press et al.
+	#
+	# There are enough non-trivial changes to this function that the
+	# port conforms to the Press et al. copyright.
+	#
+	# Returns an Integer number of successes between 0 and the trial count.
+	def get_rng
+		# Sample with p <= 0.5 and mirror the count at the end if needed.
+		p = (if @p <= 0.5 then @p else 1.0 - @p end)
+		am = @n * p
+		if @n < 25
+			# Few trials: run every trial and count the successes.
+			bnl = 0
+			@n.times do
+				bnl += 1 if Kernel.rand < p
+			end
+		elsif am < 1.0
+			# Small expected count: multiply uniform deviates until the
+			# product drops below exp(-am); the result is capped at @n.
+			g = Math.exp(-am)
+			t = 1.0
+			bnl = @n
+			(0 .. @n).each do |j|
+				t = t * Kernel.rand
+				if t < g
+					bnl = j
+					break
+				end
+			end
+		else
+			# Rejection method. The candidate em is drawn via tan() of a
+			# uniform angle and must fall in 0 <= em < n + 1 before it is
+			# floored and tested against the acceptance ratio t.
+			en = @n
+			oldg = log_gamma(en + 1.0)
+			pc = 1.0 - p
+			plog = Math.log(p)
+			pclog = Math.log(pc)
+			sq = Math.sqrt(2.0 * am * pc)
+			em = 0
+			y = 0.0
+			loop do
+				loop do
+					angle = Math::PI * Kernel.rand
+					y = Math.tan(angle)
+					em = sq * y + am
+					break if em >= 0.0 && em < (en + 1.0)
+				end
+				em = em.floor
+				t = 1.2 * sq * (1.0 + y * y) *
+					Math.exp(oldg - log_gamma(em + 1.0) -
+					log_gamma(en - em + 1.0) + em * plog + (en - em) * pclog)
+				break if Kernel.rand <= t
+			end
+			bnl = em
+		end
+		# Undo the p -> 1 - p substitution made at the top.
+		if p != @p
+			bnl = @n - bnl
+		end
+		bnl
+	end
+end

data/lib/fishers_exact_test.rb ADDED

@@ -0,0 +1,171 @@
+#! /usr/local/bin/ruby
+# Fisher's Exact Test Function Library
+#
+# Based on JavaScript version created by: Oyvind Langsrud
+# Ported to Ruby by Bryan Donovan
+# Fisher's exact test for a 2x2 table.
+# Use calculate, which returns a hash with :left, :right and :twotail.
+# The instance caches the last table margins and the last computed
+# probability (@sn11, @sn1_, @sn_1, @sn, @sprob) so neighbouring
+# probabilities can be derived incrementally.
+class FishersExactTest
+	# Resets all cached state and accumulated tail sums to 0.0.
+	def initialize
+		@sn11  = 0.0
+		@sn1_  = 0.0
+		@sn_1  = 0.0
+		@sn    = 0.0
+		@sprob = 0.0
+		@sleft  = 0.0
+		@sright = 0.0
+		@sless  = 0.0
+		@slarg  = 0.0
+		# NOTE(review): @left, @right and @twotail are not read by any
+		# method in this class; calculate uses locals of the same names.
+		@left    = 0.0
+		@right   = 0.0
+		@twotail = 0.0
+	end
+	# Reference: "Lanczos, C. 'A precision approximation
+	# of the gamma function', J. SIAM Numer. Anal., B, 1, 86-96, 1964."
+	# Translation of  Alan Miller's FORTRAN-implementation
+	# See http://lib.stat.cmu.edu/apstat/245
+	#
+	# Returns the approximated natural log of the gamma function at z.
+	def lngamm(z)
+		x = 0
+		x += 0.0000001659470187408462/(z+7)
+		x += 0.000009934937113930748 /(z+6)
+		x -= 0.1385710331296526      /(z+5)
+		x += 12.50734324009056       /(z+4)
+		x -= 176.6150291498386       /(z+3)
+		x += 771.3234287757674       /(z+2)
+		x -= 1259.139216722289       /(z+1)
+		x += 676.5203681218835       /(z)
+		x += 0.9999999999995183
+		return(Math.log(x)-5.58106146679532777-z+(z-0.5) * Math.log(z+6.5))
+	end
+	# Natural log of n factorial: 0 for n <= 1, otherwise lngamm(n+1).
+	def lnfact(n)
+		if n <= 1
+			return 0
+		else
+			return lngamm(n+1)
+		end
+	end
+	# Natural log of the binomial coefficient "n choose k".
+	def lnbico(n,k)
+		return lnfact(n) - lnfact(k) - lnfact(n-k)
+	end
+	# Probability of a table with upper-left cell n11, given the margins
+	# n1_ and n_1 and the grand total n, computed from scratch through
+	# log binomial coefficients.
+	def hyper_323(n11, n1_, n_1, n)
+		return Math.exp(lnbico(n1_, n11) + lnbico(n-n1_, n_1-n11) - lnbico(n, n_1))
+	end
+	# Probability for cell value n11 using the margins cached by an earlier
+	# hyper0 call (all-zero margins tell hyper0 to reuse the cache).
+	def hyper(n11)
+		return hyper0(n11, 0, 0, 0)
+	end
+	# Computes, caches in @sprob and returns the probability for n11i.
+	# With all-zero margins the cached margins are reused; when n11i is
+	# exactly one above or below the cached @sn11 (and not a multiple of
+	# 10), the cached probability is updated by a multiplicative ratio
+	# instead of being recomputed. With non-zero margins the cache is
+	# replaced and the probability is computed through hyper_323.
+	def hyper0(n11i,n1_i,n_1i,ni)
+		if n1_i == 0 and n_1i ==0 and ni == 0
+			# Multiples of 10 skip the incremental path and fall through to
+			# a full recomputation below.
+			unless n11i % 10 == 0
+				# Step up by one from the cached cell value.
+				if n11i == @sn11+1
+					@sprob *= ((@sn1_ - @sn11)/(n11i.to_f))*((@sn_1 - @sn11)/(n11i.to_f + @sn - @sn1_ - @sn_1))
+					@sn11 = n11i
+					return @sprob
+				end
+				# Step down by one from the cached cell value.
+				if n11i == @sn11-1
+					@sprob *= ((@sn11)/(@sn1_-n11i.to_f))*((@sn11+@sn-@sn1_-@sn_1)/(@sn_1-n11i.to_f))
+					@sn11 = n11i
+					return @sprob
+				end
+			end
+			@sn11 = n11i
+		else
+			@sn11 = n11i
+			@sn1_ = n1_i
+			@sn_1 = n_1i
+			@sn   = ni
+		end
+		@sprob = hyper_323(@sn11,@sn1_,@sn_1,@sn)
+		return @sprob
+	end
+	# Returns the probability of the observed table (n11 with margins n1_,
+	# n_1 and total n) and, as side effects, fills @sleft, @sright, @sless
+	# and @slarg. When only one cell value is possible (min == max) all
+	# four are set to 1 and 1 is returned.
+	def exact(n11,n1_,n_1,n)
+		p = i = j = prob = 0.0
+		# Largest and smallest values the upper-left cell can take.
+		max = n1_
+		max = n_1 if n_1 < max
+		min = n1_ + n_1 - n
+		min = 0 if min < 0
+		if min == max
+			@sless  = 1
+			@sright = 1
+			@sleft  = 1
+			@slarg  = 1
+			return 1
+		end
+		# This call also primes the margin cache used by hyper below.
+		prob = hyper0(n11,n1_,n_1,n)
+		# Walk up from min, summing probabilities that are smaller than
+		# the observed one (within a relative tolerance of 1e-8).
+		@sleft = 0
+		p = hyper(min)
+		i = min + 1
+		while p < (0.99999999 * prob)
+		 @sleft += p
+			p = hyper(i)
+			i += 1
+		end
+		i -= 1
+		# Include the stopping value when it matches the observed
+		# probability within tolerance; otherwise step back past it.
+		if p < (1.00000001*prob)
+			@sleft += p
+		else
+			i -= 1
+		end
+		# Same walk, downwards from max.
+		@sright = 0
+		p = hyper(max)
+		j = max - 1
+		while p < (0.99999999 * prob)
+			@sright += p
+			p = hyper(j)
+			j -= 1
+		end
+		j += 1
+		if p < (1.00000001*prob)
+			@sright += p
+		else
+			j += 1
+		end
+		# Derive @sless and @slarg from whichever walk ended closer to n11.
+		if (i - n11).abs < (j - n11).abs
+			@sless = @sleft
+			@slarg = 1 - @sleft + prob
+		else
+			@sless = 1 - @sright + prob
+			@slarg = @sright
+		end
+		return prob
+	end
+	# Runs the test on the four cells of a 2x2 table. Negative cell values
+	# are replaced by their absolute value. Returns a hash with :left
+	# (@sless), :right (@slarg) and :twotail (@sleft + @sright, capped
+	# at 1).
+	def calculate(n11_,n12_,n21_,n22_)
+		n11_ *= -1 if n11_ < 0
+		n12_ *= -1 if n12_ < 0
+		n21_ *= -1 if n21_ < 0
+		n22_ *= -1 if n22_ < 0
+		n1_     = n11_ + n12_
+		n_1     = n11_ + n21_
+		n       = n11_ + n12_ + n21_ + n22_
+		# Called for its side effects on @sless/@slarg/@sleft/@sright.
+		prob    = exact(n11_,n1_,n_1,n)
+		left    = @sless
+		right   = @slarg
+		twotail = @sleft + @sright
+		twotail = 1 if twotail > 1
+		values_hash = { :left =>left, :right =>right, :twotail =>twotail }
+		return values_hash
+	end
+end