RubyGems - rubystats - Versions diffs - 0.1.0 - Mend

rubystats 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

data/README +9 -0
data/examples/beta.rb +36 -0
data/examples/binomial.rb +20 -0
data/examples/fisher.rb +25 -0
data/examples/norm.rb +8 -0
data/lib/beta_distribution.rb +87 -0
data/lib/binomial_distribution.rb +194 -0
data/lib/fishers_exact_test.rb +171 -0
data/lib/modules/extra_math.rb +7 -0
data/lib/modules/numerical_constants.rb +17 -0
data/lib/modules/special_math.rb +721 -0
data/lib/normal_distribution.rb +114 -0
data/lib/probability_distribution.rb +132 -0
data/tests/tc_beta.rb +78 -0
data/tests/tc_binomial.rb +22 -0
data/tests/tc_fisher.rb +20 -0
data/tests/tc_norm.rb +13 -0
data/tests/ts_stats.rb +5 -0
metadata +61 -0

data/lib/normal_distribution.rb ADDED

@@ -0,0 +1,114 @@
+require 'probability_distribution'
+require 'modules/special_math'
+# This class provides an object for encapsulating normal distributions
+# Ported to Ruby from PHPMath class by Bryan Donovan
+# Author:: Jaco van Kooten
+# Author:: Paul Meagher
+# Author:: Bryan Donovan (mailto:bryandonovan@myrealbox.com)
+class NormalDistribution < ProbabilityDistribution
+	include SpecialMath
+	# Constructs a normal distribution (defaults to zero mean and
+	# unity variance).
+	def initialize(mu=0.0, sigma=1.0)
+		@mean = mu
+		if sigma <= 0.0
+			return "error"
+		end
+		@stdev = sigma
+		@variance = sigma**2
+		@pdf_denominator = Sqrt2pi * Math.sqrt(@variance)
+		@cdf_denominator = Sqrt2   * Math.sqrt(@variance)
+	end
+	# Returns the mean of the distribution
+	def get_mean
+		return @mean
+	end
+	# Returns the standard deviation of the distribution
+	def get_standard_deviation
+		return @stdev
+	end
+	# Returns the variance of the distribution
+	def get_variance
+		return @variance
+	end
+	private
+	# Obtain single PDF value
+	# Returns the probability that a stochastic variable x has the value X,
+	# i.e. P(x=X)
+	def get_pdf(x)
+		Math.exp( -((x-@mean)**2) / (2 * @variance)) / @pdf_denominator
+	end
+	# Obtain single CDF value
+	# Returns the probability that a stochastic variable x is less than X,
+	# i.e. P(x<X)
+	def get_cdf(x)
+		complementary_error( -(x - @mean) / @cdf_denominator) / 2
+	end
+	# Obtain single inverse CDF value.
+	#	returns the value X for which P(x&lt;X).
+	def get_icdf(p)
+		check_range(p)
+		if p == 0.0
+			return -Max_value
+		end
+		if p == 1.0
+			return Max_value
+		end
+		if p == 0.5
+			return @mean
+		end
+		mean_save = @mean
+		var_save = @variance
+		pdf_D_save = @pdf_denominator
+		cdf_D_save = @cdf_denominator
+		@mean = 0.0
+		@variance = 1.0
+		@pdf_denominator = Math.sqrt(Two_pi)
+		@cdf_denominator = Sqrt2
+		x = find_root(p, 0.0, -100.0, 100.0)
+		#scale back
+		@mean = mean_save
+		@variance = var_save
+		@pdf_denominator = pdf_D_save
+		@cdf_denominator = cdf_D_save
+		return x * Math.sqrt(@variance) + @mean
+	end
+	# Uses the polar form of the Box-Muller transformation which
+	#	is both faster and more robust numerically than basic Box-Muller
+	# transform. To speed up repeated RNG computations, two random values
+	# are computed after the while loop and the second one is saved and
+	# directly used if the method is called again.
+	# see http://www.taygeta.com/random/gaussian.html
+	# returns single normal deviate
+	def get_rng
+		if @use_last
+			y1 = @last
+			@use_last = false
+		else
+			until w < 1.0 do
+				r1 = Kernel.rand
+				r2 = Kernel.rand
+				x1 = 2.0 * r1 - 1.0
+				x2 = 2.0 * r2 - 1.0
+				w  = x1 * x1 * x2 * x2
+			end
+			w = Math.sqrt((-2.0 * Math.log(w)) / w)
+			y1 = x1 * w
+			@last = x2 * w
+			@use_last = true
+		end
+		return @mean + y1 * Math.sqrt(@variance)
+	end
+end

data/lib/probability_distribution.rb ADDED

@@ -0,0 +1,132 @@
+require 'modules/numerical_constants'
+require 'modules/extra_math'
+require 'modules/special_math'
+class ProbabilityDistribution
+	include NumericalConstants
+	include SpecialMath
+	include ExtraMath
+	def initialize
+	end
+	def mean
+		get_mean
+	end
+	def get_mean
+	end
+	def pdf(x)
+		if x.class == Array
+			pdf_vals = []
+			for i in (0..x.length)
+				pdf_vals[i] = get_pdf(x[i])
+			end
+			return pdf_vals
+		else
+			return get_pdf(x)
+		end
+	end
+	def get_pdf(x)
+	end
+	def cdf(x)
+		if x.class == Array
+			cdf_vals = []
+			for i in (0...x.size)
+				cdf_vals[i] = get_cdf(x[i])
+			end
+			return cdf_vals
+		else
+			return get_cdf(x)
+		end
+	end
+	def get_cdf(x)
+	end
+	def icdf(p)
+		if p.class == Array
+			inv_vals = []
+			for i in (0..p.length)
+				inv_vals[i] = get_icdf(p[i])
+			end
+			return inv_vals
+		else
+			return get_icdf(p)
+		end
+	end
+	def get_icdf(p)
+	end
+	def rng(n=1)
+		if n < 1
+			return "Number of random numbers to return must be 1 or greater"
+		end
+		if (n > 1)
+			rnd_vals = []
+			for i in (0..n)
+				rnd_vals[i] = get_rng()
+			end
+			return rnd_vals
+		else
+			return get_rng()
+		end
+	end
+	def get_rng()
+	end
+	def check_range(x, lo=0.0, hi=1.0)
+		if (x < lo) || (x > hi)
+			return "error"
+		end
+	end
+	def get_factorial(n)
+		if n <= 1
+			return 1
+		else
+			return n * get_factorial(n-1)
+		end
+	end
+	def find_root (prob, guess, x_lo, x_hi)
+		accuracy = 1.0e-10
+		max_iteration = 150
+		x 		= guess
+		x_new = guess
+		error = 0.0
+		pdf 	= 0.0
+		dx 		= 1000.0
+		i 		= 0
+		while ( dx.abs > accuracy && (i += 1) < max_iteration )
+			#Apply Newton-Raphson step
+			error = cdf(x) - prob
+			if error < 0.0
+				x_lo = x
+			else
+				x_hi = x
+			end
+			pdf = pdf(x)
+			if pdf != 0.0
+				dx = error / pdf
+				x_new = x -dx
+			end
+			# If the NR fails to converge (which for example may be the
+			# case if the initial guess is to rough) we apply a bisection
+			# step to determine a more narrow interval around the root.
+			if  x_new < x_lo || x_new > x_hi || pdf == 0.0
+				x_new = (x_lo + x_hi) / 2.0
+				dx = x_new - x
+			end
+			x = x_new
+		end
+		return x
+	end
+end

data/tests/tc_beta.rb ADDED

@@ -0,0 +1,78 @@
+$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
+require 'test/unit'
+require 'beta_distribution'
+class TestBeta < Test::Unit::TestCase
+	def test_simple
+		p = 12
+		q = 59
+		beta = BetaDistribution.new(p,q)
+		assert_equal("0.169014084507042", beta.mean.to_s)
+		assert_equal("0.0441664031038187", beta.standard_deviation.to_s)
+		assert_equal("6.26075815849967", beta.pdf(0.2).to_s)
+		assert_equal("0.999999997766913", beta.cdf(0.50).to_s)
+		assert_equal("0.102003194113565", beta.icdf(0.05).to_s)
+		x_vals = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35]
+		p_vals = beta.pdf(x_vals)
+		c_vals = beta.cdf(x_vals)
+		expected_pvals = [ 2.83232625227534,
+			8.89978000366836,
+			6.26075815849967,
+			1.72572305993386,
+			0.234475706454223,
+			0.0173700433944934]
+		expected_cvals = [0.0440640755091473,
+			0.356009606171447,
+			0.768319101921981,
+			0.956058132801147,
+			0.995358286711105,
+			0.99971672771575]
+		assert_equal(expected_pvals.to_s, p_vals.to_s)
+		assert_equal(expected_cvals.to_s, c_vals.to_s)
+		x_vals.each do |x|
+			cdf = beta.cdf(x)
+			inv_cdf = beta.icdf(cdf)
+			assert_in_delta(x.to_s, inv_cdf.to_s, 0.00000001)
+		end
+	end
+	def test_control_limits
+		trials = 50
+		alpha = 0.05
+		p = 10
+		lcl = get_lower_limit(trials, alpha, p)
+		ucl = get_upper_limit(trials, alpha, p)
+		assert_equal("0.112721613414076",lcl.to_s)
+		assert_equal("0.315596061420013",ucl.to_s)
+		trials = 210
+		alpha = 0.10
+		p = 47
+		lcl = get_lower_limit(trials, alpha, p)
+		ucl = get_upper_limit(trials, alpha, p)
+		assert_equal("0.186679485269901",lcl.to_s)
+		assert_equal("0.264957601783544",ucl.to_s)
+	end
+	def get_lower_limit(trials,alpha,p)
+		if p==0
+			lcl=0
+		else
+			q=trials-p+1
+			bin= BetaDistribution.new(p,q)
+			lcl=bin.icdf(alpha)
+		end
+		return lcl
+	end
+	def get_upper_limit(trials,alpha,p)
+		q=trials-p
+		p=p+1
+		bin= BetaDistribution.new(p,q)
+		ucl=bin.icdf(1-alpha)
+		return ucl
+	end
+end

data/tests/tc_binomial.rb ADDED

@@ -0,0 +1,22 @@
+$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
+require 'test/unit'
+require 'binomial_distribution'
+class TestBinomial < Test::Unit::TestCase
+	def test_simple
+		t = 100
+		f = 7
+		p = 0.05
+		bin = BinomialDistribution.new(t,p)
+		cdf = bin.cdf(f)
+		pdf = bin.pdf(f)
+		mean = bin.mean
+		inv_cdf = bin.icdf(cdf)
+		assert_equal("0.10602553736479",pdf.to_s)
+		assert_equal("0.872039521379601",cdf.to_s)
+		assert_equal("5.0",mean.to_s)
+		assert_equal(f,inv_cdf)
+	end
+end

data/tests/tc_fisher.rb ADDED

@@ -0,0 +1,20 @@
+$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
+require 'test/unit'
+require 'fishers_exact_test'
+class TestFisher < Test::Unit::TestCase
+	def test_simple
+		tested1 = 20
+		tested2 = 30
+		f1 = 10
+		f2 = 10
+		t1 = tested1 - f1
+		t2 = tested2 - f2
+		fet = FishersExactTest.new
+		fisher = fet.calculate(t1,t2,f1,f2)
+		assert_equal("0.188301375769922",fisher[:left].to_s)
+		assert_equal("0.929481131661052",fisher[:right].to_s)
+		assert_equal("0.257549242810992",fisher[:twotail].to_s)
+	end
+end

data/tests/tc_norm.rb ADDED

@@ -0,0 +1,13 @@
+$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
+require 'test/unit'
+require 'normal_distribution'
+class TestNormal < Test::Unit::TestCase
+	def test_simple
+		norm = NormalDistribution.new(10,2)
+		cdf = norm.cdf(11)
+		assert_equal("0.691462461274013",cdf.to_s)
+	end
+end

data/tests/ts_stats.rb ADDED

@@ -0,0 +1,5 @@
+require 'test/unit'
+require 'tc_fisher'
+require 'tc_binomial'
+require 'tc_beta'
+require 'tc_norm'

metadata ADDED

@@ -0,0 +1,61 @@
+--- !ruby/object:Gem::Specification
+rubygems_version: 0.8.11
+specification_version: 1
+name: rubystats
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+date: 2006-01-04 00:00:00 -08:00
+summary: "Classes for statistical calculations, e.g., binomial, beta, and normal
+  distributions with PDF, CDF and inverse CDF (all ported from PHPMath) as well as
+  Fisher's Exact Test"
+require_paths:
+  - lib
+email: Bryandonovan@myrealbox.com
+homepage: http://www.bryandonovan.com
+rubyforge_project:
+description:
+autorequire:
+default_executable:
+bindir: bin
+has_rdoc: true
+required_ruby_version: !ruby/object:Gem::Version::Requirement
+  requirements:
+    -
+      - ">"
+      - !ruby/object:Gem::Version
+        version: 0.0.0
+  version:
+platform: ruby
+signing_key:
+cert_chain:
+authors:
+  - Bryan Donovan
+files:
+  - lib/beta_distribution.rb
+  - lib/binomial_distribution.rb
+  - lib/fishers_exact_test.rb
+  - lib/normal_distribution.rb
+  - lib/probability_distribution.rb
+  - lib/modules
+  - lib/modules/special_math.rb
+  - lib/modules/extra_math.rb
+  - lib/modules/numerical_constants.rb
+  - tests/tc_beta.rb
+  - tests/tc_fisher.rb
+  - tests/tc_binomial.rb
+  - tests/ts_stats.rb
+  - tests/tc_norm.rb
+  - examples/fisher.rb
+  - examples/binomial.rb
+  - examples/norm.rb
+  - examples/beta.rb
+  - README
+test_files:
+  - tests/ts_stats.rb
+rdoc_options: []
+extra_rdoc_files:
+  - README
+executables: []
+extensions: []
+requirements: []
+dependencies: []