RubyGems - Rsquared - Versions diffs - 0.0.1 - Mend

Rsquared 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

data/.travis.yml ADDED Viewed

@@ -0,0 +1,8 @@
+---
+# Travis CI build configuration for the gem.
+language: ruby
+# Ruby interpreters the build is run against.
+rvm:
+  - "1.9.3"
+  - "1.8.7"
+  - jruby-19mode
+# Install step: runs the project's own install.sh script.
+install: ./install.sh

data/Gemfile ADDED Viewed

@@ -0,0 +1,4 @@
+# Gem index that dependencies are fetched from.
+source 'https://rubygems.org'
+# Specify your gem's dependencies in Rsquared.gemspec
+gemspec

data/LICENSE.txt ADDED Viewed

@@ -0,0 +1,22 @@
+Copyright (c) 2013 Daniel Cohen
+MIT License
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.md ADDED Viewed

@@ -0,0 +1,36 @@
+# Rsquared
+A full-featured Ruby statistics library with assumption verification that makes statistics easy to use,
+even with no statistical background.
+[![Build Status](https://travis-ci.org/dacohen/Rsquared.png)](https://travis-ci.org/dacohen/Rsquared)
+## Installation
+Add this line to your application's Gemfile:
+    gem 'Rsquared'
+And then execute:
+    $ bundle
+Or install it yourself as:
+    $ gem install Rsquared
+## Usage
+You can run a statistical test with assumption checking by supplying an array of numerical data points, the expected mean and the direction of the test:
+    >> ttest = Rsquared::TTest.new(data, mu0, Rsquared::Two.sided)
+    >> ttest.statistic #=> Float
+    >> ttest.significant? #=> Boolean
+## Contributing
+1. Fork it
+2. Create your feature branch (`git checkout -b my-new-feature`)
+3. Commit your changes (`git commit -am 'Add some feature'`)
+4. Push to the branch (`git push origin my-new-feature`)
+5. Create new Pull Request

data/Rakefile ADDED Viewed

@@ -0,0 +1,11 @@
+require "bundler/gem_tasks"
+require "rake/testtask"
+task :default => :test
+Rake::TestTask.new do |t|
+	t.libs << "test"
+	t.test_files = FileList["test/*.rb"]
+	t.verbose = true
+end

data/Rsquared.gemspec ADDED Viewed

@@ -0,0 +1,24 @@
+# coding: utf-8
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'Rsquared/version'
+Gem::Specification.new do |spec|
+  spec.name          = "Rsquared"
+  spec.version       = Rsquared::VERSION
+  spec.authors       = ["Daniel Cohen"]
+  spec.email         = ["dcohen@gatech.edu"]
+  spec.description   = %q{A full-featured Ruby statistics library with assumption verification}
+  spec.summary       = %q{Provides statistical distributions, tests and verifies relevant assumptions}
+  spec.homepage      = "https://github.com/dacohen/Rsquared"
+  spec.license       = "MIT"
+  spec.files         = `git ls-files`.split($/)
+  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
+  spec.require_paths = ["lib"]
+  spec.add_development_dependency "bundler", "~> 1.3"
+  spec.add_development_dependency "rake"
+  spec.add_development_dependency "distribution"
+end

data/install.sh ADDED Viewed

@@ -0,0 +1,8 @@
+#! /usr/bin/env bash
+# Builds the gem from the local checkout and installs the resulting package.
+# Stop at the first failing command so a failed build is never followed by an install attempt.
+set -e
+# Read the version from the library; it is part of the built .gem file name.
+version=$(ruby -e "require './lib/Rsquared/version.rb'" -e "puts Rsquared::VERSION")
+bundle install
+gem build Rsquared.gemspec
+gem install Rsquared-"$version".gem

data/lib/Rsquared/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+module Rsquared
+  # Version string of the gem; read by Rsquared.gemspec and install.sh.
+  VERSION = "0.0.1"
+end

data/lib/constants.rb ADDED Viewed

@@ -0,0 +1,24 @@
+KSCV = [0.975, 0.842, 0.708, 0.624, 0.565, 0.521, 0.486, 0.457, 0.432, 0.410, 0.391, 0.375, 0.361, 0.349, 0.338, 0.328, 0.318, 0.309, 0.301, 0.294]
+module Rsquared
+       class Upper
+       	     def self.tail
+	     	 return 1
+     	     end
+       end
+       class Lower
+       	     def self.tail
+	     	 return -1
+	     end
+       end
+       class Two
+       	     def self.sided
+	     	 return true
+	     end
+       end
+       class AssumptionError < StandardError
+       end
+end

data/lib/enumerableext.rb ADDED Viewed

@@ -0,0 +1,46 @@
+module Enumerable
+       def sum
+       	   return self.inject(0){|acc, i| acc+i}
+       end
+       def mean
+       	   return self.sum/self.length.to_f
+       end
+       def variance
+       	   varsum = self.inject(0){|acc, i| acc + (i - self.mean)**2}
+	   return(varsum/(self.length.to_f-1.0))
+       end
+       def popvariance
+       	   return self.variance*((self.length.to_f-1.0)/self.length.to_f)
+       end
+       def popstddev
+       	   return Math.sqrt(self.popvariance)
+       end
+       def stddev
+       	   return Math.sqrt(self.variance)
+       end
+       def skew
+       	   thirdsum = self.inject(0){|acc, i| acc + (i - self.mean)**3}
+	   thirdmoment = thirdsum/self.length.to_f
+	   return thirdmoment / (self.popvariance)**(3.0/2.0)
+       end
+       def kurtosis
+       	   fourthsum = self.inject(0){|acc, i| acc + (i - self.mean)**4}
+	   fourthmoment = fourthsum/self.length.to_f
+	   return (fourthmoment / (self.popvariance)**2)
+       end
+       def std
+       	   result = []
+       	   (0..self.length-1).each do |i|
+	   	result[i] = (self[i] - self.mean)/self.stddev
+	   end
+	   return result
+       end
+end

data/lib/rsquared.rb ADDED Viewed

@@ -0,0 +1,213 @@
+require "Rsquared/version"
+require "rubygems"
+require "distribution"
+require "constants"
+require "complex"
+require "enumerableext.rb"
+module Rsquared
+  ##
+  # KSTest implements the Kolomogorov-Smirnov test for normality
+  # kstest = Rsquared::KSTest.new(data)
+  # kstest.normal? => Boolean, indicates normality of data at 5% confidence
+  #
+  class KSTest
+  	##
+	# Intitializes the test object with an array of numerical data
+	#
+  	def initialize(data)
+	    @data = data.std.sort!
+	    fn = 0
+	    d = []
+	    range = @data.max - @data.min
+	    @data.each_with_index do |x, i|
+	    	# Calculate Fn
+		fn = i + 1
+		d[i] = fn/@data.length.to_f - Distribution::Normal::cdf(x)
+		fn = 0.0
+	    end
+	    @ksstat = d.max
+	    return @ksstat
+	 end
+	 ##
+	 # Returns a boolean indiciating the significance of the test a the 5% level
+	 #
+	 def significant?
+	     if @ksstat > Helper::kscv(@data.length) then
+	     	return true
+	     else
+		return false
+	     end
+	 end
+	 ##
+	 # Returns logical opposite of significance
+	 #
+	 def normal?
+	     !self.significant?
+	 end
+	 def inspect
+	     significant?
+	 end
+	 ##
+	 # Returns the test statistic
+	 #
+	 def statistic
+	     @ksstat
+	 end
+  end
+  ##
+  # Tests for outliers on either side of the data
+  # grubbs = Rsquared::GrubbsTest.new(data)
+  # grubbs.significant? => Boolean
+  #
+  class GrubbsTest
+	  ##
+	  # Initializes the Test object with an array of numerical data
+	  #
+	  def initialize(data)
+	     @data = data.sort
+   	     @gstat = [((@data.mean - @data.min)/@data.stddev).abs, ((@data.mean - @data.max)/@data.stddev).abs].max
+	  end
+	  ##
+	  # Returns a boolean indicating the significance of the test at the 5% level
+	  #
+	  def significant?(alpha=0.05)
+	     if @gstat > Helper::grubbscv(@data.length, alpha) then
+	     	return true
+	     else
+		return false
+	     end
+	  end
+	  def inspect
+	      significant?
+	  end
+	  ##
+	  # Returns the test statistic as a float
+	  #
+	  def statistic
+	      @gstat
+	  end
+	  alias_method :outlier?, :significant?
+  end
+  ##
+  # Tests for deviation of sample mean from expected mean
+  # ttest = Rsquared::TTest.new(data, mu0, sided)
+  # mu0 is the expected value of the sample mean
+  # Supply Rsquared::Upper.tail, Rsquared::Lower.tail or Rsquared::Two.sided
+  # Use Upper.tail when you suspect that the sample mean will be greater than the expected mean
+  # Use Lower.tail when you suspect that the sample mean will be smaller than the expected mean
+  # Use Two.sided when you suspect neither
+  class TTest
+  	##
+	# Initializes the TTest object with the supplied arguments
+	#
+  	def initialize(data, mu0, sided)
+	    @data = data
+  	    @mu0 = mu0
+	    @sided = sided
+	    if KSTest.new(@data).significant? and @data.length < 40 then
+	       raise AssumptionException, "The data is not close enough to a normal distribution for such a small sample size"
+	    end
+	    if GrubbsTest.new(@data).outlier? then
+	       raise AssumptionException, "Your data has one or more outliers, which the T-Distribution cannot handle"
+	    end
+	    @tstat = (@data.mean - @mu0)/(data.stddev/Math.sqrt(@data.length))
+	    @pvalue = Distribution::T::cdf(@tstat, @data.length-1)
+	    if @sided == Upper.tail then
+	       @pvalue = 1.0-@pvalue
+	    elsif @sided == Two.sided then
+	       @pvalue = [(1.0-@pvalue)*2.0, @pvalue*2.0].min
+	    end
+	 end
+	 def inspect
+	     @pvalue
+	 end
+	 ##
+	 # Returns the t-statistic
+	 #
+	 def statistic
+	     @tstat
+	 end
+	 ##
+	 # Checks for significance at the supplied alpha level
+	 #
+	 def significant?(alpha=0.05)
+	     if @pvalue < alpha then
+	     	return true
+	     else
+		return false
+	     end
+	 end
+  end
+  ##
+  # The Helper module implements uncommon statistical functions directly
+  # For use by experts only
+  # = Example
+  #
+  # Rsquared::Helper::kscv(30) => 0.190
+  #
+  module Helper
+	##
+	# kscv(n) => Float
+	# Estimates the 5% critical value of the Kolomogorov-Smirnov distribution given sample size
+	#
+	def kscv(n)
+	    if n < 1 then
+	       return 1.0
+	    elsif n < 21 then
+	       return KSCV[n-1]
+	    elsif n >= 20 and n < 25 then
+	       return 0.270
+	    elsif n >= 25 and n < 30 then
+	       return 0.240
+	    elsif n >= 30 and n < 35 then
+	       return 0.230
+	    elsif n > 35 then
+	       return 1.36/Math.sqrt(n)
+	    end
+	end
+	##
+	# grubbscv(n, alpha) => Float
+	# Calculates the Grubbs critical value
+	#
+	def grubbscv(n, alpha)
+	    tcv = Distribution::T::p_value(alpha/(2*n), n-2)
+	    return ((n-1)/Math.sqrt(n))*Math.sqrt(tcv**2/((n-2)+tcv**2))
+	end
+	 module_function :kscv, :grubbscv
+  end
+end

data/lib/version.rb ADDED Viewed

File without changes

data/test/enumerable_tests.rb ADDED Viewed

@@ -0,0 +1,44 @@
+require 'test/unit'
+require 'rsquared'
+class EnumerableTest < Test::Unit::TestCase
+      def test_sum
+      	  assert_in_delta 15.0, [1,2,3,4,5].sum, 0.001
+      end
+      def test_mean
+      	  assert_in_delta 3.0, [1,2,3,4,5].mean, 0.001
+      end
+      def test_variance
+      	  assert_in_delta 2.5, [1,2,3,4,5].variance, 0.001
+      end
+      def test_stddev
+      	  assert_in_delta 1.5811, [1,2,3,4,5].stddev, 0.001
+      end
+      def test_popvariance
+      	  assert_in_delta 2.0, [1,2,3,4,5].popvariance, 0.001
+      end
+      def test_popstddev
+      	  assert_in_delta 1.4142, [1,2,3,4,5].popstddev, 0.001
+      end
+      def test_skew
+      	  assert_in_delta 0.0, [1,2,3,4,5].skew, 0.001
+      end
+      def test_kurtosis
+      	  assert_in_delta 1.7, [1,2,3,4,5].kurtosis, 0.1
+      end
+      def test_std
+      	  @checkvalues = [-1.2649, -0.63247, 0, 0.63257, 1.2649]
+      	  [1,2,3,4,5].std.each_with_index do |x, i|
+	  	assert_in_delta @checkvalues[i], x, 0.001
+	  end
+      end
+end

data/test/helper_tests.rb ADDED Viewed

@@ -0,0 +1,16 @@
+require 'test/unit'
+require 'rsquared'
+class HelperTest < Test::Unit::TestCase
+      def test_kscv
+      	        assert_equal 0.410, Rsquared::Helper::kscv(10)
+	  	assert_equal 0.240, Rsquared::Helper::kscv(27)
+	  	assert_in_delta 0.20273, Rsquared::Helper::kscv(45), 0.001
+      end
+      def test_grubbscv
+      	  assert_in_delta 2.91, Rsquared::Helper::grubbscv(30, 0.05), 0.01
+      end
+end

data/test/rsquared_tests.rb ADDED Viewed

@@ -0,0 +1,42 @@
+require 'test/unit'
+require 'rsquared'
+# Adds a refute assertion to Test::Unit, presumably for versions that do not provide one.
+module Test::Unit::Assertions
+       # Passes when bool is falsy; any further arguments are handed to assert unchanged.
+       def refute(bool, *rest)
+       	   assert(!bool, *rest)
+       end
+end
+$data = [-105, 135, 40, 90, -55, -85, 70, 180, 140, -10, -105, 40, 185, -90, -90, 80, 70, -155, 345, 250, 10, -135, 80, 85, -40, 250, -20, 35, 305, -135]
+class RsquaredTests < Test::Unit::TestCase
+      def test_KS
+	  kstest = Rsquared::KSTest.new($data)
+      	  assert_in_delta 0.1046877, kstest.statistic, 0.001
+	  assert kstest.normal?
+      end
+      def test_TTest
+      	  ttest = Rsquared::TTest.new($data, 0, Rsquared::Two.sided)
+	  assert_in_delta 0.0763, ttest.inspect, 0.001
+	  refute ttest.significant?
+	  ttest = Rsquared::TTest.new($data, 0, Rsquared::Upper.tail)
+	  assert_in_delta 0.03813, ttest.inspect, 0.001
+	  assert ttest.significant?
+      end
+      def test_Grubbs
+      	  grubbs = Rsquared::GrubbsTest.new($data)
+	  assert_in_delta 2.21, grubbs.statistic, 0.01
+	  refute grubbs.significant?
+	  refute grubbs.outlier?
+	  data = $data + [800]
+	  grubbs = Rsquared::GrubbsTest.new(data)
+	  assert grubbs.significant?
+	  assert grubbs.outlier?
+      end
+end

metadata ADDED Viewed

@@ -0,0 +1,124 @@
+--- !ruby/object:Gem::Specification
+name: Rsquared
+version: !ruby/object:Gem::Version
+  hash: 29
+  prerelease:
+  segments:
+  - 0
+  - 0
+  - 1
+  version: 0.0.1
+platform: ruby
+authors:
+- Daniel Cohen
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2013-06-28 00:00:00 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: bundler
+  prerelease: false
+  requirement: &id001 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        hash: 9
+        segments:
+        - 1
+        - 3
+        version: "1.3"
+  type: :development
+  version_requirements: *id001
+- !ruby/object:Gem::Dependency
+  name: rake
+  prerelease: false
+  requirement: &id002 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        hash: 3
+        segments:
+        - 0
+        version: "0"
+  type: :development
+  version_requirements: *id002
+- !ruby/object:Gem::Dependency
+  name: distribution
+  prerelease: false
+  requirement: &id003 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        hash: 3
+        segments:
+        - 0
+        version: "0"
+  type: :development
+  version_requirements: *id003
+description: A full-featured Ruby statistics library with assumption verification
+email:
+- dcohen@gatech.edu
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- .travis.yml
+- Gemfile
+- LICENSE.txt
+- README.md
+- Rakefile
+- Rsquared.gemspec
+- install.sh
+- lib/Rsquared/version.rb
+- lib/constants.rb
+- lib/enumerableext.rb
+- lib/rsquared.rb
+- lib/version.rb
+- test/enumerable_tests.rb
+- test/helper_tests.rb
+- test/rsquared_tests.rb
+homepage: https://github.com/dacohen/Rsquared
+licenses:
+- MIT
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      hash: 3
+      segments:
+      - 0
+      version: "0"
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      hash: 3
+      segments:
+      - 0
+      version: "0"
+requirements: []
+rubyforge_project:
+rubygems_version: 1.8.24
+signing_key:
+specification_version: 3
+summary: Provides statistical distributions, tests and verifies relevant assumptions
+test_files:
+- test/enumerable_tests.rb
+- test/helper_tests.rb
+- test/rsquared_tests.rb