Rsquared 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.travis.yml ADDED
@@ -0,0 +1,8 @@
1
+ ---
2
+ language: ruby
3
+ rvm:
4
+ - "1.9.3"
5
+ - "1.8.7"
6
+ - jruby-19mode
7
+
8
+ install: ./install.sh
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in Rsquared.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Daniel Cohen
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,36 @@
1
+ # Rsquared
2
+
3
+ A full-featured Ruby statistics library with assumption verification to make using statistics easy,
4
+ even with no background.
5
+
6
+ [![Build Status](https://travis-ci.org/dacohen/Rsquared.png)](https://travis-ci.org/dacohen/Rsquared)
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ gem 'Rsquared'
13
+
14
+ And then execute:
15
+
16
+ $ bundle
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install Rsquared
21
+
22
+ ## Usage
23
+
24
+ You can run a statistical test, with assumption checking, by supplying an array of numerical data points, the expected mean and the sidedness of the test:
25
+
26
+ >> ttest = Rsquared::TTest.new(data, mu0, Rsquared::Two.sided)
27
+ >> ttest.statistic #=> Float
28
+ >> ttest.significant? #=> Boolean
29
+
30
+ ## Contributing
31
+
32
+ 1. Fork it
33
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
34
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
35
+ 4. Push to the branch (`git push origin my-new-feature`)
36
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,11 @@
1
+ require "bundler/gem_tasks"
2
+ require "rake/testtask"
3
+
4
+ task :default => :test
5
+
6
+ Rake::TestTask.new do |t|
7
+ t.libs << "test"
8
+ t.test_files = FileList["test/*.rb"]
9
+ t.verbose = true
10
+ end
11
+
data/Rsquared.gemspec ADDED
@@ -0,0 +1,24 @@
1
# coding: utf-8
# Gem specification for Rsquared. The version number is read from
# lib/Rsquared/version.rb so that it is declared in exactly one place.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'Rsquared/version'

Gem::Specification.new do |spec|
  spec.name          = "Rsquared"
  spec.version       = Rsquared::VERSION
  spec.authors       = ["Daniel Cohen"]
  spec.email         = ["dcohen@gatech.edu"]
  spec.description   = %q{A full-featured Ruby statistics library with assumption verification}
  spec.summary       = %q{Provides statistical distributions, tests and verifies relevant assumptions}
  spec.homepage      = "https://github.com/dacohen/Rsquared"
  spec.license       = "MIT"

  # Package every file tracked by git; executables and tests are derived
  # from that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # lib/rsquared.rb does `require "distribution"` at load time, so the gem
  # is a runtime dependency: declaring it as development-only leaves an
  # installed Rsquared unable to load.
  spec.add_dependency "distribution"

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
end
data/install.sh ADDED
@@ -0,0 +1,8 @@
1
+ #! /usr/bin/env bash
2
+
3
+ version=$(ruby -e "require './lib/Rsquared/version.rb'" -e "puts Rsquared::VERSION")
4
+
5
+ bundle install
6
+ gem build Rsquared.gemspec
7
+ gem install Rsquared-"$version".gem
8
+
@@ -0,0 +1,3 @@
1
+ module Rsquared
2
+ VERSION = "0.0.1"
3
+ end
data/lib/constants.rb ADDED
@@ -0,0 +1,24 @@
1
# Critical values of the Kolmogorov-Smirnov statistic, looked up by
# Rsquared::Helper.kscv as KSCV[n - 1] for sample sizes n = 1..20.
# Frozen so that no caller can modify the shared table in place.
KSCV = [0.975, 0.842, 0.708, 0.624, 0.565, 0.521, 0.486, 0.457, 0.432, 0.410, 0.391, 0.375, 0.361, 0.349, 0.338, 0.328, 0.318, 0.309, 0.301, 0.294].freeze

module Rsquared
  # Selector for an upper-tailed test; pass Rsquared::Upper.tail as the
  # `sided` argument of a test.
  class Upper
    # Returns the marker value 1.
    def self.tail
      1
    end
  end

  # Selector for a lower-tailed test; pass Rsquared::Lower.tail as the
  # `sided` argument of a test.
  class Lower
    # Returns the marker value -1.
    def self.tail
      -1
    end
  end

  # Selector for a two-sided test; pass Rsquared::Two.sided as the
  # `sided` argument of a test.
  class Two
    # Returns the marker value true.
    def self.sided
      true
    end
  end

  # Raised when the data violates an assumption of the requested test.
  class AssumptionError < StandardError
  end
end
@@ -0,0 +1,46 @@
1
# Descriptive-statistics helpers added to every Enumerable.
#
# The helpers rely only on `inject`, `count` and `map`, so they work on
# Arrays, Ranges and any other finite Enumerable of numbers. Each helper
# computes the mean (and standard deviation) once per call instead of once
# per element.
module Enumerable
  # Ruby 2.4+ ships its own Enumerable#sum that accepts a start value and a
  # block. Keep that implementation when it exists and only install a
  # compatible fallback on older interpreters.
  unless method_defined?(:sum)
    # Returns the sum of the elements (or of the block results), starting
    # from +init+.
    def sum(init = 0)
      inject(init) { |acc, i| acc + (block_given? ? yield(i) : i) }
    end
  end

  # Arithmetic mean as a Float (NaN for an empty collection).
  def mean
    sum / count.to_f
  end

  # Sample variance (divides by n - 1).
  def variance
    mu = mean
    squares = inject(0) { |acc, i| acc + (i - mu)**2 }
    squares / (count.to_f - 1.0)
  end

  # Population variance (divides by n), derived from the sample variance.
  def popvariance
    n = count.to_f
    variance * ((n - 1.0) / n)
  end

  # Population standard deviation.
  def popstddev
    Math.sqrt(popvariance)
  end

  # Sample standard deviation.
  def stddev
    Math.sqrt(variance)
  end

  # Skewness: third central moment divided by popvariance ** 1.5.
  def skew
    mu = mean
    third_moment = inject(0) { |acc, i| acc + (i - mu)**3 } / count.to_f
    third_moment / popvariance**(3.0 / 2.0)
  end

  # Kurtosis (not excess kurtosis): fourth central moment divided by
  # popvariance squared.
  def kurtosis
    mu = mean
    fourth_moment = inject(0) { |acc, i| acc + (i - mu)**4 } / count.to_f
    fourth_moment / popvariance**2
  end

  # Returns a new Array of standardized values, (x - mean) / stddev, in the
  # original element order.
  def std
    mu = mean
    sigma = stddev
    map { |x| (x - mu) / sigma }
  end
end
data/lib/rsquared.rb ADDED
@@ -0,0 +1,213 @@
1
+ require "Rsquared/version"
2
+ require "rubygems"
3
+ require "distribution"
4
+ require "constants"
5
+ require "complex"
6
+ require "enumerableext.rb"
7
+
8
+ module Rsquared
9
+
10
+ ##
11
+ # KSTest implements the Kolmogorov-Smirnov test for normality
12
+ # kstest = Rsquared::KSTest.new(data)
13
+ # kstest.normal? => Boolean, indicates normality of data at the 5% significance level
14
+ #
15
+
16
class KSTest
  ##
  # Initializes the test object with an array of numerical data.
  # The data is standardized (via Enumerable#std) and sorted, then compared
  # against the standard normal CDF; the caller's array is not modified.
  #

  def initialize(data)
    @data = data.std.sort
    n = @data.length.to_f
    gaps = []
    @data.each_with_index do |x, i|
      # Empirical CDF just after the i-th sorted point minus the normal CDF.
      gaps << (i + 1) / n - Distribution::Normal::cdf(x)
    end
    # NOTE(review): only the upper deviations Fn(x) - F(x) are measured; the
    # textbook statistic also considers F(x_i) - (i - 1)/n. Left unchanged so
    # existing results stay the same -- confirm whether this is intended.
    @ksstat = gaps.max
  end

  ##
  # Returns a boolean indicating the significance of the test at the 5% level
  # (statistic greater than the critical value for this sample size)
  #

  def significant?
    @ksstat > Helper::kscv(@data.length)
  end

  ##
  # Returns logical opposite of significance
  #

  def normal?
    !significant?
  end

  ##
  # Returns the significance flag rather than a String, so that console
  # output shows the outcome of the test
  #

  def inspect
    significant?
  end

  ##
  # Returns the test statistic
  #

  def statistic
    @ksstat
  end
end
68
+
69
+ ##
70
+ # Tests for outliers on either side of the data
71
+ # grubbs = Rsquared::GrubbsTest.new(data)
72
+ # grubbs.significant? => Boolean
73
+ #
74
+
75
class GrubbsTest
  ##
  # Initializes the Test object with an array of numerical data.
  # The statistic is the larger of |mean - min| and |mean - max|, each
  # divided by the sample standard deviation. A sorted copy of the data is
  # kept; the caller's array is not modified.
  #

  def initialize(data)
    @data = data.sort
    # Compute the mean and standard deviation once and reuse them.
    center = @data.mean
    spread = @data.stddev
    @gstat = [((center - @data.min) / spread).abs, ((center - @data.max) / spread).abs].max
  end

  ##
  # Returns a boolean indicating the significance of the test at the given
  # alpha level (5% by default)
  #

  def significant?(alpha = 0.05)
    @gstat > Helper::grubbscv(@data.length, alpha)
  end

  ##
  # Returns the significance flag rather than a String, so that console
  # output shows the outcome of the test
  #

  def inspect
    significant?
  end

  ##
  # Returns the test statistic as a float
  #

  def statistic
    @gstat
  end

  alias_method :outlier?, :significant?
end
111
+
112
+ ##
113
+ # Tests for deviation of sample mean from expected mean
114
+ # ttest = Rsquared::TTest.new(data, mu0, sided)
115
+ # mu0 is the expected value of the sample mean
116
+ # Supply Rsquared::Upper.tail, Rsquared::Lower.tail or Rsquared::Two.sided
117
+ # Use Upper.tail when you suspect that the sample mean will be greater than the expected mean
118
+ # Use Lower.tail when you suspect that the sample mean will be smaller than the expected mean
119
+ # Use Two.sided when you suspect neither
120
+
121
class TTest
  ##
  # The p-value computed at construction time for the requested sidedness
  #

  attr_reader :pvalue

  ##
  # Initializes the TTest object with the supplied arguments
  # data  - array of numerical observations
  # mu0   - expected value of the sample mean
  # sided - Upper.tail, Lower.tail or Two.sided
  #
  # Raises AssumptionError when a sample of fewer than 40 points fails the
  # KS normality check, or when the Grubbs test reports an outlier
  #

  def initialize(data, mu0, sided)
    @data = data
    @mu0 = mu0
    @sided = sided

    # The size check comes first so the normality test is skipped for
    # samples where its result would be ignored anyway.
    if @data.length < 40 && KSTest.new(@data).significant?
      raise AssumptionError, "The data is not close enough to a normal distribution for such a small sample size"
    end
    if GrubbsTest.new(@data).outlier?
      raise AssumptionError, "Your data has one or more outliers, which the T-Distribution cannot handle"
    end

    @tstat = (@data.mean - @mu0) / (@data.stddev / Math.sqrt(@data.length))
    # Start from the lower-tail probability and convert it as requested;
    # Lower.tail uses it unchanged.
    @pvalue = Distribution::T::cdf(@tstat, @data.length - 1)
    if @sided == Upper.tail
      @pvalue = 1.0 - @pvalue
    elsif @sided == Two.sided
      @pvalue = [(1.0 - @pvalue) * 2.0, @pvalue * 2.0].min
    end
  end

  ##
  # Returns the p-value rather than a String, so that console output shows
  # the outcome of the test
  #

  def inspect
    @pvalue
  end

  ##
  # Returns the t-statistic
  #

  def statistic
    @tstat
  end

  ##
  # Checks for significance at the supplied alpha level
  #

  def significant?(alpha = 0.05)
    @pvalue < alpha
  end
end
171
+
172
+ ##
173
+ # The Helper module implements uncommon statistical functions directly
174
+ # For use by experts only
175
+ # = Example
176
+ #
177
+ # Rsquared::Helper::kscv(30) => 0.230
178
+ #
179
module Helper
  ##
  # kscv(n) => Float
  # Estimates the 5% critical value of the Kolmogorov-Smirnov distribution given sample size
  #
  # n < 1     -> 1.0
  # n = 1..20 -> looked up in the KSCV table
  # n = 21..35 -> fixed values per bracket (0.270, 0.240, 0.230)
  # n > 35    -> 1.36 / sqrt(n)
  #

  def kscv(n)
    return 1.0 if n < 1
    return KSCV[n - 1] if n < 21
    return 0.270 if n < 25
    return 0.240 if n < 30
    # n == 35 used to match no branch and returned nil; it now belongs to
    # the last fixed bracket (1.36 / sqrt(35) is about 0.2299).
    return 0.230 if n <= 35

    1.36 / Math.sqrt(n)
  end

  ##
  # grubbscv(n, alpha) => Float
  # Calculates the Grubbs critical value for sample size n at level alpha,
  # using the t quantile at alpha / (2n) with n - 2 degrees of freedom
  #

  def grubbscv(n, alpha)
    tcv = Distribution::T::p_value(alpha / (2 * n), n - 2)
    ((n - 1) / Math.sqrt(n)) * Math.sqrt(tcv**2 / ((n - 2) + tcv**2))
  end

  module_function :kscv, :grubbscv
end
213
+ end
data/lib/version.rb ADDED
File without changes
@@ -0,0 +1,44 @@
1
+ require 'test/unit'
2
+ require 'rsquared'
3
+
4
+ class EnumerableTest < Test::Unit::TestCase
5
+
6
+ def test_sum
7
+ assert_in_delta 15.0, [1,2,3,4,5].sum, 0.001
8
+ end
9
+
10
+ def test_mean
11
+ assert_in_delta 3.0, [1,2,3,4,5].mean, 0.001
12
+ end
13
+
14
+ def test_variance
15
+ assert_in_delta 2.5, [1,2,3,4,5].variance, 0.001
16
+ end
17
+
18
+ def test_stddev
19
+ assert_in_delta 1.5811, [1,2,3,4,5].stddev, 0.001
20
+ end
21
+
22
+ def test_popvariance
23
+ assert_in_delta 2.0, [1,2,3,4,5].popvariance, 0.001
24
+ end
25
+
26
+ def test_popstddev
27
+ assert_in_delta 1.4142, [1,2,3,4,5].popstddev, 0.001
28
+ end
29
+
30
+ def test_skew
31
+ assert_in_delta 0.0, [1,2,3,4,5].skew, 0.001
32
+ end
33
+
34
+ def test_kurtosis
35
+ assert_in_delta 1.7, [1,2,3,4,5].kurtosis, 0.1
36
+ end
37
+
38
+ def test_std
39
+ @checkvalues = [-1.2649, -0.63247, 0, 0.63257, 1.2649]
40
+ [1,2,3,4,5].std.each_with_index do |x, i|
41
+ assert_in_delta @checkvalues[i], x, 0.001
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,16 @@
1
+ require 'test/unit'
2
+ require 'rsquared'
3
+
4
+ class HelperTest < Test::Unit::TestCase
5
+
6
+ def test_kscv
7
+ assert_equal 0.410, Rsquared::Helper::kscv(10)
8
+ assert_equal 0.240, Rsquared::Helper::kscv(27)
9
+ assert_in_delta 0.20273, Rsquared::Helper::kscv(45), 0.001
10
+ end
11
+
12
+ def test_grubbscv
13
+ assert_in_delta 2.91, Rsquared::Helper::grubbscv(30, 0.05), 0.01
14
+ end
15
+
16
+ end
@@ -0,0 +1,42 @@
1
+ require 'test/unit'
2
+ require 'rsquared'
3
+
4
+ module Test::Unit::Assertions
5
+ def refute(bool, *rest)
6
+ assert(!bool, *rest)
7
+ end
8
+ end
9
+
10
+ $data = [-105, 135, 40, 90, -55, -85, 70, 180, 140, -10, -105, 40, 185, -90, -90, 80, 70, -155, 345, 250, 10, -135, 80, 85, -40, 250, -20, 35, 305, -135]
11
+
12
+ class RsquaredTests < Test::Unit::TestCase
13
+
14
+ def test_KS
15
+ kstest = Rsquared::KSTest.new($data)
16
+ assert_in_delta 0.1046877, kstest.statistic, 0.001
17
+ assert kstest.normal?
18
+ end
19
+
20
+ def test_TTest
21
+ ttest = Rsquared::TTest.new($data, 0, Rsquared::Two.sided)
22
+ assert_in_delta 0.0763, ttest.inspect, 0.001
23
+ refute ttest.significant?
24
+
25
+ ttest = Rsquared::TTest.new($data, 0, Rsquared::Upper.tail)
26
+ assert_in_delta 0.03813, ttest.inspect, 0.001
27
+ assert ttest.significant?
28
+ end
29
+
30
+ def test_Grubbs
31
+ grubbs = Rsquared::GrubbsTest.new($data)
32
+ assert_in_delta 2.21, grubbs.statistic, 0.01
33
+ refute grubbs.significant?
34
+ refute grubbs.outlier?
35
+
36
+ data = $data + [800]
37
+ grubbs = Rsquared::GrubbsTest.new(data)
38
+ assert grubbs.significant?
39
+ assert grubbs.outlier?
40
+
41
+ end
42
+ end
metadata ADDED
@@ -0,0 +1,124 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: Rsquared
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Daniel Cohen
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2013-06-28 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: bundler
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ~>
27
+ - !ruby/object:Gem::Version
28
+ hash: 9
29
+ segments:
30
+ - 1
31
+ - 3
32
+ version: "1.3"
33
+ type: :development
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: rake
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ hash: 3
44
+ segments:
45
+ - 0
46
+ version: "0"
47
+ type: :development
48
+ version_requirements: *id002
49
+ - !ruby/object:Gem::Dependency
50
+ name: distribution
51
+ prerelease: false
52
+ requirement: &id003 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ hash: 3
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ type: :development
62
+ version_requirements: *id003
63
+ description: A full-featured Ruby statistics library with assumption verification
64
+ email:
65
+ - dcohen@gatech.edu
66
+ executables: []
67
+
68
+ extensions: []
69
+
70
+ extra_rdoc_files: []
71
+
72
+ files:
73
+ - .travis.yml
74
+ - Gemfile
75
+ - LICENSE.txt
76
+ - README.md
77
+ - Rakefile
78
+ - Rsquared.gemspec
79
+ - install.sh
80
+ - lib/Rsquared/version.rb
81
+ - lib/constants.rb
82
+ - lib/enumerableext.rb
83
+ - lib/rsquared.rb
84
+ - lib/version.rb
85
+ - test/enumerable_tests.rb
86
+ - test/helper_tests.rb
87
+ - test/rsquared_tests.rb
88
+ homepage: https://github.com/dacohen/Rsquared
89
+ licenses:
90
+ - MIT
91
+ post_install_message:
92
+ rdoc_options: []
93
+
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ hash: 3
102
+ segments:
103
+ - 0
104
+ version: "0"
105
+ required_rubygems_version: !ruby/object:Gem::Requirement
106
+ none: false
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ hash: 3
111
+ segments:
112
+ - 0
113
+ version: "0"
114
+ requirements: []
115
+
116
+ rubyforge_project:
117
+ rubygems_version: 1.8.24
118
+ signing_key:
119
+ specification_version: 3
120
+ summary: Provides statistical distributions, tests and verifies relevant assumptions
121
+ test_files:
122
+ - test/enumerable_tests.rb
123
+ - test/helper_tests.rb
124
+ - test/rsquared_tests.rb