Rsquared 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.travis.yml ADDED
@@ -0,0 +1,8 @@
1
+ ---
2
+ language: ruby
3
+ rvm:
4
+ - "1.9.3"
5
+ - "1.8.7"
6
+ - jruby-19mode
7
+
8
+ install: ./install.sh
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in Rsquared.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Daniel Cohen
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,36 @@
1
+ # Rsquared
2
+
3
+ A full-featured Ruby statistics library with assumption verification to make using statistics easy,
4
+ even with no background.
5
+
6
+ [![Build Status](https://travis-ci.org/dacohen/Rsquared.png)](https://travis-ci.org/dacohen/Rsquared)
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ gem 'Rsquared'
13
+
14
+ And then execute:
15
+
16
+ $ bundle
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install Rsquared
21
+
22
+ ## Usage
23
+
24
+ You can run a statistical test, with assumption checking, by supplying an array of numerical data points, the expected mean and the direction of the test:
25
+
26
+ >> ttest = Rsquared::TTest.new(data, mu0, Rsquared::Two.sided)
27
+ >> ttest.statistic #=> Float
28
+ >> ttest.significant? #=> Boolean
29
+
30
+ ## Contributing
31
+
32
+ 1. Fork it
33
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
34
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
35
+ 4. Push to the branch (`git push origin my-new-feature`)
36
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,11 @@
1
+ require "bundler/gem_tasks"
2
+ require "rake/testtask"
3
+
4
+ task :default => :test
5
+
6
+ Rake::TestTask.new do |t|
7
+ t.libs << "test"
8
+ t.test_files = FileList["test/*.rb"]
9
+ t.verbose = true
10
+ end
11
+
data/Rsquared.gemspec ADDED
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'Rsquared/version'
5
+
6
# Gem packaging metadata for Rsquared.
Gem::Specification.new do |spec|
  spec.name          = "Rsquared"
  spec.version       = Rsquared::VERSION
  spec.authors       = ["Daniel Cohen"]
  spec.email         = ["dcohen@gatech.edu"]
  spec.description   = %q{A full-featured Ruby statistics library with assumption verification}
  spec.summary       = %q{Provides statistical distributions, tests and verifies relevant assumptions}
  spec.homepage      = "https://github.com/dacohen/Rsquared"
  spec.license       = "MIT"

  # Package every file tracked by git; executables and tests are derived from that list.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # lib/rsquared.rb does `require "distribution"` at load time, so the gem is
  # needed at runtime by every consumer, not only during development.
  spec.add_dependency "distribution"

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
end
data/install.sh ADDED
@@ -0,0 +1,8 @@
1
+ #! /usr/bin/env bash
2
+
3
+ version=$(ruby -e "require './lib/Rsquared/version.rb'" -e "puts Rsquared::VERSION")
4
+
5
+ bundle install
6
+ gem build Rsquared.gemspec
7
+ gem install Rsquared-"$version".gem
8
+
@@ -0,0 +1,3 @@
1
module Rsquared
  # Gem version string; read by Rsquared.gemspec and install.sh.
  # Frozen so the constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
data/lib/constants.rb ADDED
@@ -0,0 +1,24 @@
1
# 5% critical values of the Kolmogorov-Smirnov statistic for sample sizes
# 1 through 20; the entry for sample size n lives at index n - 1.
# Frozen because it is a shared lookup table.
KSCV = [0.975, 0.842, 0.708, 0.624, 0.565, 0.521, 0.486, 0.457, 0.432, 0.410, 0.391, 0.375, 0.361, 0.349, 0.338, 0.328, 0.318, 0.309, 0.301, 0.294].freeze

module Rsquared
  # Selector for an upper-tailed test: Upper.tail evaluates to 1.
  class Upper
    def self.tail
      1
    end
  end

  # Selector for a lower-tailed test: Lower.tail evaluates to -1.
  class Lower
    def self.tail
      -1
    end
  end

  # Selector for a two-sided test: Two.sided evaluates to true.
  class Two
    def self.sided
      true
    end
  end

  # Error type for data that violates the assumptions of a statistical test.
  class AssumptionError < StandardError
  end
end
@@ -0,0 +1,46 @@
1
# Descriptive-statistics helpers mixed into every Enumerable of numbers.
# Each method materializes or iterates the receiver, so it works on any
# Enumerable (Array, Range, Enumerator, ...), not only on Arrays.
module Enumerable
  # Sum of all elements. Only defined when the running Ruby lacks a built-in
  # Enumerable#sum, so the native version (which also honours an initial
  # value and a block) is never overridden.
  unless method_defined?(:sum)
    def sum
      inject(0) { |acc, i| acc + i }
    end
  end

  # Arithmetic mean as a Float (NaN for an empty collection).
  def mean
    sum / count.to_f
  end

  # Sample variance (divides by n - 1).
  def variance
    values = to_a
    center = values.mean # computed once instead of once per element
    squares = values.inject(0) { |acc, i| acc + (i - center)**2 }
    squares / (values.length.to_f - 1.0)
  end

  # Population variance (divides by n), derived from the sample variance.
  def popvariance
    n = count.to_f
    variance * ((n - 1.0) / n)
  end

  # Population standard deviation.
  def popstddev
    Math.sqrt(popvariance)
  end

  # Sample standard deviation.
  def stddev
    Math.sqrt(variance)
  end

  # Skewness: third central moment divided by popvariance**1.5.
  def skew
    values = to_a
    center = values.mean
    third_sum = values.inject(0) { |acc, i| acc + (i - center)**3 }
    third_moment = third_sum / values.length.to_f
    third_moment / values.popvariance**(3.0 / 2.0)
  end

  # Kurtosis (not excess kurtosis): fourth central moment divided by popvariance**2.
  def kurtosis
    values = to_a
    center = values.mean
    fourth_sum = values.inject(0) { |acc, i| acc + (i - center)**4 }
    fourth_moment = fourth_sum / values.length.to_f
    fourth_moment / values.popvariance**2
  end

  # Returns a new Array of standardized values: (x - mean) / sample stddev.
  def std
    center = mean
    spread = stddev
    map { |x| (x - center) / spread }
  end
end
data/lib/rsquared.rb ADDED
@@ -0,0 +1,213 @@
1
+ require "Rsquared/version"
2
+ require "rubygems"
3
+ require "distribution"
4
+ require "constants"
5
+ require "complex"
6
+ require "enumerableext.rb"
7
+
8
+ module Rsquared
9
+
10
+ ##
11
+ # KSTest implements the Kolmogorov-Smirnov test for normality
12
+ # kstest = Rsquared::KSTest.new(data)
13
+ # kstest.normal? => Boolean, indicates normality of data at the 5% significance level
14
+ #
15
+
16
class KSTest
  ##
  # Initializes the test object with an array of numerical data
  #
  # The data is standardized (Enumerable#std) and sorted, then compared
  # against the standard normal CDF. Raises ArgumentError for empty data.
  #
  # NOTE(review): only the deviations Fn(x) - F(x) are considered; confirm
  # whether the two-sided statistic (largest absolute deviation) was intended.
  #

  def initialize(data)
    @data = data.std.sort
    raise ArgumentError, "data must not be empty" if @data.empty?

    n = @data.length.to_f
    deviations = []
    @data.each_with_index do |x, i|
      # Empirical CDF at the i-th ordered point is (i + 1) / n
      deviations[i] = (i + 1) / n - Distribution::Normal::cdf(x)
    end
    @ksstat = deviations.max
  end

  ##
  # Returns a boolean indicating the significance of the test at the 5% level
  #

  def significant?
    @ksstat > Helper::kscv(@data.length)
  end

  ##
  # Returns logical opposite of significance
  #

  def normal?
    !significant?
  end

  ##
  # Returns the significance flag (a Boolean, not a String)
  #

  def inspect
    significant?
  end

  ##
  # Returns the test statistic
  #

  def statistic
    @ksstat
  end
end
68
+
69
+ ##
70
+ # Tests for outliers on either side of the data
71
+ # grubbs = Rsquared::GrubbsTest.new(data)
72
+ # grubbs.significant? => Boolean
73
+ #
74
+
75
class GrubbsTest
  ##
  # Builds the test from an array of numerical data
  #
  # The statistic is the larger of the two absolute distances between the
  # mean and the extreme values, measured in sample standard deviations.
  #

  def initialize(data)
    @data = data.sort
    center = @data.mean
    spread = @data.stddev
    distances = [@data.min, @data.max].map do |extreme|
      ((center - extreme) / spread).abs
    end
    @gstat = distances.max
  end

  ##
  # Returns a boolean telling whether the statistic exceeds the Grubbs
  # critical value at the given alpha (5% by default)
  #

  def significant?(alpha=0.05)
    @gstat > Helper::grubbscv(@data.length, alpha)
  end

  ##
  # Returns the significance flag at the default alpha (a Boolean, not a String)
  #

  def inspect
    significant?
  end

  ##
  # Returns the test statistic as a float
  #

  def statistic
    @gstat
  end

  # outlier? is a synonym for significant?
  alias_method :outlier?, :significant?
end
111
+
112
+ ##
113
+ # Tests for deviation of sample mean from expected mean
114
+ # ttest = Rsquared::TTest.new(data, mu0, sided)
115
+ # mu0 is the expected value of the sample mean
116
+ # Supply Rsquared::Upper.tail, Rsquared::Lower.tail or Rsquared::Two.sided
117
+ # Use Upper.tail when you suspect that the sample mean will be greater than the expected mean
118
+ # Use Lower.tail when you suspect that the sample mean will be smaller than the expected mean
119
+ # Use Two.sided when you suspect neither
120
+
121
class TTest
  ##
  # Initializes the TTest object with the supplied arguments
  #
  # data  - array of numerical observations
  # mu0   - expected value of the sample mean
  # sided - Upper.tail, Lower.tail or Two.sided; any value other than
  #         Upper.tail or Two.sided is handled as a lower-tailed test
  #
  # Raises AssumptionError when a sample of fewer than 40 points fails the
  # KSTest normality check, or when GrubbsTest reports an outlier.
  #

  def initialize(data, mu0, sided)
    @data = data
    @mu0 = mu0
    @sided = sided

    # The size check comes first so the KS test is skipped for samples of 40 or more
    if @data.length < 40 && KSTest.new(@data).significant?
      raise AssumptionError, "The data is not close enough to a normal distribution for such a small sample size"
    end
    if GrubbsTest.new(@data).outlier?
      raise AssumptionError, "Your data has one or more outliers, which the T-Distribution cannot handle"
    end

    @tstat = (@data.mean - @mu0) / (@data.stddev / Math.sqrt(@data.length))

    # Lower-tail probability of the statistic with n - 1 degrees of freedom
    lower_tail = Distribution::T::cdf(@tstat, @data.length - 1)
    @pvalue = if @sided == Upper.tail
                1.0 - lower_tail
              elsif @sided == Two.sided
                [(1.0 - lower_tail) * 2.0, lower_tail * 2.0].min
              else
                lower_tail
              end
  end

  ##
  # Returns the p-value (a Float, not a String)
  #

  def inspect
    @pvalue
  end

  ##
  # Returns the t-statistic
  #

  def statistic
    @tstat
  end

  ##
  # Checks for significance at the supplied alpha level
  #

  def significant?(alpha=0.05)
    @pvalue < alpha
  end
end
171
+
172
+ ##
173
+ # The Helper module implements uncommon statistical functions directly
174
+ # For use by experts only
175
+ # = Example
176
+ #
177
+ # Rsquared::Helper::kscv(30) => 0.230
178
+ #
179
module Helper
  ##
  # kscv(n) => Float
  # Estimates the 5% critical value of the Kolmogorov-Smirnov distribution given sample size
  #
  # n < 1     => 1.0
  # n < 21    => looked up in the KSCV table
  # n < 25    => 0.270
  # n < 30    => 0.240
  # n <= 35   => 0.230 (n == 35 used to fall through every branch and return nil)
  # otherwise => 1.36 / sqrt(n)
  #

  def kscv(n)
    return 1.0 if n < 1
    return KSCV[n - 1] if n < 21
    return 0.270 if n < 25
    return 0.240 if n < 30
    return 0.230 if n <= 35

    1.36 / Math.sqrt(n)
  end

  ##
  # grubbscv(n, alpha) => Float
  # Calculates the Grubbs critical value
  #
  # Uses the t value returned by Distribution::T::p_value for probability
  # alpha / (2n) with n - 2 degrees of freedom; only its square is used.
  #

  def grubbscv(n, alpha)
    tcv = Distribution::T::p_value(alpha / (2 * n), n - 2)
    ((n - 1) / Math.sqrt(n)) * Math.sqrt(tcv**2 / ((n - 2) + tcv**2))
  end

  module_function :kscv, :grubbscv
end
213
+ end
data/lib/version.rb ADDED
File without changes
@@ -0,0 +1,44 @@
1
+ require 'test/unit'
2
+ require 'rsquared'
3
+
4
# Checks each Enumerable statistics helper against known values for 1..5.
class EnumerableTest < Test::Unit::TestCase

  # Shared fixture; none of the helpers under test mutate it.
  SAMPLE = [1,2,3,4,5]

  def test_sum
    assert_in_delta(15.0, SAMPLE.sum, 0.001)
  end

  def test_mean
    assert_in_delta(3.0, SAMPLE.mean, 0.001)
  end

  def test_variance
    assert_in_delta(2.5, SAMPLE.variance, 0.001)
  end

  def test_stddev
    assert_in_delta(1.5811, SAMPLE.stddev, 0.001)
  end

  def test_popvariance
    assert_in_delta(2.0, SAMPLE.popvariance, 0.001)
  end

  def test_popstddev
    assert_in_delta(1.4142, SAMPLE.popstddev, 0.001)
  end

  def test_skew
    assert_in_delta(0.0, SAMPLE.skew, 0.001)
  end

  def test_kurtosis
    assert_in_delta(1.7, SAMPLE.kurtosis, 0.1)
  end

  # Every standardized value must match its expected z-score.
  def test_std
    expected = [-1.2649, -0.63247, 0, 0.63257, 1.2649]
    SAMPLE.std.each_with_index do |actual, position|
      assert_in_delta(expected[position], actual, 0.001)
    end
  end
end
@@ -0,0 +1,16 @@
1
+ require 'test/unit'
2
+ require 'rsquared'
3
+
4
# Exercises the critical-value helpers in Rsquared::Helper.
class HelperTest < Test::Unit::TestCase

  # One sample size from the lookup table, one from a fixed band, one from the formula.
  def test_kscv
    helper = Rsquared::Helper
    assert_equal(0.410, helper.kscv(10))
    assert_equal(0.240, helper.kscv(27))
    assert_in_delta(0.20273, helper.kscv(45), 0.001)
  end

  def test_grubbscv
    critical = Rsquared::Helper.grubbscv(30, 0.05)
    assert_in_delta(2.91, critical, 0.01)
  end

end
@@ -0,0 +1,42 @@
1
+ require 'test/unit'
2
+ require 'rsquared'
3
+
4
# Adds refute (the negation of assert) to Test::Unit's assertions.
module Test::Unit::Assertions
  def refute(bool, *rest)
    assert(!bool, *rest)
  end
end

# Thirty-point sample shared by all tests below.
$data = [-105, 135, 40, 90, -55, -85, 70, 180, 140, -10, -105, 40, 185, -90, -90, 80, 70, -155, 345, 250, 10, -135, 80, 85, -40, 250, -20, 35, 305, -135]

# End-to-end checks of the KS, t and Grubbs tests against reference values.
class RsquaredTests < Test::Unit::TestCase

  def test_KS
    ks = Rsquared::KSTest.new($data)
    assert_in_delta(0.1046877, ks.statistic, 0.001)
    assert(ks.normal?)
  end

  def test_TTest
    # Two-sided: p-value is above 0.05, so not significant
    two_sided = Rsquared::TTest.new($data, 0, Rsquared::Two.sided)
    assert_in_delta(0.0763, two_sided.inspect, 0.001)
    refute(two_sided.significant?)

    # Upper tail: p-value is below 0.05, so significant
    upper = Rsquared::TTest.new($data, 0, Rsquared::Upper.tail)
    assert_in_delta(0.03813, upper.inspect, 0.001)
    assert(upper.significant?)
  end

  def test_Grubbs
    clean = Rsquared::GrubbsTest.new($data)
    assert_in_delta(2.21, clean.statistic, 0.01)
    refute(clean.significant?)
    refute(clean.outlier?)

    # Appending an extreme point must trip the outlier test
    contaminated = Rsquared::GrubbsTest.new($data + [800])
    assert(contaminated.significant?)
    assert(contaminated.outlier?)
  end
end
metadata ADDED
@@ -0,0 +1,124 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: Rsquared
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Daniel Cohen
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2013-06-28 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: bundler
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ~>
27
+ - !ruby/object:Gem::Version
28
+ hash: 9
29
+ segments:
30
+ - 1
31
+ - 3
32
+ version: "1.3"
33
+ type: :development
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: rake
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ hash: 3
44
+ segments:
45
+ - 0
46
+ version: "0"
47
+ type: :development
48
+ version_requirements: *id002
49
+ - !ruby/object:Gem::Dependency
50
+ name: distribution
51
+ prerelease: false
52
+ requirement: &id003 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ hash: 3
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ type: :development
62
+ version_requirements: *id003
63
+ description: A full-featured Ruby statistics library with assumption verification
64
+ email:
65
+ - dcohen@gatech.edu
66
+ executables: []
67
+
68
+ extensions: []
69
+
70
+ extra_rdoc_files: []
71
+
72
+ files:
73
+ - .travis.yml
74
+ - Gemfile
75
+ - LICENSE.txt
76
+ - README.md
77
+ - Rakefile
78
+ - Rsquared.gemspec
79
+ - install.sh
80
+ - lib/Rsquared/version.rb
81
+ - lib/constants.rb
82
+ - lib/enumerableext.rb
83
+ - lib/rsquared.rb
84
+ - lib/version.rb
85
+ - test/enumerable_tests.rb
86
+ - test/helper_tests.rb
87
+ - test/rsquared_tests.rb
88
+ homepage: https://github.com/dacohen/Rsquared
89
+ licenses:
90
+ - MIT
91
+ post_install_message:
92
+ rdoc_options: []
93
+
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ hash: 3
102
+ segments:
103
+ - 0
104
+ version: "0"
105
+ required_rubygems_version: !ruby/object:Gem::Requirement
106
+ none: false
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ hash: 3
111
+ segments:
112
+ - 0
113
+ version: "0"
114
+ requirements: []
115
+
116
+ rubyforge_project:
117
+ rubygems_version: 1.8.24
118
+ signing_key:
119
+ specification_version: 3
120
+ summary: Provides statistical distributions, tests and verifies relevant assumptions
121
+ test_files:
122
+ - test/enumerable_tests.rb
123
+ - test/helper_tests.rb
124
+ - test/rsquared_tests.rb