Rsquared 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 217fe13000468eed31354809a3b1cab76081a645
4
+ data.tar.gz: c631668b39e318e350fb683a1d57bd95227f711e
5
+ SHA512:
6
+ metadata.gz: 1b74ab721353b68dec614290529e9094b6ddebbd22680b006be69b870ef388bc062a93a3ff76ea1bf3496cc2c441f68aa034125339b96bc80c50918de2e16504
7
+ data.tar.gz: 53d8faa435ffee52dcac9eb712e37bc02a2695f03b209e9e162a491428713f68dc1677ae621ce059c763f1cb93bad01acc7a4e54ddaa4efd5efe1c0f5e69af8a
data/README.md CHANGED
@@ -3,8 +3,12 @@
3
3
  A full-featured Ruby statistics library with assumption verification to make using statistics easy,
4
4
  even with no background.
5
5
 
6
+ Travis-CI:
6
7
  [![Build Status](https://travis-ci.org/dacohen/Rsquared.png)](https://travis-ci.org/dacohen/Rsquared)
7
8
 
9
+ CodeClimate:
10
+ [![Code Climate](https://codeclimate.com/github/dacohen/Rsquared.png)](https://codeclimate.com/github/dacohen/Rsquared)
11
+
8
12
  ## Installation
9
13
 
10
14
  Add this line to your application's Gemfile:
@@ -0,0 +1,44 @@
1
module Rsquared
  ##
  # Tests for an outlier on either side of the data
  #   grubbs = Rsquared::GrubbsTest.new(data)
  #   grubbs.significant? => Boolean
  #
  # The data object must respond to +sort+, and the sorted result to
  # +mean+, +stddev+, +min+, +max+ and +length+ (+mean+ and +stddev+ are
  # not core Array methods; they are expected to be supplied elsewhere
  # in the library).
  #

  class GrubbsTest
    ##
    # Initializes the test object with an array of numerical data.
    #
    # The statistic is the larger of |mean - min| / stddev and
    # |mean - max| / stddev, i.e. the distance of the most extreme
    # observation from the mean measured in standard deviations.
    #

    def initialize(data)
      @data = data.sort
      # Compute the summary values once instead of once per tail.
      mean = @data.mean
      stddev = @data.stddev
      lower_gap = ((mean - @data.min) / stddev).abs
      upper_gap = ((mean - @data.max) / stddev).abs
      @gstat = [lower_gap, upper_gap].max
    end

    ##
    # Returns a boolean indicating whether the statistic exceeds the
    # critical value Helper::grubbscv reports for this sample size at
    # the given +alpha+ (0.05 unless supplied).
    #

    def significant?(alpha = 0.05)
      @gstat > Helper::grubbscv(@data.length, alpha)
    end

    ##
    # Returns the outcome of significant? at the default alpha
    # (a Boolean, not a String).
    #

    def inspect
      significant?
    end

    ##
    # Returns the test statistic as a float
    #

    def statistic
      @gstat
    end

    alias_method :outlier?, :significant?
  end
end
@@ -0,0 +1,60 @@
1
module Rsquared
  ##
  # KSTest implements the Kolmogorov-Smirnov test for normality
  #   kstest = Rsquared::KSTest.new(data)
  #   kstest.normal? => Boolean, true when the test is not significant
  #
  # The critical value comes from Helper::kscv, which is given only the
  # sample size (the original documentation describes it as the 5% level).
  #

  class KSTest
    ##
    # Initializes the test object with an array of numerical data.
    #
    # The data is passed through +std+ (defined outside this file;
    # presumably standardizes the values -- confirm there) and sorted,
    # then compared with the standard normal CDF.
    #

    def initialize(data)
      @data = data.std.sort!
      n = @data.length.to_f
      gaps = @data.each_with_index.map do |x, i|
        cdf = Distribution::Normal::cdf(x)
        # The empirical CDF steps from i/n up to (i+1)/n at x, so the
        # largest gap to the reference CDF can sit on either side of
        # the step; both must be examined for the two-sided statistic.
        [(i + 1) / n - cdf, cdf - i / n].max
      end
      @ksstat = gaps.max
    end

    ##
    # Returns a boolean indicating whether the statistic exceeds the
    # critical value from Helper::kscv for this sample size
    #

    def significant?
      @ksstat > Helper::kscv(@data.length)
    end

    ##
    # Returns logical opposite of significance
    #

    def normal?
      !significant?
    end

    ##
    # Returns the outcome of significant? (a Boolean, not a String)
    #

    def inspect
      significant?
    end

    ##
    # Returns the test statistic
    #

    def statistic
      @ksstat
    end
  end
end
@@ -0,0 +1,29 @@
1
module Rsquared
  ##
  # One-sample z-test of a proportion against a hypothesised value
  #   proptest = Rsquared::PropTest.new(data, p0, sided)
  # The sum of data divided by its length is used as the sample
  # proportion (e.g. an array of 1s and 0s); p0 is the hypothesised
  # proportion.
  # Supply Rsquared::Upper.tail, Rsquared::Lower.tail or Rsquared::Two.sided
  #

  class PropTest < StatTest
    ##
    # Initializes the PropTest object with the supplied arguments.
    #
    # Raises AssumptionError when the expected number of successes
    # (n * p0) or failures (n * (1 - p0)) is below 10.
    #

    def initialize(data, p0, sided)
      @data = data
      @p0 = p0
      @sided = sided

      n = @data.length
      if n * @p0 < 10.0 || n * (1.0 - @p0) < 10.0
        raise AssumptionError, "The number of successes or failures prediced by the proportion is too small"
      end

      # Standard error of the proportion under the null hypothesis
      @stderr = Math.sqrt((@p0 * (1.0 - @p0)) / n)
      @pstat = ((@data.sum.to_f / n.to_f) - @p0) / @stderr
      # Lower-tail probability; setSidedness! converts it for the requested side
      @pvalue = Distribution::Normal::cdf(@pstat)
      setSidedness!(@sided)
    end

    ##
    # Returns the z-statistic
    #

    def statistic
      @pstat
    end

    ## significant?, inspect implemented by inheritance
  end
end
@@ -0,0 +1,31 @@
1
module Rsquared
  ##
  # Base class for tests summarised by a p-value. Subclasses are
  # expected to assign @pvalue (a lower-tail probability) and then call
  # setSidedness! to convert it for the requested alternative.
  #

  class StatTest
    attr_accessor :pvalue

    ##
    # Returns true when the p-value is below +alpha+ (0.05 unless supplied)
    #

    def significant?(alpha = 0.05)
      @pvalue < alpha
    end

    ##
    # Returns the p-value itself (a number, not a String)
    #

    def inspect
      @pvalue
    end

    ##
    # Modifies p-value to account for tails and/or two-sided tests.
    # Upper.tail replaces p with 1 - p, Two.sided with twice the smaller
    # tail; any other value (e.g. the lower tail) leaves it unchanged.
    #

    def setSidedness!(sided)
      if sided == Upper.tail
        @pvalue = 1.0 - @pvalue
      elsif sided == Two.sided
        @pvalue = [(1.0 - @pvalue) * 2.0, @pvalue * 2.0].min
      end
    end
  end

  ##
  # Raised when the data violates an assumption of the requested test
  #

  class AssumptionError < StandardError
  end
end
@@ -0,0 +1,44 @@
1
module Rsquared
  ##
  # Tests for deviation of sample mean from expected mean
  #   ttest = Rsquared::TTest.new(data, mu0, sided)
  # mu0 is the expected value of the sample mean
  # Supply Rsquared::Upper.tail, Rsquared::Lower.tail or Rsquared::Two.sided
  # Use Upper.tail when you suspect that the sample mean will be greater than the expected mean
  # Use Lower.tail when you suspect that the sample mean will be smaller than the expected mean
  # Use Two.sided when you suspect neither
  #

  class TTest < StatTest
    ##
    # Initializes the TTest object with the supplied arguments.
    #
    # Raises AssumptionError when a sample of fewer than 40 values
    # fails the KSTest normality check, or when GrubbsTest reports an
    # outlier.
    #

    def initialize(data, mu0, sided)
      @data = data
      @mu0 = mu0
      @sided = sided

      # Size is checked first so the normality test is only run when
      # its result can actually matter.
      if @data.length < 40 && KSTest.new(@data).significant?
        raise AssumptionError, "The data is not close enough to a normal distribution for such a small sample size"
      end
      if GrubbsTest.new(@data).outlier?
        raise AssumptionError, "Your data has one or more outliers, which the T-Distribution cannot handle"
      end

      @tstat = (@data.mean - @mu0) / (@data.stddev / Math.sqrt(@data.length))
      # Lower-tail probability with n - 1 degrees of freedom;
      # setSidedness! converts it for the requested side
      @pvalue = Distribution::T::cdf(@tstat, @data.length - 1)
      setSidedness!(@sided)
    end

    ##
    # Returns the t-statistic
    #

    def statistic
      @tstat
    end

    ## significant?, inspect implemented by inheritance

  end
end
@@ -1,3 +1,3 @@
1
1
  module Rsquared
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
data/lib/constants.rb CHANGED
@@ -18,7 +18,4 @@ module Rsquared
18
18
  return true
19
19
  end
20
20
  end
21
-
22
- class AssumptionError < StandardError
23
- end
24
21
  end
data/lib/rsquared.rb CHANGED
@@ -5,169 +5,13 @@ require "constants"
5
5
  require "complex"
6
6
  require "enumerableext.rb"
7
7
 
8
- module Rsquared
9
-
10
- ##
11
- # KSTest implements the Kolomogorov-Smirnov test for normality
12
- # kstest = Rsquared::KSTest.new(data)
13
- # kstest.normal? => Boolean, indicates normality of data at 5% confidence
14
- #
15
-
16
- class KSTest
17
- ##
18
- # Intitializes the test object with an array of numerical data
19
- #
20
-
21
- def initialize(data)
22
- @data = data.std.sort!
23
- fn = 0
24
- d = []
25
- range = @data.max - @data.min
26
- @data.each_with_index do |x, i|
27
- # Calculate Fn
28
- fn = i + 1
29
- d[i] = fn/@data.length.to_f - Distribution::Normal::cdf(x)
30
- fn = 0.0
31
- end
32
- @ksstat = d.max
33
- return @ksstat
34
- end
35
-
36
- ##
37
- # Returns a boolean indiciating the significance of the test a the 5% level
38
- #
39
-
40
- def significant?
41
- if @ksstat > Helper::kscv(@data.length) then
42
- return true
43
- else
44
- return false
45
- end
46
- end
47
-
48
- ##
49
- # Returns logical opposite of significance
50
- #
51
-
52
- def normal?
53
- !self.significant?
54
- end
55
-
56
- def inspect
57
- significant?
58
- end
59
-
60
- ##
61
- # Returns the test statistic
62
- #
63
-
64
- def statistic
65
- @ksstat
66
- end
67
- end
68
-
69
- ##
70
- # Tests for outliers on either side of the data
71
- # grubbs = Rsquared::GrubbsTest.new(data)
72
- # grubbs.significant? => Boolean
73
- #
74
-
75
- class GrubbsTest
76
- ##
77
- # Initializes the Test object with an array of numerical data
78
- #
79
-
80
- def initialize(data)
81
- @data = data.sort
82
- @gstat = [((@data.mean - @data.min)/@data.stddev).abs, ((@data.mean - @data.max)/@data.stddev).abs].max
83
- end
84
-
85
- ##
86
- # Returns a boolean indicating the significance of the test at the 5% level
87
- #
88
-
89
- def significant?(alpha=0.05)
90
- if @gstat > Helper::grubbscv(@data.length, alpha) then
91
- return true
92
- else
93
- return false
94
- end
95
- end
96
-
97
- def inspect
98
- significant?
99
- end
100
-
101
- ##
102
- # Returns the test statistic as a float
103
- #
104
-
105
- def statistic
106
- @gstat
107
- end
8
+ require 'Rsquared/StatTest'
9
+ require 'Rsquared/KSTest'
10
+ require 'Rsquared/GrubbsTest'
11
+ require 'Rsquared/TTest'
12
+ require 'Rsquared/PropTest'
108
13
 
109
- alias_method :outlier?, :significant?
110
- end
111
-
112
- ##
113
- # Tests for deviation of sample mean from expected mean
114
- # ttest = Rsquared::TTest.new(data, mu0, sided)
115
- # mu0 is the expected value of the sample mean
116
- # Supply Rsquared::Upper.tail, Rsquared::Lower.tail or Rsquared::Two.sided
117
- # Use Upper.tail when you suspect that the sample mean will be greater than the expected mean
118
- # Use Lower.tail when you suspect that the sample mean will be smaller than the expected mean
119
- # Use Two.sided when you suspect neither
120
-
121
- class TTest
122
- ##
123
- # Initializes the TTest object with the supplied arguments
124
- #
125
-
126
- def initialize(data, mu0, sided)
127
- @data = data
128
- @mu0 = mu0
129
- @sided = sided
130
-
131
- if KSTest.new(@data).significant? and @data.length < 40 then
132
- raise AssumptionException, "The data is not close enough to a normal distribution for such a small sample size"
133
- end
134
- if GrubbsTest.new(@data).outlier? then
135
- raise AssumptionException, "Your data has one or more outliers, which the T-Distribution cannot handle"
136
- end
137
-
138
- @tstat = (@data.mean - @mu0)/(data.stddev/Math.sqrt(@data.length))
139
- @pvalue = Distribution::T::cdf(@tstat, @data.length-1)
140
- if @sided == Upper.tail then
141
- @pvalue = 1.0-@pvalue
142
- elsif @sided == Two.sided then
143
- @pvalue = [(1.0-@pvalue)*2.0, @pvalue*2.0].min
144
- end
145
- end
146
-
147
- def inspect
148
- @pvalue
149
- end
150
-
151
- ##
152
- # Returns the t-statistic
153
- #
154
-
155
- def statistic
156
- @tstat
157
- end
158
-
159
- ##
160
- # Checks for significance at the supplied alpha level
161
- #
162
-
163
- def significant?(alpha=0.05)
164
- if @pvalue < alpha then
165
- return true
166
- else
167
- return false
168
- end
169
- end
170
- end
14
+ module Rsquared
171
15
 
172
16
  ##
173
17
  # The Helper module implements uncommon statistical functions directly
@@ -208,6 +52,19 @@ module Rsquared
208
52
  return ((n-1)/Math.sqrt(n))*Math.sqrt(tcv**2/((n-2)+tcv**2))
209
53
  end
210
54
 
55
+
56
+ ##
57
+ # Modifies p-value to account for tails and/or two-sided tests
58
+ #
59
+
60
def adjustForSided(pvalue, sided)
  # pvalue is treated as a lower-tail probability.
  if sided == Upper.tail
    1.0 - pvalue
  elsif sided == Two.sided
    [(1.0 - pvalue) * 2.0, pvalue * 2.0].min
  else
    # Lower tail (or any other value): no adjustment is needed, so hand
    # the p-value back instead of falling through and returning nil.
    pvalue
  end
end
67
+
211
68
  module_function :kscv, :grubbscv
212
69
  end
213
70
  end
@@ -8,6 +8,7 @@ module Test::Unit::Assertions
8
8
  end
9
9
 
10
10
  $data = [-105, 135, 40, 90, -55, -85, 70, 180, 140, -10, -105, 40, 185, -90, -90, 80, 70, -155, 345, 250, 10, -135, 80, 85, -40, 250, -20, 35, 305, -135]
11
+ $propdata = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
11
12
 
12
13
  class RsquaredTests < Test::Unit::TestCase
13
14
 
@@ -27,6 +28,16 @@ class RsquaredTests < Test::Unit::TestCase
27
28
  assert ttest.significant?
28
29
  end
29
30
 
31
def test_PropTest
  # $propdata holds 16 ones and 9 zeros, tested against p0 = 0.5 on the upper tail
  result = Rsquared::PropTest.new($propdata, 0.5, Rsquared::Upper.tail)
  assert_in_delta 0.080757, result.inspect, 0.001
  refute result.significant?

  # Ten observations at p0 = 0.5 give only 5 expected successes, below the minimum of 10
  assert_raise(Rsquared::AssumptionError) do
    result = Rsquared::PropTest.new([1, 1, 1, 1, 1, 0, 0, 0, 0, 0], 0.5, Rsquared::Upper.tail)
  end
end
40
+
30
41
  def test_Grubbs
31
42
  grubbs = Rsquared::GrubbsTest.new($data)
32
43
  assert_in_delta 2.21, grubbs.statistic, 0.01
metadata CHANGED
@@ -1,75 +1,64 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: Rsquared
3
- version: !ruby/object:Gem::Version
4
- hash: 29
5
- prerelease:
6
- segments:
7
- - 0
8
- - 0
9
- - 1
10
- version: 0.0.1
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
11
5
  platform: ruby
12
- authors:
6
+ authors:
13
7
  - Daniel Cohen
14
8
  autorequire:
15
9
  bindir: bin
16
10
  cert_chain: []
17
-
18
- date: 2013-06-28 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
11
+ date: 2013-06-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
21
14
  name: bundler
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- none: false
25
- requirements:
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
26
17
  - - ~>
27
- - !ruby/object:Gem::Version
28
- hash: 9
29
- segments:
30
- - 1
31
- - 3
32
- version: "1.3"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
33
20
  type: :development
34
- version_requirements: *id001
35
- - !ruby/object:Gem::Dependency
36
- name: rake
37
21
  prerelease: false
38
- requirement: &id002 !ruby/object:Gem::Requirement
39
- none: false
40
- requirements:
41
- - - ">="
42
- - !ruby/object:Gem::Version
43
- hash: 3
44
- segments:
45
- - 0
46
- version: "0"
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
47
34
  type: :development
48
- version_requirements: *id002
49
- - !ruby/object:Gem::Dependency
50
- name: distribution
51
35
  prerelease: false
52
- requirement: &id003 !ruby/object:Gem::Requirement
53
- none: false
54
- requirements:
55
- - - ">="
56
- - !ruby/object:Gem::Version
57
- hash: 3
58
- segments:
59
- - 0
60
- version: "0"
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: distribution
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
61
48
  type: :development
62
- version_requirements: *id003
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
63
55
  description: A full-featured Ruby statistics library with assumption verification
64
- email:
56
+ email:
65
57
  - dcohen@gatech.edu
66
58
  executables: []
67
-
68
59
  extensions: []
69
-
70
60
  extra_rdoc_files: []
71
-
72
- files:
61
+ files:
73
62
  - .travis.yml
74
63
  - Gemfile
75
64
  - LICENSE.txt
@@ -77,48 +66,43 @@ files:
77
66
  - Rakefile
78
67
  - Rsquared.gemspec
79
68
  - install.sh
69
+ - lib/Rsquared/GrubbsTest.rb
70
+ - lib/Rsquared/KSTest.rb
71
+ - lib/Rsquared/PropTest.rb
72
+ - lib/Rsquared/StatTest.rb
73
+ - lib/Rsquared/TTest.rb
80
74
  - lib/Rsquared/version.rb
81
75
  - lib/constants.rb
82
76
  - lib/enumerableext.rb
83
77
  - lib/rsquared.rb
84
- - lib/version.rb
85
78
  - test/enumerable_tests.rb
86
79
  - test/helper_tests.rb
87
80
  - test/rsquared_tests.rb
88
81
  homepage: https://github.com/dacohen/Rsquared
89
- licenses:
82
+ licenses:
90
83
  - MIT
84
+ metadata: {}
91
85
  post_install_message:
92
86
  rdoc_options: []
93
-
94
- require_paths:
87
+ require_paths:
95
88
  - lib
96
- required_ruby_version: !ruby/object:Gem::Requirement
97
- none: false
98
- requirements:
99
- - - ">="
100
- - !ruby/object:Gem::Version
101
- hash: 3
102
- segments:
103
- - 0
104
- version: "0"
105
- required_rubygems_version: !ruby/object:Gem::Requirement
106
- none: false
107
- requirements:
108
- - - ">="
109
- - !ruby/object:Gem::Version
110
- hash: 3
111
- segments:
112
- - 0
113
- version: "0"
89
+ required_ruby_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ required_rubygems_version: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - '>='
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
114
99
  requirements: []
115
-
116
100
  rubyforge_project:
117
- rubygems_version: 1.8.24
101
+ rubygems_version: 2.0.3
118
102
  signing_key:
119
- specification_version: 3
103
+ specification_version: 4
120
104
  summary: Provides statistical distributions, tests and verifies relevant assumptions
121
- test_files:
105
+ test_files:
122
106
  - test/enumerable_tests.rb
123
107
  - test/helper_tests.rb
124
108
  - test/rsquared_tests.rb
data/lib/version.rb DELETED
File without changes