Rsquared 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 217fe13000468eed31354809a3b1cab76081a645
4
+ data.tar.gz: c631668b39e318e350fb683a1d57bd95227f711e
5
+ SHA512:
6
+ metadata.gz: 1b74ab721353b68dec614290529e9094b6ddebbd22680b006be69b870ef388bc062a93a3ff76ea1bf3496cc2c441f68aa034125339b96bc80c50918de2e16504
7
+ data.tar.gz: 53d8faa435ffee52dcac9eb712e37bc02a2695f03b209e9e162a491428713f68dc1677ae621ce059c763f1cb93bad01acc7a4e54ddaa4efd5efe1c0f5e69af8a
data/README.md CHANGED
@@ -3,8 +3,12 @@
3
3
  A full featured Ruby statistics library with assumption verification to make using statistics easy,
4
4
  even with no background.
5
5
 
6
+ Travis-CI:
6
7
  [![Build Status](https://travis-ci.org/dacohen/Rsquared.png)](https://travis-ci.org/dacohen/Rsquared)
7
8
 
9
+ CodeClimate:
10
+ [![Code Climate](https://codeclimate.com/github/dacohen/Rsquared.png)](https://codeclimate.com/github/dacohen/Rsquared)
11
+
8
12
  ## Installation
9
13
 
10
14
  Add this line to your application's Gemfile:
@@ -0,0 +1,44 @@
1
+ module Rsquared
2
+ ##
3
+ # Tests for outliers on either side of the data
4
+ # grubbs = Rsquared::GrubbsTest.new(data)
5
+ # grubbs.significant? => Boolean
6
+ #
7
+
8
+ class GrubbsTest
9
+ ##
10
+ # Initializes the Test object with an array of numerical data
11
+ #
12
+
13
+ def initialize(data)
14
+ @data = data.sort
15
+ @gstat = [((@data.mean - @data.min)/@data.stddev).abs, ((@data.mean - @data.max)/@data.stddev).abs].max
16
+ end
17
+
18
+ ##
19
+ # Returns a boolean indicating the significance of the test at the given alpha level (5% by default)
20
+ #
21
+
22
+ def significant?(alpha=0.05)
23
+ if @gstat > Helper::grubbscv(@data.length, alpha) then
24
+ return true
25
+ else
26
+ return false
27
+ end
28
+ end
29
+
30
+ def inspect
31
+ significant?
32
+ end
33
+
34
+ ##
35
+ # Returns the test statistic as a float
36
+ #
37
+
38
+ def statistic
39
+ @gstat
40
+ end
41
+
42
+ alias_method :outlier?, :significant?
43
+ end
44
+ end
@@ -0,0 +1,60 @@
1
+ module Rsquared
2
+ ##
3
+ # KSTest implements the Kolmogorov-Smirnov test for normality
4
+ # kstest = Rsquared::KSTest.new(data)
5
+ # kstest.normal? => Boolean, indicates normality of data at the 5% significance level
6
+ #
7
+
8
+ class KSTest
9
+ ##
10
+ # Initializes the test object with an array of numerical data
11
+ #
12
+
13
+ def initialize(data)
14
+ @data = data.std.sort!
15
+ fn = 0
16
+ d = []
17
+ range = @data.max - @data.min
18
+ @data.each_with_index do |x, i|
19
+ # Calculate Fn
20
+ fn = i + 1
21
+ d[i] = fn/@data.length.to_f - Distribution::Normal::cdf(x)
22
+ fn = 0.0
23
+ end
24
+ @ksstat = d.max
25
+ return @ksstat
26
+ end
27
+
28
+ ##
29
+ # Returns a boolean indicating the significance of the test at the 5% level
30
+ #
31
+
32
+ def significant?
33
+ if @ksstat > Helper::kscv(@data.length) then
34
+ return true
35
+ else
36
+ return false
37
+ end
38
+ end
39
+
40
+ ##
41
+ # Returns logical opposite of significance
42
+ #
43
+
44
+ def normal?
45
+ !self.significant?
46
+ end
47
+
48
+ def inspect
49
+ significant?
50
+ end
51
+
52
+ ##
53
+ # Returns the test statistic
54
+ #
55
+
56
+ def statistic
57
+ @ksstat
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,29 @@
1
+ module Rsquared
2
+
3
+ class PropTest < StatTest
4
+ def initialize(data, p0, sided)
5
+ @data = data
6
+ @p0 = p0
7
+ @sided = sided
8
+
9
+ if (@data.length*@p0 < 10.0) or (@data.length*(1.0-@p0) < 10.0) then
10
+ raise AssumptionError, "The number of successes or failures prediced by the proportion is too small"
11
+ end
12
+
13
+ @stderr = Math.sqrt((@p0*(1.0-@p0))/@data.length)
14
+ @pstat = ((@data.sum.to_f/@data.length.to_f) - @p0)/@stderr
15
+ @pvalue = Distribution::Normal::cdf(@pstat)
16
+ self.setSidedness!(@sided)
17
+ end
18
+
19
+
20
+ ##
21
+ # Returns the z-statistic
22
+
23
+ def statistic
24
+ @pstat
25
+ end
26
+
27
+ ## significant?, inspect implemented by inheritance
28
+ end
29
+ end
@@ -0,0 +1,31 @@
1
+ module Rsquared
2
+ class StatTest
3
+ attr_accessor :pvalue
4
+ def significant?(alpha=0.05)
5
+ if @pvalue < alpha then
6
+ return true
7
+ else
8
+ return false
9
+ end
10
+ end
11
+
12
+ def inspect
13
+ @pvalue
14
+ end
15
+
16
+ ##
17
+ # Modifies p-value to account for tails and/or two-sided tests
18
+ #
19
+
20
+ def setSidedness!(sided)
21
+ if sided == Upper.tail then
22
+ @pvalue = 1.0-@pvalue
23
+ elsif sided == Two.sided then
24
+ @pvalue = [(1.0-@pvalue)*2.0, @pvalue*2.0].min
25
+ end
26
+ end
27
+ end
28
+
29
+ class AssumptionError < StandardError
30
+ end
31
+ end
@@ -0,0 +1,44 @@
1
+ module Rsquared
2
+ ##
3
+ # Tests for deviation of sample mean from expected mean
4
+ # ttest = Rsquared::TTest.new(data, mu0, sided)
5
+ # mu0 is the expected value of the sample mean
6
+ # Supply Rsquared::Upper.tail, Rsquared::Lower.tail or Rsquared::Two.sided
7
+ # Use Upper.tail when you suspect that the sample mean will be greater than the expected mean
8
+ # Use Lower.tail when you suspect that the sample mean will be smaller than the expected mean
9
+ # Use Two.sided when you suspect neither
10
+
11
+ class TTest < StatTest
12
+ ##
13
+ # Initializes the TTest object with the supplied arguments
14
+ #
15
+
16
+ def initialize(data, mu0, sided)
17
+ @data = data
18
+ @mu0 = mu0
19
+ @sided = sided
20
+
21
+ if KSTest.new(@data).significant? and @data.length < 40 then
22
+ raise AssumptionError, "The data is not close enough to a normal distribution for such a small sample size"
23
+ end
24
+ if GrubbsTest.new(@data).outlier? then
25
+ raise AssumptionError, "Your data has one or more outliers, which the T-Distribution cannot handle"
26
+ end
27
+
28
+ @tstat = (@data.mean - @mu0)/(data.stddev/Math.sqrt(@data.length))
29
+ @pvalue = Distribution::T::cdf(@tstat, @data.length-1)
30
+ self.setSidedness!(@sided)
31
+ end
32
+
33
+ ##
34
+ # Returns the t-statistic
35
+ #
36
+
37
+ def statistic
38
+ @tstat
39
+ end
40
+
41
+ ## significant?, inspect implemented by inheritance
42
+
43
+ end
44
+ end
@@ -1,3 +1,3 @@
1
1
  module Rsquared
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
data/lib/constants.rb CHANGED
@@ -18,7 +18,4 @@ module Rsquared
18
18
  return true
19
19
  end
20
20
  end
21
-
22
- class AssumptionError < StandardError
23
- end
24
21
  end
data/lib/rsquared.rb CHANGED
@@ -5,169 +5,13 @@ require "constants"
5
5
  require "complex"
6
6
  require "enumerableext.rb"
7
7
 
8
- module Rsquared
9
-
10
- ##
11
- # KSTest implements the Kolomogorov-Smirnov test for normality
12
- # kstest = Rsquared::KSTest.new(data)
13
- # kstest.normal? => Boolean, indicates normality of data at 5% confidence
14
- #
15
-
16
- class KSTest
17
- ##
18
- # Intitializes the test object with an array of numerical data
19
- #
20
-
21
- def initialize(data)
22
- @data = data.std.sort!
23
- fn = 0
24
- d = []
25
- range = @data.max - @data.min
26
- @data.each_with_index do |x, i|
27
- # Calculate Fn
28
- fn = i + 1
29
- d[i] = fn/@data.length.to_f - Distribution::Normal::cdf(x)
30
- fn = 0.0
31
- end
32
- @ksstat = d.max
33
- return @ksstat
34
- end
35
-
36
- ##
37
- # Returns a boolean indiciating the significance of the test a the 5% level
38
- #
39
-
40
- def significant?
41
- if @ksstat > Helper::kscv(@data.length) then
42
- return true
43
- else
44
- return false
45
- end
46
- end
47
-
48
- ##
49
- # Returns logical opposite of significance
50
- #
51
-
52
- def normal?
53
- !self.significant?
54
- end
55
-
56
- def inspect
57
- significant?
58
- end
59
-
60
- ##
61
- # Returns the test statistic
62
- #
63
-
64
- def statistic
65
- @ksstat
66
- end
67
- end
68
-
69
- ##
70
- # Tests for outliers on either side of the data
71
- # grubbs = Rsquared::GrubbsTest.new(data)
72
- # grubbs.significant? => Boolean
73
- #
74
-
75
- class GrubbsTest
76
- ##
77
- # Initializes the Test object with an array of numerical data
78
- #
79
-
80
- def initialize(data)
81
- @data = data.sort
82
- @gstat = [((@data.mean - @data.min)/@data.stddev).abs, ((@data.mean - @data.max)/@data.stddev).abs].max
83
- end
84
-
85
- ##
86
- # Returns a boolean indicating the significance of the test at the 5% level
87
- #
88
-
89
- def significant?(alpha=0.05)
90
- if @gstat > Helper::grubbscv(@data.length, alpha) then
91
- return true
92
- else
93
- return false
94
- end
95
- end
96
-
97
- def inspect
98
- significant?
99
- end
100
-
101
- ##
102
- # Returns the test statistic as a float
103
- #
104
-
105
- def statistic
106
- @gstat
107
- end
8
+ require 'Rsquared/StatTest'
9
+ require 'Rsquared/KSTest'
10
+ require 'Rsquared/GrubbsTest'
11
+ require 'Rsquared/TTest'
12
+ require 'Rsquared/PropTest'
108
13
 
109
- alias_method :outlier?, :significant?
110
- end
111
-
112
- ##
113
- # Tests for deviation of sample mean from expected mean
114
- # ttest = Rsquared::TTest.new(data, mu0, sided)
115
- # mu0 is the expected value of the sample mean
116
- # Supply Rsquared::Upper.tail, Rsquared::Lower.tail or Rsquared::Two.sided
117
- # Use Upper.tail when you suspect that the sample mean will be greater than the expected mean
118
- # Use Lower.tail when you suspect that the sample mean will be smaller than the expected mean
119
- # Use Two.sided when you suspect neither
120
-
121
- class TTest
122
- ##
123
- # Initializes the TTest object with the supplied arguments
124
- #
125
-
126
- def initialize(data, mu0, sided)
127
- @data = data
128
- @mu0 = mu0
129
- @sided = sided
130
-
131
- if KSTest.new(@data).significant? and @data.length < 40 then
132
- raise AssumptionException, "The data is not close enough to a normal distribution for such a small sample size"
133
- end
134
- if GrubbsTest.new(@data).outlier? then
135
- raise AssumptionException, "Your data has one or more outliers, which the T-Distribution cannot handle"
136
- end
137
-
138
- @tstat = (@data.mean - @mu0)/(data.stddev/Math.sqrt(@data.length))
139
- @pvalue = Distribution::T::cdf(@tstat, @data.length-1)
140
- if @sided == Upper.tail then
141
- @pvalue = 1.0-@pvalue
142
- elsif @sided == Two.sided then
143
- @pvalue = [(1.0-@pvalue)*2.0, @pvalue*2.0].min
144
- end
145
- end
146
-
147
- def inspect
148
- @pvalue
149
- end
150
-
151
- ##
152
- # Returns the t-statistic
153
- #
154
-
155
- def statistic
156
- @tstat
157
- end
158
-
159
- ##
160
- # Checks for significance at the supplied alpha level
161
- #
162
-
163
- def significant?(alpha=0.05)
164
- if @pvalue < alpha then
165
- return true
166
- else
167
- return false
168
- end
169
- end
170
- end
14
+ module Rsquared
171
15
 
172
16
  ##
173
17
  # The Helper module implements uncommon statistical functions directly
@@ -208,6 +52,19 @@ module Rsquared
208
52
  return ((n-1)/Math.sqrt(n))*Math.sqrt(tcv**2/((n-2)+tcv**2))
209
53
  end
210
54
 
55
+
56
+ ##
57
+ # Modifies p-value to account for tails and/or two-sided tests
58
+ #
59
+
60
+ def adjustForSided(pvalue, sided)
61
+ if sided == Upper.tail then
62
+ return 1.0-pvalue
63
+ elsif sided == Two.sided then
64
+ return [(1.0-pvalue)*2.0, pvalue*2.0].min
65
+ end
66
+ end
67
+
211
68
  module_function :kscv, :grubbscv
212
69
  end
213
70
  end
@@ -8,6 +8,7 @@ module Test::Unit::Assertions
8
8
  end
9
9
 
10
10
  $data = [-105, 135, 40, 90, -55, -85, 70, 180, 140, -10, -105, 40, 185, -90, -90, 80, 70, -155, 345, 250, 10, -135, 80, 85, -40, 250, -20, 35, 305, -135]
11
+ $propdata = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
11
12
 
12
13
  class RsquaredTests < Test::Unit::TestCase
13
14
 
@@ -27,6 +28,16 @@ class RsquaredTests < Test::Unit::TestCase
27
28
  assert ttest.significant?
28
29
  end
29
30
 
31
+ def test_PropTest
32
+ proptest = Rsquared::PropTest.new($propdata, 0.5, Rsquared::Upper.tail)
33
+ assert_in_delta 0.080757, proptest.inspect, 0.001
34
+ refute proptest.significant?
35
+
36
+ assert_raise(Rsquared::AssumptionError) do
37
+ proptest = Rsquared::PropTest.new([1, 1, 1, 1, 1, 0, 0, 0, 0, 0], 0.5, Rsquared::Upper.tail)
38
+ end
39
+ end
40
+
30
41
  def test_Grubbs
31
42
  grubbs = Rsquared::GrubbsTest.new($data)
32
43
  assert_in_delta 2.21, grubbs.statistic, 0.01
metadata CHANGED
@@ -1,75 +1,64 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: Rsquared
3
- version: !ruby/object:Gem::Version
4
- hash: 29
5
- prerelease:
6
- segments:
7
- - 0
8
- - 0
9
- - 1
10
- version: 0.0.1
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
11
5
  platform: ruby
12
- authors:
6
+ authors:
13
7
  - Daniel Cohen
14
8
  autorequire:
15
9
  bindir: bin
16
10
  cert_chain: []
17
-
18
- date: 2013-06-28 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
11
+ date: 2013-06-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
21
14
  name: bundler
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- none: false
25
- requirements:
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
26
17
  - - ~>
27
- - !ruby/object:Gem::Version
28
- hash: 9
29
- segments:
30
- - 1
31
- - 3
32
- version: "1.3"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
33
20
  type: :development
34
- version_requirements: *id001
35
- - !ruby/object:Gem::Dependency
36
- name: rake
37
21
  prerelease: false
38
- requirement: &id002 !ruby/object:Gem::Requirement
39
- none: false
40
- requirements:
41
- - - ">="
42
- - !ruby/object:Gem::Version
43
- hash: 3
44
- segments:
45
- - 0
46
- version: "0"
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
47
34
  type: :development
48
- version_requirements: *id002
49
- - !ruby/object:Gem::Dependency
50
- name: distribution
51
35
  prerelease: false
52
- requirement: &id003 !ruby/object:Gem::Requirement
53
- none: false
54
- requirements:
55
- - - ">="
56
- - !ruby/object:Gem::Version
57
- hash: 3
58
- segments:
59
- - 0
60
- version: "0"
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: distribution
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
61
48
  type: :development
62
- version_requirements: *id003
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
63
55
  description: A full-featured Ruby statistics library with assumption verification
64
- email:
56
+ email:
65
57
  - dcohen@gatech.edu
66
58
  executables: []
67
-
68
59
  extensions: []
69
-
70
60
  extra_rdoc_files: []
71
-
72
- files:
61
+ files:
73
62
  - .travis.yml
74
63
  - Gemfile
75
64
  - LICENSE.txt
@@ -77,48 +66,43 @@ files:
77
66
  - Rakefile
78
67
  - Rsquared.gemspec
79
68
  - install.sh
69
+ - lib/Rsquared/GrubbsTest.rb
70
+ - lib/Rsquared/KSTest.rb
71
+ - lib/Rsquared/PropTest.rb
72
+ - lib/Rsquared/StatTest.rb
73
+ - lib/Rsquared/TTest.rb
80
74
  - lib/Rsquared/version.rb
81
75
  - lib/constants.rb
82
76
  - lib/enumerableext.rb
83
77
  - lib/rsquared.rb
84
- - lib/version.rb
85
78
  - test/enumerable_tests.rb
86
79
  - test/helper_tests.rb
87
80
  - test/rsquared_tests.rb
88
81
  homepage: https://github.com/dacohen/Rsquared
89
- licenses:
82
+ licenses:
90
83
  - MIT
84
+ metadata: {}
91
85
  post_install_message:
92
86
  rdoc_options: []
93
-
94
- require_paths:
87
+ require_paths:
95
88
  - lib
96
- required_ruby_version: !ruby/object:Gem::Requirement
97
- none: false
98
- requirements:
99
- - - ">="
100
- - !ruby/object:Gem::Version
101
- hash: 3
102
- segments:
103
- - 0
104
- version: "0"
105
- required_rubygems_version: !ruby/object:Gem::Requirement
106
- none: false
107
- requirements:
108
- - - ">="
109
- - !ruby/object:Gem::Version
110
- hash: 3
111
- segments:
112
- - 0
113
- version: "0"
89
+ required_ruby_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ required_rubygems_version: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - '>='
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
114
99
  requirements: []
115
-
116
100
  rubyforge_project:
117
- rubygems_version: 1.8.24
101
+ rubygems_version: 2.0.3
118
102
  signing_key:
119
- specification_version: 3
103
+ specification_version: 4
120
104
  summary: Provides statistical distributions, tests and verifies relevant assumptions
121
- test_files:
105
+ test_files:
122
106
  - test/enumerable_tests.rb
123
107
  - test/helper_tests.rb
124
108
  - test/rsquared_tests.rb
data/lib/version.rb DELETED
File without changes