statsample 0.8.2 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
data.tar.gz.sig CHANGED
Binary file
@@ -1,3 +1,6 @@
1
+ === 0.9.0 / 2010-04-04
2
+ * New Statsample::Test::F. Anova::OneWay subclasses it and Regression classes use it.
3
+
1
4
  === 0.8.2 / 2010-04-01
2
5
  * Statsample::PromiseAfter replaced by external package DirtyMemoize [http://rubygems.org/gems/dirty-memoize]
3
6
  === 0.8.1 / 2010-03-29
@@ -75,6 +75,7 @@ lib/statsample/reliability.rb
75
75
  lib/statsample/resample.rb
76
76
  lib/statsample/srs.rb
77
77
  lib/statsample/test.rb
78
+ lib/statsample/test/f.rb
78
79
  lib/statsample/test/levene.rb
79
80
  lib/statsample/test/t.rb
80
81
  lib/statsample/test/umannwhitney.rb
@@ -111,6 +112,7 @@ test/test_statistics.rb
111
112
  test/test_stest.rb
112
113
  test/test_stratified.rb
113
114
  test/test_svg_graph.rb
115
+ test/test_test_f.rb
114
116
  test/test_test_t.rb
115
117
  test/test_umannwhitney.rb
116
118
  test/test_vector.rb
data/README.txt CHANGED
@@ -5,13 +5,13 @@ http://ruby-statsample.rubyforge.org/
5
5
 
6
6
  == DESCRIPTION:
7
7
 
8
- A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, Ruby 1.9 and JRuby 1.4 (Ruby 1.8.7 compatible)
8
+ A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, 1.9.1, 1.9.2 (April, 2010) and JRuby 1.4 (Ruby 1.8.7 compatible)
9
9
 
10
10
  Includes:
11
11
  * Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
12
12
  * Imports and exports datasets from and to Excel, CSV and plain text files.
13
13
  * Correlations: Pearson's r, Spearman's rank correlation (rho), Tetrachoric, Polychoric
14
- * Tests: T, Levene, U-Mannwhitney, One-Way Anova
14
+ * Tests: F (Anova One-Way), T, Levene, U-Mannwhitney.
15
15
  * Regression: Simple, Multiple, Probit and Logit
16
16
  * Factorial Analysis: Extraction (PCA and Principal Axis) and Rotation (Varimax and relatives)
17
17
  * Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
data/Rakefile CHANGED
@@ -39,7 +39,8 @@ task :makemo do
39
39
  end
40
40
 
41
41
  h=Hoe.spec('statsample') do
42
- self.version=Statsample::VERSION
42
+ self.testlib=:minitest unless RUBY_VERSION<="1.9"
43
+ self.version=Statsample::VERSION
43
44
  self.rubyforge_name = "ruby-statsample"
44
45
  self.developer('Claudio Bustos', 'clbustos@gmail.com')
45
46
  self.extra_deps << ["spreadsheet","~>0.6.0"] << ["svg-graph", "~>1.0"] << ["reportbuilder", "~>1.0"] << ["minimization", "~>0.1.0"] << ["fastercsv"] << ["dirty-memoize", "~>0.0"]
@@ -112,7 +112,7 @@ module Statsample
112
112
  false
113
113
  end
114
114
  end
115
- VERSION = '0.8.2'
115
+ VERSION = '0.9.0'
116
116
  SPLIT_TOKEN = ","
117
117
  autoload(:Database, 'statsample/converters')
118
118
  autoload(:Anova, 'statsample/anova')
@@ -8,31 +8,31 @@ module Statsample
8
8
  # anova=Statsample::Anova::OneWay.new([v1,v2,v3])
9
9
  # anova.f
10
10
  # => 0.0243902439024391
11
- # anova.significance
11
+ # anova.probability
12
12
  # => 0.975953044203438
13
13
  # anova.sst
14
14
  # => 32.9333333333333
15
15
  #
16
- class OneWay
17
- def initialize(vectors)
16
+ class OneWay < Statsample::Test::F
17
+ def initialize(vectors,opts=Hash.new)
18
18
  @vectors=vectors
19
- end
20
- # Total sum
21
- def sum
22
- @vectors.inject(0){|a,v| a+v.sum}
19
+ opts_default={:name=>_("Anova One-Way"), :name_numerator=>"Between Groups", :name_denominator=>"Within Groups"}
20
+ super(ssbg,sswg, df_bg, df_wg)
23
21
  end
24
22
  # Total mean
25
23
  def mean
24
+ sum=@vectors.inject(0){|a,v| a+v.sum}
26
25
  sum.quo(n)
27
26
  end
27
+
28
28
  # Total sum of squares
29
29
  def sst
30
- m=mean.to_f
31
- @vectors.inject(0) {|total,vector| total+vector.sum_of_squares(m) }
30
+ m=mean
31
+ @vectors.inject(0) {|total,vector| total+vector.ss(m) }
32
32
  end
33
33
  # Sum of squares within groups
34
34
  def sswg
35
- @vectors.inject(0) {|total,vector| total+vector.sum_of_squares }
35
+ @sswg||=@vectors.inject(0) {|total,vector| total+vector.ss }
36
36
  end
37
37
  # Sum of squares between groups
38
38
  def ssbg
@@ -43,29 +43,20 @@ module Statsample
43
43
  end
44
44
  # Degrees of freedom within groups
45
45
  def df_wg
46
- @vectors.inject(0) {|a,v| a+(v.size-1)}
46
+ @dk_wg||=n-k
47
+ end
48
+ def k
49
+ @k||=@vectors.size
47
50
  end
48
51
  # Degrees of freedom between groups
49
52
  def df_bg
50
- @vectors.size-1
51
- end
52
- # Total Degrees of freedom
53
- def df_total
54
- n-1
53
+ k-1
55
54
  end
56
55
  # Total number of cases
57
56
  def n
58
57
  @vectors.inject(0){|a,v| a+v.size}
59
58
  end
60
- # Fisher
61
- def f
62
- k=@vectors.size
63
- (ssbg*(n-k)) / (sswg*(k-1))
64
- end
65
- # Significance of Fisher
66
- def significance
67
- 1.0-Distribution::F.cdf(f,df_bg,df_wg)
68
- end
59
+
69
60
  end
70
61
  end
71
62
  end
@@ -3,7 +3,7 @@ require 'statsample/vector'
3
3
  class Hash
4
4
  # Creates a Statsample::Dataset based on a Hash
5
5
  def to_dataset(*args)
6
- Statsample::Dataset.new(self,*args)
6
+ Statsample::Dataset.new(self, *args)
7
7
  end
8
8
  end
9
9
 
@@ -1,4 +1,4 @@
1
- require 'statsample/dominanceanalysis/bootstrap'
1
+
2
2
  module Statsample
3
3
  # Dominance Analysis is a procedure based on an examination of the R<sup>2</sup> values
4
4
  # for all possible subset models, to identify the relevance of one or more
@@ -428,3 +428,5 @@ module Statsample
428
428
  end # end ModelData
429
429
  end # end Dominance Analysis
430
430
  end
431
+
432
+ require 'statsample/dominanceanalysis/bootstrap'
@@ -78,7 +78,7 @@ module Statsample
78
78
  RubyEngine.new(ds,y_var)
79
79
  else
80
80
  if Statsample.has_gsl?
81
- Statsample::Regression::Multiple::GslEngine.new(ds,y_var)
81
+ Statsample::Regression::Multiple::GslEngine.new(ds, y_var)
82
82
  else
83
83
  ds2=ds.dup_only_valid
84
84
  Statsample::Regression::Multiple::RubyEngine.new(ds2,y_var)
@@ -12,9 +12,6 @@ module Statsample
12
12
  def self.univariate?
13
13
  true
14
14
  end
15
-
16
-
17
-
18
15
  def initialize(ds, y_var, opts = Hash.new)
19
16
  @ds=ds
20
17
  @cases=@ds.cases
@@ -25,17 +22,20 @@ module Statsample
25
22
  self.send("#{k}=",v) if self.respond_to? k
26
23
  }
27
24
  end
28
-
25
+ # Calculate F Test
26
+ def f_test
27
+ @f_test||=Statsample::Test::F.new(ssr, sse, df_r, df_e, :name_numerator=>_("Regression"), :name_denominator=>_("Error"), :name=>"ANOVA")
28
+ end
29
29
  # Retrieves a vector with predicted values for y
30
30
  def predicted
31
31
  (0...@ds.cases).collect { |i|
32
- invalid=false
33
- vect=@dep_columns.collect {|v| invalid=true if v[i].nil?; v[i]}
34
- if invalid
35
- nil
36
- else
37
- process(vect)
38
- end
32
+ invalid=false
33
+ vect=@dep_columns.collect {|v| invalid=true if v[i].nil?; v[i]}
34
+ if invalid
35
+ nil
36
+ else
37
+ process(vect)
38
+ end
39
39
  }.to_vector(:scale)
40
40
  end
41
41
  # Retrieves a vector with standarized values for y
@@ -97,11 +97,11 @@ module Statsample
97
97
  end
98
98
  # Fisher for Anova
99
99
  def f
100
- (ssr.quo(df_r)).quo(sse.quo(df_e))
100
+ f_test.f
101
101
  end
102
- # Significance of Fisher
103
- def significance
104
- (1.0-Distribution::F.cdf(f, df_r, df_e)).abs
102
+ # p-value of Fisher
103
+ def probability
104
+ f_test.probability
105
105
  end
106
106
  # Tolerance for a given variable
107
107
  # http://talkstats.com/showthread.php?t=5056
@@ -129,7 +129,7 @@ module Statsample
129
129
  }
130
130
  out
131
131
  end
132
- # Estandar error of R
132
+ # Standard error of R^2
133
133
  def se_r2
134
134
  Math::sqrt((4*r2*(1-r2)**2*(df_e)**2).quo((@cases**2-1)*(@cases+3)))
135
135
  end
@@ -161,7 +161,7 @@ module Statsample
161
161
  rp.to_text
162
162
  end
163
163
  def report_building(b)
164
- b.section(:name=>_("Multiple Regression: ")+@name) do |g|
164
+ b.section(:name=>@name) do |g|
165
165
  c=coeffs
166
166
  g.text(_("Engine: %s") % self.class)
167
167
  g.text(_("Cases(listwise)=%d(%d)") % [@ds.cases, @ds_valid.cases])
@@ -170,12 +170,7 @@ module Statsample
170
170
 
171
171
  g.text(_("Equation")+"="+ sprintf('%0.3f',constant) +" + "+ @fields.collect {|k| sprintf('%0.3f%s',c[k],k)}.join(' + ') )
172
172
 
173
- g.table(:name=>"ANOVA", :header=>%w{source ss df ms f s}) do |t|
174
- t.row([_("Regression"), sprintf("%0.3f",ssr), df_r, sprintf("%0.3f",msr), sprintf("%0.3f",f), sprintf("%0.3f", significance)])
175
- t.row([_("Error"), sprintf("%0.3f",sse), df_e, sprintf("%0.3f",mse),"",""])
176
-
177
- t.row([_("Total"), sprintf("%0.3f",sst), df_r+df_e,"","",""])
178
- end
173
+ g.parse_element(f_test)
179
174
  sc=standarized_coeffs
180
175
  cse=coeffs_se
181
176
  g.table(:name=>"Beta coefficients", :header=>%w{coeff b beta se t}.collect{|field| _(field)} ) do |t|
@@ -2,6 +2,10 @@ module Statsample
2
2
  module Regression
3
3
  module Multiple
4
4
  # Pure Ruby Class for Multiple Regression Analysis, based on a covariance or correlation matrix.
5
+ #
6
+ # Use Statsample::Regression::Multiple::RubyEngine if you have a
7
+ # Dataset, to avoid setting all details.
8
+ #
5
9
  # <b>Remember:</b> NEVER use a Covariance data if you have missing data. Use only correlation matrix on that case.
6
10
  #
7
11
  #
@@ -15,16 +19,13 @@ class MatrixEngine < BaseEngine
15
19
  # Hash of standard deviation of predictors.
16
20
  # Only useful for Correlation Matrix, because by default is set to 1
17
21
  attr_accessor :x_sd
18
- # Standard deviation of criteria.
22
+ # Standard deviation of criterion
19
23
  # Only useful for Correlation Matrix, because by default is set to 1
20
-
21
24
  attr_accessor :y_sd
22
25
  # Hash of mean for predictors. By default, set to 0
23
- #
24
26
  attr_accessor :x_mean
25
27
 
26
28
  # Mean for criteria. By default, set to 0
27
- #
28
29
  attr_accessor :y_mean
29
30
 
30
31
  # Number of cases
@@ -92,23 +93,25 @@ class MatrixEngine < BaseEngine
92
93
  standarized_coeffs[k]*@y_sd.quo(@x_sd[k])
93
94
  }
94
95
  end
95
-
96
96
  end
97
97
  def cases
98
98
  raise "You should define the number of valid cases first" if @cases.nil?
99
99
  @cases
100
100
  end
101
101
  # Get R^2 for the regression
102
+ # For fixed models, it is the coefficient of determination.
103
+ # On random models, it is the 'squared multiple correlation'.
102
104
  # Equal to
103
105
  # * 1-(|R| / |R_x|) or
104
106
  # * Sum(b_i*r_yi) <- used
105
107
  def r2
106
108
  @n_predictors.times.inject(0) {|ac,i| ac+@coeffs_stan[i]* @matrix_y[i,0]}
107
109
  end
110
+ # Multiple correlation, on random models.
108
111
  def r
109
112
  Math::sqrt(r2)
110
113
  end
111
-
114
+ # Value of constant
112
115
  def constant
113
116
  c=coeffs
114
117
  @y_mean - @fields.inject(0){|a,k| a + (c[k] * @x_mean[k])}
@@ -135,12 +138,10 @@ class MatrixEngine < BaseEngine
135
138
  def df_e
136
139
  cases-@n_predictors-1
137
140
  end
138
-
139
141
  # Tolerance for a given variable
140
142
  # defined as (1-R^2) of regression of other independent variables
141
143
  # over the selected
142
144
  # Reference:
143
- #
144
145
  # * http://talkstats.com/showthread.php?t=5056
145
146
  def tolerance(var)
146
147
  lr=Statsample::Regression::Multiple::MatrixEngine.new(@matrix_x, var)
@@ -150,8 +151,7 @@ class MatrixEngine < BaseEngine
150
151
  # Standard error of a coefficients depends on
151
152
  # * Tolerance of the coefficients: Higher tolerances imply higher error
152
153
  # * Higher r2 implies lower error
153
-
154
- # Reference:
154
+ # == Reference:
155
155
  # * Cohen et al. (2003). Applied Multiple Regression / Correlation Analysis for the Behavioral Sciences
156
156
  #
157
157
  def coeffs_se
@@ -162,13 +162,15 @@ class MatrixEngine < BaseEngine
162
162
  }
163
163
  out
164
164
  end
165
+ # t value for constant
165
166
  def constant_t
166
167
  return nil if constant_se.nil?
167
168
  constant.to_f/constant_se
168
169
  end
169
170
  # Standard error for constant.
170
- # Recreate the estimaded variance-covariance matrix
171
- # using means, standard deviation and covariance matrix
171
+ # This method recreates the estimated variance-covariance matrix
172
+ # using means, standard deviation and covariance matrix.
173
+ # So, it needs the covariance matrix.
172
174
  def constant_se
173
175
  return nil if @no_covariance
174
176
  means=@x_mean
@@ -178,6 +180,7 @@ class MatrixEngine < BaseEngine
178
180
  #sd[@y_var]=@y_sd
179
181
  sd[:constant]=0
180
182
  fields=[:constant]+@matrix_cov.fields-[@y_var]
183
+ # Recreate X'X using the variance-covariance matrix
181
184
  xt_x=Matrix.rows(fields.collect {|i|
182
185
  fields.collect {|j|
183
186
  if i==:constant or j==:constant
@@ -203,13 +206,11 @@ class MatrixEngine < BaseEngine
203
206
  g.text("R^2=#{sprintf('%0.3f',r2)}")
204
207
 
205
208
  g.text(_("Equation")+"="+ sprintf('%0.3f',constant) +" + "+ @fields.collect {|k| sprintf('%0.3f%s',c[k],k)}.join(' + ') )
209
+
210
+ g.parse_element(f_test)
211
+
206
212
 
207
- g.table(:name=>"ANOVA", :header=>%w{source ss df ms f s}) do |t|
208
- t.row([_("Regression"), sprintf("%0.3f",ssr), df_r, sprintf("%0.3f",msr), sprintf("%0.3f",f), sprintf("%0.3f", significance)])
209
- t.row([_("Error"), sprintf("%0.3f",sse), df_e, sprintf("%0.3f",mse),"",""])
210
213
 
211
- t.row([_("Total"), sprintf("%0.3f",sst), df_r+df_e,"","",""])
212
- end
213
214
  sc=standarized_coeffs
214
215
  cse=coeffs_se
215
216
  g.table(:name=>"Beta coefficients", :header=>%w{coeff b beta se t}.collect{|field| _(field)} ) do |t|
@@ -5,7 +5,7 @@ module Statsample
5
5
  autoload(:UMannWhitney, 'statsample/test/umannwhitney')
6
6
  autoload(:Levene, 'statsample/test/levene')
7
7
  autoload(:T, 'statsample/test/t')
8
-
8
+ autoload(:F, 'statsample/test/f')
9
9
  # Returns probability of getting a value lower or higher
10
10
  # than sample, using cdf and number of tails.
11
11
  # * For one tail left, return the cdf
@@ -0,0 +1,61 @@
1
+ module Statsample
2
+ module Test
3
+ # From Wikipedia:
4
+ # An F-test is any statistical test in which the test statistic has an F-distribution under the null hypothesis. It is most often used when comparing statistical models that have been fit to a data set, in order to identify the model that best fits the population from which the data were sampled.
5
+ class F
6
+ include GetText
7
+ bindtextdomain("statsample")
8
+
9
+ include Statsample::Test
10
+
11
+ attr_reader :ss_num, :ss_den, :df_num, :df_den, :ss_total, :df_total
12
+ # Tails for probability (:left or :right; the constructor raises if :both is given)
13
+ attr_accessor :tails
14
+ # Name of F analysis
15
+ attr_accessor :name
16
+ # Name of numerator
17
+ attr_accessor :name_numerator
18
+ # Name of denominator
19
+ attr_accessor :name_denominator
20
+
21
+ # Parameters:
22
+ # * ss_num: explained variance / between group variance
23
+ # * ss_den: unexplained variance / within group variance
24
+ # * df_num: degrees of freedom for explained variance / k-1
25
+ # * df_den: degrees of freedom for unexplained variance / n-k
26
+ def initialize(ss_num, ss_den, df_num, df_den, opts=Hash.new)
27
+ @ss_num=ss_num
28
+ @ss_den=ss_den
29
+ @df_num=df_num
30
+ @df_den=df_den
31
+ @ss_total=ss_num+ss_den
32
+ @df_total=df_num+df_den
33
+ opts_default={:tails=>:right, :name_numerator=>"Numerator", :name_denominator=>"Denominator", :name=>"F Test"}
34
+ @opts=opts_default.merge(opts)
35
+ raise "Tails should be right or left, not both" if @opts[:tails]==:both
36
+ opts_default.keys.each {|k|
37
+ send("#{k}=", @opts[k])
38
+ }
39
+ end
40
+ def summary
41
+ ReportBuilder.new(:no_title=>true).add(self).to_text
42
+ end
43
+ def f
44
+ (@ss_num.quo(@df_num)).quo(@ss_den.quo(@df_den))
45
+ end
46
+ # probability
47
+ def probability
48
+ p_using_cdf(Distribution::F.cdf(f, @df_num, @df_den), tails)
49
+ end
50
+ def report_building(builder)#:nodoc:
51
+ builder.section(:name=>@name) do |b|
52
+ b.table(:name=>_("%s Table") % @name, :header=>%w{source ss df f p}.map {|v| _(v)}) do |t|
53
+ t.row([@name_numerator, sprintf("%0.3f",@ss_num), @df_num, sprintf("%0.3f",f), sprintf("%0.3f", probability)])
54
+ t.row([@name_denominator, sprintf("%0.3f",@ss_den), @df_den, "", ""])
55
+ t.row([_("Total"), sprintf("%0.3f",@ss_total), @df_total,"",""])
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
61
+ end
@@ -55,6 +55,15 @@ module Statsample
55
55
  # a=1000.times.map {rand(100)}.to_scale
56
56
  # t_1=Statsample::Test::T::OneSample.new(a, {:u=>50})
57
57
  # t_1.summary
58
+ #
59
+ # === Output
60
+ #
61
+ # = One Sample T Test
62
+ # Sample mean: 48.954
63
+ # Population mean:50
64
+ # Tails: both
65
+ # t = -1.1573, p=0.2474, d.f=999
66
+
58
67
  class OneSample
59
68
  include Math
60
69
  include Statsample::Test
@@ -76,7 +85,11 @@ module Statsample
76
85
 
77
86
  dirty_writer :u, :tails
78
87
  dirty_memoize :t, :probability
79
-
88
+ # Create a One Sample T Test
89
+ # Options:
90
+ # * :u = Mean to compare. Default= 0
91
+ # * :name = Name of the analysis
92
+ # * :tails = Tail for probability. Could be :both, :left, :right
80
93
  def initialize(vector, opts=Hash.new)
81
94
  @vector=vector
82
95
  default={:u=>0, :name=>"One Sample T Test", :tails=>:both}
@@ -113,9 +126,30 @@ module Statsample
113
126
  # == Usage
114
127
  # a=1000.times.map {rand(100)}.to_scale
115
128
  # b=1000.times.map {rand(100)}.to_scale
116
- # t_2=Statsample::Test::T::OneSample.new(a,b)
129
+ # t_2=Statsample::Test::T::TwoSamplesIndependent.new(a,b)
117
130
  # t_2.summary
118
-
131
+ # === Output
132
+ # = Two Sample T Test
133
+ # Mean and standard deviation
134
+ # +----------+---------+---------+------+
135
+ # | Variable | m | sd | n |
136
+ # +----------+---------+---------+------+
137
+ # | 1 | 49.3310 | 29.3042 | 1000 |
138
+ # | 2 | 47.8180 | 28.8640 | 1000 |
139
+ # +----------+---------+---------+------+
140
+ #
141
+ # == Levene Test
142
+ # Levene Test
143
+ # F: 0.3596
144
+ # p: 0.5488
145
+ # T statistics
146
+ # +--------------------+--------+-----------+----------------+
147
+ # | Type | t | df | p (both tails) |
148
+ # +--------------------+--------+-----------+----------------+
149
+ # | Equal variance | 1.1632 | 1998 | 0.2449 |
150
+ # | Non equal variance | 1.1632 | 1997.5424 | 0.1362 |
151
+ # +--------------------+--------+-----------+----------------+
152
+
119
153
  class TwoSamplesIndependent
120
154
  include Math
121
155
  include Statsample::Test
@@ -142,18 +176,20 @@ module Statsample
142
176
 
143
177
  dirty_writer :tails
144
178
  dirty_memoize :t_equal_variance, :t_not_equal_variance, :probability_equal_variance, :probability_not_equal_variance, :df_equal_variance, :df_not_equal_variance
145
-
179
+
180
+ # Create a Two Independent T Test
181
+ # Options:
182
+ # * :name = Name of the analysis
183
+ # * :tails = Tail for probability. Could be :both, :left, :right
146
184
  def initialize(v1, v2, opts=Hash.new)
147
185
  @v1=v1
148
186
  @v2=v2
149
- default={:u=>0, :name=>"Two Sample T Test", :paired_samples=>false, :tails=>:both}
187
+ default={:u=>0, :name=>"Two Sample T Test", :tails=>:both}
150
188
  @opts=default.merge(opts)
151
189
  @name=@opts[:name]
152
190
  @tails=@opts[:tails]
153
191
  end
154
-
155
-
156
-
192
+
157
193
  # Set t and probability for given u
158
194
  def compute
159
195
  @t_equal_variance= T.two_sample_independent(@v1.mean, @v2.mean, @v1.sd, @v2.sd, @v1.n_valid, @v2.n_valid,true)
@@ -1,12 +1,11 @@
1
1
  require(File.dirname(__FILE__)+'/test_helpers.rb')
2
2
 
3
3
  class StatsampleAnovaTestCase < MiniTest::Unit::TestCase
4
- def initialize(*args)
4
+ def setup
5
5
  @v1=[3,3,2,3,6].to_vector(:scale)
6
6
  @v2=[7,6,5,6,7].to_vector(:scale)
7
7
  @v3=[9,8,9,7,8].to_vector(:scale)
8
8
  @anova=Statsample::Anova::OneWay.new([@v1,@v2,@v3])
9
- super
10
9
  end
11
10
  def test_basic
12
11
  assert_in_delta(72.933, @anova.sst,0.001)
@@ -19,7 +18,7 @@ class StatsampleAnovaTestCase < MiniTest::Unit::TestCase
19
18
  assert_in_delta(23.568,@anova.f,0.001)
20
19
  anova2=Statsample::Anova::OneWay.new([@v1,@v1,@v1,@v1,@v2])
21
20
  assert_in_delta(3.960, anova2.f,0.001)
22
- assert(@anova.significance<0.01)
23
- assert_in_delta(0.016, anova2.significance,0.001)
21
+ assert(@anova.probability<0.01)
22
+ assert_in_delta(0.016, anova2.probability,0.001)
24
23
  end
25
24
  end
@@ -12,7 +12,7 @@ class StatsampleBivariateTestCase < MiniTest::Unit::TestCase
12
12
  v2=1000.times.collect {|a| rand()}.to_scale
13
13
  assert_in_delta(Statsample::Bivariate.covariance(v1,v2), Statsample::Bivariate.covariance_slow(v1,v2), 0.001)
14
14
  else
15
- puts "Bivariate::covariance not tested (needs GSL)"
15
+ skip "Bivariate::covariance not tested (needs GSL)"
16
16
  end
17
17
 
18
18
  end
@@ -24,7 +24,7 @@ class StatsampleBivariateTestCase < MiniTest::Unit::TestCase
24
24
 
25
25
  assert_in_delta(GSL::Stats::correlation(v1.gsl, v2.gsl), Statsample::Bivariate.pearson_slow(v1,v2), 1e-10)
26
26
  else
27
- puts "Not tested gsl versus ruby correlation (needs GSL)"
27
+ skip "Not tested gsl versus ruby correlation (needs GSL)"
28
28
  end
29
29
  end
30
30
  def test_pearson
@@ -67,6 +67,8 @@ class StatsampleBivariateTestCase < MiniTest::Unit::TestCase
67
67
  if Statsample.has_gsl?
68
68
  poly.compute_two_step_mle_drasgow_gsl
69
69
  assert_in_delta(tetra.r,poly.r,0.0001)
70
+ else
71
+ skip "compute_two_step_mle_drasgow_gsl not tested (requires GSL)"
70
72
  end
71
73
  }
72
74
  end
@@ -112,7 +114,7 @@ class StatsampleBivariateTestCase < MiniTest::Unit::TestCase
112
114
  assert_in_delta(1.5938, poly.threshold_y[1],0.0001)
113
115
  assert_in_delta(1.1331, poly.threshold_x[1],0.0001)
114
116
  else
115
- puts "Two-step optimized, polychoric series and Joint method for Polychoric requires GSL"
117
+ skip "Two-step optimized, polychoric series and Joint method for Polychoric requires GSL"
116
118
  end
117
119
  assert(poly.summary)
118
120
  end
@@ -1,9 +1,6 @@
1
1
  require(File.dirname(__FILE__)+'/test_helpers.rb')
2
2
 
3
3
  class StatsampleCombinationTestCase < MiniTest::Unit::TestCase
4
- def initialize(*args)
5
- super
6
- end
7
4
  def test_basic
8
5
  k=3
9
6
  n=5
@@ -34,7 +31,7 @@ class StatsampleCombinationTestCase < MiniTest::Unit::TestCase
34
31
 
35
32
  assert_equal(rb_array,gsl_array)
36
33
  else
37
- puts "Not CombinationRuby vs CombinationGSL (no gsl)"
34
+ skip "Not CombinationRuby vs CombinationGSL (no gsl)"
38
35
  end
39
36
  end
40
37
  end
@@ -33,7 +33,7 @@ class StatsampleFactorTestCase < MiniTest::Unit::TestCase
33
33
  _test_matrix(expected_fm_2,pca.feature_vector(2))
34
34
  assert(pca.summary)
35
35
  else
36
- puts "PCA not tested. Requires GSL"
36
+ skip "PCA not tested. Requires GSL"
37
37
  end
38
38
  end
39
39
 
@@ -67,7 +67,7 @@ class StatsampleFactorTestCase < MiniTest::Unit::TestCase
67
67
  assert(fa.summary)
68
68
 
69
69
  else
70
- puts "Principal Axis not tested. Requires GSL"
70
+ skip "Principal Axis not tested. Requires GSL"
71
71
  end
72
72
  end
73
73
 
@@ -89,7 +89,7 @@ class StatsampleFactorTestCase < MiniTest::Unit::TestCase
89
89
  refute(varimax.h2.nil?,"H2 shouldn't be empty")
90
90
  _test_matrix(expected,varimax.rotated)
91
91
  else
92
- puts "Rotation not tested. Requires GSL"
92
+ skip "Rotation not tested. Requires GSL"
93
93
  end
94
94
  end
95
95
  def _test_matrix(a,b)
@@ -14,6 +14,8 @@ class StatsampleGSLTestCase < MiniTest::Unit::TestCase
14
14
  matrix=gsl.to_matrix
15
15
  assert_equal(5,matrix.row_size)
16
16
  assert_equal(3,matrix.column_size)
17
+ else
18
+ skip("Needs GSL extension")
17
19
  end
18
20
  end
19
21
  end
@@ -3,8 +3,20 @@ require 'statsample'
3
3
  require 'minitest/unit'
4
4
  require 'tempfile'
5
5
  require 'tmpdir'
6
+ require 'shoulda'
7
+ module MiniTest
8
+ class Unit
9
+ class TestCase
10
+ include Shoulda::InstanceMethods
11
+ extend Shoulda::ClassMethods
12
+ include Shoulda::Assertions
13
+
14
+ end
15
+ end
16
+ end
6
17
 
7
18
  module MiniTest::Assertions
19
+
8
20
  alias :assert_raise :assert_raises unless method_defined? :assert_raise
9
21
  alias :assert_not_equal :refute_equal unless method_defined? :assert_not_equal
10
22
  alias :assert_not_same :refute_same unless method_defined? :assert_not_same
@@ -48,7 +48,7 @@ class StatsampleMLETestCase < MiniTest::Unit::TestCase
48
48
  #p coeffs_nr
49
49
  ds=@ds_indep.dup
50
50
  ds.add_vector('y',y)
51
- lr=Statsample::Regression.multiple(ds,'y')
51
+ lr=Statsample::Regression.multiple(ds, 'y')
52
52
  lr_constant = lr.constant
53
53
  lr_coeffs = lr.coeffs
54
54
  assert_in_delta(coeffs_nr[0,0], lr_constant,0.0000001)
@@ -2,9 +2,6 @@ require(File.dirname(__FILE__)+'/test_helpers.rb')
2
2
 
3
3
 
4
4
  class StatsampleMultisetTestCase < MiniTest::Unit::TestCase
5
- def initialize(*args)
6
- super
7
- end
8
5
  def test_creation
9
6
  v1a=[1,2,3,4,5].to_vector
10
7
  v2b=[11,21,31,41,51].to_vector
@@ -90,7 +90,7 @@ class StatsampleRegressionTestCase < MiniTest::Unit::TestCase
90
90
  assert_in_delta(residuals[i],c_residuals[i],0.001)
91
91
  }
92
92
  else
93
- puts "Regression::Multiple::GslEngine not tested (no Gsl)"
93
+ skip "Regression::Multiple::GslEngine not tested (no Gsl)"
94
94
  end
95
95
  end
96
96
 
@@ -115,7 +115,7 @@ class StatsampleRegressionTestCase < MiniTest::Unit::TestCase
115
115
  assert_in_delta(0.913,lr.r2,0.001)
116
116
 
117
117
  assert_in_delta(20.908, lr.f,0.001)
118
- assert_in_delta(0.001, lr.significance, 0.001)
118
+ assert_in_delta(0.001, lr.probability, 0.001)
119
119
  assert_in_delta(0.226,lr.tolerance("a"),0.001)
120
120
 
121
121
  coeffs_se={"a"=>1.171,"b"=>1.129,"c"=>0.072}
@@ -3,8 +3,7 @@ require(File.dirname(__FILE__)+'/test_helpers.rb')
3
3
 
4
4
  class StatsampleReliabilityTestCase < MiniTest::Unit::TestCase
5
5
 
6
- def initialize(*args)
7
- super
6
+ def setup
8
7
  @x1=[1,1,1,1,2,2,2,2,3,3,3,30].to_vector(:scale)
9
8
  @x2=[1,1,1,2,2,3,3,3,3,4,4,50].to_vector(:scale)
10
9
  @x3=[2,2,1,1,1,2,2,2,3,4,5,40].to_vector(:scale)
@@ -19,14 +18,4 @@ class StatsampleReliabilityTestCase < MiniTest::Unit::TestCase
19
18
  assert_in_delta(0.999,ia.item_total_correlation()['x1'],0.001)
20
19
  assert_in_delta(1050.455,ia.stats_if_deleted()['x1'][:variance_sample],0.001)
21
20
  end
22
- def test_icc
23
- #p @x1.factors
24
- icc=Statsample::Reliability::ItemCharacteristicCurve.new(@ds)
25
- # Need to create the test!!!!
26
- #p icc.curve_field('x1',1).sort
27
- #p icc.curve_field('x1',2).sort
28
- #p icc.curve_field('x1',3).sort
29
- #p icc.curve_field('x1',30).sort
30
-
31
- end
32
21
  end
@@ -17,7 +17,7 @@ class StatsampleSvgGraphTestCase < MiniTest::Unit::TestCase
17
17
  graph.histogram=h
18
18
  file.puts(graph.burn)
19
19
  else
20
- puts "Statsample::Graph::SvgHistogram.new not tested (no ruby-gsl)"
20
+ skip "Statsample::Graph::SvgHistogram.new not tested (no ruby-gsl)"
21
21
  end
22
22
  end
23
23
  def assert_svg(msg=nil)
@@ -48,7 +48,7 @@ class StatsampleSvgGraphTestCase < MiniTest::Unit::TestCase
48
48
  }
49
49
  assert(File.exists?(file))
50
50
  else
51
- puts "Statsample::Vector#svggraph_histogram.new not tested (no ruby-gsl)"
51
+ skip "Statsample::Vector#svggraph_histogram.new not tested (no ruby-gsl)"
52
52
  end
53
53
  end
54
54
  end
@@ -0,0 +1,37 @@
1
+ require(File.dirname(__FILE__)+'/test_helpers.rb')
2
+
3
+ class StatsampleTestFTestCase < MiniTest::Unit::TestCase
4
+ context(Statsample::Test::F) do
5
+ setup do
6
+ @ssb=84
7
+ @ssw=68
8
+ @f=Statsample::Test::F.new(@ssb,@ssw, 2,15)
9
+ end
10
+ should "have f equal to msb/msw" do
11
+ assert_equal((@ssb.quo(2)).quo(@ssw.quo(15)), @f.f)
12
+ end
13
+ should "have df total equal to df_num+df_den" do
14
+ assert_equal(17, @f.df_total)
15
+ end
16
+ should "have probability near 0.002" do
17
+ assert_in_delta(0.002, @f.probability, 0.0005)
18
+ end
19
+ context("#summary") do
20
+ setup do
21
+ @f.name_numerator="MSb"
22
+ @f.name_denominator="MSw"
23
+ @f.name="ANOVA"
24
+ @summary=@f.summary
25
+ end
26
+ should "have size > 0" do
27
+ assert(@summary.size>0)
28
+ end
29
+ should "include correct names for title, num and den" do
30
+ assert_match(@f.name_numerator, @summary)
31
+ assert_match(@f.name_denominator, @summary)
32
+ assert_match(@f.name, @summary)
33
+ end
34
+ end
35
+ end
36
+
37
+ end
@@ -1,35 +1,53 @@
1
1
  require(File.dirname(__FILE__)+'/test_helpers.rb')
2
2
 
3
3
  class StatsampleExcelTestCase < MiniTest::Unit::TestCase
4
- def setup
5
- @ds=Statsample::Excel.read(File.dirname(__FILE__)+"/test_xls.xls")
6
- end
7
- def test_read
8
- assert_equal(6,@ds.cases)
9
- assert_equal(%w{id name age city a1},@ds.fields)
10
- id=[1,2,3,4,5,6].to_vector(:scale)
11
- name=["Alex","Claude","Peter","Franz","George","Fernand"].to_vector(:nominal)
12
- age=[20,23,25,nil,5.5,nil].to_vector(:scale)
13
- city=["New York","London","London","Paris","Tome",nil].to_vector(:nominal)
14
- a1=["a,b","b,c","a",nil,"a,b,c",nil].to_vector(:nominal)
15
- ds_exp=Statsample::Dataset.new({'id'=>id,'name'=>name,'age'=>age,'city'=>city,'a1'=>a1}, %w{id name age city a1})
16
- ds_exp.fields.each{|f|
17
- assert_equal(ds_exp[f],@ds[f])
18
- }
19
- assert_equal(ds_exp,@ds)
20
-
21
- end
22
- def test_nil
23
- assert_equal(nil,@ds['age'][5])
4
+ context "Excel reader" do
5
+ setup do
6
+ @ds=Statsample::Excel.read(File.dirname(__FILE__)+"/test_xls.xls")
7
+ end
8
+ should "set the number of cases" do
9
+ assert_equal(6,@ds.cases)
10
+ end
11
+ should "set correct field names" do
12
+ assert_equal(%w{id name age city a1},@ds.fields)
13
+ end
14
+ should "set a dataset equal to expected" do
15
+ id=[1,2,3,4,5,6].to_vector(:scale)
16
+ name=["Alex","Claude","Peter","Franz","George","Fernand"].to_vector(:nominal)
17
+ age=[20,23,25,nil,5.5,nil].to_vector(:scale)
18
+ city=["New York","London","London","Paris","Tome",nil].to_vector(:nominal)
19
+ a1=["a,b","b,c","a",nil,"a,b,c",nil].to_vector(:nominal)
20
+ ds_exp=Statsample::Dataset.new({'id'=>id,'name'=>name,'age'=>age,'city'=>city,'a1'=>a1}, %w{id name age city a1})
21
+ ds_exp.fields.each{|f|
22
+ assert_equal(ds_exp[f],@ds[f])
23
+ }
24
+ assert_equal(ds_exp,@ds)
25
+ end
26
+ should "set to nil empty cells" do
27
+ assert_equal(nil,@ds['age'][5])
28
+ end
24
29
  end
25
- def test_write
26
- tempfile=Tempfile.new("test_write.xls")
27
- Statsample::Excel.write(@ds,tempfile.path)
28
- ds2=Statsample::Excel.read(tempfile.path)
29
- i=0
30
- ds2.each_array do |row|
31
- assert_equal(@ds.case_as_array(i),row)
32
- i+=1
30
+ context "Excel writer" do
31
+ setup do
32
+ a=100.times.map{rand(100)}.to_scale
33
+ b=(["b"]*100).to_vector
34
+ @ds={'b'=>b, 'a'=>a}.to_dataset(%w{b a})
35
+ tempfile=Tempfile.new("test_write.xls")
36
+ Statsample::Excel.write(@ds,tempfile.path)
37
+ @ds2=Statsample::Excel.read(tempfile.path)
38
+ end
39
+ should "return same fields as original" do
40
+ assert_equal(@ds.fields ,@ds2.fields)
41
+ end
42
+ should "return same number of cases as original" do
43
+ assert_equal(@ds.cases, @ds2.cases)
44
+ end
45
+ should "return same cases as original" do
46
+ i=0
47
+ @ds2.each_array do |row|
48
+ assert_equal(@ds.case_as_array(i),row)
49
+ i+=1
50
+ end
33
51
  end
34
52
  end
35
53
  end
metadata CHANGED
@@ -1,7 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: statsample
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.2
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 9
8
+ - 0
9
+ version: 0.9.0
5
10
  platform: ruby
6
11
  authors:
7
12
  - Claudio Bustos
@@ -30,107 +35,124 @@ cert_chain:
30
35
  rpP0jjs0
31
36
  -----END CERTIFICATE-----
32
37
 
33
- date: 2010-04-01 00:00:00 -03:00
38
+ date: 2010-04-04 00:00:00 -04:00
34
39
  default_executable:
35
40
  dependencies:
36
41
  - !ruby/object:Gem::Dependency
37
42
  name: spreadsheet
38
- type: :runtime
39
- version_requirement:
40
- version_requirements: !ruby/object:Gem::Requirement
43
+ prerelease: false
44
+ requirement: &id001 !ruby/object:Gem::Requirement
41
45
  requirements:
42
46
  - - ~>
43
47
  - !ruby/object:Gem::Version
48
+ segments:
49
+ - 0
50
+ - 6
51
+ - 0
44
52
  version: 0.6.0
45
- version:
53
+ type: :runtime
54
+ version_requirements: *id001
46
55
  - !ruby/object:Gem::Dependency
47
56
  name: svg-graph
48
- type: :runtime
49
- version_requirement:
50
- version_requirements: !ruby/object:Gem::Requirement
57
+ prerelease: false
58
+ requirement: &id002 !ruby/object:Gem::Requirement
51
59
  requirements:
52
60
  - - ~>
53
61
  - !ruby/object:Gem::Version
62
+ segments:
63
+ - 1
64
+ - 0
54
65
  version: "1.0"
55
- version:
66
+ type: :runtime
67
+ version_requirements: *id002
56
68
  - !ruby/object:Gem::Dependency
57
69
  name: reportbuilder
58
- type: :runtime
59
- version_requirement:
60
- version_requirements: !ruby/object:Gem::Requirement
70
+ prerelease: false
71
+ requirement: &id003 !ruby/object:Gem::Requirement
61
72
  requirements:
62
73
  - - ~>
63
74
  - !ruby/object:Gem::Version
75
+ segments:
76
+ - 1
77
+ - 0
64
78
  version: "1.0"
65
- version:
79
+ type: :runtime
80
+ version_requirements: *id003
66
81
  - !ruby/object:Gem::Dependency
67
82
  name: minimization
68
- type: :runtime
69
- version_requirement:
70
- version_requirements: !ruby/object:Gem::Requirement
83
+ prerelease: false
84
+ requirement: &id004 !ruby/object:Gem::Requirement
71
85
  requirements:
72
86
  - - ~>
73
87
  - !ruby/object:Gem::Version
88
+ segments:
89
+ - 0
90
+ - 1
91
+ - 0
74
92
  version: 0.1.0
75
- version:
93
+ type: :runtime
94
+ version_requirements: *id004
76
95
  - !ruby/object:Gem::Dependency
77
96
  name: fastercsv
78
- type: :runtime
79
- version_requirement:
80
- version_requirements: !ruby/object:Gem::Requirement
97
+ prerelease: false
98
+ requirement: &id005 !ruby/object:Gem::Requirement
81
99
  requirements:
82
100
  - - ">="
83
101
  - !ruby/object:Gem::Version
102
+ segments:
103
+ - 0
84
104
  version: "0"
85
- version:
105
+ type: :runtime
106
+ version_requirements: *id005
86
107
  - !ruby/object:Gem::Dependency
87
108
  name: dirty-memoize
88
- type: :runtime
89
- version_requirement:
90
- version_requirements: !ruby/object:Gem::Requirement
109
+ prerelease: false
110
+ requirement: &id006 !ruby/object:Gem::Requirement
91
111
  requirements:
92
112
  - - ~>
93
113
  - !ruby/object:Gem::Version
114
+ segments:
115
+ - 0
116
+ - 0
94
117
  version: "0.0"
95
- version:
118
+ type: :runtime
119
+ version_requirements: *id006
96
120
  - !ruby/object:Gem::Dependency
97
121
  name: rubyforge
98
- type: :development
99
- version_requirement:
100
- version_requirements: !ruby/object:Gem::Requirement
122
+ prerelease: false
123
+ requirement: &id007 !ruby/object:Gem::Requirement
101
124
  requirements:
102
125
  - - ">="
103
126
  - !ruby/object:Gem::Version
127
+ segments:
128
+ - 2
129
+ - 0
130
+ - 4
104
131
  version: 2.0.4
105
- version:
106
- - !ruby/object:Gem::Dependency
107
- name: gemcutter
108
132
  type: :development
109
- version_requirement:
110
- version_requirements: !ruby/object:Gem::Requirement
111
- requirements:
112
- - - ">="
113
- - !ruby/object:Gem::Version
114
- version: 0.5.0
115
- version:
133
+ version_requirements: *id007
116
134
  - !ruby/object:Gem::Dependency
117
135
  name: hoe
118
- type: :development
119
- version_requirement:
120
- version_requirements: !ruby/object:Gem::Requirement
136
+ prerelease: false
137
+ requirement: &id008 !ruby/object:Gem::Requirement
121
138
  requirements:
122
139
  - - ">="
123
140
  - !ruby/object:Gem::Version
124
- version: 2.5.1
125
- version:
141
+ segments:
142
+ - 2
143
+ - 6
144
+ - 0
145
+ version: 2.6.0
146
+ type: :development
147
+ version_requirements: *id008
126
148
  description: |-
127
- A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, Ruby 1.9 and JRuby 1.4 (Ruby 1.8.7 compatible)
149
+ A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, 1.9.1, 1.9.2 (April, 2010) and JRuby 1.4 (Ruby 1.8.7 compatible)
128
150
 
129
151
  Includes:
130
152
  * Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
131
153
  * Imports and exports datasets from and to Excel, CSV and plain text files.
132
154
  * Correlations: Pearson's r, Spearman's rank correlation (rho), Tetrachoric, Polychoric
133
- * Tests: T, Levene, U-Mannwhitney, One-Way Anova
155
+ * Tests: F (Anova One-Way), T, Levene, U-Mannwhitney.
134
156
  * Regression: Simple, Multiple, Probit and Logit
135
157
  * Factorial Analysis: Extraction (PCA and Principal Axis) and Rotation (Varimax and relatives)
136
158
  * Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
@@ -225,6 +247,7 @@ files:
225
247
  - lib/statsample/resample.rb
226
248
  - lib/statsample/srs.rb
227
249
  - lib/statsample/test.rb
250
+ - lib/statsample/test/f.rb
228
251
  - lib/statsample/test/levene.rb
229
252
  - lib/statsample/test/t.rb
230
253
  - lib/statsample/test/umannwhitney.rb
@@ -261,6 +284,7 @@ files:
261
284
  - test/test_stest.rb
262
285
  - test/test_stratified.rb
263
286
  - test/test_svg_graph.rb
287
+ - test/test_test_f.rb
264
288
  - test/test_test_t.rb
265
289
  - test/test_umannwhitney.rb
266
290
  - test/test_vector.rb
@@ -280,18 +304,20 @@ required_ruby_version: !ruby/object:Gem::Requirement
280
304
  requirements:
281
305
  - - ">="
282
306
  - !ruby/object:Gem::Version
307
+ segments:
308
+ - 0
283
309
  version: "0"
284
- version:
285
310
  required_rubygems_version: !ruby/object:Gem::Requirement
286
311
  requirements:
287
312
  - - ">="
288
313
  - !ruby/object:Gem::Version
314
+ segments:
315
+ - 0
289
316
  version: "0"
290
- version:
291
317
  requirements: []
292
318
 
293
319
  rubyforge_project: ruby-statsample
294
- rubygems_version: 1.3.5
320
+ rubygems_version: 1.3.6
295
321
  signing_key:
296
322
  specification_version: 3
297
323
  summary: A suite for basic and advanced statistics on Ruby
@@ -322,6 +348,7 @@ test_files:
322
348
  - test/test_stest.rb
323
349
  - test/test_statistics.rb
324
350
  - test/test_reliability.rb
351
+ - test/test_test_f.rb
325
352
  - test/test_test_t.rb
326
353
  - test/test_histogram.rb
327
354
  - test/test_dataset.rb
metadata.gz.sig CHANGED
Binary file