statsample 0.8.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data.tar.gz.sig CHANGED
Binary file
@@ -1,3 +1,6 @@
1
+ === 0.9.0 / 2010-04-04
2
+ * New Statsample::Test::F. Anova::OneWay subclasses it and Regression classes use it.
3
+
1
4
  === 0.8.2 / 2010-04-01
2
5
  * Statsample::PromiseAfter replaced by external package DirtyMemoize [http://rubygems.org/gems/dirty-memoize]
3
6
  === 0.8.1 / 2010-03-29
@@ -75,6 +75,7 @@ lib/statsample/reliability.rb
75
75
  lib/statsample/resample.rb
76
76
  lib/statsample/srs.rb
77
77
  lib/statsample/test.rb
78
+ lib/statsample/test/f.rb
78
79
  lib/statsample/test/levene.rb
79
80
  lib/statsample/test/t.rb
80
81
  lib/statsample/test/umannwhitney.rb
@@ -111,6 +112,7 @@ test/test_statistics.rb
111
112
  test/test_stest.rb
112
113
  test/test_stratified.rb
113
114
  test/test_svg_graph.rb
115
+ test/test_test_f.rb
114
116
  test/test_test_t.rb
115
117
  test/test_umannwhitney.rb
116
118
  test/test_vector.rb
data/README.txt CHANGED
@@ -5,13 +5,13 @@ http://ruby-statsample.rubyforge.org/
5
5
 
6
6
  == DESCRIPTION:
7
7
 
8
- A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, Ruby 1.9 and JRuby 1.4 (Ruby 1.8.7 compatible)
8
+ A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, 1.9.1, 1.9.2 (April, 2010) and JRuby 1.4 (Ruby 1.8.7 compatible)
9
9
 
10
10
  Includes:
11
11
  * Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
12
12
  * Imports and exports datasets from and to Excel, CSV and plain text files.
13
13
  * Correlations: Pearson's r, Spearman's rank correlation (rho), Tetrachoric, Polychoric
14
- * Tests: T, Levene, U-Mannwhitney, One-Way Anova
14
+ * Tests: F (Anova One-Way), T, Levene, U-Mannwhitney.
15
15
  * Regression: Simple, Multiple, Probit and Logit
16
16
  * Factorial Analysis: Extraction (PCA and Principal Axis) and Rotation (Varimax and relatives)
17
17
  * Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
data/Rakefile CHANGED
@@ -39,7 +39,8 @@ task :makemo do
39
39
  end
40
40
 
41
41
  h=Hoe.spec('statsample') do
42
- self.version=Statsample::VERSION
42
+ self.testlib=:minitest unless RUBY_VERSION<="1.9"
43
+ self.version=Statsample::VERSION
43
44
  self.rubyforge_name = "ruby-statsample"
44
45
  self.developer('Claudio Bustos', 'clbustos@gmail.com')
45
46
  self.extra_deps << ["spreadsheet","~>0.6.0"] << ["svg-graph", "~>1.0"] << ["reportbuilder", "~>1.0"] << ["minimization", "~>0.1.0"] << ["fastercsv"] << ["dirty-memoize", "~>0.0"]
@@ -112,7 +112,7 @@ module Statsample
112
112
  false
113
113
  end
114
114
  end
115
- VERSION = '0.8.2'
115
+ VERSION = '0.9.0'
116
116
  SPLIT_TOKEN = ","
117
117
  autoload(:Database, 'statsample/converters')
118
118
  autoload(:Anova, 'statsample/anova')
@@ -8,31 +8,31 @@ module Statsample
8
8
  # anova=Statsample::Anova::OneWay.new([v1,v2,v3])
9
9
  # anova.f
10
10
  # => 0.0243902439024391
11
- # anova.significance
11
+ # anova.probability
12
12
  # => 0.975953044203438
13
13
  # anova.sst
14
14
  # => 32.9333333333333
15
15
  #
16
- class OneWay
17
- def initialize(vectors)
16
+ class OneWay < Statsample::Test::F
17
+ def initialize(vectors,opts=Hash.new)
18
18
  @vectors=vectors
19
- end
20
- # Total sum
21
- def sum
22
- @vectors.inject(0){|a,v| a+v.sum}
19
+ opts_default={:name=>_("Anova One-Way"), :name_numerator=>"Between Groups", :name_denominator=>"Within Groups"}
20
+ super(ssbg,sswg, df_bg, df_wg)
23
21
  end
24
22
  # Total mean
25
23
  def mean
24
+ sum=@vectors.inject(0){|a,v| a+v.sum}
26
25
  sum.quo(n)
27
26
  end
27
+
28
28
  # Total sum of squares
29
29
  def sst
30
- m=mean.to_f
31
- @vectors.inject(0) {|total,vector| total+vector.sum_of_squares(m) }
30
+ m=mean
31
+ @vectors.inject(0) {|total,vector| total+vector.ss(m) }
32
32
  end
33
33
  # Sum of squares within groups
34
34
  def sswg
35
- @vectors.inject(0) {|total,vector| total+vector.sum_of_squares }
35
+ @sswg||=@vectors.inject(0) {|total,vector| total+vector.ss }
36
36
  end
37
37
  # Sum of squares between groups
38
38
  def ssbg
@@ -43,29 +43,20 @@ module Statsample
43
43
  end
44
44
  # Degrees of freedom within groups
45
45
  def df_wg
46
- @vectors.inject(0) {|a,v| a+(v.size-1)}
46
+ @dk_wg||=n-k
47
+ end
48
+ def k
49
+ @k||=@vectors.size
47
50
  end
48
51
  # Degrees of freedom between groups
49
52
  def df_bg
50
- @vectors.size-1
51
- end
52
- # Total Degrees of freedom
53
- def df_total
54
- n-1
53
+ k-1
55
54
  end
56
55
  # Total number of cases
57
56
  def n
58
57
  @vectors.inject(0){|a,v| a+v.size}
59
58
  end
60
- # Fisher
61
- def f
62
- k=@vectors.size
63
- (ssbg*(n-k)) / (sswg*(k-1))
64
- end
65
- # Significance of Fisher
66
- def significance
67
- 1.0-Distribution::F.cdf(f,df_bg,df_wg)
68
- end
59
+
69
60
  end
70
61
  end
71
62
  end
@@ -3,7 +3,7 @@ require 'statsample/vector'
3
3
  class Hash
4
4
  # Creates a Statsample::Dataset based on a Hash
5
5
  def to_dataset(*args)
6
- Statsample::Dataset.new(self,*args)
6
+ Statsample::Dataset.new(self, *args)
7
7
  end
8
8
  end
9
9
 
@@ -1,4 +1,4 @@
1
- require 'statsample/dominanceanalysis/bootstrap'
1
+
2
2
  module Statsample
3
3
  # Dominance Analysis is a procedure based on an examination of the R<sup>2</sup> values
4
4
  # for all possible subset models, to identify the relevance of one or more
@@ -428,3 +428,5 @@ module Statsample
428
428
  end # end ModelData
429
429
  end # end Dominance Analysis
430
430
  end
431
+
432
+ require 'statsample/dominanceanalysis/bootstrap'
@@ -78,7 +78,7 @@ module Statsample
78
78
  RubyEngine.new(ds,y_var)
79
79
  else
80
80
  if Statsample.has_gsl?
81
- Statsample::Regression::Multiple::GslEngine.new(ds,y_var)
81
+ Statsample::Regression::Multiple::GslEngine.new(ds, y_var)
82
82
  else
83
83
  ds2=ds.dup_only_valid
84
84
  Statsample::Regression::Multiple::RubyEngine.new(ds2,y_var)
@@ -12,9 +12,6 @@ module Statsample
12
12
  def self.univariate?
13
13
  true
14
14
  end
15
-
16
-
17
-
18
15
  def initialize(ds, y_var, opts = Hash.new)
19
16
  @ds=ds
20
17
  @cases=@ds.cases
@@ -25,17 +22,20 @@ module Statsample
25
22
  self.send("#{k}=",v) if self.respond_to? k
26
23
  }
27
24
  end
28
-
25
+ # Calculate F Test
26
+ def f_test
27
+ @f_test||=Statsample::Test::F.new(ssr, sse, df_r, df_e, :name_numerator=>_("Regression"), :name_denominator=>_("Error"), :name=>"ANOVA")
28
+ end
29
29
  # Retrieves a vector with predicted values for y
30
30
  def predicted
31
31
  (0...@ds.cases).collect { |i|
32
- invalid=false
33
- vect=@dep_columns.collect {|v| invalid=true if v[i].nil?; v[i]}
34
- if invalid
35
- nil
36
- else
37
- process(vect)
38
- end
32
+ invalid=false
33
+ vect=@dep_columns.collect {|v| invalid=true if v[i].nil?; v[i]}
34
+ if invalid
35
+ nil
36
+ else
37
+ process(vect)
38
+ end
39
39
  }.to_vector(:scale)
40
40
  end
41
41
  # Retrieves a vector with standarized values for y
@@ -97,11 +97,11 @@ module Statsample
97
97
  end
98
98
  # Fisher for Anova
99
99
  def f
100
- (ssr.quo(df_r)).quo(sse.quo(df_e))
100
+ f_test.f
101
101
  end
102
- # Significance of Fisher
103
- def significance
104
- (1.0-Distribution::F.cdf(f, df_r, df_e)).abs
102
+ # p-value of Fisher
103
+ def probability
104
+ f_test.probability
105
105
  end
106
106
  # Tolerance for a given variable
107
107
  # http://talkstats.com/showthread.php?t=5056
@@ -129,7 +129,7 @@ module Statsample
129
129
  }
130
130
  out
131
131
  end
132
- # Estandar error of R
132
+ # Standard error of R^2
133
133
  def se_r2
134
134
  Math::sqrt((4*r2*(1-r2)**2*(df_e)**2).quo((@cases**2-1)*(@cases+3)))
135
135
  end
@@ -161,7 +161,7 @@ module Statsample
161
161
  rp.to_text
162
162
  end
163
163
  def report_building(b)
164
- b.section(:name=>_("Multiple Regression: ")+@name) do |g|
164
+ b.section(:name=>@name) do |g|
165
165
  c=coeffs
166
166
  g.text(_("Engine: %s") % self.class)
167
167
  g.text(_("Cases(listwise)=%d(%d)") % [@ds.cases, @ds_valid.cases])
@@ -170,12 +170,7 @@ module Statsample
170
170
 
171
171
  g.text(_("Equation")+"="+ sprintf('%0.3f',constant) +" + "+ @fields.collect {|k| sprintf('%0.3f%s',c[k],k)}.join(' + ') )
172
172
 
173
- g.table(:name=>"ANOVA", :header=>%w{source ss df ms f s}) do |t|
174
- t.row([_("Regression"), sprintf("%0.3f",ssr), df_r, sprintf("%0.3f",msr), sprintf("%0.3f",f), sprintf("%0.3f", significance)])
175
- t.row([_("Error"), sprintf("%0.3f",sse), df_e, sprintf("%0.3f",mse),"",""])
176
-
177
- t.row([_("Total"), sprintf("%0.3f",sst), df_r+df_e,"","",""])
178
- end
173
+ g.parse_element(f_test)
179
174
  sc=standarized_coeffs
180
175
  cse=coeffs_se
181
176
  g.table(:name=>"Beta coefficients", :header=>%w{coeff b beta se t}.collect{|field| _(field)} ) do |t|
@@ -2,6 +2,10 @@ module Statsample
2
2
  module Regression
3
3
  module Multiple
4
4
  # Pure Ruby Class for Multiple Regression Analysis, based on a covariance or correlation matrix.
5
+ #
6
+ # Use Statsample::Regression::Multiple::RubyEngine if you have a
7
+ # Dataset, to avoid setting all details.
8
+ #
5
9
  # <b>Remember:</b> NEVER use a Covariance data if you have missing data. Use only correlation matrix on that case.
6
10
  #
7
11
  #
@@ -15,16 +19,13 @@ class MatrixEngine < BaseEngine
15
19
  # Hash of standard deviation of predictors.
16
20
  # Only useful for Correlation Matrix, because by default is set to 1
17
21
  attr_accessor :x_sd
18
- # Standard deviation of criteria.
22
+ # Standard deviation of criterion
19
23
  # Only useful for Correlation Matrix, because by default is set to 1
20
-
21
24
  attr_accessor :y_sd
22
25
  # Hash of mean for predictors. By default, set to 0
23
- #
24
26
  attr_accessor :x_mean
25
27
 
26
28
  # Mean for criteria. By default, set to 0
27
- #
28
29
  attr_accessor :y_mean
29
30
 
30
31
  # Number of cases
@@ -92,23 +93,25 @@ class MatrixEngine < BaseEngine
92
93
  standarized_coeffs[k]*@y_sd.quo(@x_sd[k])
93
94
  }
94
95
  end
95
-
96
96
  end
97
97
  def cases
98
98
  raise "You should define the number of valid cases first" if @cases.nil?
99
99
  @cases
100
100
  end
101
101
  # Get R^2 for the regression
102
+ # For fixed models is the coefficient of determination.
103
+ # On random models, is the 'squared-multiple correlation'
102
104
  # Equal to
103
105
  # * 1-(|R| / |R_x|) or
104
106
  # * Sum(b_i*r_yi) <- used
105
107
  def r2
106
108
  @n_predictors.times.inject(0) {|ac,i| ac+@coeffs_stan[i]* @matrix_y[i,0]}
107
109
  end
110
+ # Multiple correlation, on random models.
108
111
  def r
109
112
  Math::sqrt(r2)
110
113
  end
111
-
114
+ # Value of constant
112
115
  def constant
113
116
  c=coeffs
114
117
  @y_mean - @fields.inject(0){|a,k| a + (c[k] * @x_mean[k])}
@@ -135,12 +138,10 @@ class MatrixEngine < BaseEngine
135
138
  def df_e
136
139
  cases-@n_predictors-1
137
140
  end
138
-
139
141
  # Tolerance for a given variable
140
142
  # defined as (1-R^2) of regression of other independent variables
141
143
  # over the selected
142
144
  # Reference:
143
- #
144
145
  # * http://talkstats.com/showthread.php?t=5056
145
146
  def tolerance(var)
146
147
  lr=Statsample::Regression::Multiple::MatrixEngine.new(@matrix_x, var)
@@ -150,8 +151,7 @@ class MatrixEngine < BaseEngine
150
151
  # Standard error of a coefficients depends on
151
152
  # * Tolerance of the coeffients: Higher tolerances implies higher error
152
153
  # * Higher r2 implies lower error
153
-
154
- # Reference:
154
+ # == Reference:
155
155
  # * Cohen et al. (2003). Applied Multiple Reggression / Correlation Analysis for the Behavioral Sciences
156
156
  #
157
157
  def coeffs_se
@@ -162,13 +162,15 @@ class MatrixEngine < BaseEngine
162
162
  }
163
163
  out
164
164
  end
165
+ # t value for constant
165
166
  def constant_t
166
167
  return nil if constant_se.nil?
167
168
  constant.to_f/constant_se
168
169
  end
169
170
  # Standard error for constant.
170
- # Recreate the estimaded variance-covariance matrix
171
- # using means, standard deviation and covariance matrix
171
+ # This method recreates the estimated variance-covariance matrix
172
+ # using means, standard deviation and covariance matrix.
173
+ # So, needs the covariance matrix.
172
174
  def constant_se
173
175
  return nil if @no_covariance
174
176
  means=@x_mean
@@ -178,6 +180,7 @@ class MatrixEngine < BaseEngine
178
180
  #sd[@y_var]=@y_sd
179
181
  sd[:constant]=0
180
182
  fields=[:constant]+@matrix_cov.fields-[@y_var]
183
+ # Recreate X'X using the variance-covariance matrix
181
184
  xt_x=Matrix.rows(fields.collect {|i|
182
185
  fields.collect {|j|
183
186
  if i==:constant or j==:constant
@@ -203,13 +206,11 @@ class MatrixEngine < BaseEngine
203
206
  g.text("R^2=#{sprintf('%0.3f',r2)}")
204
207
 
205
208
  g.text(_("Equation")+"="+ sprintf('%0.3f',constant) +" + "+ @fields.collect {|k| sprintf('%0.3f%s',c[k],k)}.join(' + ') )
209
+
210
+ g.parse_element(f_test)
211
+
206
212
 
207
- g.table(:name=>"ANOVA", :header=>%w{source ss df ms f s}) do |t|
208
- t.row([_("Regression"), sprintf("%0.3f",ssr), df_r, sprintf("%0.3f",msr), sprintf("%0.3f",f), sprintf("%0.3f", significance)])
209
- t.row([_("Error"), sprintf("%0.3f",sse), df_e, sprintf("%0.3f",mse),"",""])
210
213
 
211
- t.row([_("Total"), sprintf("%0.3f",sst), df_r+df_e,"","",""])
212
- end
213
214
  sc=standarized_coeffs
214
215
  cse=coeffs_se
215
216
  g.table(:name=>"Beta coefficients", :header=>%w{coeff b beta se t}.collect{|field| _(field)} ) do |t|
@@ -5,7 +5,7 @@ module Statsample
5
5
  autoload(:UMannWhitney, 'statsample/test/umannwhitney')
6
6
  autoload(:Levene, 'statsample/test/levene')
7
7
  autoload(:T, 'statsample/test/t')
8
-
8
+ autoload(:F, 'statsample/test/f')
9
9
  # Returns probability of getting a value lower or higher
10
10
  # than sample, using cdf and number of tails.
11
11
  # * For one tail left, return the cdf
@@ -0,0 +1,61 @@
1
+ module Statsample
2
+ module Test
3
+ # From Wikipedia:
4
+ # An F-test is any statistical test in which the test statistic has an F-distribution under the null hypothesis. It is most often used when comparing statistical models that have been fit to a data set, in order to identify the model that best fits the population from which the data were sampled.
5
+ class F
6
+ include GetText
7
+ bindtextdomain("statsample")
8
+
9
+ include Statsample::Test
10
+
11
+ attr_reader :ss_num, :ss_den, :df_num, :df_den, :ss_total, :df_total
12
+ # Tails for probability (:both, :left or :right)
13
+ attr_accessor :tails
14
+ # Name of F analysis
15
+ attr_accessor :name
16
+ # Name of numerator
17
+ attr_accessor :name_numerator
18
+ # Name of denominator
19
+ attr_accessor :name_denominator
20
+
21
+ # Parameters:
22
+ # * ss_num: explained variance / between group variance
23
+ # * ss_den: unexplained variance / within group variance
24
+ # * df_num: degrees of freedom for explained variance / k-1
25
+ # * df_den: degrees of freedom for unexplained variance / n-k
26
+ def initialize(ss_num, ss_den, df_num, df_den, opts=Hash.new)
27
+ @ss_num=ss_num
28
+ @ss_den=ss_den
29
+ @df_num=df_num
30
+ @df_den=df_den
31
+ @ss_total=ss_num+ss_den
32
+ @df_total=df_num+df_den
33
+ opts_default={:tails=>:right, :name_numerator=>"Numerator", :name_denominator=>"Denominator", :name=>"F Test"}
34
+ @opts=opts_default.merge(opts)
35
+ raise "Tails should be right or left, not both" if @opts[:tails]==:both
36
+ opts_default.keys.each {|k|
37
+ send("#{k}=", @opts[k])
38
+ }
39
+ end
40
+ def summary
41
+ ReportBuilder.new(:no_title=>true).add(self).to_text
42
+ end
43
+ def f
44
+ (@ss_num.quo(@df_num)).quo(@ss_den.quo(@df_den))
45
+ end
46
+ # probability
47
+ def probability
48
+ p_using_cdf(Distribution::F.cdf(f, @df_num, @df_den), tails)
49
+ end
50
+ def report_building(builder)#:nodoc:
51
+ builder.section(:name=>@name) do |b|
52
+ b.table(:name=>_("%s Table") % @name, :header=>%w{source ss df f p}.map {|v| _(v)}) do |t|
53
+ t.row([@name_numerator, sprintf("%0.3f",@ss_num), @df_num, sprintf("%0.3f",f), sprintf("%0.3f", probability)])
54
+ t.row([@name_denominator, sprintf("%0.3f",@ss_den), @df_den, "", ""])
55
+ t.row([_("Total"), sprintf("%0.3f",@ss_total), @df_total,"",""])
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
61
+ end
@@ -55,6 +55,15 @@ module Statsample
55
55
  # a=1000.times.map {rand(100)}.to_scale
56
56
  # t_1=Statsample::Test::T::OneSample.new(a, {:u=>50})
57
57
  # t_1.summary
58
+ #
59
+ # === Output
60
+ #
61
+ # = One Sample T Test
62
+ # Sample mean: 48.954
63
+ # Population mean:50
64
+ # Tails: both
65
+ # t = -1.1573, p=0.2474, d.f=999
66
+
58
67
  class OneSample
59
68
  include Math
60
69
  include Statsample::Test
@@ -76,7 +85,11 @@ module Statsample
76
85
 
77
86
  dirty_writer :u, :tails
78
87
  dirty_memoize :t, :probability
79
-
88
+ # Create a One Sample T Test
89
+ # Options:
90
+ # * :u = Mean to compare. Default= 0
91
+ # * :name = Name of the analysis
92
+ # * :tails = Tail for probability. Could be :both, :left, :right
80
93
  def initialize(vector, opts=Hash.new)
81
94
  @vector=vector
82
95
  default={:u=>0, :name=>"One Sample T Test", :tails=>:both}
@@ -113,9 +126,30 @@ module Statsample
113
126
  # == Usage
114
127
  # a=1000.times.map {rand(100)}.to_scale
115
128
  # b=1000.times.map {rand(100)}.to_scale
116
- # t_2=Statsample::Test::T::OneSample.new(a,b)
129
+ # t_2=Statsample::Test::T::TwoSamplesIndependent.new(a,b)
117
130
  # t_2.summary
118
-
131
+ # === Output
132
+ # = Two Sample T Test
133
+ # Mean and standard deviation
134
+ # +----------+---------+---------+------+
135
+ # | Variable | m | sd | n |
136
+ # +----------+---------+---------+------+
137
+ # | 1 | 49.3310 | 29.3042 | 1000 |
138
+ # | 2 | 47.8180 | 28.8640 | 1000 |
139
+ # +----------+---------+---------+------+
140
+ #
141
+ # == Levene Test
142
+ # Levene Test
143
+ # F: 0.3596
144
+ # p: 0.5488
145
+ # T statistics
146
+ # +--------------------+--------+-----------+----------------+
147
+ # | Type | t | df | p (both tails) |
148
+ # +--------------------+--------+-----------+----------------+
149
+ # | Equal variance | 1.1632 | 1998 | 0.2449 |
150
+ # | Non equal variance | 1.1632 | 1997.5424 | 0.1362 |
151
+ # +--------------------+--------+-----------+----------------+
152
+
119
153
  class TwoSamplesIndependent
120
154
  include Math
121
155
  include Statsample::Test
@@ -142,18 +176,20 @@ module Statsample
142
176
 
143
177
  dirty_writer :tails
144
178
  dirty_memoize :t_equal_variance, :t_not_equal_variance, :probability_equal_variance, :probability_not_equal_variance, :df_equal_variance, :df_not_equal_variance
145
-
179
+
180
+ # Create a Two Independent T Test
181
+ # Options:
182
+ # * :name = Name of the analysis
183
+ # * :tails = Tail for probability. Could be :both, :left, :right
146
184
  def initialize(v1, v2, opts=Hash.new)
147
185
  @v1=v1
148
186
  @v2=v2
149
- default={:u=>0, :name=>"Two Sample T Test", :paired_samples=>false, :tails=>:both}
187
+ default={:u=>0, :name=>"Two Sample T Test", :tails=>:both}
150
188
  @opts=default.merge(opts)
151
189
  @name=@opts[:name]
152
190
  @tails=@opts[:tails]
153
191
  end
154
-
155
-
156
-
192
+
157
193
  # Set t and probability for given u
158
194
  def compute
159
195
  @t_equal_variance= T.two_sample_independent(@v1.mean, @v2.mean, @v1.sd, @v2.sd, @v1.n_valid, @v2.n_valid,true)
@@ -1,12 +1,11 @@
1
1
  require(File.dirname(__FILE__)+'/test_helpers.rb')
2
2
 
3
3
  class StatsampleAnovaTestCase < MiniTest::Unit::TestCase
4
- def initialize(*args)
4
+ def setup
5
5
  @v1=[3,3,2,3,6].to_vector(:scale)
6
6
  @v2=[7,6,5,6,7].to_vector(:scale)
7
7
  @v3=[9,8,9,7,8].to_vector(:scale)
8
8
  @anova=Statsample::Anova::OneWay.new([@v1,@v2,@v3])
9
- super
10
9
  end
11
10
  def test_basic
12
11
  assert_in_delta(72.933, @anova.sst,0.001)
@@ -19,7 +18,7 @@ class StatsampleAnovaTestCase < MiniTest::Unit::TestCase
19
18
  assert_in_delta(23.568,@anova.f,0.001)
20
19
  anova2=Statsample::Anova::OneWay.new([@v1,@v1,@v1,@v1,@v2])
21
20
  assert_in_delta(3.960, anova2.f,0.001)
22
- assert(@anova.significance<0.01)
23
- assert_in_delta(0.016, anova2.significance,0.001)
21
+ assert(@anova.probability<0.01)
22
+ assert_in_delta(0.016, anova2.probability,0.001)
24
23
  end
25
24
  end
@@ -12,7 +12,7 @@ class StatsampleBivariateTestCase < MiniTest::Unit::TestCase
12
12
  v2=1000.times.collect {|a| rand()}.to_scale
13
13
  assert_in_delta(Statsample::Bivariate.covariance(v1,v2), Statsample::Bivariate.covariance_slow(v1,v2), 0.001)
14
14
  else
15
- puts "Bivariate::covariance not tested (needs GSL)"
15
+ skip "Bivariate::covariance not tested (needs GSL)"
16
16
  end
17
17
 
18
18
  end
@@ -24,7 +24,7 @@ class StatsampleBivariateTestCase < MiniTest::Unit::TestCase
24
24
 
25
25
  assert_in_delta(GSL::Stats::correlation(v1.gsl, v2.gsl), Statsample::Bivariate.pearson_slow(v1,v2), 1e-10)
26
26
  else
27
- puts "Not tested gsl versus ruby correlation (needs GSL)"
27
+ skip "Not tested gsl versus ruby correlation (needs GSL)"
28
28
  end
29
29
  end
30
30
  def test_pearson
@@ -67,6 +67,8 @@ class StatsampleBivariateTestCase < MiniTest::Unit::TestCase
67
67
  if Statsample.has_gsl?
68
68
  poly.compute_two_step_mle_drasgow_gsl
69
69
  assert_in_delta(tetra.r,poly.r,0.0001)
70
+ else
71
+ skip "compute_two_step_mle_drasgow_gsl not tested (requires GSL)"
70
72
  end
71
73
  }
72
74
  end
@@ -112,7 +114,7 @@ class StatsampleBivariateTestCase < MiniTest::Unit::TestCase
112
114
  assert_in_delta(1.5938, poly.threshold_y[1],0.0001)
113
115
  assert_in_delta(1.1331, poly.threshold_x[1],0.0001)
114
116
  else
115
- puts "Two-step optimized, polychoric series and Joint method for Polychoric requires GSL"
117
+ skip "Two-step optimized, polychoric series and Joint method for Polychoric requires GSL"
116
118
  end
117
119
  assert(poly.summary)
118
120
  end
@@ -1,9 +1,6 @@
1
1
  require(File.dirname(__FILE__)+'/test_helpers.rb')
2
2
 
3
3
  class StatsampleCombinationTestCase < MiniTest::Unit::TestCase
4
- def initialize(*args)
5
- super
6
- end
7
4
  def test_basic
8
5
  k=3
9
6
  n=5
@@ -34,7 +31,7 @@ class StatsampleCombinationTestCase < MiniTest::Unit::TestCase
34
31
 
35
32
  assert_equal(rb_array,gsl_array)
36
33
  else
37
- puts "Not CombinationRuby vs CombinationGSL (no gsl)"
34
+ skip "Not CombinationRuby vs CombinationGSL (no gsl)"
38
35
  end
39
36
  end
40
37
  end
@@ -33,7 +33,7 @@ class StatsampleFactorTestCase < MiniTest::Unit::TestCase
33
33
  _test_matrix(expected_fm_2,pca.feature_vector(2))
34
34
  assert(pca.summary)
35
35
  else
36
- puts "PCA not tested. Requires GSL"
36
+ skip "PCA not tested. Requires GSL"
37
37
  end
38
38
  end
39
39
 
@@ -67,7 +67,7 @@ class StatsampleFactorTestCase < MiniTest::Unit::TestCase
67
67
  assert(fa.summary)
68
68
 
69
69
  else
70
- puts "Principal Axis not tested. Requires GSL"
70
+ skip "Principal Axis not tested. Requires GSL"
71
71
  end
72
72
  end
73
73
 
@@ -89,7 +89,7 @@ class StatsampleFactorTestCase < MiniTest::Unit::TestCase
89
89
  refute(varimax.h2.nil?,"H2 shouldn't be empty")
90
90
  _test_matrix(expected,varimax.rotated)
91
91
  else
92
- puts "Rotation not tested. Requires GSL"
92
+ skip "Rotation not tested. Requires GSL"
93
93
  end
94
94
  end
95
95
  def _test_matrix(a,b)
@@ -14,6 +14,8 @@ class StatsampleGSLTestCase < MiniTest::Unit::TestCase
14
14
  matrix=gsl.to_matrix
15
15
  assert_equal(5,matrix.row_size)
16
16
  assert_equal(3,matrix.column_size)
17
+ else
18
+ skip("Needs GSL extension")
17
19
  end
18
20
  end
19
21
  end
@@ -3,8 +3,20 @@ require 'statsample'
3
3
  require 'minitest/unit'
4
4
  require 'tempfile'
5
5
  require 'tmpdir'
6
+ require 'shoulda'
7
+ module MiniTest
8
+ class Unit
9
+ class TestCase
10
+ include Shoulda::InstanceMethods
11
+ extend Shoulda::ClassMethods
12
+ include Shoulda::Assertions
13
+
14
+ end
15
+ end
16
+ end
6
17
 
7
18
  module MiniTest::Assertions
19
+
8
20
  alias :assert_raise :assert_raises unless method_defined? :assert_raise
9
21
  alias :assert_not_equal :refute_equal unless method_defined? :assert_not_equal
10
22
  alias :assert_not_same :refute_same unless method_defined? :assert_not_same
@@ -48,7 +48,7 @@ class StatsampleMLETestCase < MiniTest::Unit::TestCase
48
48
  #p coeffs_nr
49
49
  ds=@ds_indep.dup
50
50
  ds.add_vector('y',y)
51
- lr=Statsample::Regression.multiple(ds,'y')
51
+ lr=Statsample::Regression.multiple(ds, 'y')
52
52
  lr_constant = lr.constant
53
53
  lr_coeffs = lr.coeffs
54
54
  assert_in_delta(coeffs_nr[0,0], lr_constant,0.0000001)
@@ -2,9 +2,6 @@ require(File.dirname(__FILE__)+'/test_helpers.rb')
2
2
 
3
3
 
4
4
  class StatsampleMultisetTestCase < MiniTest::Unit::TestCase
5
- def initialize(*args)
6
- super
7
- end
8
5
  def test_creation
9
6
  v1a=[1,2,3,4,5].to_vector
10
7
  v2b=[11,21,31,41,51].to_vector
@@ -90,7 +90,7 @@ class StatsampleRegressionTestCase < MiniTest::Unit::TestCase
90
90
  assert_in_delta(residuals[i],c_residuals[i],0.001)
91
91
  }
92
92
  else
93
- puts "Regression::Multiple::GslEngine not tested (no Gsl)"
93
+ skip "Regression::Multiple::GslEngine not tested (no Gsl)"
94
94
  end
95
95
  end
96
96
 
@@ -115,7 +115,7 @@ class StatsampleRegressionTestCase < MiniTest::Unit::TestCase
115
115
  assert_in_delta(0.913,lr.r2,0.001)
116
116
 
117
117
  assert_in_delta(20.908, lr.f,0.001)
118
- assert_in_delta(0.001, lr.significance, 0.001)
118
+ assert_in_delta(0.001, lr.probability, 0.001)
119
119
  assert_in_delta(0.226,lr.tolerance("a"),0.001)
120
120
 
121
121
  coeffs_se={"a"=>1.171,"b"=>1.129,"c"=>0.072}
@@ -3,8 +3,7 @@ require(File.dirname(__FILE__)+'/test_helpers.rb')
3
3
 
4
4
  class StatsampleReliabilityTestCase < MiniTest::Unit::TestCase
5
5
 
6
- def initialize(*args)
7
- super
6
+ def setup
8
7
  @x1=[1,1,1,1,2,2,2,2,3,3,3,30].to_vector(:scale)
9
8
  @x2=[1,1,1,2,2,3,3,3,3,4,4,50].to_vector(:scale)
10
9
  @x3=[2,2,1,1,1,2,2,2,3,4,5,40].to_vector(:scale)
@@ -19,14 +18,4 @@ class StatsampleReliabilityTestCase < MiniTest::Unit::TestCase
19
18
  assert_in_delta(0.999,ia.item_total_correlation()['x1'],0.001)
20
19
  assert_in_delta(1050.455,ia.stats_if_deleted()['x1'][:variance_sample],0.001)
21
20
  end
22
- def test_icc
23
- #p @x1.factors
24
- icc=Statsample::Reliability::ItemCharacteristicCurve.new(@ds)
25
- # Need to create the test!!!!
26
- #p icc.curve_field('x1',1).sort
27
- #p icc.curve_field('x1',2).sort
28
- #p icc.curve_field('x1',3).sort
29
- #p icc.curve_field('x1',30).sort
30
-
31
- end
32
21
  end
@@ -17,7 +17,7 @@ class StatsampleSvgGraphTestCase < MiniTest::Unit::TestCase
17
17
  graph.histogram=h
18
18
  file.puts(graph.burn)
19
19
  else
20
- puts "Statsample::Graph::SvgHistogram.new not tested (no ruby-gsl)"
20
+ skip "Statsample::Graph::SvgHistogram.new not tested (no ruby-gsl)"
21
21
  end
22
22
  end
23
23
  def assert_svg(msg=nil)
@@ -48,7 +48,7 @@ class StatsampleSvgGraphTestCase < MiniTest::Unit::TestCase
48
48
  }
49
49
  assert(File.exists?(file))
50
50
  else
51
- puts "Statsample::Vector#svggraph_histogram.new not tested (no ruby-gsl)"
51
+ skip "Statsample::Vector#svggraph_histogram.new not tested (no ruby-gsl)"
52
52
  end
53
53
  end
54
54
  end
@@ -0,0 +1,37 @@
1
+ require(File.dirname(__FILE__)+'/test_helpers.rb')
2
+
3
+ class StatsampleTestFTestCase < MiniTest::Unit::TestCase
4
+ context(Statsample::Test::F) do
5
+ setup do
6
+ @ssb=84
7
+ @ssw=68
8
+ @f=Statsample::Test::F.new(@ssb,@ssw, 2,15)
9
+ end
10
+ should "have f equal to msb/msw" do
11
+ assert_equal((@ssb.quo(2)).quo(@ssw.quo(15)), @f.f)
12
+ end
13
+ should "have df total equal to df_num+df_den" do
14
+ assert_equal(17, @f.df_total)
15
+ end
16
+ should "have probability near 0.002" do
17
+ assert_in_delta(0.002, @f.probability, 0.0005)
18
+ end
19
+ context("#summary") do
20
+ setup do
21
+ @f.name_numerator="MSb"
22
+ @f.name_denominator="MSw"
23
+ @f.name="ANOVA"
24
+ @summary=@f.summary
25
+ end
26
+ should "have size > 0" do
27
+ assert(@summary.size>0)
28
+ end
29
+ should "include correct names for title, num and den" do
30
+ assert_match(@f.name_numerator, @summary)
31
+ assert_match(@f.name_denominator, @summary)
32
+ assert_match(@f.name, @summary)
33
+ end
34
+ end
35
+ end
36
+
37
+ end
@@ -1,35 +1,53 @@
1
1
  require(File.dirname(__FILE__)+'/test_helpers.rb')
2
2
 
3
3
  class StatsampleExcelTestCase < MiniTest::Unit::TestCase
4
- def setup
5
- @ds=Statsample::Excel.read(File.dirname(__FILE__)+"/test_xls.xls")
6
- end
7
- def test_read
8
- assert_equal(6,@ds.cases)
9
- assert_equal(%w{id name age city a1},@ds.fields)
10
- id=[1,2,3,4,5,6].to_vector(:scale)
11
- name=["Alex","Claude","Peter","Franz","George","Fernand"].to_vector(:nominal)
12
- age=[20,23,25,nil,5.5,nil].to_vector(:scale)
13
- city=["New York","London","London","Paris","Tome",nil].to_vector(:nominal)
14
- a1=["a,b","b,c","a",nil,"a,b,c",nil].to_vector(:nominal)
15
- ds_exp=Statsample::Dataset.new({'id'=>id,'name'=>name,'age'=>age,'city'=>city,'a1'=>a1}, %w{id name age city a1})
16
- ds_exp.fields.each{|f|
17
- assert_equal(ds_exp[f],@ds[f])
18
- }
19
- assert_equal(ds_exp,@ds)
20
-
21
- end
22
- def test_nil
23
- assert_equal(nil,@ds['age'][5])
4
+ context "Excel reader" do
5
+ setup do
6
+ @ds=Statsample::Excel.read(File.dirname(__FILE__)+"/test_xls.xls")
7
+ end
8
+ should "set the number of cases" do
9
+ assert_equal(6,@ds.cases)
10
+ end
11
+ should "set correct field names" do
12
+ assert_equal(%w{id name age city a1},@ds.fields)
13
+ end
14
+ should "set a dataset equal to expected" do
15
+ id=[1,2,3,4,5,6].to_vector(:scale)
16
+ name=["Alex","Claude","Peter","Franz","George","Fernand"].to_vector(:nominal)
17
+ age=[20,23,25,nil,5.5,nil].to_vector(:scale)
18
+ city=["New York","London","London","Paris","Tome",nil].to_vector(:nominal)
19
+ a1=["a,b","b,c","a",nil,"a,b,c",nil].to_vector(:nominal)
20
+ ds_exp=Statsample::Dataset.new({'id'=>id,'name'=>name,'age'=>age,'city'=>city,'a1'=>a1}, %w{id name age city a1})
21
+ ds_exp.fields.each{|f|
22
+ assert_equal(ds_exp[f],@ds[f])
23
+ }
24
+ assert_equal(ds_exp,@ds)
25
+ end
26
+ should "set to nil empty cells" do
27
+ assert_equal(nil,@ds['age'][5])
28
+ end
24
29
  end
25
- def test_write
26
- tempfile=Tempfile.new("test_write.xls")
27
- Statsample::Excel.write(@ds,tempfile.path)
28
- ds2=Statsample::Excel.read(tempfile.path)
29
- i=0
30
- ds2.each_array do |row|
31
- assert_equal(@ds.case_as_array(i),row)
32
- i+=1
30
+ context "Excel writer" do
31
+ setup do
32
+ a=100.times.map{rand(100)}.to_scale
33
+ b=(["b"]*100).to_vector
34
+ @ds={'b'=>b, 'a'=>a}.to_dataset(%w{b a})
35
+ tempfile=Tempfile.new("test_write.xls")
36
+ Statsample::Excel.write(@ds,tempfile.path)
37
+ @ds2=Statsample::Excel.read(tempfile.path)
38
+ end
39
+ should "return same fields as original" do
40
+ assert_equal(@ds.fields ,@ds2.fields)
41
+ end
42
+ should "return same number of cases as original" do
43
+ assert_equal(@ds.cases, @ds2.cases)
44
+ end
45
+ should "return same cases as original" do
46
+ i=0
47
+ @ds2.each_array do |row|
48
+ assert_equal(@ds.case_as_array(i),row)
49
+ i+=1
50
+ end
33
51
  end
34
52
  end
35
53
  end
metadata CHANGED
@@ -1,7 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: statsample
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.2
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 9
8
+ - 0
9
+ version: 0.9.0
5
10
  platform: ruby
6
11
  authors:
7
12
  - Claudio Bustos
@@ -30,107 +35,124 @@ cert_chain:
30
35
  rpP0jjs0
31
36
  -----END CERTIFICATE-----
32
37
 
33
- date: 2010-04-01 00:00:00 -03:00
38
+ date: 2010-04-04 00:00:00 -04:00
34
39
  default_executable:
35
40
  dependencies:
36
41
  - !ruby/object:Gem::Dependency
37
42
  name: spreadsheet
38
- type: :runtime
39
- version_requirement:
40
- version_requirements: !ruby/object:Gem::Requirement
43
+ prerelease: false
44
+ requirement: &id001 !ruby/object:Gem::Requirement
41
45
  requirements:
42
46
  - - ~>
43
47
  - !ruby/object:Gem::Version
48
+ segments:
49
+ - 0
50
+ - 6
51
+ - 0
44
52
  version: 0.6.0
45
- version:
53
+ type: :runtime
54
+ version_requirements: *id001
46
55
  - !ruby/object:Gem::Dependency
47
56
  name: svg-graph
48
- type: :runtime
49
- version_requirement:
50
- version_requirements: !ruby/object:Gem::Requirement
57
+ prerelease: false
58
+ requirement: &id002 !ruby/object:Gem::Requirement
51
59
  requirements:
52
60
  - - ~>
53
61
  - !ruby/object:Gem::Version
62
+ segments:
63
+ - 1
64
+ - 0
54
65
  version: "1.0"
55
- version:
66
+ type: :runtime
67
+ version_requirements: *id002
56
68
  - !ruby/object:Gem::Dependency
57
69
  name: reportbuilder
58
- type: :runtime
59
- version_requirement:
60
- version_requirements: !ruby/object:Gem::Requirement
70
+ prerelease: false
71
+ requirement: &id003 !ruby/object:Gem::Requirement
61
72
  requirements:
62
73
  - - ~>
63
74
  - !ruby/object:Gem::Version
75
+ segments:
76
+ - 1
77
+ - 0
64
78
  version: "1.0"
65
- version:
79
+ type: :runtime
80
+ version_requirements: *id003
66
81
  - !ruby/object:Gem::Dependency
67
82
  name: minimization
68
- type: :runtime
69
- version_requirement:
70
- version_requirements: !ruby/object:Gem::Requirement
83
+ prerelease: false
84
+ requirement: &id004 !ruby/object:Gem::Requirement
71
85
  requirements:
72
86
  - - ~>
73
87
  - !ruby/object:Gem::Version
88
+ segments:
89
+ - 0
90
+ - 1
91
+ - 0
74
92
  version: 0.1.0
75
- version:
93
+ type: :runtime
94
+ version_requirements: *id004
76
95
  - !ruby/object:Gem::Dependency
77
96
  name: fastercsv
78
- type: :runtime
79
- version_requirement:
80
- version_requirements: !ruby/object:Gem::Requirement
97
+ prerelease: false
98
+ requirement: &id005 !ruby/object:Gem::Requirement
81
99
  requirements:
82
100
  - - ">="
83
101
  - !ruby/object:Gem::Version
102
+ segments:
103
+ - 0
84
104
  version: "0"
85
- version:
105
+ type: :runtime
106
+ version_requirements: *id005
86
107
  - !ruby/object:Gem::Dependency
87
108
  name: dirty-memoize
88
- type: :runtime
89
- version_requirement:
90
- version_requirements: !ruby/object:Gem::Requirement
109
+ prerelease: false
110
+ requirement: &id006 !ruby/object:Gem::Requirement
91
111
  requirements:
92
112
  - - ~>
93
113
  - !ruby/object:Gem::Version
114
+ segments:
115
+ - 0
116
+ - 0
94
117
  version: "0.0"
95
- version:
118
+ type: :runtime
119
+ version_requirements: *id006
96
120
  - !ruby/object:Gem::Dependency
97
121
  name: rubyforge
98
- type: :development
99
- version_requirement:
100
- version_requirements: !ruby/object:Gem::Requirement
122
+ prerelease: false
123
+ requirement: &id007 !ruby/object:Gem::Requirement
101
124
  requirements:
102
125
  - - ">="
103
126
  - !ruby/object:Gem::Version
127
+ segments:
128
+ - 2
129
+ - 0
130
+ - 4
104
131
  version: 2.0.4
105
- version:
106
- - !ruby/object:Gem::Dependency
107
- name: gemcutter
108
132
  type: :development
109
- version_requirement:
110
- version_requirements: !ruby/object:Gem::Requirement
111
- requirements:
112
- - - ">="
113
- - !ruby/object:Gem::Version
114
- version: 0.5.0
115
- version:
133
+ version_requirements: *id007
116
134
  - !ruby/object:Gem::Dependency
117
135
  name: hoe
118
- type: :development
119
- version_requirement:
120
- version_requirements: !ruby/object:Gem::Requirement
136
+ prerelease: false
137
+ requirement: &id008 !ruby/object:Gem::Requirement
121
138
  requirements:
122
139
  - - ">="
123
140
  - !ruby/object:Gem::Version
124
- version: 2.5.1
125
- version:
141
+ segments:
142
+ - 2
143
+ - 6
144
+ - 0
145
+ version: 2.6.0
146
+ type: :development
147
+ version_requirements: *id008
126
148
  description: |-
127
- A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, Ruby 1.9 and JRuby 1.4 (Ruby 1.8.7 compatible)
149
+ A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, 1.9.1, 1.9.2 (April, 2010) and JRuby 1.4 (Ruby 1.8.7 compatible)
128
150
 
129
151
  Includes:
130
152
  * Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
131
153
  * Imports and exports datasets from and to Excel, CSV and plain text files.
132
154
  * Correlations: Pearson's r, Spearman's rank correlation (rho), Tetrachoric, Polychoric
133
- * Tests: T, Levene, U-Mannwhitney, One-Way Anova
155
+ * Tests: F (Anova One-Way), T, Levene, U-Mannwhitney.
134
156
  * Regression: Simple, Multiple, Probit and Logit
135
157
  * Factorial Analysis: Extraction (PCA and Principal Axis) and Rotation (Varimax and relatives)
136
158
  * Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
@@ -225,6 +247,7 @@ files:
225
247
  - lib/statsample/resample.rb
226
248
  - lib/statsample/srs.rb
227
249
  - lib/statsample/test.rb
250
+ - lib/statsample/test/f.rb
228
251
  - lib/statsample/test/levene.rb
229
252
  - lib/statsample/test/t.rb
230
253
  - lib/statsample/test/umannwhitney.rb
@@ -261,6 +284,7 @@ files:
261
284
  - test/test_stest.rb
262
285
  - test/test_stratified.rb
263
286
  - test/test_svg_graph.rb
287
+ - test/test_test_f.rb
264
288
  - test/test_test_t.rb
265
289
  - test/test_umannwhitney.rb
266
290
  - test/test_vector.rb
@@ -280,18 +304,20 @@ required_ruby_version: !ruby/object:Gem::Requirement
280
304
  requirements:
281
305
  - - ">="
282
306
  - !ruby/object:Gem::Version
307
+ segments:
308
+ - 0
283
309
  version: "0"
284
- version:
285
310
  required_rubygems_version: !ruby/object:Gem::Requirement
286
311
  requirements:
287
312
  - - ">="
288
313
  - !ruby/object:Gem::Version
314
+ segments:
315
+ - 0
289
316
  version: "0"
290
- version:
291
317
  requirements: []
292
318
 
293
319
  rubyforge_project: ruby-statsample
294
- rubygems_version: 1.3.5
320
+ rubygems_version: 1.3.6
295
321
  signing_key:
296
322
  specification_version: 3
297
323
  summary: A suite for basic and advanced statistics on Ruby
@@ -322,6 +348,7 @@ test_files:
322
348
  - test/test_stest.rb
323
349
  - test/test_statistics.rb
324
350
  - test/test_reliability.rb
351
+ - test/test_test_f.rb
325
352
  - test/test_test_t.rb
326
353
  - test/test_histogram.rb
327
354
  - test/test_dataset.rb
metadata.gz.sig CHANGED
Binary file