statsample 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. data.tar.gz.sig +0 -0
  2. data/History.txt +20 -1
  3. data/Manifest.txt +8 -1
  4. data/README.txt +11 -7
  5. data/Rakefile +2 -2
  6. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  7. data/examples/dataset.rb +8 -0
  8. data/examples/multiple_regression.rb +1 -1
  9. data/examples/parallel_analysis.rb +29 -0
  10. data/examples/parallel_analysis_tetrachoric.rb +30 -0
  11. data/examples/vector.rb +6 -0
  12. data/lib/distribution.rb +16 -6
  13. data/lib/distribution/normal.rb +27 -20
  14. data/lib/distribution/normalbivariate.rb +1 -1
  15. data/lib/statsample.rb +19 -2
  16. data/lib/statsample/anova.rb +118 -16
  17. data/lib/statsample/bivariate.rb +27 -13
  18. data/lib/statsample/bivariate/polychoric.rb +18 -5
  19. data/lib/statsample/crosstab.rb +66 -74
  20. data/lib/statsample/dataset.rb +52 -45
  21. data/lib/statsample/dominanceanalysis.rb +2 -5
  22. data/lib/statsample/factor.rb +1 -1
  23. data/lib/statsample/factor/parallelanalysis.rb +122 -0
  24. data/lib/statsample/factor/pca.rb +23 -28
  25. data/lib/statsample/factor/principalaxis.rb +8 -3
  26. data/lib/statsample/matrix.rb +27 -24
  27. data/lib/statsample/mle.rb +11 -11
  28. data/lib/statsample/permutation.rb +2 -1
  29. data/lib/statsample/regression.rb +10 -8
  30. data/lib/statsample/regression/multiple/baseengine.rb +36 -25
  31. data/lib/statsample/regression/multiple/gslengine.rb +14 -0
  32. data/lib/statsample/regression/multiple/matrixengine.rb +4 -32
  33. data/lib/statsample/regression/multiple/rubyengine.rb +2 -6
  34. data/lib/statsample/regression/simple.rb +1 -1
  35. data/lib/statsample/reliability.rb +42 -54
  36. data/lib/statsample/test.rb +10 -6
  37. data/lib/statsample/test/f.rb +16 -26
  38. data/lib/statsample/test/levene.rb +4 -8
  39. data/lib/statsample/test/t.rb +30 -24
  40. data/lib/statsample/test/umannwhitney.rb +13 -6
  41. data/lib/statsample/vector.rb +86 -76
  42. data/po/es/statsample.mo +0 -0
  43. data/po/es/statsample.po +127 -94
  44. data/po/statsample.pot +114 -79
  45. data/test/test_anovaoneway.rb +27 -0
  46. data/test/test_anovawithvectors.rb +97 -0
  47. data/test/test_bivariate.rb +6 -57
  48. data/test/test_bivariate_polychoric.rb +65 -0
  49. data/test/test_crosstab.rb +6 -0
  50. data/test/test_dataset.rb +29 -1
  51. data/test/test_distribution.rb +6 -13
  52. data/test/test_dominance_analysis.rb +1 -1
  53. data/test/test_factor.rb +3 -3
  54. data/test/test_helpers.rb +18 -18
  55. data/test/test_matrix.rb +33 -20
  56. data/test/test_permutation.rb +36 -30
  57. data/test/test_regression.rb +26 -8
  58. data/test/test_reliability.rb +104 -14
  59. data/test/test_test_f.rb +11 -14
  60. data/test/test_test_t.rb +42 -35
  61. data/test/test_umannwhitney.rb +22 -10
  62. data/test/test_vector.rb +204 -102
  63. metadata +57 -81
  64. metadata.gz.sig +0 -0
  65. data/test/test_anova.rb +0 -24
data.tar.gz.sig CHANGED
Binary file
data/History.txt CHANGED
@@ -1,6 +1,25 @@
1
+ === 0.10.0 / 2010-04-13
2
+
3
+ <b>API modifications</b>
4
+ * Refactoring of Statsample::Anova module.
5
+ * Statsample::Anova::OneWay: implementation of generic ANOVA One-Way, used by Multiple Regression, for example.
6
+ * Statsample::Anova::OneWayWithVectors: implementation of ANOVA One-Way to test differences of means.
7
+
8
+ <b>New features</b>
9
+ * New Statsample::Factor::ParallelAnalysis, which performs Horn's 'parallel analysis' on a PCA to adjust for sample bias in the retention of components.
10
+ * New Statsample.only_valid_clone and Statsample::Dataset.clone, which allow creating shallow copies of valid vectors and datasets. Used by correlation matrix methods to optimize calculations.
11
+ * New module Statsample::Summarizable, which adds GetText and ReportBuilder support to classes. Better summaries for Vector, Dataset, Crosstab, PrincipalAxis, PCA and Regression::Multiple classes.
12
+
13
+ <b>Optimizations and bug fixes</b>
14
+
15
+ * Refactoring of Statsample::Regression::Multiple classes. Still needs work.
16
+ * Bug fix on Statsample::Factor::PCA and Statsample::Factor::PrincipalAxis
17
+ * Bug fix on Statsample::Bivariate::Polychoric.new_with_vectors. It should be defined as a class method, not an instance method.
18
+ * Optimized correlation and covariance matrix. Only half of the matrix is calculated; the other half is returned from cache.
19
+ * More test coverage. RCOV Total: 82.51%, Code: 77.83%
20
+
1
21
  === 0.9.0 / 2010-04-04
2
22
  * New Statsample::Test::F. Anova::OneWay subclasses it and Regression classes uses it.
3
-
4
23
  === 0.8.2 / 2010-04-01
5
24
  * Statsample::PromiseAfter replaced by external package DirtyMemoize [http://rubygems.org/gems/dirty-memoize]
6
25
  === 0.8.1 / 2010-03-29
data/Manifest.txt CHANGED
@@ -11,15 +11,19 @@ data/test_binomial.csv
11
11
  data/tetmat_matrix.txt
12
12
  data/tetmat_test.txt
13
13
  examples/correlation_matrix.rb
14
+ examples/dataset.rb
14
15
  examples/dominance_analysis.rb
15
16
  examples/dominance_analysis_bootstrap.rb
16
17
  examples/levene.rb
17
18
  examples/multiple_regression.rb
18
19
  examples/multivariate_correlation.rb
20
+ examples/parallel_analysis.rb
21
+ examples/parallel_analysis_tetrachoric.rb
19
22
  examples/polychoric.rb
20
23
  examples/principal_axis.rb
21
24
  examples/t_test.rb
22
25
  examples/tetrachoric.rb
26
+ examples/vector.rb
23
27
  lib/distribution.rb
24
28
  lib/distribution/chisquare.rb
25
29
  lib/distribution/f.rb
@@ -43,6 +47,7 @@ lib/statsample/dataset.rb
43
47
  lib/statsample/dominanceanalysis.rb
44
48
  lib/statsample/dominanceanalysis/bootstrap.rb
45
49
  lib/statsample/factor.rb
50
+ lib/statsample/factor/parallelanalysis.rb
46
51
  lib/statsample/factor/pca.rb
47
52
  lib/statsample/factor/principalaxis.rb
48
53
  lib/statsample/factor/rotation.rb
@@ -84,8 +89,10 @@ po/es/statsample.mo
84
89
  po/es/statsample.po
85
90
  po/statsample.pot
86
91
  setup.rb
87
- test/test_anova.rb
92
+ test/test_anovaoneway.rb
93
+ test/test_anovawithvectors.rb
88
94
  test/test_bivariate.rb
95
+ test/test_bivariate_polychoric.rb
89
96
  test/test_codification.rb
90
97
  test/test_combination.rb
91
98
  test/test_crosstab.rb
data/README.txt CHANGED
@@ -5,18 +5,19 @@ http://ruby-statsample.rubyforge.org/
5
5
 
6
6
  == DESCRIPTION:
7
7
 
8
- A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, 1.9.1, 1.9.2 (April, 2010) and JRuby 1.4 (Ruby 1.8.7 compatible)
8
+ A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, 1.9.1, 1.9.2 (April, 2010) and JRuby 1.4 (Ruby 1.8.7 compatible).
9
9
 
10
- Includes:
10
+ Includes:
11
11
  * Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
12
12
  * Imports and exports datasets from and to Excel, CSV and plain text files.
13
- * Correlations: Pearson's r, Spearman's rank correlation (rho), Tetrachoric, Polychoric
14
- * Tests: F (Anona One-Way), T, Levene, U-Mannwhitney.
15
- * Regression: Simple, Multiple, Probit and Logit
16
- * Factorial Analysis: Extraction (PCA and Principal Axis) and Rotation (Varimax and relatives)
13
+ * Correlations: Pearson's r, Spearman's rank correlation (rho), Tetrachoric, Polychoric.
14
+ * Anova: generic and vector-based One-way ANOVA
15
+ * Tests: F, T, Levene, U-Mannwhitney.
16
+ * Regression: Simple, Multiple (OLS), Probit and Logit
17
+ * Factorial Analysis: Extraction (PCA and Principal Axis), Rotation (Varimax, Equimax, Quartimax) and Parallel Analysis, for estimation of number of factors.
17
18
  * Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
18
19
  * Sample calculation related formulas
19
- * Creates reports on text, html and rtf, using ReportBuilder
20
+ * Creates reports on text, html and rtf, using ReportBuilder gem
20
21
 
21
22
  == FEATURES:
22
23
 
@@ -24,6 +25,7 @@ Includes:
24
25
  * Statsample::Vector: An extension of an array, with statistical methods like sum, mean and standard deviation
25
26
  * Statsample::Dataset: a group of Statsample::Vector, analog to a excel spreadsheet or a dataframe on R. The base of almost all operations on statsample.
26
27
  * Statsample::Multiset: multiple datasets with same fields and type of vectors
28
+ * Anova module provides generic Statsample::Anova::OneWay and vector based Statsample::Anova::OneWayWithVectors
27
29
  * Module Statsample::Bivariate provides covariance and pearson, spearman, point biserial, tau a, tau b, gamma, tetrachoric (see Bivariate::Tetrachoric) and polychoric (see Bivariate::Polychoric) correlations. Include methods to create correlation and covariance matrices
28
30
  * Multiple types of regression.
29
31
  * Simple Regression : Statsample::Regression::Simple
@@ -38,6 +40,7 @@ Includes:
38
40
  * Statsample::Factor::Varimax
39
41
  * Statsample::Factor::Equimax
40
42
  * Statsample::Factor::Quartimax
43
+ * Statsample::Factor::ParallelAnalysis performs Horn's 'parallel analysis' to a principal components analysis to adjust for sample bias in the retention of components.
41
44
  * Dominance Analysis. Based on Budescu and Azen papers, Statsample::DominanceAnalysis class can report dominance analysis for a sample, using uni or multivariate dependent variables and DominanceAnalysisBootstrap can execute bootstrap analysis to determine dominance stability, as recommended by Azen & Budescu (2003) link[http://psycnet.apa.org/journals/met/8/2/129/].
42
45
  * Module Statsample::Codification, to help to codify open questions
43
46
  * Converters to import and export data:
@@ -53,6 +56,7 @@ Includes:
53
56
  * Statsample::Test::Levene
54
57
  * Statsample::Test::UMannWhitney
55
58
  * Statsample::Test::T
59
+ * Statsample::Test::F
56
60
  * Interfaces to gdchart, gnuplot and SVG::Graph
57
61
 
58
62
 
data/Rakefile CHANGED
@@ -39,11 +39,11 @@ task :makemo do
39
39
  end
40
40
 
41
41
  h=Hoe.spec('statsample') do
42
- self.testlib=:minitest unless RUBY_VERSION<="1.9"
43
42
  self.version=Statsample::VERSION
43
+ self.testlib=:minitest
44
44
  self.rubyforge_name = "ruby-statsample"
45
45
  self.developer('Claudio Bustos', 'clbustos@gmail.com')
46
- self.extra_deps << ["spreadsheet","~>0.6.0"] << ["svg-graph", "~>1.0"] << ["reportbuilder", "~>1.0"] << ["minimization", "~>0.1.0"] << ["fastercsv"] << ["dirty-memoize", "~>0.0"]
46
+ self.extra_deps << ["spreadsheet","~>0.6.0"] << ["svg-graph", "~>1.0"] << ["reportbuilder", "~>1.0"] << ["minimization", "~>0.2.0"] << ["fastercsv"] << ["dirty-memoize", "~>0.0"]
47
47
  self.clean_globs << "test/images/*" << "demo/item_analysis/*" << "demo/Regression"
48
48
  self.need_rdoc=false
49
49
  end
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/ruby
2
+ $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
+ require 'statsample'
4
+ a=1000.times.collect {r=rand(5); r==4 ? nil: r;}.to_scale
5
+ b=1000.times.collect {r=rand(5); r==4 ? nil: r;}.to_scale
6
+
7
+ ds={'a'=>a,'b'=>b}.to_dataset
8
+ puts ds.summary
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/ruby
2
- $:.unshift(File.dirname(__FILE__)+'/../lib/')
2
+ #$:.unshift(File.dirname(__FILE__)+'/../lib/')
3
3
 
4
4
  require 'statsample'
5
5
  require 'benchmark'
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/ruby
2
+ $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
+
4
+ require 'statsample'
5
+ samples=100
6
+ variables=30
7
+ rng = GSL::Rng.alloc()
8
+ f1=samples.times.collect {rng.ugaussian()}.to_scale
9
+ f2=samples.times.collect {rng.ugaussian()}.to_scale
10
+ f3=samples.times.collect {rng.ugaussian()}.to_scale
11
+
12
+ vectors={}
13
+
14
+ variables.times do |i|
15
+ vectors["v#{i}"]=samples.times.collect {|nv| f1[nv]*i+(f2[nv]*(15-i))+((f3[nv]*(30-i))*1.5)*rng.ugaussian()}.to_scale
16
+ end
17
+ ds=vectors.to_dataset
18
+
19
+ pa=Statsample::Factor::ParallelAnalysis.new(ds, :iterations=>10, :debug=>true)
20
+ pca=Statsample::Factor::PCA.new(Statsample::Bivariate.correlation_matrix(ds))
21
+ rb=ReportBuilder.new(:name=>"Parallel Analysis with simulation") do |g|
22
+ g.text("There are 3 real factors on data")
23
+ g.parse_element(pca)
24
+ g.text("Traditional Kaiser criterion (k>1) returns #{pca.m} factors")
25
+ g.parse_element(pa)
26
+ g.text("Parallel Analysis returns #{pa.number_of_factors} factors to preserve")
27
+ end
28
+
29
+ puts rb.to_text
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/ruby
2
+ $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
+
4
+ require 'statsample'
5
+ samples=1000
6
+ variables=30
7
+ rng = GSL::Rng.alloc()
8
+ f1=samples.times.collect {rng.ugaussian()}.to_scale
9
+ f2=samples.times.collect {rng.ugaussian()}.to_scale
10
+ f3=samples.times.collect {rng.ugaussian()}.to_scale
11
+
12
+ vectors={}
13
+
14
+ variables.times do |i|
15
+ vectors["v#{i}"]=samples.times.collect {|nv| f1[nv]*(i-30)+f2[nv]*(i+30)+f3[nv]*(i+15) + rng.ugaussian() > 0 ? 1 : 0}.to_scale
16
+ end
17
+ ds=vectors.to_dataset
18
+
19
+ pa=Statsample::Factor::ParallelAnalysis.new(ds, :iterations=>10, :matrix_method=>:tetrachoric_correlation_matrix, :debug=>true)
20
+
21
+ pca=Statsample::Factor::PCA.new(Statsample::Bivariate.tetrachoric_correlation_matrix(ds))
22
+ rb=ReportBuilder.new(:name=>"Parallel Analysis with simulation") do |g|
23
+ g.text("There are 3 real factors on data")
24
+ g.parse_element(pca)
25
+ g.text("Traditional Kaiser criterion (k>1) returns #{pca.m} factors")
26
+ g.parse_element(pa)
27
+ g.text("Parallel Analysis returns #{pa.number_of_factors} factors to preserve")
28
+ end
29
+
30
+ puts rb.to_text
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/ruby
2
+ $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
+
4
+ require 'statsample'
5
+ a=1000.times.collect {r=rand(5); r==4 ? nil: r;}.to_scale
6
+ puts a.summary
data/lib/distribution.rb CHANGED
@@ -8,10 +8,20 @@ require 'statistics2'
8
8
  # Distribution::Normal.p_value(0.95)
9
9
  # => 1.64485364660836
10
10
  module Distribution
11
- autoload(:ChiSquare, 'distribution/chisquare')
12
- autoload(:T, 'distribution/t')
13
- autoload(:F, 'distribution/f')
14
- autoload(:Normal, 'distribution/normal')
15
- autoload(:NormalBivariate, 'distribution/normalbivariate')
16
- autoload(:NormalMultivariate, 'distribution/normalmultivariate')
11
+ begin
12
+ require 'rbgsl'
13
+ def self.has_gsl?
14
+ true
15
+ end
16
+ rescue LoadError
17
+ def self.has_gsl?
18
+ false
19
+ end
20
+ end
21
+ autoload(:ChiSquare, 'distribution/chisquare')
22
+ autoload(:T, 'distribution/t')
23
+ autoload(:F, 'distribution/f')
24
+ autoload(:Normal, 'distribution/normal')
25
+ autoload(:NormalBivariate, 'distribution/normalbivariate')
26
+ # autoload(:NormalMultivariate, 'distribution/normalmultivariate')
17
27
  end
@@ -1,25 +1,32 @@
1
1
  module Distribution
2
- # Calculate cdf and inverse cdf for Normal Distribution.
3
- # Uses Statistics2 module
4
- module Normal
5
- class << self
6
- # Return the P-value of the corresponding integral
7
- def p_value(pr)
8
- Statistics2.pnormaldist(pr)
9
- end
10
- # Normal cumulative distribution function (cdf).
11
- #
12
- # Returns the integral of normal distribution
13
- # over (-Infty, x].
14
- #
15
- def cdf(x)
16
- Statistics2.normaldist(x)
17
- end
18
- # Normal probability density function (pdf)
19
- # With x=0 and sigma=1
20
- def pdf(x)
21
- (1.0/Math::sqrt(2*Math::PI))*Math::exp(-(x**2/2.0))
2
+ # Calculate cdf and inverse cdf for Normal Distribution.
3
+ # Uses Statistics2 module
4
+ module Normal
5
+ class << self
6
+ # Return the P-value of the corresponding integral
7
+ def p_value(pr)
8
+ Statistics2.pnormaldist(pr)
9
+ end
10
+ # Normal cumulative distribution function (cdf).
11
+ #
12
+ # Returns the integral of normal distribution
13
+ # over (-Infty, x].
14
+ #
15
+ def cdf(x)
16
+ Statistics2.normaldist(x)
17
+ end
18
+
19
+ if false and Distribution.has_gsl?
20
+ alias :cdf_ruby :cdf
21
+ def cdf(x) # :nodoc:
22
+ GSL::Cdf::gaussian_P(x)
22
23
  end
23
24
  end
25
+ # Normal probability density function (pdf)
26
+ # With x=0 and sigma=1
27
+ def pdf(x)
28
+ (1.0/Math::sqrt(2*Math::PI))*Math::exp(-(x**2/2.0))
29
+ end
24
30
  end
31
+ end
25
32
  end
@@ -120,7 +120,7 @@ module Distribution
120
120
  sum
121
121
  end
122
122
  # Normal cumulative distribution function (cdf) for a given x, y and rho.
123
- # Based on Fortran code by Alan Genz
123
+ # Ported from Fortran code by Alan Genz
124
124
  #
125
125
  # Original documentation
126
126
  # DOUBLE PRECISION FUNCTION BVND( DH, DK, R )
data/lib/statsample.rb CHANGED
@@ -112,7 +112,7 @@ module Statsample
112
112
  false
113
113
  end
114
114
  end
115
- VERSION = '0.9.0'
115
+ VERSION = '0.10.0'
116
116
  SPLIT_TOKEN = ","
117
117
  autoload(:Database, 'statsample/converters')
118
118
  autoload(:Anova, 'statsample/anova')
@@ -186,6 +186,16 @@ module Statsample
186
186
  ds=Statsample::Dataset.new(h).dup_only_valid
187
187
  ds.vectors.values
188
188
  end
189
+ # Cheap version of #only_valid.
190
+ # If any vectors have missing_values, return only valid.
191
+ # If not, return the vectors it self
192
+ def only_valid_clone(*vs)
193
+ if vs.any? {|v| v.has_missing_data?}
194
+ only_valid(*vs)
195
+ else
196
+ vs
197
+ end
198
+ end
189
199
  end
190
200
 
191
201
 
@@ -214,7 +224,14 @@ module Statsample
214
224
  fp.close
215
225
  end
216
226
  end
217
-
227
+ # Provides basic method to generate summaries
228
+ module Summarizable
229
+ include GetText
230
+ bindtextdomain("statsample")
231
+ def summary(method=:to_text)
232
+ ReportBuilder.new(:no_title=>true).add(self).send(method)
233
+ end
234
+ end
218
235
  module STATSAMPLE__ #:nodoc:
219
236
  end
220
237
  end
@@ -1,11 +1,86 @@
1
1
  module Statsample
2
2
  module Anova
3
- # One Way Anova
3
+ class << self
4
+ def oneway(*args)
5
+ OneWay.new(*args)
6
+ end
7
+ def oneway_with_vectors(*args)
8
+ OneWayWithVectors.new(*args)
9
+ end
10
+ end
11
+ # = Generic Anova one-way.
12
+ # You could enter the sum of squares or the mean squares. You
13
+ # should enter the degrees of freedom for numerator and denominator.
14
+ # == Usage
15
+ # anova=Statsample::Anova::OneWay.new(:ss_num=>10, :ss_den=>20, :df_num=>2, :df_den=>10, :name=>"ANOVA for....")
16
+ class OneWay
17
+ include GetText
18
+ bindtextdomain("statsample")
19
+ attr_reader :df_num, :df_den, :ss_num, :ss_den, :ms_num, :ms_den, :ms_total, :df_total, :ss_total
20
+ # Name of ANOVA Analysis
21
+ attr_accessor :name
22
+ attr_accessor :name_denominator
23
+ attr_accessor :name_numerator
24
+ def initialize(opts=Hash.new)
25
+ # First see if sum of squares or mean squares are entered
26
+ raise ArgumentError, "You should set d.f." unless (opts.has_key? :df_num and opts.has_key? :df_den)
27
+ @df_num=opts.delete :df_num
28
+ @df_den=opts.delete :df_den
29
+ @df_total=@df_num+@df_den
30
+ if(opts.has_key? :ss_num and opts.has_key? :ss_den)
31
+ @ss_num = opts.delete :ss_num
32
+ @ss_den =opts.delete :ss_den
33
+ @ms_num =@ss_num.quo(@df_num)
34
+ @ms_den =@ss_den.quo(@df_den)
35
+ elsif (opts.has_key? :ms_num and opts.has_key? :ms_den)
36
+ @ms_num =opts.delete :ms_num
37
+ @ms_den =opts.delete :ms_den
38
+ @ss_num =@ms_num * @df_num
39
+ @ss_den =@ss_den * @df_den
40
+ end
41
+ @ss_total=@ss_num+@ss_den
42
+ @ms_total=@ms_num+@ms_den
43
+ opts_default={:name=>"ANOVA",
44
+ :name_denominator=>"Explained variance",
45
+ :name_numerator=>"Unexplained variance"}
46
+ @opts=opts_default.merge(opts)
47
+ opts_default.keys.each {|k|
48
+ send("#{k}=", @opts[k])
49
+ }
50
+ @f_object=Statsample::Test::F.new(@ms_num,@ms_den,@df_num,@df_den)
51
+ end
52
+ # F value
53
+ def f
54
+ @f_object.f
55
+ end
56
+ # P-value of F test
57
+ def probability
58
+ @f_object.probability
59
+ end
60
+ # Summary of Anova analysis
61
+ def summary
62
+ ReportBuilder.new(:no_title=>true).add(self).to_text
63
+ end
64
+ def report_building(builder) #:nodoc:
65
+ builder.section(:name=>@name) do |b|
66
+ report_building_table(b)
67
+ end
68
+ end
69
+ def report_building_table(builder) #:nodoc:
70
+ builder.table(:name=>_("%s Table") % @name, :header=>%w{source ss df ms f p}.map {|v| _(v)}) do |t|
71
+ t.row([@name_numerator, sprintf("%0.3f",@ss_num), @df_num, sprintf("%0.3f",@ms_num), sprintf("%0.3f",f), sprintf("%0.3f", probability)])
72
+ t.row([@name_denominator, sprintf("%0.3f",@ss_den), @df_den, sprintf("%0.3f",@ms_den), "", ""])
73
+ t.row([_("Total"), sprintf("%0.3f",@ss_total), @df_total, sprintf("%0.3f",@ms_total),"",""])
74
+ end
75
+ end
76
+
77
+ end
78
+ # One Way Anova with vectors
4
79
  # Example:
5
80
  # v1=[2,3,4,5,6].to_scale
6
81
  # v2=[3,3,4,5,6].to_scale
7
82
  # v3=[5,3,1,5,6].to_scale
8
- # anova=Statsample::Anova::OneWay.new([v1,v2,v3])
83
+ # anova=Statsample::Anova::OneWayWithVectors.new([v1,v2,v3])
9
84
  # anova.f
10
85
  # => 0.0243902439024391
11
86
  # anova.probability
@@ -13,30 +88,43 @@ module Statsample
13
88
  # anova.sst
14
89
  # => 32.9333333333333
15
90
  #
16
- class OneWay < Statsample::Test::F
17
- def initialize(vectors,opts=Hash.new)
18
- @vectors=vectors
19
- opts_default={:name=>_("Anova One-Way"), :name_numerator=>"Between Groups", :name_denominator=>"Within Groups"}
20
- super(ssbg,sswg, df_bg, df_wg)
91
+ class OneWayWithVectors < OneWay
92
+ # Show on summary Levene test
93
+ attr_accessor :summary_levene
94
+ # Show on summary descriptives for vectors
95
+ attr_accessor :summary_descriptives
96
+ def initialize(*args)
97
+ if args[0].is_a? Array
98
+ @vectors=args.shift
99
+ else
100
+ @vectors=args.find_all {|v| v.is_a? Statsample::Vector}
101
+ opts=args.find {|v| v.is_a? Hash}
102
+ end
103
+ opts||=Hash.new
104
+ opts_default={:name=>_("Anova One-Way"),
105
+ :name_numerator=>"Between Groups",
106
+ :name_denominator=>"Within Groups",
107
+ :summary_descriptives=>false,
108
+ :summary_levene=>false}
109
+ @opts=opts_default.merge(opts).merge(:ss_num=>ssbg, :ss_den=>sswg, :df_num=>df_bg, :df_den=>df_wg)
110
+ super(@opts)
111
+ end
112
+ alias :sst :ss_total
113
+ def levene
114
+ Statsample::Test.levene(@vectors, :name=>_("Test of Homogeneity of variances (Levene)"))
21
115
  end
22
116
  # Total mean
23
- def mean
117
+ def total_mean
24
118
  sum=@vectors.inject(0){|a,v| a+v.sum}
25
119
  sum.quo(n)
26
120
  end
27
-
28
- # Total sum of squares
29
- def sst
30
- m=mean
31
- @vectors.inject(0) {|total,vector| total+vector.ss(m) }
32
- end
33
121
  # Sum of squares within groups
34
122
  def sswg
35
123
  @sswg||=@vectors.inject(0) {|total,vector| total+vector.ss }
36
124
  end
37
125
  # Sum of squares between groups
38
126
  def ssbg
39
- m=mean
127
+ m=total_mean
40
128
  @vectors.inject(0) do |total,vector|
41
129
  total + (vector.mean-m).square * vector.size
42
130
  end
@@ -56,7 +144,21 @@ module Statsample
56
144
  def n
57
145
  @vectors.inject(0){|a,v| a+v.size}
58
146
  end
59
-
147
+ def report_building(builder) # :nodoc:
148
+ builder.section(:name=>@name) do |s|
149
+ if summary_descriptives
150
+ s.table(:name=>_("Descriptives"),:header=>%w{Name N Mean SD Min Max}.map {|v| _(v)}) do |t|
151
+ @vectors.each do |v|
152
+ t.row [v.name, v.n_valid, "%0.4f" % v.mean, "%0.4f" % v.sd, "%0.4f" % v.min, "%0.4f" % v.max]
153
+ end
154
+ end
155
+ end
156
+ if summary_levene
157
+ s.parse_element(levene)
158
+ end
159
+ report_building_table(s)
160
+ end
161
+ end
60
162
  end
61
163
  end
62
164
  end