statsample 0.9.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. data.tar.gz.sig +0 -0
  2. data/History.txt +20 -1
  3. data/Manifest.txt +8 -1
  4. data/README.txt +11 -7
  5. data/Rakefile +2 -2
  6. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  7. data/examples/dataset.rb +8 -0
  8. data/examples/multiple_regression.rb +1 -1
  9. data/examples/parallel_analysis.rb +29 -0
  10. data/examples/parallel_analysis_tetrachoric.rb +30 -0
  11. data/examples/vector.rb +6 -0
  12. data/lib/distribution.rb +16 -6
  13. data/lib/distribution/normal.rb +27 -20
  14. data/lib/distribution/normalbivariate.rb +1 -1
  15. data/lib/statsample.rb +19 -2
  16. data/lib/statsample/anova.rb +118 -16
  17. data/lib/statsample/bivariate.rb +27 -13
  18. data/lib/statsample/bivariate/polychoric.rb +18 -5
  19. data/lib/statsample/crosstab.rb +66 -74
  20. data/lib/statsample/dataset.rb +52 -45
  21. data/lib/statsample/dominanceanalysis.rb +2 -5
  22. data/lib/statsample/factor.rb +1 -1
  23. data/lib/statsample/factor/parallelanalysis.rb +122 -0
  24. data/lib/statsample/factor/pca.rb +23 -28
  25. data/lib/statsample/factor/principalaxis.rb +8 -3
  26. data/lib/statsample/matrix.rb +27 -24
  27. data/lib/statsample/mle.rb +11 -11
  28. data/lib/statsample/permutation.rb +2 -1
  29. data/lib/statsample/regression.rb +10 -8
  30. data/lib/statsample/regression/multiple/baseengine.rb +36 -25
  31. data/lib/statsample/regression/multiple/gslengine.rb +14 -0
  32. data/lib/statsample/regression/multiple/matrixengine.rb +4 -32
  33. data/lib/statsample/regression/multiple/rubyengine.rb +2 -6
  34. data/lib/statsample/regression/simple.rb +1 -1
  35. data/lib/statsample/reliability.rb +42 -54
  36. data/lib/statsample/test.rb +10 -6
  37. data/lib/statsample/test/f.rb +16 -26
  38. data/lib/statsample/test/levene.rb +4 -8
  39. data/lib/statsample/test/t.rb +30 -24
  40. data/lib/statsample/test/umannwhitney.rb +13 -6
  41. data/lib/statsample/vector.rb +86 -76
  42. data/po/es/statsample.mo +0 -0
  43. data/po/es/statsample.po +127 -94
  44. data/po/statsample.pot +114 -79
  45. data/test/test_anovaoneway.rb +27 -0
  46. data/test/test_anovawithvectors.rb +97 -0
  47. data/test/test_bivariate.rb +6 -57
  48. data/test/test_bivariate_polychoric.rb +65 -0
  49. data/test/test_crosstab.rb +6 -0
  50. data/test/test_dataset.rb +29 -1
  51. data/test/test_distribution.rb +6 -13
  52. data/test/test_dominance_analysis.rb +1 -1
  53. data/test/test_factor.rb +3 -3
  54. data/test/test_helpers.rb +18 -18
  55. data/test/test_matrix.rb +33 -20
  56. data/test/test_permutation.rb +36 -30
  57. data/test/test_regression.rb +26 -8
  58. data/test/test_reliability.rb +104 -14
  59. data/test/test_test_f.rb +11 -14
  60. data/test/test_test_t.rb +42 -35
  61. data/test/test_umannwhitney.rb +22 -10
  62. data/test/test_vector.rb +204 -102
  63. metadata +57 -81
  64. metadata.gz.sig +0 -0
  65. data/test/test_anova.rb +0 -24
data.tar.gz.sig CHANGED
Binary file
data/History.txt CHANGED
@@ -1,6 +1,25 @@
1
+ === 0.10.0 / 2010-04-13
2
+
3
+ <b>API modifications</b>
4
+ * Refactoring of Statsample::Anova module.
5
+ * Statsample::Anova::OneWay: implementation of generic ANOVA One-Way, used by Multiple Regression, for example.
6
+ * Statsample::Anova::OneWayWithVectors: implementation of ANOVA One-Way to test differences of means.
7
+
8
+ <b>New features</b>
9
+ * New Statsample::Factor::ParallelAnalysis, which performs Horn's 'parallel analysis' on a PCA to adjust for sample bias in the retention of components.
10
+ * New Statsample.only_valid_clone and Statsample::Dataset.clone, which allow creating shallow copies of valid vectors and datasets. Used by correlation matrix methods to optimize calculations.
11
+ * New module Statsample::Summarizable, which adds GetText and ReportBuilder support to classes. Better summaries for Vector, Dataset, Crosstab, PrincipalAxis, PCA and Regression::Multiple classes.
12
+
13
+ <b>Optimizations and bug fixes</b>
14
+
15
+ * Refactoring of Statsample::Regression::Multiple classes. Still needs work.
16
+ * Bug fix on Statsample::Factor::PCA and Statsample::Factor::PrincipalAxis
17
+ * Bug fix on Statsample::Bivariate::Polychoric.new_with_vectors. It should be defined as a class method, not an instance method.
18
+ * Optimized correlation and covariance matrix. Only half of the matrix is calculated; the other half is returned from cache.
19
+ * More tests coverage. RCOV Total: 82.51% , Code: 77.83%
20
+
1
21
  === 0.9.0 / 2010-04-04
2
22
  * New Statsample::Test::F. Anova::OneWay subclasses it and Regression classes uses it.
3
-
4
23
  === 0.8.2 / 2010-04-01
5
24
  * Statsample::PromiseAfter replaced by external package DirtyMemoize [http://rubygems.org/gems/dirty-memoize]
6
25
  === 0.8.1 / 2010-03-29
data/Manifest.txt CHANGED
@@ -11,15 +11,19 @@ data/test_binomial.csv
11
11
  data/tetmat_matrix.txt
12
12
  data/tetmat_test.txt
13
13
  examples/correlation_matrix.rb
14
+ examples/dataset.rb
14
15
  examples/dominance_analysis.rb
15
16
  examples/dominance_analysis_bootstrap.rb
16
17
  examples/levene.rb
17
18
  examples/multiple_regression.rb
18
19
  examples/multivariate_correlation.rb
20
+ examples/parallel_analysis.rb
21
+ examples/parallel_analysis_tetrachoric.rb
19
22
  examples/polychoric.rb
20
23
  examples/principal_axis.rb
21
24
  examples/t_test.rb
22
25
  examples/tetrachoric.rb
26
+ examples/vector.rb
23
27
  lib/distribution.rb
24
28
  lib/distribution/chisquare.rb
25
29
  lib/distribution/f.rb
@@ -43,6 +47,7 @@ lib/statsample/dataset.rb
43
47
  lib/statsample/dominanceanalysis.rb
44
48
  lib/statsample/dominanceanalysis/bootstrap.rb
45
49
  lib/statsample/factor.rb
50
+ lib/statsample/factor/parallelanalysis.rb
46
51
  lib/statsample/factor/pca.rb
47
52
  lib/statsample/factor/principalaxis.rb
48
53
  lib/statsample/factor/rotation.rb
@@ -84,8 +89,10 @@ po/es/statsample.mo
84
89
  po/es/statsample.po
85
90
  po/statsample.pot
86
91
  setup.rb
87
- test/test_anova.rb
92
+ test/test_anovaoneway.rb
93
+ test/test_anovawithvectors.rb
88
94
  test/test_bivariate.rb
95
+ test/test_bivariate_polychoric.rb
89
96
  test/test_codification.rb
90
97
  test/test_combination.rb
91
98
  test/test_crosstab.rb
data/README.txt CHANGED
@@ -5,18 +5,19 @@ http://ruby-statsample.rubyforge.org/
5
5
 
6
6
  == DESCRIPTION:
7
7
 
8
- A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, 1.9.1, 1.9.2 (April, 2010) and JRuby 1.4 (Ruby 1.8.7 compatible)
8
+ A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, 1.9.1, 1.9.2 (April, 2010) and JRuby 1.4 (Ruby 1.8.7 compatible).
9
9
 
10
- Includes:
10
+ Include:
11
11
  * Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
12
12
  * Imports and exports datasets from and to Excel, CSV and plain text files.
13
- * Correlations: Pearson's r, Spearman's rank correlation (rho), Tetrachoric, Polychoric
14
- * Tests: F (Anona One-Way), T, Levene, U-Mannwhitney.
15
- * Regression: Simple, Multiple, Probit and Logit
16
- * Factorial Analysis: Extraction (PCA and Principal Axis) and Rotation (Varimax and relatives)
13
+ * Correlations: Pearson's r, Spearman's rank correlation (rho), Tetrachoric, Polychoric.
14
+ * Anova: generic and vector-based One-way ANOVA
15
+ * Tests: F, T, Levene, U-Mannwhitney.
16
+ * Regression: Simple, Multiple (OLS), Probit and Logit
17
+ * Factorial Analysis: Extraction (PCA and Principal Axis), Rotation (Varimax, Equimax, Quartimax) and Parallel Analysis, for estimation of number of factors.
17
18
  * Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
18
19
  * Sample calculation related formulas
19
- * Creates reports on text, html and rtf, using ReportBuilder
20
+ * Creates reports on text, html and rtf, using ReportBuilder gem
20
21
 
21
22
  == FEATURES:
22
23
 
@@ -24,6 +25,7 @@ Includes:
24
25
  * Statsample::Vector: An extension of an array, with statistical methods like sum, mean and standard deviation
25
26
  * Statsample::Dataset: a group of Statsample::Vector, analogous to an Excel spreadsheet or a data frame in R. The base of almost all operations on statsample.
26
27
  * Statsample::Multiset: multiple datasets with same fields and type of vectors
28
+ * Anova module provides generic Statsample::Anova::OneWay and vector based Statsample::Anova::OneWayWithVectors
27
29
  * Module Statsample::Bivariate provides covariance and pearson, spearman, point biserial, tau a, tau b, gamma, tetrachoric (see Bivariate::Tetrachoric) and polychoric (see Bivariate::Polychoric) correlations. Include methods to create correlation and covariance matrices
28
30
  * Multiple types of regression.
29
31
  * Simple Regression : Statsample::Regression::Simple
@@ -38,6 +40,7 @@ Includes:
38
40
  * Statsample::Factor::Varimax
39
41
  * Statsample::Factor::Equimax
40
42
  * Statsample::Factor::Quartimax
43
+ * Statsample::Factor::ParallelAnalysis performs Horn's 'parallel analysis' to a principal components analysis to adjust for sample bias in the retention of components.
41
44
  * Dominance Analysis. Based on Budescu and Azen papers, Statsample::DominanceAnalysis class can report dominance analysis for a sample, using uni or multivariate dependent variables and DominanceAnalysisBootstrap can execute bootstrap analysis to determine dominance stability, as recommended by Azen & Budescu (2003) link[http://psycnet.apa.org/journals/met/8/2/129/].
42
45
  * Module Statsample::Codification, to help to codify open questions
43
46
  * Converters to import and export data:
@@ -53,6 +56,7 @@ Includes:
53
56
  * Statsample::Test::Levene
54
57
  * Statsample::Test::UMannWhitney
55
58
  * Statsample::Test::T
59
+ * Statsample::Test::F
56
60
  * Interfaces to gdchart, gnuplot and SVG::Graph
57
61
 
58
62
 
data/Rakefile CHANGED
@@ -39,11 +39,11 @@ task :makemo do
39
39
  end
40
40
 
41
41
  h=Hoe.spec('statsample') do
42
- self.testlib=:minitest unless RUBY_VERSION<="1.9"
43
42
  self.version=Statsample::VERSION
43
+ self.testlib=:minitest
44
44
  self.rubyforge_name = "ruby-statsample"
45
45
  self.developer('Claudio Bustos', 'clbustos@gmail.com')
46
- self.extra_deps << ["spreadsheet","~>0.6.0"] << ["svg-graph", "~>1.0"] << ["reportbuilder", "~>1.0"] << ["minimization", "~>0.1.0"] << ["fastercsv"] << ["dirty-memoize", "~>0.0"]
46
+ self.extra_deps << ["spreadsheet","~>0.6.0"] << ["svg-graph", "~>1.0"] << ["reportbuilder", "~>1.0"] << ["minimization", "~>0.2.0"] << ["fastercsv"] << ["dirty-memoize", "~>0.0"]
47
47
  self.clean_globs << "test/images/*" << "demo/item_analysis/*" << "demo/Regression"
48
48
  self.need_rdoc=false
49
49
  end
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/ruby
2
+ $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
+ require 'statsample'
4
+ a=1000.times.collect {r=rand(5); r==4 ? nil: r;}.to_scale
5
+ b=1000.times.collect {r=rand(5); r==4 ? nil: r;}.to_scale
6
+
7
+ ds={'a'=>a,'b'=>b}.to_dataset
8
+ puts ds.summary
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/ruby
2
- $:.unshift(File.dirname(__FILE__)+'/../lib/')
2
+ #$:.unshift(File.dirname(__FILE__)+'/../lib/')
3
3
 
4
4
  require 'statsample'
5
5
  require 'benchmark'
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/ruby
2
+ $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
+
4
+ require 'statsample'
5
+ samples=100
6
+ variables=30
7
+ rng = GSL::Rng.alloc()
8
+ f1=samples.times.collect {rng.ugaussian()}.to_scale
9
+ f2=samples.times.collect {rng.ugaussian()}.to_scale
10
+ f3=samples.times.collect {rng.ugaussian()}.to_scale
11
+
12
+ vectors={}
13
+
14
+ variables.times do |i|
15
+ vectors["v#{i}"]=samples.times.collect {|nv| f1[nv]*i+(f2[nv]*(15-i))+((f3[nv]*(30-i))*1.5)*rng.ugaussian()}.to_scale
16
+ end
17
+ ds=vectors.to_dataset
18
+
19
+ pa=Statsample::Factor::ParallelAnalysis.new(ds, :iterations=>10, :debug=>true)
20
+ pca=Statsample::Factor::PCA.new(Statsample::Bivariate.correlation_matrix(ds))
21
+ rb=ReportBuilder.new(:name=>"Parallel Analysis with simulation") do |g|
22
+ g.text("There are 3 real factors on data")
23
+ g.parse_element(pca)
24
+ g.text("Traditional Kaiser criterion (k>1) returns #{pca.m} factors")
25
+ g.parse_element(pa)
26
+ g.text("Parallel Analysis returns #{pa.number_of_factors} factors to preserve")
27
+ end
28
+
29
+ puts rb.to_text
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/ruby
2
+ $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
+
4
+ require 'statsample'
5
+ samples=1000
6
+ variables=30
7
+ rng = GSL::Rng.alloc()
8
+ f1=samples.times.collect {rng.ugaussian()}.to_scale
9
+ f2=samples.times.collect {rng.ugaussian()}.to_scale
10
+ f3=samples.times.collect {rng.ugaussian()}.to_scale
11
+
12
+ vectors={}
13
+
14
+ variables.times do |i|
15
+ vectors["v#{i}"]=samples.times.collect {|nv| f1[nv]*(i-30)+f2[nv]*(i+30)+f3[nv]*(i+15) + rng.ugaussian() > 0 ? 1 : 0}.to_scale
16
+ end
17
+ ds=vectors.to_dataset
18
+
19
+ pa=Statsample::Factor::ParallelAnalysis.new(ds, :iterations=>10, :matrix_method=>:tetrachoric_correlation_matrix, :debug=>true)
20
+
21
+ pca=Statsample::Factor::PCA.new(Statsample::Bivariate.tetrachoric_correlation_matrix(ds))
22
+ rb=ReportBuilder.new(:name=>"Parallel Analysis with simulation") do |g|
23
+ g.text("There are 3 real factors on data")
24
+ g.parse_element(pca)
25
+ g.text("Traditional Kaiser criterion (k>1) returns #{pca.m} factors")
26
+ g.parse_element(pa)
27
+ g.text("Parallel Analysis returns #{pa.number_of_factors} factors to preserve")
28
+ end
29
+
30
+ puts rb.to_text
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/ruby
2
+ $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
+
4
+ require 'statsample'
5
+ a=1000.times.collect {r=rand(5); r==4 ? nil: r;}.to_scale
6
+ puts a.summary
data/lib/distribution.rb CHANGED
@@ -8,10 +8,20 @@ require 'statistics2'
8
8
  # Distribution::Normal.p_value(0.95)
9
9
  # => 1.64485364660836
10
10
  module Distribution
11
- autoload(:ChiSquare, 'distribution/chisquare')
12
- autoload(:T, 'distribution/t')
13
- autoload(:F, 'distribution/f')
14
- autoload(:Normal, 'distribution/normal')
15
- autoload(:NormalBivariate, 'distribution/normalbivariate')
16
- autoload(:NormalMultivariate, 'distribution/normalmultivariate')
11
+ begin
12
+ require 'rbgsl'
13
+ def self.has_gsl?
14
+ true
15
+ end
16
+ rescue LoadError
17
+ def self.has_gsl?
18
+ false
19
+ end
20
+ end
21
+ autoload(:ChiSquare, 'distribution/chisquare')
22
+ autoload(:T, 'distribution/t')
23
+ autoload(:F, 'distribution/f')
24
+ autoload(:Normal, 'distribution/normal')
25
+ autoload(:NormalBivariate, 'distribution/normalbivariate')
26
+ # autoload(:NormalMultivariate, 'distribution/normalmultivariate')
17
27
  end
@@ -1,25 +1,32 @@
1
1
  module Distribution
2
- # Calculate cdf and inverse cdf for Normal Distribution.
3
- # Uses Statistics2 module
4
- module Normal
5
- class << self
6
- # Return the P-value of the corresponding integral
7
- def p_value(pr)
8
- Statistics2.pnormaldist(pr)
9
- end
10
- # Normal cumulative distribution function (cdf).
11
- #
12
- # Returns the integral of normal distribution
13
- # over (-Infty, x].
14
- #
15
- def cdf(x)
16
- Statistics2.normaldist(x)
17
- end
18
- # Normal probability density function (pdf)
19
- # With x=0 and sigma=1
20
- def pdf(x)
21
- (1.0/Math::sqrt(2*Math::PI))*Math::exp(-(x**2/2.0))
2
+ # Calculate cdf and inverse cdf for Normal Distribution.
3
+ # Uses Statistics2 module
4
+ module Normal
5
+ class << self
6
+ # Return the P-value of the corresponding integral
7
+ def p_value(pr)
8
+ Statistics2.pnormaldist(pr)
9
+ end
10
+ # Normal cumulative distribution function (cdf).
11
+ #
12
+ # Returns the integral of normal distribution
13
+ # over (-Infty, x].
14
+ #
15
+ def cdf(x)
16
+ Statistics2.normaldist(x)
17
+ end
18
+
19
+ if false and Distribution.has_gsl?
20
+ alias :cdf_ruby :cdf
21
+ def cdf(x) # :nodoc:
22
+ GSL::Cdf::gaussian_P(x)
22
23
  end
23
24
  end
25
+ # Normal probability density function (pdf)
26
+ # With x=0 and sigma=1
27
+ def pdf(x)
28
+ (1.0/Math::sqrt(2*Math::PI))*Math::exp(-(x**2/2.0))
29
+ end
24
30
  end
31
+ end
25
32
  end
@@ -120,7 +120,7 @@ module Distribution
120
120
  sum
121
121
  end
122
122
  # Normal cumulative distribution function (cdf) for a given x, y and rho.
123
- # Based on Fortran code by Alan Genz
123
+ # Ported from Fortran code by Alan Genz
124
124
  #
125
125
  # Original documentation
126
126
  # DOUBLE PRECISION FUNCTION BVND( DH, DK, R )
data/lib/statsample.rb CHANGED
@@ -112,7 +112,7 @@ module Statsample
112
112
  false
113
113
  end
114
114
  end
115
- VERSION = '0.9.0'
115
+ VERSION = '0.10.0'
116
116
  SPLIT_TOKEN = ","
117
117
  autoload(:Database, 'statsample/converters')
118
118
  autoload(:Anova, 'statsample/anova')
@@ -186,6 +186,16 @@ module Statsample
186
186
  ds=Statsample::Dataset.new(h).dup_only_valid
187
187
  ds.vectors.values
188
188
  end
189
+ # Cheap version of #only_valid.
190
+ # If any vectors have missing_values, return only valid.
191
+ # If not, returns the vectors themselves.
192
+ def only_valid_clone(*vs)
193
+ if vs.any? {|v| v.has_missing_data?}
194
+ only_valid(*vs)
195
+ else
196
+ vs
197
+ end
198
+ end
189
199
  end
190
200
 
191
201
 
@@ -214,7 +224,14 @@ module Statsample
214
224
  fp.close
215
225
  end
216
226
  end
217
-
227
+ # Provides basic method to generate summaries
228
+ module Summarizable
229
+ include GetText
230
+ bindtextdomain("statsample")
231
+ def summary(method=:to_text)
232
+ ReportBuilder.new(:no_title=>true).add(self).send(method)
233
+ end
234
+ end
218
235
  module STATSAMPLE__ #:nodoc:
219
236
  end
220
237
  end
@@ -1,11 +1,86 @@
1
1
  module Statsample
2
2
  module Anova
3
- # One Way Anova
3
+ class << self
4
+ def oneway(*args)
5
+ OneWay.new(*args)
6
+ end
7
+ def oneway_with_vectors(*args)
8
+ OneWayWithVectors.new(*args)
9
+ end
10
+ end
11
+ # = Generic Anova one-way.
12
+ # You can enter either the sum of squares or the mean squares. You
13
+ # must enter the degrees of freedom for numerator and denominator.
14
+ # == Usage
15
+ # anova=Statsample::Anova::OneWay.new(:ss_num=>10,:ss_den=>20, :df_num=>2, :df_den=>10, :name=>"ANOVA for....")
16
+ class OneWay
17
+ include GetText
18
+ bindtextdomain("statsample")
19
+ attr_reader :df_num, :df_den, :ss_num, :ss_den, :ms_num, :ms_den, :ms_total, :df_total, :ss_total
20
+ # Name of ANOVA Analysis
21
+ attr_accessor :name
22
+ attr_accessor :name_denominator
23
+ attr_accessor :name_numerator
24
+ def initialize(opts=Hash.new)
25
+ # First see if sum of squares or mean squares are entered
26
+ raise ArgumentError, "You should set d.f." unless (opts.has_key? :df_num and opts.has_key? :df_den)
27
+ @df_num=opts.delete :df_num
28
+ @df_den=opts.delete :df_den
29
+ @df_total=@df_num+@df_den
30
+ if(opts.has_key? :ss_num and opts.has_key? :ss_den)
31
+ @ss_num = opts.delete :ss_num
32
+ @ss_den =opts.delete :ss_den
33
+ @ms_num =@ss_num.quo(@df_num)
34
+ @ms_den =@ss_den.quo(@df_den)
35
+ elsif (opts.has_key? :ms_num and opts.has_key? :ms_den)
36
+ @ms_num =opts.delete :ms_num
37
+ @ms_den =opts.delete :ms_den
38
+ @ss_num =@ms_num * @df_num
39
+ @ss_den =@ss_den * @df_den
40
+ end
41
+ @ss_total=@ss_num+@ss_den
42
+ @ms_total=@ms_num+@ms_den
43
+ opts_default={:name=>"ANOVA",
44
+ :name_denominator=>"Explained variance",
45
+ :name_numerator=>"Unexplained variance"}
46
+ @opts=opts_default.merge(opts)
47
+ opts_default.keys.each {|k|
48
+ send("#{k}=", @opts[k])
49
+ }
50
+ @f_object=Statsample::Test::F.new(@ms_num,@ms_den,@df_num,@df_den)
51
+ end
52
+ # F value
53
+ def f
54
+ @f_object.f
55
+ end
56
+ # P-value of F test
57
+ def probability
58
+ @f_object.probability
59
+ end
60
+ # Summary of Anova analysis
61
+ def summary
62
+ ReportBuilder.new(:no_title=>true).add(self).to_text
63
+ end
64
+ def report_building(builder) #:nodoc:
65
+ builder.section(:name=>@name) do |b|
66
+ report_building_table(b)
67
+ end
68
+ end
69
+ def report_building_table(builder) #:nodoc:
70
+ builder.table(:name=>_("%s Table") % @name, :header=>%w{source ss df ms f p}.map {|v| _(v)}) do |t|
71
+ t.row([@name_numerator, sprintf("%0.3f",@ss_num), @df_num, sprintf("%0.3f",@ms_num), sprintf("%0.3f",f), sprintf("%0.3f", probability)])
72
+ t.row([@name_denominator, sprintf("%0.3f",@ss_den), @df_den, sprintf("%0.3f",@ms_den), "", ""])
73
+ t.row([_("Total"), sprintf("%0.3f",@ss_total), @df_total, sprintf("%0.3f",@ms_total),"",""])
74
+ end
75
+ end
76
+
77
+ end
78
+ # One Way Anova with vectors
4
79
  # Example:
5
80
  # v1=[2,3,4,5,6].to_scale
6
81
  # v2=[3,3,4,5,6].to_scale
7
82
  # v3=[5,3,1,5,6].to_scale
8
- # anova=Statsample::Anova::OneWay.new([v1,v2,v3])
83
+ # anova=Statsample::Anova::OneWayWithVectors.new([v1,v2,v3])
9
84
  # anova.f
10
85
  # => 0.0243902439024391
11
86
  # anova.probability
@@ -13,30 +88,43 @@ module Statsample
13
88
  # anova.sst
14
89
  # => 32.9333333333333
15
90
  #
16
- class OneWay < Statsample::Test::F
17
- def initialize(vectors,opts=Hash.new)
18
- @vectors=vectors
19
- opts_default={:name=>_("Anova One-Way"), :name_numerator=>"Between Groups", :name_denominator=>"Within Groups"}
20
- super(ssbg,sswg, df_bg, df_wg)
91
+ class OneWayWithVectors < OneWay
92
+ # Show on summary Levene test
93
+ attr_accessor :summary_levene
94
+ # Show on summary descriptives for vectors
95
+ attr_accessor :summary_descriptives
96
+ def initialize(*args)
97
+ if args[0].is_a? Array
98
+ @vectors=args.shift
99
+ else
100
+ @vectors=args.find_all {|v| v.is_a? Statsample::Vector}
101
+ opts=args.find {|v| v.is_a? Hash}
102
+ end
103
+ opts||=Hash.new
104
+ opts_default={:name=>_("Anova One-Way"),
105
+ :name_numerator=>"Between Groups",
106
+ :name_denominator=>"Within Groups",
107
+ :summary_descriptives=>false,
108
+ :summary_levene=>false}
109
+ @opts=opts_default.merge(opts).merge(:ss_num=>ssbg, :ss_den=>sswg, :df_num=>df_bg, :df_den=>df_wg)
110
+ super(@opts)
111
+ end
112
+ alias :sst :ss_total
113
+ def levene
114
+ Statsample::Test.levene(@vectors, :name=>_("Test of Homogeneity of variances (Levene)"))
21
115
  end
22
116
  # Total mean
23
- def mean
117
+ def total_mean
24
118
  sum=@vectors.inject(0){|a,v| a+v.sum}
25
119
  sum.quo(n)
26
120
  end
27
-
28
- # Total sum of squares
29
- def sst
30
- m=mean
31
- @vectors.inject(0) {|total,vector| total+vector.ss(m) }
32
- end
33
121
  # Sum of squares within groups
34
122
  def sswg
35
123
  @sswg||=@vectors.inject(0) {|total,vector| total+vector.ss }
36
124
  end
37
125
  # Sum of squares between groups
38
126
  def ssbg
39
- m=mean
127
+ m=total_mean
40
128
  @vectors.inject(0) do |total,vector|
41
129
  total + (vector.mean-m).square * vector.size
42
130
  end
@@ -56,7 +144,21 @@ module Statsample
56
144
  def n
57
145
  @vectors.inject(0){|a,v| a+v.size}
58
146
  end
59
-
147
+ def report_building(builder) # :nodoc:
148
+ builder.section(:name=>@name) do |s|
149
+ if summary_descriptives
150
+ s.table(:name=>_("Descriptives"),:header=>%w{Name N Mean SD Min Max}.map {|v| _(v)}) do |t|
151
+ @vectors.each do |v|
152
+ t.row [v.name, v.n_valid, "%0.4f" % v.mean, "%0.4f" % v.sd, "%0.4f" % v.min, "%0.4f" % v.max]
153
+ end
154
+ end
155
+ end
156
+ if summary_levene
157
+ s.parse_element(levene)
158
+ end
159
+ report_building_table(s)
160
+ end
161
+ end
60
162
  end
61
163
  end
62
164
  end