statsample 0.9.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +0 -0
- data/History.txt +20 -1
- data/Manifest.txt +8 -1
- data/README.txt +11 -7
- data/Rakefile +2 -2
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/examples/dataset.rb +8 -0
- data/examples/multiple_regression.rb +1 -1
- data/examples/parallel_analysis.rb +29 -0
- data/examples/parallel_analysis_tetrachoric.rb +30 -0
- data/examples/vector.rb +6 -0
- data/lib/distribution.rb +16 -6
- data/lib/distribution/normal.rb +27 -20
- data/lib/distribution/normalbivariate.rb +1 -1
- data/lib/statsample.rb +19 -2
- data/lib/statsample/anova.rb +118 -16
- data/lib/statsample/bivariate.rb +27 -13
- data/lib/statsample/bivariate/polychoric.rb +18 -5
- data/lib/statsample/crosstab.rb +66 -74
- data/lib/statsample/dataset.rb +52 -45
- data/lib/statsample/dominanceanalysis.rb +2 -5
- data/lib/statsample/factor.rb +1 -1
- data/lib/statsample/factor/parallelanalysis.rb +122 -0
- data/lib/statsample/factor/pca.rb +23 -28
- data/lib/statsample/factor/principalaxis.rb +8 -3
- data/lib/statsample/matrix.rb +27 -24
- data/lib/statsample/mle.rb +11 -11
- data/lib/statsample/permutation.rb +2 -1
- data/lib/statsample/regression.rb +10 -8
- data/lib/statsample/regression/multiple/baseengine.rb +36 -25
- data/lib/statsample/regression/multiple/gslengine.rb +14 -0
- data/lib/statsample/regression/multiple/matrixengine.rb +4 -32
- data/lib/statsample/regression/multiple/rubyengine.rb +2 -6
- data/lib/statsample/regression/simple.rb +1 -1
- data/lib/statsample/reliability.rb +42 -54
- data/lib/statsample/test.rb +10 -6
- data/lib/statsample/test/f.rb +16 -26
- data/lib/statsample/test/levene.rb +4 -8
- data/lib/statsample/test/t.rb +30 -24
- data/lib/statsample/test/umannwhitney.rb +13 -6
- data/lib/statsample/vector.rb +86 -76
- data/po/es/statsample.mo +0 -0
- data/po/es/statsample.po +127 -94
- data/po/statsample.pot +114 -79
- data/test/test_anovaoneway.rb +27 -0
- data/test/test_anovawithvectors.rb +97 -0
- data/test/test_bivariate.rb +6 -57
- data/test/test_bivariate_polychoric.rb +65 -0
- data/test/test_crosstab.rb +6 -0
- data/test/test_dataset.rb +29 -1
- data/test/test_distribution.rb +6 -13
- data/test/test_dominance_analysis.rb +1 -1
- data/test/test_factor.rb +3 -3
- data/test/test_helpers.rb +18 -18
- data/test/test_matrix.rb +33 -20
- data/test/test_permutation.rb +36 -30
- data/test/test_regression.rb +26 -8
- data/test/test_reliability.rb +104 -14
- data/test/test_test_f.rb +11 -14
- data/test/test_test_t.rb +42 -35
- data/test/test_umannwhitney.rb +22 -10
- data/test/test_vector.rb +204 -102
- metadata +57 -81
- metadata.gz.sig +0 -0
- data/test/test_anova.rb +0 -24
data.tar.gz.sig
CHANGED
Binary file
|
data/History.txt
CHANGED
@@ -1,6 +1,25 @@
|
|
1
|
+
=== 0.10.0 / 2010-04-13
|
2
|
+
|
3
|
+
<b>API modifications</b>
|
4
|
+
* Refactoring of Statsample::Anova module.
|
5
|
+
* Statsample::Anova::OneWay: implementation of generic ANOVA One-Way, used by Multiple Regression, for example.
|
6
|
+
* Statsample::Anova::OneWayWithVectors: implementation of ANOVA One-Way to test differences of means.
|
7
|
+
|
8
|
+
<b>New features</b>
|
9
|
+
* New Statsample::Factor::ParallelAnalysis, which performs Horn's 'parallel analysis' on a PCA to adjust for sample bias in the retention of components.
|
10
|
+
* New Statsample.only_valid_clone and Statsample::Dataset.clone, which allow creating shallow copies of valid vectors and datasets. Used by correlation matrix methods to optimize calculations.
|
11
|
+
* New module Statsample::Summarizable, which adds GetText and ReportBuilder support to classes. Better summaries for Vector, Dataset, Crosstab, PrincipalAxis, PCA and Regression::Multiple classes.
|
12
|
+
|
13
|
+
<b>Optimizations and bug fixes</b>
|
14
|
+
|
15
|
+
* Refactoring of Statsample::Regression::Multiple classes. Still needs work.
|
16
|
+
* Bug fix on Statsample::Factor::PCA and Statsample::Factor::PrincipalAxis
|
17
|
+
* Bug fix on Statsample::Bivariate::Polychoric.new_with_vectors. It should be defined as a class method, not an instance method.
|
18
|
+
* Optimized correlation and covariance matrix. Only half of the matrix is calculated; the other half is returned from cache.
|
19
|
+
* More test coverage. RCOV Total: 82.51%, Code: 77.83%
|
20
|
+
|
1
21
|
=== 0.9.0 / 2010-04-04
|
2
22
|
* New Statsample::Test::F. Anova::OneWay subclasses it and Regression classes use it.
|
3
|
-
|
4
23
|
=== 0.8.2 / 2010-04-01
|
5
24
|
* Statsample::PromiseAfter replaced by external package DirtyMemoize [http://rubygems.org/gems/dirty-memoize]
|
6
25
|
=== 0.8.1 / 2010-03-29
|
data/Manifest.txt
CHANGED
@@ -11,15 +11,19 @@ data/test_binomial.csv
|
|
11
11
|
data/tetmat_matrix.txt
|
12
12
|
data/tetmat_test.txt
|
13
13
|
examples/correlation_matrix.rb
|
14
|
+
examples/dataset.rb
|
14
15
|
examples/dominance_analysis.rb
|
15
16
|
examples/dominance_analysis_bootstrap.rb
|
16
17
|
examples/levene.rb
|
17
18
|
examples/multiple_regression.rb
|
18
19
|
examples/multivariate_correlation.rb
|
20
|
+
examples/parallel_analysis.rb
|
21
|
+
examples/parallel_analysis_tetrachoric.rb
|
19
22
|
examples/polychoric.rb
|
20
23
|
examples/principal_axis.rb
|
21
24
|
examples/t_test.rb
|
22
25
|
examples/tetrachoric.rb
|
26
|
+
examples/vector.rb
|
23
27
|
lib/distribution.rb
|
24
28
|
lib/distribution/chisquare.rb
|
25
29
|
lib/distribution/f.rb
|
@@ -43,6 +47,7 @@ lib/statsample/dataset.rb
|
|
43
47
|
lib/statsample/dominanceanalysis.rb
|
44
48
|
lib/statsample/dominanceanalysis/bootstrap.rb
|
45
49
|
lib/statsample/factor.rb
|
50
|
+
lib/statsample/factor/parallelanalysis.rb
|
46
51
|
lib/statsample/factor/pca.rb
|
47
52
|
lib/statsample/factor/principalaxis.rb
|
48
53
|
lib/statsample/factor/rotation.rb
|
@@ -84,8 +89,10 @@ po/es/statsample.mo
|
|
84
89
|
po/es/statsample.po
|
85
90
|
po/statsample.pot
|
86
91
|
setup.rb
|
87
|
-
test/
|
92
|
+
test/test_anovaoneway.rb
|
93
|
+
test/test_anovawithvectors.rb
|
88
94
|
test/test_bivariate.rb
|
95
|
+
test/test_bivariate_polychoric.rb
|
89
96
|
test/test_codification.rb
|
90
97
|
test/test_combination.rb
|
91
98
|
test/test_crosstab.rb
|
data/README.txt
CHANGED
@@ -5,18 +5,19 @@ http://ruby-statsample.rubyforge.org/
|
|
5
5
|
|
6
6
|
== DESCRIPTION:
|
7
7
|
|
8
|
-
A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, 1.9.1, 1.9.2 (April, 2010) and JRuby 1.4 (Ruby 1.8.7 compatible)
|
8
|
+
A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, 1.9.1, 1.9.2 (April, 2010) and JRuby 1.4 (Ruby 1.8.7 compatible).
|
9
9
|
|
10
|
-
|
10
|
+
Includes:
|
11
11
|
* Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
|
12
12
|
* Imports and exports datasets from and to Excel, CSV and plain text files.
|
13
|
-
* Correlations: Pearson's r, Spearman's rank correlation (rho), Tetrachoric, Polychoric
|
14
|
-
*
|
15
|
-
*
|
16
|
-
*
|
13
|
+
* Correlations: Pearson's r, Spearman's rank correlation (rho), Tetrachoric, Polychoric.
|
14
|
+
* Anova: generic and vector-based One-way ANOVA
|
15
|
+
* Tests: F, T, Levene, Mann-Whitney U.
|
16
|
+
* Regression: Simple, Multiple (OLS), Probit and Logit
|
17
|
+
* Factor Analysis: Extraction (PCA and Principal Axis), Rotation (Varimax, Equimax, Quartimax) and Parallel Analysis, to estimate the number of factors.
|
17
18
|
* Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
|
18
19
|
* Sample calculation related formulas
|
19
|
-
* Creates reports on text, html and rtf, using ReportBuilder
|
20
|
+
* Creates reports on text, html and rtf, using ReportBuilder gem
|
20
21
|
|
21
22
|
== FEATURES:
|
22
23
|
|
@@ -24,6 +25,7 @@ Includes:
|
|
24
25
|
* Statsample::Vector: An extension of an array, with statistical methods like sum, mean and standard deviation
|
25
26
|
* Statsample::Dataset: a group of Statsample::Vector, analogous to an Excel spreadsheet or a data frame in R. The base of almost all operations on statsample.
|
26
27
|
* Statsample::Multiset: multiple datasets with same fields and type of vectors
|
28
|
+
* Anova module provides generic Statsample::Anova::OneWay and vector based Statsample::Anova::OneWayWithVectors
|
27
29
|
* Module Statsample::Bivariate provides covariance and pearson, spearman, point biserial, tau a, tau b, gamma, tetrachoric (see Bivariate::Tetrachoric) and polychoric (see Bivariate::Polychoric) correlations. Include methods to create correlation and covariance matrices
|
28
30
|
* Multiple types of regression.
|
29
31
|
* Simple Regression : Statsample::Regression::Simple
|
@@ -38,6 +40,7 @@ Includes:
|
|
38
40
|
* Statsample::Factor::Varimax
|
39
41
|
* Statsample::Factor::Equimax
|
40
42
|
* Statsample::Factor::Quartimax
|
43
|
+
* Statsample::Factor::ParallelAnalysis performs Horn's 'parallel analysis' to a principal components analysis to adjust for sample bias in the retention of components.
|
41
44
|
* Dominance Analysis. Based on Budescu and Azen papers, Statsample::DominanceAnalysis class can report dominance analysis for a sample, using uni or multivariate dependent variables and DominanceAnalysisBootstrap can execute bootstrap analysis to determine dominance stability, as recommended by Azen & Budescu (2003) link[http://psycnet.apa.org/journals/met/8/2/129/].
|
42
45
|
* Module Statsample::Codification, to help to codify open questions
|
43
46
|
* Converters to import and export data:
|
@@ -53,6 +56,7 @@ Includes:
|
|
53
56
|
* Statsample::Test::Levene
|
54
57
|
* Statsample::Test::UMannWhitney
|
55
58
|
* Statsample::Test::T
|
59
|
+
* Statsample::Test::F
|
56
60
|
* Interfaces to gdchart, gnuplot and SVG::Graph
|
57
61
|
|
58
62
|
|
data/Rakefile
CHANGED
@@ -39,11 +39,11 @@ task :makemo do
|
|
39
39
|
end
|
40
40
|
|
41
41
|
h=Hoe.spec('statsample') do
|
42
|
-
self.testlib=:minitest unless RUBY_VERSION<="1.9"
|
43
42
|
self.version=Statsample::VERSION
|
43
|
+
self.testlib=:minitest
|
44
44
|
self.rubyforge_name = "ruby-statsample"
|
45
45
|
self.developer('Claudio Bustos', 'clbustos@gmail.com')
|
46
|
-
self.extra_deps << ["spreadsheet","~>0.6.0"] << ["svg-graph", "~>1.0"] << ["reportbuilder", "~>1.0"] << ["minimization", "~>0.
|
46
|
+
self.extra_deps << ["spreadsheet","~>0.6.0"] << ["svg-graph", "~>1.0"] << ["reportbuilder", "~>1.0"] << ["minimization", "~>0.2.0"] << ["fastercsv"] << ["dirty-memoize", "~>0.0"]
|
47
47
|
self.clean_globs << "test/images/*" << "demo/item_analysis/*" << "demo/Regression"
|
48
48
|
self.need_rdoc=false
|
49
49
|
end
|
Binary file
|
data/examples/dataset.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
3
|
+
|
4
|
+
require 'statsample'
|
5
|
+
samples=100
|
6
|
+
variables=30
|
7
|
+
rng = GSL::Rng.alloc()
|
8
|
+
f1=samples.times.collect {rng.ugaussian()}.to_scale
|
9
|
+
f2=samples.times.collect {rng.ugaussian()}.to_scale
|
10
|
+
f3=samples.times.collect {rng.ugaussian()}.to_scale
|
11
|
+
|
12
|
+
vectors={}
|
13
|
+
|
14
|
+
variables.times do |i|
|
15
|
+
vectors["v#{i}"]=samples.times.collect {|nv| f1[nv]*i+(f2[nv]*(15-i))+((f3[nv]*(30-i))*1.5)*rng.ugaussian()}.to_scale
|
16
|
+
end
|
17
|
+
ds=vectors.to_dataset
|
18
|
+
|
19
|
+
pa=Statsample::Factor::ParallelAnalysis.new(ds, :iterations=>10, :debug=>true)
|
20
|
+
pca=Statsample::Factor::PCA.new(Statsample::Bivariate.correlation_matrix(ds))
|
21
|
+
rb=ReportBuilder.new(:name=>"Parallel Analysis with simulation") do |g|
|
22
|
+
g.text("There are 3 real factors on data")
|
23
|
+
g.parse_element(pca)
|
24
|
+
g.text("Traditional Kaiser criterion (k>1) returns #{pca.m} factors")
|
25
|
+
g.parse_element(pa)
|
26
|
+
g.text("Parallel Analysis returns #{pa.number_of_factors} factors to preserve")
|
27
|
+
end
|
28
|
+
|
29
|
+
puts rb.to_text
|
@@ -0,0 +1,30 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
3
|
+
|
4
|
+
require 'statsample'
|
5
|
+
samples=1000
|
6
|
+
variables=30
|
7
|
+
rng = GSL::Rng.alloc()
|
8
|
+
f1=samples.times.collect {rng.ugaussian()}.to_scale
|
9
|
+
f2=samples.times.collect {rng.ugaussian()}.to_scale
|
10
|
+
f3=samples.times.collect {rng.ugaussian()}.to_scale
|
11
|
+
|
12
|
+
vectors={}
|
13
|
+
|
14
|
+
variables.times do |i|
|
15
|
+
vectors["v#{i}"]=samples.times.collect {|nv| f1[nv]*(i-30)+f2[nv]*(i+30)+f3[nv]*(i+15) + rng.ugaussian() > 0 ? 1 : 0}.to_scale
|
16
|
+
end
|
17
|
+
ds=vectors.to_dataset
|
18
|
+
|
19
|
+
pa=Statsample::Factor::ParallelAnalysis.new(ds, :iterations=>10, :matrix_method=>:tetrachoric_correlation_matrix, :debug=>true)
|
20
|
+
|
21
|
+
pca=Statsample::Factor::PCA.new(Statsample::Bivariate.tetrachoric_correlation_matrix(ds))
|
22
|
+
rb=ReportBuilder.new(:name=>"Parallel Analysis with simulation") do |g|
|
23
|
+
g.text("There are 3 real factors on data")
|
24
|
+
g.parse_element(pca)
|
25
|
+
g.text("Traditional Kaiser criterion (k>1) returns #{pca.m} factors")
|
26
|
+
g.parse_element(pa)
|
27
|
+
g.text("Parallel Analysis returns #{pa.number_of_factors} factors to preserve")
|
28
|
+
end
|
29
|
+
|
30
|
+
puts rb.to_text
|
data/examples/vector.rb
ADDED
data/lib/distribution.rb
CHANGED
@@ -8,10 +8,20 @@ require 'statistics2'
|
|
8
8
|
# Distribution::Normal.p_value(0.95)
|
9
9
|
# => 1.64485364660836
|
10
10
|
module Distribution
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
11
|
+
begin
|
12
|
+
require 'rbgsl'
|
13
|
+
def self.has_gsl?
|
14
|
+
true
|
15
|
+
end
|
16
|
+
rescue LoadError
|
17
|
+
def self.has_gsl?
|
18
|
+
false
|
19
|
+
end
|
20
|
+
end
|
21
|
+
autoload(:ChiSquare, 'distribution/chisquare')
|
22
|
+
autoload(:T, 'distribution/t')
|
23
|
+
autoload(:F, 'distribution/f')
|
24
|
+
autoload(:Normal, 'distribution/normal')
|
25
|
+
autoload(:NormalBivariate, 'distribution/normalbivariate')
|
26
|
+
# autoload(:NormalMultivariate, 'distribution/normalmultivariate')
|
17
27
|
end
|
data/lib/distribution/normal.rb
CHANGED
@@ -1,25 +1,32 @@
|
|
1
1
|
module Distribution
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
2
|
+
# Calculate cdf and inverse cdf for Normal Distribution.
|
3
|
+
# Uses Statistics2 module
|
4
|
+
module Normal
|
5
|
+
class << self
|
6
|
+
# Return the P-value of the corresponding integral
|
7
|
+
def p_value(pr)
|
8
|
+
Statistics2.pnormaldist(pr)
|
9
|
+
end
|
10
|
+
# Normal cumulative distribution function (cdf).
|
11
|
+
#
|
12
|
+
# Returns the integral of normal distribution
|
13
|
+
# over (-Infty, x].
|
14
|
+
#
|
15
|
+
def cdf(x)
|
16
|
+
Statistics2.normaldist(x)
|
17
|
+
end
|
18
|
+
|
19
|
+
if false and Distribution.has_gsl?
|
20
|
+
alias :cdf_ruby :cdf
|
21
|
+
def cdf(x) # :nodoc:
|
22
|
+
GSL::Cdf::gaussian_P(x)
|
22
23
|
end
|
23
24
|
end
|
25
|
+
# Normal probability density function (pdf)
|
26
|
+
# With mean=0 and sigma=1
|
27
|
+
def pdf(x)
|
28
|
+
(1.0/Math::sqrt(2*Math::PI))*Math::exp(-(x**2/2.0))
|
29
|
+
end
|
24
30
|
end
|
31
|
+
end
|
25
32
|
end
|
@@ -120,7 +120,7 @@ module Distribution
|
|
120
120
|
sum
|
121
121
|
end
|
122
122
|
# Normal cumulative distribution function (cdf) for a given x, y and rho.
|
123
|
-
#
|
123
|
+
# Ported from Fortran code by Alan Genz
|
124
124
|
#
|
125
125
|
# Original documentation
|
126
126
|
# DOUBLE PRECISION FUNCTION BVND( DH, DK, R )
|
data/lib/statsample.rb
CHANGED
@@ -112,7 +112,7 @@ module Statsample
|
|
112
112
|
false
|
113
113
|
end
|
114
114
|
end
|
115
|
-
VERSION = '0.
|
115
|
+
VERSION = '0.10.0'
|
116
116
|
SPLIT_TOKEN = ","
|
117
117
|
autoload(:Database, 'statsample/converters')
|
118
118
|
autoload(:Anova, 'statsample/anova')
|
@@ -186,6 +186,16 @@ module Statsample
|
|
186
186
|
ds=Statsample::Dataset.new(h).dup_only_valid
|
187
187
|
ds.vectors.values
|
188
188
|
end
|
189
|
+
# Cheap version of #only_valid.
|
190
|
+
# If any vectors have missing_values, return only valid.
|
191
|
+
# If not, return the vectors themselves
|
192
|
+
def only_valid_clone(*vs)
|
193
|
+
if vs.any? {|v| v.has_missing_data?}
|
194
|
+
only_valid(*vs)
|
195
|
+
else
|
196
|
+
vs
|
197
|
+
end
|
198
|
+
end
|
189
199
|
end
|
190
200
|
|
191
201
|
|
@@ -214,7 +224,14 @@ module Statsample
|
|
214
224
|
fp.close
|
215
225
|
end
|
216
226
|
end
|
217
|
-
|
227
|
+
# Provides basic method to generate summaries
|
228
|
+
module Summarizable
|
229
|
+
include GetText
|
230
|
+
bindtextdomain("statsample")
|
231
|
+
def summary(method=:to_text)
|
232
|
+
ReportBuilder.new(:no_title=>true).add(self).send(method)
|
233
|
+
end
|
234
|
+
end
|
218
235
|
module STATSAMPLE__ #:nodoc:
|
219
236
|
end
|
220
237
|
end
|
data/lib/statsample/anova.rb
CHANGED
@@ -1,11 +1,86 @@
|
|
1
1
|
module Statsample
|
2
2
|
module Anova
|
3
|
-
|
3
|
+
class << self
|
4
|
+
def oneway(*args)
|
5
|
+
OneWay.new(*args)
|
6
|
+
end
|
7
|
+
def oneway_with_vectors(*args)
|
8
|
+
OneWayWithVectors.new(*args)
|
9
|
+
end
|
10
|
+
end
|
11
|
+
# = Generic Anova one-way.
|
12
|
+
# You could enter the sum of squares or the mean squares. You
|
13
|
+
# should enter the degrees of freedom for numerator and denominator.
|
14
|
+
# == Usage
|
15
|
+
# anova=Statsample::Anova::OneWay.new(:ss_num=>10, :ss_den=>20, :df_num=>2, :df_den=>10, :name=>"ANOVA for....")
|
16
|
+
class OneWay
|
17
|
+
include GetText
|
18
|
+
bindtextdomain("statsample")
|
19
|
+
attr_reader :df_num, :df_den, :ss_num, :ss_den, :ms_num, :ms_den, :ms_total, :df_total, :ss_total
|
20
|
+
# Name of ANOVA analysis
|
21
|
+
attr_accessor :name
|
22
|
+
attr_accessor :name_denominator
|
23
|
+
attr_accessor :name_numerator
|
24
|
+
def initialize(opts=Hash.new)
|
25
|
+
# First see if sum of squares or mean squares are entered
|
26
|
+
raise ArgumentError, "You should set d.f." unless (opts.has_key? :df_num and opts.has_key? :df_den)
|
27
|
+
@df_num=opts.delete :df_num
|
28
|
+
@df_den=opts.delete :df_den
|
29
|
+
@df_total=@df_num+@df_den
|
30
|
+
if(opts.has_key? :ss_num and opts.has_key? :ss_den)
|
31
|
+
@ss_num = opts.delete :ss_num
|
32
|
+
@ss_den =opts.delete :ss_den
|
33
|
+
@ms_num =@ss_num.quo(@df_num)
|
34
|
+
@ms_den =@ss_den.quo(@df_den)
|
35
|
+
elsif (opts.has_key? :ms_num and opts.has_key? :ms_den)
|
36
|
+
@ms_num =opts.delete :ms_num
|
37
|
+
@ms_den =opts.delete :ms_den
|
38
|
+
@ss_num =@ms_num * @df_num
|
39
|
+
@ss_den =@ms_den * @df_den
|
40
|
+
end
|
41
|
+
@ss_total=@ss_num+@ss_den
|
42
|
+
@ms_total=@ms_num+@ms_den
|
43
|
+
opts_default={:name=>"ANOVA",
|
44
|
+
:name_denominator=>"Explained variance",
|
45
|
+
:name_numerator=>"Unexplained variance"}
|
46
|
+
@opts=opts_default.merge(opts)
|
47
|
+
opts_default.keys.each {|k|
|
48
|
+
send("#{k}=", @opts[k])
|
49
|
+
}
|
50
|
+
@f_object=Statsample::Test::F.new(@ms_num,@ms_den,@df_num,@df_den)
|
51
|
+
end
|
52
|
+
# F value
|
53
|
+
def f
|
54
|
+
@f_object.f
|
55
|
+
end
|
56
|
+
# P-value of F test
|
57
|
+
def probability
|
58
|
+
@f_object.probability
|
59
|
+
end
|
60
|
+
# Summary of Anova analysis
|
61
|
+
def summary
|
62
|
+
ReportBuilder.new(:no_title=>true).add(self).to_text
|
63
|
+
end
|
64
|
+
def report_building(builder) #:nodoc:
|
65
|
+
builder.section(:name=>@name) do |b|
|
66
|
+
report_building_table(b)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
def report_building_table(builder) #:nodoc:
|
70
|
+
builder.table(:name=>_("%s Table") % @name, :header=>%w{source ss df ms f p}.map {|v| _(v)}) do |t|
|
71
|
+
t.row([@name_numerator, sprintf("%0.3f",@ss_num), @df_num, sprintf("%0.3f",@ms_num), sprintf("%0.3f",f), sprintf("%0.3f", probability)])
|
72
|
+
t.row([@name_denominator, sprintf("%0.3f",@ss_den), @df_den, sprintf("%0.3f",@ms_den), "", ""])
|
73
|
+
t.row([_("Total"), sprintf("%0.3f",@ss_total), @df_total, sprintf("%0.3f",@ms_total),"",""])
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
end
|
78
|
+
# One Way Anova with vectors
|
4
79
|
# Example:
|
5
80
|
# v1=[2,3,4,5,6].to_scale
|
6
81
|
# v2=[3,3,4,5,6].to_scale
|
7
82
|
# v3=[5,3,1,5,6].to_scale
|
8
|
-
# anova=Statsample::Anova::
|
83
|
+
# anova=Statsample::Anova::OneWayWithVectors.new([v1,v2,v3])
|
9
84
|
# anova.f
|
10
85
|
# => 0.0243902439024391
|
11
86
|
# anova.probability
|
@@ -13,30 +88,43 @@ module Statsample
|
|
13
88
|
# anova.sst
|
14
89
|
# => 32.9333333333333
|
15
90
|
#
|
16
|
-
class
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
91
|
+
class OneWayWithVectors < OneWay
|
92
|
+
# Show on summary Levene test
|
93
|
+
attr_accessor :summary_levene
|
94
|
+
# Show on summary descriptives for vectors
|
95
|
+
attr_accessor :summary_descriptives
|
96
|
+
def initialize(*args)
|
97
|
+
if args[0].is_a? Array
|
98
|
+
@vectors=args.shift
|
99
|
+
else
|
100
|
+
@vectors=args.find_all {|v| v.is_a? Statsample::Vector}
|
101
|
+
opts=args.find {|v| v.is_a? Hash}
|
102
|
+
end
|
103
|
+
opts||=Hash.new
|
104
|
+
opts_default={:name=>_("Anova One-Way"),
|
105
|
+
:name_numerator=>"Between Groups",
|
106
|
+
:name_denominator=>"Within Groups",
|
107
|
+
:summary_descriptives=>false,
|
108
|
+
:summary_levene=>false}
|
109
|
+
@opts=opts_default.merge(opts).merge(:ss_num=>ssbg, :ss_den=>sswg, :df_num=>df_bg, :df_den=>df_wg)
|
110
|
+
super(@opts)
|
111
|
+
end
|
112
|
+
alias :sst :ss_total
|
113
|
+
def levene
|
114
|
+
Statsample::Test.levene(@vectors, :name=>_("Test of Homogeneity of variances (Levene)"))
|
21
115
|
end
|
22
116
|
# Total mean
|
23
|
-
def
|
117
|
+
def total_mean
|
24
118
|
sum=@vectors.inject(0){|a,v| a+v.sum}
|
25
119
|
sum.quo(n)
|
26
120
|
end
|
27
|
-
|
28
|
-
# Total sum of squares
|
29
|
-
def sst
|
30
|
-
m=mean
|
31
|
-
@vectors.inject(0) {|total,vector| total+vector.ss(m) }
|
32
|
-
end
|
33
121
|
# Sum of squares within groups
|
34
122
|
def sswg
|
35
123
|
@sswg||=@vectors.inject(0) {|total,vector| total+vector.ss }
|
36
124
|
end
|
37
125
|
# Sum of squares between groups
|
38
126
|
def ssbg
|
39
|
-
m=
|
127
|
+
m=total_mean
|
40
128
|
@vectors.inject(0) do |total,vector|
|
41
129
|
total + (vector.mean-m).square * vector.size
|
42
130
|
end
|
@@ -56,7 +144,21 @@ module Statsample
|
|
56
144
|
def n
|
57
145
|
@vectors.inject(0){|a,v| a+v.size}
|
58
146
|
end
|
59
|
-
|
147
|
+
def report_building(builder) # :nodoc:
|
148
|
+
builder.section(:name=>@name) do |s|
|
149
|
+
if summary_descriptives
|
150
|
+
s.table(:name=>_("Descriptives"),:header=>%w{Name N Mean SD Min Max}.map {|v| _(v)}) do |t|
|
151
|
+
@vectors.each do |v|
|
152
|
+
t.row [v.name, v.n_valid, "%0.4f" % v.mean, "%0.4f" % v.sd, "%0.4f" % v.min, "%0.4f" % v.max]
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|
156
|
+
if summary_levene
|
157
|
+
s.parse_element(levene)
|
158
|
+
end
|
159
|
+
report_building_table(s)
|
160
|
+
end
|
161
|
+
end
|
60
162
|
end
|
61
163
|
end
|
62
164
|
end
|