statsample 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +0 -0
- data/History.txt +20 -1
- data/Manifest.txt +8 -1
- data/README.txt +11 -7
- data/Rakefile +2 -2
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/examples/dataset.rb +8 -0
- data/examples/multiple_regression.rb +1 -1
- data/examples/parallel_analysis.rb +29 -0
- data/examples/parallel_analysis_tetrachoric.rb +30 -0
- data/examples/vector.rb +6 -0
- data/lib/distribution.rb +16 -6
- data/lib/distribution/normal.rb +27 -20
- data/lib/distribution/normalbivariate.rb +1 -1
- data/lib/statsample.rb +19 -2
- data/lib/statsample/anova.rb +118 -16
- data/lib/statsample/bivariate.rb +27 -13
- data/lib/statsample/bivariate/polychoric.rb +18 -5
- data/lib/statsample/crosstab.rb +66 -74
- data/lib/statsample/dataset.rb +52 -45
- data/lib/statsample/dominanceanalysis.rb +2 -5
- data/lib/statsample/factor.rb +1 -1
- data/lib/statsample/factor/parallelanalysis.rb +122 -0
- data/lib/statsample/factor/pca.rb +23 -28
- data/lib/statsample/factor/principalaxis.rb +8 -3
- data/lib/statsample/matrix.rb +27 -24
- data/lib/statsample/mle.rb +11 -11
- data/lib/statsample/permutation.rb +2 -1
- data/lib/statsample/regression.rb +10 -8
- data/lib/statsample/regression/multiple/baseengine.rb +36 -25
- data/lib/statsample/regression/multiple/gslengine.rb +14 -0
- data/lib/statsample/regression/multiple/matrixengine.rb +4 -32
- data/lib/statsample/regression/multiple/rubyengine.rb +2 -6
- data/lib/statsample/regression/simple.rb +1 -1
- data/lib/statsample/reliability.rb +42 -54
- data/lib/statsample/test.rb +10 -6
- data/lib/statsample/test/f.rb +16 -26
- data/lib/statsample/test/levene.rb +4 -8
- data/lib/statsample/test/t.rb +30 -24
- data/lib/statsample/test/umannwhitney.rb +13 -6
- data/lib/statsample/vector.rb +86 -76
- data/po/es/statsample.mo +0 -0
- data/po/es/statsample.po +127 -94
- data/po/statsample.pot +114 -79
- data/test/test_anovaoneway.rb +27 -0
- data/test/test_anovawithvectors.rb +97 -0
- data/test/test_bivariate.rb +6 -57
- data/test/test_bivariate_polychoric.rb +65 -0
- data/test/test_crosstab.rb +6 -0
- data/test/test_dataset.rb +29 -1
- data/test/test_distribution.rb +6 -13
- data/test/test_dominance_analysis.rb +1 -1
- data/test/test_factor.rb +3 -3
- data/test/test_helpers.rb +18 -18
- data/test/test_matrix.rb +33 -20
- data/test/test_permutation.rb +36 -30
- data/test/test_regression.rb +26 -8
- data/test/test_reliability.rb +104 -14
- data/test/test_test_f.rb +11 -14
- data/test/test_test_t.rb +42 -35
- data/test/test_umannwhitney.rb +22 -10
- data/test/test_vector.rb +204 -102
- metadata +57 -81
- metadata.gz.sig +0 -0
- data/test/test_anova.rb +0 -24
data.tar.gz.sig
CHANGED
Binary file
|
data/History.txt
CHANGED
@@ -1,6 +1,25 @@
|
|
1
|
+
=== 0.10.0 / 2010-04-13
|
2
|
+
|
3
|
+
<b>API modifications</b>
|
4
|
+
* Refactoring of Statsample::Anova module.
|
5
|
+
* Statsample::Anova::OneWay: implementation of generic ANOVA One-Way, used by Multiple Regression, for example.
|
6
|
+
* Statsample::Anova::OneWayWithVectors: implementation of ANOVA One-Way to test differences of means.
|
7
|
+
|
8
|
+
<b>New features</b>
|
9
|
+
* New Statsample::Factor::ParallelAnalysis, which performs Horn's 'parallel analysis' on a PCA, to adjust for sample bias in the retention of components.
|
10
|
+
* New Statsample.only_valid_clone and Statsample::Dataset.clone, which allow creating shallow copies of valid vectors and datasets. Used by correlation matrix methods to optimize calculations.
|
11
|
+
* New module Statsample::Summarizable, which adds GetText and ReportBuilder support to classes. Better summaries for Vector, Dataset, Crosstab, PrincipalAxis, PCA and Regression::Multiple classes.
|
12
|
+
|
13
|
+
<b>Optimizations and bug fix</b>
|
14
|
+
|
15
|
+
* Refactoring of Statsample::Regression::Multiple classes. Still needs work.
|
16
|
+
* Bug fix on Statsample::Factor::PCA and Statsample::Factor::PrincipalAxis
|
17
|
+
* Bug fix on Statsample::Bivariate::Polychoric.new_with_vectors. It should be defined as a class method, not an instance method.
|
18
|
+
* Optimized correlation and covariance matrix. Only half of the matrix is calculated; the other half is returned from cache.
|
19
|
+
* More tests coverage. RCOV Total: 82.51% , Code: 77.83%
|
20
|
+
|
1
21
|
=== 0.9.0 / 2010-04-04
|
2
22
|
* New Statsample::Test::F. Anova::OneWay subclasses it and Regression classes uses it.
|
3
|
-
|
4
23
|
=== 0.8.2 / 2010-04-01
|
5
24
|
* Statsample::PromiseAfter replaced by external package DirtyMemoize [http://rubygems.org/gems/dirty-memoize]
|
6
25
|
=== 0.8.1 / 2010-03-29
|
data/Manifest.txt
CHANGED
@@ -11,15 +11,19 @@ data/test_binomial.csv
|
|
11
11
|
data/tetmat_matrix.txt
|
12
12
|
data/tetmat_test.txt
|
13
13
|
examples/correlation_matrix.rb
|
14
|
+
examples/dataset.rb
|
14
15
|
examples/dominance_analysis.rb
|
15
16
|
examples/dominance_analysis_bootstrap.rb
|
16
17
|
examples/levene.rb
|
17
18
|
examples/multiple_regression.rb
|
18
19
|
examples/multivariate_correlation.rb
|
20
|
+
examples/parallel_analysis.rb
|
21
|
+
examples/parallel_analysis_tetrachoric.rb
|
19
22
|
examples/polychoric.rb
|
20
23
|
examples/principal_axis.rb
|
21
24
|
examples/t_test.rb
|
22
25
|
examples/tetrachoric.rb
|
26
|
+
examples/vector.rb
|
23
27
|
lib/distribution.rb
|
24
28
|
lib/distribution/chisquare.rb
|
25
29
|
lib/distribution/f.rb
|
@@ -43,6 +47,7 @@ lib/statsample/dataset.rb
|
|
43
47
|
lib/statsample/dominanceanalysis.rb
|
44
48
|
lib/statsample/dominanceanalysis/bootstrap.rb
|
45
49
|
lib/statsample/factor.rb
|
50
|
+
lib/statsample/factor/parallelanalysis.rb
|
46
51
|
lib/statsample/factor/pca.rb
|
47
52
|
lib/statsample/factor/principalaxis.rb
|
48
53
|
lib/statsample/factor/rotation.rb
|
@@ -84,8 +89,10 @@ po/es/statsample.mo
|
|
84
89
|
po/es/statsample.po
|
85
90
|
po/statsample.pot
|
86
91
|
setup.rb
|
87
|
-
test/
|
92
|
+
test/test_anovaoneway.rb
|
93
|
+
test/test_anovawithvectors.rb
|
88
94
|
test/test_bivariate.rb
|
95
|
+
test/test_bivariate_polychoric.rb
|
89
96
|
test/test_codification.rb
|
90
97
|
test/test_combination.rb
|
91
98
|
test/test_crosstab.rb
|
data/README.txt
CHANGED
@@ -5,18 +5,19 @@ http://ruby-statsample.rubyforge.org/
|
|
5
5
|
|
6
6
|
== DESCRIPTION:
|
7
7
|
|
8
|
-
A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, 1.9.1, 1.9.2 (April, 2010) and JRuby 1.4 (Ruby 1.8.7 compatible)
|
8
|
+
A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, 1.9.1, 1.9.2 (April, 2010) and JRuby 1.4 (Ruby 1.8.7 compatible).
|
9
9
|
|
10
|
-
|
10
|
+
Include:
|
11
11
|
* Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
|
12
12
|
* Imports and exports datasets from and to Excel, CSV and plain text files.
|
13
|
-
* Correlations: Pearson's r, Spearman's rank correlation (rho), Tetrachoric, Polychoric
|
14
|
-
*
|
15
|
-
*
|
16
|
-
*
|
13
|
+
* Correlations: Pearson's r, Spearman's rank correlation (rho), Tetrachoric, Polychoric.
|
14
|
+
* Anova: generic and vector-based One-way ANOVA
|
15
|
+
* Tests: F, T, Levene, U-Mannwhitney.
|
16
|
+
* Regression: Simple, Multiple (OLS), Probit and Logit
|
17
|
+
* Factorial Analysis: Extraction (PCA and Principal Axis), Rotation (Varimax, Equimax, Quartimax) and Parallel Analysis, for estimation of number of factors.
|
17
18
|
* Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
|
18
19
|
* Sample calculation related formulas
|
19
|
-
* Creates reports on text, html and rtf, using ReportBuilder
|
20
|
+
* Creates reports on text, html and rtf, using ReportBuilder gem
|
20
21
|
|
21
22
|
== FEATURES:
|
22
23
|
|
@@ -24,6 +25,7 @@ Includes:
|
|
24
25
|
* Statsample::Vector: An extension of an array, with statistical methods like sum, mean and standard deviation
|
25
26
|
* Statsample::Dataset: a group of Statsample::Vector, analog to a excel spreadsheet or a dataframe on R. The base of almost all operations on statsample.
|
26
27
|
* Statsample::Multiset: multiple datasets with same fields and type of vectors
|
28
|
+
* Anova module provides generic Statsample::Anova::OneWay and vector based Statsample::Anova::OneWayWithVectors
|
27
29
|
* Module Statsample::Bivariate provides covariance and pearson, spearman, point biserial, tau a, tau b, gamma, tetrachoric (see Bivariate::Tetrachoric) and polychoric (see Bivariate::Polychoric) correlations. Include methods to create correlation and covariance matrices
|
28
30
|
* Multiple types of regression.
|
29
31
|
* Simple Regression : Statsample::Regression::Simple
|
@@ -38,6 +40,7 @@ Includes:
|
|
38
40
|
* Statsample::Factor::Varimax
|
39
41
|
* Statsample::Factor::Equimax
|
40
42
|
* Statsample::Factor::Quartimax
|
43
|
+
* Statsample::Factor::ParallelAnalysis performs Horn's 'parallel analysis' to a principal components analysis to adjust for sample bias in the retention of components.
|
41
44
|
* Dominance Analysis. Based on Budescu and Azen papers, Statsample::DominanceAnalysis class can report dominance analysis for a sample, using uni or multivariate dependent variables and DominanceAnalysisBootstrap can execute bootstrap analysis to determine dominance stability, as recommended by Azen & Budescu (2003) link[http://psycnet.apa.org/journals/met/8/2/129/].
|
42
45
|
* Module Statsample::Codification, to help to codify open questions
|
43
46
|
* Converters to import and export data:
|
@@ -53,6 +56,7 @@ Includes:
|
|
53
56
|
* Statsample::Test::Levene
|
54
57
|
* Statsample::Test::UMannWhitney
|
55
58
|
* Statsample::Test::T
|
59
|
+
* Statsample::Test::F
|
56
60
|
* Interfaces to gdchart, gnuplot and SVG::Graph
|
57
61
|
|
58
62
|
|
data/Rakefile
CHANGED
@@ -39,11 +39,11 @@ task :makemo do
|
|
39
39
|
end
|
40
40
|
|
41
41
|
# Gem specification for statsample, declared through Hoe.
h = Hoe.spec('statsample') do
  self.version = Statsample::VERSION
  self.testlib = :minitest
  self.rubyforge_name = "ruby-statsample"
  self.developer('Claudio Bustos', 'clbustos@gmail.com')

  # Runtime dependencies, appended in the same order as before.
  [
    ["spreadsheet", "~>0.6.0"],
    ["svg-graph", "~>1.0"],
    ["reportbuilder", "~>1.0"],
    ["minimization", "~>0.2.0"],
    ["fastercsv"],
    ["dirty-memoize", "~>0.0"]
  ].each { |dependency| self.extra_deps << dependency }

  # Generated artifacts appended to the clean globs.
  ["test/images/*", "demo/item_analysis/*", "demo/Regression"].each do |pattern|
    self.clean_globs << pattern
  end

  self.need_rdoc = false
end
|
Binary file
|
data/examples/dataset.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/ruby
# Example: compares the number of components reported by a plain PCA with the
# number suggested by Statsample::Factor::ParallelAnalysis on simulated data.
$:.unshift(File.dirname(__FILE__)+'/../lib/')

require 'statsample'

n_cases = 100
n_items = 30
rng = GSL::Rng.alloc

# Three latent factors, each a vector of standard gaussian draws.
factor_a, factor_b, factor_c = (1..3).map do
  (0...n_cases).map { rng.ugaussian }.to_scale
end

# Every observed variable mixes the three factors with weights that depend on
# its index.
# NOTE(review): the random term multiplies only the third component of the
# sum (operator precedence) - confirm this is the intended noise model.
items = {}
n_items.times do |i|
  scores = (0...n_cases).map do |row|
    factor_a[row]*i+(factor_b[row]*(15-i))+((factor_c[row]*(30-i))*1.5)*rng.ugaussian
  end
  items["v#{i}"] = scores.to_scale
end
ds = items.to_dataset

pa = Statsample::Factor::ParallelAnalysis.new(ds, :iterations=>10, :debug=>true)
pca = Statsample::Factor::PCA.new(Statsample::Bivariate.correlation_matrix(ds))

# Build a text report containing both analyses.
rb = ReportBuilder.new(:name=>"Parallel Analysis with simulation") do |report|
  report.text("There are 3 real factors on data")
  report.parse_element(pca)
  report.text("Traditional Kaiser criterion (k>1) returns #{pca.m} factors")
  report.parse_element(pa)
  report.text("Parallel Analysis returns #{pa.number_of_factors} factors to preserve")
end

puts rb.to_text
|
@@ -0,0 +1,30 @@
|
|
1
|
+
#!/usr/bin/ruby
# Example: parallel analysis on simulated dichotomous items, using the
# tetrachoric correlation matrix instead of the default matrix method.
$:.unshift(File.dirname(__FILE__)+'/../lib/')

require 'statsample'

n_cases = 1000
n_items = 30
rng = GSL::Rng.alloc

# Three latent factors, each a vector of standard gaussian draws.
factor_a, factor_b, factor_c = (1..3).map do
  (0...n_cases).map { rng.ugaussian }.to_scale
end

# Each item is 1 when the weighted factor sum plus gaussian noise is positive,
# 0 otherwise.
items = {}
n_items.times do |i|
  answers = (0...n_cases).map do |row|
    latent = factor_a[row]*(i-30)+factor_b[row]*(i+30)+factor_c[row]*(i+15) + rng.ugaussian
    latent > 0 ? 1 : 0
  end
  items["v#{i}"] = answers.to_scale
end
ds = items.to_dataset

pa = Statsample::Factor::ParallelAnalysis.new(ds, :iterations=>10, :matrix_method=>:tetrachoric_correlation_matrix, :debug=>true)

pca = Statsample::Factor::PCA.new(Statsample::Bivariate.tetrachoric_correlation_matrix(ds))

# Build a text report containing both analyses.
rb = ReportBuilder.new(:name=>"Parallel Analysis with simulation") do |report|
  report.text("There are 3 real factors on data")
  report.parse_element(pca)
  report.text("Traditional Kaiser criterion (k>1) returns #{pca.m} factors")
  report.parse_element(pa)
  report.text("Parallel Analysis returns #{pa.number_of_factors} factors to preserve")
end

puts rb.to_text
|
data/examples/vector.rb
ADDED
data/lib/distribution.rb
CHANGED
@@ -8,10 +8,20 @@ require 'statistics2'
|
|
8
8
|
# Distribution::Normal.p_value(0.95)
|
9
9
|
# => 1.64485364660836
|
10
10
|
module Distribution
  # Try to load the GSL bindings once, when this file is loaded, and remember
  # whether that worked.
  @has_gsl = begin
    require 'rbgsl'
    true
  rescue LoadError
    false
  end

  class << self
    # True when 'rbgsl' could be required, false otherwise.
    def has_gsl?
      @has_gsl
    end
  end

  # Distribution modules are loaded lazily on first reference.
  [
    [:ChiSquare,       'distribution/chisquare'],
    [:T,               'distribution/t'],
    [:F,               'distribution/f'],
    [:Normal,          'distribution/normal'],
    [:NormalBivariate, 'distribution/normalbivariate']
  ].each { |const_name, path| autoload(const_name, path) }
  # autoload(:NormalMultivariate, 'distribution/normalmultivariate')
end
|
data/lib/distribution/normal.rb
CHANGED
@@ -1,25 +1,32 @@
|
|
1
1
|
module Distribution
  # Cumulative distribution function (cdf), inverse cdf and density of the
  # normal distribution. The cdf and its inverse are delegated to the
  # Statistics2 module.
  module Normal
    class << self
      # Inverse of the cdf: hands the probability +pr+ to
      # Statistics2.pnormaldist and returns its result.
      def p_value(pr)
        Statistics2.pnormaldist(pr)
      end

      # Normal cumulative distribution function (cdf).
      #
      # Returns the integral of the normal distribution over (-Infty, x],
      # as computed by Statistics2.normaldist.
      def cdf(x)
        Statistics2.normaldist(x)
      end

      # GSL-backed cdf. The leading +false+ keeps this branch switched off, so
      # neither the alias nor the override is ever defined.
      if false and Distribution.has_gsl?
        alias :cdf_ruby :cdf
        def cdf(x) # :nodoc:
          GSL::Cdf::gaussian_P(x)
        end
      end

      # Normal probability density function (pdf), evaluated at +x+ with the
      # standard normal formula exp(-x^2/2) / sqrt(2*pi).
      def pdf(x)
        scale = 1.0/Math::sqrt(2*Math::PI)
        scale*Math::exp(-(x**2/2.0))
      end
    end
  end
end
|
@@ -120,7 +120,7 @@ module Distribution
|
|
120
120
|
sum
|
121
121
|
end
|
122
122
|
# Normal cumulative distribution function (cdf) for a given x, y and rho.
|
123
|
-
#
|
123
|
+
# Ported from Fortran code by Alan Genz
|
124
124
|
#
|
125
125
|
# Original documentation
|
126
126
|
# DOUBLE PRECISION FUNCTION BVND( DH, DK, R )
|
data/lib/statsample.rb
CHANGED
@@ -112,7 +112,7 @@ module Statsample
|
|
112
112
|
false
|
113
113
|
end
|
114
114
|
end
|
115
|
-
VERSION = '0.
|
115
|
+
VERSION = '0.10.0'
|
116
116
|
SPLIT_TOKEN = ","
|
117
117
|
autoload(:Database, 'statsample/converters')
|
118
118
|
autoload(:Anova, 'statsample/anova')
|
@@ -186,6 +186,16 @@ module Statsample
|
|
186
186
|
ds=Statsample::Dataset.new(h).dup_only_valid
|
187
187
|
ds.vectors.values
|
188
188
|
end
|
189
|
+
# Cheap version of #only_valid.
|
190
|
+
# If any vectors have missing_values, return only valid.
|
191
|
+
# If not, return the vectors themselves
|
192
|
+
# Cheap variant of #only_valid: when none of the given vectors reports
# missing data, the argument list is returned untouched; otherwise the work
# is delegated to #only_valid.
def only_valid_clone(*vs)
  return vs if vs.none? { |vector| vector.has_missing_data? }
  only_valid(*vs)
end
|
189
199
|
end
|
190
200
|
|
191
201
|
|
@@ -214,7 +224,14 @@ module Statsample
|
|
214
224
|
fp.close
|
215
225
|
end
|
216
226
|
end
|
217
|
-
|
227
|
+
# Provides basic method to generate summaries
|
228
|
+
module Summarizable
  include GetText
  bindtextdomain("statsample")

  # Adds the receiver to a title-less ReportBuilder and renders it by calling
  # +method+ on the builder (+:to_text+ by default).
  def summary(method=:to_text)
    report = ReportBuilder.new(:no_title=>true)
    report.add(self).send(method)
  end
end
|
218
235
|
module STATSAMPLE__ #:nodoc:
|
219
236
|
end
|
220
237
|
end
|
data/lib/statsample/anova.rb
CHANGED
@@ -1,11 +1,86 @@
|
|
1
1
|
module Statsample
|
2
2
|
module Anova
|
3
|
-
|
3
|
+
# Shortcut for OneWay.new; all arguments are forwarded unchanged.
def self.oneway(*args)
  OneWay.new(*args)
end

# Shortcut for OneWayWithVectors.new; all arguments are forwarded unchanged.
def self.oneway_with_vectors(*args)
  OneWayWithVectors.new(*args)
end
|
11
|
+
# = Generic Anova one-way.
|
12
|
+
# You could enter the sum of squares or the mean squares. You
|
13
|
+
# should enter the degrees of freedom for numerator and denominator.
|
14
|
+
# == Usage
|
15
|
+
# anova=Statsample::Anova::OneWay.new(:ss_num=>10, :ss_den=>20, :df_num=>2, :df_den=>10, :name=>"ANOVA for....")
|
16
|
+
class OneWay
  include GetText
  bindtextdomain("statsample")
  attr_reader :df_num, :df_den, :ss_num, :ss_den, :ms_num, :ms_den, :ms_total, :df_total, :ss_total
  # Name of the ANOVA analysis
  attr_accessor :name
  # Label used for the denominator row of the ANOVA table
  attr_accessor :name_denominator
  # Label used for the numerator row of the ANOVA table
  attr_accessor :name_numerator

  # Builds the analysis from an options hash.
  #
  # Required keys: +:df_num+ and +:df_den+, plus either the sums of squares
  # (+:ss_num+, +:ss_den+) or the mean squares (+:ms_num+, +:ms_den+).
  # Optional keys: +:name+, +:name_numerator+, +:name_denominator+.
  #
  # Raises ArgumentError when the degrees of freedom are missing, or when
  # neither a complete pair of sums of squares nor a complete pair of mean
  # squares is given.
  def initialize(opts=Hash.new)
    raise ArgumentError, "You should set d.f." unless opts.has_key?(:df_num) && opts.has_key?(:df_den)
    # Work on a copy: the keys consumed below must not vanish from the
    # caller's hash.
    opts = opts.dup
    @df_num = opts.delete :df_num
    @df_den = opts.delete :df_den
    @df_total = @df_num + @df_den
    if opts.has_key?(:ss_num) && opts.has_key?(:ss_den)
      @ss_num = opts.delete :ss_num
      @ss_den = opts.delete :ss_den
      @ms_num = @ss_num.quo(@df_num)
      @ms_den = @ss_den.quo(@df_den)
    elsif opts.has_key?(:ms_num) && opts.has_key?(:ms_den)
      @ms_num = opts.delete :ms_num
      @ms_den = opts.delete :ms_den
      @ss_num = @ms_num * @df_num
      # Sum of squares is mean square times d.f.; the former code multiplied
      # the still-unset @ss_den by the d.f. and failed on nil.
      @ss_den = @ms_den * @df_den
    else
      raise ArgumentError, "You should set sum of squares (:ss_num, :ss_den) or mean squares (:ms_num, :ms_den)"
    end
    @ss_total = @ss_num + @ss_den
    # NOTE(review): the total mean square is kept as the sum of both mean
    # squares, as before; confirm whether ss_total / df_total was intended.
    @ms_total = @ms_num + @ms_den
    # The numerator of the F ratio carries the explained variance, the
    # denominator the unexplained one.
    opts_default = {:name=>"ANOVA",
                    :name_numerator=>"Explained variance",
                    :name_denominator=>"Unexplained variance"}
    @opts = opts_default.merge(opts)
    opts_default.keys.each { |k| send("#{k}=", @opts[k]) }
    @f_object = Statsample::Test::F.new(@ms_num, @ms_den, @df_num, @df_den)
  end

  # F value
  def f
    @f_object.f
  end

  # P-value of F test
  def probability
    @f_object.probability
  end

  # Summary of Anova analysis, rendered as text
  def summary
    ReportBuilder.new(:no_title=>true).add(self).to_text
  end

  def report_building(builder) #:nodoc:
    builder.section(:name=>@name) do |b|
      report_building_table(b)
    end
  end

  # Adds the ANOVA table (numerator, denominator and total rows) to +builder+.
  def report_building_table(builder) #:nodoc:
    builder.table(:name=>_("%s Table") % @name, :header=>%w{source ss df ms f p}.map {|v| _(v)}) do |t|
      t.row([@name_numerator, sprintf("%0.3f",@ss_num), @df_num, sprintf("%0.3f",@ms_num), sprintf("%0.3f",f), sprintf("%0.3f", probability)])
      t.row([@name_denominator, sprintf("%0.3f",@ss_den), @df_den, sprintf("%0.3f",@ms_den), "", ""])
      t.row([_("Total"), sprintf("%0.3f",@ss_total), @df_total, sprintf("%0.3f",@ms_total),"",""])
    end
  end
end
|
78
|
+
# One Way Anova with vectors
|
4
79
|
# Example:
|
5
80
|
# v1=[2,3,4,5,6].to_scale
|
6
81
|
# v2=[3,3,4,5,6].to_scale
|
7
82
|
# v3=[5,3,1,5,6].to_scale
|
8
|
-
# anova=Statsample::Anova::
|
83
|
+
# anova=Statsample::Anova::OneWayWithVectors.new([v1,v2,v3])
|
9
84
|
# anova.f
|
10
85
|
# => 0.0243902439024391
|
11
86
|
# anova.probability
|
@@ -13,30 +88,43 @@ module Statsample
|
|
13
88
|
# anova.sst
|
14
89
|
# => 32.9333333333333
|
15
90
|
#
|
16
|
-
class
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
91
|
+
class OneWayWithVectors < OneWay
|
92
|
+
# Show on summary Levene test
|
93
|
+
attr_accessor :summary_levene
|
94
|
+
# Show on summary descriptives for vectors
|
95
|
+
attr_accessor :summary_descriptives
|
96
|
+
# Accepts the vectors either as one Array (first argument) or as individual
# Statsample::Vector arguments; an optional Hash of options may follow in
# both forms.
#
# Recognized options: +:name+, +:name_numerator+, +:name_denominator+,
# +:summary_descriptives+ and +:summary_levene+.
def initialize(*args)
  if args[0].is_a? Array
    @vectors=args.shift
  else
    @vectors=args.find_all {|v| v.is_a? Statsample::Vector}
  end
  # Look for the options hash in both call forms; previously it was ignored
  # whenever the vectors were given as an Array.
  opts=args.find {|v| v.is_a? Hash} || Hash.new
  opts_default={:name=>_("Anova One-Way"),
                :name_numerator=>"Between Groups",
                :name_denominator=>"Within Groups",
                :summary_descriptives=>false,
                :summary_levene=>false}
  @opts=opts_default.merge(opts).merge(:ss_num=>ssbg, :ss_den=>sswg, :df_num=>df_bg, :df_den=>df_wg)
  # Capture the summary flags before delegating to the parent constructor.
  show_descriptives=@opts[:summary_descriptives]
  show_levene=@opts[:summary_levene]
  super(@opts)
  # The parent constructor only assigns the name options, so the summary
  # flags have to be applied here or they would be silently dropped.
  @summary_descriptives=show_descriptives
  @summary_levene=show_levene
end
|
112
|
+
alias :sst :ss_total
|
113
|
+
# Runs Statsample::Test.levene on the stored vectors, passing a translated
# title as the :name option.
def levene
  title = _("Test of Homogeneity of variances (Levene)")
  Statsample::Test.levene(@vectors, :name=>title)
end
|
22
116
|
# Total mean
|
23
|
-
def
|
117
|
+
# Grand mean: the sum of every vector's sum, divided by the total count n.
def total_mean
  grand_total = 0
  @vectors.each { |vec| grand_total += vec.sum }
  grand_total.quo(n)
end
|
27
|
-
|
28
|
-
# Total sum of squares
|
29
|
-
def sst
|
30
|
-
m=mean
|
31
|
-
@vectors.inject(0) {|total,vector| total+vector.ss(m) }
|
32
|
-
end
|
33
121
|
# Sum of squares within groups
|
34
122
|
# Sum of squares within groups: adds up each vector's own +ss+. The result
# is memoized in @sswg.
def sswg
  return @sswg if @sswg
  @sswg = @vectors.inject(0) { |acc, vec| acc + vec.ss }
end
|
37
125
|
# Sum of squares between groups
|
38
126
|
def ssbg
|
39
|
-
m=
|
127
|
+
m=total_mean
|
40
128
|
@vectors.inject(0) do |total,vector|
|
41
129
|
total + (vector.mean-m).square * vector.size
|
42
130
|
end
|
@@ -56,7 +144,21 @@ module Statsample
|
|
56
144
|
# Total number of cases: the sizes of all vectors added together.
def n
  @vectors.map { |vec| vec.size }.inject(0) { |count, size| count + size }
end
|
59
|
-
|
147
|
+
# Fills +builder+ with one section named after the analysis. The section
# holds, in order: an optional descriptives table (one row per vector), the
# optional Levene test, and the ANOVA table.
def report_building(builder) # :nodoc:
  builder.section(:name=>@name) do |section|
    if summary_descriptives
      title = _("Descriptives")
      header = %w{Name N Mean SD Min Max}.map { |label| _(label) }
      section.table(:name=>title, :header=>header) do |table|
        @vectors.each do |vec|
          table.row [vec.name, vec.n_valid, "%0.4f" % vec.mean, "%0.4f" % vec.sd, "%0.4f" % vec.min, "%0.4f" % vec.max]
        end
      end
    end
    section.parse_element(levene) if summary_levene
    report_building_table(section)
  end
end
|
60
162
|
end
|
61
163
|
end
|
62
164
|
end
|