statsample 0.14.1 → 0.15.0
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +0 -0
- data/History.txt +9 -0
- data/Manifest.txt +5 -0
- data/README.txt +8 -2
- data/examples/icc.rb +26 -0
- data/grab_references.rb +28 -0
- data/lib/distribution.rb +5 -11
- data/lib/distribution/f.rb +12 -2
- data/lib/statsample.rb +11 -9
- data/lib/statsample/anova/oneway.rb +1 -1
- data/lib/statsample/anova/twoway.rb +4 -10
- data/lib/statsample/dataset.rb +1 -1
- data/lib/statsample/dominanceanalysis.rb +1 -1
- data/lib/statsample/dominanceanalysis/bootstrap.rb +0 -1
- data/lib/statsample/factor/map.rb +1 -1
- data/lib/statsample/factor/parallelanalysis.rb +2 -2
- data/lib/statsample/factor/pca.rb +1 -2
- data/lib/statsample/factor/principalaxis.rb +1 -2
- data/lib/statsample/factor/rotation.rb +2 -2
- data/lib/statsample/histogram.rb +2 -2
- data/lib/statsample/reliability.rb +17 -0
- data/lib/statsample/reliability/icc.rb +404 -0
- data/lib/statsample/reliability/scaleanalysis.rb +18 -6
- data/lib/statsample/srs.rb +5 -1
- data/lib/statsample/test.rb +1 -2
- data/lib/statsample/test/f.rb +5 -1
- data/lib/statsample/test/umannwhitney.rb +3 -3
- data/lib/statsample/vector.rb +2 -4
- data/references.txt +22 -0
- data/test/test_bivariate.rb +11 -11
- data/test/test_reliability.rb +11 -0
- data/test/test_reliability_icc.rb +188 -0
- data/test/test_test_f.rb +2 -2
- metadata +12 -4
- metadata.gz.sig +0 -0
data.tar.gz.sig
CHANGED
Binary file
|
data/History.txt
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
=== 0.15.0 / 2010-09-07
|
2
|
+
* Added class Statsample::Reliability::ICC for calculation of Intra-class correlation (Shrout & Fleiss, 1979; McGraw & Wong, 1996). Tested with SPSS and R values.
|
3
|
+
* References: Updated and standardized references on many classes. Added grab_references.rb script, to create a list of references for the library
|
4
|
+
* Added Spearman-Brown prophecy on Reliability module
|
5
|
+
* Distribution::F uses Gsl when available
|
6
|
+
* Added mean r.p.b. and item sd on Scale Analysis
|
7
|
+
* Corrected bug on Vector.ary_method and example of Anova Two Way using vector.
|
8
|
+
|
9
|
+
|
1
10
|
=== 0.14.1 / 2010-08-18
|
2
11
|
|
3
12
|
* Added extra information on $DEBUG=true.
|
data/Manifest.txt
CHANGED
@@ -16,6 +16,7 @@ examples/correlation_matrix.rb
|
|
16
16
|
examples/dataset.rb
|
17
17
|
examples/dominance_analysis.rb
|
18
18
|
examples/dominance_analysis_bootstrap.rb
|
19
|
+
examples/icc.rb
|
19
20
|
examples/levene.rb
|
20
21
|
examples/multiple_regression.rb
|
21
22
|
examples/multivariate_correlation.rb
|
@@ -29,6 +30,7 @@ examples/tetrachoric.rb
|
|
29
30
|
examples/u_test.rb
|
30
31
|
examples/vector.rb
|
31
32
|
examples/velicer_map_test.rb
|
33
|
+
grab_references.rb
|
32
34
|
lib/distribution.rb
|
33
35
|
lib/distribution/chisquare.rb
|
34
36
|
lib/distribution/f.rb
|
@@ -84,6 +86,7 @@ lib/statsample/regression/multiple/matrixengine.rb
|
|
84
86
|
lib/statsample/regression/multiple/rubyengine.rb
|
85
87
|
lib/statsample/regression/simple.rb
|
86
88
|
lib/statsample/reliability.rb
|
89
|
+
lib/statsample/reliability/icc.rb
|
87
90
|
lib/statsample/reliability/multiscaleanalysis.rb
|
88
91
|
lib/statsample/reliability/scaleanalysis.rb
|
89
92
|
lib/statsample/resample.rb
|
@@ -100,6 +103,7 @@ lib/statsample/vector.rb
|
|
100
103
|
po/es/statsample.mo
|
101
104
|
po/es/statsample.po
|
102
105
|
po/statsample.pot
|
106
|
+
references.txt
|
103
107
|
setup.rb
|
104
108
|
test/fixtures/correlation_matrix.rb
|
105
109
|
test/helpers_tests.rb
|
@@ -128,6 +132,7 @@ test/test_multiset.rb
|
|
128
132
|
test/test_permutation.rb
|
129
133
|
test/test_regression.rb
|
130
134
|
test/test_reliability.rb
|
135
|
+
test/test_reliability_icc.rb
|
131
136
|
test/test_resample.rb
|
132
137
|
test/test_rserve_extension.rb
|
133
138
|
test/test_srs.rb
|
data/README.txt
CHANGED
@@ -11,6 +11,7 @@ Include:
|
|
11
11
|
* Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
|
12
12
|
* Imports and exports datasets from and to Excel, CSV and plain text files.
|
13
13
|
* Correlations: Pearson's r, Spearman's rank correlation (rho), point biserial, tau a, tau b and gamma. Tetrachoric and Polychoric correlations are provided by the +statsample-bivariate-extension+ gem.
|
14
|
+
* Intra-class correlation
|
14
15
|
* Anova: generic and vector-based One-way ANOVA and Two-way ANOVA
|
15
16
|
* Tests: F, T, Levene, U-Mannwhitney.
|
16
17
|
* Regression: Simple, Multiple (OLS), Probit and Logit
|
@@ -57,10 +58,13 @@ Include:
|
|
57
58
|
* Statsample::GGobi : Write Ggobi files
|
58
59
|
* Module Statsample::Crosstab provides function to create crosstab for categorical data
|
59
60
|
* Module Statsample::Reliability provides functions to analyze scales with psychometric methods.
|
60
|
-
* Class ScaleAnalysis provides statistics like mean, standard deviation for a scale, Cronbach's alpha and standarized Cronbach's alpha, and for each item: mean, correlation with total scale, mean if deleted, Cronbach's alpha is deleted.
|
61
|
-
* Class MultiScaleAnalysis provides a DSL to easily analyze reliability of multiple scales and retrieve correlation matrix and factor analysis of them.
|
61
|
+
* Class Statsample::Reliability::ScaleAnalysis provides statistics like mean, standard deviation for a scale, Cronbach's alpha and standardized Cronbach's alpha, and for each item: mean, correlation with total scale, mean if deleted, Cronbach's alpha if deleted.
|
62
|
+
* Class Statsample::Reliability::MultiScaleAnalysis provides a DSL to easily analyze reliability of multiple scales and retrieve correlation matrix and factor analysis of them.
|
63
|
+
* Class Statsample::Reliability::ICC provides intra-class correlation, using Shrout & Fleiss(1979) and McGraw & Wong (1996) formulation.
|
62
64
|
* Module Statsample::SRS (Simple Random Sampling) provides a lot of functions to estimate standard error for several types of samples
|
63
65
|
* Module Statsample::Test provides several methods and classes to perform inferential statistics
|
66
|
+
* Statsample::Test::BartlettSphericity
|
67
|
+
* Statsample::Test::ChiSquare
|
64
68
|
* Statsample::Test::Levene
|
65
69
|
* Statsample::Test::UMannWhitney
|
66
70
|
* Statsample::Test::T
|
@@ -71,6 +75,8 @@ Include:
|
|
71
75
|
|
72
76
|
== Examples of use:
|
73
77
|
|
78
|
+
See multiple examples of use at [http://github.com/clbustos/statsample/tree/master/examples/]
|
79
|
+
|
74
80
|
=== Correlation matrix
|
75
81
|
|
76
82
|
require 'statsample'
|
data/examples/icc.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
#!/usr/bin/ruby
# Example: intra-class correlation on simulated ratings.
# Builds one base vector of random integers and three noisy copies of it,
# then prints the ICC summary for the default type and for two other types.
$:.unshift(File.dirname(__FILE__)+'/../lib/')

require 'statsample'

size=1000

# Base measurement: +size+ random integers in 0..9
a=size.times.map {rand(10)}.to_scale
# Three further measurements: the base value plus an integer offset in -2..1
b, c, d = (1..3).map { a.recode {|i| i+rand(4)-2} }

@ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset

@icc=Statsample::Reliability::ICC.new(@ds)

# Summary for the default type first, then for each explicitly selected type
puts @icc.summary

[:icc_3_1, :icc_a_k].each do |icc_type|
  @icc.type=icc_type
  puts @icc.summary
end
|
26
|
+
|
data/grab_references.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
#!/usr/bin/env ruby1.9
# Scans every Ruby file below the current directory for bulleted entries
# that directly follow a reference heading, and writes the unique, sorted
# entries to references.txt using ReportBuilder.
require 'reportbuilder'

refs=[]
Dir.glob("**/*.rb") do |path|
  # True while we are inside the bullet list that follows a heading
  in_reference=false
  # File.foreach closes the file when iteration ends; the previous
  # File.open(path).each_line left the handle open for every scanned file.
  File.foreach(path) do |line|
    if line=~/== Reference/
      in_reference=true
    elsif in_reference
      if line=~/\*\s+(.+)/
        refs.push $1
      else
        # First non-bullet line ends the list
        in_reference=false
      end
    end
  end
end

rb=ReportBuilder.new(:name=>"References") do |g|
  refs.uniq.sort.each do |r|
    g.text "* #{r}"
  end
end

rb.save_text("references.txt")
|
data/lib/distribution.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
begin
|
2
|
-
|
2
|
+
require 'statistics2'
|
3
3
|
rescue LoadError
|
4
|
-
|
4
|
+
puts "You should install statistics2"
|
5
5
|
end
|
6
6
|
# Several distributions modules to calculate cdf, inverse cdf and pdf
|
7
7
|
# See Distribution::Pdf for interface.
|
@@ -12,16 +12,10 @@ end
|
|
12
12
|
# Distribution::Normal.p_value(0.95)
|
13
13
|
# => 1.64485364660836
|
14
14
|
module Distribution
|
15
|
-
|
16
|
-
|
17
|
-
def self.has_gsl?
|
18
|
-
true
|
19
|
-
end
|
20
|
-
rescue LoadError
|
21
|
-
def self.has_gsl?
|
22
|
-
false
|
23
|
-
end
|
15
|
+
# Returns whatever Statsample.has_gsl? reports, so the GSL availability
# check lives in a single place.
def self.has_gsl?
  Statsample.has_gsl?
end
|
18
|
+
|
25
19
|
autoload(:ChiSquare, 'distribution/chisquare')
|
26
20
|
autoload(:T, 'distribution/t')
|
27
21
|
autoload(:F, 'distribution/f')
|
data/lib/distribution/f.rb
CHANGED
@@ -8,7 +8,13 @@ module Distribution
|
|
8
8
|
#
|
9
9
|
# Distribution::F.p_value(0.95,1,2)
|
10
10
|
def p_value(pr,k1,k2)
  # Statistics2 has some troubles with extreme f values,
  # so GSL is used whenever it is available.
  return GSL::Cdf.fdist_Pinv(pr,k1,k2) if Distribution.has_gsl?

  Statistics2.pfdist(k1,k2, pr)
end
|
13
19
|
# F cumulative distribution function (cdf).
|
14
20
|
#
|
@@ -18,7 +24,11 @@ module Distribution
|
|
18
24
|
# Distribution::F.cdf(20,3,2)
|
19
25
|
#
|
20
26
|
def cdf(x, k1, k2)
  # Prefer GSL (fed a Float) when available; otherwise use Statistics2
  return GSL::Cdf.fdist_P(x.to_f,k1,k2) if Distribution.has_gsl?

  Statistics2.fdist(k1, k2,x)
end
|
23
33
|
end
|
24
34
|
end
|
data/lib/statsample.rb
CHANGED
@@ -102,18 +102,20 @@ end
|
|
102
102
|
# * Interfaces to gdchart, gnuplot and SVG::Graph
|
103
103
|
#
|
104
104
|
module Statsample
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
105
|
+
@@has_gsl=nil
|
106
|
+
# Reports whether the 'rbgsl' library can be loaded.
# The require is attempted only on the first call; the outcome is cached
# in @@has_gsl and returned directly afterwards.
def self.has_gsl?
  return @@has_gsl unless @@has_gsl.nil?

  @@has_gsl = begin
    require 'rbgsl'
    true
  rescue LoadError
    false
  end
end
|
115
117
|
|
116
|
-
VERSION = '0.
|
118
|
+
VERSION = '0.15.0'
|
117
119
|
SPLIT_TOKEN = ","
|
118
120
|
autoload(:Database, 'statsample/converters')
|
119
121
|
autoload(:Anova, 'statsample/anova')
|
@@ -92,7 +92,7 @@ module Statsample
|
|
92
92
|
:name_numerator=>_("Between Groups"),
|
93
93
|
:name_denominator=>_("Within Groups"),
|
94
94
|
:summary_descriptives=>false,
|
95
|
-
:summary_levene=>
|
95
|
+
:summary_levene=>true}
|
96
96
|
@opts=opts_default.merge(opts).merge(:ss_num=>ssbg, :ss_den=>sswg, :df_num=>df_bg, :df_den=>df_wg)
|
97
97
|
super(@opts)
|
98
98
|
end
|
@@ -107,16 +107,10 @@ module Statsample
|
|
107
107
|
|
108
108
|
# Two Way Anova with vectors
|
109
109
|
# Example:
|
110
|
-
# v1=[
|
111
|
-
# v2=[
|
112
|
-
# v3=[5,3,1,5
|
113
|
-
# anova=Statsample::Anova::
|
114
|
-
# anova.f
|
115
|
-
# => 0.0243902439024391
|
116
|
-
# anova.probability
|
117
|
-
# => 0.975953044203438
|
118
|
-
# anova.sst
|
119
|
-
# => 32.9333333333333
|
110
|
+
# v1=[1,1,2,2].to_scale
|
111
|
+
# v2=[1,2,1,2].to_scale
|
112
|
+
# v3=[5,3,1,5].to_scale
|
113
|
+
# anova=Statsample::Anova::TwoWayWithVectors.new(:a=>v1,:b=>v2, :dependent=>v3)
|
120
114
|
#
|
121
115
|
class TwoWayWithVectors < TwoWay
|
122
116
|
# Show summary Levene test
|
data/lib/statsample/dataset.rb
CHANGED
@@ -407,7 +407,7 @@ module Statsample
|
|
407
407
|
# if fields parameter is empty, return the mean for all fields
|
408
408
|
# if max invalid parameter > 0, returns the mean for all tuples
|
409
409
|
# with 0 to max_invalid invalid fields
|
410
|
-
def vector_mean(fields=nil,max_invalid=0)
|
410
|
+
def vector_mean(fields=nil, max_invalid=0)
|
411
411
|
a=[]
|
412
412
|
fields=check_fields(fields)
|
413
413
|
size=fields.size
|
@@ -50,7 +50,7 @@ module Statsample
|
|
50
50
|
# | b - c | 1.0 | 1.0 | 1.0 |
|
51
51
|
# -----------------------------------------
|
52
52
|
#
|
53
|
-
# ==
|
53
|
+
# == Reference:
|
54
54
|
# * Budescu, D. V. (1993). Dominance analysis: a new approach to the problem of relative importance of predictors in multiple regression. <em>Psychological Bulletin, 114</em>, 542-551.
|
55
55
|
# * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. <em>Psychological Methods, 8</em>(2), 129-148.
|
56
56
|
# * Azen, R. & Budescu, D.V. (2006). Comparing predictors in Multivariate Regression Models: An extension of Dominance Analysis. <em>Journal of Educational and Behavioral Statistics, 31</em>(2), 157-180.
|
@@ -63,7 +63,6 @@ module Statsample
|
|
63
63
|
# ---------------------------------------
|
64
64
|
#
|
65
65
|
# == References:
|
66
|
-
#
|
67
66
|
# * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. <em>Psychological Methods, 8</em>(2), 129-148.
|
68
67
|
class Bootstrap
|
69
68
|
include Writable
|
@@ -31,7 +31,7 @@ module Statsample
|
|
31
31
|
# Current algorithm is loosely based on SPSS O'Connor algorithm
|
32
32
|
#
|
33
33
|
# == Reference
|
34
|
-
# * O'Connor, B. (2000). SPSS and SAS programs for determining the number of components using parallel analysis and Velicer
|
34
|
+
# * O'Connor, B. (2000). SPSS and SAS programs for determining the number of components using parallel analysis and Velicer's MAP test. Behavior Research Methods, Instruments, & Computers, 32(3), 396-402.
|
35
35
|
#
|
36
36
|
|
37
37
|
|
@@ -15,9 +15,9 @@ module Statsample
|
|
15
15
|
# *With number of cases and variables*
|
16
16
|
# pa=Statsample::Factor::ParallelAnalysis.with_random_data(100,8)
|
17
17
|
#
|
18
|
-
# ==
|
18
|
+
# == Reference:
|
19
19
|
# * Hayton, J., Allen, D. & Scarpello, V.(2004). Factor Retention Decisions in Exploratory Factor Analysis: a Tutorial on Parallel Analysis. <i>Organizational Research Methods, 7</i> (2), 191-205.
|
20
|
-
# * O'Connor, B. (2000). SPSS and SAS programs for determining the number of components using parallel analysis and Velicer
|
20
|
+
# * O'Connor, B. (2000). SPSS and SAS programs for determining the number of components using parallel analysis and Velicer's MAP test. Behavior Research Methods, Instruments, & Computers, 32(3), 396-402.
|
21
21
|
# * Liu, O., & Rijmen, F. (2008). A modified procedure for parallel analysis of ordered categorical data. Behavior Research Methods, 40(2), 556-562.
|
22
22
|
|
23
23
|
class ParallelAnalysis
|
@@ -24,8 +24,7 @@ module Factor
|
|
24
24
|
# => [0.962964636346122, 0.962964636346122]
|
25
25
|
#
|
26
26
|
# == References:
|
27
|
-
#
|
28
|
-
# * SPSS manual
|
27
|
+
# * SPSS Manual
|
29
28
|
# * Smith, L. (2002). A tutorial on Principal Component Analysis. Available on http://courses.eas.ualberta.ca/eas570/pca_tutorial.pdf
|
30
29
|
#
|
31
30
|
class PCA
|
@@ -22,8 +22,7 @@ module Factor
|
|
22
22
|
# => [0.962964636346122, 0.962964636346122]
|
23
23
|
#
|
24
24
|
# == References:
|
25
|
-
#
|
26
|
-
# * SPSS manual
|
25
|
+
# * SPSS Manual
|
27
26
|
# * Smith, L. (2002). A tutorial on Principal Component Analysis. Available on http://courses.eas.ualberta.ca/eas570/pca_tutorial.pdf
|
28
27
|
#
|
29
28
|
class PrincipalAxis
|
@@ -2,9 +2,9 @@ module Statsample
|
|
2
2
|
module Factor
|
3
3
|
# Base class for component matrix rotation.
|
4
4
|
#
|
5
|
-
# ==
|
5
|
+
# == Reference:
|
6
6
|
# * SPSS Manual
|
7
|
-
# *
|
7
|
+
# * Lin, J. (2007). VARIMAX_K58 [Source code]. [http://www.johnny-lin.com/idl_code/varimax_k58.pro]
|
8
8
|
#
|
9
9
|
# Use subclasses Varimax, Equimax or Quartimax for desired type of rotation
|
10
10
|
# Use:
|
data/lib/statsample/histogram.rb
CHANGED
@@ -34,8 +34,8 @@ module Statsample
|
|
34
34
|
# add an extra bin to your histogram.
|
35
35
|
#
|
36
36
|
#
|
37
|
-
# Reference:
|
38
|
-
# http://www.gnu.org/software/gsl/manual/html_node/The-histogram-struct.html
|
37
|
+
# == Reference:
|
38
|
+
# * http://www.gnu.org/software/gsl/manual/html_node/The-histogram-struct.html
|
39
39
|
|
40
40
|
class Histogram
|
41
41
|
class << self
|
@@ -21,6 +21,21 @@ module Statsample
|
|
21
21
|
}.to_dataset
|
22
22
|
cronbach_alpha(ds)
|
23
23
|
end
|
24
|
+
# Predicted reliability of a test by replicating
|
25
|
+
# +n+ times the number of items
|
26
|
+
def spearman_brown_prophecy(r,n)
  # Spearman-Brown formula: n*r / (1 + (n-1)*r)
  numerator=n*r
  denominator=1+(n-1)*r
  numerator.quo(denominator)
end
|
29
|
+
|
30
|
+
alias :sbp :spearman_brown_prophecy
|
31
|
+
# Returns the number of items
|
32
|
+
# to obtain +r_d+ desired reliability
|
33
|
+
# from +r+ current reliability, achieved with
|
34
|
+
# +n+ items
|
35
|
+
def n_for_desired_reliability(r,r_d,n=1)
  # Factor by which the current length must be multiplied
  lengthening=(r_d*(1-r)).quo(r*(1-r_d))
  lengthening*n
end
|
38
|
+
|
24
39
|
# Get Cronbach alpha from <tt>n</tt> cases,
|
25
40
|
# <tt>s2</tt> mean variance and <tt>cov</tt>
|
26
41
|
# mean covariance
|
@@ -120,5 +135,7 @@ module Statsample
|
|
120
135
|
end # self
|
121
136
|
end # Reliability
|
122
137
|
end # Statsample
|
138
|
+
|
139
|
+
require 'statsample/reliability/icc.rb'
|
123
140
|
require 'statsample/reliability/scaleanalysis.rb'
|
124
141
|
require 'statsample/reliability/multiscaleanalysis.rb'
|
@@ -0,0 +1,404 @@
|
|
1
|
+
module Statsample
  module Reliability
    # = Intra-class correlation
    # According to Shrout & Fleiss (1979, p.422): "ICC is the correlation
    # between one measurement (either a single rating or a mean of
    # several ratings) on a target and another measurement obtained on that target"
    #
    # == Usage
    #   icc=Statsample::Reliability::ICC.new(ds, :type=>:icc_3_1)
    #   puts icc.summary
    #
    # == Reference
    # * Shrout,P. & Fleiss, J. (1979). Intraclass Correlation: Uses in assessing rater reliability. Psychological Bulletin, 86(2), 420-428
    # * McGraw, K. & Wong, S.P. (1996). Forming Inferences About Some Intraclass Correlation Coefficients. Psychological methods, 1(1), 30-46.
    class ICC
      include Summarizable

      # Degrees of freedom between cases: n-1
      attr_reader :df_bt
      # Degrees of freedom within cases: n*(k-1)
      attr_reader :df_wt
      # Degrees of freedom between measurements (vectors): k-1
      attr_reader :df_bj
      # Residual degrees of freedom: (n-1)*(k-1)
      attr_reader :df_residual

      # Mean squares matching the degrees of freedom above
      attr_reader :ms_bt
      attr_reader :ms_wt
      attr_reader :ms_bj
      attr_reader :ms_residual

      # Shrout & Fleiss names for the mean squares
      alias :bms :ms_bt
      alias :wms :ms_wt
      alias :jms :ms_bj
      alias :ems :ms_residual

      # McGraw & Wong names for the mean squares
      alias :msr :ms_bt
      alias :msw :ms_wt
      alias :msc :ms_bj
      alias :mse :ms_residual

      # :section: Shrout and Fleiss ICC denominations
      attr_reader :icc_1_1
      attr_reader :icc_2_1
      attr_reader :icc_3_1
      attr_reader :icc_1_k
      attr_reader :icc_2_k
      attr_reader :icc_3_k

      # :section: McGraw and Wong ICC denominations

      attr_reader :icc_1
      attr_reader :icc_c_1
      attr_reader :icc_a_1
      attr_reader :icc_k
      attr_reader :icc_c_k
      attr_reader :icc_a_k

      # Number of complete cases (n) and number of measurements (k)
      attr_reader :n, :k
      # Mean of every value in the dataset
      attr_reader :total_mean
      # Type of analysis, for easy summarization
      # By default, set to :icc_1
      # * Shrout & Fleiss(1979) denominations
      #   * :icc_1_1
      #   * :icc_2_1
      #   * :icc_3_1
      #   * :icc_1_k
      #   * :icc_2_k
      #   * :icc_3_k
      # * McGraw & Wong (1996) denominations
      #   * :icc_1
      #   * :icc_k
      #   * :icc_c_1
      #   * :icc_c_k
      #   * :icc_a_1
      #   * :icc_a_k
      attr_reader :type
      # ICC value, set with :type
      attr_reader :r
      # F test object, set with :type
      attr_reader :f
      # Lower and upper bounds of the confidence interval, set with :type
      attr_reader :lbound
      attr_reader :ubound

      # Null-hypothesis value of rho handed to the McGraw & Wong F tests.
      # Only read when +type=+ runs; assign +type+ again after changing it.
      attr_accessor :g_rho
      # Alpha level for the confidence interval (default 0.05).
      # Only read when +type=+ runs; assign +type+ again after changing it.
      attr_accessor :alpha
      # Name of the analysis, used as the report section title
      attr_accessor :name

      # Create a ICC analysis for a given dataset
      # Each vector is a different measurement. Only uses complete data
      # (listwise deletion).
      #
      # +opts+ may contain any attribute that has a writer
      # (:name, :type, :alpha, :g_rho); other keys are ignored.
      def initialize(ds, opts=Hash.new)
        @ds=ds.dup_only_valid
        @vectors=@ds.vectors.values
        @n=@ds.cases
        @k=@ds.fields.size
        compute
        @g_rho=0
        @alpha=0.05
        @icc_name=nil
        @type=nil
        opts_default={:name=>"Intra-class correlation", :type=>:icc_1}
        @opts=opts_default.merge(opts)
        # type= derives the F test and the confidence bounds from @alpha and
        # @g_rho, so it has to run after every other option has been applied.
        # Previously :type was applied first and a user-supplied :alpha or
        # :g_rho had no effect on the initial results.
        selected_type=nil
        @opts.each do |key,value|
          if key.to_s=='type'
            selected_type=value
          elsif respond_to?(key)
            send("#{key}=",value)
          end
        end
        self.type=selected_type
      end

      # Selects the coefficient to report and refreshes +r+, +f+, +lbound+
      # and +ubound+ using the current +alpha+ and +g_rho+.
      # Raises a RuntimeError for an unknown type.
      def type=(v)
        case v
        when :icc_1_1
          @icc_name=_("Shrout & Fleiss ICC(1,1)")
          @r=@icc_1_1
          @f=icc_1_f
          @lbound, @ubound=icc_1_1_ci(@alpha)
        when :icc_2_1
          @icc_name=_("Shrout & Fleiss ICC(2,1)")
          @r=@icc_2_1
          @f=icc_2_f
          @lbound, @ubound=icc_2_1_ci(@alpha)
        when :icc_3_1
          @icc_name=_("Shrout & Fleiss ICC(3,1)")
          @r=@icc_3_1
          @f=icc_3_f
          @lbound, @ubound=icc_3_1_ci(@alpha)
        when :icc_1_k
          @icc_name=_("Shrout & Fleiss ICC(1,k)")
          @r=@icc_1_k
          @f=icc_1_k_f
          @lbound, @ubound=icc_1_k_ci(@alpha)
        when :icc_2_k
          @icc_name=_("Shrout & Fleiss ICC(2,k)")
          @r=@icc_2_k
          @f=icc_2_k_f
          @lbound, @ubound=icc_2_k_ci(@alpha)
        when :icc_3_k
          @icc_name=_("Shrout & Fleiss ICC(3,k)")
          @r=@icc_3_k
          @f=icc_3_k_f
          @lbound, @ubound=icc_3_k_ci(@alpha)
        when :icc_1
          @icc_name=_("McGraw & Wong ICC(1)")
          @r=@icc_1_1
          @f=icc_1_f(@g_rho)
          @lbound, @ubound=icc_1_1_ci(@alpha)
        when :icc_k
          @icc_name=_("McGraw & Wong ICC(K)")
          @r=@icc_1_k
          @f=icc_1_k_f(@g_rho)
          @lbound, @ubound=icc_1_k_ci(@alpha)
        when :icc_c_1
          @icc_name=_("McGraw & Wong ICC(C,1)")
          @r=@icc_3_1
          @f=icc_c_1_f(@g_rho)
          @lbound, @ubound=icc_3_1_ci(@alpha)
        when :icc_c_k
          @icc_name=_("McGraw & Wong ICC(C,K)")
          @r=@icc_3_k
          @f=icc_c_k_f(@g_rho)
          @lbound, @ubound=icc_c_k_ci(@alpha)
        when :icc_a_1
          @icc_name=_("McGraw & Wong ICC(A,1)")
          @r=@icc_2_1
          @f=icc_a_1_f(@g_rho)
          @lbound,@ubound = icc_2_1_ci(@alpha)
        when :icc_a_k
          @icc_name=_("McGraw & Wong ICC(A,K)")
          @r=@icc_2_k
          @f=icc_a_k_f(@g_rho)
          @lbound,@ubound=icc_2_k_ci(@alpha)
        else
          raise "Type #{v} doesn't exists"
        end
        # Remember the selection so the +type+ reader reflects it
        @type=v
      end

      # Computes degrees of freedom, sums of squares, mean squares and the
      # six coefficients from the dataset. Called once from +initialize+.
      def compute
        @df_bt=n-1
        @df_wt=n*(k-1)
        @df_bj=k-1
        @df_residual=(n-1)*(k-1)
        @total_mean=@vectors.inject(0){|ac,v| ac+v.sum}.quo(n*k)
        vm=@ds.vector_mean

        @ss_bt=k*vm.ss(@total_mean)
        @ms_bt=@ss_bt.quo(@df_bt)

        @ss_bj=n*@vectors.inject(0){|ac,v| ac+(v.mean-@total_mean).square}
        @ms_bj=@ss_bj.quo(@df_bj)

        @ss_wt=@vectors.inject(0){|ac,v| ac+(v-vm).ss(0)}
        @ms_wt=@ss_wt.quo(@df_wt)

        @ss_residual=@ss_wt-@ss_bj
        @ms_residual=@ss_residual.quo(@df_residual)
        ###
        # Shrout and Fleiss denomination
        ###
        # ICC(1,1) / ICC(1)
        @icc_1_1=(bms-wms).quo(bms+(k-1)*wms)
        # ICC(2,1) / ICC(A,1)
        @icc_2_1=(bms-ems).quo(bms+(k-1)*ems+k*(jms - ems).quo(n))
        # ICC(3,1) / ICC(C,1)
        @icc_3_1=(bms-ems).quo(bms+(k-1)*ems)

        # ICC(1,K) / ICC(K)
        @icc_1_k=(bms-wms).quo(bms)
        # ICC(2,K) / ICC(A,k)
        @icc_2_k=(bms-ems).quo(bms+(jms-ems).quo(n))
        # ICC(3,K) / ICC(C,k) = Cronbach's alpha
        @icc_3_k=(bms-ems).quo(bms)

        ###
        # McGraw and Wong
        ###
        # Same values under the McGraw & Wong names, following the pairing
        # stated in the comments above. These readers used to return nil.
        @icc_1=@icc_1_1
        @icc_a_1=@icc_2_1
        @icc_c_1=@icc_3_1
        @icc_k=@icc_1_k
        @icc_a_k=@icc_2_k
        @icc_c_k=@icc_3_k
      end

      # One way random F, single measure, testing against +rho+
      def icc_1_f(rho=0.0)
        num=msr*(1-rho)
        den=msw*(1+(k-1)*rho)
        Statsample::Test::F.new(num, den, @df_bt, @df_wt)
      end
      # One way random F, type k
      def icc_1_k_f(rho=0)
        num=msr*(1-rho)
        den=msw
        Statsample::Test::F.new(num, den, @df_bt, @df_wt)
      end

      # F test for ICC(C,1), testing against +rho+
      def icc_c_1_f(rho=0)
        num=msr*(1-rho)
        den=mse*(1+(k-1)*rho)
        Statsample::Test::F.new(num, den, @df_bt, @df_residual)
      end
      # F test for ICC(C,k), testing against +rho+
      def icc_c_k_f(rho=0)
        num=(1-rho)
        den=1-@icc_3_k
        Statsample::Test::F.new(num, den, @df_bt, @df_residual)
      end

      def v(a,b)
        # Parentheses are required: x**2.quo(y) parses as x**(2.quo(y)),
        # which is what the previous version computed by accident.
        ((a*msc+b*mse)**2).quo((((a*msc)**2).quo(k-1))+(((b*mse)**2).quo( (n-1) * (k-1))))
      end
      def a(rho)
        (k*rho).quo(n*(1-rho))
      end
      def b(rho)
        1+((k*rho*(n-1)).quo(n*(1-rho)))
      end
      def c(rho)
        rho.quo(n*(1-rho))
      end
      def d(rho)
        1+((rho*(n-1)).quo(n*(1-rho)))
      end
      private :v, :a, :b, :c, :d

      # Denominator degrees of freedom shared by the ICC(A,1)/ICC(A,k)
      # F tests and the F* quantiles, for a coefficient value +pp+.
      def icc_2_v(pp)
        fj=jms.quo(ems)
        vn=(k-1)*(n-1)*((k*pp*fj+n*(1+(k-1)*pp)-k*pp)**2)
        vd=(n-1)*(k**2)*(pp**2)*(fj**2)+((n*(1+(k-1)*pp)-k*pp)**2)
        vn.quo(vd)
      end
      private :icc_2_v

      # Returns [f_obs / F(per; df_num, df_den), f_obs * F(per; df_den, df_num)]
      # with per = 1 - alpha/2; the pair every F-based interval below starts from.
      def f_limits(f_obs, df_num, df_den, alpha)
        per=1-(0.5*alpha)
        fl=f_obs.quo(Distribution::F.p_value(per, df_num, df_den))
        fu=f_obs*Distribution::F.p_value(per, df_den, df_num)
        [fl,fu]
      end
      private :f_limits

      # F test for ICC(A,1), testing against +rho+
      def icc_a_1_f(rho=0)
        num=msr
        den=a(rho)*msc+b(rho)*mse
        Statsample::Test::F.new(num, den, @df_bt, icc_2_v(@icc_2_1))
      end

      # F test for ICC(A,k), testing against +rho+
      def icc_a_k_f(rho=0)
        num=msr
        den=c(rho)*msc+d(rho)*mse
        Statsample::Test::F.new(num, den, @df_bt, icc_2_v(@icc_2_k))
      end

      # F test for ICC Case 1. Shrout and Fleiss
      def icc_1_f_shrout
        Statsample::Test::F.new(bms, wms, @df_bt, @df_wt)
      end

      # Confidence interval for ICC (1,1)
      def icc_1_1_ci(alpha=0.05)
        fl,fu=f_limits(icc_1_f.f, @df_bt, @df_wt, alpha)
        [(fl-1).quo(fl+k-1), (fu-1).quo(fu+k-1)]
      end

      # Confidence interval for ICC (1,k)
      def icc_1_k_ci(alpha=0.05)
        fl,fu=f_limits(icc_1_f.f, @df_bt, @df_wt, alpha)
        [1-1.quo(fl), 1-1.quo(fu)]
      end

      # F test for ICC Case 2
      def icc_2_f
        Statsample::Test::F.new(bms, ems, @df_bt, @df_residual)
      end

      # F test used when +type+ is :icc_2_k. This method was referenced by
      # +type=+ but never defined, so selecting :icc_2_k raised NameError.
      # NOTE(review): assumes the average-measure test is the same BMS/EMS
      # test as the single-measure case - confirm against Shrout & Fleiss.
      def icc_2_k_f
        icc_2_f
      end

      #
      # F* for ICC(2,1) and ICC(2,k)
      #
      def icc_2_1_fs(pp,alpha=0.05)
        per=1-(0.5*alpha)
        df=icc_2_v(pp)
        f1=Distribution::F.p_value(per, n-1,df)
        f2=Distribution::F.p_value(per, df, n-1)
        [f1,f2]
      end

      # Confidence interval for ICC(2,1). +alpha+ is now forwarded; it was
      # silently replaced by the 0.05 default before.
      def icc_2_1_ci(alpha=0.05)
        icc_2_1_ci_mcgraw(alpha)
      end

      # Confidence interval ICC(A,1), McGraw & Wong
      def icc_2_1_ci_mcgraw(alpha=0.05)
        fd,fu=icc_2_1_fs(icc_2_1,alpha)
        cl=(n*(msr-fd*mse)).quo(fd*(k*msc+(k*n-k-n)*mse)+n*msr)
        cu=(n*(fu*msr-mse)).quo(k*msc+(k*n-k-n)*mse+n*fu*msr)
        [cl,cu]
      end

      # Confidence interval for ICC(2,k)
      def icc_2_k_ci(alpha=0.05)
        icc_2_k_ci_mcgraw(alpha)
      end

      # Confidence interval ICC(A,k), McGraw & Wong
      def icc_2_k_ci_mcgraw(alpha=0.05)
        f1,f2=icc_2_1_fs(icc_2_k,alpha)
        [
        (n*(msr-f1*mse)).quo(f1*(msc-mse)+n*msr),
        (n*(f2*msr-mse)).quo(msc-mse+n*f2*msr)
        ]
      end
      # Confidence interval for ICC(2,k) obtained by transforming the
      # bounds of the ICC(2,1) interval
      def icc_2_k_ci_shrout(alpha=0.05)
        ci=icc_2_1_ci(alpha)
        [(ci[0]*k).quo(1+(k-1)*ci[0]), (ci[1]*k).quo(1+(k-1)*ci[1])]
      end

      # F test for ICC Case 3
      def icc_3_f
        Statsample::Test::F.new(bms, ems, @df_bt, @df_residual)
      end

      # F test used when +type+ is :icc_3_k. This method was referenced by
      # +type=+ but never defined, so selecting :icc_3_k raised NameError.
      # NOTE(review): assumes the average-measure test is the same BMS/EMS
      # test as the single-measure case - confirm against Shrout & Fleiss.
      def icc_3_k_f
        icc_3_f
      end

      # Confidence interval for ICC (3,1)
      def icc_3_1_ci(alpha=0.05)
        fl,fu=f_limits(icc_3_f.f, @df_bt, @df_residual, alpha)
        [(fl-1).quo(fl+k-1), (fu-1).quo(fu+k-1)]
      end

      # Confidence interval for ICC (3,k)
      def icc_3_k_ci(alpha=0.05)
        fl,fu=f_limits(icc_3_f.f, @df_bt, @df_residual, alpha)
        [1-1.quo(fl),1-1.quo(fu)]
      end

      # Confidence interval for ICC (C,k)
      def icc_c_k_ci(alpha=0.05)
        fl,fu=f_limits(icc_c_k_f.f, @df_bt, @df_residual, alpha)
        [1-1.quo(fl),1-1.quo(fu)]
      end
      # Adds a section with the selected coefficient name, its value,
      # the F test and the confidence interval to builder +b+.
      def report_building(b)
        b.section(:name=>name) do |s|
          s.text @icc_name
          s.text _("ICC: %0.4f") % @r
          s.parse_element(@f)
          s.text _("CI (%0.2f): [%0.4f - %0.4f]") % [(1-@alpha)*100, @lbound, @ubound]
        end
      end
    end
  end
end
|