statsample 0.14.1 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +0 -0
- data/History.txt +9 -0
- data/Manifest.txt +5 -0
- data/README.txt +8 -2
- data/examples/icc.rb +26 -0
- data/grab_references.rb +28 -0
- data/lib/distribution.rb +5 -11
- data/lib/distribution/f.rb +12 -2
- data/lib/statsample.rb +11 -9
- data/lib/statsample/anova/oneway.rb +1 -1
- data/lib/statsample/anova/twoway.rb +4 -10
- data/lib/statsample/dataset.rb +1 -1
- data/lib/statsample/dominanceanalysis.rb +1 -1
- data/lib/statsample/dominanceanalysis/bootstrap.rb +0 -1
- data/lib/statsample/factor/map.rb +1 -1
- data/lib/statsample/factor/parallelanalysis.rb +2 -2
- data/lib/statsample/factor/pca.rb +1 -2
- data/lib/statsample/factor/principalaxis.rb +1 -2
- data/lib/statsample/factor/rotation.rb +2 -2
- data/lib/statsample/histogram.rb +2 -2
- data/lib/statsample/reliability.rb +17 -0
- data/lib/statsample/reliability/icc.rb +404 -0
- data/lib/statsample/reliability/scaleanalysis.rb +18 -6
- data/lib/statsample/srs.rb +5 -1
- data/lib/statsample/test.rb +1 -2
- data/lib/statsample/test/f.rb +5 -1
- data/lib/statsample/test/umannwhitney.rb +3 -3
- data/lib/statsample/vector.rb +2 -4
- data/references.txt +22 -0
- data/test/test_bivariate.rb +11 -11
- data/test/test_reliability.rb +11 -0
- data/test/test_reliability_icc.rb +188 -0
- data/test/test_test_f.rb +2 -2
- metadata +12 -4
- metadata.gz.sig +0 -0
data.tar.gz.sig
CHANGED
Binary file
|
data/History.txt
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
=== 0.15.0 / 2010-09-07
|
2
|
+
* Added class Statsample::Reliability::ICC for calculation of Intra-class correlation (Shrout & Fleiss, 1979; McGraw & Wong, 1996). Tested with SPSS and R values.
|
3
|
+
* References: Updated and standardized references on many classes. Added grab_references.rb script, to create a list of references for the library
|
4
|
+
* Added Spearman-Brown prophecy on Reliability module
|
5
|
+
* Distribution::F uses Gsl when available
|
6
|
+
* Added mean r.p.b. and item sd on Scale Analysis
|
7
|
+
* Corrected bug on Vector.ary_method and example of Anova Two Way using vector.
|
8
|
+
|
9
|
+
|
1
10
|
=== 0.14.1 / 2010-08-18
|
2
11
|
|
3
12
|
* Added extra information on $DEBUG=true.
|
data/Manifest.txt
CHANGED
@@ -16,6 +16,7 @@ examples/correlation_matrix.rb
|
|
16
16
|
examples/dataset.rb
|
17
17
|
examples/dominance_analysis.rb
|
18
18
|
examples/dominance_analysis_bootstrap.rb
|
19
|
+
examples/icc.rb
|
19
20
|
examples/levene.rb
|
20
21
|
examples/multiple_regression.rb
|
21
22
|
examples/multivariate_correlation.rb
|
@@ -29,6 +30,7 @@ examples/tetrachoric.rb
|
|
29
30
|
examples/u_test.rb
|
30
31
|
examples/vector.rb
|
31
32
|
examples/velicer_map_test.rb
|
33
|
+
grab_references.rb
|
32
34
|
lib/distribution.rb
|
33
35
|
lib/distribution/chisquare.rb
|
34
36
|
lib/distribution/f.rb
|
@@ -84,6 +86,7 @@ lib/statsample/regression/multiple/matrixengine.rb
|
|
84
86
|
lib/statsample/regression/multiple/rubyengine.rb
|
85
87
|
lib/statsample/regression/simple.rb
|
86
88
|
lib/statsample/reliability.rb
|
89
|
+
lib/statsample/reliability/icc.rb
|
87
90
|
lib/statsample/reliability/multiscaleanalysis.rb
|
88
91
|
lib/statsample/reliability/scaleanalysis.rb
|
89
92
|
lib/statsample/resample.rb
|
@@ -100,6 +103,7 @@ lib/statsample/vector.rb
|
|
100
103
|
po/es/statsample.mo
|
101
104
|
po/es/statsample.po
|
102
105
|
po/statsample.pot
|
106
|
+
references.txt
|
103
107
|
setup.rb
|
104
108
|
test/fixtures/correlation_matrix.rb
|
105
109
|
test/helpers_tests.rb
|
@@ -128,6 +132,7 @@ test/test_multiset.rb
|
|
128
132
|
test/test_permutation.rb
|
129
133
|
test/test_regression.rb
|
130
134
|
test/test_reliability.rb
|
135
|
+
test/test_reliability_icc.rb
|
131
136
|
test/test_resample.rb
|
132
137
|
test/test_rserve_extension.rb
|
133
138
|
test/test_srs.rb
|
data/README.txt
CHANGED
@@ -11,6 +11,7 @@ Include:
|
|
11
11
|
* Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
|
12
12
|
* Imports and exports datasets from and to Excel, CSV and plain text files.
|
13
13
|
* Correlations: Pearson's r, Spearman's rank correlation (rho), point biserial, tau a, tau b and gamma. Tetrachoric and Polychoric correlation provided by +statsample-bivariate-extension+ gem.
|
14
|
+
* Intra-class correlation
|
14
15
|
* Anova: generic and vector-based One-way ANOVA and Two-way ANOVA
|
15
16
|
* Tests: F, T, Levene, U-Mannwhitney.
|
16
17
|
* Regression: Simple, Multiple (OLS), Probit and Logit
|
@@ -57,10 +58,13 @@ Include:
|
|
57
58
|
* Statsample::GGobi : Write Ggobi files
|
58
59
|
* Module Statsample::Crosstab provides function to create crosstab for categorical data
|
59
60
|
* Module Statsample::Reliability provides functions to analyze scales with psychometric methods.
|
60
|
-
* Class ScaleAnalysis provides statistics like mean, standard deviation for a scale, Cronbach's alpha and standarized Cronbach's alpha, and for each item: mean, correlation with total scale, mean if deleted, Cronbach's alpha is deleted.
|
61
|
-
* Class MultiScaleAnalysis provides a DSL to easily analyze reliability of multiple scales and retrieve correlation matrix and factor analysis of them.
|
61
|
+
* Class Statsample::Reliability::ScaleAnalysis provides statistics like mean, standard deviation for a scale, Cronbach's alpha and standardized Cronbach's alpha, and for each item: mean, correlation with total scale, mean if deleted, Cronbach's alpha if deleted.
|
62
|
+
* Class Statsample::Reliability::MultiScaleAnalysis provides a DSL to easily analyze reliability of multiple scales and retrieve correlation matrix and factor analysis of them.
|
63
|
+
* Class Statsample::Reliability::ICC provides intra-class correlation, using the Shrout & Fleiss (1979) and McGraw & Wong (1996) formulations.
|
62
64
|
* Module Statsample::SRS (Simple Random Sampling) provides a lot of functions to estimate standard error for several type of samples
|
63
65
|
* Module Statsample::Test provides several methods and classes to perform inferential statistics
|
66
|
+
* Statsample::Test::BartlettSphericity
|
67
|
+
* Statsample::Test::ChiSquare
|
64
68
|
* Statsample::Test::Levene
|
65
69
|
* Statsample::Test::UMannWhitney
|
66
70
|
* Statsample::Test::T
|
@@ -71,6 +75,8 @@ Include:
|
|
71
75
|
|
72
76
|
== Examples of use:
|
73
77
|
|
78
|
+
See multiple examples of use at [http://github.com/clbustos/statsample/tree/master/examples/]
|
79
|
+
|
74
80
|
=== Correlation matrix
|
75
81
|
|
76
82
|
require 'statsample'
|
data/examples/icc.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')

require 'statsample'

# Example: intra-class correlation on simulated ratings.
# Vector 'a' holds random base scores; 'b', 'c' and 'd' are copies of 'a'
# with a small random offset added to every score.
size=1000
a=Array.new(size) { rand(10) }.to_scale
b,c,d=Array.new(3) { a.recode { |score| score+rand(4)-2 } }
@ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset

# Summary with the default ICC type
@icc=Statsample::Reliability::ICC.new(@ds)
puts @icc.summary

# Same analysis, reported for two other ICC types
[:icc_3_1, :icc_a_k].each do |icc_type|
  @icc.type=icc_type
  puts @icc.summary
end
|
26
|
+
|
data/grab_references.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
#!/usr/bin/env ruby1.9
# Builds references.txt from the bibliographic entries documented in the
# library sources.
#
# Every Ruby file below the current directory is scanned. After a line
# containing a reference heading, each following bullet line is collected
# as one entry; the first line that is not a bullet ends that list.
require 'reportbuilder'

refs=[]
Dir.glob("**/*.rb") do |path|
  in_reference_list=false
  # File.foreach closes the file once iteration ends, so no handle is
  # leaked for each scanned file.
  File.foreach(path) do |line|
    if line=~/== Reference/
      in_reference_list=true
    elsif in_reference_list
      if line=~/\*\s+(.+)/
        refs.push $1
      else
        in_reference_list=false
      end
    end
  end
end

# Write the entries, de-duplicated and sorted, as a plain text report.
rb=ReportBuilder.new(:name=>"References") do |g|
  refs.uniq.sort.each do |r|
    g.text "* #{r}"
  end
end

rb.save_text("references.txt")
|
data/lib/distribution.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
begin
|
2
|
-
|
2
|
+
require 'statistics2'
|
3
3
|
rescue LoadError
|
4
|
-
|
4
|
+
puts "You should install statistics2"
|
5
5
|
end
|
6
6
|
# Several distributions modules to calculate cdf, inverse cdf and pdf
|
7
7
|
# See Distribution::Pdf for interface.
|
@@ -12,16 +12,10 @@ end
|
|
12
12
|
# Distribution::Normal.p_value(0.95)
|
13
13
|
# => 1.64485364660836
|
14
14
|
module Distribution
|
15
|
-
|
16
|
-
|
17
|
-
def self.has_gsl?
|
18
|
-
true
|
19
|
-
end
|
20
|
-
rescue LoadError
|
21
|
-
def self.has_gsl?
|
22
|
-
false
|
23
|
-
end
|
15
|
+
def self.has_gsl?
|
16
|
+
Statsample.has_gsl?
|
24
17
|
end
|
18
|
+
|
25
19
|
autoload(:ChiSquare, 'distribution/chisquare')
|
26
20
|
autoload(:T, 'distribution/t')
|
27
21
|
autoload(:F, 'distribution/f')
|
data/lib/distribution/f.rb
CHANGED
@@ -8,7 +8,13 @@ module Distribution
|
|
8
8
|
#
|
9
9
|
# Distribution::F.p_value(0.95,1,2)
|
10
10
|
def p_value(pr,k1,k2)
|
11
|
-
Statistics2
|
11
|
+
# Statistics2 has some troubles with extreme f values
|
12
|
+
if Distribution.has_gsl?
|
13
|
+
GSL::Cdf.fdist_Pinv(pr,k1,k2)
|
14
|
+
else
|
15
|
+
#puts "F:#{k1}, #{k2},#{pr}"
|
16
|
+
Statistics2.pfdist(k1,k2, pr)
|
17
|
+
end
|
12
18
|
end
|
13
19
|
# F cumulative distribution function (cdf).
|
14
20
|
#
|
@@ -18,7 +24,11 @@ module Distribution
|
|
18
24
|
# Distribution::F.cdf(20,3,2)
|
19
25
|
#
|
20
26
|
def cdf(x, k1, k2)
|
21
|
-
|
27
|
+
if Distribution.has_gsl?
|
28
|
+
GSL::Cdf.fdist_P(x.to_f,k1,k2)
|
29
|
+
else
|
30
|
+
Statistics2.fdist(k1, k2,x)
|
31
|
+
end
|
22
32
|
end
|
23
33
|
end
|
24
34
|
end
|
data/lib/statsample.rb
CHANGED
@@ -102,18 +102,20 @@ end
|
|
102
102
|
# * Interfaces to gdchart, gnuplot and SVG::Graph
|
103
103
|
#
|
104
104
|
module Statsample
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
105
|
+
@@has_gsl=nil
|
106
|
+
def self.has_gsl?
|
107
|
+
if @@has_gsl.nil?
|
108
|
+
begin
|
109
|
+
require 'rbgsl'
|
110
|
+
@@has_gsl=true
|
111
|
+
rescue LoadError
|
112
|
+
@@has_gsl=false
|
113
|
+
end
|
113
114
|
end
|
115
|
+
@@has_gsl
|
114
116
|
end
|
115
117
|
|
116
|
-
VERSION = '0.
|
118
|
+
VERSION = '0.15.0'
|
117
119
|
SPLIT_TOKEN = ","
|
118
120
|
autoload(:Database, 'statsample/converters')
|
119
121
|
autoload(:Anova, 'statsample/anova')
|
@@ -92,7 +92,7 @@ module Statsample
|
|
92
92
|
:name_numerator=>_("Between Groups"),
|
93
93
|
:name_denominator=>_("Within Groups"),
|
94
94
|
:summary_descriptives=>false,
|
95
|
-
:summary_levene=>
|
95
|
+
:summary_levene=>true}
|
96
96
|
@opts=opts_default.merge(opts).merge(:ss_num=>ssbg, :ss_den=>sswg, :df_num=>df_bg, :df_den=>df_wg)
|
97
97
|
super(@opts)
|
98
98
|
end
|
@@ -107,16 +107,10 @@ module Statsample
|
|
107
107
|
|
108
108
|
# Two Way Anova with vectors
|
109
109
|
# Example:
|
110
|
-
# v1=[
|
111
|
-
# v2=[
|
112
|
-
# v3=[5,3,1,5
|
113
|
-
# anova=Statsample::Anova::
|
114
|
-
# anova.f
|
115
|
-
# => 0.0243902439024391
|
116
|
-
# anova.probability
|
117
|
-
# => 0.975953044203438
|
118
|
-
# anova.sst
|
119
|
-
# => 32.9333333333333
|
110
|
+
# v1=[1,1,2,2].to_scale
|
111
|
+
# v2=[1,2,1,2].to_scale
|
112
|
+
# v3=[5,3,1,5].to_scale
|
113
|
+
# anova=Statsample::Anova::TwoWayWithVectors.new(:a=>v1,:b=>v2, :dependent=>v3)
|
120
114
|
#
|
121
115
|
class TwoWayWithVectors < TwoWay
|
122
116
|
# Show summary Levene test
|
data/lib/statsample/dataset.rb
CHANGED
@@ -407,7 +407,7 @@ module Statsample
|
|
407
407
|
# if fields parameter is empty, return the mean for all fields
|
408
408
|
# if max invalid parameter > 0, returns the mean for all tuples
|
409
409
|
# with 0 to max_invalid invalid fields
|
410
|
-
def vector_mean(fields=nil,max_invalid=0)
|
410
|
+
def vector_mean(fields=nil, max_invalid=0)
|
411
411
|
a=[]
|
412
412
|
fields=check_fields(fields)
|
413
413
|
size=fields.size
|
@@ -50,7 +50,7 @@ module Statsample
|
|
50
50
|
# | b - c | 1.0 | 1.0 | 1.0 |
|
51
51
|
# -----------------------------------------
|
52
52
|
#
|
53
|
-
# ==
|
53
|
+
# == Reference:
|
54
54
|
# * Budescu, D. V. (1993). Dominance analysis: a new approach to the problem of relative importance of predictors in multiple regression. <em>Psychological Bulletin, 114</em>, 542-551.
|
55
55
|
# * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. <em>Psychological Methods, 8</em>(2), 129-148.
|
56
56
|
# * Azen, R. & Budescu, D.V. (2006). Comparing predictors in Multivariate Regression Models: An extension of Dominance Analysis. <em>Journal of Educational and Behavioral Statistics, 31</em>(2), 157-180.
|
@@ -63,7 +63,6 @@ module Statsample
|
|
63
63
|
# ---------------------------------------
|
64
64
|
#
|
65
65
|
# == References:
|
66
|
-
#
|
67
66
|
# * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. <em>Psychological Methods, 8</em>(2), 129-148.
|
68
67
|
class Bootstrap
|
69
68
|
include Writable
|
@@ -31,7 +31,7 @@ module Statsample
|
|
31
31
|
# Current algorithm is loosely based on SPSS O'Connor algorithm
|
32
32
|
#
|
33
33
|
# == Reference
|
34
|
-
# * O'Connor, B. (2000). SPSS and SAS programs for determining the number of components using parallel analysis and Velicer
|
34
|
+
# * O'Connor, B. (2000). SPSS and SAS programs for determining the number of components using parallel analysis and Velicer's MAP test. Behavior Research Methods, Instruments, & Computers, 32(3), 396-402.
|
35
35
|
#
|
36
36
|
|
37
37
|
|
@@ -15,9 +15,9 @@ module Statsample
|
|
15
15
|
# *With number of cases and variables*
|
16
16
|
# pa=Statsample::Factor::ParallelAnalysis.with_random_data(100,8)
|
17
17
|
#
|
18
|
-
# ==
|
18
|
+
# == Reference:
|
19
19
|
# * Hayton, J., Allen, D. & Scarpello, V.(2004). Factor Retention Decisions in Exploratory Factor Analysis: a Tutorial on Parallel Analysis. <i>Organizational Research Methods, 7</i> (2), 191-205.
|
20
|
-
# * O'Connor, B. (2000). SPSS and SAS programs for determining the number of components using parallel analysis and Velicer
|
20
|
+
# * O'Connor, B. (2000). SPSS and SAS programs for determining the number of components using parallel analysis and Velicer's MAP test. Behavior Research Methods, Instruments, & Computers, 32(3), 396-402.
|
21
21
|
# * Liu, O., & Rijmen, F. (2008). A modified procedure for parallel analysis of ordered categorical data. Behavior Research Methods, 40(2), 556-562.
|
22
22
|
|
23
23
|
class ParallelAnalysis
|
@@ -24,8 +24,7 @@ module Factor
|
|
24
24
|
# => [0.962964636346122, 0.962964636346122]
|
25
25
|
#
|
26
26
|
# == References:
|
27
|
-
#
|
28
|
-
# * SPSS manual
|
27
|
+
# * SPSS Manual
|
29
28
|
# * Smith, L. (2002). A tutorial on Principal Component Analysis. Available on http://courses.eas.ualberta.ca/eas570/pca_tutorial.pdf
|
30
29
|
#
|
31
30
|
class PCA
|
@@ -22,8 +22,7 @@ module Factor
|
|
22
22
|
# => [0.962964636346122, 0.962964636346122]
|
23
23
|
#
|
24
24
|
# == References:
|
25
|
-
#
|
26
|
-
# * SPSS manual
|
25
|
+
# * SPSS Manual
|
27
26
|
# * Smith, L. (2002). A tutorial on Principal Component Analysis. Available on http://courses.eas.ualberta.ca/eas570/pca_tutorial.pdf
|
28
27
|
#
|
29
28
|
class PrincipalAxis
|
@@ -2,9 +2,9 @@ module Statsample
|
|
2
2
|
module Factor
|
3
3
|
# Base class for component matrix rotation.
|
4
4
|
#
|
5
|
-
# ==
|
5
|
+
# == Reference:
|
6
6
|
# * SPSS Manual
|
7
|
-
# *
|
7
|
+
# * Lin, J. (2007). VARIMAX_K58 [Source code]. [http://www.johnny-lin.com/idl_code/varimax_k58.pro]
|
8
8
|
#
|
9
9
|
# Use subclasses Varimax, Equimax or Quartimax for desired type of rotation
|
10
10
|
# Use:
|
data/lib/statsample/histogram.rb
CHANGED
@@ -34,8 +34,8 @@ module Statsample
|
|
34
34
|
# add an extra bin to your histogram.
|
35
35
|
#
|
36
36
|
#
|
37
|
-
# Reference:
|
38
|
-
# http://www.gnu.org/software/gsl/manual/html_node/The-histogram-struct.html
|
37
|
+
# == Reference:
|
38
|
+
# * http://www.gnu.org/software/gsl/manual/html_node/The-histogram-struct.html
|
39
39
|
|
40
40
|
class Histogram
|
41
41
|
class << self
|
@@ -21,6 +21,21 @@ module Statsample
|
|
21
21
|
}.to_dataset
|
22
22
|
cronbach_alpha(ds)
|
23
23
|
end
|
24
|
+
# Spearman-Brown prophecy formula.
# Returns the predicted reliability of a test whose length is
# multiplied by +n+, given its current reliability +r+:
#   (n*r) / (1 + (n-1)*r)
def spearman_brown_prophecy(r,n)
  numerator=n*r
  denominator=1+(n-1)*r
  numerator.quo(denominator)
end
|
29
|
+
|
30
|
+
alias :sbp :spearman_brown_prophecy
|
31
|
+
# Returns the number of items needed to reach the desired
# reliability +r_d+, given that the current reliability +r+
# was achieved with +n+ items (1 by default):
#   n * (r_d*(1-r)) / (r*(1-r_d))
def n_for_desired_reliability(r,r_d,n=1)
  lengthening_factor=(r_d*(1-r)).quo(r*(1-r_d))
  lengthening_factor*n
end
|
38
|
+
|
24
39
|
# Get Cronbach alpha from <tt>n</tt> cases,
|
25
40
|
# <tt>s2</tt> mean variance and <tt>cov</tt>
|
26
41
|
# mean covariance
|
@@ -120,5 +135,7 @@ module Statsample
|
|
120
135
|
end # self
|
121
136
|
end # Reliability
|
122
137
|
end # Statsample
|
138
|
+
|
139
|
+
require 'statsample/reliability/icc.rb'
|
123
140
|
require 'statsample/reliability/scaleanalysis.rb'
|
124
141
|
require 'statsample/reliability/multiscaleanalysis.rb'
|
@@ -0,0 +1,404 @@
|
|
1
|
+
module Statsample
|
2
|
+
module Reliability
|
3
|
+
# = Intra-class correlation
|
4
|
+
# According to Shrout & Fleiss (1979, p.422): "ICC is the correlation
|
5
|
+
# between one measurement (either a single rating or a mean of
|
6
|
+
# several ratings) on a target and another measurement obtained on that target"
|
7
|
+
#
|
8
|
+
# == Reference
|
9
|
+
# * Shrout,P. & Fleiss, J. (1979). Intraclass Correlation: Uses in assessing rater reliability. Psychological Bulletin, 86(2), 420-428
|
10
|
+
# * McGraw, K. & Wong, S.P. (1996). Forming Inferences About Some Intraclass Correlation Coefficients. Psychological methods, 1(1), 30-46.
|
11
|
+
|
12
|
+
class ICC
|
13
|
+
include Summarizable
|
14
|
+
|
15
|
+
# Create an ICC analysis for a given dataset
|
16
|
+
# Each vector is a different measurement. Only uses complete data
|
17
|
+
# (listwise deletion).
|
18
|
+
#
|
19
|
+
|
20
|
+
attr_reader :df_bt
|
21
|
+
attr_reader :df_wt
|
22
|
+
attr_reader :df_bj
|
23
|
+
attr_reader :df_residual
|
24
|
+
|
25
|
+
attr_reader :ms_bt
|
26
|
+
attr_reader :ms_wt
|
27
|
+
attr_reader :ms_bj
|
28
|
+
attr_reader :ms_residual
|
29
|
+
|
30
|
+
alias :bms :ms_bt
|
31
|
+
alias :wms :ms_wt
|
32
|
+
alias :jms :ms_bj
|
33
|
+
alias :ems :ms_residual
|
34
|
+
|
35
|
+
alias :msr :ms_bt
|
36
|
+
alias :msw :ms_wt
|
37
|
+
alias :msc :ms_bj
|
38
|
+
alias :mse :ms_residual
|
39
|
+
|
40
|
+
# :section: Shrout and Fleiss ICC denominations
|
41
|
+
attr_reader :icc_1_1
|
42
|
+
attr_reader :icc_2_1
|
43
|
+
attr_reader :icc_3_1
|
44
|
+
attr_reader :icc_1_k
|
45
|
+
attr_reader :icc_2_k
|
46
|
+
attr_reader :icc_3_k
|
47
|
+
|
48
|
+
# :section: McGraw and Wong ICC denominations
|
49
|
+
|
50
|
+
attr_reader :icc_1
|
51
|
+
attr_reader :icc_c_1
|
52
|
+
attr_reader :icc_a_1
|
53
|
+
attr_reader :icc_k
|
54
|
+
attr_reader :icc_c_k
|
55
|
+
attr_reader :icc_a_k
|
56
|
+
|
57
|
+
|
58
|
+
attr_reader :n, :k
|
59
|
+
attr_reader :total_mean
|
60
|
+
# Type of analysis, for easy summarization
|
61
|
+
# By default, set to :icc_1
|
62
|
+
# * Shrout & Fleiss(1979) denominations
|
63
|
+
# * :icc_1_1
|
64
|
+
# * :icc_2_1
|
65
|
+
# * :icc_3_1
|
66
|
+
# * :icc_1_k
|
67
|
+
# * :icc_2_k
|
68
|
+
# * :icc_3_k
|
69
|
+
# * McGraw & Wong (1996) denominations
|
70
|
+
# * :icc_1
|
71
|
+
# * :icc_k
|
72
|
+
# * :icc_c_1
|
73
|
+
# * :icc_c_k
|
74
|
+
# * :icc_a_1
|
75
|
+
# * :icc_a_k
|
76
|
+
|
77
|
+
attr_reader :type
|
78
|
+
# ICC value, set with :type
|
79
|
+
attr_reader :r
|
80
|
+
attr_reader :f
|
81
|
+
attr_reader :lbound
|
82
|
+
attr_reader :ubound
|
83
|
+
|
84
|
+
attr_accessor :g_rho
|
85
|
+
attr_accessor :alpha
|
86
|
+
attr_accessor :name
|
87
|
+
# Creates the analysis for dataset +ds+; each vector is one measurement.
# Only the valid cases returned by +ds.dup_only_valid+ are used.
#
# +opts+ may set any writable attribute. The defaults are
# <tt>:name=>"Intra-class correlation"</tt> and <tt>:type=>:icc_1</tt>.
# Options without a matching writer are ignored.
def initialize(ds, opts=Hash.new)
  @ds=ds.dup_only_valid
  @vectors=@ds.vectors.values
  @n=@ds.cases
  @k=@ds.fields.size
  compute
  @g_rho=0
  @alpha=0.05
  @icc_name=nil
  opts_default={:name=>"Intra-class correlation", :type=>:icc_1}
  @opts=opts_default.merge(opts)
  # type= computes the F test and the confidence interval from the
  # current @g_rho and @alpha, so every other option is applied first.
  # The default :type precedes caller options in the merged hash, so
  # plain hash order would use the default alpha and rho.
  type_opts, other_opts=@opts.partition { |key,_| key.to_s=="type" }
  (other_opts+type_opts).each do |key,value|
    setter="#{key}="
    # Check for the writer being sent, not the reader of the same name.
    send(setter,value) if respond_to?(setter)
  end
end
|
100
|
+
# Selects the ICC type to report and refreshes the values derived
# from it: the descriptive name, the coefficient (+r+), the F test
# (+f+) and the confidence bounds (+lbound+, +ubound+), using the
# current +alpha+ and, for McGraw & Wong types, the current +g_rho+.
#
# Raises RuntimeError if +v+ is not a known type; the previous
# selection is kept in that case.
def type=(v)
  case v
  when :icc_1_1
    @icc_name=_("Shrout & Fleiss ICC(1,1)")
    @r=@icc_1_1
    @f=icc_1_f
    @lbound, @ubound=icc_1_1_ci(@alpha)
  when :icc_2_1
    @icc_name=_("Shrout & Fleiss ICC(2,1)")
    @r=@icc_2_1
    @f=icc_2_f
    @lbound, @ubound=icc_2_1_ci(@alpha)
  when :icc_3_1
    @icc_name=_("Shrout & Fleiss ICC(3,1)")
    @r=@icc_3_1
    @f=icc_3_f
    @lbound, @ubound=icc_3_1_ci(@alpha)
  when :icc_1_k
    @icc_name=_("Shrout & Fleiss ICC(1,k)")
    @r=@icc_1_k
    @f=icc_1_k_f
    @lbound, @ubound=icc_1_k_ci(@alpha)
  when :icc_2_k
    @icc_name=_("Shrout & Fleiss ICC(2,k)")
    @r=@icc_2_k
    # No icc_2_k_f is defined in this class; the case 2 F test
    # (icc_2_f) is used instead.
    # NOTE(review): assumes the same F test applies to the single and
    # average forms - confirm against Shrout & Fleiss (1979).
    @f=icc_2_f
    @lbound, @ubound=icc_2_k_ci(@alpha)
  when :icc_3_k
    @icc_name=_("Shrout & Fleiss ICC(3,k)")
    @r=@icc_3_k
    # No icc_3_k_f is defined in this class; see the note on :icc_2_k.
    @f=icc_3_f
    @lbound, @ubound=icc_3_k_ci(@alpha)
  when :icc_1
    @icc_name=_("McGraw & Wong ICC(1)")
    @r=@icc_1_1
    @f=icc_1_f(@g_rho)
    @lbound, @ubound=icc_1_1_ci(@alpha)
  when :icc_k
    @icc_name=_("McGraw & Wong ICC(K)")
    @r=@icc_1_k
    @f=icc_1_k_f(@g_rho)
    @lbound, @ubound=icc_1_k_ci(@alpha)
  when :icc_c_1
    @icc_name=_("McGraw & Wong ICC(C,1)")
    @r=@icc_3_1
    @f=icc_c_1_f(@g_rho)
    @lbound, @ubound=icc_3_1_ci(@alpha)
  when :icc_c_k
    @icc_name=_("McGraw & Wong ICC(C,K)")
    @r=@icc_3_k
    @f=icc_c_k_f(@g_rho)
    @lbound, @ubound=icc_c_k_ci(@alpha)
  when :icc_a_1
    @icc_name=_("McGraw & Wong ICC(A,1)")
    @r=@icc_2_1
    @f=icc_a_1_f(@g_rho)
    @lbound,@ubound = icc_2_1_ci(@alpha)
  when :icc_a_k
    @icc_name=_("McGraw & Wong ICC(A,K)")
    @r=@icc_2_k
    @f=icc_a_k_f(@g_rho)
    @lbound,@ubound=icc_2_k_ci(@alpha)
  else
    raise "Type #{v} doesn't exists"
  end
  # Remember the selection so the +type+ reader reflects it.
  @type=v
end
|
185
|
+
def compute
|
186
|
+
@df_bt=n-1
|
187
|
+
@df_wt=n*(k-1)
|
188
|
+
@df_bj=k-1
|
189
|
+
@df_residual=(n-1)*(k-1)
|
190
|
+
@total_mean=@vectors.inject(0){|ac,v| ac+v.sum}.quo(n*k)
|
191
|
+
vm=@ds.vector_mean
|
192
|
+
|
193
|
+
@ss_bt=k*vm.ss(@total_mean)
|
194
|
+
@ms_bt=@ss_bt.quo(@df_bt)
|
195
|
+
|
196
|
+
@ss_bj=n*@vectors.inject(0){|ac,v| ac+(v.mean-@total_mean).square}
|
197
|
+
@ms_bj=@ss_bj.quo(@df_bj)
|
198
|
+
|
199
|
+
@ss_wt=@vectors.inject(0){|ac,v| ac+(v-vm).ss(0)}
|
200
|
+
@ms_wt=@ss_wt.quo(@df_wt)
|
201
|
+
|
202
|
+
@ss_residual=@ss_wt-@ss_bj
|
203
|
+
@ms_residual=@ss_residual.quo(@df_residual)
|
204
|
+
###
|
205
|
+
# Shrout and Fleiss denomination
|
206
|
+
###
|
207
|
+
# ICC(1,1) / ICC(1)
|
208
|
+
@icc_1_1=(bms-wms).quo(bms+(k-1)*wms)
|
209
|
+
# ICC(2,1) / ICC(A,1)
|
210
|
+
@icc_2_1=(bms-ems).quo(bms+(k-1)*ems+k*(jms - ems).quo(n))
|
211
|
+
# ICC(3,1) / ICC(C,1)
|
212
|
+
@icc_3_1=(bms-ems).quo(bms+(k-1)*ems)
|
213
|
+
|
214
|
+
|
215
|
+
|
216
|
+
# ICC(1,K) / ICC(K)
|
217
|
+
@icc_1_k=(bms-wms).quo(bms)
|
218
|
+
# ICC(2,K) / ICC(A,k)
|
219
|
+
@icc_2_k=(bms-ems).quo(bms+(jms-ems).quo(n))
|
220
|
+
# ICC(3,K) / ICC(C,k) = Cronbach's alpha
|
221
|
+
@icc_3_k=(bms-ems).quo(bms)
|
222
|
+
|
223
|
+
###
|
224
|
+
# McGraw and Wong
|
225
|
+
###
|
226
|
+
|
227
|
+
end
|
228
|
+
|
229
|
+
def icc_1_f(rho=0.0)
|
230
|
+
num=msr*(1-rho)
|
231
|
+
den=msw*(1+(k-1)*rho)
|
232
|
+
Statsample::Test::F.new(num, den, @df_bt, @df_wt)
|
233
|
+
end
|
234
|
+
# One way random F, type k
|
235
|
+
def icc_1_k_f(rho=0)
|
236
|
+
num=msr*(1-rho)
|
237
|
+
den=msw
|
238
|
+
Statsample::Test::F.new(num, den, @df_bt, @df_wt)
|
239
|
+
end
|
240
|
+
|
241
|
+
def icc_c_1_f(rho=0)
|
242
|
+
num=msr*(1-rho)
|
243
|
+
den=mse*(1+(k-1)*rho)
|
244
|
+
Statsample::Test::F.new(num, den, @df_bt, @df_residual)
|
245
|
+
end
|
246
|
+
def icc_c_k_f(rho=0)
|
247
|
+
num=(1-rho)
|
248
|
+
den=1-@icc_3_k
|
249
|
+
Statsample::Test::F.new(num, den, @df_bt, @df_residual)
|
250
|
+
end
|
251
|
+
|
252
|
+
# Degrees of freedom for the linear combination a*MSC + b*MSE:
#   (a*MSC + b*MSE)^2 / ( (a*MSC)^2/(k-1) + (b*MSE)^2/((n-1)*(k-1)) )
# NOTE(review): this has the form of a Satterthwaite approximation, as
# used by McGraw & Wong (1996) - confirm against the paper.
def v(a,b)
  weighted_msc=a*msc
  weighted_mse=b*mse
  # Square first, then divide: "x**2.quo(y)" would parse as x**(2.quo(y)).
  denominator=(weighted_msc**2).quo(k-1)+(weighted_mse**2).quo((n-1)*(k-1))
  ((weighted_msc+weighted_mse)**2).quo(denominator)
end
|
255
|
+
def a(rho)
|
256
|
+
(k*rho).quo(n*(1-rho))
|
257
|
+
end
|
258
|
+
def b(rho)
|
259
|
+
1+((k*rho*(n-1)).quo(n*(1-rho)))
|
260
|
+
end
|
261
|
+
def c(rho)
|
262
|
+
rho.quo(n*(1-rho))
|
263
|
+
end
|
264
|
+
def d(rho)
|
265
|
+
1+((rho*(n-1)).quo(n*(1-rho)))
|
266
|
+
end
|
267
|
+
private :v, :a, :b, :c, :d
|
268
|
+
def icc_a_1_f(rho=0)
|
269
|
+
fj=jms.quo(ems)
|
270
|
+
num=msr
|
271
|
+
den=a(rho)*msc+b(rho)*mse
|
272
|
+
pp = @icc_2_1
|
273
|
+
vn=(k-1)*(n-1)*((k*pp*fj+n*(1+(k-1)*pp)-k*pp)**2)
|
274
|
+
vd=(n-1)*(k**2)*(pp**2)*(fj**2)+((n*(1+(k-1)*pp)-k*pp)**2)
|
275
|
+
v=vn.quo(vd)
|
276
|
+
Statsample::Test::F.new(num, den, @df_bt, v)
|
277
|
+
end
|
278
|
+
|
279
|
+
def icc_a_k_f(rho=0)
|
280
|
+
num=msr
|
281
|
+
den=c(rho)*msc+d(rho)*mse
|
282
|
+
|
283
|
+
fj=jms.quo(ems)
|
284
|
+
|
285
|
+
pp = @icc_2_k
|
286
|
+
vn=(k-1)*(n-1)*((k*pp*fj+n*(1+(k-1)*pp)-k*pp)**2)
|
287
|
+
vd=(n-1)*(k**2)*(pp**2)*(fj**2)+((n*(1+(k-1)*pp)-k*pp)**2)
|
288
|
+
v=vn.quo(vd)
|
289
|
+
|
290
|
+
|
291
|
+
Statsample::Test::F.new(num, den, @df_bt,v)
|
292
|
+
|
293
|
+
end
|
294
|
+
|
295
|
+
# F test for ICC Case 1. Shrout and Fleiss
|
296
|
+
def icc_1_f_shrout
|
297
|
+
Statsample::Test::F.new(bms, wms, @df_bt, @df_wt)
|
298
|
+
end
|
299
|
+
|
300
|
+
# Confidence interval for ICC(1,1)
|
301
|
+
def icc_1_1_ci(alpha=0.05)
|
302
|
+
per=1-(0.5*alpha)
|
303
|
+
|
304
|
+
fu=icc_1_f.f*Distribution::F.p_value(per, @df_wt, @df_bt)
|
305
|
+
fl=icc_1_f.f.quo(Distribution::F.p_value(per, @df_bt, @df_wt))
|
306
|
+
|
307
|
+
[(fl-1).quo(fl+k-1), (fu-1).quo(fu+k-1)]
|
308
|
+
end
|
309
|
+
|
310
|
+
# Confidence interval for ICC(1,k)
|
311
|
+
def icc_1_k_ci(alpha=0.05)
|
312
|
+
per=1-(0.5*alpha)
|
313
|
+
fu=icc_1_f.f*Distribution::F.p_value(per, @df_wt, @df_bt)
|
314
|
+
fl=icc_1_f.f.quo(Distribution::F.p_value(per, @df_bt, @df_wt))
|
315
|
+
[1-1.quo(fl), 1-1.quo(fu)]
|
316
|
+
end
|
317
|
+
|
318
|
+
# F test for ICC Case 2
|
319
|
+
def icc_2_f
|
320
|
+
Statsample::Test::F.new(bms, ems, @df_bt, @df_residual)
|
321
|
+
end
|
322
|
+
|
323
|
+
|
324
|
+
#
|
325
|
+
# F* for ICC(2,1) and ICC(2,k)
|
326
|
+
#
|
327
|
+
def icc_2_1_fs(pp,alpha=0.05)
|
328
|
+
fj=jms.quo(ems)
|
329
|
+
per=1-(0.5*alpha)
|
330
|
+
vn=(k-1)*(n-1)*((k*pp*fj+n*(1+(k-1)*pp)-k*pp)**2)
|
331
|
+
vd=(n-1)*(k**2)*(pp**2)*(fj**2)+((n*(1+(k-1)*pp)-k*pp)**2)
|
332
|
+
v=vn.quo(vd)
|
333
|
+
f1=Distribution::F.p_value(per, n-1,v)
|
334
|
+
f2=Distribution::F.p_value(per, v, n-1)
|
335
|
+
[f1,f2]
|
336
|
+
end
|
337
|
+
|
338
|
+
|
339
|
+
# Confidence interval for ICC(2,1) / ICC(A,1) at significance level
# +alpha+. Returns [lower, upper], delegating to the McGraw & Wong
# version.
def icc_2_1_ci(alpha=0.05)
  # Forward alpha; without it the caller's level is ignored and the
  # delegate always uses its own default.
  icc_2_1_ci_mcgraw(alpha)
end
|
342
|
+
|
343
|
+
# Confidence interval for ICC(A,1), McGraw & Wong (1996)
|
344
|
+
|
345
|
+
def icc_2_1_ci_mcgraw(alpha=0.05)
|
346
|
+
fd,fu=icc_2_1_fs(icc_2_1,alpha)
|
347
|
+
cl=(n*(msr-fd*mse)).quo(fd*(k*msc+(k*n-k-n)*mse)+n*msr)
|
348
|
+
cu=(n*(fu*msr-mse)).quo(k*msc+(k*n-k-n)*mse+n*fu*msr)
|
349
|
+
[cl,cu]
|
350
|
+
end
|
351
|
+
|
352
|
+
def icc_2_k_ci(alpha=0.05)
|
353
|
+
icc_2_k_ci_mcgraw(alpha)
|
354
|
+
end
|
355
|
+
|
356
|
+
def icc_2_k_ci_mcgraw(alpha=0.05)
|
357
|
+
f1,f2=icc_2_1_fs(icc_2_k,alpha)
|
358
|
+
[
|
359
|
+
(n*(msr-f1*mse)).quo(f1*(msc-mse)+n*msr),
|
360
|
+
(n*(f2*msr-mse)).quo(msc-mse+n*f2*msr)
|
361
|
+
]
|
362
|
+
|
363
|
+
end
|
364
|
+
def icc_2_k_ci_shrout(alpha=0.05)
|
365
|
+
ci=icc_2_1_ci(alpha)
|
366
|
+
[(ci[0]*k).quo(1+(k-1)*ci[0]), (ci[1]*k).quo(1+(k-1)*ci[1])]
|
367
|
+
end
|
368
|
+
|
369
|
+
|
370
|
+
def icc_3_f
|
371
|
+
Statsample::Test::F.new(bms, ems, @df_bt, @df_residual)
|
372
|
+
end
|
373
|
+
|
374
|
+
def icc_3_1_ci(alpha=0.05)
|
375
|
+
per=1-(0.5*alpha)
|
376
|
+
fl=icc_3_f.f.quo(Distribution::F.p_value(per, @df_bt, @df_residual))
|
377
|
+
fu=icc_3_f.f*Distribution::F.p_value(per, @df_residual, @df_bt)
|
378
|
+
[(fl-1).quo(fl+k-1), (fu-1).quo(fu+k-1)]
|
379
|
+
end
|
380
|
+
|
381
|
+
def icc_3_k_ci(alpha=0.05)
|
382
|
+
per=1-(0.5*alpha)
|
383
|
+
fl=icc_3_f.f.quo(Distribution::F.p_value(per, @df_bt, @df_residual))
|
384
|
+
fu=icc_3_f.f*Distribution::F.p_value(per, @df_residual, @df_bt)
|
385
|
+
[1-1.quo(fl),1-1.quo(fu)]
|
386
|
+
end
|
387
|
+
|
388
|
+
def icc_c_k_ci(alpha=0.05)
|
389
|
+
per=1-(0.5*alpha)
|
390
|
+
fl=icc_c_k_f.f.quo(Distribution::F.p_value(per, @df_bt, @df_residual))
|
391
|
+
fu=icc_c_k_f.f*Distribution::F.p_value(per, @df_residual, @df_bt)
|
392
|
+
[1-1.quo(fl),1-1.quo(fu)]
|
393
|
+
end
|
394
|
+
def report_building(b)
|
395
|
+
b.section(:name=>name) do |s|
|
396
|
+
s.text @icc_name
|
397
|
+
s.text _("ICC: %0.4f") % @r
|
398
|
+
s.parse_element(@f)
|
399
|
+
s.text _("CI (%0.2f): [%0.4f - %0.4f]") % [(1-@alpha)*100, @lbound, @ubound]
|
400
|
+
end
|
401
|
+
end
|
402
|
+
end
|
403
|
+
end
|
404
|
+
end
|