statsample-ekatena 2.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.travis.yml +23 -0
- data/CONTRIBUTING.md +17 -0
- data/Gemfile +2 -0
- data/History.txt +457 -0
- data/LICENSE.txt +12 -0
- data/README.md +175 -0
- data/Rakefile +44 -0
- data/benchmarks/correlation_matrix_15_variables.rb +32 -0
- data/benchmarks/correlation_matrix_5_variables.rb +33 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
- data/benchmarks/correlation_matrix_methods/results.ds +0 -0
- data/benchmarks/factor_map.rb +37 -0
- data/benchmarks/helpers_benchmark.rb +5 -0
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/doc_latex/manual/equations.tex +78 -0
- data/examples/boxplot.rb +28 -0
- data/examples/chisquare_test.rb +23 -0
- data/examples/correlation_matrix.rb +32 -0
- data/examples/dataset.rb +30 -0
- data/examples/dominance_analysis.rb +33 -0
- data/examples/dominance_analysis_bootstrap.rb +32 -0
- data/examples/histogram.rb +26 -0
- data/examples/icc.rb +24 -0
- data/examples/levene.rb +29 -0
- data/examples/multiple_regression.rb +20 -0
- data/examples/multivariate_correlation.rb +33 -0
- data/examples/parallel_analysis.rb +40 -0
- data/examples/polychoric.rb +40 -0
- data/examples/principal_axis.rb +26 -0
- data/examples/reliability.rb +31 -0
- data/examples/scatterplot.rb +25 -0
- data/examples/t_test.rb +27 -0
- data/examples/tetrachoric.rb +17 -0
- data/examples/u_test.rb +24 -0
- data/examples/vector.rb +20 -0
- data/examples/velicer_map_test.rb +46 -0
- data/grab_references.rb +29 -0
- data/lib/spss.rb +134 -0
- data/lib/statsample-ekatena/analysis.rb +100 -0
- data/lib/statsample-ekatena/analysis/suite.rb +89 -0
- data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
- data/lib/statsample-ekatena/anova.rb +24 -0
- data/lib/statsample-ekatena/anova/contrast.rb +79 -0
- data/lib/statsample-ekatena/anova/oneway.rb +187 -0
- data/lib/statsample-ekatena/anova/twoway.rb +207 -0
- data/lib/statsample-ekatena/bivariate.rb +406 -0
- data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
- data/lib/statsample-ekatena/codification.rb +182 -0
- data/lib/statsample-ekatena/converter/csv.rb +28 -0
- data/lib/statsample-ekatena/converter/spss.rb +48 -0
- data/lib/statsample-ekatena/converters.rb +211 -0
- data/lib/statsample-ekatena/crosstab.rb +188 -0
- data/lib/statsample-ekatena/daru.rb +115 -0
- data/lib/statsample-ekatena/dataset.rb +10 -0
- data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
- data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
- data/lib/statsample-ekatena/factor.rb +104 -0
- data/lib/statsample-ekatena/factor/map.rb +124 -0
- data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
- data/lib/statsample-ekatena/factor/pca.rb +242 -0
- data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
- data/lib/statsample-ekatena/factor/rotation.rb +198 -0
- data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
- data/lib/statsample-ekatena/formula/formula.rb +306 -0
- data/lib/statsample-ekatena/graph.rb +11 -0
- data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
- data/lib/statsample-ekatena/graph/histogram.rb +198 -0
- data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
- data/lib/statsample-ekatena/histogram.rb +180 -0
- data/lib/statsample-ekatena/matrix.rb +329 -0
- data/lib/statsample-ekatena/multiset.rb +310 -0
- data/lib/statsample-ekatena/regression.rb +65 -0
- data/lib/statsample-ekatena/regression/multiple.rb +89 -0
- data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
- data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
- data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
- data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
- data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
- data/lib/statsample-ekatena/regression/simple.rb +121 -0
- data/lib/statsample-ekatena/reliability.rb +150 -0
- data/lib/statsample-ekatena/reliability/icc.rb +415 -0
- data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
- data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
- data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
- data/lib/statsample-ekatena/resample.rb +15 -0
- data/lib/statsample-ekatena/shorthand.rb +125 -0
- data/lib/statsample-ekatena/srs.rb +169 -0
- data/lib/statsample-ekatena/test.rb +82 -0
- data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
- data/lib/statsample-ekatena/test/chisquare.rb +73 -0
- data/lib/statsample-ekatena/test/f.rb +52 -0
- data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
- data/lib/statsample-ekatena/test/levene.rb +88 -0
- data/lib/statsample-ekatena/test/t.rb +309 -0
- data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
- data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
- data/lib/statsample-ekatena/vector.rb +19 -0
- data/lib/statsample-ekatena/version.rb +3 -0
- data/lib/statsample.rb +282 -0
- data/po/es/statsample.mo +0 -0
- data/po/es/statsample.po +959 -0
- data/po/statsample.pot +947 -0
- data/references.txt +24 -0
- data/statsample-ekatena.gemspec +49 -0
- data/test/fixtures/bank2.dat +200 -0
- data/test/fixtures/correlation_matrix.rb +17 -0
- data/test/fixtures/df.csv +15 -0
- data/test/fixtures/hartman_23.matrix +9 -0
- data/test/fixtures/stock_data.csv +500 -0
- data/test/fixtures/tetmat_matrix.txt +5 -0
- data/test/fixtures/tetmat_test.txt +1001 -0
- data/test/helpers_tests.rb +83 -0
- data/test/test_analysis.rb +176 -0
- data/test/test_anova_contrast.rb +36 -0
- data/test/test_anovaoneway.rb +26 -0
- data/test/test_anovatwoway.rb +37 -0
- data/test/test_anovatwowaywithdataset.rb +47 -0
- data/test/test_anovawithvectors.rb +102 -0
- data/test/test_awesome_print_bug.rb +16 -0
- data/test/test_bartlettsphericity.rb +25 -0
- data/test/test_bivariate.rb +164 -0
- data/test/test_codification.rb +78 -0
- data/test/test_crosstab.rb +67 -0
- data/test/test_dominance_analysis.rb +39 -0
- data/test/test_factor.rb +228 -0
- data/test/test_factor_map.rb +38 -0
- data/test/test_factor_pa.rb +56 -0
- data/test/test_fit_model.rb +88 -0
- data/test/test_ggobi.rb +35 -0
- data/test/test_gsl.rb +15 -0
- data/test/test_histogram.rb +109 -0
- data/test/test_matrix.rb +48 -0
- data/test/test_multiset.rb +176 -0
- data/test/test_regression.rb +231 -0
- data/test/test_reliability.rb +223 -0
- data/test/test_reliability_icc.rb +198 -0
- data/test/test_reliability_skillscale.rb +57 -0
- data/test/test_resample.rb +24 -0
- data/test/test_srs.rb +9 -0
- data/test/test_statistics.rb +69 -0
- data/test/test_stest.rb +69 -0
- data/test/test_stratified.rb +17 -0
- data/test/test_test_f.rb +33 -0
- data/test/test_test_kolmogorovsmirnov.rb +34 -0
- data/test/test_test_t.rb +62 -0
- data/test/test_umannwhitney.rb +27 -0
- data/test/test_vector.rb +12 -0
- data/test/test_wilcoxonsignedrank.rb +64 -0
- metadata +570 -0
@@ -0,0 +1,205 @@
|
|
1
|
+
module Statsample
|
2
|
+
module Regression
|
3
|
+
module Multiple
|
4
|
+
# Pure Ruby Class for Multiple Regression Analysis, based on a covariance or correlation matrix.
|
5
|
+
#
|
6
|
+
# Use Statsample::Regression::Multiple::RubyEngine if you have a
|
7
|
+
# Dataset, to avoid setting all details.
|
8
|
+
#
|
9
|
+
# <b>Remember:</b> NEVER use a Covariance data if you have missing data. Use only correlation matrix on that case.
|
10
|
+
#
|
11
|
+
#
|
12
|
+
# Example:
|
13
|
+
#
|
14
|
+
# matrix=[[1.0, 0.5, 0.2], [0.5, 1.0, 0.7], [0.2, 0.7, 1.0]]
|
15
|
+
#
|
16
|
+
# lr=Statsample::Regression::Multiple::MatrixEngine.new(matrix,2)
|
17
|
+
|
18
|
+
class MatrixEngine < BaseEngine
|
19
|
+
# Hash of standard deviation of predictors.
|
20
|
+
# Only useful for Correlation Matrix, because by default is set to 1
|
21
|
+
attr_accessor :x_sd
|
22
|
+
# Standard deviation of criterion
|
23
|
+
# Only useful for Correlation Matrix, because by default is set to 1
|
24
|
+
attr_accessor :y_sd
|
25
|
+
# Hash of mean for predictors. By default, set to 0
|
26
|
+
attr_accessor :x_mean
|
27
|
+
|
28
|
+
# Mean for criteria. By default, set to 0
|
29
|
+
attr_accessor :y_mean
|
30
|
+
|
31
|
+
# Number of cases
|
32
|
+
attr_writer :cases
|
33
|
+
attr_writer :digits
|
34
|
+
# Create object
|
35
|
+
#
|
36
|
+
# Builds the regression from a covariance or correlation matrix.
#
# +matrix+ is extended with Statsample::CovariateMatrix and must list
# +y_var+ among its fields. +y_var+ is the criterion; every other field
# is used as a predictor.
# +opts+ maps attribute names to values (for example :cases, :x_mean,
# :y_sd); :digits defaults to 3. Each option is applied through the
# matching writer method and ignored when no such writer exists.
#
# Raises RuntimeError when +y_var+ is not a field of +matrix+, and
# LinearDependency when the determinant of the predictor correlation
# matrix is below 1e-15.
def initialize(matrix, y_var, opts = Hash.new)
  matrix.extend Statsample::CovariateMatrix
  raise "#{y_var} variable should be on data" unless matrix.fields.include? y_var

  from_covariance = (matrix._type == :covariance)
  if from_covariance
    @matrix_cov = matrix
    @matrix_cor = matrix.correlation
  else
    # Only a correlation matrix was supplied, so it is used in both roles.
    @matrix_cor = matrix
    @matrix_cov = matrix
  end
  @no_covariance = !from_covariance

  @y_var = y_var
  @fields = matrix.fields - [y_var]

  @n_predictors = @fields.size
  @predictors_n = @n_predictors
  @matrix_x = @matrix_cor.submatrix(@fields)
  @matrix_x_cov = @matrix_cov.submatrix(@fields)
  raise LinearDependency, "Regressors are linearly dependent" if @matrix_x.determinant < 1e-15

  @matrix_y = @matrix_cor.submatrix(@fields, [y_var])
  @matrix_y_cov = @matrix_cov.submatrix(@fields, [y_var])

  # Standard deviations are read from the diagonal of the covariance matrix.
  @y_sd = Math.sqrt(@matrix_cov.submatrix([y_var])[0, 0])
  @x_sd = @n_predictors.times.each_with_object({}) do |i, sds|
    sds[@matrix_x_cov.fields[i]] = Math.sqrt(@matrix_x_cov[i, i])
  end

  @cases = nil
  @x_mean = @fields.each_with_object({}) { |f, means| means[f] = 0.0 }
  @y_mean = 0.0
  @name = _("Multiple reggresion of %s on %s") % [@fields.join(","), @y_var]

  # Options may override the values derived above (x_sd, y_sd, means, ...),
  # so they are applied before the coefficients are computed. The check is
  # made on the writer, because that is the method actually invoked.
  { :digits => 3 }.merge(opts).each do |key, value|
    writer = "#{key}="
    send(writer, value) if respond_to?(writer)
  end

  result_matrix = @matrix_x_cov.inverse * @matrix_y_cov

  if from_covariance
    # Solution holds raw coefficients; derive the standardized ones.
    @coeffs = result_matrix.column(0).to_a
    @coeffs_stan = coeffs.collect { |k, v| v * @x_sd[k].quo(@y_sd) }
  else
    # Solution holds standardized coefficients; derive the raw ones.
    @coeffs_stan = result_matrix.column(0).to_a
    @coeffs = standarized_coeffs.collect { |k, v| v * @y_sd.quo(@x_sd[k]) }
  end
  @total_cases = @valid_cases = @cases
end
|
98
|
+
def cases
|
99
|
+
raise "You should define the number of valid cases first" if @cases.nil?
|
100
|
+
@cases
|
101
|
+
end
|
102
|
+
# Get R^2 for the regression
|
103
|
+
# For fixed models is the coefficient of determination.
|
104
|
+
# On random models, is the 'squared-multiple correlation'
|
105
|
+
# Equal to
|
106
|
+
# * 1-(|R| / |R_x|) or
|
107
|
+
# * Sum(b_i*r_yi) <- used
|
108
|
+
def r2
|
109
|
+
@n_predictors.times.inject(0) {|ac,i| ac+@coeffs_stan[i]* @matrix_y[i,0]}
|
110
|
+
end
|
111
|
+
# Multiple correlation, on random models.
|
112
|
+
def r
|
113
|
+
Math::sqrt(r2)
|
114
|
+
end
|
115
|
+
# Value of constant
|
116
|
+
def constant
|
117
|
+
c = coeffs
|
118
|
+
@y_mean - @fields.inject(0) { |a,k| a + (c[k] * @x_mean[k])}
|
119
|
+
end
|
120
|
+
# Hash of b or raw coefficients
|
121
|
+
def coeffs
|
122
|
+
assign_names(@coeffs)
|
123
|
+
end
|
124
|
+
# Hash of beta or standardized coefficients
|
125
|
+
|
126
|
+
def standarized_coeffs
|
127
|
+
assign_names(@coeffs_stan)
|
128
|
+
end
|
129
|
+
# Total sum of squares
|
130
|
+
def sst
|
131
|
+
@y_sd**2*(cases-1.0)
|
132
|
+
end
|
133
|
+
|
134
|
+
# Degrees of freedom for regression
|
135
|
+
def df_r
|
136
|
+
@n_predictors
|
137
|
+
end
|
138
|
+
# Degrees of freedom for error
|
139
|
+
def df_e
|
140
|
+
cases-@n_predictors-1
|
141
|
+
end
|
142
|
+
# Tolerance for a given variable
|
143
|
+
# defined as (1-R^2) of regression of other independent variables
|
144
|
+
# over the selected
|
145
|
+
# == Reference:
|
146
|
+
# * http://talkstats.com/showthread.php?t=5056
|
147
|
+
def tolerance(var)
|
148
|
+
return 1 if @matrix_x.column_size==1
|
149
|
+
lr=Statsample::Regression::Multiple::MatrixEngine.new(@matrix_x, var)
|
150
|
+
1-lr.r2
|
151
|
+
end
|
152
|
+
# Standard Error for coefficients.
|
153
|
+
# The standard error of a coefficient depends on
|
154
|
+
# * Tolerance of the coefficient: lower tolerance implies higher error
|
155
|
+
# * Higher r2 implies lower error
|
156
|
+
# == Reference:
|
157
|
+
# * Cohen et al. (2003). Applied Multiple Regression / Correlation Analysis for the Behavioral Sciences
|
158
|
+
#
|
159
|
+
def coeffs_se
|
160
|
+
out={}
|
161
|
+
#mse=sse.quo(df_e)
|
162
|
+
coeffs.each {|k,v|
|
163
|
+
out[k]=@y_sd.quo(@x_sd[k])*Math::sqrt( 1.quo(tolerance(k)))*Math::sqrt((1-r2).quo(df_e))
|
164
|
+
}
|
165
|
+
out
|
166
|
+
end
|
167
|
+
# t value for constant
|
168
|
+
def constant_t
|
169
|
+
return nil if constant_se.nil?
|
170
|
+
constant.to_f / constant_se
|
171
|
+
end
|
172
|
+
# Standard error for constant.
|
173
|
+
# This method recreates the estimated variance-covariance matrix
|
174
|
+
# using means, standard deviation and covariance matrix.
|
175
|
+
# So, needs the covariance matrix.
|
176
|
+
# Standard error for the constant (intercept) term.
#
# Rebuilds X'X from the predictor means, the predictor standard deviations
# and the covariance matrix, inverts it, scales it by +mse+ and returns the
# square root of the [0,0] entry.
#
# Returns nil when the engine was not built from a covariance matrix
# (@no_covariance) or when that entry is not positive.
def constant_se
  return nil if @no_covariance

  # Work on copies: writing the :constant entry straight into @x_mean and
  # @x_sd would leak it into the hashes exposed by x_mean and x_sd.
  means = @x_mean.merge(:constant => 1)
  sd = @x_sd.merge(:constant => 0)
  fields = [:constant] + @matrix_cov.fields - [@y_var]

  # Recreate X'X using the variance-covariance matrix
  xt_x = ::Matrix.rows(fields.collect { |i|
    fields.collect { |j|
      cov = if i == :constant || j == :constant
              0
            elsif i == j
              sd[i]**2
            else
              @matrix_cov.submatrix(i..i, j..j)[0, 0]
            end
      cov * (@cases - 1) + @cases * means[i] * means[j]
    }
  })

  # Only the entry for the constant term is needed.
  variance = (xt_x.inverse * mse)[0, 0]
  Math.sqrt(variance) if variance > 0
end
|
201
|
+
|
202
|
+
end
|
203
|
+
end
|
204
|
+
end
|
205
|
+
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
module Statsample
|
2
|
+
module Regression
|
3
|
+
module Multiple
|
4
|
+
# Pure Ruby Class for Multiple Regression Analysis.
|
5
|
+
# Slower than AlglibEngine, but is pure ruby and can use a pairwise approach for missing values.
|
6
|
+
# Coefficient calculation uses the correlation matrix between the vectors.
|
7
|
+
# If you need a listwise approach for missing values, use AlglibEngine, because it is faster.
|
8
|
+
#
|
9
|
+
# Example:
|
10
|
+
#
|
11
|
+
# @a = Daru::Vector.new([1,3,2,4,3,5,4,6,5,7])
|
12
|
+
# @b = Daru::Vector.new([3,3,4,4,5,5,6,6,4,4])
|
13
|
+
# @c = Daru::Vector.new([11,22,30,40,50,65,78,79,99,100])
|
14
|
+
# @y = Daru::Vector.new([3,4,5,6,7,8,9,10,20,30])
|
15
|
+
# ds = Daru::DataFrame.new({:a => @a,:b => @b,:c => @c,:y => @y})
|
16
|
+
# lr=Statsample::Regression::Multiple::RubyEngine.new(ds,:y)
|
17
|
+
|
18
|
+
class RubyEngine < MatrixEngine
|
19
|
+
def initialize(ds,y_var, opts=Hash.new)
|
20
|
+
matrix = Statsample::Bivariate.correlation_matrix ds
|
21
|
+
fields_indep=ds.vectors.to_a - [y_var]
|
22
|
+
default= {
|
23
|
+
:y_mean => ds[y_var].mean,
|
24
|
+
:x_mean => fields_indep.inject({}) {|ac,f| ac[f]=ds[f].mean; ac},
|
25
|
+
:y_sd => ds[y_var].sd,
|
26
|
+
:x_sd => fields_indep.inject({}) {|ac,f| ac[f]=ds[f].sd; ac},
|
27
|
+
:cases => Statsample::Bivariate.min_n_valid(ds)
|
28
|
+
}
|
29
|
+
opts = opts.merge(default)
|
30
|
+
super(matrix, y_var, opts)
|
31
|
+
@ds = ds
|
32
|
+
@dy = ds[@y_var]
|
33
|
+
@ds_valid = ds.reject_values(*Daru::MISSING_VALUES)
|
34
|
+
@total_cases = @ds.nrows
|
35
|
+
@valid_cases = @ds_valid.nrows
|
36
|
+
@ds_indep = ds.dup(ds.vectors.to_a - [y_var])
|
37
|
+
set_dep_columns
|
38
|
+
end
|
39
|
+
|
40
|
+
def set_dep_columns
|
41
|
+
@dep_columns = []
|
42
|
+
@ds_indep.each_vector { |v| @dep_columns.push(v.to_a) }
|
43
|
+
end
|
44
|
+
|
45
|
+
def fix_with_mean
|
46
|
+
i=0
|
47
|
+
@ds_indep.each(:row) do |row|
|
48
|
+
empty=[]
|
49
|
+
row.each do |k,v|
|
50
|
+
empty.push(k) if v.nil?
|
51
|
+
end
|
52
|
+
|
53
|
+
if empty.size==1
|
54
|
+
@ds_indep[empty[0]][i]=@ds[empty[0]].mean
|
55
|
+
end
|
56
|
+
i += 1
|
57
|
+
end
|
58
|
+
set_dep_columns
|
59
|
+
end
|
60
|
+
def fix_with_regression
|
61
|
+
i = 0
|
62
|
+
@ds_indep.each(:row) do |row|
|
63
|
+
empty = []
|
64
|
+
row.each { |k,v| empty.push(k) if v.nil? }
|
65
|
+
if empty.size==1
|
66
|
+
field = empty[0]
|
67
|
+
lr = MultipleRegression.new(@ds_indep,field)
|
68
|
+
fields = []
|
69
|
+
@ds_indep.vectors.each { |f|
|
70
|
+
fields.push(row[f]) unless f == field
|
71
|
+
}
|
72
|
+
|
73
|
+
@ds_indep[field][i]=lr.process(fields)
|
74
|
+
end
|
75
|
+
i+=1
|
76
|
+
end
|
77
|
+
set_dep_columns
|
78
|
+
end
|
79
|
+
# Standard error for constant
|
80
|
+
def constant_se
|
81
|
+
estimated_variance_covariance_matrix[0,0]
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
module Statsample
|
2
|
+
module Regression
|
3
|
+
# Class for calculation of linear regressions with form
|
4
|
+
# y = a+bx
|
5
|
+
# To create a Statsample::Regression::Simple object:
|
6
|
+
# * <tt> Statsample::Regression::Simple.new_from_dataset(ds,x,y)</tt>
|
7
|
+
# * <tt> Statsample::Regression::Simple.new_from_vectors(vx,vy)</tt>
|
8
|
+
# * <tt> Statsample::Regression::Simple.new_from_gsl(gsl) </tt>
|
9
|
+
#
|
10
|
+
class Simple
|
11
|
+
include Summarizable
|
12
|
+
attr_accessor :a,:b,:cov00, :cov01, :covx1, :chisq, :status
|
13
|
+
attr_accessor :name
|
14
|
+
attr_accessor :digits
|
15
|
+
def initialize(init_method, *argv)
|
16
|
+
self.send(init_method, *argv)
|
17
|
+
end
|
18
|
+
private_class_method :new
|
19
|
+
# Obtain y value given x value
|
20
|
+
# y=a+bx
|
21
|
+
|
22
|
+
def y(val_x)
|
23
|
+
@a+@b*val_x
|
24
|
+
end
|
25
|
+
# Obtain x value given y value
|
26
|
+
# x=(y-a)/b
|
27
|
+
def x(val_y)
|
28
|
+
(val_y-@a) / @b.to_f
|
29
|
+
end
|
30
|
+
# Sum of square error
|
31
|
+
def sse
|
32
|
+
(0...@vx.size).inject(0) {|acum,i| acum+((@vy[i]-y(@vx[i]))**2)
|
33
|
+
}
|
34
|
+
end
|
35
|
+
def standard_error
|
36
|
+
Math::sqrt(sse / (@vx.size-2).to_f)
|
37
|
+
end
|
38
|
+
# Sum of square regression
|
39
|
+
def ssr
|
40
|
+
vy_mean=@vy.mean
|
41
|
+
(0...@vx.size).inject(0) {|a,i|
|
42
|
+
a+((y(@vx[i])-vy_mean)**2)
|
43
|
+
}
|
44
|
+
|
45
|
+
end
|
46
|
+
# Sum of square total
|
47
|
+
def sst
|
48
|
+
@vy.sum_of_squared_deviation
|
49
|
+
end
|
50
|
+
# Value of r
|
51
|
+
def r
|
52
|
+
@b * (@vx.sds / @vy.sds)
|
53
|
+
end
|
54
|
+
# Value of r^2
|
55
|
+
def r2
|
56
|
+
r**2
|
57
|
+
end
|
58
|
+
class << self
|
59
|
+
# Create a regression object giving an array with following parameters:
|
60
|
+
# <tt>a,b,cov00, cov01, covx1, chisq, status</tt>
|
61
|
+
# Useful to obtain x and y values with a and b values.
|
62
|
+
def new_from_gsl(ar)
|
63
|
+
new(:init_gsl, *ar)
|
64
|
+
end
|
65
|
+
# Create a simple regression using two vectors
|
66
|
+
def new_from_vectors(vx,vy, opts=Hash.new)
|
67
|
+
new(:init_vectors,vx,vy, opts)
|
68
|
+
end
|
69
|
+
# Create a simple regression using a dataset and two vector names.
|
70
|
+
def new_from_dataset(ds,x,y, opts=Hash.new)
|
71
|
+
new(:init_vectors,ds[x],ds[y], opts)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
# Fits y = a + b*x by least squares from two vectors.
#
# +vx+ and +vy+ are first passed through Statsample.only_valid_clone and
# the results are stored in @vx and @vy. The slope @b is
# sum((x-mean_x)*(y-mean_y)) / sum((x-mean_x)**2) and the intercept @a is
# mean_y - b*mean_x.
# +opts+ maps attribute names to values; :digits defaults to 3 and :name
# to a label built from the vector names. Each option is applied through
# the matching writer method and ignored when no such writer exists.
def init_vectors(vx, vy, opts = Hash.new)
  @vx, @vy = Statsample.only_valid_clone(vx, vy)
  x_m = @vx.mean
  y_m = @vy.mean
  num = den = 0
  (0...@vx.size).each do |i|
    dx = @vx[i] - x_m
    num += dx * (@vy[i] - y_m)
    den += dx**2
  end
  @b = num.to_f / den
  @a = y_m - @b * x_m

  opts_default = {
    :digits => 3,
    :name => _("Regression of %s over %s") % [@vx.name, @vy.name]
  }
  @opts = opts_default.merge opts

  # Check for the writer, because that is the method actually invoked;
  # testing the reader would raise for keys such as :r or :sse.
  @opts.each do |key, value|
    writer = "#{key}="
    send(writer, value) if respond_to?(writer)
  end
end
|
97
|
+
def init_gsl(a,b,cov00, cov01, covx1, chisq, status)
|
98
|
+
@a=a
|
99
|
+
@b=b
|
100
|
+
@cov00=cov00
|
101
|
+
@cov01=cov01
|
102
|
+
@covx1=covx1
|
103
|
+
@chisq=chisq
|
104
|
+
@status=status
|
105
|
+
end
|
106
|
+
def report_building(gen)
|
107
|
+
f="%0.#{digits}f"
|
108
|
+
gen.section(:name=>name) do |s|
|
109
|
+
s.table(:header=>[_("Variable"), _("Value")]) do |t|
|
110
|
+
t.row [_("r"), f % r]
|
111
|
+
t.row [_("r^2"), f % r2]
|
112
|
+
t.row [_("a"), f % a]
|
113
|
+
t.row [_("b"), f % b]
|
114
|
+
t.row [_("s.e"), f % standard_error]
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
118
|
+
private :init_vectors, :init_gsl
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
@@ -0,0 +1,150 @@
|
|
1
|
+
module Statsample
|
2
|
+
module Reliability
|
3
|
+
class << self
|
4
|
+
# Calculate Cronbach's alpha for a given dataset.
|
5
|
+
# only uses tuples without missing data
|
6
|
+
def cronbach_alpha(ods)
|
7
|
+
ds = ods.reject_values(*Daru::MISSING_VALUES)
|
8
|
+
n_items = ds.ncols
|
9
|
+
return nil if n_items <= 1
|
10
|
+
s2_items = ds.to_hash.values.inject(0) { |ac,v|
|
11
|
+
ac + v.variance }
|
12
|
+
total = ds.vector_sum
|
13
|
+
|
14
|
+
(n_items.quo(n_items - 1)) * (1 - (s2_items.quo(total.variance)))
|
15
|
+
end
|
16
|
+
# Calculate Cronbach's alpha for a given dataset
|
17
|
+
# using standardized values for every vector.
|
18
|
+
# Only uses tuples without missing data
|
19
|
+
# Return nil if one or more vectors has 0 variance
|
20
|
+
def cronbach_alpha_standarized(ods)
|
21
|
+
ds = ods.reject_values(*Daru::MISSING_VALUES)
|
22
|
+
return nil if ds.any? { |v| v.variance==0}
|
23
|
+
|
24
|
+
ds = Daru::DataFrame.new(
|
25
|
+
ds.vectors.to_a.inject({}) { |a,i|
|
26
|
+
a[i] = ods[i].standardize
|
27
|
+
a
|
28
|
+
}
|
29
|
+
)
|
30
|
+
|
31
|
+
cronbach_alpha(ds)
|
32
|
+
end
|
33
|
+
# Predicted reliability of a test by replicating
|
34
|
+
# +n+ times the number of items
|
35
|
+
def spearman_brown_prophecy(r,n)
|
36
|
+
(n*r).quo(1+(n-1)*r)
|
37
|
+
end
|
38
|
+
|
39
|
+
alias :sbp :spearman_brown_prophecy
|
40
|
+
# Returns the number of items
|
41
|
+
# to obtain +r_d+ desired reliability
|
42
|
+
# from +r+ current reliability, achieved with
|
43
|
+
# +n+ items
|
44
|
+
def n_for_desired_reliability(r,r_d,n=1)
|
45
|
+
return nil if r.nil?
|
46
|
+
(r_d*(1-r)).quo(r*(1-r_d))*n
|
47
|
+
end
|
48
|
+
|
49
|
+
# Get Cronbach alpha from <tt>n</tt> cases,
|
50
|
+
# <tt>s2</tt> mean variance and <tt>cov</tt>
|
51
|
+
# mean covariance
|
52
|
+
def cronbach_alpha_from_n_s2_cov(n,s2,cov)
|
53
|
+
(n.quo(n-1)) * (1-(s2.quo(s2+(n-1)*cov)))
|
54
|
+
end
|
55
|
+
# Get Cronbach's alpha from a covariance matrix
|
56
|
+
def cronbach_alpha_from_covariance_matrix(cov)
|
57
|
+
n = cov.row_size
|
58
|
+
raise "covariance matrix should have at least 2 variables" if n < 2
|
59
|
+
s2 = n.times.inject(0) { |ac,i| ac + cov[i,i] }
|
60
|
+
(n.quo(n - 1)) * (1 - (s2.quo(cov.total_sum)))
|
61
|
+
end
|
62
|
+
# Returns n necessary to obtain specific alpha
|
63
|
+
# given variance and covariance mean of items
|
64
|
+
# Searches for the number of items +n+ whose Cronbach's alpha, as computed
# by cronbach_alpha_from_n_s2_cov from the mean item variance +s2+ and the
# mean covariance +cov+, matches the desired +alpha+.
#
# The search starts at 50 items with bounds 2 and 1000. On every step it
# moves +n+ half of the remaining distance towards the upper bound (alpha
# too low) or the lower bound (alpha too high). It stops as soon as alpha
# is within 0.0001 of the target or +n+ stops changing, and returns +n+
# as an Integer.
def n_for_desired_alpha(alpha, s2, cov)
  min = 2
  max = 1000
  n = 50 # start with a regular test: 50 items
  epsilon = 0.0001

  loop do
    dif = cronbach_alpha_from_n_s2_cov(n, s2, cov) - alpha
    # Written as "unless >" so that a NaN difference also ends the search.
    break unless dif.abs > epsilon

    prev_n = n
    if dif < 0
      min = n
      n = (n + (max - min).quo(2)).to_i
    else
      max = n
      n = (n - (max - min).quo(2)).to_i
    end
    break if n == prev_n
  end
  n
end
|
88
|
+
# First derivative for alpha
|
89
|
+
# Parameters
|
90
|
+
# <tt>n</tt>: Number of items
|
91
|
+
# <tt>sx</tt>: mean of variances
|
92
|
+
# <tt>sxy</tt>: mean of covariances
|
93
|
+
|
94
|
+
def alpha_first_derivative(n,sx,sxy)
|
95
|
+
(sxy*(sx-sxy)).quo(((sxy*(n-1))+sx)**2)
|
96
|
+
end
|
97
|
+
# Second derivative for alpha
|
98
|
+
# Parameters
|
99
|
+
# <tt>n</tt>: Number of items
|
100
|
+
# <tt>sx</tt>: mean of variances
|
101
|
+
# <tt>sxy</tt>: mean of covariances
|
102
|
+
|
103
|
+
def alfa_second_derivative(n,sx,sxy)
|
104
|
+
(2*(sxy**2)*(sxy-sx)).quo(((sxy*(n-1))+sx)**3)
|
105
|
+
end
|
106
|
+
end
|
107
|
+
class ItemCharacteristicCurve
|
108
|
+
attr_reader :totals, :counts, :vector_total
|
109
|
+
def initialize (ds, vector_total=nil)
|
110
|
+
vector_total||=ds.vector_sum
|
111
|
+
raise ArgumentError, "Total size != Dataset size" if vector_total.size != ds.nrows
|
112
|
+
@vector_total=vector_total
|
113
|
+
@ds=ds
|
114
|
+
@totals={}
|
115
|
+
@counts=@ds.vectors.to_a.inject({}) {|a,v| a[v]={};a}
|
116
|
+
process
|
117
|
+
end
|
118
|
+
def process
|
119
|
+
i=0
|
120
|
+
@ds.each_row do |row|
|
121
|
+
tot=@vector_total[i]
|
122
|
+
@totals[tot]||=0
|
123
|
+
@totals[tot]+=1
|
124
|
+
@ds.vectors.each do |f|
|
125
|
+
item=row[f].to_s
|
126
|
+
@counts[f][tot]||={}
|
127
|
+
@counts[f][tot][item]||=0
|
128
|
+
@counts[f][tot][item] += 1
|
129
|
+
end
|
130
|
+
i+=1
|
131
|
+
end
|
132
|
+
end
|
133
|
+
# Return a hash with p for each different value on a vector
|
134
|
+
# Returns a Hash keyed by every total score recorded in @totals. Each value
# is the proportion of rows with that total in which vector +field+ took
# the value +item+.
#
# +item+ is looked up by its string form (to_s). A total with no counts
# recorded for +field+, or with no entry for +item+, yields 0.
def curve_field(field, item)
  item = item.to_s
  by_total = @counts[field]
  @totals.each_with_object({}) do |(value, n), out|
    bucket = by_total[value] || {}
    out[value] = (bucket[item] || 0).quo(n)
  end
end # def
|
143
|
+
end # ItemCharacteristicCurve
|
144
|
+
end # Reliability
|
145
|
+
end # Statsample
|
146
|
+
|
147
|
+
require 'statsample/reliability/icc.rb'
|
148
|
+
require 'statsample/reliability/scaleanalysis.rb'
|
149
|
+
require 'statsample/reliability/skillscaleanalysis.rb'
|
150
|
+
require 'statsample/reliability/multiscaleanalysis.rb'
|