statsample-ekatena 2.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.travis.yml +23 -0
- data/CONTRIBUTING.md +17 -0
- data/Gemfile +2 -0
- data/History.txt +457 -0
- data/LICENSE.txt +12 -0
- data/README.md +175 -0
- data/Rakefile +44 -0
- data/benchmarks/correlation_matrix_15_variables.rb +32 -0
- data/benchmarks/correlation_matrix_5_variables.rb +33 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
- data/benchmarks/correlation_matrix_methods/results.ds +0 -0
- data/benchmarks/factor_map.rb +37 -0
- data/benchmarks/helpers_benchmark.rb +5 -0
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/doc_latex/manual/equations.tex +78 -0
- data/examples/boxplot.rb +28 -0
- data/examples/chisquare_test.rb +23 -0
- data/examples/correlation_matrix.rb +32 -0
- data/examples/dataset.rb +30 -0
- data/examples/dominance_analysis.rb +33 -0
- data/examples/dominance_analysis_bootstrap.rb +32 -0
- data/examples/histogram.rb +26 -0
- data/examples/icc.rb +24 -0
- data/examples/levene.rb +29 -0
- data/examples/multiple_regression.rb +20 -0
- data/examples/multivariate_correlation.rb +33 -0
- data/examples/parallel_analysis.rb +40 -0
- data/examples/polychoric.rb +40 -0
- data/examples/principal_axis.rb +26 -0
- data/examples/reliability.rb +31 -0
- data/examples/scatterplot.rb +25 -0
- data/examples/t_test.rb +27 -0
- data/examples/tetrachoric.rb +17 -0
- data/examples/u_test.rb +24 -0
- data/examples/vector.rb +20 -0
- data/examples/velicer_map_test.rb +46 -0
- data/grab_references.rb +29 -0
- data/lib/spss.rb +134 -0
- data/lib/statsample-ekatena/analysis.rb +100 -0
- data/lib/statsample-ekatena/analysis/suite.rb +89 -0
- data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
- data/lib/statsample-ekatena/anova.rb +24 -0
- data/lib/statsample-ekatena/anova/contrast.rb +79 -0
- data/lib/statsample-ekatena/anova/oneway.rb +187 -0
- data/lib/statsample-ekatena/anova/twoway.rb +207 -0
- data/lib/statsample-ekatena/bivariate.rb +406 -0
- data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
- data/lib/statsample-ekatena/codification.rb +182 -0
- data/lib/statsample-ekatena/converter/csv.rb +28 -0
- data/lib/statsample-ekatena/converter/spss.rb +48 -0
- data/lib/statsample-ekatena/converters.rb +211 -0
- data/lib/statsample-ekatena/crosstab.rb +188 -0
- data/lib/statsample-ekatena/daru.rb +115 -0
- data/lib/statsample-ekatena/dataset.rb +10 -0
- data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
- data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
- data/lib/statsample-ekatena/factor.rb +104 -0
- data/lib/statsample-ekatena/factor/map.rb +124 -0
- data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
- data/lib/statsample-ekatena/factor/pca.rb +242 -0
- data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
- data/lib/statsample-ekatena/factor/rotation.rb +198 -0
- data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
- data/lib/statsample-ekatena/formula/formula.rb +306 -0
- data/lib/statsample-ekatena/graph.rb +11 -0
- data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
- data/lib/statsample-ekatena/graph/histogram.rb +198 -0
- data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
- data/lib/statsample-ekatena/histogram.rb +180 -0
- data/lib/statsample-ekatena/matrix.rb +329 -0
- data/lib/statsample-ekatena/multiset.rb +310 -0
- data/lib/statsample-ekatena/regression.rb +65 -0
- data/lib/statsample-ekatena/regression/multiple.rb +89 -0
- data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
- data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
- data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
- data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
- data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
- data/lib/statsample-ekatena/regression/simple.rb +121 -0
- data/lib/statsample-ekatena/reliability.rb +150 -0
- data/lib/statsample-ekatena/reliability/icc.rb +415 -0
- data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
- data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
- data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
- data/lib/statsample-ekatena/resample.rb +15 -0
- data/lib/statsample-ekatena/shorthand.rb +125 -0
- data/lib/statsample-ekatena/srs.rb +169 -0
- data/lib/statsample-ekatena/test.rb +82 -0
- data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
- data/lib/statsample-ekatena/test/chisquare.rb +73 -0
- data/lib/statsample-ekatena/test/f.rb +52 -0
- data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
- data/lib/statsample-ekatena/test/levene.rb +88 -0
- data/lib/statsample-ekatena/test/t.rb +309 -0
- data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
- data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
- data/lib/statsample-ekatena/vector.rb +19 -0
- data/lib/statsample-ekatena/version.rb +3 -0
- data/lib/statsample.rb +282 -0
- data/po/es/statsample.mo +0 -0
- data/po/es/statsample.po +959 -0
- data/po/statsample.pot +947 -0
- data/references.txt +24 -0
- data/statsample-ekatena.gemspec +49 -0
- data/test/fixtures/bank2.dat +200 -0
- data/test/fixtures/correlation_matrix.rb +17 -0
- data/test/fixtures/df.csv +15 -0
- data/test/fixtures/hartman_23.matrix +9 -0
- data/test/fixtures/stock_data.csv +500 -0
- data/test/fixtures/tetmat_matrix.txt +5 -0
- data/test/fixtures/tetmat_test.txt +1001 -0
- data/test/helpers_tests.rb +83 -0
- data/test/test_analysis.rb +176 -0
- data/test/test_anova_contrast.rb +36 -0
- data/test/test_anovaoneway.rb +26 -0
- data/test/test_anovatwoway.rb +37 -0
- data/test/test_anovatwowaywithdataset.rb +47 -0
- data/test/test_anovawithvectors.rb +102 -0
- data/test/test_awesome_print_bug.rb +16 -0
- data/test/test_bartlettsphericity.rb +25 -0
- data/test/test_bivariate.rb +164 -0
- data/test/test_codification.rb +78 -0
- data/test/test_crosstab.rb +67 -0
- data/test/test_dominance_analysis.rb +39 -0
- data/test/test_factor.rb +228 -0
- data/test/test_factor_map.rb +38 -0
- data/test/test_factor_pa.rb +56 -0
- data/test/test_fit_model.rb +88 -0
- data/test/test_ggobi.rb +35 -0
- data/test/test_gsl.rb +15 -0
- data/test/test_histogram.rb +109 -0
- data/test/test_matrix.rb +48 -0
- data/test/test_multiset.rb +176 -0
- data/test/test_regression.rb +231 -0
- data/test/test_reliability.rb +223 -0
- data/test/test_reliability_icc.rb +198 -0
- data/test/test_reliability_skillscale.rb +57 -0
- data/test/test_resample.rb +24 -0
- data/test/test_srs.rb +9 -0
- data/test/test_statistics.rb +69 -0
- data/test/test_stest.rb +69 -0
- data/test/test_stratified.rb +17 -0
- data/test/test_test_f.rb +33 -0
- data/test/test_test_kolmogorovsmirnov.rb +34 -0
- data/test/test_test_t.rb +62 -0
- data/test/test_umannwhitney.rb +27 -0
- data/test/test_vector.rb +12 -0
- data/test/test_wilcoxonsignedrank.rb +64 -0
- metadata +570 -0
@@ -0,0 +1,231 @@
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
|
+
|
3
|
+
class StatsampleRegressionTestCase < Minitest::Test
|
4
|
+
# Simple regression of :y on :x in which several :y cases are missing (nil).
# The expected figures below are checked against RubyEngine's output.
context 'Example with missing data' do
  setup do
    @x = Daru::Vector.new([
      0.285714285714286, 0.114285714285714, 0.314285714285714, 0.2, 0.2, 0.228571428571429, 0.2,
      0.4, 0.714285714285714, 0.285714285714286, 0.285714285714286, 0.228571428571429, 0.485714285714286, 0.457142857142857,
      0.257142857142857, 0.228571428571429, 0.285714285714286, 0.285714285714286, 0.285714285714286, 0.142857142857143, 0.285714285714286,
      0.514285714285714, 0.485714285714286, 0.228571428571429, 0.285714285714286, 0.342857142857143, 0.285714285714286, 0.0857142857142857
    ])

    @y = Daru::Vector.new([
      nil, 0.233333333333333, nil, 0.266666666666667, 0.366666666666667, nil, 0.333333333333333,
      0.3, 0.666666666666667, 0.0333333333333333, 0.333333333333333, nil, nil, 0.533333333333333,
      0.433333333333333, 0.4, 0.4, 0.5, 0.4, 0.266666666666667, 0.166666666666667,
      0.666666666666667, 0.433333333333333, 0.166666666666667, nil, 0.4, 0.366666666666667, nil
    ])
    @ds = Daru::DataFrame.new({ x: @x, y: @y })
    @lr = Statsample::Regression::Multiple::RubyEngine.new(@ds, :y)
  end

  should 'have correct values' do
    # Goodness of fit.
    assert_in_delta(0.455, @lr.r2, 0.001)
    assert_in_delta(0.427, @lr.r2_adjusted, 0.001)
    assert_in_delta(0.1165, @lr.se_estimate, 0.001)
    assert_in_delta(15.925, @lr.f, 0.0001)
    # Coefficients and their standard errors.
    assert_in_delta(0.675, @lr.standarized_coeffs[:x], 0.001)
    assert_in_delta(0.778, @lr.coeffs[:x], 0.001, 'coeff x')
    assert_in_delta(0.132, @lr.constant, 0.001, 'constant')
    assert_in_delta(0.195, @lr.coeffs_se[:x], 0.001, 'coeff x se')
    assert_in_delta(0.064, @lr.constant_se, 0.001, 'constant se')
  end
end
|
24
|
+
# x3 is constructed as an exact linear combination of x1 and x2, so the
# predictors are linearly dependent; building the engine is expected to raise
# Statsample::Regression::LinearDependency.
should 'return an error if data is linearly dependent' do
  samples = 100

  a = rand
  b = rand

  x1 = Daru::Vector.new(samples.times.map { rand })
  x2 = Daru::Vector.new(samples.times.map { rand })
  x3 = Daru::Vector.new(samples.times.map { |i| x1[i] * (1 + a) + x2[i] * (1 + b) })
  y = Daru::Vector.new(samples.times.map { |i| x1[i] + x2[i] + x3[i] + rand })

  ds = Daru::DataFrame.new({ x1: x1, x2: x2, x3: x3, y: y })

  # assert_raises is Minitest's native name (assert_raise is only an alias).
  assert_raises(Statsample::Regression::LinearDependency) do
    Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
  end
end
|
40
|
+
# Builds the same simple regression through each of the three public entry
# points and runs the shared checks on every resulting model.
def test_parameters
  @x = Daru::Vector.new([13, 20, 10, 33, 15])
  @y = Daru::Vector.new([23, 18, 35, 10, 27])
  ds = Daru::DataFrame.new({ x: @x, y: @y })

  [
    Statsample::Regression::Simple.new_from_vectors(@x, @y),
    Statsample::Regression::Simple.new_from_dataset(ds, :x, :y),
    Statsample::Regression.simple(@x, @y)
  ].each { |reg| _test_simple_regression(reg) }
end
|
51
|
+
|
52
|
+
# Shared checks for the simple regression built in test_parameters.
#
# reg - object responding to #a, #b, #standard_error and #summary.
def _test_simple_regression(reg)
  assert_in_delta(40.009, reg.a, 0.001, 'intercept (a)')
  assert_in_delta(-0.957, reg.b, 0.001, 'slope (b)')
  assert_in_delta(4.248, reg.standard_error, 0.002, 'standard error')
  assert(reg.summary, 'summary should not be nil')
end
|
58
|
+
|
59
|
+
# Smoke test: a multiple regression on random integer data must still produce
# a non-empty summary.
def test_summaries
  a = Daru::Vector.new(10.times.map { rand(100) })
  b = Daru::Vector.new(10.times.map { rand(100) })
  y = Daru::Vector.new(10.times.map { rand(100) })
  ds = Daru::DataFrame.new({ a: a, b: b, y: y })

  lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y)

  assert(lr.summary.size > 0)
end
|
67
|
+
|
68
|
+
# Regression with several dependent variables (adhd, cd, odd), computed from a
# 9x9 matrix extended with Statsample::CovariateMatrix.
def test_multiple_dependent
  complete = Matrix[
    [1, 0.53, 0.62, 0.19, -0.09, 0.08, 0.02, -0.12, 0.08],
    [0.53, 1, 0.61, 0.23, 0.1, 0.18, 0.02, -0.1, 0.15],
    [0.62, 0.61, 1, 0.03, 0.1, 0.12, 0.03, -0.06, 0.12],
    [0.19, 0.23, 0.03, 1, -0.02, 0.02, 0, -0.02, -0.02],
    [-0.09, 0.1, 0.1, -0.02, 1, 0.05, 0.06, 0.18, 0.02],
    [0.08, 0.18, 0.12, 0.02, 0.05, 1, 0.22, -0.07, 0.36],
    [0.02, 0.02, 0.03, 0, 0.06, 0.22, 1, -0.01, -0.05],
    [-0.12, -0.1, -0.06, -0.02, 0.18, -0.07, -0.01, 1, -0.03],
    [0.08, 0.15, 0.12, -0.02, 0.02, 0.36, -0.05, -0.03, 1]
  ]
  complete.extend Statsample::CovariateMatrix
  complete.fields = %w(adhd cd odd sex age monly mwork mage poverty)

  lr = Statsample::Regression::Multiple::MultipleDependent.new(complete, %w(adhd cd odd))

  assert_in_delta(0.197, lr.r2yx, 0.001)
  assert_in_delta(0.197, lr.r2yx_covariance, 0.001)
  assert_in_delta(0.07, lr.p2yx, 0.001)
end
|
88
|
+
|
89
|
+
# Multiple regression on data with missing values scattered over every
# variable; checks the sums of squares, r, r2 and F reported by RubyEngine.
def test_multiple_regression_pairwise_2
  @a = Daru::Vector.new([1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 3, nil, 3, nil, 3])
  @b = Daru::Vector.new([3, 3, 4, 4, 5, 5, 6, 6, 4, 4, 2, 2, nil, 6, 2])
  @c = Daru::Vector.new([11, 22, 30, 40, 50, 65, 78, 79, 99, 100, nil, 3, 7, nil, 7])
  @y = Daru::Vector.new([3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 30, 40, nil, 50, nil])
  ds = Daru::DataFrame.new({ a: @a, b: @b, c: @c, y: @y })

  lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y)

  assert_in_delta(2407.436, lr.sst, 0.001)
  assert_in_delta(0.752, lr.r, 0.001, 'pairwise r')
  assert_in_delta(0.565, lr.r2, 0.001)
  assert_in_delta(1361.130, lr.ssr, 0.001)
  assert_in_delta(1046.306, lr.sse, 0.001)
  assert_in_delta(3.035, lr.f, 0.001)
end
|
103
|
+
|
104
|
+
# Runs the shared model checks against GslEngine and verifies the predicted
# values and residuals. Skipped when GSL is not available.
def test_multiple_regression_gsl
  # skip aborts the test immediately, so no else branch is needed.
  skip 'Regression::Multiple::GslEngine not tested (no Gsl)' unless Statsample.has_gsl?

  @a = Daru::Vector.new([1, 3, 2, 4, 3, 5, 4, 6, 5, 7])
  @b = Daru::Vector.new([3, 3, 4, 4, 5, 5, 6, 6, 4, 4])
  @c = Daru::Vector.new([11, 22, 30, 40, 50, 65, 78, 79, 99, 100])
  @y = Daru::Vector.new([3, 4, 5, 6, 7, 8, 9, 10, 20, 30])
  ds = Daru::DataFrame.new({ a: @a, b: @b, c: @c, y: @y })

  lr = Statsample::Regression::Multiple::GslEngine.new(ds, :y)
  assert(lr.summary.size > 0)
  model_test(lr, 'gsl')

  predicted = [1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
  c_predicted = lr.predicted
  predicted.each_with_index do |expected, i|
    assert_in_delta(expected, c_predicted[i], 0.001)
  end

  residuals = [1.2142, -2.0989, 1.7566, -1.29085, 2.033, -2.3428, 0.18414, -0.47177, -3.66395, 4.6801]
  c_residuals = lr.residuals
  residuals.each_with_index do |expected, i|
    assert_in_delta(expected, c_residuals[i], 0.001)
  end
end
|
128
|
+
|
129
|
+
# Shared assertions for the three-predictor (a, b, c) model used by the engine
# tests. Covers everything except the constant's SE/t and #process, which
# model_test adds on top.
#
# lr   - regression engine under test.
# name - label included in failure messages to identify the engine.
def model_test_matrix(lr, name = 'undefined')
  unstan_coeffs = { a: 0.695, b: -4.286, c: 0.266 }
  stan_coeffs = { a: 0.151, b: -0.547, c: 0.997 }
  coeffs_se = { a: 1.171, b: 1.129, c: 0.072 }
  coeffs_t = { a: 0.594, b: -3.796, c: 3.703 }

  unstan_coeffs.each do |field, expected|
    assert_in_delta(expected, lr.coeffs[field], 0.001, "b coeffs - #{name}")
  end

  stan_coeffs.each do |field, expected|
    assert_in_delta(expected, lr.standarized_coeffs[field], 0.001, "beta coeffs - #{name}")
  end

  assert_in_delta(11.027, lr.constant, 0.001)

  assert_in_delta(0.955, lr.r, 0.001)
  assert_in_delta(0.913, lr.r2, 0.001)

  assert_in_delta(20.908, lr.f, 0.001)
  assert_in_delta(0.001, lr.probability, 0.001)
  assert_in_delta(0.226, lr.tolerance(:a), 0.001)

  ccoeffs_se = lr.coeffs_se
  coeffs_se.each do |field, expected|
    assert_in_delta(expected, ccoeffs_se[field], 0.001, "coeffs se (#{field}) - #{name}")
  end

  ccoeffs_t = lr.coeffs_t
  coeffs_t.each do |field, expected|
    assert_in_delta(expected, ccoeffs_t[field], 0.001, "coeffs t (#{field}) - #{name}")
  end

  assert_in_delta(639.6, lr.sst, 0.001)
  assert_in_delta(583.76, lr.ssr, 0.001)
  assert_in_delta(55.840, lr.sse, 0.001)
  assert(lr.summary.size > 0, "#{name} without summary")
end
|
167
|
+
|
168
|
+
# Full model check: everything in model_test_matrix plus the constant's
# standard error and t value, and a prediction for the case [1, 3, 11].
#
# lr   - regression engine under test.
# name - label included in failure messages to identify the engine.
def model_test(lr, name = 'undefined')
  model_test_matrix(lr, name)

  assert_in_delta(4.559, lr.constant_se, 0.001, "constant se - #{name}")
  assert_in_delta(2.419, lr.constant_t, 0.001, "constant t - #{name}")

  assert_in_delta(1.785, lr.process([1, 3, 11]), 0.001, "process - #{name}")
end
|
175
|
+
|
176
|
+
# Exercises MatrixEngine twice on the same data: once from a correlation
# matrix (means and standard deviations passed explicitly) and once from a
# covariance matrix.
def test_regression_matrix
  @a = Daru::Vector.new([1, 3, 2, 4, 3, 5, 4, 6, 5, 7])
  @b = Daru::Vector.new([3, 3, 4, 4, 5, 5, 6, 6, 4, 4])
  @c = Daru::Vector.new([11, 22, 30, 40, 50, 65, 78, 79, 99, 100])
  @y = Daru::Vector.new([3, 4, 5, 6, 7, 8, 9, 10, 20, 30])
  ds = Daru::DataFrame.new({ a: @a, b: @b, c: @c, y: @y })

  cor = Statsample::Bivariate.correlation_matrix(ds)
  lr = Statsample::Regression::Multiple::MatrixEngine.new(
    cor, :y,
    y_mean: @y.mean,
    x_mean: { a: ds[:a].mean, b: ds[:b].mean, c: ds[:c].mean },
    cases: @a.size,
    y_sd: @y.sd,
    x_sd: { a: @a.sd, b: @b.sd, c: @c.sd }
  )
  # With a correlation matrix the constant's SE and t are expected to be nil,
  # so only the matrix-level checks are run here.
  assert_nil(lr.constant_se)
  assert_nil(lr.constant_t)
  model_test_matrix(lr, 'correlation matrix')

  covariance = Statsample::Bivariate.covariance_matrix(ds)
  lr = Statsample::Regression::Multiple::MatrixEngine.new(
    covariance, :y,
    y_mean: @y.mean,
    x_mean: { a: ds[:a].mean, b: ds[:b].mean, c: ds[:c].mean },
    cases: @a.size
  )
  assert(lr.summary.size > 0)

  model_test(lr, 'covariance matrix')
end
|
200
|
+
|
201
|
+
# RubyEngine on data whose first case is entirely missing: the case counts,
# the shared model checks, and predicted values / residuals (nil for the
# missing case) are all verified.
def test_regression_rubyengine
  @a = Daru::Vector.new([nil, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7])
  @b = Daru::Vector.new([nil, 3, 3, 4, 4, 5, 5, 6, 6, 4, 4])
  @c = Daru::Vector.new([nil, 11, 22, 30, 40, 50, 65, 78, 79, 99, 100])
  @y = Daru::Vector.new([nil, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30])
  ds = Daru::DataFrame.new({ a: @a, b: @b, c: @c, y: @y })

  lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
  assert_equal(11, lr.total_cases)
  assert_equal(10, lr.valid_cases)
  model_test(lr, 'rubyengine with missing data')

  predicted = [nil, 1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
  c_predicted = lr.predicted
  predicted.each_with_index do |expected, i|
    actual = c_predicted[i]
    # Check nil-ness in both directions so a mismatch is reported as a test
    # failure rather than an error raised inside assert_in_delta.
    if expected.nil?
      assert_nil(actual, "Expected #{i} is nil, but actual is #{actual}")
    else
      refute_nil(actual, "Actual #{i} is nil, but expected #{expected}")
      assert_in_delta(expected, actual, 0.001)
    end
  end

  residuals = [nil, 1.2142, -2.0989, 1.7566, -1.29085, 2.033, -2.3428, 0.18414, -0.47177, -3.66395, 4.6801]
  c_residuals = lr.residuals
  residuals.each_with_index do |expected, i|
    actual = c_residuals[i]
    if expected.nil?
      assert_nil(actual, "Expected residual #{i} is nil, but actual is #{actual}")
    else
      refute_nil(actual, "Actual residual #{i} is nil, but expected #{expected}")
      assert_in_delta(expected, actual, 0.001)
    end
  end
end
|
231
|
+
end
|
@@ -0,0 +1,223 @@
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
|
+
class StatsampleReliabilityTestCase < Minitest::Test
|
3
|
+
context Statsample::Reliability do
|
4
|
+
# Spearman-Brown prophecy: reliability 0.6849 with the test lengthened by a
# factor of 62/15 is expected to come out at about 0.9.
should 'return correct r according to Spearman-Brown prophecy' do
  r = 0.6849
  n = 62.quo(15)
  assert_in_delta(0.9, Statsample::Reliability.sbp(r, n), 0.001)
end
|
9
|
+
# Inverse of the prophecy check: going from reliability 0.6849 to 0.9 with 15
# as the third argument is expected to give about 62 (within 0.5).
should 'return correct n for desired reliability' do
  r = 0.6849
  r_d = 0.9
  assert_in_delta(62, Statsample::Reliability.n_for_desired_reliability(r, r_d, 15), 0.5)
end
|
14
|
+
# Cronbach's alpha on random data: every item is a shared random base vector
# plus independent random noise, with a random number of items per run.
context "Cronbach's alpha" do
  setup do
    @samples = 40
    @n_variables = rand(10) + 2
    @ds = Daru::DataFrame.new({}, index: @samples)
    base = Daru::Vector.new(@samples.times.collect { |_a| rand })
    @n_variables.times do |i|
      @ds[i] = Daru::Vector.new(base.collect { |v| v + rand })
    end

    @k = @ds.ncols
    @cm = Statsample::Bivariate.covariance_matrix(@ds)
    # Standardized copy of the dataset, used for the standardized alpha.
    @dse = @ds.dup
    @dse.vectors.each do |f|
      @dse[f] = @dse[f].standardize
    end
    @cme = Statsample::Bivariate.covariance_matrix(@dse)
    @a = Statsample::Reliability.cronbach_alpha(@ds)
    @as = Statsample::Reliability.cronbach_alpha_standarized(@ds)
  end

  should 'alpha will be equal to sum of matrix covariance less the individual variances' do
    total_sum = @cm.total_sum
    ind_var = @ds.vectors.to_a.inject(0) { |acc, name| acc + @ds[name].variance }
    expected = @k.quo(@k - 1) * (1 - ind_var.quo(total_sum))
    assert_in_delta(expected, @a, 1e-10)
  end

  should 'method cronbach_alpha_from_n_s2_cov return correct values' do
    sa = Statsample::Reliability::ScaleAnalysis.new(@ds)
    vm = sa.variances_mean
    cm = sa.covariances_mean
    assert_in_delta(sa.alpha, Statsample::Reliability.cronbach_alpha_from_n_s2_cov(@n_variables, vm, cm), 1e-10)
  end

  should 'method cronbach_alpha_from_covariance_matrix returns correct value' do
    cov = Statsample::Bivariate.covariance_matrix(@ds)
    assert_in_delta(@a, Statsample::Reliability.cronbach_alpha_from_covariance_matrix(cov), 0.0000001)
  end

  should 'return correct n for desired alpha, covariance and variance' do
    sa = Statsample::Reliability::ScaleAnalysis.new(@ds)
    vm = sa.variances_mean
    cm = sa.covariances_mean
    n_obtained = Statsample::Reliability.n_for_desired_alpha(@a, vm, cm)
    # Round trip: the n obtained for alpha must give back the same alpha.
    assert_in_delta(@a, Statsample::Reliability.cronbach_alpha_from_n_s2_cov(n_obtained, vm, cm), 0.001)
  end

  should 'standarized alpha will be equal to sum of matrix covariance less the individual variances on standarized values' do
    total_sum = @cme.total_sum
    ind_var = @dse.vectors.to_a.inject(0) { |acc, name| acc + @dse[name].variance }
    expected = @k.quo(@k - 1) * (1 - ind_var.quo(total_sum))
    assert_in_delta(expected, @as, 1e-10)
  end
end
|
64
|
+
# Item characteristic curves on three random items (:a, :b, :c), each scored
# 0...@points, so the largest possible total score is (@points - 1) * 3.
context Statsample::Reliability::ItemCharacteristicCurve do
  setup do
    @samples = 100
    @points = rand(10) + 3
    @max_point = (@points - 1) * 3
    @x1 = Daru::Vector.new(@samples.times.map { rand(@points) })
    @x2 = Daru::Vector.new(@samples.times.map { rand(@points) })
    @x3 = Daru::Vector.new(@samples.times.map { rand(@points) })
    @ds = Daru::DataFrame.new({ a: @x1, b: @x2, c: @x3 })
    @icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds)
  end

  should 'have a correct automatic vector_total' do
    assert_equal(@ds.vector_sum, @icc.vector_total)
  end

  should 'have a correct different vector_total' do
    x2 = Daru::Vector.new(@samples.times.map { rand(10) })
    @icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds, x2)
    assert_equal(x2, @icc.vector_total)

    # Build the oversized vector outside the block so that only the
    # constructor under test can satisfy assert_raises.
    inc = Daru::Vector.new((@samples + 10).times.map { rand(10) })
    assert_raises(ArgumentError) do
      Statsample::Reliability::ItemCharacteristicCurve.new(@ds, inc)
    end
  end

  should 'have 0% for 0 points on maximum value values' do
    # A missing entry in the curve is treated as 0.
    max = @icc.curve_field(:a, 0)[@max_point.to_f] || 0
    assert_in_delta(0, max)
  end

  should 'have 0 for max value on minimum value' do
    max = @icc.curve_field(:a, @max_point)[0.0] || 0
    assert_in_delta(0, max)
  end

  should 'have correct values of % for any value' do
    sum = @icc.vector_total
    total_g = sum.frequencies
    index = rand(@points)

    # For every total score, count the cases whose :a item equals index.
    # Scores with no matching case keep an explicit 0 entry.
    total = {}
    @x1.each_with_index do |v, i|
      total[sum[i]] ||= 0
      total[sum[i]] += 1 if v == index
    end

    # Turn the counts into proportions of all cases with that total score.
    expected = {}
    total.each { |score, hits| expected[score] = hits.quo(total_g[score]) }

    assert_equal(expected, @icc.curve_field(:a, index))
  end
end
|
112
|
+
|
113
|
+
# MultiScaleAnalysis over three synthetic scales of ten items each. Items are
# named "<scale>_<item>" and the items of scale s are (s * 2) + rand.
context Statsample::Reliability::MultiScaleAnalysis do
  setup do
    size = 100
    @scales = 3
    @items_per_scale = 10
    # Names of the items belonging to a given scale, e.g. [:"1_0", ..., :"1_9"].
    @item_names = lambda do |scale|
      @items_per_scale.times.map { |i| "#{scale}_#{i}".to_sym }
    end
    item_names = @item_names

    h = {}
    @scales.times do |s|
      item_names.call(s).each do |item|
        h[item] = Daru::Vector.new(size.times.map { (s * 2) + rand })
      end
    end
    @ds = Daru::DataFrame.new(h)

    @msa = Statsample::Reliability::MultiScaleAnalysis.new(name: 'Multiple Analysis') do |m|
      m.scale 'complete', @ds
      @scales.times do |s|
        m.scale "scale_#{s}", @ds.clone(*item_names.call(s)), name: "Scale #{s}"
      end
    end
  end

  should 'Retrieve correct ScaleAnalysis for whole scale' do
    sa = Statsample::Reliability::ScaleAnalysis.new(@ds, name: 'Scale complete')
    assert_equal(sa.variances_mean, @msa.scale('complete').variances_mean)
  end

  should 'Retrieve correct ScaleAnalysis for each scale' do
    @scales.times do |s|
      sa = Statsample::Reliability::ScaleAnalysis.new(@ds.dup(@item_names.call(s)), name: "Scale #{s}")
      assert_equal(sa.variances_mean, @msa.scale("scale_#{s}").variances_mean)
    end
  end

  should 'retrieve correct correlation matrix for each scale' do
    vectors = { complete: @ds.vector_sum }
    @scales.times do |s|
      vectors["scale_#{s}".to_sym] = @ds.dup(@item_names.call(s)).vector_sum
    end
    ds2 = Daru::DataFrame.new(vectors)
    assert_equal(Statsample::Bivariate.correlation_matrix(ds2), @msa.correlation_matrix)
  end

  should 'delete scale using delete_scale' do
    @msa.delete_scale('complete')
    # Expected value first, so a failure message reads the right way round.
    assert_equal(@scales.times.map { |s| "scale_#{s}" }, @msa.scales.keys.sort)
  end

  should 'retrieve pca for scales' do
    @msa.delete_scale('complete')
    vectors = {}
    @scales.times do |s|
      vectors["scale_#{s}".to_sym] = @ds.dup(@item_names.call(s)).vector_sum
    end
    ds2 = Daru::DataFrame.new(vectors)
    cor_matrix = Statsample::Bivariate.correlation_matrix(ds2)
    m = 3
    pca = Statsample::Factor::PCA.new(cor_matrix, m: m)
    assert_equal(pca.component_matrix, @msa.pca(m: m).component_matrix)
  end

  should 'retrieve acceptable summary' do
    # Only the 'complete' scale is left once these three are removed.
    @msa.delete_scale('scale_0')
    @msa.delete_scale('scale_1')
    @msa.delete_scale('scale_2')

    # @msa.summary_correlation_matrix=true
    # @msa.summary_pca=true

    assert(@msa.summary.size > 0)
  end
end
|
178
|
+
# ScaleAnalysis on a fixed four-item dataset whose last case is an extreme
# value on every item.
context Statsample::Reliability::ScaleAnalysis do
  setup do
    @x1 = Daru::Vector.new([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 30])
    @x2 = Daru::Vector.new([1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 50])
    @x3 = Daru::Vector.new([2, 2, 1, 1, 1, 2, 2, 2, 3, 4, 5, 40])
    @x4 = Daru::Vector.new([1, 2, 3, 4, 4, 4, 4, 3, 4, 4, 5, 30])
    @ds = Daru::DataFrame.new({ x1: @x1, x2: @x2, x3: @x3, x4: @x4 })
    @ia = Statsample::Reliability::ScaleAnalysis.new(@ds)
    @cov_matrix = @ia.cov_m
  end

  should 'return correct values for item analysis' do
    assert_in_delta(0.980, @ia.alpha, 0.001)
    assert_in_delta(0.999, @ia.alpha_standarized, 0.001)

    # Mean of the item variances: the diagonal of the covariance matrix.
    var_mean = Daru::Vector.new(4.times.map { |m| @cov_matrix[m, m] }).mean
    assert_in_delta(var_mean, @ia.variances_mean)

    assert_equal(@x1.mean, @ia.item_statistics[:x1][:mean])
    assert_equal(@x4.mean, @ia.item_statistics[:x4][:mean])
    assert_in_delta(@x1.sds, @ia.item_statistics[:x1][:sds], 1e-14)
    assert_in_delta(@x4.sds, @ia.item_statistics[:x4][:sds], 1e-14)

    # Statistics of the scale with item x1 removed.
    ds2 = @ds.clone
    ds2.delete_vector(:x1)
    vector_sum = ds2.vector_sum
    assert_equal(vector_sum.mean, @ia.stats_if_deleted[:x1][:mean])
    assert_equal(vector_sum.sds, @ia.stats_if_deleted[:x1][:sds])
    assert_in_delta(vector_sum.variance, @ia.stats_if_deleted[:x1][:variance_sample], 1e-10)

    assert_equal(Statsample::Reliability.cronbach_alpha(ds2), @ia.stats_if_deleted[:x1][:alpha])

    # Mean of the covariances: every off-diagonal cell, row by row.
    covariances = 4.times.flat_map do |i|
      4.times.reject { |j| j == i }.map { |j| @cov_matrix[i, j] }
    end
    assert_in_delta(Daru::Vector.new(covariances).mean, @ia.covariances_mean)
    assert_in_delta(0.999, @ia.item_total_correlation[:x1], 0.001)
    assert_in_delta(1050.455, @ia.stats_if_deleted[:x1][:variance_sample], 0.001)
  end

  should 'return a summary' do
    assert(@ia.summary.size > 0)
  end
end
|
222
|
+
end
|
223
|
+
end
|