statsample 1.5.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.build.sh +15 -0
- data/.gitignore +1 -0
- data/.travis.yml +19 -7
- data/CONTRIBUTING.md +33 -0
- data/History.txt +5 -0
- data/README.md +41 -53
- data/benchmarks/correlation_matrix_15_variables.rb +6 -5
- data/benchmarks/correlation_matrix_5_variables.rb +6 -5
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
- data/examples/boxplot.rb +17 -5
- data/examples/correlation_matrix.rb +36 -7
- data/examples/dataset.rb +25 -5
- data/examples/dominance_analysis.rb +8 -7
- data/examples/dominance_analysis_bootstrap.rb +16 -11
- data/examples/histogram.rb +16 -2
- data/examples/icc.rb +5 -6
- data/examples/levene.rb +17 -3
- data/examples/multiple_regression.rb +6 -3
- data/examples/parallel_analysis.rb +11 -6
- data/examples/polychoric.rb +26 -13
- data/examples/principal_axis.rb +8 -4
- data/examples/reliability.rb +10 -10
- data/examples/scatterplot.rb +8 -0
- data/examples/t_test.rb +7 -0
- data/examples/u_test.rb +10 -2
- data/examples/vector.rb +9 -6
- data/examples/velicer_map_test.rb +12 -8
- data/lib/statsample.rb +13 -47
- data/lib/statsample/analysis/suite.rb +1 -1
- data/lib/statsample/anova/oneway.rb +6 -6
- data/lib/statsample/anova/twoway.rb +26 -24
- data/lib/statsample/bivariate.rb +78 -61
- data/lib/statsample/bivariate/pearson.rb +2 -2
- data/lib/statsample/codification.rb +45 -32
- data/lib/statsample/converter/csv.rb +15 -53
- data/lib/statsample/converter/spss.rb +6 -5
- data/lib/statsample/converters.rb +50 -211
- data/lib/statsample/crosstab.rb +26 -25
- data/lib/statsample/daru.rb +117 -0
- data/lib/statsample/dataset.rb +70 -942
- data/lib/statsample/dominanceanalysis.rb +16 -17
- data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
- data/lib/statsample/factor/parallelanalysis.rb +17 -19
- data/lib/statsample/factor/pca.rb +21 -20
- data/lib/statsample/factor/principalaxis.rb +3 -3
- data/lib/statsample/graph/boxplot.rb +8 -16
- data/lib/statsample/graph/histogram.rb +4 -4
- data/lib/statsample/graph/scatterplot.rb +8 -7
- data/lib/statsample/histogram.rb +128 -119
- data/lib/statsample/matrix.rb +20 -16
- data/lib/statsample/multiset.rb +39 -38
- data/lib/statsample/regression.rb +3 -3
- data/lib/statsample/regression/multiple.rb +8 -10
- data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
- data/lib/statsample/regression/multiple/baseengine.rb +32 -32
- data/lib/statsample/regression/multiple/gslengine.rb +33 -36
- data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
- data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
- data/lib/statsample/reliability.rb +23 -25
- data/lib/statsample/reliability/icc.rb +8 -7
- data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
- data/lib/statsample/reliability/scaleanalysis.rb +58 -60
- data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
- data/lib/statsample/resample.rb +1 -1
- data/lib/statsample/shorthand.rb +29 -25
- data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
- data/lib/statsample/test/levene.rb +28 -27
- data/lib/statsample/test/t.rb +7 -9
- data/lib/statsample/test/umannwhitney.rb +28 -28
- data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
- data/lib/statsample/vector.rb +70 -1013
- data/lib/statsample/version.rb +1 -1
- data/statsample.gemspec +12 -16
- data/test/helpers_tests.rb +1 -1
- data/test/test_analysis.rb +17 -17
- data/test/test_anova_contrast.rb +6 -6
- data/test/test_anovatwowaywithdataset.rb +8 -8
- data/test/test_anovawithvectors.rb +8 -8
- data/test/test_awesome_print_bug.rb +1 -1
- data/test/test_bartlettsphericity.rb +4 -4
- data/test/test_bivariate.rb +48 -43
- data/test/test_codification.rb +33 -33
- data/test/test_crosstab.rb +9 -9
- data/test/test_dataset.rb +28 -458
- data/test/test_factor.rb +46 -38
- data/test/test_factor_pa.rb +22 -13
- data/test/test_ggobi.rb +4 -4
- data/test/test_gsl.rb +4 -4
- data/test/test_histogram.rb +3 -3
- data/test/test_matrix.rb +13 -13
- data/test/test_multiset.rb +103 -91
- data/test/test_regression.rb +57 -52
- data/test/test_reliability.rb +55 -45
- data/test/test_reliability_icc.rb +8 -8
- data/test/test_reliability_skillscale.rb +26 -24
- data/test/test_resample.rb +1 -1
- data/test/test_statistics.rb +3 -13
- data/test/test_stest.rb +9 -9
- data/test/test_stratified.rb +3 -3
- data/test/test_test_t.rb +12 -12
- data/test/test_umannwhitney.rb +2 -2
- data/test/test_vector.rb +76 -613
- data/test/test_wilcoxonsignedrank.rb +4 -4
- metadata +57 -28
- data/lib/statsample/rserve_extension.rb +0 -20
- data/lib/statsample/vector/gsl.rb +0 -106
- data/test/fixtures/repeated_fields.csv +0 -7
- data/test/fixtures/scientific_notation.csv +0 -4
- data/test/fixtures/test_csv.csv +0 -7
- data/test/fixtures/test_xls.xls +0 -0
- data/test/test_csv.rb +0 -63
- data/test/test_rserve_extension.rb +0 -42
- data/test/test_xls.rb +0 -52
data/test/test_regression.rb
CHANGED
@@ -3,21 +3,21 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
|
3
3
|
class StatsampleRegressionTestCase < Minitest::Test
|
4
4
|
context 'Example with missing data' do
|
5
5
|
setup do
|
6
|
-
@x = [0.285714285714286, 0.114285714285714, 0.314285714285714, 0.2, 0.2, 0.228571428571429, 0.2, 0.4, 0.714285714285714, 0.285714285714286, 0.285714285714286, 0.228571428571429, 0.485714285714286, 0.457142857142857, 0.257142857142857, 0.228571428571429, 0.285714285714286, 0.285714285714286, 0.285714285714286, 0.142857142857143, 0.285714285714286, 0.514285714285714, 0.485714285714286, 0.228571428571429, 0.285714285714286, 0.342857142857143, 0.285714285714286, 0.0857142857142857]
|
6
|
+
@x = Daru::Vector.new([0.285714285714286, 0.114285714285714, 0.314285714285714, 0.2, 0.2, 0.228571428571429, 0.2, 0.4, 0.714285714285714, 0.285714285714286, 0.285714285714286, 0.228571428571429, 0.485714285714286, 0.457142857142857, 0.257142857142857, 0.228571428571429, 0.285714285714286, 0.285714285714286, 0.285714285714286, 0.142857142857143, 0.285714285714286, 0.514285714285714, 0.485714285714286, 0.228571428571429, 0.285714285714286, 0.342857142857143, 0.285714285714286, 0.0857142857142857])
|
7
7
|
|
8
|
-
@y = [nil, 0.233333333333333, nil, 0.266666666666667, 0.366666666666667, nil, 0.333333333333333, 0.3, 0.666666666666667, 0.0333333333333333, 0.333333333333333, nil, nil, 0.533333333333333, 0.433333333333333, 0.4, 0.4, 0.5, 0.4, 0.266666666666667, 0.166666666666667, 0.666666666666667, 0.433333333333333, 0.166666666666667, nil, 0.4, 0.366666666666667, nil]
|
9
|
-
@ds = {
|
10
|
-
@lr = Statsample::Regression::Multiple::RubyEngine.new(@ds,
|
8
|
+
@y = Daru::Vector.new([nil, 0.233333333333333, nil, 0.266666666666667, 0.366666666666667, nil, 0.333333333333333, 0.3, 0.666666666666667, 0.0333333333333333, 0.333333333333333, nil, nil, 0.533333333333333, 0.433333333333333, 0.4, 0.4, 0.5, 0.4, 0.266666666666667, 0.166666666666667, 0.666666666666667, 0.433333333333333, 0.166666666666667, nil, 0.4, 0.366666666666667, nil])
|
9
|
+
@ds = Daru::DataFrame.new({ :x => @x, :y => @y })
|
10
|
+
@lr = Statsample::Regression::Multiple::RubyEngine.new(@ds, :y)
|
11
11
|
end
|
12
12
|
should 'have correct values' do
|
13
13
|
assert_in_delta(0.455, @lr.r2, 0.001)
|
14
14
|
assert_in_delta(0.427, @lr.r2_adjusted, 0.001)
|
15
15
|
assert_in_delta(0.1165, @lr.se_estimate, 0.001)
|
16
16
|
assert_in_delta(15.925, @lr.f, 0.0001)
|
17
|
-
assert_in_delta(0.675, @lr.standarized_coeffs[
|
18
|
-
assert_in_delta(0.778, @lr.coeffs[
|
17
|
+
assert_in_delta(0.675, @lr.standarized_coeffs[:x], 0.001)
|
18
|
+
assert_in_delta(0.778, @lr.coeffs[:x], 0.001, 'coeff x')
|
19
19
|
assert_in_delta(0.132, @lr.constant, 0.001, 'constant')
|
20
|
-
assert_in_delta(0.195, @lr.coeffs_se[
|
20
|
+
assert_in_delta(0.195, @lr.coeffs_se[:x], 0.001, 'coeff x se')
|
21
21
|
assert_in_delta(0.064, @lr.constant_se, 0.001, 'constant se')
|
22
22
|
end
|
23
23
|
end
|
@@ -26,24 +26,24 @@ class StatsampleRegressionTestCase < Minitest::Test
|
|
26
26
|
|
27
27
|
a, b = rand, rand
|
28
28
|
|
29
|
-
x1 = samples.times.map { rand }
|
30
|
-
x2 = samples.times.map { rand }
|
31
|
-
x3 = samples.times.map { |i| x1[i] * (1 + a) + x2[i] * (1 + b) }
|
32
|
-
y
|
29
|
+
x1 = Daru::Vector.new(samples.times.map { rand })
|
30
|
+
x2 = Daru::Vector.new(samples.times.map { rand })
|
31
|
+
x3 = Daru::Vector.new(samples.times.map { |i| x1[i] * (1 + a) + x2[i] * (1 + b) })
|
32
|
+
y = Daru::Vector.new(samples.times.map { |i| x1[i] + x2[i] + x3[i] + rand })
|
33
33
|
|
34
|
-
ds = {
|
34
|
+
ds = Daru::DataFrame.new({ :x1 => x1, :x2 => x2, :x3 => x3, :y => y })
|
35
35
|
|
36
36
|
assert_raise(Statsample::Regression::LinearDependency) {
|
37
|
-
Statsample::Regression::Multiple::RubyEngine.new(ds,
|
37
|
+
Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
|
38
38
|
}
|
39
39
|
end
|
40
40
|
def test_parameters
|
41
|
-
@x =
|
42
|
-
@y =
|
41
|
+
@x =Daru::Vector.new([13, 20, 10, 33, 15])
|
42
|
+
@y =Daru::Vector.new([23, 18, 35, 10, 27])
|
43
43
|
reg = Statsample::Regression::Simple.new_from_vectors(@x, @y)
|
44
44
|
_test_simple_regression(reg)
|
45
|
-
ds = {
|
46
|
-
reg = Statsample::Regression::Simple.new_from_dataset(ds,
|
45
|
+
ds = Daru::DataFrame.new({ :x => @x, :y => @y })
|
46
|
+
reg = Statsample::Regression::Simple.new_from_dataset(ds, :x, :y)
|
47
47
|
_test_simple_regression(reg)
|
48
48
|
reg = Statsample::Regression.simple(@x, @y)
|
49
49
|
_test_simple_regression(reg)
|
@@ -57,11 +57,11 @@ class StatsampleRegressionTestCase < Minitest::Test
|
|
57
57
|
end
|
58
58
|
|
59
59
|
def test_summaries
|
60
|
-
a = 10.times.map { rand(100) }
|
61
|
-
b = 10.times.map { rand(100) }
|
62
|
-
y = 10.times.map { rand(100) }
|
63
|
-
ds = {
|
64
|
-
lr = Statsample::Regression::Multiple::RubyEngine.new(ds,
|
60
|
+
a = Daru::Vector.new(10.times.map { rand(100) })
|
61
|
+
b = Daru::Vector.new(10.times.map { rand(100) })
|
62
|
+
y = Daru::Vector.new(10.times.map { rand(100) })
|
63
|
+
ds = Daru::DataFrame.new({ :a => a, :b => b, :y => y })
|
64
|
+
lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
|
65
65
|
assert(lr.summary.size > 0)
|
66
66
|
end
|
67
67
|
|
@@ -87,12 +87,12 @@ class StatsampleRegressionTestCase < Minitest::Test
|
|
87
87
|
end
|
88
88
|
|
89
89
|
def test_multiple_regression_pairwise_2
|
90
|
-
@a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 3, nil, 3, nil, 3]
|
91
|
-
@b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4, 2, 2, nil, 6, 2]
|
92
|
-
@c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100, nil, 3, 7, nil, 7]
|
93
|
-
@y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 30, 40, nil, 50, nil]
|
94
|
-
ds = {
|
95
|
-
lr = Statsample::Regression::Multiple::RubyEngine.new(ds,
|
90
|
+
@a =Daru::Vector.new( [1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 3, nil, 3, nil, 3])
|
91
|
+
@b =Daru::Vector.new( [3, 3, 4, 4, 5, 5, 6, 6, 4, 4, 2, 2, nil, 6, 2])
|
92
|
+
@c =Daru::Vector.new( [11, 22, 30, 40, 50, 65, 78, 79, 99, 100, nil, 3, 7, nil, 7])
|
93
|
+
@y =Daru::Vector.new( [3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 30, 40, nil, 50, nil])
|
94
|
+
ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y })
|
95
|
+
lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
|
96
96
|
assert_in_delta(2407.436, lr.sst, 0.001)
|
97
97
|
assert_in_delta(0.752, lr.r, 0.001, 'pairwise r')
|
98
98
|
assert_in_delta(0.565, lr.r2, 0.001)
|
@@ -103,12 +103,12 @@ class StatsampleRegressionTestCase < Minitest::Test
|
|
103
103
|
|
104
104
|
def test_multiple_regression_gsl
|
105
105
|
if Statsample.has_gsl?
|
106
|
-
@a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7]
|
107
|
-
@b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4]
|
108
|
-
@c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100]
|
109
|
-
@y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30]
|
110
|
-
ds = {
|
111
|
-
lr = Statsample::Regression::Multiple::GslEngine.new(ds,
|
106
|
+
@a =Daru::Vector.new( [1, 3, 2, 4, 3, 5, 4, 6, 5, 7])
|
107
|
+
@b =Daru::Vector.new( [3, 3, 4, 4, 5, 5, 6, 6, 4, 4])
|
108
|
+
@c =Daru::Vector.new( [11, 22, 30, 40, 50, 65, 78, 79, 99, 100])
|
109
|
+
@y =Daru::Vector.new( [3, 4, 5, 6, 7, 8, 9, 10, 20, 30])
|
110
|
+
ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y })
|
111
|
+
lr = Statsample::Regression::Multiple::GslEngine.new(ds, :y)
|
112
112
|
assert(lr.summary.size > 0)
|
113
113
|
model_test(lr, 'gsl')
|
114
114
|
predicted = [1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
|
@@ -127,8 +127,8 @@ class StatsampleRegressionTestCase < Minitest::Test
|
|
127
127
|
end
|
128
128
|
|
129
129
|
def model_test_matrix(lr, name = 'undefined')
|
130
|
-
stan_coeffs = {
|
131
|
-
unstan_coeffs = {
|
130
|
+
stan_coeffs = { :a => 0.151, :b => -0.547, :c => 0.997 }
|
131
|
+
unstan_coeffs = { :a => 0.695, :b => -4.286, :c => 0.266 }
|
132
132
|
|
133
133
|
unstan_coeffs.each_key{|k|
|
134
134
|
assert_in_delta(unstan_coeffs[k], lr.coeffs[k], 0.001, "b coeffs - #{name}")
|
@@ -145,15 +145,15 @@ class StatsampleRegressionTestCase < Minitest::Test
|
|
145
145
|
|
146
146
|
assert_in_delta(20.908, lr.f, 0.001)
|
147
147
|
assert_in_delta(0.001, lr.probability, 0.001)
|
148
|
-
assert_in_delta(0.226, lr.tolerance(
|
148
|
+
assert_in_delta(0.226, lr.tolerance(:a), 0.001)
|
149
149
|
|
150
|
-
coeffs_se = {
|
150
|
+
coeffs_se = { :a => 1.171, :b => 1.129, :c => 0.072 }
|
151
151
|
|
152
152
|
ccoeffs_se = lr.coeffs_se
|
153
153
|
coeffs_se.each_key{|k|
|
154
154
|
assert_in_delta(coeffs_se[k], ccoeffs_se[k], 0.001)
|
155
155
|
}
|
156
|
-
coeffs_t = {
|
156
|
+
coeffs_t = { :a => 0.594, :b => -3.796, :c => 3.703 }
|
157
157
|
ccoeffs_t = lr.coeffs_t
|
158
158
|
coeffs_t.each_key{|k|
|
159
159
|
assert_in_delta(coeffs_t[k], ccoeffs_t[k], 0.001)
|
@@ -174,32 +174,37 @@ class StatsampleRegressionTestCase < Minitest::Test
|
|
174
174
|
end
|
175
175
|
|
176
176
|
def test_regression_matrix
|
177
|
-
@a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7]
|
178
|
-
@b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4]
|
179
|
-
@c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100]
|
180
|
-
@y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30]
|
181
|
-
ds = {
|
177
|
+
@a = Daru::Vector.new([1, 3, 2, 4, 3, 5, 4, 6, 5, 7])
|
178
|
+
@b = Daru::Vector.new([3, 3, 4, 4, 5, 5, 6, 6, 4, 4])
|
179
|
+
@c = Daru::Vector.new([11, 22, 30, 40, 50, 65, 78, 79, 99, 100])
|
180
|
+
@y = Daru::Vector.new([3, 4, 5, 6, 7, 8, 9, 10, 20, 30])
|
181
|
+
ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y })
|
182
182
|
cor = Statsample::Bivariate.correlation_matrix(ds)
|
183
183
|
|
184
|
-
lr = Statsample::Regression::Multiple::MatrixEngine.new(
|
184
|
+
lr = Statsample::Regression::Multiple::MatrixEngine.new(
|
185
|
+
cor, :y, y_mean: @y.mean,
|
186
|
+
x_mean: { :a => ds[:a].mean, :b => ds[:b].mean, :c => ds[:c].mean },
|
187
|
+
cases: @a.size, y_sd: @y.sd, x_sd: { :a => @a.sd, :b => @b.sd, :c => @c.sd })
|
185
188
|
assert_nil(lr.constant_se)
|
186
189
|
assert_nil(lr.constant_t)
|
187
190
|
model_test_matrix(lr, 'correlation matrix')
|
188
191
|
|
189
192
|
covariance = Statsample::Bivariate.covariance_matrix(ds)
|
190
|
-
lr = Statsample::Regression::Multiple::MatrixEngine.new(
|
193
|
+
lr = Statsample::Regression::Multiple::MatrixEngine.new(
|
194
|
+
covariance, :y, y_mean: @y.mean,
|
195
|
+
x_mean: { :a => ds[:a].mean, :b => ds[:b].mean, :c => ds[:c].mean }, cases: @a.size)
|
191
196
|
assert(lr.summary.size > 0)
|
192
197
|
|
193
198
|
model_test(lr, 'covariance matrix')
|
194
199
|
end
|
195
200
|
|
196
201
|
def test_regression_rubyengine
|
197
|
-
@a = [nil, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7]
|
198
|
-
@b = [nil, 3, 3, 4, 4, 5, 5, 6, 6, 4, 4]
|
199
|
-
@c = [nil, 11, 22, 30, 40, 50, 65, 78, 79, 99, 100]
|
200
|
-
@y = [nil, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30]
|
201
|
-
ds = {
|
202
|
-
lr = Statsample::Regression::Multiple::RubyEngine.new(ds,
|
202
|
+
@a = Daru::Vector.new([nil, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7])
|
203
|
+
@b = Daru::Vector.new([nil, 3, 3, 4, 4, 5, 5, 6, 6, 4, 4])
|
204
|
+
@c = Daru::Vector.new([nil, 11, 22, 30, 40, 50, 65, 78, 79, 99, 100])
|
205
|
+
@y = Daru::Vector.new([nil, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30])
|
206
|
+
ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y })
|
207
|
+
lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
|
203
208
|
assert_equal(11, lr.total_cases)
|
204
209
|
assert_equal(10, lr.valid_cases)
|
205
210
|
model_test(lr, 'rubyengine with missing data')
|
data/test/test_reliability.rb
CHANGED
@@ -1,6 +1,14 @@
|
|
1
1
|
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
2
|
class StatsampleReliabilityTestCase < Minitest::Test
|
3
3
|
context Statsample::Reliability do
|
4
|
+
setup do
|
5
|
+
Daru.lazy_update = true
|
6
|
+
end
|
7
|
+
|
8
|
+
teardown do
|
9
|
+
Daru.lazy_update = false
|
10
|
+
end
|
11
|
+
|
4
12
|
should 'return correct r according to Spearman-Brown prophecy' do
|
5
13
|
r = 0.6849
|
6
14
|
n = 62.quo(15)
|
@@ -15,26 +23,27 @@ class StatsampleReliabilityTestCase < Minitest::Test
|
|
15
23
|
setup do
|
16
24
|
@samples = 40
|
17
25
|
@n_variables = rand(10) + 2
|
18
|
-
@ds =
|
19
|
-
base = @samples.times.collect { |_a| rand }
|
26
|
+
@ds = Daru::DataFrame.new({}, index: @samples)
|
27
|
+
base = Daru::Vector.new(@samples.times.collect { |_a| rand })
|
20
28
|
@n_variables.times do |i|
|
21
|
-
@ds[i] = base.collect { |v| v + rand }
|
29
|
+
@ds[i] = Daru::Vector.new(base.collect { |v| v + rand })
|
22
30
|
end
|
23
31
|
|
24
|
-
@ds.
|
25
|
-
@k = @ds.
|
32
|
+
@ds.update
|
33
|
+
@k = @ds.ncols
|
26
34
|
@cm = Statsample::Bivariate.covariance_matrix(@ds)
|
27
35
|
@dse = @ds.dup
|
28
|
-
@dse.
|
29
|
-
@dse[f] = @dse[f].
|
36
|
+
@dse.vectors.each do |f|
|
37
|
+
@dse[f] = @dse[f].standardize
|
30
38
|
end
|
39
|
+
@dse.update
|
31
40
|
@cme = Statsample::Bivariate.covariance_matrix(@dse)
|
32
41
|
@a = Statsample::Reliability.cronbach_alpha(@ds)
|
33
42
|
@as = Statsample::Reliability.cronbach_alpha_standarized(@ds)
|
34
43
|
end
|
35
44
|
should 'alpha will be equal to sum of matrix covariance less the individual variances' do
|
36
45
|
total_sum = @cm.total_sum
|
37
|
-
ind_var = @ds.
|
46
|
+
ind_var = @ds.vectors.to_a.inject(0) { |ac, v| ac + @ds[v].variance }
|
38
47
|
expected = @k.quo(@k - 1) * (1 - (ind_var.quo(total_sum)))
|
39
48
|
assert_in_delta(expected, @a, 1e-10)
|
40
49
|
end
|
@@ -57,7 +66,7 @@ class StatsampleReliabilityTestCase < Minitest::Test
|
|
57
66
|
|
58
67
|
should 'standarized alpha will be equal to sum of matrix covariance less the individual variances on standarized values' do
|
59
68
|
total_sum = @cme.total_sum
|
60
|
-
ind_var = @dse.
|
69
|
+
ind_var = @dse.vectors.to_a.inject(0) { |ac, v| ac + @dse[v].variance }
|
61
70
|
expected = @k.quo(@k - 1) * (1 - (ind_var.quo(total_sum)))
|
62
71
|
assert_in_delta(expected, @as, 1e-10)
|
63
72
|
end
|
@@ -67,31 +76,31 @@ class StatsampleReliabilityTestCase < Minitest::Test
|
|
67
76
|
@samples = 100
|
68
77
|
@points = rand(10) + 3
|
69
78
|
@max_point = (@points - 1) * 3
|
70
|
-
@x1 = @samples.times.map { rand(@points) }
|
71
|
-
@x2 = @samples.times.map { rand(@points) }
|
72
|
-
@x3 = @samples.times.map { rand(@points) }
|
73
|
-
@ds = {
|
79
|
+
@x1 = Daru::Vector.new(@samples.times.map { rand(@points) })
|
80
|
+
@x2 = Daru::Vector.new(@samples.times.map { rand(@points) })
|
81
|
+
@x3 = Daru::Vector.new(@samples.times.map { rand(@points) })
|
82
|
+
@ds = Daru::DataFrame.new({ :a => @x1, :b => @x2, :c => @x3 })
|
74
83
|
@icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds)
|
75
84
|
end
|
76
85
|
should 'have a correct automatic vector_total' do
|
77
86
|
assert_equal(@ds.vector_sum, @icc.vector_total)
|
78
87
|
end
|
79
88
|
should 'have a correct different vector_total' do
|
80
|
-
x2 = @samples.times.map { rand(10) }
|
89
|
+
x2 = Daru::Vector.new(@samples.times.map { rand(10) })
|
81
90
|
@icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds, x2)
|
82
91
|
assert_equal(x2, @icc.vector_total)
|
83
92
|
assert_raises(ArgumentError) do
|
84
|
-
inc = (@samples + 10).times.map { rand(10) }
|
93
|
+
inc = Daru::Vector.new((@samples + 10).times.map { rand(10) })
|
85
94
|
@icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds, inc)
|
86
95
|
end
|
87
96
|
end
|
88
97
|
should 'have 0% for 0 points on maximum value values' do
|
89
|
-
max = @icc.curve_field(
|
98
|
+
max = @icc.curve_field(:a, 0)[@max_point.to_f]
|
90
99
|
max ||= 0
|
91
100
|
assert_in_delta(0, max)
|
92
101
|
end
|
93
102
|
should 'have 0 for max value on minimum value' do
|
94
|
-
max = @icc.curve_field(
|
103
|
+
max = @icc.curve_field(:a, @max_point)[0.0]
|
95
104
|
max ||= 0
|
96
105
|
assert_in_delta(0, max)
|
97
106
|
end
|
@@ -107,7 +116,7 @@ class StatsampleReliabilityTestCase < Minitest::Test
|
|
107
116
|
expected = total.each {|k, v|
|
108
117
|
total[k] = v.quo(total_g[k])
|
109
118
|
}
|
110
|
-
assert_equal(expected, @icc.curve_field(
|
119
|
+
assert_equal(expected, @icc.curve_field(:a, index))
|
111
120
|
end
|
112
121
|
end
|
113
122
|
|
@@ -119,33 +128,34 @@ class StatsampleReliabilityTestCase < Minitest::Test
|
|
119
128
|
h = {}
|
120
129
|
@scales.times {|s|
|
121
130
|
@items_per_scale.times {|i|
|
122
|
-
h["#{s}_#{i}"] = (size.times.map { (s * 2) + rand })
|
131
|
+
h["#{s}_#{i}".to_sym] = Daru::Vector.new((size.times.map { (s * 2) + rand }))
|
123
132
|
}
|
124
133
|
}
|
125
|
-
@ds = h
|
134
|
+
@ds = Daru::DataFrame.new(h)
|
126
135
|
@msa = Statsample::Reliability::MultiScaleAnalysis.new(name: 'Multiple Analysis') do |m|
|
127
136
|
m.scale 'complete', @ds
|
128
137
|
@scales.times {|s|
|
129
|
-
m.scale "scale_#{s}", @ds.clone(
|
138
|
+
m.scale "scale_#{s}", @ds.clone(*@items_per_scale.times.map { |i| "#{s}_#{i}".to_sym }), name: "Scale #{s}"
|
130
139
|
}
|
131
140
|
end
|
132
141
|
end
|
142
|
+
|
133
143
|
should 'Retrieve correct ScaleAnalysis for whole scale' do
|
134
144
|
sa = Statsample::Reliability::ScaleAnalysis.new(@ds, name: 'Scale complete')
|
135
145
|
assert_equal(sa.variances_mean, @msa.scale('complete').variances_mean)
|
136
146
|
end
|
137
147
|
should 'Retrieve correct ScaleAnalysis for each scale' do
|
138
148
|
@scales.times {|s|
|
139
|
-
sa = Statsample::Reliability::ScaleAnalysis.new(@ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}" }), name: "Scale #{s}")
|
149
|
+
sa = Statsample::Reliability::ScaleAnalysis.new(@ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}".to_sym }), name: "Scale #{s}")
|
140
150
|
assert_equal(sa.variances_mean, @msa.scale("scale_#{s}").variances_mean)
|
141
151
|
}
|
142
152
|
end
|
143
153
|
should 'retrieve correct correlation matrix for each scale' do
|
144
|
-
vectors = {
|
154
|
+
vectors = { :complete => @ds.vector_sum }
|
145
155
|
@scales.times {|s|
|
146
|
-
vectors["scale_#{s}"] = @ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}" }).vector_sum
|
156
|
+
vectors["scale_#{s}".to_sym] = @ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}".to_sym }).vector_sum
|
147
157
|
}
|
148
|
-
ds2 = vectors
|
158
|
+
ds2 = Daru::DataFrame.new(vectors)
|
149
159
|
assert_equal(Statsample::Bivariate.correlation_matrix(ds2), @msa.correlation_matrix)
|
150
160
|
end
|
151
161
|
should 'delete scale using delete_scale' do
|
@@ -156,9 +166,9 @@ class StatsampleReliabilityTestCase < Minitest::Test
|
|
156
166
|
@msa.delete_scale('complete')
|
157
167
|
vectors = {}
|
158
168
|
@scales.times {|s|
|
159
|
-
vectors["scale_#{s}"] = @ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}" }).vector_sum
|
169
|
+
vectors["scale_#{s}".to_sym] = @ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}".to_sym }).vector_sum
|
160
170
|
}
|
161
|
-
ds2 = vectors
|
171
|
+
ds2 = Daru::DataFrame.new(vectors)
|
162
172
|
cor_matrix = Statsample::Bivariate.correlation_matrix(ds2)
|
163
173
|
m = 3
|
164
174
|
pca = Statsample::Factor::PCA.new(cor_matrix, m: m)
|
@@ -177,31 +187,31 @@ class StatsampleReliabilityTestCase < Minitest::Test
|
|
177
187
|
end
|
178
188
|
context Statsample::Reliability::ScaleAnalysis do
|
179
189
|
setup do
|
180
|
-
@x1 = [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 30]
|
181
|
-
@x2 = [1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 50]
|
182
|
-
@x3 = [2, 2, 1, 1, 1, 2, 2, 2, 3, 4, 5, 40]
|
183
|
-
@x4 = [1, 2, 3, 4, 4, 4, 4, 3, 4, 4, 5, 30]
|
184
|
-
@ds = {
|
190
|
+
@x1 = Daru::Vector.new([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 30])
|
191
|
+
@x2 = Daru::Vector.new([1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 50])
|
192
|
+
@x3 = Daru::Vector.new([2, 2, 1, 1, 1, 2, 2, 2, 3, 4, 5, 40])
|
193
|
+
@x4 = Daru::Vector.new([1, 2, 3, 4, 4, 4, 4, 3, 4, 4, 5, 30])
|
194
|
+
@ds = Daru::DataFrame.new({ :x1 => @x1, :x2 => @x2, :x3 => @x3, :x4 => @x4 })
|
185
195
|
@ia = Statsample::Reliability::ScaleAnalysis.new(@ds)
|
186
196
|
@cov_matrix = @ia.cov_m
|
187
197
|
end
|
188
198
|
should 'return correct values for item analysis' do
|
189
199
|
assert_in_delta(0.980, @ia.alpha, 0.001)
|
190
200
|
assert_in_delta(0.999, @ia.alpha_standarized, 0.001)
|
191
|
-
var_mean = 4.times.map { |m| @cov_matrix[m, m] }.
|
201
|
+
var_mean = Daru::Vector.new(4.times.map { |m| @cov_matrix[m, m] }).mean
|
192
202
|
assert_in_delta(var_mean, @ia.variances_mean)
|
193
|
-
assert_equal(@x1.mean, @ia.item_statistics[
|
194
|
-
assert_equal(@x4.mean, @ia.item_statistics[
|
195
|
-
assert_in_delta(@x1.sds, @ia.item_statistics[
|
196
|
-
assert_in_delta(@x4.sds, @ia.item_statistics[
|
203
|
+
assert_equal(@x1.mean, @ia.item_statistics[:x1][:mean])
|
204
|
+
assert_equal(@x4.mean, @ia.item_statistics[:x4][:mean])
|
205
|
+
assert_in_delta(@x1.sds, @ia.item_statistics[:x1][:sds], 1e-14)
|
206
|
+
assert_in_delta(@x4.sds, @ia.item_statistics[:x4][:sds], 1e-14)
|
197
207
|
ds2 = @ds.clone
|
198
|
-
ds2.delete_vector(
|
208
|
+
ds2.delete_vector(:x1)
|
199
209
|
vector_sum = ds2.vector_sum
|
200
|
-
assert_equal(vector_sum.mean, @ia.stats_if_deleted[
|
201
|
-
assert_equal(vector_sum.sds, @ia.stats_if_deleted[
|
202
|
-
assert_in_delta(vector_sum.variance, @ia.stats_if_deleted[
|
210
|
+
assert_equal(vector_sum.mean, @ia.stats_if_deleted[:x1][:mean])
|
211
|
+
assert_equal(vector_sum.sds, @ia.stats_if_deleted[:x1][:sds])
|
212
|
+
assert_in_delta(vector_sum.variance, @ia.stats_if_deleted[:x1][:variance_sample], 1e-10)
|
203
213
|
|
204
|
-
assert_equal(Statsample::Reliability.cronbach_alpha(ds2), @ia.stats_if_deleted[
|
214
|
+
assert_equal(Statsample::Reliability.cronbach_alpha(ds2), @ia.stats_if_deleted[:x1][:alpha])
|
205
215
|
|
206
216
|
covariances = []
|
207
217
|
4.times.each {|i|
|
@@ -211,9 +221,9 @@ class StatsampleReliabilityTestCase < Minitest::Test
|
|
211
221
|
end
|
212
222
|
}
|
213
223
|
}
|
214
|
-
assert_in_delta(covariances.
|
215
|
-
assert_in_delta(0.999, @ia.item_total_correlation[
|
216
|
-
assert_in_delta(1050.455, @ia.stats_if_deleted[
|
224
|
+
assert_in_delta(Daru::Vector.new(covariances).mean, @ia.covariances_mean)
|
225
|
+
assert_in_delta(0.999, @ia.item_total_correlation[:x1], 0.001)
|
226
|
+
assert_in_delta(1050.455, @ia.stats_if_deleted[:x1][:variance_sample], 0.001)
|
217
227
|
end
|
218
228
|
should 'return a summary' do
|
219
229
|
assert(@ia.summary.size > 0)
|
@@ -5,11 +5,11 @@ $reliability_icc = nil
|
|
5
5
|
class StatsampleReliabilityIccTestCase < Minitest::Test
|
6
6
|
context Statsample::Reliability::ICC do
|
7
7
|
setup do
|
8
|
-
a = [9, 6, 8, 7, 10, 6]
|
9
|
-
b = [2, 1, 4, 1, 5, 2]
|
10
|
-
c = [5, 3, 6, 2, 6, 4]
|
11
|
-
d = [8, 2, 8, 6, 9, 7]
|
12
|
-
@ds = {
|
8
|
+
a = Daru::Vector.new([9, 6, 8, 7, 10, 6])
|
9
|
+
b = Daru::Vector.new([2, 1, 4, 1, 5, 2])
|
10
|
+
c = Daru::Vector.new([5, 3, 6, 2, 6, 4])
|
11
|
+
d = Daru::Vector.new([8, 2, 8, 6, 9, 7])
|
12
|
+
@ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c, :d => d })
|
13
13
|
@icc = Statsample::Reliability::ICC.new(@ds)
|
14
14
|
end
|
15
15
|
should 'basic method be correct' do
|
@@ -114,7 +114,7 @@ class StatsampleReliabilityIccTestCase < Minitest::Test
|
|
114
114
|
|
115
115
|
begin
|
116
116
|
require 'rserve'
|
117
|
-
require '
|
117
|
+
require 'daru/extensions/rserve'
|
118
118
|
context 'McGraw and Wong' do
|
119
119
|
teardown do
|
120
120
|
@r = $reliability_icc[:r].close unless $reliability_icc[:r].nil?
|
@@ -122,11 +122,11 @@ class StatsampleReliabilityIccTestCase < Minitest::Test
|
|
122
122
|
setup do
|
123
123
|
if $reliability_icc.nil?
|
124
124
|
size = 100
|
125
|
-
a = size.times.map { rand(10) }
|
125
|
+
a = Daru::Vector.new(size.times.map { rand(10) })
|
126
126
|
b = a.recode { |i| i + rand(4) - 2 }
|
127
127
|
c = a.recode { |i| i + rand(4) - 2 }
|
128
128
|
d = a.recode { |i| i + rand(4) - 2 }
|
129
|
-
@ds = {
|
129
|
+
@ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c, :d => d })
|
130
130
|
|
131
131
|
@icc = Statsample::Reliability::ICC.new(@ds)
|
132
132
|
@r = Rserve::Connection.new
|