statsample 1.4.3 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/History.txt +8 -0
- data/benchmarks/correlation_matrix_15_variables.rb +1 -1
- data/benchmarks/correlation_matrix_5_variables.rb +1 -1
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +2 -2
- data/examples/dataset.rb +2 -2
- data/examples/icc.rb +1 -1
- data/examples/levene.rb +2 -2
- data/examples/parallel_analysis.rb +1 -1
- data/examples/u_test.rb +2 -2
- data/examples/vector.rb +1 -1
- data/examples/velicer_map_test.rb +1 -1
- data/lib/statsample.rb +30 -4
- data/lib/statsample/anova/oneway.rb +3 -3
- data/lib/statsample/anova/twoway.rb +3 -3
- data/lib/statsample/bivariate.rb +7 -7
- data/lib/statsample/bivariate/pearson.rb +2 -2
- data/lib/statsample/converter/csv.rb +1 -1
- data/lib/statsample/converters.rb +7 -7
- data/lib/statsample/dataset.rb +8 -8
- data/lib/statsample/dominanceanalysis.rb +4 -4
- data/lib/statsample/dominanceanalysis/bootstrap.rb +8 -8
- data/lib/statsample/factor.rb +2 -4
- data/lib/statsample/factor/map.rb +2 -1
- data/lib/statsample/factor/parallelanalysis.rb +2 -2
- data/lib/statsample/factor/pca.rb +2 -2
- data/lib/statsample/factor/principalaxis.rb +2 -2
- data/lib/statsample/graph/boxplot.rb +4 -4
- data/lib/statsample/graph/histogram.rb +2 -2
- data/lib/statsample/graph/scatterplot.rb +4 -4
- data/lib/statsample/matrix.rb +20 -6
- data/lib/statsample/regression.rb +2 -2
- data/lib/statsample/regression/multiple.rb +3 -3
- data/lib/statsample/regression/multiple/alglibengine.rb +5 -5
- data/lib/statsample/regression/multiple/baseengine.rb +3 -3
- data/lib/statsample/regression/multiple/gslengine.rb +5 -5
- data/lib/statsample/regression/multiple/rubyengine.rb +4 -4
- data/lib/statsample/reliability/icc.rb +1 -1
- data/lib/statsample/reliability/multiscaleanalysis.rb +4 -4
- data/lib/statsample/reliability/scaleanalysis.rb +6 -6
- data/lib/statsample/reliability/skillscaleanalysis.rb +1 -1
- data/lib/statsample/resample.rb +1 -1
- data/lib/statsample/shorthand.rb +1 -1
- data/lib/statsample/test/bartlettsphericity.rb +1 -1
- data/lib/statsample/test/levene.rb +4 -4
- data/lib/statsample/test/t.rb +3 -3
- data/lib/statsample/test/umannwhitney.rb +2 -2
- data/lib/statsample/vector.rb +103 -80
- data/lib/statsample/vector/gsl.rb +16 -16
- data/lib/statsample/version.rb +1 -1
- data/test/test_analysis.rb +1 -1
- data/test/test_anova_contrast.rb +4 -4
- data/test/test_anovatwowaywithdataset.rb +1 -1
- data/test/test_anovawithvectors.rb +6 -6
- data/test/test_awesome_print_bug.rb +1 -1
- data/test/test_bartlettsphericity.rb +3 -3
- data/test/test_bivariate.rb +38 -38
- data/test/test_crosstab.rb +2 -2
- data/test/test_csv.rb +6 -6
- data/test/test_dataset.rb +79 -79
- data/test/test_factor.rb +55 -49
- data/test/test_factor_pa.rb +4 -4
- data/test/test_ggobi.rb +3 -3
- data/test/test_gsl.rb +3 -3
- data/test/test_histogram.rb +3 -3
- data/test/test_matrix.rb +5 -5
- data/test/test_multiset.rb +19 -19
- data/test/test_regression.rb +27 -27
- data/test/test_reliability.rb +14 -14
- data/test/test_reliability_icc.rb +7 -7
- data/test/test_reliability_skillscale.rb +6 -6
- data/test/test_resample.rb +1 -1
- data/test/test_rserve_extension.rb +4 -4
- data/test/test_statistics.rb +5 -5
- data/test/test_stest.rb +8 -8
- data/test/test_stratified.rb +3 -3
- data/test/test_test_t.rb +5 -5
- data/test/test_umannwhitney.rb +2 -2
- data/test/test_vector.rb +153 -119
- data/test/test_wilcoxonsignedrank.rb +4 -4
- data/test/test_xls.rb +6 -6
- metadata +3 -53
data/test/test_factor.rb
CHANGED
@@ -18,8 +18,8 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
18
18
|
pca = Statsample::Factor::PCA.new(cm, m: 6)
|
19
19
|
# puts pca.summary
|
20
20
|
# puts pca.feature_matrix
|
21
|
-
exp_eig = [2.985, 0.931, 0.242, 0.194, 0.085, 0.035].
|
22
|
-
assert_similar_vector(exp_eig, pca.eigenvalues.
|
21
|
+
exp_eig = [2.985, 0.931, 0.242, 0.194, 0.085, 0.035].to_numeric
|
22
|
+
assert_similar_vector(exp_eig, pca.eigenvalues.to_numeric, 0.1)
|
23
23
|
pcs = pca.principal_components(ds)
|
24
24
|
k = 6
|
25
25
|
comp_matrix = pca.component_matrix
|
@@ -34,59 +34,61 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
34
34
|
end
|
35
35
|
|
36
36
|
def test_principalcomponents_ruby_gsl
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
37
|
+
if Statsample.has_gsl?
|
38
|
+
ran = Distribution::Normal.rng
|
39
|
+
|
40
|
+
# @r=::Rserve::Connection.new
|
41
|
+
|
42
|
+
samples = 20
|
43
|
+
[3, 5, 7].each {|k|
|
44
|
+
v = {}
|
45
|
+
v['x0'] = samples.times.map { ran.call }.to_numeric.centered
|
46
|
+
(1...k).each {|i|
|
47
|
+
v["x#{i}"] = samples.times.map { |ii| ran.call * 0.5 + v["x#{i - 1}"][ii] * 0.5 }.to_numeric.centered
|
48
|
+
}
|
49
|
+
|
50
|
+
ds = v.to_dataset
|
51
|
+
cm = ds.covariance_matrix
|
52
|
+
# @r.assign('ds',ds)
|
53
|
+
# @r.eval('cm<-cor(ds);sm<-eigen(cm, sym=TRUE);v<-sm$vectors')
|
54
|
+
# puts "eigenvalues"
|
55
|
+
# puts @r.eval('v').to_ruby.to_s
|
56
|
+
pca_ruby = Statsample::Factor::PCA.new(cm, m: k, use_gsl: false)
|
57
|
+
pca_gsl = Statsample::Factor::PCA.new(cm, m: k, use_gsl: true)
|
58
|
+
pc_ruby = pca_ruby.principal_components(ds)
|
59
|
+
pc_gsl = pca_gsl.principal_components(ds)
|
60
|
+
# Test component matrix correlation!
|
61
|
+
cm_ruby = pca_ruby.component_matrix
|
62
|
+
# puts cm_ruby.summary
|
63
|
+
k.times {|i|
|
64
|
+
pc_id = "PC_#{i + 1}"
|
65
|
+
assert_in_delta(pca_ruby.eigenvalues[i], pca_gsl.eigenvalues[i], 1e-10)
|
66
|
+
# Revert gsl component values
|
67
|
+
pc_gsl_data = (pc_gsl[pc_id][0] - pc_ruby[pc_id][0]).abs > 1e-6 ? pc_gsl[pc_id].recode(&:-@) : pc_gsl[pc_id]
|
68
|
+
assert_similar_vector(pc_gsl_data, pc_ruby[pc_id], 1e-6, "PC for #{k} variables")
|
69
|
+
if false
|
70
|
+
k.times {|j| # variable
|
71
|
+
ds_id = "x#{j}"
|
72
|
+
r = Statsample::Bivariate.correlation(ds[ds_id], pc_ruby[pc_id])
|
73
|
+
puts "#{pc_id}-#{ds_id}:#{r}"
|
74
|
+
}
|
75
|
+
end
|
76
|
+
}
|
75
77
|
}
|
76
|
-
|
78
|
+
end
|
77
79
|
# @r.close
|
78
80
|
end
|
79
81
|
|
80
82
|
def test_principalcomponents
|
81
|
-
principalcomponents(true)
|
83
|
+
principalcomponents(true) if Statsample.has_gsl?
|
82
84
|
principalcomponents(false)
|
83
85
|
end
|
84
86
|
|
85
87
|
def principalcomponents(gsl)
|
86
88
|
ran = Distribution::Normal.rng
|
87
89
|
samples = 50
|
88
|
-
x1 = samples.times.map { ran.call }.
|
89
|
-
x2 = samples.times.map { |i| ran.call * 0.5 + x1[i] * 0.5 }.
|
90
|
+
x1 = samples.times.map { ran.call }.to_numeric
|
91
|
+
x2 = samples.times.map { |i| ran.call * 0.5 + x1[i] * 0.5 }.to_numeric
|
90
92
|
ds = { 'x1' => x1, 'x2' => x2 }.to_dataset
|
91
93
|
|
92
94
|
cm = ds.correlation_matrix
|
@@ -119,9 +121,9 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
119
121
|
end
|
120
122
|
|
121
123
|
def test_kmo
|
122
|
-
@v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70].
|
123
|
-
@v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0].
|
124
|
-
@v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4].
|
124
|
+
@v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70].to_numeric
|
125
|
+
@v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0].to_numeric
|
126
|
+
@v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4].to_numeric
|
125
127
|
# KMO: 0.490
|
126
128
|
ds = { 'v1' => @v1, 'v2' => @v2, 'v3' => @v3 }.to_dataset
|
127
129
|
cor = Statsample::Bivariate.correlation_matrix(ds)
|
@@ -139,11 +141,13 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
139
141
|
end
|
140
142
|
# Tested with SPSS and R
|
141
143
|
def test_pca
|
142
|
-
|
143
|
-
|
144
|
+
|
145
|
+
a = [2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_numeric
|
146
|
+
b = [2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9].to_numeric
|
144
147
|
a.recode! { |c| c - a.mean }
|
145
148
|
b.recode! { |c| c - b.mean }
|
146
149
|
ds = { 'a' => a, 'b' => b }.to_dataset
|
150
|
+
|
147
151
|
cov_matrix = Statsample::Bivariate.covariance_matrix(ds)
|
148
152
|
if Statsample.has_gsl?
|
149
153
|
pca = Statsample::Factor::PCA.new(cov_matrix, use_gsl: true)
|
@@ -156,6 +160,8 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
156
160
|
end
|
157
161
|
|
158
162
|
def pca_set(pca, _type)
|
163
|
+
|
164
|
+
|
159
165
|
expected_eigenvalues = [1.284, 0.0490]
|
160
166
|
expected_eigenvalues.each_with_index{|ev, i|
|
161
167
|
assert_in_delta(ev, pca.eigenvalues[i], 0.001)
|
data/test/test_factor_pa.rb
CHANGED
@@ -15,18 +15,18 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
15
15
|
variables = 10
|
16
16
|
iterations = 50
|
17
17
|
rng = Distribution::Normal.rng
|
18
|
-
f1 = samples.times.collect { rng.call }.
|
19
|
-
f2 = samples.times.collect { rng.call }.
|
18
|
+
f1 = samples.times.collect { rng.call }.to_numeric
|
19
|
+
f2 = samples.times.collect { rng.call }.to_numeric
|
20
20
|
vectors = {}
|
21
21
|
variables.times do |i|
|
22
22
|
if i < 5
|
23
23
|
vectors["v#{i}"] = samples.times.collect {|nv|
|
24
24
|
f1[nv] * 5 + f2[nv] * 2 + rng.call
|
25
|
-
}.
|
25
|
+
}.to_numeric
|
26
26
|
else
|
27
27
|
vectors["v#{i}"] = samples.times.collect {|nv|
|
28
28
|
f2[nv] * 5 + f1[nv] * 2 + rng.call
|
29
|
-
}.
|
29
|
+
}.to_numeric
|
30
30
|
end
|
31
31
|
end
|
32
32
|
ds = vectors.to_dataset
|
data/test/test_ggobi.rb
CHANGED
@@ -2,10 +2,10 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
|
2
2
|
require 'ostruct'
|
3
3
|
class StatsampleGGobiTestCase < Minitest::Test
|
4
4
|
def setup
|
5
|
-
v1 = ([10.2, 20.3, 10, 20, 30, 40, 30, 20, 30, 40] * 10).to_vector(:
|
6
|
-
@v2 = (%w(a b c a a a b b c d) * 10).to_vector(:
|
5
|
+
v1 = ([10.2, 20.3, 10, 20, 30, 40, 30, 20, 30, 40] * 10).to_vector(:numeric)
|
6
|
+
@v2 = (%w(a b c a a a b b c d) * 10).to_vector(:object)
|
7
7
|
@v2.labels = { 'a' => 'letter a', 'd' => 'letter d' }
|
8
|
-
v3 = ([1, 2, 3, 4, 5, 4, 3, 2, 1, 2] * 10).to_vector(:
|
8
|
+
v3 = ([1, 2, 3, 4, 5, 4, 3, 2, 1, 2] * 10).to_vector(:numeric)
|
9
9
|
@ds = { 'v1' => v1, 'v2' => @v2, 'v3' => v3 }.to_dataset
|
10
10
|
end
|
11
11
|
|
data/test/test_gsl.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
2
|
class StatsampleGSLTestCase < Minitest::Test
|
3
3
|
should_with_gsl 'matrix with gsl' do
|
4
|
-
a = [1, 2, 3, 4, 20].to_vector(:
|
5
|
-
b = [3, 2, 3, 4, 50].to_vector(:
|
6
|
-
c = [6, 2, 3, 4, 3].to_vector(:
|
4
|
+
a = [1, 2, 3, 4, 20].to_vector(:numeric)
|
5
|
+
b = [3, 2, 3, 4, 50].to_vector(:numeric)
|
6
|
+
c = [6, 2, 3, 4, 3].to_vector(:numeric)
|
7
7
|
ds = { 'a' => a, 'b' => b, 'c' => c }.to_dataset
|
8
8
|
gsl = ds.to_matrix.to_gsl
|
9
9
|
assert_equal(5, gsl.size1)
|
data/test/test_histogram.rb
CHANGED
@@ -75,13 +75,13 @@ class StatsampleHistogramTestCase < Minitest::Test
|
|
75
75
|
assert_equal(min, h.min_val)
|
76
76
|
end
|
77
77
|
should 'return correct estimated mean' do
|
78
|
-
a = [1.5, 1.5, 1.5, 3.5, 3.5, 3.5].
|
78
|
+
a = [1.5, 1.5, 1.5, 3.5, 3.5, 3.5].to_numeric
|
79
79
|
h = Statsample::Histogram.alloc(5, [0, 5])
|
80
80
|
h.increment(a)
|
81
81
|
assert_equal(2.5, h.estimated_mean)
|
82
82
|
end
|
83
83
|
should 'return correct estimated standard deviation' do
|
84
|
-
a = [0.5, 1.5, 1.5, 1.5, 2.5, 3.5, 3.5, 3.5, 4.5].
|
84
|
+
a = [0.5, 1.5, 1.5, 1.5, 2.5, 3.5, 3.5, 3.5, 4.5].to_numeric
|
85
85
|
h = Statsample::Histogram.alloc(5, [0, 5])
|
86
86
|
h.increment(a)
|
87
87
|
assert_equal(a.sd, h.estimated_standard_deviation)
|
@@ -100,7 +100,7 @@ class StatsampleHistogramTestCase < Minitest::Test
|
|
100
100
|
end
|
101
101
|
should 'not raise exception when all values equal' do
|
102
102
|
assert_nothing_raised do
|
103
|
-
a = [5, 5, 5, 5, 5, 5].
|
103
|
+
a = [5, 5, 5, 5, 5, 5].to_numeric
|
104
104
|
h = Statsample::Graph::Histogram.new(a)
|
105
105
|
h.to_svg
|
106
106
|
end
|
data/test/test_matrix.rb
CHANGED
@@ -7,8 +7,8 @@ class StatsampleMatrixTestCase < Minitest::Test
|
|
7
7
|
m.fields_y = %w(x1 x2)
|
8
8
|
m.name = 'test'
|
9
9
|
samples = 100
|
10
|
-
x1 = [1, 2, 3].
|
11
|
-
x2 = [4, 5, 6].
|
10
|
+
x1 = [1, 2, 3].to_numeric
|
11
|
+
x2 = [4, 5, 6].to_numeric
|
12
12
|
ds = { 'x1' => x1, 'x2' => x2 }.to_dataset
|
13
13
|
ds.name = 'test'
|
14
14
|
obs = m.to_dataset
|
@@ -33,9 +33,9 @@ class StatsampleMatrixTestCase < Minitest::Test
|
|
33
33
|
|
34
34
|
assert_equal(:covariance, a._type)
|
35
35
|
|
36
|
-
a = 50.times.collect { rand }.
|
37
|
-
b = 50.times.collect { rand }.
|
38
|
-
c = 50.times.collect { rand }.
|
36
|
+
a = 50.times.collect { rand }.to_numeric
|
37
|
+
b = 50.times.collect { rand }.to_numeric
|
38
|
+
c = 50.times.collect { rand }.to_numeric
|
39
39
|
ds = { 'a' => a, 'b' => b, 'c' => c }.to_dataset
|
40
40
|
corr = Statsample::Bivariate.correlation_matrix(ds)
|
41
41
|
real = Statsample::Bivariate.covariance_matrix(ds).correlation
|
data/test/test_multiset.rb
CHANGED
@@ -3,8 +3,8 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
|
3
3
|
class StatsampleMultisetTestCase < Minitest::Test
|
4
4
|
def setup
|
5
5
|
@x = %w(a a a a b b b b).to_vector
|
6
|
-
@y = [1, 2, 3, 4, 5, 6, 7, 8].
|
7
|
-
@z = [10, 11, 12, 13, 14, 15, 16, 17].
|
6
|
+
@y = [1, 2, 3, 4, 5, 6, 7, 8].to_numeric
|
7
|
+
@z = [10, 11, 12, 13, 14, 15, 16, 17].to_numeric
|
8
8
|
@ds = { 'x' => @x, 'y' => @y, 'z' => @z }.to_dataset
|
9
9
|
@ms = @ds.to_multiset_by_split('x')
|
10
10
|
end
|
@@ -44,9 +44,9 @@ class StatsampleMultisetTestCase < Minitest::Test
|
|
44
44
|
end
|
45
45
|
|
46
46
|
def test_to_multiset_by_split_one
|
47
|
-
sex = %w(m m m m m f f f f m).to_vector(:
|
48
|
-
city = %w(London Paris NY London Paris NY London Paris NY Tome).to_vector(:
|
49
|
-
age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:
|
47
|
+
sex = %w(m m m m m f f f f m).to_vector(:object)
|
48
|
+
city = %w(London Paris NY London Paris NY London Paris NY Tome).to_vector(:object)
|
49
|
+
age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:numeric)
|
50
50
|
ds = { 'sex' => sex, 'city' => city, 'age' => age }.to_dataset
|
51
51
|
ms = ds.to_multiset_by_split('sex')
|
52
52
|
assert_equal(2, ms.n_datasets)
|
@@ -58,10 +58,10 @@ class StatsampleMultisetTestCase < Minitest::Test
|
|
58
58
|
end
|
59
59
|
|
60
60
|
def test_to_multiset_by_split_multiple
|
61
|
-
sex = %w(m m m m m m m m m m f f f f f f f f f f).to_vector(:
|
62
|
-
city = %w(London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris).to_vector(:
|
63
|
-
hair = %w(blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black).to_vector(:
|
64
|
-
age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:
|
61
|
+
sex = %w(m m m m m m m m m m f f f f f f f f f f).to_vector(:object)
|
62
|
+
city = %w(London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris).to_vector(:object)
|
63
|
+
hair = %w(blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black).to_vector(:object)
|
64
|
+
age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:numeric)
|
65
65
|
ds = { 'sex' => sex, 'city' => city, 'hair' => hair, 'age' => age }.to_dataset(%w(sex city hair age))
|
66
66
|
ms = ds.to_multiset_by_split('sex', 'city', 'hair')
|
67
67
|
assert_equal(8, ms.n_datasets)
|
@@ -84,8 +84,8 @@ class StatsampleMultisetTestCase < Minitest::Test
|
|
84
84
|
end
|
85
85
|
|
86
86
|
def test_stratum_scale
|
87
|
-
boys = { 'test' => [50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:
|
88
|
-
girls = { 'test' => [70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:
|
87
|
+
boys = { 'test' => [50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:numeric) }.to_dataset
|
88
|
+
girls = { 'test' => [70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:numeric) }.to_dataset
|
89
89
|
ms = Statsample::Multiset.new(['test'])
|
90
90
|
ms.add_dataset('boys', boys)
|
91
91
|
ms.add_dataset('girls', girls)
|
@@ -106,12 +106,12 @@ class StatsampleMultisetTestCase < Minitest::Test
|
|
106
106
|
'b' => %w(b b b b).to_vector
|
107
107
|
}
|
108
108
|
ype = {
|
109
|
-
'a' => [1, 2, 3, 4].
|
110
|
-
'b' => [5, 6, 7, 8].
|
109
|
+
'a' => [1, 2, 3, 4].to_numeric,
|
110
|
+
'b' => [5, 6, 7, 8].to_numeric
|
111
111
|
}
|
112
112
|
zpe = {
|
113
|
-
'a' => [10, 11, 12, 13].
|
114
|
-
'b' => [14, 15, 16, 17].
|
113
|
+
'a' => [10, 11, 12, 13].to_numeric,
|
114
|
+
'b' => [14, 15, 16, 17].to_numeric
|
115
115
|
}
|
116
116
|
xp, yp, zp = {}, {}, {}
|
117
117
|
@ms.each {|k, ds|
|
@@ -127,9 +127,9 @@ class StatsampleMultisetTestCase < Minitest::Test
|
|
127
127
|
def test_multiset_union_with_block
|
128
128
|
r1 = rand
|
129
129
|
r2 = rand
|
130
|
-
ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2].
|
130
|
+
ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2].to_numeric
|
131
131
|
|
132
|
-
ze = [10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2].
|
132
|
+
ze = [10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2].to_numeric
|
133
133
|
|
134
134
|
ds2 = @ms.union {|k, ds|
|
135
135
|
ds['y'].recode!{|v|
|
@@ -146,9 +146,9 @@ class StatsampleMultisetTestCase < Minitest::Test
|
|
146
146
|
def test_multiset_union
|
147
147
|
r1 = rand
|
148
148
|
r2 = rand
|
149
|
-
ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2].
|
149
|
+
ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2].to_numeric
|
150
150
|
|
151
|
-
ze = [10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2].
|
151
|
+
ze = [10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2].to_numeric
|
152
152
|
@ms.each {|k, ds|
|
153
153
|
ds['y'].recode!{|v|
|
154
154
|
k == 'a' ? v * r1 : v * r2
|
data/test/test_regression.rb
CHANGED
@@ -3,9 +3,9 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
|
3
3
|
class StatsampleRegressionTestCase < Minitest::Test
|
4
4
|
context 'Example with missing data' do
|
5
5
|
setup do
|
6
|
-
@x = [0.285714285714286, 0.114285714285714, 0.314285714285714, 0.2, 0.2, 0.228571428571429, 0.2, 0.4, 0.714285714285714, 0.285714285714286, 0.285714285714286, 0.228571428571429, 0.485714285714286, 0.457142857142857, 0.257142857142857, 0.228571428571429, 0.285714285714286, 0.285714285714286, 0.285714285714286, 0.142857142857143, 0.285714285714286, 0.514285714285714, 0.485714285714286, 0.228571428571429, 0.285714285714286, 0.342857142857143, 0.285714285714286, 0.0857142857142857].
|
6
|
+
@x = [0.285714285714286, 0.114285714285714, 0.314285714285714, 0.2, 0.2, 0.228571428571429, 0.2, 0.4, 0.714285714285714, 0.285714285714286, 0.285714285714286, 0.228571428571429, 0.485714285714286, 0.457142857142857, 0.257142857142857, 0.228571428571429, 0.285714285714286, 0.285714285714286, 0.285714285714286, 0.142857142857143, 0.285714285714286, 0.514285714285714, 0.485714285714286, 0.228571428571429, 0.285714285714286, 0.342857142857143, 0.285714285714286, 0.0857142857142857].to_numeric
|
7
7
|
|
8
|
-
@y = [nil, 0.233333333333333, nil, 0.266666666666667, 0.366666666666667, nil, 0.333333333333333, 0.3, 0.666666666666667, 0.0333333333333333, 0.333333333333333, nil, nil, 0.533333333333333, 0.433333333333333, 0.4, 0.4, 0.5, 0.4, 0.266666666666667, 0.166666666666667, 0.666666666666667, 0.433333333333333, 0.166666666666667, nil, 0.4, 0.366666666666667, nil].
|
8
|
+
@y = [nil, 0.233333333333333, nil, 0.266666666666667, 0.366666666666667, nil, 0.333333333333333, 0.3, 0.666666666666667, 0.0333333333333333, 0.333333333333333, nil, nil, 0.533333333333333, 0.433333333333333, 0.4, 0.4, 0.5, 0.4, 0.266666666666667, 0.166666666666667, 0.666666666666667, 0.433333333333333, 0.166666666666667, nil, 0.4, 0.366666666666667, nil].to_numeric
|
9
9
|
@ds = { 'x' => @x, 'y' => @y }.to_dataset
|
10
10
|
@lr = Statsample::Regression::Multiple::RubyEngine.new(@ds, 'y')
|
11
11
|
end
|
@@ -26,10 +26,10 @@ class StatsampleRegressionTestCase < Minitest::Test
|
|
26
26
|
|
27
27
|
a, b = rand, rand
|
28
28
|
|
29
|
-
x1 = samples.times.map { rand }.
|
30
|
-
x2 = samples.times.map { rand }.
|
31
|
-
x3 = samples.times.map { |i| x1[i] * (1 + a) + x2[i] * (1 + b) }.
|
32
|
-
y = samples.times.map { |i| x1[i] + x2[i] + x3[i] + rand }.
|
29
|
+
x1 = samples.times.map { rand }.to_numeric
|
30
|
+
x2 = samples.times.map { rand }.to_numeric
|
31
|
+
x3 = samples.times.map { |i| x1[i] * (1 + a) + x2[i] * (1 + b) }.to_numeric
|
32
|
+
y = samples.times.map { |i| x1[i] + x2[i] + x3[i] + rand }.to_numeric
|
33
33
|
|
34
34
|
ds = { 'x1' => x1, 'x2' => x2, 'x3' => x3, 'y' => y }.to_dataset
|
35
35
|
|
@@ -38,8 +38,8 @@ class StatsampleRegressionTestCase < Minitest::Test
|
|
38
38
|
}
|
39
39
|
end
|
40
40
|
def test_parameters
|
41
|
-
@x = [13, 20, 10, 33, 15].to_vector(:
|
42
|
-
@y = [23, 18, 35, 10, 27].to_vector(:
|
41
|
+
@x = [13, 20, 10, 33, 15].to_vector(:numeric)
|
42
|
+
@y = [23, 18, 35, 10, 27].to_vector(:numeric)
|
43
43
|
reg = Statsample::Regression::Simple.new_from_vectors(@x, @y)
|
44
44
|
_test_simple_regression(reg)
|
45
45
|
ds = { 'x' => @x, 'y' => @y }.to_dataset
|
@@ -57,9 +57,9 @@ class StatsampleRegressionTestCase < Minitest::Test
|
|
57
57
|
end
|
58
58
|
|
59
59
|
def test_summaries
|
60
|
-
a = 10.times.map { rand(100) }.
|
61
|
-
b = 10.times.map { rand(100) }.
|
62
|
-
y = 10.times.map { rand(100) }.
|
60
|
+
a = 10.times.map { rand(100) }.to_numeric
|
61
|
+
b = 10.times.map { rand(100) }.to_numeric
|
62
|
+
y = 10.times.map { rand(100) }.to_numeric
|
63
63
|
ds = { 'a' => a, 'b' => b, 'y' => y }.to_dataset
|
64
64
|
lr = Statsample::Regression::Multiple::RubyEngine.new(ds, 'y')
|
65
65
|
assert(lr.summary.size > 0)
|
@@ -87,10 +87,10 @@ class StatsampleRegressionTestCase < Minitest::Test
|
|
87
87
|
end
|
88
88
|
|
89
89
|
def test_multiple_regression_pairwise_2
|
90
|
-
@a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 3, nil, 3, nil, 3].to_vector(:
|
91
|
-
@b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4, 2, 2, nil, 6, 2].to_vector(:
|
92
|
-
@c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100, nil, 3, 7, nil, 7].to_vector(:
|
93
|
-
@y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 30, 40, nil, 50, nil].to_vector(:
|
90
|
+
@a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 3, nil, 3, nil, 3].to_vector(:numeric)
|
91
|
+
@b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4, 2, 2, nil, 6, 2].to_vector(:numeric)
|
92
|
+
@c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100, nil, 3, 7, nil, 7].to_vector(:numeric)
|
93
|
+
@y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 30, 40, nil, 50, nil].to_vector(:numeric)
|
94
94
|
ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
|
95
95
|
lr = Statsample::Regression::Multiple::RubyEngine.new(ds, 'y')
|
96
96
|
assert_in_delta(2407.436, lr.sst, 0.001)
|
@@ -103,10 +103,10 @@ class StatsampleRegressionTestCase < Minitest::Test
|
|
103
103
|
|
104
104
|
def test_multiple_regression_gsl
|
105
105
|
if Statsample.has_gsl?
|
106
|
-
@a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:
|
107
|
-
@b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:
|
108
|
-
@c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:
|
109
|
-
@y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:
|
106
|
+
@a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:numeric)
|
107
|
+
@b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:numeric)
|
108
|
+
@c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:numeric)
|
109
|
+
@y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:numeric)
|
110
110
|
ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
|
111
111
|
lr = Statsample::Regression::Multiple::GslEngine.new(ds, 'y')
|
112
112
|
assert(lr.summary.size > 0)
|
@@ -174,10 +174,10 @@ class StatsampleRegressionTestCase < Minitest::Test
|
|
174
174
|
end
|
175
175
|
|
176
176
|
def test_regression_matrix
|
177
|
-
@a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:
|
178
|
-
@b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:
|
179
|
-
@c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:
|
180
|
-
@y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:
|
177
|
+
@a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:numeric)
|
178
|
+
@b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:numeric)
|
179
|
+
@c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:numeric)
|
180
|
+
@y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:numeric)
|
181
181
|
ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
|
182
182
|
cor = Statsample::Bivariate.correlation_matrix(ds)
|
183
183
|
|
@@ -194,10 +194,10 @@ class StatsampleRegressionTestCase < Minitest::Test
|
|
194
194
|
end
|
195
195
|
|
196
196
|
def test_regression_rubyengine
|
197
|
-
@a = [nil, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:
|
198
|
-
@b = [nil, 3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:
|
199
|
-
@c = [nil, 11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:
|
200
|
-
@y = [nil, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:
|
197
|
+
@a = [nil, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:numeric)
|
198
|
+
@b = [nil, 3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:numeric)
|
199
|
+
@c = [nil, 11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:numeric)
|
200
|
+
@y = [nil, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:numeric)
|
201
201
|
ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
|
202
202
|
lr = Statsample::Regression::Multiple::RubyEngine.new(ds, 'y')
|
203
203
|
assert_equal(11, lr.total_cases)
|
data/test/test_reliability.rb
CHANGED
@@ -16,9 +16,9 @@ class StatsampleReliabilityTestCase < Minitest::Test
|
|
16
16
|
@samples = 40
|
17
17
|
@n_variables = rand(10) + 2
|
18
18
|
@ds = Statsample::Dataset.new
|
19
|
-
base = @samples.times.collect { |_a| rand }.
|
19
|
+
base = @samples.times.collect { |_a| rand }.to_numeric
|
20
20
|
@n_variables.times do |i|
|
21
|
-
@ds[i] = base.collect { |v| v + rand }.
|
21
|
+
@ds[i] = base.collect { |v| v + rand }.to_numeric
|
22
22
|
end
|
23
23
|
|
24
24
|
@ds.update_valid_data
|
@@ -67,9 +67,9 @@ class StatsampleReliabilityTestCase < Minitest::Test
|
|
67
67
|
@samples = 100
|
68
68
|
@points = rand(10) + 3
|
69
69
|
@max_point = (@points - 1) * 3
|
70
|
-
@x1 = @samples.times.map { rand(@points) }.
|
71
|
-
@x2 = @samples.times.map { rand(@points) }.
|
72
|
-
@x3 = @samples.times.map { rand(@points) }.
|
70
|
+
@x1 = @samples.times.map { rand(@points) }.to_numeric
|
71
|
+
@x2 = @samples.times.map { rand(@points) }.to_numeric
|
72
|
+
@x3 = @samples.times.map { rand(@points) }.to_numeric
|
73
73
|
@ds = { 'a' => @x1, 'b' => @x2, 'c' => @x3 }.to_dataset
|
74
74
|
@icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds)
|
75
75
|
end
|
@@ -77,11 +77,11 @@ class StatsampleReliabilityTestCase < Minitest::Test
|
|
77
77
|
assert_equal(@ds.vector_sum, @icc.vector_total)
|
78
78
|
end
|
79
79
|
should 'have a correct different vector_total' do
|
80
|
-
x2 = @samples.times.map { rand(10) }.
|
80
|
+
x2 = @samples.times.map { rand(10) }.to_numeric
|
81
81
|
@icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds, x2)
|
82
82
|
assert_equal(x2, @icc.vector_total)
|
83
83
|
assert_raises(ArgumentError) do
|
84
|
-
inc = (@samples + 10).times.map { rand(10) }.
|
84
|
+
inc = (@samples + 10).times.map { rand(10) }.to_numeric
|
85
85
|
@icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds, inc)
|
86
86
|
end
|
87
87
|
end
|
@@ -119,7 +119,7 @@ class StatsampleReliabilityTestCase < Minitest::Test
|
|
119
119
|
h = {}
|
120
120
|
@scales.times {|s|
|
121
121
|
@items_per_scale.times {|i|
|
122
|
-
h["#{s}_#{i}"] = (size.times.map { (s * 2) + rand }).
|
122
|
+
h["#{s}_#{i}"] = (size.times.map { (s * 2) + rand }).to_numeric
|
123
123
|
}
|
124
124
|
}
|
125
125
|
@ds = h.to_dataset
|
@@ -177,10 +177,10 @@ class StatsampleReliabilityTestCase < Minitest::Test
|
|
177
177
|
end
|
178
178
|
context Statsample::Reliability::ScaleAnalysis do
|
179
179
|
setup do
|
180
|
-
@x1 = [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 30].
|
181
|
-
@x2 = [1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 50].
|
182
|
-
@x3 = [2, 2, 1, 1, 1, 2, 2, 2, 3, 4, 5, 40].
|
183
|
-
@x4 = [1, 2, 3, 4, 4, 4, 4, 3, 4, 4, 5, 30].
|
180
|
+
@x1 = [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 30].to_numeric
|
181
|
+
@x2 = [1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 50].to_numeric
|
182
|
+
@x3 = [2, 2, 1, 1, 1, 2, 2, 2, 3, 4, 5, 40].to_numeric
|
183
|
+
@x4 = [1, 2, 3, 4, 4, 4, 4, 3, 4, 4, 5, 30].to_numeric
|
184
184
|
@ds = { 'x1' => @x1, 'x2' => @x2, 'x3' => @x3, 'x4' => @x4 }.to_dataset
|
185
185
|
@ia = Statsample::Reliability::ScaleAnalysis.new(@ds)
|
186
186
|
@cov_matrix = @ia.cov_m
|
@@ -188,7 +188,7 @@ class StatsampleReliabilityTestCase < Minitest::Test
|
|
188
188
|
should 'return correct values for item analysis' do
|
189
189
|
assert_in_delta(0.980, @ia.alpha, 0.001)
|
190
190
|
assert_in_delta(0.999, @ia.alpha_standarized, 0.001)
|
191
|
-
var_mean = 4.times.map { |m| @cov_matrix[m, m] }.
|
191
|
+
var_mean = 4.times.map { |m| @cov_matrix[m, m] }.to_numeric.mean
|
192
192
|
assert_in_delta(var_mean, @ia.variances_mean)
|
193
193
|
assert_equal(@x1.mean, @ia.item_statistics['x1'][:mean])
|
194
194
|
assert_equal(@x4.mean, @ia.item_statistics['x4'][:mean])
|
@@ -211,7 +211,7 @@ class StatsampleReliabilityTestCase < Minitest::Test
|
|
211
211
|
end
|
212
212
|
}
|
213
213
|
}
|
214
|
-
assert_in_delta(covariances.
|
214
|
+
assert_in_delta(covariances.to_numeric.mean, @ia.covariances_mean)
|
215
215
|
assert_in_delta(0.999, @ia.item_total_correlation['x1'], 0.001)
|
216
216
|
assert_in_delta(1050.455, @ia.stats_if_deleted['x1'][:variance_sample], 0.001)
|
217
217
|
end
|