statsample 1.4.3 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/History.txt +8 -0
- data/benchmarks/correlation_matrix_15_variables.rb +1 -1
- data/benchmarks/correlation_matrix_5_variables.rb +1 -1
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +2 -2
- data/examples/dataset.rb +2 -2
- data/examples/icc.rb +1 -1
- data/examples/levene.rb +2 -2
- data/examples/parallel_analysis.rb +1 -1
- data/examples/u_test.rb +2 -2
- data/examples/vector.rb +1 -1
- data/examples/velicer_map_test.rb +1 -1
- data/lib/statsample.rb +30 -4
- data/lib/statsample/anova/oneway.rb +3 -3
- data/lib/statsample/anova/twoway.rb +3 -3
- data/lib/statsample/bivariate.rb +7 -7
- data/lib/statsample/bivariate/pearson.rb +2 -2
- data/lib/statsample/converter/csv.rb +1 -1
- data/lib/statsample/converters.rb +7 -7
- data/lib/statsample/dataset.rb +8 -8
- data/lib/statsample/dominanceanalysis.rb +4 -4
- data/lib/statsample/dominanceanalysis/bootstrap.rb +8 -8
- data/lib/statsample/factor.rb +2 -4
- data/lib/statsample/factor/map.rb +2 -1
- data/lib/statsample/factor/parallelanalysis.rb +2 -2
- data/lib/statsample/factor/pca.rb +2 -2
- data/lib/statsample/factor/principalaxis.rb +2 -2
- data/lib/statsample/graph/boxplot.rb +4 -4
- data/lib/statsample/graph/histogram.rb +2 -2
- data/lib/statsample/graph/scatterplot.rb +4 -4
- data/lib/statsample/matrix.rb +20 -6
- data/lib/statsample/regression.rb +2 -2
- data/lib/statsample/regression/multiple.rb +3 -3
- data/lib/statsample/regression/multiple/alglibengine.rb +5 -5
- data/lib/statsample/regression/multiple/baseengine.rb +3 -3
- data/lib/statsample/regression/multiple/gslengine.rb +5 -5
- data/lib/statsample/regression/multiple/rubyengine.rb +4 -4
- data/lib/statsample/reliability/icc.rb +1 -1
- data/lib/statsample/reliability/multiscaleanalysis.rb +4 -4
- data/lib/statsample/reliability/scaleanalysis.rb +6 -6
- data/lib/statsample/reliability/skillscaleanalysis.rb +1 -1
- data/lib/statsample/resample.rb +1 -1
- data/lib/statsample/shorthand.rb +1 -1
- data/lib/statsample/test/bartlettsphericity.rb +1 -1
- data/lib/statsample/test/levene.rb +4 -4
- data/lib/statsample/test/t.rb +3 -3
- data/lib/statsample/test/umannwhitney.rb +2 -2
- data/lib/statsample/vector.rb +103 -80
- data/lib/statsample/vector/gsl.rb +16 -16
- data/lib/statsample/version.rb +1 -1
- data/test/test_analysis.rb +1 -1
- data/test/test_anova_contrast.rb +4 -4
- data/test/test_anovatwowaywithdataset.rb +1 -1
- data/test/test_anovawithvectors.rb +6 -6
- data/test/test_awesome_print_bug.rb +1 -1
- data/test/test_bartlettsphericity.rb +3 -3
- data/test/test_bivariate.rb +38 -38
- data/test/test_crosstab.rb +2 -2
- data/test/test_csv.rb +6 -6
- data/test/test_dataset.rb +79 -79
- data/test/test_factor.rb +55 -49
- data/test/test_factor_pa.rb +4 -4
- data/test/test_ggobi.rb +3 -3
- data/test/test_gsl.rb +3 -3
- data/test/test_histogram.rb +3 -3
- data/test/test_matrix.rb +5 -5
- data/test/test_multiset.rb +19 -19
- data/test/test_regression.rb +27 -27
- data/test/test_reliability.rb +14 -14
- data/test/test_reliability_icc.rb +7 -7
- data/test/test_reliability_skillscale.rb +6 -6
- data/test/test_resample.rb +1 -1
- data/test/test_rserve_extension.rb +4 -4
- data/test/test_statistics.rb +5 -5
- data/test/test_stest.rb +8 -8
- data/test/test_stratified.rb +3 -3
- data/test/test_test_t.rb +5 -5
- data/test/test_umannwhitney.rb +2 -2
- data/test/test_vector.rb +153 -119
- data/test/test_wilcoxonsignedrank.rb +4 -4
- data/test/test_xls.rb +6 -6
- metadata +3 -53
data/test/test_factor.rb
CHANGED
@@ -18,8 +18,8 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
18
18
|
pca = Statsample::Factor::PCA.new(cm, m: 6)
|
19
19
|
# puts pca.summary
|
20
20
|
# puts pca.feature_matrix
|
21
|
-
exp_eig = [2.985, 0.931, 0.242, 0.194, 0.085, 0.035].
|
22
|
-
assert_similar_vector(exp_eig, pca.eigenvalues.
|
21
|
+
exp_eig = [2.985, 0.931, 0.242, 0.194, 0.085, 0.035].to_numeric
|
22
|
+
assert_similar_vector(exp_eig, pca.eigenvalues.to_numeric, 0.1)
|
23
23
|
pcs = pca.principal_components(ds)
|
24
24
|
k = 6
|
25
25
|
comp_matrix = pca.component_matrix
|
@@ -34,59 +34,61 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
34
34
|
end
|
35
35
|
|
36
36
|
def test_principalcomponents_ruby_gsl
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
37
|
+
if Statsample.has_gsl?
|
38
|
+
ran = Distribution::Normal.rng
|
39
|
+
|
40
|
+
# @r=::Rserve::Connection.new
|
41
|
+
|
42
|
+
samples = 20
|
43
|
+
[3, 5, 7].each {|k|
|
44
|
+
v = {}
|
45
|
+
v['x0'] = samples.times.map { ran.call }.to_numeric.centered
|
46
|
+
(1...k).each {|i|
|
47
|
+
v["x#{i}"] = samples.times.map { |ii| ran.call * 0.5 + v["x#{i - 1}"][ii] * 0.5 }.to_numeric.centered
|
48
|
+
}
|
49
|
+
|
50
|
+
ds = v.to_dataset
|
51
|
+
cm = ds.covariance_matrix
|
52
|
+
# @r.assign('ds',ds)
|
53
|
+
# @r.eval('cm<-cor(ds);sm<-eigen(cm, sym=TRUE);v<-sm$vectors')
|
54
|
+
# puts "eigenvalues"
|
55
|
+
# puts @r.eval('v').to_ruby.to_s
|
56
|
+
pca_ruby = Statsample::Factor::PCA.new(cm, m: k, use_gsl: false)
|
57
|
+
pca_gsl = Statsample::Factor::PCA.new(cm, m: k, use_gsl: true)
|
58
|
+
pc_ruby = pca_ruby.principal_components(ds)
|
59
|
+
pc_gsl = pca_gsl.principal_components(ds)
|
60
|
+
# Test component matrix correlation!
|
61
|
+
cm_ruby = pca_ruby.component_matrix
|
62
|
+
# puts cm_ruby.summary
|
63
|
+
k.times {|i|
|
64
|
+
pc_id = "PC_#{i + 1}"
|
65
|
+
assert_in_delta(pca_ruby.eigenvalues[i], pca_gsl.eigenvalues[i], 1e-10)
|
66
|
+
# Revert gsl component values
|
67
|
+
pc_gsl_data = (pc_gsl[pc_id][0] - pc_ruby[pc_id][0]).abs > 1e-6 ? pc_gsl[pc_id].recode(&:-@) : pc_gsl[pc_id]
|
68
|
+
assert_similar_vector(pc_gsl_data, pc_ruby[pc_id], 1e-6, "PC for #{k} variables")
|
69
|
+
if false
|
70
|
+
k.times {|j| # variable
|
71
|
+
ds_id = "x#{j}"
|
72
|
+
r = Statsample::Bivariate.correlation(ds[ds_id], pc_ruby[pc_id])
|
73
|
+
puts "#{pc_id}-#{ds_id}:#{r}"
|
74
|
+
}
|
75
|
+
end
|
76
|
+
}
|
75
77
|
}
|
76
|
-
|
78
|
+
end
|
77
79
|
# @r.close
|
78
80
|
end
|
79
81
|
|
80
82
|
def test_principalcomponents
|
81
|
-
principalcomponents(true)
|
83
|
+
principalcomponents(true) if Statsample.has_gsl?
|
82
84
|
principalcomponents(false)
|
83
85
|
end
|
84
86
|
|
85
87
|
def principalcomponents(gsl)
|
86
88
|
ran = Distribution::Normal.rng
|
87
89
|
samples = 50
|
88
|
-
x1 = samples.times.map { ran.call }.
|
89
|
-
x2 = samples.times.map { |i| ran.call * 0.5 + x1[i] * 0.5 }.
|
90
|
+
x1 = samples.times.map { ran.call }.to_numeric
|
91
|
+
x2 = samples.times.map { |i| ran.call * 0.5 + x1[i] * 0.5 }.to_numeric
|
90
92
|
ds = { 'x1' => x1, 'x2' => x2 }.to_dataset
|
91
93
|
|
92
94
|
cm = ds.correlation_matrix
|
@@ -119,9 +121,9 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
119
121
|
end
|
120
122
|
|
121
123
|
def test_kmo
|
122
|
-
@v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70].
|
123
|
-
@v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0].
|
124
|
-
@v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4].
|
124
|
+
@v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70].to_numeric
|
125
|
+
@v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0].to_numeric
|
126
|
+
@v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4].to_numeric
|
125
127
|
# KMO: 0.490
|
126
128
|
ds = { 'v1' => @v1, 'v2' => @v2, 'v3' => @v3 }.to_dataset
|
127
129
|
cor = Statsample::Bivariate.correlation_matrix(ds)
|
@@ -139,11 +141,13 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
139
141
|
end
|
140
142
|
# Tested with SPSS and R
|
141
143
|
def test_pca
|
142
|
-
|
143
|
-
|
144
|
+
|
145
|
+
a = [2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_numeric
|
146
|
+
b = [2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9].to_numeric
|
144
147
|
a.recode! { |c| c - a.mean }
|
145
148
|
b.recode! { |c| c - b.mean }
|
146
149
|
ds = { 'a' => a, 'b' => b }.to_dataset
|
150
|
+
|
147
151
|
cov_matrix = Statsample::Bivariate.covariance_matrix(ds)
|
148
152
|
if Statsample.has_gsl?
|
149
153
|
pca = Statsample::Factor::PCA.new(cov_matrix, use_gsl: true)
|
@@ -156,6 +160,8 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
156
160
|
end
|
157
161
|
|
158
162
|
def pca_set(pca, _type)
|
163
|
+
|
164
|
+
|
159
165
|
expected_eigenvalues = [1.284, 0.0490]
|
160
166
|
expected_eigenvalues.each_with_index{|ev, i|
|
161
167
|
assert_in_delta(ev, pca.eigenvalues[i], 0.001)
|
data/test/test_factor_pa.rb
CHANGED
@@ -15,18 +15,18 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
15
15
|
variables = 10
|
16
16
|
iterations = 50
|
17
17
|
rng = Distribution::Normal.rng
|
18
|
-
f1 = samples.times.collect { rng.call }.
|
19
|
-
f2 = samples.times.collect { rng.call }.
|
18
|
+
f1 = samples.times.collect { rng.call }.to_numeric
|
19
|
+
f2 = samples.times.collect { rng.call }.to_numeric
|
20
20
|
vectors = {}
|
21
21
|
variables.times do |i|
|
22
22
|
if i < 5
|
23
23
|
vectors["v#{i}"] = samples.times.collect {|nv|
|
24
24
|
f1[nv] * 5 + f2[nv] * 2 + rng.call
|
25
|
-
}.
|
25
|
+
}.to_numeric
|
26
26
|
else
|
27
27
|
vectors["v#{i}"] = samples.times.collect {|nv|
|
28
28
|
f2[nv] * 5 + f1[nv] * 2 + rng.call
|
29
|
-
}.
|
29
|
+
}.to_numeric
|
30
30
|
end
|
31
31
|
end
|
32
32
|
ds = vectors.to_dataset
|
data/test/test_ggobi.rb
CHANGED
@@ -2,10 +2,10 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
|
2
2
|
require 'ostruct'
|
3
3
|
class StatsampleGGobiTestCase < Minitest::Test
|
4
4
|
def setup
|
5
|
-
v1 = ([10.2, 20.3, 10, 20, 30, 40, 30, 20, 30, 40] * 10).to_vector(:
|
6
|
-
@v2 = (%w(a b c a a a b b c d) * 10).to_vector(:
|
5
|
+
v1 = ([10.2, 20.3, 10, 20, 30, 40, 30, 20, 30, 40] * 10).to_vector(:numeric)
|
6
|
+
@v2 = (%w(a b c a a a b b c d) * 10).to_vector(:object)
|
7
7
|
@v2.labels = { 'a' => 'letter a', 'd' => 'letter d' }
|
8
|
-
v3 = ([1, 2, 3, 4, 5, 4, 3, 2, 1, 2] * 10).to_vector(:
|
8
|
+
v3 = ([1, 2, 3, 4, 5, 4, 3, 2, 1, 2] * 10).to_vector(:numeric)
|
9
9
|
@ds = { 'v1' => v1, 'v2' => @v2, 'v3' => v3 }.to_dataset
|
10
10
|
end
|
11
11
|
|
data/test/test_gsl.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
2
|
class StatsampleGSLTestCase < Minitest::Test
|
3
3
|
should_with_gsl 'matrix with gsl' do
|
4
|
-
a = [1, 2, 3, 4, 20].to_vector(:
|
5
|
-
b = [3, 2, 3, 4, 50].to_vector(:
|
6
|
-
c = [6, 2, 3, 4, 3].to_vector(:
|
4
|
+
a = [1, 2, 3, 4, 20].to_vector(:numeric)
|
5
|
+
b = [3, 2, 3, 4, 50].to_vector(:numeric)
|
6
|
+
c = [6, 2, 3, 4, 3].to_vector(:numeric)
|
7
7
|
ds = { 'a' => a, 'b' => b, 'c' => c }.to_dataset
|
8
8
|
gsl = ds.to_matrix.to_gsl
|
9
9
|
assert_equal(5, gsl.size1)
|
data/test/test_histogram.rb
CHANGED
@@ -75,13 +75,13 @@ class StatsampleHistogramTestCase < Minitest::Test
|
|
75
75
|
assert_equal(min, h.min_val)
|
76
76
|
end
|
77
77
|
should 'return correct estimated mean' do
|
78
|
-
a = [1.5, 1.5, 1.5, 3.5, 3.5, 3.5].
|
78
|
+
a = [1.5, 1.5, 1.5, 3.5, 3.5, 3.5].to_numeric
|
79
79
|
h = Statsample::Histogram.alloc(5, [0, 5])
|
80
80
|
h.increment(a)
|
81
81
|
assert_equal(2.5, h.estimated_mean)
|
82
82
|
end
|
83
83
|
should 'return correct estimated standard deviation' do
|
84
|
-
a = [0.5, 1.5, 1.5, 1.5, 2.5, 3.5, 3.5, 3.5, 4.5].
|
84
|
+
a = [0.5, 1.5, 1.5, 1.5, 2.5, 3.5, 3.5, 3.5, 4.5].to_numeric
|
85
85
|
h = Statsample::Histogram.alloc(5, [0, 5])
|
86
86
|
h.increment(a)
|
87
87
|
assert_equal(a.sd, h.estimated_standard_deviation)
|
@@ -100,7 +100,7 @@ class StatsampleHistogramTestCase < Minitest::Test
|
|
100
100
|
end
|
101
101
|
should 'not raise exception when all values equal' do
|
102
102
|
assert_nothing_raised do
|
103
|
-
a = [5, 5, 5, 5, 5, 5].
|
103
|
+
a = [5, 5, 5, 5, 5, 5].to_numeric
|
104
104
|
h = Statsample::Graph::Histogram.new(a)
|
105
105
|
h.to_svg
|
106
106
|
end
|
data/test/test_matrix.rb
CHANGED
@@ -7,8 +7,8 @@ class StatsampleMatrixTestCase < Minitest::Test
|
|
7
7
|
m.fields_y = %w(x1 x2)
|
8
8
|
m.name = 'test'
|
9
9
|
samples = 100
|
10
|
-
x1 = [1, 2, 3].
|
11
|
-
x2 = [4, 5, 6].
|
10
|
+
x1 = [1, 2, 3].to_numeric
|
11
|
+
x2 = [4, 5, 6].to_numeric
|
12
12
|
ds = { 'x1' => x1, 'x2' => x2 }.to_dataset
|
13
13
|
ds.name = 'test'
|
14
14
|
obs = m.to_dataset
|
@@ -33,9 +33,9 @@ class StatsampleMatrixTestCase < Minitest::Test
|
|
33
33
|
|
34
34
|
assert_equal(:covariance, a._type)
|
35
35
|
|
36
|
-
a = 50.times.collect { rand }.
|
37
|
-
b = 50.times.collect { rand }.
|
38
|
-
c = 50.times.collect { rand }.
|
36
|
+
a = 50.times.collect { rand }.to_numeric
|
37
|
+
b = 50.times.collect { rand }.to_numeric
|
38
|
+
c = 50.times.collect { rand }.to_numeric
|
39
39
|
ds = { 'a' => a, 'b' => b, 'c' => c }.to_dataset
|
40
40
|
corr = Statsample::Bivariate.correlation_matrix(ds)
|
41
41
|
real = Statsample::Bivariate.covariance_matrix(ds).correlation
|
data/test/test_multiset.rb
CHANGED
@@ -3,8 +3,8 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
|
3
3
|
class StatsampleMultisetTestCase < Minitest::Test
|
4
4
|
def setup
|
5
5
|
@x = %w(a a a a b b b b).to_vector
|
6
|
-
@y = [1, 2, 3, 4, 5, 6, 7, 8].
|
7
|
-
@z = [10, 11, 12, 13, 14, 15, 16, 17].
|
6
|
+
@y = [1, 2, 3, 4, 5, 6, 7, 8].to_numeric
|
7
|
+
@z = [10, 11, 12, 13, 14, 15, 16, 17].to_numeric
|
8
8
|
@ds = { 'x' => @x, 'y' => @y, 'z' => @z }.to_dataset
|
9
9
|
@ms = @ds.to_multiset_by_split('x')
|
10
10
|
end
|
@@ -44,9 +44,9 @@ class StatsampleMultisetTestCase < Minitest::Test
|
|
44
44
|
end
|
45
45
|
|
46
46
|
def test_to_multiset_by_split_one
|
47
|
-
sex = %w(m m m m m f f f f m).to_vector(:
|
48
|
-
city = %w(London Paris NY London Paris NY London Paris NY Tome).to_vector(:
|
49
|
-
age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:
|
47
|
+
sex = %w(m m m m m f f f f m).to_vector(:object)
|
48
|
+
city = %w(London Paris NY London Paris NY London Paris NY Tome).to_vector(:object)
|
49
|
+
age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:numeric)
|
50
50
|
ds = { 'sex' => sex, 'city' => city, 'age' => age }.to_dataset
|
51
51
|
ms = ds.to_multiset_by_split('sex')
|
52
52
|
assert_equal(2, ms.n_datasets)
|
@@ -58,10 +58,10 @@ class StatsampleMultisetTestCase < Minitest::Test
|
|
58
58
|
end
|
59
59
|
|
60
60
|
def test_to_multiset_by_split_multiple
|
61
|
-
sex = %w(m m m m m m m m m m f f f f f f f f f f).to_vector(:
|
62
|
-
city = %w(London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris).to_vector(:
|
63
|
-
hair = %w(blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black).to_vector(:
|
64
|
-
age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:
|
61
|
+
sex = %w(m m m m m m m m m m f f f f f f f f f f).to_vector(:object)
|
62
|
+
city = %w(London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris).to_vector(:object)
|
63
|
+
hair = %w(blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black).to_vector(:object)
|
64
|
+
age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:numeric)
|
65
65
|
ds = { 'sex' => sex, 'city' => city, 'hair' => hair, 'age' => age }.to_dataset(%w(sex city hair age))
|
66
66
|
ms = ds.to_multiset_by_split('sex', 'city', 'hair')
|
67
67
|
assert_equal(8, ms.n_datasets)
|
@@ -84,8 +84,8 @@ class StatsampleMultisetTestCase < Minitest::Test
|
|
84
84
|
end
|
85
85
|
|
86
86
|
def test_stratum_scale
|
87
|
-
boys = { 'test' => [50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:
|
88
|
-
girls = { 'test' => [70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:
|
87
|
+
boys = { 'test' => [50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:numeric) }.to_dataset
|
88
|
+
girls = { 'test' => [70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:numeric) }.to_dataset
|
89
89
|
ms = Statsample::Multiset.new(['test'])
|
90
90
|
ms.add_dataset('boys', boys)
|
91
91
|
ms.add_dataset('girls', girls)
|
@@ -106,12 +106,12 @@ class StatsampleMultisetTestCase < Minitest::Test
|
|
106
106
|
'b' => %w(b b b b).to_vector
|
107
107
|
}
|
108
108
|
ype = {
|
109
|
-
'a' => [1, 2, 3, 4].
|
110
|
-
'b' => [5, 6, 7, 8].
|
109
|
+
'a' => [1, 2, 3, 4].to_numeric,
|
110
|
+
'b' => [5, 6, 7, 8].to_numeric
|
111
111
|
}
|
112
112
|
zpe = {
|
113
|
-
'a' => [10, 11, 12, 13].
|
114
|
-
'b' => [14, 15, 16, 17].
|
113
|
+
'a' => [10, 11, 12, 13].to_numeric,
|
114
|
+
'b' => [14, 15, 16, 17].to_numeric
|
115
115
|
}
|
116
116
|
xp, yp, zp = {}, {}, {}
|
117
117
|
@ms.each {|k, ds|
|
@@ -127,9 +127,9 @@ class StatsampleMultisetTestCase < Minitest::Test
|
|
127
127
|
def test_multiset_union_with_block
|
128
128
|
r1 = rand
|
129
129
|
r2 = rand
|
130
|
-
ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2].
|
130
|
+
ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2].to_numeric
|
131
131
|
|
132
|
-
ze = [10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2].
|
132
|
+
ze = [10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2].to_numeric
|
133
133
|
|
134
134
|
ds2 = @ms.union {|k, ds|
|
135
135
|
ds['y'].recode!{|v|
|
@@ -146,9 +146,9 @@ class StatsampleMultisetTestCase < Minitest::Test
|
|
146
146
|
def test_multiset_union
|
147
147
|
r1 = rand
|
148
148
|
r2 = rand
|
149
|
-
ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2].
|
149
|
+
ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2].to_numeric
|
150
150
|
|
151
|
-
ze = [10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2].
|
151
|
+
ze = [10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2].to_numeric
|
152
152
|
@ms.each {|k, ds|
|
153
153
|
ds['y'].recode!{|v|
|
154
154
|
k == 'a' ? v * r1 : v * r2
|
data/test/test_regression.rb
CHANGED
@@ -3,9 +3,9 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
|
3
3
|
class StatsampleRegressionTestCase < Minitest::Test
|
4
4
|
context 'Example with missing data' do
|
5
5
|
setup do
|
6
|
-
@x = [0.285714285714286, 0.114285714285714, 0.314285714285714, 0.2, 0.2, 0.228571428571429, 0.2, 0.4, 0.714285714285714, 0.285714285714286, 0.285714285714286, 0.228571428571429, 0.485714285714286, 0.457142857142857, 0.257142857142857, 0.228571428571429, 0.285714285714286, 0.285714285714286, 0.285714285714286, 0.142857142857143, 0.285714285714286, 0.514285714285714, 0.485714285714286, 0.228571428571429, 0.285714285714286, 0.342857142857143, 0.285714285714286, 0.0857142857142857].
|
6
|
+
@x = [0.285714285714286, 0.114285714285714, 0.314285714285714, 0.2, 0.2, 0.228571428571429, 0.2, 0.4, 0.714285714285714, 0.285714285714286, 0.285714285714286, 0.228571428571429, 0.485714285714286, 0.457142857142857, 0.257142857142857, 0.228571428571429, 0.285714285714286, 0.285714285714286, 0.285714285714286, 0.142857142857143, 0.285714285714286, 0.514285714285714, 0.485714285714286, 0.228571428571429, 0.285714285714286, 0.342857142857143, 0.285714285714286, 0.0857142857142857].to_numeric
|
7
7
|
|
8
|
-
@y = [nil, 0.233333333333333, nil, 0.266666666666667, 0.366666666666667, nil, 0.333333333333333, 0.3, 0.666666666666667, 0.0333333333333333, 0.333333333333333, nil, nil, 0.533333333333333, 0.433333333333333, 0.4, 0.4, 0.5, 0.4, 0.266666666666667, 0.166666666666667, 0.666666666666667, 0.433333333333333, 0.166666666666667, nil, 0.4, 0.366666666666667, nil].
|
8
|
+
@y = [nil, 0.233333333333333, nil, 0.266666666666667, 0.366666666666667, nil, 0.333333333333333, 0.3, 0.666666666666667, 0.0333333333333333, 0.333333333333333, nil, nil, 0.533333333333333, 0.433333333333333, 0.4, 0.4, 0.5, 0.4, 0.266666666666667, 0.166666666666667, 0.666666666666667, 0.433333333333333, 0.166666666666667, nil, 0.4, 0.366666666666667, nil].to_numeric
|
9
9
|
@ds = { 'x' => @x, 'y' => @y }.to_dataset
|
10
10
|
@lr = Statsample::Regression::Multiple::RubyEngine.new(@ds, 'y')
|
11
11
|
end
|
@@ -26,10 +26,10 @@ class StatsampleRegressionTestCase < Minitest::Test
|
|
26
26
|
|
27
27
|
a, b = rand, rand
|
28
28
|
|
29
|
-
x1 = samples.times.map { rand }.
|
30
|
-
x2 = samples.times.map { rand }.
|
31
|
-
x3 = samples.times.map { |i| x1[i] * (1 + a) + x2[i] * (1 + b) }.
|
32
|
-
y = samples.times.map { |i| x1[i] + x2[i] + x3[i] + rand }.
|
29
|
+
x1 = samples.times.map { rand }.to_numeric
|
30
|
+
x2 = samples.times.map { rand }.to_numeric
|
31
|
+
x3 = samples.times.map { |i| x1[i] * (1 + a) + x2[i] * (1 + b) }.to_numeric
|
32
|
+
y = samples.times.map { |i| x1[i] + x2[i] + x3[i] + rand }.to_numeric
|
33
33
|
|
34
34
|
ds = { 'x1' => x1, 'x2' => x2, 'x3' => x3, 'y' => y }.to_dataset
|
35
35
|
|
@@ -38,8 +38,8 @@ class StatsampleRegressionTestCase < Minitest::Test
|
|
38
38
|
}
|
39
39
|
end
|
40
40
|
def test_parameters
|
41
|
-
@x = [13, 20, 10, 33, 15].to_vector(:
|
42
|
-
@y = [23, 18, 35, 10, 27].to_vector(:
|
41
|
+
@x = [13, 20, 10, 33, 15].to_vector(:numeric)
|
42
|
+
@y = [23, 18, 35, 10, 27].to_vector(:numeric)
|
43
43
|
reg = Statsample::Regression::Simple.new_from_vectors(@x, @y)
|
44
44
|
_test_simple_regression(reg)
|
45
45
|
ds = { 'x' => @x, 'y' => @y }.to_dataset
|
@@ -57,9 +57,9 @@ class StatsampleRegressionTestCase < Minitest::Test
|
|
57
57
|
end
|
58
58
|
|
59
59
|
def test_summaries
|
60
|
-
a = 10.times.map { rand(100) }.
|
61
|
-
b = 10.times.map { rand(100) }.
|
62
|
-
y = 10.times.map { rand(100) }.
|
60
|
+
a = 10.times.map { rand(100) }.to_numeric
|
61
|
+
b = 10.times.map { rand(100) }.to_numeric
|
62
|
+
y = 10.times.map { rand(100) }.to_numeric
|
63
63
|
ds = { 'a' => a, 'b' => b, 'y' => y }.to_dataset
|
64
64
|
lr = Statsample::Regression::Multiple::RubyEngine.new(ds, 'y')
|
65
65
|
assert(lr.summary.size > 0)
|
@@ -87,10 +87,10 @@ class StatsampleRegressionTestCase < Minitest::Test
|
|
87
87
|
end
|
88
88
|
|
89
89
|
def test_multiple_regression_pairwise_2
|
90
|
-
@a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 3, nil, 3, nil, 3].to_vector(:
|
91
|
-
@b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4, 2, 2, nil, 6, 2].to_vector(:
|
92
|
-
@c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100, nil, 3, 7, nil, 7].to_vector(:
|
93
|
-
@y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 30, 40, nil, 50, nil].to_vector(:
|
90
|
+
@a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 3, nil, 3, nil, 3].to_vector(:numeric)
|
91
|
+
@b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4, 2, 2, nil, 6, 2].to_vector(:numeric)
|
92
|
+
@c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100, nil, 3, 7, nil, 7].to_vector(:numeric)
|
93
|
+
@y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 30, 40, nil, 50, nil].to_vector(:numeric)
|
94
94
|
ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
|
95
95
|
lr = Statsample::Regression::Multiple::RubyEngine.new(ds, 'y')
|
96
96
|
assert_in_delta(2407.436, lr.sst, 0.001)
|
@@ -103,10 +103,10 @@ class StatsampleRegressionTestCase < Minitest::Test
|
|
103
103
|
|
104
104
|
def test_multiple_regression_gsl
|
105
105
|
if Statsample.has_gsl?
|
106
|
-
@a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:
|
107
|
-
@b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:
|
108
|
-
@c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:
|
109
|
-
@y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:
|
106
|
+
@a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:numeric)
|
107
|
+
@b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:numeric)
|
108
|
+
@c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:numeric)
|
109
|
+
@y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:numeric)
|
110
110
|
ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
|
111
111
|
lr = Statsample::Regression::Multiple::GslEngine.new(ds, 'y')
|
112
112
|
assert(lr.summary.size > 0)
|
@@ -174,10 +174,10 @@ class StatsampleRegressionTestCase < Minitest::Test
|
|
174
174
|
end
|
175
175
|
|
176
176
|
def test_regression_matrix
|
177
|
-
@a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:
|
178
|
-
@b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:
|
179
|
-
@c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:
|
180
|
-
@y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:
|
177
|
+
@a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:numeric)
|
178
|
+
@b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:numeric)
|
179
|
+
@c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:numeric)
|
180
|
+
@y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:numeric)
|
181
181
|
ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
|
182
182
|
cor = Statsample::Bivariate.correlation_matrix(ds)
|
183
183
|
|
@@ -194,10 +194,10 @@ class StatsampleRegressionTestCase < Minitest::Test
|
|
194
194
|
end
|
195
195
|
|
196
196
|
def test_regression_rubyengine
|
197
|
-
@a = [nil, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:
|
198
|
-
@b = [nil, 3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:
|
199
|
-
@c = [nil, 11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:
|
200
|
-
@y = [nil, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:
|
197
|
+
@a = [nil, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:numeric)
|
198
|
+
@b = [nil, 3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:numeric)
|
199
|
+
@c = [nil, 11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:numeric)
|
200
|
+
@y = [nil, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:numeric)
|
201
201
|
ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
|
202
202
|
lr = Statsample::Regression::Multiple::RubyEngine.new(ds, 'y')
|
203
203
|
assert_equal(11, lr.total_cases)
|
data/test/test_reliability.rb
CHANGED
@@ -16,9 +16,9 @@ class StatsampleReliabilityTestCase < Minitest::Test
|
|
16
16
|
@samples = 40
|
17
17
|
@n_variables = rand(10) + 2
|
18
18
|
@ds = Statsample::Dataset.new
|
19
|
-
base = @samples.times.collect { |_a| rand }.
|
19
|
+
base = @samples.times.collect { |_a| rand }.to_numeric
|
20
20
|
@n_variables.times do |i|
|
21
|
-
@ds[i] = base.collect { |v| v + rand }.
|
21
|
+
@ds[i] = base.collect { |v| v + rand }.to_numeric
|
22
22
|
end
|
23
23
|
|
24
24
|
@ds.update_valid_data
|
@@ -67,9 +67,9 @@ class StatsampleReliabilityTestCase < Minitest::Test
|
|
67
67
|
@samples = 100
|
68
68
|
@points = rand(10) + 3
|
69
69
|
@max_point = (@points - 1) * 3
|
70
|
-
@x1 = @samples.times.map { rand(@points) }.
|
71
|
-
@x2 = @samples.times.map { rand(@points) }.
|
72
|
-
@x3 = @samples.times.map { rand(@points) }.
|
70
|
+
@x1 = @samples.times.map { rand(@points) }.to_numeric
|
71
|
+
@x2 = @samples.times.map { rand(@points) }.to_numeric
|
72
|
+
@x3 = @samples.times.map { rand(@points) }.to_numeric
|
73
73
|
@ds = { 'a' => @x1, 'b' => @x2, 'c' => @x3 }.to_dataset
|
74
74
|
@icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds)
|
75
75
|
end
|
@@ -77,11 +77,11 @@ class StatsampleReliabilityTestCase < Minitest::Test
|
|
77
77
|
assert_equal(@ds.vector_sum, @icc.vector_total)
|
78
78
|
end
|
79
79
|
should 'have a correct different vector_total' do
|
80
|
-
x2 = @samples.times.map { rand(10) }.
|
80
|
+
x2 = @samples.times.map { rand(10) }.to_numeric
|
81
81
|
@icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds, x2)
|
82
82
|
assert_equal(x2, @icc.vector_total)
|
83
83
|
assert_raises(ArgumentError) do
|
84
|
-
inc = (@samples + 10).times.map { rand(10) }.
|
84
|
+
inc = (@samples + 10).times.map { rand(10) }.to_numeric
|
85
85
|
@icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds, inc)
|
86
86
|
end
|
87
87
|
end
|
@@ -119,7 +119,7 @@ class StatsampleReliabilityTestCase < Minitest::Test
|
|
119
119
|
h = {}
|
120
120
|
@scales.times {|s|
|
121
121
|
@items_per_scale.times {|i|
|
122
|
-
h["#{s}_#{i}"] = (size.times.map { (s * 2) + rand }).
|
122
|
+
h["#{s}_#{i}"] = (size.times.map { (s * 2) + rand }).to_numeric
|
123
123
|
}
|
124
124
|
}
|
125
125
|
@ds = h.to_dataset
|
@@ -177,10 +177,10 @@ class StatsampleReliabilityTestCase < Minitest::Test
|
|
177
177
|
end
|
178
178
|
context Statsample::Reliability::ScaleAnalysis do
|
179
179
|
setup do
|
180
|
-
@x1 = [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 30].
|
181
|
-
@x2 = [1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 50].
|
182
|
-
@x3 = [2, 2, 1, 1, 1, 2, 2, 2, 3, 4, 5, 40].
|
183
|
-
@x4 = [1, 2, 3, 4, 4, 4, 4, 3, 4, 4, 5, 30].
|
180
|
+
@x1 = [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 30].to_numeric
|
181
|
+
@x2 = [1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 50].to_numeric
|
182
|
+
@x3 = [2, 2, 1, 1, 1, 2, 2, 2, 3, 4, 5, 40].to_numeric
|
183
|
+
@x4 = [1, 2, 3, 4, 4, 4, 4, 3, 4, 4, 5, 30].to_numeric
|
184
184
|
@ds = { 'x1' => @x1, 'x2' => @x2, 'x3' => @x3, 'x4' => @x4 }.to_dataset
|
185
185
|
@ia = Statsample::Reliability::ScaleAnalysis.new(@ds)
|
186
186
|
@cov_matrix = @ia.cov_m
|
@@ -188,7 +188,7 @@ class StatsampleReliabilityTestCase < Minitest::Test
|
|
188
188
|
should 'return correct values for item analysis' do
|
189
189
|
assert_in_delta(0.980, @ia.alpha, 0.001)
|
190
190
|
assert_in_delta(0.999, @ia.alpha_standarized, 0.001)
|
191
|
-
var_mean = 4.times.map { |m| @cov_matrix[m, m] }.
|
191
|
+
var_mean = 4.times.map { |m| @cov_matrix[m, m] }.to_numeric.mean
|
192
192
|
assert_in_delta(var_mean, @ia.variances_mean)
|
193
193
|
assert_equal(@x1.mean, @ia.item_statistics['x1'][:mean])
|
194
194
|
assert_equal(@x4.mean, @ia.item_statistics['x4'][:mean])
|
@@ -211,7 +211,7 @@ class StatsampleReliabilityTestCase < Minitest::Test
|
|
211
211
|
end
|
212
212
|
}
|
213
213
|
}
|
214
|
-
assert_in_delta(covariances.
|
214
|
+
assert_in_delta(covariances.to_numeric.mean, @ia.covariances_mean)
|
215
215
|
assert_in_delta(0.999, @ia.item_total_correlation['x1'], 0.001)
|
216
216
|
assert_in_delta(1050.455, @ia.stats_if_deleted['x1'][:variance_sample], 0.001)
|
217
217
|
end
|