statsample 1.5.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.build.sh +15 -0
- data/.gitignore +1 -0
- data/.travis.yml +19 -7
- data/CONTRIBUTING.md +33 -0
- data/History.txt +5 -0
- data/README.md +41 -53
- data/benchmarks/correlation_matrix_15_variables.rb +6 -5
- data/benchmarks/correlation_matrix_5_variables.rb +6 -5
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
- data/examples/boxplot.rb +17 -5
- data/examples/correlation_matrix.rb +36 -7
- data/examples/dataset.rb +25 -5
- data/examples/dominance_analysis.rb +8 -7
- data/examples/dominance_analysis_bootstrap.rb +16 -11
- data/examples/histogram.rb +16 -2
- data/examples/icc.rb +5 -6
- data/examples/levene.rb +17 -3
- data/examples/multiple_regression.rb +6 -3
- data/examples/parallel_analysis.rb +11 -6
- data/examples/polychoric.rb +26 -13
- data/examples/principal_axis.rb +8 -4
- data/examples/reliability.rb +10 -10
- data/examples/scatterplot.rb +8 -0
- data/examples/t_test.rb +7 -0
- data/examples/u_test.rb +10 -2
- data/examples/vector.rb +9 -6
- data/examples/velicer_map_test.rb +12 -8
- data/lib/statsample.rb +13 -47
- data/lib/statsample/analysis/suite.rb +1 -1
- data/lib/statsample/anova/oneway.rb +6 -6
- data/lib/statsample/anova/twoway.rb +26 -24
- data/lib/statsample/bivariate.rb +78 -61
- data/lib/statsample/bivariate/pearson.rb +2 -2
- data/lib/statsample/codification.rb +45 -32
- data/lib/statsample/converter/csv.rb +15 -53
- data/lib/statsample/converter/spss.rb +6 -5
- data/lib/statsample/converters.rb +50 -211
- data/lib/statsample/crosstab.rb +26 -25
- data/lib/statsample/daru.rb +117 -0
- data/lib/statsample/dataset.rb +70 -942
- data/lib/statsample/dominanceanalysis.rb +16 -17
- data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
- data/lib/statsample/factor/parallelanalysis.rb +17 -19
- data/lib/statsample/factor/pca.rb +21 -20
- data/lib/statsample/factor/principalaxis.rb +3 -3
- data/lib/statsample/graph/boxplot.rb +8 -16
- data/lib/statsample/graph/histogram.rb +4 -4
- data/lib/statsample/graph/scatterplot.rb +8 -7
- data/lib/statsample/histogram.rb +128 -119
- data/lib/statsample/matrix.rb +20 -16
- data/lib/statsample/multiset.rb +39 -38
- data/lib/statsample/regression.rb +3 -3
- data/lib/statsample/regression/multiple.rb +8 -10
- data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
- data/lib/statsample/regression/multiple/baseengine.rb +32 -32
- data/lib/statsample/regression/multiple/gslengine.rb +33 -36
- data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
- data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
- data/lib/statsample/reliability.rb +23 -25
- data/lib/statsample/reliability/icc.rb +8 -7
- data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
- data/lib/statsample/reliability/scaleanalysis.rb +58 -60
- data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
- data/lib/statsample/resample.rb +1 -1
- data/lib/statsample/shorthand.rb +29 -25
- data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
- data/lib/statsample/test/levene.rb +28 -27
- data/lib/statsample/test/t.rb +7 -9
- data/lib/statsample/test/umannwhitney.rb +28 -28
- data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
- data/lib/statsample/vector.rb +70 -1013
- data/lib/statsample/version.rb +1 -1
- data/statsample.gemspec +12 -16
- data/test/helpers_tests.rb +1 -1
- data/test/test_analysis.rb +17 -17
- data/test/test_anova_contrast.rb +6 -6
- data/test/test_anovatwowaywithdataset.rb +8 -8
- data/test/test_anovawithvectors.rb +8 -8
- data/test/test_awesome_print_bug.rb +1 -1
- data/test/test_bartlettsphericity.rb +4 -4
- data/test/test_bivariate.rb +48 -43
- data/test/test_codification.rb +33 -33
- data/test/test_crosstab.rb +9 -9
- data/test/test_dataset.rb +28 -458
- data/test/test_factor.rb +46 -38
- data/test/test_factor_pa.rb +22 -13
- data/test/test_ggobi.rb +4 -4
- data/test/test_gsl.rb +4 -4
- data/test/test_histogram.rb +3 -3
- data/test/test_matrix.rb +13 -13
- data/test/test_multiset.rb +103 -91
- data/test/test_regression.rb +57 -52
- data/test/test_reliability.rb +55 -45
- data/test/test_reliability_icc.rb +8 -8
- data/test/test_reliability_skillscale.rb +26 -24
- data/test/test_resample.rb +1 -1
- data/test/test_statistics.rb +3 -13
- data/test/test_stest.rb +9 -9
- data/test/test_stratified.rb +3 -3
- data/test/test_test_t.rb +12 -12
- data/test/test_umannwhitney.rb +2 -2
- data/test/test_vector.rb +76 -613
- data/test/test_wilcoxonsignedrank.rb +4 -4
- metadata +57 -28
- data/lib/statsample/rserve_extension.rb +0 -20
- data/lib/statsample/vector/gsl.rb +0 -106
- data/test/fixtures/repeated_fields.csv +0 -7
- data/test/fixtures/scientific_notation.csv +0 -4
- data/test/fixtures/test_csv.csv +0 -7
- data/test/fixtures/test_xls.xls +0 -0
- data/test/test_csv.rb +0 -63
- data/test/test_rserve_extension.rb +0 -42
- data/test/test_xls.rb +0 -52
data/test/test_factor.rb
CHANGED
@@ -7,26 +7,32 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
7
7
|
# Based on Hardle and Simar
|
8
8
|
def setup
|
9
9
|
@fixtures_dir = File.expand_path(File.dirname(__FILE__) + '/fixtures')
|
10
|
+
Daru.lazy_update = true
|
11
|
+
end
|
12
|
+
|
13
|
+
def teardown
|
14
|
+
Daru.lazy_update = false
|
10
15
|
end
|
11
16
|
# Based on Hurdle example
|
12
17
|
def test_covariance_matrix
|
13
|
-
ds =
|
14
|
-
ds.
|
15
|
-
ds[f] = ds[f].
|
18
|
+
ds = Daru::DataFrame.from_plaintext(@fixtures_dir + '/bank2.dat', [:v1,:v2,:v3,:v4,:v5,:v6])
|
19
|
+
ds.vectors.each {|f|
|
20
|
+
ds[f] = ds[f].center
|
16
21
|
}
|
17
|
-
|
22
|
+
ds.update
|
23
|
+
cm = Statsample::Bivariate.covariance_matrix ds
|
18
24
|
pca = Statsample::Factor::PCA.new(cm, m: 6)
|
19
25
|
# puts pca.summary
|
20
26
|
# puts pca.feature_matrix
|
21
|
-
exp_eig = [2.985, 0.931, 0.242, 0.194, 0.085, 0.035]
|
22
|
-
assert_similar_vector(exp_eig, pca.eigenvalues
|
27
|
+
exp_eig = Daru::Vector.new([2.985, 0.931, 0.242, 0.194, 0.085, 0.035])
|
28
|
+
assert_similar_vector(exp_eig, Daru::Vector.new(pca.eigenvalues), 0.1)
|
23
29
|
pcs = pca.principal_components(ds)
|
24
30
|
k = 6
|
25
31
|
comp_matrix = pca.component_matrix
|
26
32
|
k.times {|i|
|
27
|
-
pc_id = "PC_#{i + 1}"
|
33
|
+
pc_id = "PC_#{i + 1}".to_sym
|
28
34
|
k.times {|j| # variable
|
29
|
-
ds_id = "v#{j + 1}"
|
35
|
+
ds_id = "v#{j + 1}".to_sym
|
30
36
|
r = Statsample::Bivariate.correlation(ds[ds_id], pcs[pc_id])
|
31
37
|
assert_in_delta(r, comp_matrix[j, i])
|
32
38
|
}
|
@@ -42,13 +48,13 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
42
48
|
samples = 20
|
43
49
|
[3, 5, 7].each {|k|
|
44
50
|
v = {}
|
45
|
-
v[
|
46
|
-
(1...k).each {|i|
|
47
|
-
v["x#{i}"] = samples.times.map { |ii| ran.call * 0.5 + v["x#{i - 1}"][ii] * 0.5 }.
|
51
|
+
v[:x0] = Daru::Vector.new(samples.times.map { ran.call }).center
|
52
|
+
(1...k).each { |i|
|
53
|
+
v["x#{i}".to_sym] = Daru::Vector.new(samples.times.map { |ii| ran.call * 0.5 + v["x#{i - 1}".to_sym][ii] * 0.5 }).center
|
48
54
|
}
|
49
55
|
|
50
|
-
ds = v
|
51
|
-
cm =
|
56
|
+
ds = Daru::DataFrame.new(v)
|
57
|
+
cm = Statsample::Bivariate.covariance_matrix ds
|
52
58
|
# @r.assign('ds',ds)
|
53
59
|
# @r.eval('cm<-cor(ds);sm<-eigen(cm, sym=TRUE);v<-sm$vectors')
|
54
60
|
# puts "eigenvalues"
|
@@ -61,14 +67,14 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
61
67
|
cm_ruby = pca_ruby.component_matrix
|
62
68
|
# puts cm_ruby.summary
|
63
69
|
k.times {|i|
|
64
|
-
pc_id = "PC_#{i + 1}"
|
70
|
+
pc_id = "PC_#{i + 1}".to_sym
|
65
71
|
assert_in_delta(pca_ruby.eigenvalues[i], pca_gsl.eigenvalues[i], 1e-10)
|
66
72
|
# Revert gsl component values
|
67
73
|
pc_gsl_data = (pc_gsl[pc_id][0] - pc_ruby[pc_id][0]).abs > 1e-6 ? pc_gsl[pc_id].recode(&:-@) : pc_gsl[pc_id]
|
68
74
|
assert_similar_vector(pc_gsl_data, pc_ruby[pc_id], 1e-6, "PC for #{k} variables")
|
69
75
|
if false
|
70
76
|
k.times {|j| # variable
|
71
|
-
ds_id = "x#{j}"
|
77
|
+
ds_id = "x#{j}".to_sym
|
72
78
|
r = Statsample::Bivariate.correlation(ds[ds_id], pc_ruby[pc_id])
|
73
79
|
puts "#{pc_id}-#{ds_id}:#{r}"
|
74
80
|
}
|
@@ -80,18 +86,22 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
80
86
|
end
|
81
87
|
|
82
88
|
def test_principalcomponents
|
83
|
-
|
89
|
+
if Statsample.has_gsl?
|
90
|
+
principalcomponents(true)
|
91
|
+
else
|
92
|
+
skip "Require GSL"
|
93
|
+
end
|
84
94
|
principalcomponents(false)
|
85
95
|
end
|
86
96
|
|
87
97
|
def principalcomponents(gsl)
|
88
98
|
ran = Distribution::Normal.rng
|
89
99
|
samples = 50
|
90
|
-
x1 = samples.times.map { ran.call }
|
91
|
-
x2 = samples.times.map { |i| ran.call * 0.5 + x1[i] * 0.5 }
|
92
|
-
ds = {
|
100
|
+
x1 = Daru::Vector.new(samples.times.map { ran.call })
|
101
|
+
x2 = Daru::Vector.new(samples.times.map { |i| ran.call * 0.5 + x1[i] * 0.5 })
|
102
|
+
ds = Daru::DataFrame.new({ :x1 => x1, :x2 => x2 })
|
93
103
|
|
94
|
-
cm =
|
104
|
+
cm = Statsample::Bivariate.correlation_matrix ds
|
95
105
|
r = cm[0, 1]
|
96
106
|
pca = Statsample::Factor::PCA.new(cm, m: 2, use_gsl: gsl)
|
97
107
|
assert_in_delta(1 + r, pca.eigenvalues[0], 1e-10)
|
@@ -103,14 +113,14 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
103
113
|
assert_equal_vector(hs * m_1, pca.eigenvectors[1])
|
104
114
|
|
105
115
|
pcs = pca.principal_components(ds)
|
106
|
-
exp_pc_1 = ds.
|
107
|
-
hs * (row[
|
116
|
+
exp_pc_1 = ds.collect_row_with_index {|row, _i|
|
117
|
+
hs * (row[:x1] + row[:x2])
|
108
118
|
}
|
109
|
-
exp_pc_2 = ds.
|
110
|
-
gsl ? hs * (row[
|
119
|
+
exp_pc_2 = ds.collect_row_with_index {|row, _i|
|
120
|
+
gsl ? hs * (row[:x2] - row[:x1]) : hs * (row[:x1] - row[:x2])
|
111
121
|
}
|
112
|
-
assert_similar_vector(exp_pc_1, pcs[
|
113
|
-
assert_similar_vector(exp_pc_2, pcs[
|
122
|
+
assert_similar_vector(exp_pc_1, pcs[:PC_1])
|
123
|
+
assert_similar_vector(exp_pc_2, pcs[:PC_2])
|
114
124
|
end
|
115
125
|
|
116
126
|
def test_antiimage
|
@@ -121,11 +131,11 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
121
131
|
end
|
122
132
|
|
123
133
|
def test_kmo
|
124
|
-
@v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70]
|
125
|
-
@v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0]
|
126
|
-
@v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4]
|
134
|
+
@v1 = Daru::Vector.new([1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70])
|
135
|
+
@v2 = Daru::Vector.new([5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0])
|
136
|
+
@v3 = Daru::Vector.new([10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4])
|
127
137
|
# KMO: 0.490
|
128
|
-
ds = {
|
138
|
+
ds = Daru::DataFrame.new({ :v1 => @v1, :v2 => @v2, :v3 => @v3 })
|
129
139
|
cor = Statsample::Bivariate.correlation_matrix(ds)
|
130
140
|
kmo = Statsample::Factor.kmo(cor)
|
131
141
|
assert_in_delta(0.667, kmo, 0.001)
|
@@ -141,12 +151,12 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
141
151
|
end
|
142
152
|
# Tested with SPSS and R
|
143
153
|
def test_pca
|
144
|
-
|
145
|
-
a = [2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1]
|
146
|
-
b = [2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9]
|
147
|
-
a
|
148
|
-
b
|
149
|
-
ds = {
|
154
|
+
dtype = Statsample.has_gsl? ? :gsl : :array
|
155
|
+
a = Daru::Vector.new([2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1], dtype: dtype)
|
156
|
+
b = Daru::Vector.new([2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9], dtype: dtype)
|
157
|
+
a = a - a.mean
|
158
|
+
b = b - b.mean
|
159
|
+
ds = Daru::DataFrame.new({ :a => a, :b => b })
|
150
160
|
|
151
161
|
cov_matrix = Statsample::Bivariate.covariance_matrix(ds)
|
152
162
|
if Statsample.has_gsl?
|
@@ -160,8 +170,6 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
160
170
|
end
|
161
171
|
|
162
172
|
def pca_set(pca, _type)
|
163
|
-
|
164
|
-
|
165
173
|
expected_eigenvalues = [1.284, 0.0490]
|
166
174
|
expected_eigenvalues.each_with_index{|ev, i|
|
167
175
|
assert_in_delta(ev, pca.eigenvalues[i], 0.001)
|
data/test/test_factor_pa.rb
CHANGED
@@ -7,6 +7,11 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
7
7
|
# Based on Hardle and Simar
|
8
8
|
def setup
|
9
9
|
@fixtures_dir = File.expand_path(File.dirname(__FILE__) + '/fixtures')
|
10
|
+
Daru.lazy_update = true
|
11
|
+
end
|
12
|
+
|
13
|
+
def teardown
|
14
|
+
Daru.lazy_update = false
|
10
15
|
end
|
11
16
|
|
12
17
|
def test_parallelanalysis_with_data
|
@@ -15,26 +20,30 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
15
20
|
variables = 10
|
16
21
|
iterations = 50
|
17
22
|
rng = Distribution::Normal.rng
|
18
|
-
f1 = samples.times.collect { rng.call }
|
19
|
-
f2 = samples.times.collect { rng.call }
|
23
|
+
f1 = Daru::Vector.new(samples.times.collect { rng.call })
|
24
|
+
f2 = Daru::Vector.new(samples.times.collect { rng.call })
|
20
25
|
vectors = {}
|
21
26
|
variables.times do |i|
|
22
27
|
if i < 5
|
23
|
-
vectors["v#{i}"] =
|
24
|
-
|
25
|
-
|
28
|
+
vectors["v#{i}".to_sym] = Daru::Vector.new(
|
29
|
+
samples.times.collect { |nv|
|
30
|
+
f1[nv] * 5 + f2[nv] * 2 + rng.call
|
31
|
+
}
|
32
|
+
)
|
26
33
|
else
|
27
|
-
vectors["v#{i}"] =
|
28
|
-
|
29
|
-
|
34
|
+
vectors["v#{i}".to_sym] = Daru::Vector.new(
|
35
|
+
samples.times.collect { |nv|
|
36
|
+
f2[nv] * 5 + f1[nv] * 2 + rng.call
|
37
|
+
}
|
38
|
+
)
|
30
39
|
end
|
31
40
|
end
|
32
|
-
ds = vectors
|
41
|
+
ds = Daru::DataFrame.new(vectors)
|
33
42
|
|
34
43
|
pa1 = Statsample::Factor::ParallelAnalysis.new(ds, bootstrap_method: :data, iterations: iterations)
|
35
44
|
pa2 = Statsample::Factor::ParallelAnalysis.with_random_data(samples, variables, iterations: iterations, percentil: 95)
|
36
45
|
3.times do |n|
|
37
|
-
var = "ev_0000#{n + 1}"
|
46
|
+
var = "ev_0000#{n + 1}".to_sym
|
38
47
|
assert_in_delta(pa1.ds_eigenvalues[var].mean, pa2.ds_eigenvalues[var].mean, 0.05)
|
39
48
|
end
|
40
49
|
else
|
@@ -44,9 +53,9 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
44
53
|
|
45
54
|
def test_parallelanalysis
|
46
55
|
pa = Statsample::Factor::ParallelAnalysis.with_random_data(305, 8, iterations: 100, percentil: 95)
|
47
|
-
assert_in_delta(1.2454, pa.ds_eigenvalues[
|
48
|
-
assert_in_delta(1.1542, pa.ds_eigenvalues[
|
49
|
-
assert_in_delta(1.0836, pa.ds_eigenvalues[
|
56
|
+
assert_in_delta(1.2454, pa.ds_eigenvalues[:ev_00001].mean, 0.01)
|
57
|
+
assert_in_delta(1.1542, pa.ds_eigenvalues[:ev_00002].mean, 0.01)
|
58
|
+
assert_in_delta(1.0836, pa.ds_eigenvalues[:ev_00003].mean, 0.01)
|
50
59
|
assert(pa.summary.size > 0)
|
51
60
|
end
|
52
61
|
end
|
data/test/test_ggobi.rb
CHANGED
@@ -2,11 +2,11 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
|
2
2
|
require 'ostruct'
|
3
3
|
class StatsampleGGobiTestCase < Minitest::Test
|
4
4
|
def setup
|
5
|
-
v1
|
6
|
-
@v2 = (%w(a b c a a a b b c d) * 10)
|
5
|
+
v1 = Daru::Vector.new([10.2, 20.3, 10, 20, 30, 40, 30, 20, 30, 40] * 10)
|
6
|
+
@v2 = Daru::Vector.new(%w(a b c a a a b b c d) * 10)
|
7
7
|
@v2.labels = { 'a' => 'letter a', 'd' => 'letter d' }
|
8
|
-
v3
|
9
|
-
@ds = {
|
8
|
+
v3 = Daru::Vector.new([1, 2, 3, 4, 5, 4, 3, 2, 1, 2] * 10)
|
9
|
+
@ds = Daru::DataFrame.new({ :v1 => v1, :v2 => @v2, :v3 => v3 })
|
10
10
|
end
|
11
11
|
|
12
12
|
def test_values_definition
|
data/test/test_gsl.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
2
|
class StatsampleGSLTestCase < Minitest::Test
|
3
3
|
should_with_gsl 'matrix with gsl' do
|
4
|
-
a = [1, 2, 3, 4, 20]
|
5
|
-
b = [3, 2, 3, 4, 50]
|
6
|
-
c = [6, 2, 3, 4, 3]
|
7
|
-
ds = {
|
4
|
+
a = Daru::Vector.new([1, 2, 3, 4, 20])
|
5
|
+
b = Daru::Vector.new([3, 2, 3, 4, 50])
|
6
|
+
c = Daru::Vector.new([6, 2, 3, 4, 3])
|
7
|
+
ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c })
|
8
8
|
gsl = ds.to_matrix.to_gsl
|
9
9
|
assert_equal(5, gsl.size1)
|
10
10
|
assert_equal(3, gsl.size2)
|
data/test/test_histogram.rb
CHANGED
@@ -75,13 +75,13 @@ class StatsampleHistogramTestCase < Minitest::Test
|
|
75
75
|
assert_equal(min, h.min_val)
|
76
76
|
end
|
77
77
|
should 'return correct estimated mean' do
|
78
|
-
a = [1.5, 1.5, 1.5, 3.5, 3.5, 3.5]
|
78
|
+
a = Daru::Vector.new([1.5, 1.5, 1.5, 3.5, 3.5, 3.5])
|
79
79
|
h = Statsample::Histogram.alloc(5, [0, 5])
|
80
80
|
h.increment(a)
|
81
81
|
assert_equal(2.5, h.estimated_mean)
|
82
82
|
end
|
83
83
|
should 'return correct estimated standard deviation' do
|
84
|
-
a = [0.5, 1.5, 1.5, 1.5, 2.5, 3.5, 3.5, 3.5, 4.5]
|
84
|
+
a = Daru::Vector.new([0.5, 1.5, 1.5, 1.5, 2.5, 3.5, 3.5, 3.5, 4.5])
|
85
85
|
h = Statsample::Histogram.alloc(5, [0, 5])
|
86
86
|
h.increment(a)
|
87
87
|
assert_equal(a.sd, h.estimated_standard_deviation)
|
@@ -100,7 +100,7 @@ class StatsampleHistogramTestCase < Minitest::Test
|
|
100
100
|
end
|
101
101
|
should 'not raise exception when all values equal' do
|
102
102
|
assert_nothing_raised do
|
103
|
-
a = [5, 5, 5, 5, 5, 5]
|
103
|
+
a = Daru::Vector.new([5, 5, 5, 5, 5, 5])
|
104
104
|
h = Statsample::Graph::Histogram.new(a)
|
105
105
|
h.to_svg
|
106
106
|
end
|
data/test/test_matrix.rb
CHANGED
@@ -4,17 +4,17 @@ class StatsampleMatrixTestCase < Minitest::Test
|
|
4
4
|
def test_to_dataset
|
5
5
|
m = Matrix[[1, 4], [2, 5], [3, 6]]
|
6
6
|
m.extend Statsample::NamedMatrix
|
7
|
-
m.fields_y =
|
7
|
+
m.fields_y = [:x1, :x2]
|
8
8
|
m.name = 'test'
|
9
9
|
samples = 100
|
10
|
-
x1 =
|
11
|
-
x2 =
|
12
|
-
ds = {
|
13
|
-
ds.
|
14
|
-
obs = m.
|
15
|
-
assert_equal(ds[
|
16
|
-
assert_equal(ds[
|
17
|
-
assert_equal(ds[
|
10
|
+
x1 =Daru::Vector.new([1, 2, 3])
|
11
|
+
x2 =Daru::Vector.new([4, 5, 6])
|
12
|
+
ds = Daru::DataFrame.new({ :x1 => x1, :x2 => x2 })
|
13
|
+
ds.rename 'test'
|
14
|
+
obs = m.to_dataframe
|
15
|
+
assert_equal(ds[:x1], obs[:x1])
|
16
|
+
assert_equal(ds[:x2], obs[:x2])
|
17
|
+
assert_equal(ds[:x1].mean, obs[:x1].mean)
|
18
18
|
end
|
19
19
|
|
20
20
|
def test_covariate
|
@@ -33,10 +33,10 @@ class StatsampleMatrixTestCase < Minitest::Test
|
|
33
33
|
|
34
34
|
assert_equal(:covariance, a._type)
|
35
35
|
|
36
|
-
a = 50.times.collect { rand }
|
37
|
-
b = 50.times.collect { rand }
|
38
|
-
c = 50.times.collect { rand }
|
39
|
-
ds = {
|
36
|
+
a = Daru::Vector.new(50.times.collect { rand })
|
37
|
+
b = Daru::Vector.new(50.times.collect { rand })
|
38
|
+
c = Daru::Vector.new(50.times.collect { rand })
|
39
|
+
ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c })
|
40
40
|
corr = Statsample::Bivariate.correlation_matrix(ds)
|
41
41
|
real = Statsample::Bivariate.covariance_matrix(ds).correlation
|
42
42
|
corr.row_size.times do |i|
|
data/test/test_multiset.rb
CHANGED
@@ -2,122 +2,134 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
|
2
2
|
|
3
3
|
class StatsampleMultisetTestCase < Minitest::Test
|
4
4
|
def setup
|
5
|
-
@x = %w(a a a a b b b b)
|
6
|
-
@y = [1, 2, 3, 4, 5, 6, 7, 8]
|
7
|
-
@z = [10, 11, 12, 13, 14, 15, 16, 17]
|
8
|
-
@ds = {
|
9
|
-
@ms = @ds.to_multiset_by_split(
|
5
|
+
@x = Daru::Vector.new(%w(a a a a b b b b))
|
6
|
+
@y = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8])
|
7
|
+
@z = Daru::Vector.new([10, 11, 12, 13, 14, 15, 16, 17])
|
8
|
+
@ds = Daru::DataFrame.new({ :x => @x, :y => @y, :z => @z })
|
9
|
+
@ms = @ds.to_multiset_by_split(:x)
|
10
10
|
end
|
11
11
|
|
12
12
|
def test_creation
|
13
|
-
v1a = [1, 2, 3, 4, 5]
|
14
|
-
v2b = [11, 21, 31, 41, 51]
|
15
|
-
v3c = [21, 23, 34, 45, 56]
|
16
|
-
ds1 = {
|
17
|
-
v1b = [15, 25, 35, 45, 55]
|
18
|
-
v2b = [11, 21, 31, 41, 51]
|
19
|
-
v3b = [21, 23, 34, 45, 56]
|
20
|
-
ds2 = {
|
21
|
-
ms = Statsample::Multiset.new(
|
22
|
-
ms.add_dataset(
|
23
|
-
ms.add_dataset(
|
24
|
-
assert_equal(ds1, ms[
|
25
|
-
assert_equal(ds2, ms[
|
26
|
-
assert_equal(v1a, ms[
|
27
|
-
assert_not_equal(v1b, ms[
|
28
|
-
ds3 = {
|
13
|
+
v1a = Daru::Vector.new([1, 2, 3, 4, 5])
|
14
|
+
v2b = Daru::Vector.new([11, 21, 31, 41, 51])
|
15
|
+
v3c = Daru::Vector.new([21, 23, 34, 45, 56])
|
16
|
+
ds1 = Daru::DataFrame.new({ :v1 => v1a, :v2 => v2b, :v3 => v3c })
|
17
|
+
v1b = Daru::Vector.new([15, 25, 35, 45, 55])
|
18
|
+
v2b = Daru::Vector.new([11, 21, 31, 41, 51])
|
19
|
+
v3b = Daru::Vector.new([21, 23, 34, 45, 56])
|
20
|
+
ds2 = Daru::DataFrame.new({ :v1 => v1b, :v2 => v2b, :v3 => v3b })
|
21
|
+
ms = Statsample::Multiset.new([:v1, :v2, :v3])
|
22
|
+
ms.add_dataset(:ds1, ds1)
|
23
|
+
ms.add_dataset(:ds2, ds2)
|
24
|
+
assert_equal(ds1, ms[:ds1])
|
25
|
+
assert_equal(ds2, ms[:ds2])
|
26
|
+
assert_equal(v1a, ms[:ds1][:v1])
|
27
|
+
assert_not_equal(v1b, ms[:ds1][:v1])
|
28
|
+
ds3 = Daru::DataFrame.new({ :v1 => v1b, :v2 => v2b })
|
29
29
|
assert_raise ArgumentError do
|
30
30
|
ms.add_dataset(ds3)
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
34
|
def test_creation_empty
|
35
|
-
ms = Statsample::Multiset.new_empty_vectors(
|
36
|
-
ds_male
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
35
|
+
ms = Statsample::Multiset.new_empty_vectors([:id, :age, :name], [:male, :female])
|
36
|
+
ds_male = Daru::DataFrame.new({
|
37
|
+
:id => Daru::Vector.new([]),
|
38
|
+
:age => Daru::Vector.new([]),
|
39
|
+
:name => Daru::Vector.new([])
|
40
|
+
}, order: [:id, :age, :name])
|
41
|
+
|
42
|
+
ds_female = Daru::DataFrame.new({
|
43
|
+
:id => Daru::Vector.new([]),
|
44
|
+
:age => Daru::Vector.new([]),
|
45
|
+
:name => Daru::Vector.new([])
|
46
|
+
}, order: [:id, :age, :name])
|
47
|
+
|
48
|
+
ms2 = Statsample::Multiset.new([:id, :age, :name])
|
49
|
+
ms2.add_dataset(:male, ds_male)
|
50
|
+
ms2.add_dataset(:female, ds_female)
|
41
51
|
assert_equal(ms2.fields, ms.fields)
|
42
|
-
assert_equal(ms2[
|
43
|
-
assert_equal(ms2[
|
52
|
+
assert_equal(ms2[:male], ms[:male])
|
53
|
+
assert_equal(ms2[:female], ms[:female])
|
44
54
|
end
|
45
55
|
|
46
56
|
def test_to_multiset_by_split_one
|
47
|
-
sex
|
48
|
-
city = %w(London Paris NY London Paris NY London Paris NY Tome)
|
49
|
-
age
|
50
|
-
ds
|
51
|
-
ms = ds.to_multiset_by_split(
|
57
|
+
sex = Daru::Vector.new(%w(m m m m m f f f f m))
|
58
|
+
city = Daru::Vector.new(%w(London Paris NY London Paris NY London Paris NY Tome))
|
59
|
+
age = Daru::Vector.new([10, 10, 20, 30, 34, 34, 33, 35, 36, 40])
|
60
|
+
ds = Daru::DataFrame.new({ :sex => sex, :city => city, :age => age })
|
61
|
+
ms = ds.to_multiset_by_split(:sex)
|
52
62
|
assert_equal(2, ms.n_datasets)
|
53
63
|
assert_equal(%w(f m), ms.datasets.keys.sort)
|
54
|
-
assert_equal(6, ms['m'].
|
55
|
-
assert_equal(4, ms['f'].
|
56
|
-
assert_equal(%w(London Paris NY London Paris Tome), ms['m'][
|
57
|
-
assert_equal([34, 33, 35, 36], ms['f'][
|
64
|
+
assert_equal(6, ms['m'].nrows)
|
65
|
+
assert_equal(4, ms['f'].nrows)
|
66
|
+
assert_equal(%w(London Paris NY London Paris Tome), ms['m'][:city].to_a)
|
67
|
+
assert_equal([34, 33, 35, 36], ms['f'][:age].to_a)
|
58
68
|
end
|
59
69
|
|
60
70
|
def test_to_multiset_by_split_multiple
|
61
|
-
sex = %w(m m m m m m m m m m f f f f f f f f f f)
|
62
|
-
city = %w(London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris)
|
63
|
-
hair = %w(blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black)
|
64
|
-
age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40]
|
65
|
-
ds =
|
66
|
-
|
71
|
+
sex = Daru::Vector.new(%w(m m m m m m m m m m f f f f f f f f f f))
|
72
|
+
city = Daru::Vector.new(%w(London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris))
|
73
|
+
hair = Daru::Vector.new(%w(blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black))
|
74
|
+
age = Daru::Vector.new([10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40])
|
75
|
+
ds = Daru::DataFrame.new({
|
76
|
+
:sex => sex, :city => city, :hair => hair, :age => age
|
77
|
+
}, order: [:sex, :city, :hair, :age])
|
78
|
+
ms = ds.to_multiset_by_split(:sex, :city, :hair)
|
67
79
|
assert_equal(8, ms.n_datasets)
|
68
|
-
assert_equal(3, ms[%w(m London blonde)].
|
69
|
-
assert_equal(3, ms[%w(m London blonde)].
|
70
|
-
assert_equal(1, ms[%w(m Paris black)].
|
80
|
+
assert_equal(3, ms[%w(m London blonde)].nrows)
|
81
|
+
assert_equal(3, ms[%w(m London blonde)].nrows)
|
82
|
+
assert_equal(1, ms[%w(m Paris black)].nrows)
|
71
83
|
end
|
72
84
|
|
73
85
|
def test_stratum_proportion
|
74
|
-
ds1 = {
|
75
|
-
ds2 = {
|
76
|
-
assert_equal(5.0 / 12, ds1[
|
77
|
-
assert_equal(7.0 / 9, ds2[
|
78
|
-
ms = Statsample::Multiset.new([
|
79
|
-
ms.add_dataset(
|
80
|
-
ms.add_dataset(
|
81
|
-
ss = Statsample::StratifiedSample.new(ms,
|
82
|
-
assert_in_delta(0.655, ss.proportion(
|
83
|
-
assert_in_delta(0.345, ss.proportion(
|
86
|
+
ds1 = Daru::DataFrame.new({ :q1 => Daru::Vector.new([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]) })
|
87
|
+
ds2 = Daru::DataFrame.new({ :q1 => Daru::Vector.new([1, 1, 1, 1, 1, 1, 1, 0, 0]) })
|
88
|
+
assert_equal(5.0 / 12, ds1[:q1].proportion)
|
89
|
+
assert_equal(7.0 / 9, ds2[:q1].proportion)
|
90
|
+
ms = Statsample::Multiset.new([:q1])
|
91
|
+
ms.add_dataset(:d1, ds1)
|
92
|
+
ms.add_dataset(:d2, ds2)
|
93
|
+
ss = Statsample::StratifiedSample.new(ms, :d1 => 50, :d2 => 100)
|
94
|
+
assert_in_delta(0.655, ss.proportion(:q1), 0.01)
|
95
|
+
assert_in_delta(0.345, ss.proportion(:q1, 0), 0.01)
|
84
96
|
end
|
85
97
|
|
86
98
|
def test_stratum_scale
|
87
|
-
boys = {
|
88
|
-
girls =
|
89
|
-
ms = Statsample::Multiset.new([
|
90
|
-
ms.add_dataset(
|
91
|
-
ms.add_dataset(
|
92
|
-
ss = Statsample::StratifiedSample.new(ms,
|
99
|
+
boys = Daru::DataFrame.new({ :test => Daru::Vector.new([50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90]) })
|
100
|
+
girls =Daru::DataFrame.new({ :test => Daru::Vector.new( [70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90]) })
|
101
|
+
ms = Statsample::Multiset.new([:test])
|
102
|
+
ms.add_dataset(:boys, boys)
|
103
|
+
ms.add_dataset(:girls, girls)
|
104
|
+
ss = Statsample::StratifiedSample.new(ms, :boys => 10_000, :girls => 10_000)
|
93
105
|
assert_equal(2, ss.strata_number)
|
94
106
|
assert_equal(20_000, ss.population_size)
|
95
|
-
assert_equal(10_000, ss.stratum_size(
|
96
|
-
assert_equal(10_000, ss.stratum_size(
|
107
|
+
assert_equal(10_000, ss.stratum_size(:boys))
|
108
|
+
assert_equal(10_000, ss.stratum_size(:girls))
|
97
109
|
assert_equal(36, ss.sample_size)
|
98
|
-
assert_equal(75, ss.mean(
|
99
|
-
assert_in_delta(1.45, ss.standard_error_wor(
|
100
|
-
assert_in_delta(ss.standard_error_wor(
|
110
|
+
assert_equal(75, ss.mean(:test))
|
111
|
+
assert_in_delta(1.45, ss.standard_error_wor(:test), 0.01)
|
112
|
+
assert_in_delta(ss.standard_error_wor(:test), ss.standard_error_wor_2(:test), 0.00001)
|
101
113
|
end
|
102
114
|
|
103
115
|
def test_each
|
104
116
|
xpe = {
|
105
|
-
'a' => %w(a a a a)
|
106
|
-
'b' => %w(b b b b)
|
117
|
+
'a' => Daru::Vector.new(%w(a a a a)),
|
118
|
+
'b' => Daru::Vector.new(%w(b b b b))
|
107
119
|
}
|
108
120
|
ype = {
|
109
|
-
'a' => [1, 2, 3, 4]
|
110
|
-
'b' => [5, 6, 7, 8]
|
121
|
+
'a' => Daru::Vector.new([1, 2, 3, 4]),
|
122
|
+
'b' => Daru::Vector.new([5, 6, 7, 8])
|
111
123
|
}
|
112
124
|
zpe = {
|
113
|
-
'a' => [10, 11, 12, 13]
|
114
|
-
'b' => [14, 15, 16, 17]
|
125
|
+
'a' => Daru::Vector.new([10, 11, 12, 13]),
|
126
|
+
'b' => Daru::Vector.new([14, 15, 16, 17])
|
115
127
|
}
|
116
128
|
xp, yp, zp = {}, {}, {}
|
117
129
|
@ms.each {|k, ds|
|
118
|
-
xp[k] = ds[
|
119
|
-
yp[k] = ds[
|
120
|
-
zp[k] = ds[
|
130
|
+
xp[k] = ds[:x]
|
131
|
+
yp[k] = ds[:y]
|
132
|
+
zp[k] = ds[:z]
|
121
133
|
}
|
122
134
|
assert_equal(xpe, xp)
|
123
135
|
assert_equal(ype, yp)
|
@@ -127,38 +139,38 @@ class StatsampleMultisetTestCase < Minitest::Test
|
|
127
139
|
def test_multiset_union_with_block
|
128
140
|
r1 = rand
|
129
141
|
r2 = rand
|
130
|
-
ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2]
|
142
|
+
ye = Daru::Vector.new([1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2])
|
131
143
|
|
132
|
-
ze = [10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2]
|
144
|
+
ze = Daru::Vector.new([10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2])
|
133
145
|
|
134
146
|
ds2 = @ms.union {|k, ds|
|
135
|
-
ds[
|
147
|
+
ds[:y].recode!{|v|
|
136
148
|
k == 'a' ? v * r1 : v * r2
|
137
149
|
}
|
138
|
-
ds[
|
150
|
+
ds[:z].recode!{|v|
|
139
151
|
k == 'a' ? v * r1 : v * r2
|
140
152
|
}
|
141
153
|
}
|
142
|
-
assert_equal(ye, ds2[
|
143
|
-
assert_equal(ze, ds2[
|
154
|
+
assert_equal(ye, ds2[:y])
|
155
|
+
assert_equal(ze, ds2[:z])
|
144
156
|
end
|
145
157
|
|
146
158
|
def test_multiset_union
|
147
159
|
r1 = rand
|
148
160
|
r2 = rand
|
149
|
-
ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2]
|
161
|
+
ye = Daru::Vector.new([1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2])
|
162
|
+
ze = Daru::Vector.new([10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2])
|
150
163
|
|
151
|
-
|
152
|
-
|
153
|
-
ds['y'].recode!{|v|
|
164
|
+
@ms.each do |k, ds|
|
165
|
+
ds[:y].recode! { |v|
|
154
166
|
k == 'a' ? v * r1 : v * r2
|
155
167
|
}
|
156
|
-
ds[
|
168
|
+
ds[:z].recode! {|v|
|
157
169
|
k == 'a' ? v * r1 : v * r2
|
158
170
|
}
|
159
|
-
|
171
|
+
end
|
160
172
|
ds2 = @ms.union
|
161
|
-
assert_equal(ye, ds2[
|
162
|
-
assert_equal(ze, ds2[
|
173
|
+
assert_equal(ye, ds2[:y])
|
174
|
+
assert_equal(ze, ds2[:z])
|
163
175
|
end
|
164
176
|
end
|