statsample 1.5.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.build.sh +15 -0
- data/.gitignore +1 -0
- data/.travis.yml +19 -7
- data/CONTRIBUTING.md +33 -0
- data/History.txt +5 -0
- data/README.md +41 -53
- data/benchmarks/correlation_matrix_15_variables.rb +6 -5
- data/benchmarks/correlation_matrix_5_variables.rb +6 -5
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
- data/examples/boxplot.rb +17 -5
- data/examples/correlation_matrix.rb +36 -7
- data/examples/dataset.rb +25 -5
- data/examples/dominance_analysis.rb +8 -7
- data/examples/dominance_analysis_bootstrap.rb +16 -11
- data/examples/histogram.rb +16 -2
- data/examples/icc.rb +5 -6
- data/examples/levene.rb +17 -3
- data/examples/multiple_regression.rb +6 -3
- data/examples/parallel_analysis.rb +11 -6
- data/examples/polychoric.rb +26 -13
- data/examples/principal_axis.rb +8 -4
- data/examples/reliability.rb +10 -10
- data/examples/scatterplot.rb +8 -0
- data/examples/t_test.rb +7 -0
- data/examples/u_test.rb +10 -2
- data/examples/vector.rb +9 -6
- data/examples/velicer_map_test.rb +12 -8
- data/lib/statsample.rb +13 -47
- data/lib/statsample/analysis/suite.rb +1 -1
- data/lib/statsample/anova/oneway.rb +6 -6
- data/lib/statsample/anova/twoway.rb +26 -24
- data/lib/statsample/bivariate.rb +78 -61
- data/lib/statsample/bivariate/pearson.rb +2 -2
- data/lib/statsample/codification.rb +45 -32
- data/lib/statsample/converter/csv.rb +15 -53
- data/lib/statsample/converter/spss.rb +6 -5
- data/lib/statsample/converters.rb +50 -211
- data/lib/statsample/crosstab.rb +26 -25
- data/lib/statsample/daru.rb +117 -0
- data/lib/statsample/dataset.rb +70 -942
- data/lib/statsample/dominanceanalysis.rb +16 -17
- data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
- data/lib/statsample/factor/parallelanalysis.rb +17 -19
- data/lib/statsample/factor/pca.rb +21 -20
- data/lib/statsample/factor/principalaxis.rb +3 -3
- data/lib/statsample/graph/boxplot.rb +8 -16
- data/lib/statsample/graph/histogram.rb +4 -4
- data/lib/statsample/graph/scatterplot.rb +8 -7
- data/lib/statsample/histogram.rb +128 -119
- data/lib/statsample/matrix.rb +20 -16
- data/lib/statsample/multiset.rb +39 -38
- data/lib/statsample/regression.rb +3 -3
- data/lib/statsample/regression/multiple.rb +8 -10
- data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
- data/lib/statsample/regression/multiple/baseengine.rb +32 -32
- data/lib/statsample/regression/multiple/gslengine.rb +33 -36
- data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
- data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
- data/lib/statsample/reliability.rb +23 -25
- data/lib/statsample/reliability/icc.rb +8 -7
- data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
- data/lib/statsample/reliability/scaleanalysis.rb +58 -60
- data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
- data/lib/statsample/resample.rb +1 -1
- data/lib/statsample/shorthand.rb +29 -25
- data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
- data/lib/statsample/test/levene.rb +28 -27
- data/lib/statsample/test/t.rb +7 -9
- data/lib/statsample/test/umannwhitney.rb +28 -28
- data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
- data/lib/statsample/vector.rb +70 -1013
- data/lib/statsample/version.rb +1 -1
- data/statsample.gemspec +12 -16
- data/test/helpers_tests.rb +1 -1
- data/test/test_analysis.rb +17 -17
- data/test/test_anova_contrast.rb +6 -6
- data/test/test_anovatwowaywithdataset.rb +8 -8
- data/test/test_anovawithvectors.rb +8 -8
- data/test/test_awesome_print_bug.rb +1 -1
- data/test/test_bartlettsphericity.rb +4 -4
- data/test/test_bivariate.rb +48 -43
- data/test/test_codification.rb +33 -33
- data/test/test_crosstab.rb +9 -9
- data/test/test_dataset.rb +28 -458
- data/test/test_factor.rb +46 -38
- data/test/test_factor_pa.rb +22 -13
- data/test/test_ggobi.rb +4 -4
- data/test/test_gsl.rb +4 -4
- data/test/test_histogram.rb +3 -3
- data/test/test_matrix.rb +13 -13
- data/test/test_multiset.rb +103 -91
- data/test/test_regression.rb +57 -52
- data/test/test_reliability.rb +55 -45
- data/test/test_reliability_icc.rb +8 -8
- data/test/test_reliability_skillscale.rb +26 -24
- data/test/test_resample.rb +1 -1
- data/test/test_statistics.rb +3 -13
- data/test/test_stest.rb +9 -9
- data/test/test_stratified.rb +3 -3
- data/test/test_test_t.rb +12 -12
- data/test/test_umannwhitney.rb +2 -2
- data/test/test_vector.rb +76 -613
- data/test/test_wilcoxonsignedrank.rb +4 -4
- metadata +57 -28
- data/lib/statsample/rserve_extension.rb +0 -20
- data/lib/statsample/vector/gsl.rb +0 -106
- data/test/fixtures/repeated_fields.csv +0 -7
- data/test/fixtures/scientific_notation.csv +0 -4
- data/test/fixtures/test_csv.csv +0 -7
- data/test/fixtures/test_xls.xls +0 -0
- data/test/test_csv.rb +0 -63
- data/test/test_rserve_extension.rb +0 -42
- data/test/test_xls.rb +0 -52
data/test/test_factor.rb
CHANGED
@@ -7,26 +7,32 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
7
7
|
# Based on Hardle and Simar
|
8
8
|
def setup
|
9
9
|
@fixtures_dir = File.expand_path(File.dirname(__FILE__) + '/fixtures')
|
10
|
+
Daru.lazy_update = true
|
11
|
+
end
|
12
|
+
|
13
|
+
def teardown
|
14
|
+
Daru.lazy_update = false
|
10
15
|
end
|
11
16
|
# Based on Hurdle example
|
12
17
|
def test_covariance_matrix
|
13
|
-
ds =
|
14
|
-
ds.
|
15
|
-
ds[f] = ds[f].
|
18
|
+
ds = Daru::DataFrame.from_plaintext(@fixtures_dir + '/bank2.dat', [:v1,:v2,:v3,:v4,:v5,:v6])
|
19
|
+
ds.vectors.each {|f|
|
20
|
+
ds[f] = ds[f].center
|
16
21
|
}
|
17
|
-
|
22
|
+
ds.update
|
23
|
+
cm = Statsample::Bivariate.covariance_matrix ds
|
18
24
|
pca = Statsample::Factor::PCA.new(cm, m: 6)
|
19
25
|
# puts pca.summary
|
20
26
|
# puts pca.feature_matrix
|
21
|
-
exp_eig = [2.985, 0.931, 0.242, 0.194, 0.085, 0.035]
|
22
|
-
assert_similar_vector(exp_eig, pca.eigenvalues
|
27
|
+
exp_eig = Daru::Vector.new([2.985, 0.931, 0.242, 0.194, 0.085, 0.035])
|
28
|
+
assert_similar_vector(exp_eig, Daru::Vector.new(pca.eigenvalues), 0.1)
|
23
29
|
pcs = pca.principal_components(ds)
|
24
30
|
k = 6
|
25
31
|
comp_matrix = pca.component_matrix
|
26
32
|
k.times {|i|
|
27
|
-
pc_id = "PC_#{i + 1}"
|
33
|
+
pc_id = "PC_#{i + 1}".to_sym
|
28
34
|
k.times {|j| # variable
|
29
|
-
ds_id = "v#{j + 1}"
|
35
|
+
ds_id = "v#{j + 1}".to_sym
|
30
36
|
r = Statsample::Bivariate.correlation(ds[ds_id], pcs[pc_id])
|
31
37
|
assert_in_delta(r, comp_matrix[j, i])
|
32
38
|
}
|
@@ -42,13 +48,13 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
42
48
|
samples = 20
|
43
49
|
[3, 5, 7].each {|k|
|
44
50
|
v = {}
|
45
|
-
v[
|
46
|
-
(1...k).each {|i|
|
47
|
-
v["x#{i}"] = samples.times.map { |ii| ran.call * 0.5 + v["x#{i - 1}"][ii] * 0.5 }.
|
51
|
+
v[:x0] = Daru::Vector.new(samples.times.map { ran.call }).center
|
52
|
+
(1...k).each { |i|
|
53
|
+
v["x#{i}".to_sym] = Daru::Vector.new(samples.times.map { |ii| ran.call * 0.5 + v["x#{i - 1}".to_sym][ii] * 0.5 }).center
|
48
54
|
}
|
49
55
|
|
50
|
-
ds = v
|
51
|
-
cm =
|
56
|
+
ds = Daru::DataFrame.new(v)
|
57
|
+
cm = Statsample::Bivariate.covariance_matrix ds
|
52
58
|
# @r.assign('ds',ds)
|
53
59
|
# @r.eval('cm<-cor(ds);sm<-eigen(cm, sym=TRUE);v<-sm$vectors')
|
54
60
|
# puts "eigenvalues"
|
@@ -61,14 +67,14 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
61
67
|
cm_ruby = pca_ruby.component_matrix
|
62
68
|
# puts cm_ruby.summary
|
63
69
|
k.times {|i|
|
64
|
-
pc_id = "PC_#{i + 1}"
|
70
|
+
pc_id = "PC_#{i + 1}".to_sym
|
65
71
|
assert_in_delta(pca_ruby.eigenvalues[i], pca_gsl.eigenvalues[i], 1e-10)
|
66
72
|
# Revert gsl component values
|
67
73
|
pc_gsl_data = (pc_gsl[pc_id][0] - pc_ruby[pc_id][0]).abs > 1e-6 ? pc_gsl[pc_id].recode(&:-@) : pc_gsl[pc_id]
|
68
74
|
assert_similar_vector(pc_gsl_data, pc_ruby[pc_id], 1e-6, "PC for #{k} variables")
|
69
75
|
if false
|
70
76
|
k.times {|j| # variable
|
71
|
-
ds_id = "x#{j}"
|
77
|
+
ds_id = "x#{j}".to_sym
|
72
78
|
r = Statsample::Bivariate.correlation(ds[ds_id], pc_ruby[pc_id])
|
73
79
|
puts "#{pc_id}-#{ds_id}:#{r}"
|
74
80
|
}
|
@@ -80,18 +86,22 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
80
86
|
end
|
81
87
|
|
82
88
|
def test_principalcomponents
|
83
|
-
|
89
|
+
if Statsample.has_gsl?
|
90
|
+
principalcomponents(true)
|
91
|
+
else
|
92
|
+
skip "Require GSL"
|
93
|
+
end
|
84
94
|
principalcomponents(false)
|
85
95
|
end
|
86
96
|
|
87
97
|
def principalcomponents(gsl)
|
88
98
|
ran = Distribution::Normal.rng
|
89
99
|
samples = 50
|
90
|
-
x1 = samples.times.map { ran.call }
|
91
|
-
x2 = samples.times.map { |i| ran.call * 0.5 + x1[i] * 0.5 }
|
92
|
-
ds = {
|
100
|
+
x1 = Daru::Vector.new(samples.times.map { ran.call })
|
101
|
+
x2 = Daru::Vector.new(samples.times.map { |i| ran.call * 0.5 + x1[i] * 0.5 })
|
102
|
+
ds = Daru::DataFrame.new({ :x1 => x1, :x2 => x2 })
|
93
103
|
|
94
|
-
cm =
|
104
|
+
cm = Statsample::Bivariate.correlation_matrix ds
|
95
105
|
r = cm[0, 1]
|
96
106
|
pca = Statsample::Factor::PCA.new(cm, m: 2, use_gsl: gsl)
|
97
107
|
assert_in_delta(1 + r, pca.eigenvalues[0], 1e-10)
|
@@ -103,14 +113,14 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
103
113
|
assert_equal_vector(hs * m_1, pca.eigenvectors[1])
|
104
114
|
|
105
115
|
pcs = pca.principal_components(ds)
|
106
|
-
exp_pc_1 = ds.
|
107
|
-
hs * (row[
|
116
|
+
exp_pc_1 = ds.collect_row_with_index {|row, _i|
|
117
|
+
hs * (row[:x1] + row[:x2])
|
108
118
|
}
|
109
|
-
exp_pc_2 = ds.
|
110
|
-
gsl ? hs * (row[
|
119
|
+
exp_pc_2 = ds.collect_row_with_index {|row, _i|
|
120
|
+
gsl ? hs * (row[:x2] - row[:x1]) : hs * (row[:x1] - row[:x2])
|
111
121
|
}
|
112
|
-
assert_similar_vector(exp_pc_1, pcs[
|
113
|
-
assert_similar_vector(exp_pc_2, pcs[
|
122
|
+
assert_similar_vector(exp_pc_1, pcs[:PC_1])
|
123
|
+
assert_similar_vector(exp_pc_2, pcs[:PC_2])
|
114
124
|
end
|
115
125
|
|
116
126
|
def test_antiimage
|
@@ -121,11 +131,11 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
121
131
|
end
|
122
132
|
|
123
133
|
def test_kmo
|
124
|
-
@v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70]
|
125
|
-
@v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0]
|
126
|
-
@v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4]
|
134
|
+
@v1 = Daru::Vector.new([1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70])
|
135
|
+
@v2 = Daru::Vector.new([5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0])
|
136
|
+
@v3 = Daru::Vector.new([10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4])
|
127
137
|
# KMO: 0.490
|
128
|
-
ds = {
|
138
|
+
ds = Daru::DataFrame.new({ :v1 => @v1, :v2 => @v2, :v3 => @v3 })
|
129
139
|
cor = Statsample::Bivariate.correlation_matrix(ds)
|
130
140
|
kmo = Statsample::Factor.kmo(cor)
|
131
141
|
assert_in_delta(0.667, kmo, 0.001)
|
@@ -141,12 +151,12 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
141
151
|
end
|
142
152
|
# Tested with SPSS and R
|
143
153
|
def test_pca
|
144
|
-
|
145
|
-
a = [2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1]
|
146
|
-
b = [2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9]
|
147
|
-
a
|
148
|
-
b
|
149
|
-
ds = {
|
154
|
+
dtype = Statsample.has_gsl? ? :gsl : :array
|
155
|
+
a = Daru::Vector.new([2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1], dtype: dtype)
|
156
|
+
b = Daru::Vector.new([2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9], dtype: dtype)
|
157
|
+
a = a - a.mean
|
158
|
+
b = b - b.mean
|
159
|
+
ds = Daru::DataFrame.new({ :a => a, :b => b })
|
150
160
|
|
151
161
|
cov_matrix = Statsample::Bivariate.covariance_matrix(ds)
|
152
162
|
if Statsample.has_gsl?
|
@@ -160,8 +170,6 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
160
170
|
end
|
161
171
|
|
162
172
|
def pca_set(pca, _type)
|
163
|
-
|
164
|
-
|
165
173
|
expected_eigenvalues = [1.284, 0.0490]
|
166
174
|
expected_eigenvalues.each_with_index{|ev, i|
|
167
175
|
assert_in_delta(ev, pca.eigenvalues[i], 0.001)
|
data/test/test_factor_pa.rb
CHANGED
@@ -7,6 +7,11 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
7
7
|
# Based on Hardle and Simar
|
8
8
|
def setup
|
9
9
|
@fixtures_dir = File.expand_path(File.dirname(__FILE__) + '/fixtures')
|
10
|
+
Daru.lazy_update = true
|
11
|
+
end
|
12
|
+
|
13
|
+
def teardown
|
14
|
+
Daru.lazy_update = false
|
10
15
|
end
|
11
16
|
|
12
17
|
def test_parallelanalysis_with_data
|
@@ -15,26 +20,30 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
15
20
|
variables = 10
|
16
21
|
iterations = 50
|
17
22
|
rng = Distribution::Normal.rng
|
18
|
-
f1 = samples.times.collect { rng.call }
|
19
|
-
f2 = samples.times.collect { rng.call }
|
23
|
+
f1 = Daru::Vector.new(samples.times.collect { rng.call })
|
24
|
+
f2 = Daru::Vector.new(samples.times.collect { rng.call })
|
20
25
|
vectors = {}
|
21
26
|
variables.times do |i|
|
22
27
|
if i < 5
|
23
|
-
vectors["v#{i}"] =
|
24
|
-
|
25
|
-
|
28
|
+
vectors["v#{i}".to_sym] = Daru::Vector.new(
|
29
|
+
samples.times.collect { |nv|
|
30
|
+
f1[nv] * 5 + f2[nv] * 2 + rng.call
|
31
|
+
}
|
32
|
+
)
|
26
33
|
else
|
27
|
-
vectors["v#{i}"] =
|
28
|
-
|
29
|
-
|
34
|
+
vectors["v#{i}".to_sym] = Daru::Vector.new(
|
35
|
+
samples.times.collect { |nv|
|
36
|
+
f2[nv] * 5 + f1[nv] * 2 + rng.call
|
37
|
+
}
|
38
|
+
)
|
30
39
|
end
|
31
40
|
end
|
32
|
-
ds = vectors
|
41
|
+
ds = Daru::DataFrame.new(vectors)
|
33
42
|
|
34
43
|
pa1 = Statsample::Factor::ParallelAnalysis.new(ds, bootstrap_method: :data, iterations: iterations)
|
35
44
|
pa2 = Statsample::Factor::ParallelAnalysis.with_random_data(samples, variables, iterations: iterations, percentil: 95)
|
36
45
|
3.times do |n|
|
37
|
-
var = "ev_0000#{n + 1}"
|
46
|
+
var = "ev_0000#{n + 1}".to_sym
|
38
47
|
assert_in_delta(pa1.ds_eigenvalues[var].mean, pa2.ds_eigenvalues[var].mean, 0.05)
|
39
48
|
end
|
40
49
|
else
|
@@ -44,9 +53,9 @@ class StatsampleFactorTestCase < Minitest::Test
|
|
44
53
|
|
45
54
|
def test_parallelanalysis
|
46
55
|
pa = Statsample::Factor::ParallelAnalysis.with_random_data(305, 8, iterations: 100, percentil: 95)
|
47
|
-
assert_in_delta(1.2454, pa.ds_eigenvalues[
|
48
|
-
assert_in_delta(1.1542, pa.ds_eigenvalues[
|
49
|
-
assert_in_delta(1.0836, pa.ds_eigenvalues[
|
56
|
+
assert_in_delta(1.2454, pa.ds_eigenvalues[:ev_00001].mean, 0.01)
|
57
|
+
assert_in_delta(1.1542, pa.ds_eigenvalues[:ev_00002].mean, 0.01)
|
58
|
+
assert_in_delta(1.0836, pa.ds_eigenvalues[:ev_00003].mean, 0.01)
|
50
59
|
assert(pa.summary.size > 0)
|
51
60
|
end
|
52
61
|
end
|
data/test/test_ggobi.rb
CHANGED
@@ -2,11 +2,11 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
|
2
2
|
require 'ostruct'
|
3
3
|
class StatsampleGGobiTestCase < Minitest::Test
|
4
4
|
def setup
|
5
|
-
v1
|
6
|
-
@v2 = (%w(a b c a a a b b c d) * 10)
|
5
|
+
v1 = Daru::Vector.new([10.2, 20.3, 10, 20, 30, 40, 30, 20, 30, 40] * 10)
|
6
|
+
@v2 = Daru::Vector.new(%w(a b c a a a b b c d) * 10)
|
7
7
|
@v2.labels = { 'a' => 'letter a', 'd' => 'letter d' }
|
8
|
-
v3
|
9
|
-
@ds = {
|
8
|
+
v3 = Daru::Vector.new([1, 2, 3, 4, 5, 4, 3, 2, 1, 2] * 10)
|
9
|
+
@ds = Daru::DataFrame.new({ :v1 => v1, :v2 => @v2, :v3 => v3 })
|
10
10
|
end
|
11
11
|
|
12
12
|
def test_values_definition
|
data/test/test_gsl.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
2
|
class StatsampleGSLTestCase < Minitest::Test
|
3
3
|
should_with_gsl 'matrix with gsl' do
|
4
|
-
a = [1, 2, 3, 4, 20]
|
5
|
-
b = [3, 2, 3, 4, 50]
|
6
|
-
c = [6, 2, 3, 4, 3]
|
7
|
-
ds = {
|
4
|
+
a = Daru::Vector.new([1, 2, 3, 4, 20])
|
5
|
+
b = Daru::Vector.new([3, 2, 3, 4, 50])
|
6
|
+
c = Daru::Vector.new([6, 2, 3, 4, 3])
|
7
|
+
ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c })
|
8
8
|
gsl = ds.to_matrix.to_gsl
|
9
9
|
assert_equal(5, gsl.size1)
|
10
10
|
assert_equal(3, gsl.size2)
|
data/test/test_histogram.rb
CHANGED
@@ -75,13 +75,13 @@ class StatsampleHistogramTestCase < Minitest::Test
|
|
75
75
|
assert_equal(min, h.min_val)
|
76
76
|
end
|
77
77
|
should 'return correct estimated mean' do
|
78
|
-
a = [1.5, 1.5, 1.5, 3.5, 3.5, 3.5]
|
78
|
+
a = Daru::Vector.new([1.5, 1.5, 1.5, 3.5, 3.5, 3.5])
|
79
79
|
h = Statsample::Histogram.alloc(5, [0, 5])
|
80
80
|
h.increment(a)
|
81
81
|
assert_equal(2.5, h.estimated_mean)
|
82
82
|
end
|
83
83
|
should 'return correct estimated standard deviation' do
|
84
|
-
a = [0.5, 1.5, 1.5, 1.5, 2.5, 3.5, 3.5, 3.5, 4.5]
|
84
|
+
a = Daru::Vector.new([0.5, 1.5, 1.5, 1.5, 2.5, 3.5, 3.5, 3.5, 4.5])
|
85
85
|
h = Statsample::Histogram.alloc(5, [0, 5])
|
86
86
|
h.increment(a)
|
87
87
|
assert_equal(a.sd, h.estimated_standard_deviation)
|
@@ -100,7 +100,7 @@ class StatsampleHistogramTestCase < Minitest::Test
|
|
100
100
|
end
|
101
101
|
should 'not raise exception when all values equal' do
|
102
102
|
assert_nothing_raised do
|
103
|
-
a = [5, 5, 5, 5, 5, 5]
|
103
|
+
a = Daru::Vector.new([5, 5, 5, 5, 5, 5])
|
104
104
|
h = Statsample::Graph::Histogram.new(a)
|
105
105
|
h.to_svg
|
106
106
|
end
|
data/test/test_matrix.rb
CHANGED
@@ -4,17 +4,17 @@ class StatsampleMatrixTestCase < Minitest::Test
|
|
4
4
|
def test_to_dataset
|
5
5
|
m = Matrix[[1, 4], [2, 5], [3, 6]]
|
6
6
|
m.extend Statsample::NamedMatrix
|
7
|
-
m.fields_y =
|
7
|
+
m.fields_y = [:x1, :x2]
|
8
8
|
m.name = 'test'
|
9
9
|
samples = 100
|
10
|
-
x1 =
|
11
|
-
x2 =
|
12
|
-
ds = {
|
13
|
-
ds.
|
14
|
-
obs = m.
|
15
|
-
assert_equal(ds[
|
16
|
-
assert_equal(ds[
|
17
|
-
assert_equal(ds[
|
10
|
+
x1 =Daru::Vector.new([1, 2, 3])
|
11
|
+
x2 =Daru::Vector.new([4, 5, 6])
|
12
|
+
ds = Daru::DataFrame.new({ :x1 => x1, :x2 => x2 })
|
13
|
+
ds.rename 'test'
|
14
|
+
obs = m.to_dataframe
|
15
|
+
assert_equal(ds[:x1], obs[:x1])
|
16
|
+
assert_equal(ds[:x2], obs[:x2])
|
17
|
+
assert_equal(ds[:x1].mean, obs[:x1].mean)
|
18
18
|
end
|
19
19
|
|
20
20
|
def test_covariate
|
@@ -33,10 +33,10 @@ class StatsampleMatrixTestCase < Minitest::Test
|
|
33
33
|
|
34
34
|
assert_equal(:covariance, a._type)
|
35
35
|
|
36
|
-
a = 50.times.collect { rand }
|
37
|
-
b = 50.times.collect { rand }
|
38
|
-
c = 50.times.collect { rand }
|
39
|
-
ds = {
|
36
|
+
a = Daru::Vector.new(50.times.collect { rand })
|
37
|
+
b = Daru::Vector.new(50.times.collect { rand })
|
38
|
+
c = Daru::Vector.new(50.times.collect { rand })
|
39
|
+
ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c })
|
40
40
|
corr = Statsample::Bivariate.correlation_matrix(ds)
|
41
41
|
real = Statsample::Bivariate.covariance_matrix(ds).correlation
|
42
42
|
corr.row_size.times do |i|
|
data/test/test_multiset.rb
CHANGED
@@ -2,122 +2,134 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
|
2
2
|
|
3
3
|
class StatsampleMultisetTestCase < Minitest::Test
|
4
4
|
def setup
|
5
|
-
@x = %w(a a a a b b b b)
|
6
|
-
@y = [1, 2, 3, 4, 5, 6, 7, 8]
|
7
|
-
@z = [10, 11, 12, 13, 14, 15, 16, 17]
|
8
|
-
@ds = {
|
9
|
-
@ms = @ds.to_multiset_by_split(
|
5
|
+
@x = Daru::Vector.new(%w(a a a a b b b b))
|
6
|
+
@y = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8])
|
7
|
+
@z = Daru::Vector.new([10, 11, 12, 13, 14, 15, 16, 17])
|
8
|
+
@ds = Daru::DataFrame.new({ :x => @x, :y => @y, :z => @z })
|
9
|
+
@ms = @ds.to_multiset_by_split(:x)
|
10
10
|
end
|
11
11
|
|
12
12
|
def test_creation
|
13
|
-
v1a = [1, 2, 3, 4, 5]
|
14
|
-
v2b = [11, 21, 31, 41, 51]
|
15
|
-
v3c = [21, 23, 34, 45, 56]
|
16
|
-
ds1 = {
|
17
|
-
v1b = [15, 25, 35, 45, 55]
|
18
|
-
v2b = [11, 21, 31, 41, 51]
|
19
|
-
v3b = [21, 23, 34, 45, 56]
|
20
|
-
ds2 = {
|
21
|
-
ms = Statsample::Multiset.new(
|
22
|
-
ms.add_dataset(
|
23
|
-
ms.add_dataset(
|
24
|
-
assert_equal(ds1, ms[
|
25
|
-
assert_equal(ds2, ms[
|
26
|
-
assert_equal(v1a, ms[
|
27
|
-
assert_not_equal(v1b, ms[
|
28
|
-
ds3 = {
|
13
|
+
v1a = Daru::Vector.new([1, 2, 3, 4, 5])
|
14
|
+
v2b = Daru::Vector.new([11, 21, 31, 41, 51])
|
15
|
+
v3c = Daru::Vector.new([21, 23, 34, 45, 56])
|
16
|
+
ds1 = Daru::DataFrame.new({ :v1 => v1a, :v2 => v2b, :v3 => v3c })
|
17
|
+
v1b = Daru::Vector.new([15, 25, 35, 45, 55])
|
18
|
+
v2b = Daru::Vector.new([11, 21, 31, 41, 51])
|
19
|
+
v3b = Daru::Vector.new([21, 23, 34, 45, 56])
|
20
|
+
ds2 = Daru::DataFrame.new({ :v1 => v1b, :v2 => v2b, :v3 => v3b })
|
21
|
+
ms = Statsample::Multiset.new([:v1, :v2, :v3])
|
22
|
+
ms.add_dataset(:ds1, ds1)
|
23
|
+
ms.add_dataset(:ds2, ds2)
|
24
|
+
assert_equal(ds1, ms[:ds1])
|
25
|
+
assert_equal(ds2, ms[:ds2])
|
26
|
+
assert_equal(v1a, ms[:ds1][:v1])
|
27
|
+
assert_not_equal(v1b, ms[:ds1][:v1])
|
28
|
+
ds3 = Daru::DataFrame.new({ :v1 => v1b, :v2 => v2b })
|
29
29
|
assert_raise ArgumentError do
|
30
30
|
ms.add_dataset(ds3)
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
34
|
def test_creation_empty
|
35
|
-
ms = Statsample::Multiset.new_empty_vectors(
|
36
|
-
ds_male
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
35
|
+
ms = Statsample::Multiset.new_empty_vectors([:id, :age, :name], [:male, :female])
|
36
|
+
ds_male = Daru::DataFrame.new({
|
37
|
+
:id => Daru::Vector.new([]),
|
38
|
+
:age => Daru::Vector.new([]),
|
39
|
+
:name => Daru::Vector.new([])
|
40
|
+
}, order: [:id, :age, :name])
|
41
|
+
|
42
|
+
ds_female = Daru::DataFrame.new({
|
43
|
+
:id => Daru::Vector.new([]),
|
44
|
+
:age => Daru::Vector.new([]),
|
45
|
+
:name => Daru::Vector.new([])
|
46
|
+
}, order: [:id, :age, :name])
|
47
|
+
|
48
|
+
ms2 = Statsample::Multiset.new([:id, :age, :name])
|
49
|
+
ms2.add_dataset(:male, ds_male)
|
50
|
+
ms2.add_dataset(:female, ds_female)
|
41
51
|
assert_equal(ms2.fields, ms.fields)
|
42
|
-
assert_equal(ms2[
|
43
|
-
assert_equal(ms2[
|
52
|
+
assert_equal(ms2[:male], ms[:male])
|
53
|
+
assert_equal(ms2[:female], ms[:female])
|
44
54
|
end
|
45
55
|
|
46
56
|
def test_to_multiset_by_split_one
|
47
|
-
sex
|
48
|
-
city = %w(London Paris NY London Paris NY London Paris NY Tome)
|
49
|
-
age
|
50
|
-
ds
|
51
|
-
ms = ds.to_multiset_by_split(
|
57
|
+
sex = Daru::Vector.new(%w(m m m m m f f f f m))
|
58
|
+
city = Daru::Vector.new(%w(London Paris NY London Paris NY London Paris NY Tome))
|
59
|
+
age = Daru::Vector.new([10, 10, 20, 30, 34, 34, 33, 35, 36, 40])
|
60
|
+
ds = Daru::DataFrame.new({ :sex => sex, :city => city, :age => age })
|
61
|
+
ms = ds.to_multiset_by_split(:sex)
|
52
62
|
assert_equal(2, ms.n_datasets)
|
53
63
|
assert_equal(%w(f m), ms.datasets.keys.sort)
|
54
|
-
assert_equal(6, ms['m'].
|
55
|
-
assert_equal(4, ms['f'].
|
56
|
-
assert_equal(%w(London Paris NY London Paris Tome), ms['m'][
|
57
|
-
assert_equal([34, 33, 35, 36], ms['f'][
|
64
|
+
assert_equal(6, ms['m'].nrows)
|
65
|
+
assert_equal(4, ms['f'].nrows)
|
66
|
+
assert_equal(%w(London Paris NY London Paris Tome), ms['m'][:city].to_a)
|
67
|
+
assert_equal([34, 33, 35, 36], ms['f'][:age].to_a)
|
58
68
|
end
|
59
69
|
|
60
70
|
def test_to_multiset_by_split_multiple
|
61
|
-
sex = %w(m m m m m m m m m m f f f f f f f f f f)
|
62
|
-
city = %w(London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris)
|
63
|
-
hair = %w(blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black)
|
64
|
-
age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40]
|
65
|
-
ds =
|
66
|
-
|
71
|
+
sex = Daru::Vector.new(%w(m m m m m m m m m m f f f f f f f f f f))
|
72
|
+
city = Daru::Vector.new(%w(London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris))
|
73
|
+
hair = Daru::Vector.new(%w(blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black))
|
74
|
+
age = Daru::Vector.new([10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40])
|
75
|
+
ds = Daru::DataFrame.new({
|
76
|
+
:sex => sex, :city => city, :hair => hair, :age => age
|
77
|
+
}, order: [:sex, :city, :hair, :age])
|
78
|
+
ms = ds.to_multiset_by_split(:sex, :city, :hair)
|
67
79
|
assert_equal(8, ms.n_datasets)
|
68
|
-
assert_equal(3, ms[%w(m London blonde)].
|
69
|
-
assert_equal(3, ms[%w(m London blonde)].
|
70
|
-
assert_equal(1, ms[%w(m Paris black)].
|
80
|
+
assert_equal(3, ms[%w(m London blonde)].nrows)
|
81
|
+
assert_equal(3, ms[%w(m London blonde)].nrows)
|
82
|
+
assert_equal(1, ms[%w(m Paris black)].nrows)
|
71
83
|
end
|
72
84
|
|
73
85
|
def test_stratum_proportion
|
74
|
-
ds1 = {
|
75
|
-
ds2 = {
|
76
|
-
assert_equal(5.0 / 12, ds1[
|
77
|
-
assert_equal(7.0 / 9, ds2[
|
78
|
-
ms = Statsample::Multiset.new([
|
79
|
-
ms.add_dataset(
|
80
|
-
ms.add_dataset(
|
81
|
-
ss = Statsample::StratifiedSample.new(ms,
|
82
|
-
assert_in_delta(0.655, ss.proportion(
|
83
|
-
assert_in_delta(0.345, ss.proportion(
|
86
|
+
ds1 = Daru::DataFrame.new({ :q1 => Daru::Vector.new([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]) })
|
87
|
+
ds2 = Daru::DataFrame.new({ :q1 => Daru::Vector.new([1, 1, 1, 1, 1, 1, 1, 0, 0]) })
|
88
|
+
assert_equal(5.0 / 12, ds1[:q1].proportion)
|
89
|
+
assert_equal(7.0 / 9, ds2[:q1].proportion)
|
90
|
+
ms = Statsample::Multiset.new([:q1])
|
91
|
+
ms.add_dataset(:d1, ds1)
|
92
|
+
ms.add_dataset(:d2, ds2)
|
93
|
+
ss = Statsample::StratifiedSample.new(ms, :d1 => 50, :d2 => 100)
|
94
|
+
assert_in_delta(0.655, ss.proportion(:q1), 0.01)
|
95
|
+
assert_in_delta(0.345, ss.proportion(:q1, 0), 0.01)
|
84
96
|
end
|
85
97
|
|
86
98
|
def test_stratum_scale
|
87
|
-
boys = {
|
88
|
-
girls =
|
89
|
-
ms = Statsample::Multiset.new([
|
90
|
-
ms.add_dataset(
|
91
|
-
ms.add_dataset(
|
92
|
-
ss = Statsample::StratifiedSample.new(ms,
|
99
|
+
boys = Daru::DataFrame.new({ :test => Daru::Vector.new([50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90]) })
|
100
|
+
girls =Daru::DataFrame.new({ :test => Daru::Vector.new( [70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90]) })
|
101
|
+
ms = Statsample::Multiset.new([:test])
|
102
|
+
ms.add_dataset(:boys, boys)
|
103
|
+
ms.add_dataset(:girls, girls)
|
104
|
+
ss = Statsample::StratifiedSample.new(ms, :boys => 10_000, :girls => 10_000)
|
93
105
|
assert_equal(2, ss.strata_number)
|
94
106
|
assert_equal(20_000, ss.population_size)
|
95
|
-
assert_equal(10_000, ss.stratum_size(
|
96
|
-
assert_equal(10_000, ss.stratum_size(
|
107
|
+
assert_equal(10_000, ss.stratum_size(:boys))
|
108
|
+
assert_equal(10_000, ss.stratum_size(:girls))
|
97
109
|
assert_equal(36, ss.sample_size)
|
98
|
-
assert_equal(75, ss.mean(
|
99
|
-
assert_in_delta(1.45, ss.standard_error_wor(
|
100
|
-
assert_in_delta(ss.standard_error_wor(
|
110
|
+
assert_equal(75, ss.mean(:test))
|
111
|
+
assert_in_delta(1.45, ss.standard_error_wor(:test), 0.01)
|
112
|
+
assert_in_delta(ss.standard_error_wor(:test), ss.standard_error_wor_2(:test), 0.00001)
|
101
113
|
end
|
102
114
|
|
103
115
|
def test_each
|
104
116
|
xpe = {
|
105
|
-
'a' => %w(a a a a)
|
106
|
-
'b' => %w(b b b b)
|
117
|
+
'a' => Daru::Vector.new(%w(a a a a)),
|
118
|
+
'b' => Daru::Vector.new(%w(b b b b))
|
107
119
|
}
|
108
120
|
ype = {
|
109
|
-
'a' => [1, 2, 3, 4]
|
110
|
-
'b' => [5, 6, 7, 8]
|
121
|
+
'a' => Daru::Vector.new([1, 2, 3, 4]),
|
122
|
+
'b' => Daru::Vector.new([5, 6, 7, 8])
|
111
123
|
}
|
112
124
|
zpe = {
|
113
|
-
'a' => [10, 11, 12, 13]
|
114
|
-
'b' => [14, 15, 16, 17]
|
125
|
+
'a' => Daru::Vector.new([10, 11, 12, 13]),
|
126
|
+
'b' => Daru::Vector.new([14, 15, 16, 17])
|
115
127
|
}
|
116
128
|
xp, yp, zp = {}, {}, {}
|
117
129
|
@ms.each {|k, ds|
|
118
|
-
xp[k] = ds[
|
119
|
-
yp[k] = ds[
|
120
|
-
zp[k] = ds[
|
130
|
+
xp[k] = ds[:x]
|
131
|
+
yp[k] = ds[:y]
|
132
|
+
zp[k] = ds[:z]
|
121
133
|
}
|
122
134
|
assert_equal(xpe, xp)
|
123
135
|
assert_equal(ype, yp)
|
@@ -127,38 +139,38 @@ class StatsampleMultisetTestCase < Minitest::Test
|
|
127
139
|
def test_multiset_union_with_block
|
128
140
|
r1 = rand
|
129
141
|
r2 = rand
|
130
|
-
ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2]
|
142
|
+
ye = Daru::Vector.new([1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2])
|
131
143
|
|
132
|
-
ze = [10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2]
|
144
|
+
ze = Daru::Vector.new([10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2])
|
133
145
|
|
134
146
|
ds2 = @ms.union {|k, ds|
|
135
|
-
ds[
|
147
|
+
ds[:y].recode!{|v|
|
136
148
|
k == 'a' ? v * r1 : v * r2
|
137
149
|
}
|
138
|
-
ds[
|
150
|
+
ds[:z].recode!{|v|
|
139
151
|
k == 'a' ? v * r1 : v * r2
|
140
152
|
}
|
141
153
|
}
|
142
|
-
assert_equal(ye, ds2[
|
143
|
-
assert_equal(ze, ds2[
|
154
|
+
assert_equal(ye, ds2[:y])
|
155
|
+
assert_equal(ze, ds2[:z])
|
144
156
|
end
|
145
157
|
|
146
158
|
def test_multiset_union
|
147
159
|
r1 = rand
|
148
160
|
r2 = rand
|
149
|
-
ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2]
|
161
|
+
ye = Daru::Vector.new([1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2])
|
162
|
+
ze = Daru::Vector.new([10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2])
|
150
163
|
|
151
|
-
|
152
|
-
|
153
|
-
ds['y'].recode!{|v|
|
164
|
+
@ms.each do |k, ds|
|
165
|
+
ds[:y].recode! { |v|
|
154
166
|
k == 'a' ? v * r1 : v * r2
|
155
167
|
}
|
156
|
-
ds[
|
168
|
+
ds[:z].recode! {|v|
|
157
169
|
k == 'a' ? v * r1 : v * r2
|
158
170
|
}
|
159
|
-
|
171
|
+
end
|
160
172
|
ds2 = @ms.union
|
161
|
-
assert_equal(ye, ds2[
|
162
|
-
assert_equal(ze, ds2[
|
173
|
+
assert_equal(ye, ds2[:y])
|
174
|
+
assert_equal(ze, ds2[:z])
|
163
175
|
end
|
164
176
|
end
|