statsample 1.5.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.build.sh +15 -0
- data/.gitignore +1 -0
- data/.travis.yml +19 -7
- data/CONTRIBUTING.md +33 -0
- data/History.txt +5 -0
- data/README.md +41 -53
- data/benchmarks/correlation_matrix_15_variables.rb +6 -5
- data/benchmarks/correlation_matrix_5_variables.rb +6 -5
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
- data/examples/boxplot.rb +17 -5
- data/examples/correlation_matrix.rb +36 -7
- data/examples/dataset.rb +25 -5
- data/examples/dominance_analysis.rb +8 -7
- data/examples/dominance_analysis_bootstrap.rb +16 -11
- data/examples/histogram.rb +16 -2
- data/examples/icc.rb +5 -6
- data/examples/levene.rb +17 -3
- data/examples/multiple_regression.rb +6 -3
- data/examples/parallel_analysis.rb +11 -6
- data/examples/polychoric.rb +26 -13
- data/examples/principal_axis.rb +8 -4
- data/examples/reliability.rb +10 -10
- data/examples/scatterplot.rb +8 -0
- data/examples/t_test.rb +7 -0
- data/examples/u_test.rb +10 -2
- data/examples/vector.rb +9 -6
- data/examples/velicer_map_test.rb +12 -8
- data/lib/statsample.rb +13 -47
- data/lib/statsample/analysis/suite.rb +1 -1
- data/lib/statsample/anova/oneway.rb +6 -6
- data/lib/statsample/anova/twoway.rb +26 -24
- data/lib/statsample/bivariate.rb +78 -61
- data/lib/statsample/bivariate/pearson.rb +2 -2
- data/lib/statsample/codification.rb +45 -32
- data/lib/statsample/converter/csv.rb +15 -53
- data/lib/statsample/converter/spss.rb +6 -5
- data/lib/statsample/converters.rb +50 -211
- data/lib/statsample/crosstab.rb +26 -25
- data/lib/statsample/daru.rb +117 -0
- data/lib/statsample/dataset.rb +70 -942
- data/lib/statsample/dominanceanalysis.rb +16 -17
- data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
- data/lib/statsample/factor/parallelanalysis.rb +17 -19
- data/lib/statsample/factor/pca.rb +21 -20
- data/lib/statsample/factor/principalaxis.rb +3 -3
- data/lib/statsample/graph/boxplot.rb +8 -16
- data/lib/statsample/graph/histogram.rb +4 -4
- data/lib/statsample/graph/scatterplot.rb +8 -7
- data/lib/statsample/histogram.rb +128 -119
- data/lib/statsample/matrix.rb +20 -16
- data/lib/statsample/multiset.rb +39 -38
- data/lib/statsample/regression.rb +3 -3
- data/lib/statsample/regression/multiple.rb +8 -10
- data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
- data/lib/statsample/regression/multiple/baseengine.rb +32 -32
- data/lib/statsample/regression/multiple/gslengine.rb +33 -36
- data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
- data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
- data/lib/statsample/reliability.rb +23 -25
- data/lib/statsample/reliability/icc.rb +8 -7
- data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
- data/lib/statsample/reliability/scaleanalysis.rb +58 -60
- data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
- data/lib/statsample/resample.rb +1 -1
- data/lib/statsample/shorthand.rb +29 -25
- data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
- data/lib/statsample/test/levene.rb +28 -27
- data/lib/statsample/test/t.rb +7 -9
- data/lib/statsample/test/umannwhitney.rb +28 -28
- data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
- data/lib/statsample/vector.rb +70 -1013
- data/lib/statsample/version.rb +1 -1
- data/statsample.gemspec +12 -16
- data/test/helpers_tests.rb +1 -1
- data/test/test_analysis.rb +17 -17
- data/test/test_anova_contrast.rb +6 -6
- data/test/test_anovatwowaywithdataset.rb +8 -8
- data/test/test_anovawithvectors.rb +8 -8
- data/test/test_awesome_print_bug.rb +1 -1
- data/test/test_bartlettsphericity.rb +4 -4
- data/test/test_bivariate.rb +48 -43
- data/test/test_codification.rb +33 -33
- data/test/test_crosstab.rb +9 -9
- data/test/test_dataset.rb +28 -458
- data/test/test_factor.rb +46 -38
- data/test/test_factor_pa.rb +22 -13
- data/test/test_ggobi.rb +4 -4
- data/test/test_gsl.rb +4 -4
- data/test/test_histogram.rb +3 -3
- data/test/test_matrix.rb +13 -13
- data/test/test_multiset.rb +103 -91
- data/test/test_regression.rb +57 -52
- data/test/test_reliability.rb +55 -45
- data/test/test_reliability_icc.rb +8 -8
- data/test/test_reliability_skillscale.rb +26 -24
- data/test/test_resample.rb +1 -1
- data/test/test_statistics.rb +3 -13
- data/test/test_stest.rb +9 -9
- data/test/test_stratified.rb +3 -3
- data/test/test_test_t.rb +12 -12
- data/test/test_umannwhitney.rb +2 -2
- data/test/test_vector.rb +76 -613
- data/test/test_wilcoxonsignedrank.rb +4 -4
- metadata +57 -28
- data/lib/statsample/rserve_extension.rb +0 -20
- data/lib/statsample/vector/gsl.rb +0 -106
- data/test/fixtures/repeated_fields.csv +0 -7
- data/test/fixtures/scientific_notation.csv +0 -4
- data/test/fixtures/test_csv.csv +0 -7
- data/test/fixtures/test_xls.xls +0 -0
- data/test/test_csv.rb +0 -63
- data/test/test_rserve_extension.rb +0 -42
- data/test/test_xls.rb +0 -52
@@ -5,30 +5,32 @@ class StatsampleReliabilitySkillScaleTestCase < Minitest::Test
|
|
5
5
|
setup do
|
6
6
|
options = %w(a b c d e)
|
7
7
|
cases = 20
|
8
|
-
@id = cases.times.map { |v| v }
|
9
|
-
@a = cases.times.map { options[rand(5)] }
|
10
|
-
@b = cases.times.map { options[rand(5)] }
|
11
|
-
@c = cases.times.map { options[rand(5)] }
|
12
|
-
@d = cases.times.map { options[rand(5)] }
|
13
|
-
@e =
|
14
|
-
|
8
|
+
@id = Daru::Vector.new(cases.times.map { |v| v })
|
9
|
+
@a = Daru::Vector.new(cases.times.map { options[rand(5)] })
|
10
|
+
@b = Daru::Vector.new(cases.times.map { options[rand(5)] })
|
11
|
+
@c = Daru::Vector.new(cases.times.map { options[rand(5)] })
|
12
|
+
@d = Daru::Vector.new(cases.times.map { options[rand(5)] })
|
13
|
+
@e = Daru::Vector.new(
|
14
|
+
cases.times.map do |i|
|
15
|
+
i == 0 ? options[rand(0)] :
|
15
16
|
rand > 0.8 ? nil : options[rand(5)]
|
16
|
-
|
17
|
-
|
18
|
-
@
|
17
|
+
end
|
18
|
+
)
|
19
|
+
@ds = Daru::DataFrame.new({ :id => @id, :a => @a, :b => @b, :c => @c, :d => @d, :e => @e })
|
20
|
+
@key = { :a => 'a', :b => options[rand(5)], :c => options[rand(5)], :d => options[rand(5)], :e => options[rand(5)] }
|
19
21
|
@ssa = Statsample::Reliability::SkillScaleAnalysis.new(@ds, @key)
|
20
|
-
@ac = @a.map { |v| v == @key[
|
21
|
-
@bc = @b.map { |v| v == @key[
|
22
|
-
@cc = @c.map { |v| v == @key[
|
23
|
-
@dc = @d.map { |v| v == @key[
|
24
|
-
@ec = @e.map { |v| v.nil? ? nil : (v == @key[
|
22
|
+
@ac = Daru::Vector.new(@a.map { |v| v == @key[:a] ? 1 : 0 })
|
23
|
+
@bc = Daru::Vector.new(@b.map { |v| v == @key[:b] ? 1 : 0 })
|
24
|
+
@cc = Daru::Vector.new(@c.map { |v| v == @key[:c] ? 1 : 0 })
|
25
|
+
@dc = Daru::Vector.new(@d.map { |v| v == @key[:d] ? 1 : 0 })
|
26
|
+
@ec = Daru::Vector.new(@e.map { |v| v.nil? ? nil : (v == @key[:e] ? 1 : 0) })
|
25
27
|
end
|
26
28
|
should 'return proper corrected dataset' do
|
27
|
-
cds = {
|
29
|
+
cds = Daru::DataFrame.new({ :id => @id, :a => @ac, :b => @bc, :c => @cc, :d => @dc, :e => @ec })
|
28
30
|
assert_equal(cds, @ssa.corrected_dataset)
|
29
31
|
end
|
30
32
|
should 'return proper corrected minimal dataset' do
|
31
|
-
cdsm = {
|
33
|
+
cdsm = Daru::DataFrame.new({ :a => @ac, :b => @bc, :c => @cc, :d => @dc, :e => @ec })
|
32
34
|
assert_equal(cdsm, @ssa.corrected_dataset_minimal)
|
33
35
|
end
|
34
36
|
should 'return correct vector_sum and vector_sum' do
|
@@ -37,13 +39,13 @@ class StatsampleReliabilitySkillScaleTestCase < Minitest::Test
|
|
37
39
|
assert_equal(cdsm.vector_mean, @ssa.vector_mean)
|
38
40
|
end
|
39
41
|
should 'not crash on rare case' do
|
40
|
-
a =
|
41
|
-
b =
|
42
|
-
c =
|
43
|
-
d =
|
44
|
-
e =
|
45
|
-
key = {
|
46
|
-
ds =
|
42
|
+
a = Daru::Vector.new(['c', 'c', 'a', 'a', 'c', 'a', 'b', 'c', 'c', 'b', 'a', 'd', 'a', 'd', 'a', 'a', 'd', 'e', 'c', 'd'])
|
43
|
+
b = Daru::Vector.new(['e', 'b', 'e', 'b', 'c', 'd', 'a', 'e', 'e', 'c', 'b', 'e', 'e', 'b', 'd', 'c', 'e', 'b', 'b', 'd'])
|
44
|
+
c = Daru::Vector.new(['e', 'b', 'e', 'c', 'e', 'c', 'b', 'd', 'e', 'c', 'a', 'a', 'b', 'd', 'e', 'c', 'b', 'a', 'a', 'e'])
|
45
|
+
d = Daru::Vector.new(['a', 'b', 'd', 'd', 'e', 'b', 'e', 'b', 'd', 'c', 'e', 'a', 'c', 'd', 'c', 'c', 'e', 'd', 'd', 'b'])
|
46
|
+
e = Daru::Vector.new(['a', 'b', nil, 'd', 'c', 'c', 'd', nil, 'd', 'd', 'e', 'e', nil, nil, nil, 'd', 'c', nil, 'e', 'd'])
|
47
|
+
key = { :a => 'a', :b => 'e', :c => 'd', :d => 'c', :e => 'd' }
|
48
|
+
ds = Daru::DataFrame.new({:a => a, :b => b, :c => c, :d => d, :e => e})
|
47
49
|
ssa = Statsample::Reliability::SkillScaleAnalysis.new(ds, key)
|
48
50
|
assert(ssa.summary)
|
49
51
|
end
|
data/test/test_resample.rb
CHANGED
@@ -17,7 +17,7 @@ class StatsampleResampleTestCase < Minitest::Test
|
|
17
17
|
Statsample::Resample.generate(20, 1, 10).count(1)
|
18
18
|
}
|
19
19
|
assert_equal(400, r.size)
|
20
|
-
v =
|
20
|
+
v = Daru::Vector.new(r)
|
21
21
|
a = v.count { |x| x > 3 }
|
22
22
|
assert(a >= 30 && a <= 70)
|
23
23
|
end
|
data/test/test_statistics.rb
CHANGED
@@ -32,7 +32,7 @@ class StatsampleStatisicsTestCase < Minitest::Test
|
|
32
32
|
end
|
33
33
|
|
34
34
|
def test_estimation_mean
|
35
|
-
v = ([42] * 23 + [41] * 4 + [36] * 1 + [32] * 1 + [29] * 1 + [27] * 2 + [23] * 1 + [19] * 1 + [16] * 2 + [15] * 2 + [14, 11, 10, 9, 7] + [6] * 3 + [5] * 2 + [4, 3])
|
35
|
+
v = Daru::Vector.new([42] * 23 + [41] * 4 + [36] * 1 + [32] * 1 + [29] * 1 + [27] * 2 + [23] * 1 + [19] * 1 + [16] * 2 + [15] * 2 + [14, 11, 10, 9, 7] + [6] * 3 + [5] * 2 + [4, 3])
|
36
36
|
assert_equal(50, v.size)
|
37
37
|
assert_equal(1471, v.sum)
|
38
38
|
# limits=Statsample::SRS.mean_confidence_interval_z(v.mean(), v.sds(), v.size,676,0.80)
|
@@ -55,19 +55,9 @@ class StatsampleStatisicsTestCase < Minitest::Test
|
|
55
55
|
assert_in_delta(0.46, l[1], 0.01)
|
56
56
|
end
|
57
57
|
|
58
|
-
def test_ml
|
59
|
-
if true
|
60
|
-
# real=[1,1,1,1].to_vector(:numeric)
|
61
|
-
|
62
|
-
# pred=[0.0001,0.0001,0.0001,0.0001].to_vector(:numeric)
|
63
|
-
# puts Statsample::Bivariate.maximum_likehood_dichotomic(pred,real)
|
64
|
-
|
65
|
-
end
|
66
|
-
end
|
67
|
-
|
68
58
|
def test_simple_linear_regression
|
69
|
-
a = [1, 2, 3, 4, 5, 6]
|
70
|
-
b = [6, 2, 4, 10, 12, 8]
|
59
|
+
a = Daru::Vector.new([1, 2, 3, 4, 5, 6])
|
60
|
+
b = Daru::Vector.new([6, 2, 4, 10, 12, 8])
|
71
61
|
reg = Statsample::Regression::Simple.new_from_vectors(a, b)
|
72
62
|
assert_in_delta((reg.ssr + reg.sse).to_f, reg.sst, 0.001)
|
73
63
|
assert_in_delta(Statsample::Bivariate.pearson(a, b), reg.r, 0.001)
|
data/test/test_stest.rb
CHANGED
@@ -24,26 +24,26 @@ class StatsampleTestTestCase < Minitest::Test
|
|
24
24
|
end
|
25
25
|
|
26
26
|
def test_u_mannwhitney
|
27
|
-
a = [1, 2, 3, 4, 5, 6]
|
28
|
-
b = [0, 5, 7, 9, 10, 11]
|
27
|
+
a = Daru::Vector.new([1, 2, 3, 4, 5, 6])
|
28
|
+
b = Daru::Vector.new([0, 5, 7, 9, 10, 11])
|
29
29
|
assert_equal(7.5, Statsample::Test.u_mannwhitney(a, b).u)
|
30
30
|
assert_equal(7.5, Statsample::Test.u_mannwhitney(b, a).u)
|
31
|
-
a = [1, 7, 8, 9, 10, 11]
|
32
|
-
b = [2, 3, 4, 5, 6, 12]
|
31
|
+
a = Daru::Vector.new([1, 7, 8, 9, 10, 11])
|
32
|
+
b = Daru::Vector.new([2, 3, 4, 5, 6, 12])
|
33
33
|
assert_equal(11, Statsample::Test.u_mannwhitney(a, b).u)
|
34
34
|
end
|
35
35
|
|
36
36
|
def test_levene
|
37
|
-
a = [1, 2, 3, 4, 5, 6, 7, 8, 100, 10]
|
38
|
-
b = [30, 40, 50, 60, 70, 80, 90, 100, 110, 120]
|
37
|
+
a = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8, 100, 10])
|
38
|
+
b = Daru::Vector.new([30, 40, 50, 60, 70, 80, 90, 100, 110, 120])
|
39
39
|
levene = Statsample::Test::Levene.new([a, b])
|
40
40
|
assert_levene(levene)
|
41
41
|
end
|
42
42
|
|
43
43
|
def test_levene_dataset
|
44
|
-
a = [1, 2, 3, 4, 5, 6, 7, 8, 100, 10]
|
45
|
-
b = [30, 40, 50, 60, 70, 80, 90, 100, 110, 120]
|
46
|
-
ds = {
|
44
|
+
a = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8, 100, 10])
|
45
|
+
b = Daru::Vector.new([30, 40, 50, 60, 70, 80, 90, 100, 110, 120])
|
46
|
+
ds = Daru::DataFrame.new({ :a => a, :b => b })
|
47
47
|
levene = Statsample::Test::Levene.new(ds)
|
48
48
|
assert_levene(levene)
|
49
49
|
end
|
data/test/test_stratified.rb
CHANGED
@@ -9,9 +9,9 @@ class StatsampleStratifiedTestCase < Minitest::Test
|
|
9
9
|
a = [10, 20, 30, 40, 50]
|
10
10
|
b = [110, 120, 130, 140]
|
11
11
|
pop = a + b
|
12
|
-
av
|
13
|
-
bv
|
14
|
-
popv =
|
12
|
+
av = Daru::Vector.new(a)
|
13
|
+
bv = Daru::Vector.new(b)
|
14
|
+
popv = Daru::Vector.new(pop)
|
15
15
|
assert_equal(popv.mean, Statsample::StratifiedSample.mean(av, bv))
|
16
16
|
end
|
17
17
|
end
|
data/test/test_test_t.rb
CHANGED
@@ -4,24 +4,24 @@ class StatsampleTestTTestCase < Minitest::Test
|
|
4
4
|
include Math
|
5
5
|
context T do
|
6
6
|
setup do
|
7
|
-
@a = [30.02, 29.99, 30.11, 29.97, 30.01, 29.99]
|
8
|
-
@b = [29.89, 29.93, 29.72, 29.98, 30.02, 29.98]
|
7
|
+
@a = Daru::Vector.new([30.02, 29.99, 30.11, 29.97, 30.01, 29.99])
|
8
|
+
@b = Daru::Vector.new([29.89, 29.93, 29.72, 29.98, 30.02, 29.98])
|
9
9
|
@x1 = @a.mean
|
10
10
|
@x2 = @b.mean
|
11
11
|
@s1 = @a.sd
|
12
12
|
@s2 = @b.sd
|
13
|
-
@n1 = @a.
|
14
|
-
@n2 = @b.
|
13
|
+
@n1 = @a.size
|
14
|
+
@n2 = @b.size
|
15
15
|
end
|
16
16
|
should 'calculate correctly standard t' do
|
17
|
-
t = Statsample::Test::T.new(@x1, @s1.quo(Math.sqrt(@a.
|
18
|
-
assert_equal((@x1).quo(@s1.quo(Math.sqrt(@a.
|
19
|
-
assert_equal(@a.
|
17
|
+
t = Statsample::Test::T.new(@x1, @s1.quo(Math.sqrt(@a.size)), @a.size - 1)
|
18
|
+
assert_equal((@x1).quo(@s1.quo(Math.sqrt(@a.size))), t.t)
|
19
|
+
assert_equal(@a.size - 1, t.df)
|
20
20
|
assert(t.summary.size > 0)
|
21
21
|
end
|
22
22
|
should 'calculate correctly t for one sample' do
|
23
|
-
t1 = [6, 4, 6, 7, 4, 5, 5, 12, 6, 1]
|
24
|
-
t2 = [9, 6, 5, 10, 10, 8, 7, 10, 6, 5]
|
23
|
+
t1 = Daru::Vector.new([6, 4, 6, 7, 4, 5, 5, 12, 6, 1])
|
24
|
+
t2 = Daru::Vector.new([9, 6, 5, 10, 10, 8, 7, 10, 6, 5])
|
25
25
|
d = t1 - t2
|
26
26
|
t = Statsample::Test::T::OneSample.new(d)
|
27
27
|
assert_in_delta(-2.631, t.t, 0.001)
|
@@ -48,14 +48,14 @@ class StatsampleTestTTestCase < Minitest::Test
|
|
48
48
|
assert_in_delta(0.09095, t.probability_not_equal_variance, 0.001)
|
49
49
|
end
|
50
50
|
should 'be the same using shorthand' do
|
51
|
-
v = 100.times.map { rand(100) }
|
51
|
+
v = Daru::Vector.new(100.times.map { rand(100) })
|
52
52
|
assert_equal(Statsample::Test.t_one_sample(v).t, T::OneSample.new(v).t)
|
53
53
|
end
|
54
54
|
should 'calculate all values for one sample T test' do
|
55
55
|
u = @a.mean + (1 - rand * 2)
|
56
56
|
tos = T::OneSample.new(@a, u: u)
|
57
|
-
assert_equal((@a.mean - u).quo(@a.sd.quo(sqrt(@a.
|
58
|
-
assert_equal(@a.
|
57
|
+
assert_equal((@a.mean - u).quo(@a.sd.quo(sqrt(@a.size))), tos.t)
|
58
|
+
assert_equal(@a.size - 1, tos.df)
|
59
59
|
assert(tos.summary.size > 0)
|
60
60
|
end
|
61
61
|
end
|
data/test/test_umannwhitney.rb
CHANGED
@@ -4,8 +4,8 @@ class StatsampleUMannWhitneyTestCase < Minitest::Test
|
|
4
4
|
include Statsample::Test
|
5
5
|
context Statsample::Test::UMannWhitney do
|
6
6
|
setup do
|
7
|
-
@v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15]
|
8
|
-
@v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19]
|
7
|
+
@v1 = Daru::Vector.new([1, 2, 3, 4, 7, 8, 9, 10, 14, 15])
|
8
|
+
@v2 = Daru::Vector.new([5, 6, 11, 12, 13, 16, 17, 18, 19])
|
9
9
|
@u = Statsample::Test::UMannWhitney.new(@v1, @v2)
|
10
10
|
end
|
11
11
|
should 'have same result using class or Test#u_mannwhitney' do
|
data/test/test_vector.rb
CHANGED
@@ -3,24 +3,11 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
|
3
3
|
class StatsampleTestVector < Minitest::Test
|
4
4
|
include Statsample::Shorthand
|
5
5
|
|
6
|
-
def setup
|
7
|
-
@c = Statsample::Vector.new([5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99], :object)
|
8
|
-
@c.name = 'Test Vector'
|
9
|
-
@c.missing_values = [-99]
|
10
|
-
end
|
11
|
-
|
12
|
-
def assert_counting_tokens(b)
|
13
|
-
assert_equal([1, 1, 0, 1, 0, nil], b['a'].to_a)
|
14
|
-
assert_equal([0, 1, 0, 0, 0, nil], b['b'].to_a)
|
15
|
-
assert_equal([0, 0, 1, 0, 0, nil], b['c'].to_a)
|
16
|
-
assert_equal([0, 0, 1, 1, 0, nil], b['d'].to_a)
|
17
|
-
assert_equal([0, 0, 0, 0, 1, nil], b[10].to_a)
|
18
|
-
end
|
19
6
|
context Statsample do
|
20
7
|
setup do
|
21
8
|
@sample = 100
|
22
|
-
@a = @sample.times.map { |i| (i + rand(10)) % 10 == 0 ? nil : rand(100) }
|
23
|
-
@b = @sample.times.map { |i| (i + rand(10)) % 10 == 0 ? nil : rand(100) }
|
9
|
+
@a = Daru::Vector.new(@sample.times.map { |i| (i + rand(10)) % 10 == 0 ? nil : rand(100) })
|
10
|
+
@b = Daru::Vector.new(@sample.times.map { |i| (i + rand(10)) % 10 == 0 ? nil : rand(100) })
|
24
11
|
@correct_a = []
|
25
12
|
@correct_b = []
|
26
13
|
@a.each_with_index do |_v, i|
|
@@ -29,8 +16,8 @@ class StatsampleTestVector < Minitest::Test
|
|
29
16
|
@correct_b.push(@b[i])
|
30
17
|
end
|
31
18
|
end
|
32
|
-
@correct_a = @correct_a
|
33
|
-
@correct_b = @correct_b
|
19
|
+
@correct_a = Daru::Vector.new(@correct_a)
|
20
|
+
@correct_b = Daru::Vector.new(@correct_b)
|
34
21
|
|
35
22
|
@common = lambda do |av, bv|
|
36
23
|
assert_equal(@correct_a, av, 'A no es esperado')
|
@@ -39,649 +26,125 @@ class StatsampleTestVector < Minitest::Test
|
|
39
26
|
assert(!bv.has_missing_data?, 'b tiene datos faltantes')
|
40
27
|
end
|
41
28
|
end
|
29
|
+
|
42
30
|
should 'return correct only_valid' do
|
43
31
|
av, bv = Statsample.only_valid @a, @b
|
32
|
+
av.reset_index!
|
33
|
+
bv.reset_index!
|
44
34
|
av2, bv2 = Statsample.only_valid av, bv
|
45
35
|
@common.call(av, bv)
|
46
36
|
assert_equal(av, av2)
|
47
37
|
assert_not_same(av, av2)
|
48
38
|
assert_not_same(bv, bv2)
|
49
39
|
end
|
40
|
+
|
50
41
|
should 'return correct only_valid_clone' do
|
51
42
|
av, bv = Statsample.only_valid_clone @a, @b
|
43
|
+
av.reset_index!
|
44
|
+
bv.reset_index!
|
52
45
|
@common.call(av, bv)
|
53
46
|
av2, bv2 = Statsample.only_valid_clone av, bv
|
54
47
|
assert_equal(av, av2)
|
55
48
|
assert_same(av, av2)
|
56
49
|
assert_same(bv, bv2)
|
57
50
|
end
|
58
|
-
end
|
59
|
-
context Statsample::Vector do
|
60
|
-
setup do
|
61
|
-
@c = Statsample::Vector.new([5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99], :object)
|
62
|
-
@c.name = 'Test Vector'
|
63
|
-
@c.missing_values = [-99]
|
64
|
-
end
|
65
|
-
should_with_gsl 'be created with GSL::Vector' do
|
66
|
-
gsl = GSL::Vector[1, 2, 3, 4, 5]
|
67
|
-
v = Statsample::Vector.new(gsl)
|
68
|
-
assert_equal([1, 2, 3, 4, 5], v.to_a)
|
69
|
-
refute(v.flawed?)
|
70
|
-
end
|
71
51
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
assert_equal(mh, @a.to_matrix)
|
79
|
-
end
|
80
|
-
should 'to_matrix(:vertical) returns a matrix with 1 column' do
|
81
|
-
mv = Matrix.columns([[1, 2, 3, 4, 5]])
|
82
|
-
assert_equal(mv, @a.to_matrix(:vertical))
|
83
|
-
end
|
84
|
-
should 'returns valid submatrixes' do
|
85
|
-
# 3*4 + 2*5 = 22
|
86
|
-
a = [3, 2].to_vector(:numeric)
|
87
|
-
b = [4, 5].to_vector(:numeric)
|
88
|
-
assert_equal(22, (a.to_matrix * b.to_matrix(:vertical))[0, 0])
|
89
|
-
end
|
52
|
+
should 'returns correct vector_cols_matrix' do
|
53
|
+
v1 = Daru::Vector.new(%w(a a a b b b c c))
|
54
|
+
v2 = Daru::Vector.new(%w(1 3 4 5 6 4 3 2))
|
55
|
+
v3 = Daru::Vector.new(%w(1 0 0 0 1 1 1 0))
|
56
|
+
ex = Matrix.rows([%w(a 1 1), %w(a 3 0), %w(a 4 0), %w(b 5 0), %w(b 6 1), %w(b 4 1), %w(c 3 1), %w(c 2 0)])
|
57
|
+
assert_equal(ex, Statsample.vector_cols_matrix(v1, v2, v3))
|
90
58
|
end
|
91
|
-
|
92
|
-
setup do
|
93
|
-
@data = (10.times.map { rand(100) }) + [nil]
|
94
|
-
@original = Statsample::Vector.new(@data, :numeric)
|
95
|
-
end
|
96
|
-
should 'be the sample using []' do
|
97
|
-
second = Statsample::Vector[*@data]
|
98
|
-
assert_equal(@original, second)
|
99
|
-
end
|
100
|
-
should '[] returns same results as R-c()' do
|
101
|
-
reference = [0, 4, 5, 6, 10].to_numeric
|
102
|
-
assert_equal(reference, Statsample::Vector[0, 4, 5, 6, 10])
|
103
|
-
assert_equal(reference, Statsample::Vector[0, 4..6, 10])
|
104
|
-
assert_equal(reference, Statsample::Vector[[0], [4, 5, 6], [10]])
|
105
|
-
assert_equal(reference, Statsample::Vector[[0], [4, [5, [6]]], [10]])
|
106
|
-
|
107
|
-
assert_equal(reference, Statsample::Vector[[0], [4, 5, 6].to_vector, [10]])
|
108
|
-
end
|
109
|
-
should 'be the same usign #to_vector' do
|
110
|
-
lazy1 = @data.to_vector(:numeric)
|
111
|
-
assert_equal(@original, lazy1)
|
112
|
-
end
|
113
|
-
should 'be the same using #to_numeric' do
|
114
|
-
lazy2 = @data.to_numeric
|
115
|
-
assert_equal(@original, lazy2)
|
116
|
-
assert_equal(:numeric, lazy2.type)
|
117
|
-
assert_equal(@data.find_all { |v| !v.nil? }, lazy2.valid_data)
|
118
|
-
end
|
119
|
-
should 'could use new_numeric with size only' do
|
120
|
-
v1 = 10.times.map { nil }.to_numeric
|
121
|
-
v2 = Statsample::Vector.new_numeric(10)
|
122
|
-
assert_equal(v1, v2)
|
123
|
-
end
|
124
|
-
should 'could use new_numeric with size and value' do
|
125
|
-
a = rand
|
126
|
-
v1 = 10.times.map { a }.to_numeric
|
127
|
-
v2 = Statsample::Vector.new_numeric(10, a)
|
128
|
-
assert_equal(v1, v2)
|
129
|
-
end
|
130
|
-
should 'could use new_numeric with func' do
|
131
|
-
v1 = 10.times.map { |i| i * 2 }.to_numeric
|
132
|
-
v2 = Statsample::Vector.new_numeric(10) { |i| i * 2 }
|
133
|
-
assert_equal(v1, v2)
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
|
-
context "new types :numeric and :object" do
|
138
|
-
should "set default type of vector to :object" do
|
139
|
-
v = Statsample::Vector.new [1,2,3,4,5]
|
140
|
-
assert_equal(:object, v.type)
|
141
|
-
end
|
142
|
-
|
143
|
-
should "initialize Vector with :numeric type" do
|
144
|
-
v = Statsample::Vector.new [1,2,3,4,5,nil], :numeric
|
145
|
-
assert_equal(:numeric, v.type)
|
146
|
-
assert_equal([1,2,3,4,5], v.valid_data)
|
147
|
-
end
|
148
|
-
|
149
|
-
should "show a warning when initializing with :nominal, :numeric or :ordinal" do
|
150
|
-
assert_output(nil,"WARNING: nominal has been deprecated. Use :object instead.\n") do
|
151
|
-
Statsample::Vector.new [1,2,3,4,5,nil,'hello'], :nominal
|
152
|
-
end
|
153
|
-
|
154
|
-
assert_output(nil,"WARNING: scale has been deprecated. Use :numeric instead.\n") do
|
155
|
-
Statsample::Vector.new [1,2,3,4,nil,5], :scale
|
156
|
-
end
|
157
|
-
|
158
|
-
assert_output(nil,"WARNING: ordinal has been deprecated. Use :numeric instead.\n") do
|
159
|
-
Statsample::Vector.new [1,2,3,4,5], :ordinal
|
160
|
-
end
|
59
|
+
end
|
161
60
|
|
162
|
-
|
163
|
-
|
61
|
+
context Statsample::Vector do
|
62
|
+
context 'when initializing' do
|
63
|
+
should '.new creates a Daru::Vector internally and shows a warning' do
|
64
|
+
assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
|
65
|
+
data = (10.times.map { rand(100) }) + [nil]
|
66
|
+
original = Statsample::Vector.new(@data, :numeric)
|
67
|
+
assert_equal(true, original.kind_of?(Daru::Vector))
|
164
68
|
end
|
165
69
|
end
|
166
70
|
|
167
|
-
should
|
168
|
-
|
169
|
-
|
170
|
-
assert_equal(numeric, [1,2,3,4,nil,5].to_vector(:numeric))
|
171
|
-
|
172
|
-
obj = Statsample::Vector.new([1,2,3,4,'one','two'], :object)
|
173
|
-
assert_equal(obj, [1,2,3,4,'one','two'].to_vector(:object))
|
174
|
-
end
|
175
|
-
|
176
|
-
should "test that old shorthands raise warnings" do
|
177
|
-
assert_output(nil,"WARNING: to_scale has been deprecated. Use to_numeric instead.\n") do
|
178
|
-
[1,2,3,4,nil,5].to_scale
|
71
|
+
should '[] returns same results as R-c()' do
|
72
|
+
assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
|
73
|
+
assert_equal(true, Statsample::Vector[1,2,3,4,5].kind_of?(Daru::Vector))
|
179
74
|
end
|
180
75
|
end
|
181
|
-
end
|
182
76
|
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
end
|
188
|
-
should 'returns a Hash' do
|
189
|
-
assert_kind_of(Hash, @b)
|
190
|
-
end
|
191
|
-
should 'return a Hash with keys with different values of @a' do
|
192
|
-
expected = ['a', 'b', 'c', 'd', 10]
|
193
|
-
assert_equal(expected, @b.keys)
|
194
|
-
end
|
77
|
+
should "new_numeric/new_scale creates a Daru::Vector internally and shows a warning" do
|
78
|
+
assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
|
79
|
+
Statsample::Vector.new_scale(4)
|
80
|
+
end
|
195
81
|
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
should 'hash values are n times the tokens appears' do
|
200
|
-
assert_counting_tokens(@b)
|
201
|
-
end
|
202
|
-
should '#split_by_separator_freq returns the number of ocurrences of tokens' do
|
203
|
-
assert_equal({ 'a' => 3, 'b' => 1, 'c' => 1, 'd' => 2, 10 => 1 }, @a.split_by_separator_freq)
|
204
|
-
end
|
205
|
-
should 'using a different separator give the same values' do
|
206
|
-
a = Statsample::Vector.new(['a', 'a*b', 'c*d', 'a*d', 10, nil], :object)
|
207
|
-
b = a.split_by_separator('*')
|
208
|
-
assert_counting_tokens(b)
|
209
|
-
end
|
210
|
-
end
|
211
|
-
should 'return correct median_absolute_deviation' do
|
212
|
-
a = [1, 1, 2, 2, 4, 6, 9].to_numeric
|
213
|
-
assert_equal(1, a.median_absolute_deviation)
|
214
|
-
end
|
215
|
-
should 'return correct histogram' do
|
216
|
-
a = 10.times.map { |v| v }.to_numeric
|
217
|
-
hist = a.histogram(2)
|
218
|
-
assert_equal([5, 5], hist.bin)
|
219
|
-
3.times do |i|
|
220
|
-
assert_in_delta(i * 4.5, hist.get_range(i)[0], 1e-9)
|
82
|
+
assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
|
83
|
+
Statsample::Vector.new_numeric(4)
|
84
|
+
end
|
221
85
|
end
|
222
86
|
end
|
223
|
-
|
224
|
-
@c.name == 'Test Vector'
|
225
|
-
end
|
226
|
-
should 'without explicit name, returns vector with succesive numbers' do
|
227
|
-
a = 10.times.map { rand(100) }.to_numeric
|
228
|
-
b = 10.times.map { rand(100) }.to_numeric
|
229
|
-
assert_match(/Vector \d+/, a.name)
|
230
|
-
a.name =~ /Vector (\d+)/
|
231
|
-
next_number = Regexp.last_match(1).to_i + 1
|
232
|
-
assert_equal("Vector #{next_number}", b.name)
|
233
|
-
end
|
234
|
-
should 'save to a file and load the same Vector' do
|
235
|
-
outfile = Tempfile.new('vector.vec')
|
236
|
-
@c.save(outfile.path)
|
237
|
-
a = Statsample.load(outfile.path)
|
238
|
-
assert_equal(@c, a)
|
239
|
-
end
|
240
|
-
should '#collect returns an array' do
|
241
|
-
val = @c.collect { |v| v }
|
242
|
-
assert_equal(val, [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99])
|
243
|
-
end
|
87
|
+
end
|
244
88
|
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
assert_equal(
|
249
|
-
exp.recode! { |v| v == 0 ? 1 : 0 }
|
250
|
-
exp2 = (([1] * 15) + ([0] * 3)).to_vector
|
251
|
-
assert_equal(exp2, exp)
|
252
|
-
end
|
253
|
-
should '#product returns the * of all values' do
|
254
|
-
a = [1, 2, 3, 4, 5].to_vector(:numeric)
|
255
|
-
assert_equal(120, a.product)
|
89
|
+
context "new types :numeric and :object" do
|
90
|
+
should "numerical data is automatically detected to be of type :numeric" do
|
91
|
+
v = Statsample::Vector.new [1,2,3,4,5,nil]
|
92
|
+
assert_equal(:numeric, v.type)
|
256
93
|
end
|
257
94
|
|
258
|
-
should
|
259
|
-
|
260
|
-
assert_equal(
|
261
|
-
assert_equal([5, 5, 5, 5, 5, 6, 6, 7, 8, 9, nil, 1, 2, 3, 4, nil, -99, -99], @c.data_with_nils)
|
262
|
-
@c.missing_values = [-99]
|
263
|
-
assert_equal(@c.valid_data.sort, [1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10])
|
264
|
-
assert_equal(@c.data_with_nils, [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, nil, nil])
|
265
|
-
@c.missing_values = []
|
266
|
-
assert_equal(@c.valid_data.sort, [-99, -99, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10])
|
267
|
-
assert_equal(@c.data_with_nils, [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99])
|
268
|
-
end
|
269
|
-
should 'correct has_missing_data? with missing data' do
|
270
|
-
a = [1, 2, 3, nil].to_vector
|
271
|
-
assert(a.has_missing_data?)
|
272
|
-
end
|
273
|
-
should 'correct has_missing_data? without missing data' do
|
274
|
-
a = [1, 2, 3, 4, 10].to_vector
|
275
|
-
assert(!a.has_missing_data?)
|
276
|
-
end
|
277
|
-
should 'with explicit missing_values, should respond has_missing_data?' do
|
278
|
-
a = [1, 2, 3, 4, 10].to_vector
|
279
|
-
a.missing_values = [10]
|
280
|
-
assert(a.has_missing_data?)
|
281
|
-
end
|
282
|
-
should 'label correctly fields' do
|
283
|
-
@c.labels = { 5 => 'FIVE' }
|
284
|
-
assert_equal(['FIVE', 'FIVE', 'FIVE', 'FIVE', 'FIVE', 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99], @c.vector_labeled.to_a)
|
285
|
-
end
|
286
|
-
should 'verify' do
|
287
|
-
h = @c.verify { |d| !d.nil? and d > 0 }
|
288
|
-
e = { 15 => nil, 16 => -99, 17 => -99 }
|
289
|
-
assert_equal(e, h)
|
290
|
-
end
|
291
|
-
should 'have a summary with name on it' do
|
292
|
-
assert_match(/#{@c.name}/, @c.summary)
|
95
|
+
should "object data automatically detected as :object" do
|
96
|
+
v = Statsample::Vector.new [1,2,3,4,'hello','world']
|
97
|
+
assert_equal(:object, v.type)
|
293
98
|
end
|
294
99
|
|
295
|
-
should
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
assert_equal([1,
|
300
|
-
assert(v.flawed?)
|
301
|
-
else
|
302
|
-
skip('Requires GSL')
|
100
|
+
should "initialize Vector with :numeric type" do
|
101
|
+
v = Statsample::Vector.new [1,2,3,4,5,nil], :numeric
|
102
|
+
assert_equal(:numeric, v.type)
|
103
|
+
assert_output(nil, "WARNING: valid_data in Statsample::Vector has been deprecated in favor of only_valid in Daru::Vector. Please use that.\n") do
|
104
|
+
assert_equal([1,2,3,4,5], v.valid_data)
|
303
105
|
end
|
304
106
|
end
|
305
107
|
|
306
|
-
should
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
should 'multiply correct for scalar' do
|
311
|
-
a = [1, 2, 3].to_numeric
|
312
|
-
assert_equal([5, 10, 15].to_numeric, a * 5)
|
313
|
-
end
|
314
|
-
should 'multiply correct with other vector' do
|
315
|
-
a = [1, 2, 3].to_numeric
|
316
|
-
b = [2, 4, 6].to_numeric
|
317
|
-
|
318
|
-
assert_equal([2, 8, 18].to_numeric, a * b)
|
319
|
-
end
|
320
|
-
should 'sum correct for scalar' do
|
321
|
-
a = [1, 2, 3].to_numeric
|
322
|
-
assert_equal([11, 12, 13].to_numeric, a + 10)
|
323
|
-
end
|
324
|
-
|
325
|
-
should 'raise NoMethodError when method requires numeric and vector is object' do
|
326
|
-
@c.type = :object
|
327
|
-
assert_raise(::NoMethodError) { @c.median }
|
328
|
-
end
|
329
|
-
|
330
|
-
should 'jacknife correctly with named method' do
|
331
|
-
# First example
|
332
|
-
a = [1, 2, 3, 4].to_numeric
|
333
|
-
ds = a.jacknife(:mean)
|
334
|
-
assert_equal(a.mean, ds[:mean].mean)
|
335
|
-
ds = a.jacknife([:mean, :sd])
|
336
|
-
assert_equal(a.mean, ds[:mean].mean)
|
337
|
-
assert_equal(a.sd, ds[:mean].sd)
|
338
|
-
end
|
339
|
-
should 'jacknife correctly with custom method' do
|
340
|
-
# Second example
|
341
|
-
a = [17.23, 18.71, 13.93, 18.81, 15.78, 11.29, 14.91, 13.39, 18.21, 11.57, 14.28, 10.94, 18.83, 15.52, 13.45, 15.25].to_numeric
|
342
|
-
ds = a.jacknife(log_s2: ->(v) { Math.log(v.variance) })
|
343
|
-
exp = [1.605, 2.972, 1.151, 3.097, 0.998, 3.308, 0.942, 1.393, 2.416, 2.951, 1.043, 3.806, 3.122, 0.958, 1.362, 0.937].to_numeric
|
344
|
-
|
345
|
-
assert_similar_vector(exp, ds[:log_s2], 0.001)
|
346
|
-
assert_in_delta(2.00389, ds[:log_s2].mean, 0.00001)
|
347
|
-
assert_in_delta(1.091, ds[:log_s2].variance, 0.001)
|
348
|
-
end
|
349
|
-
should 'jacknife correctly with k>1' do
|
350
|
-
a = rnorm(6)
|
351
|
-
ds = a.jacknife(:mean, 2)
|
352
|
-
mean = a.mean
|
353
|
-
exp = [3 * mean - 2 * (a[2] + a[3] + a[4] + a[5]) / 4, 3 * mean - 2 * (a[0] + a[1] + a[4] + a[5]) / 4, 3 * mean - 2 * (a[0] + a[1] + a[2] + a[3]) / 4].to_numeric
|
354
|
-
assert_similar_vector(exp, ds[:mean], 1e-13)
|
355
|
-
end
|
356
|
-
should 'bootstrap should return a vector with mean=mu and sd=se' do
|
357
|
-
a = rnorm(100)
|
358
|
-
ds = a.bootstrap([:mean, :sd], 200)
|
359
|
-
se = 1 / Math.sqrt(a.size)
|
360
|
-
assert_in_delta(0, ds[:mean].mean, 0.3)
|
361
|
-
assert_in_delta(se, ds[:mean].sd, 0.02)
|
362
|
-
end
|
363
|
-
end
|
364
|
-
|
365
|
-
def test_object
|
366
|
-
assert_equal(@c[1], 5)
|
367
|
-
assert_equal({ 1 => 1, 2 => 1, 3 => 1, 4 => 1, 5 => 5, 6 => 2, 7 => 1, 8 => 1, 9 => 1, 10 => 1 }, @c.frequencies)
|
368
|
-
assert_equal({ 1 => 1, 2 => 1, 3 => 1, 4 => 1, 5 => 5, 6 => 2, 7 => 1, 8 => 1, 9 => 1, 10 => 1 }, @c._frequencies)
|
369
|
-
assert_equal({ 1 => 1.quo(15), 2 => 1.quo(15), 3 => 1.quo(15), 4 => 1.quo(15), 5 => 5.quo(15), 6 => 2.quo(15), 7 => 1.quo(15), 8 => 1.quo(15), 9 => 1.quo(15), 10 => 1.quo(15) }, @c.proportions)
|
370
|
-
assert_equal(@c.proportion, 1.quo(15))
|
371
|
-
assert_equal(@c.proportion(2), 1.quo(15))
|
372
|
-
assert_equal([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], @c.factors.sort)
|
373
|
-
assert_equal(@c.mode, 5)
|
374
|
-
assert_equal(@c.n_valid, 15)
|
375
|
-
end
|
376
|
-
|
377
|
-
def test_equality
|
378
|
-
v1 = [1, 2, 3].to_vector
|
379
|
-
v2 = [1, 2, 3].to_vector
|
380
|
-
assert_equal(v1, v2)
|
381
|
-
v1 = [1, 2, 3].to_vector(:object)
|
382
|
-
v2 = [1, 2, 3].to_vector(:numeric)
|
383
|
-
assert_not_equal(v1, v2)
|
384
|
-
v2 = [1, 2, 3]
|
385
|
-
assert_not_equal(v1, v2)
|
386
|
-
v1 = [1, 2, 3].to_vector
|
387
|
-
v2 = [1, 2, 3].to_vector
|
388
|
-
assert_equal(v1, v2)
|
389
|
-
assert_equal(false, v1 == Object.new)
|
390
|
-
end
|
391
|
-
|
392
|
-
def test_vector_percentil
|
393
|
-
a = [1, 2, 2, 3, 4, 5, 5, 5, 6, 10].to_numeric
|
394
|
-
expected = [10, 25, 25, 40, 50, 70, 70, 70, 90, 100].to_numeric
|
395
|
-
assert_equal(expected, a.vector_percentil)
|
396
|
-
a = [1, nil, nil, 2, 2, 3, 4, nil, nil, 5, 5, 5, 6, 10].to_numeric
|
397
|
-
expected = [10, nil, nil, 25, 25, 40, 50, nil, nil, 70, 70, 70, 90, 100].to_numeric
|
398
|
-
assert_equal(expected, a.vector_percentil)
|
399
|
-
end
|
400
|
-
|
401
|
-
def test_numeric
|
402
|
-
@c.type = :numeric
|
403
|
-
assert_equal(5, @c.median)
|
404
|
-
assert_equal(4, @c.percentil(25))
|
405
|
-
assert_equal(7, @c.percentil(75))
|
406
|
-
|
407
|
-
v = [200_000, 200_000, 210_000, 220_000, 230_000, 250_000, 250_000, 250_000, 270_000, 300_000, 450_000, 130_000, 140_000, 140_000, 140_000, 145_000, 148_000, 165_000, 170_000, 180_000, 180_000, 180_000, 180_000, 180_000, 180_000].to_numeric
|
408
|
-
assert_equal(180_000, v.median)
|
409
|
-
a = [7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 12.0, 12.0, 13.0, 14.0, 14.0, 2.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0].to_numeric
|
410
|
-
assert_equal(4.5, a.percentil(25))
|
411
|
-
assert_equal(6.5, a.percentil(50))
|
412
|
-
assert_equal(9.5, a.percentil(75))
|
413
|
-
assert_equal(3.0, a.percentil(10))
|
414
|
-
end
|
415
|
-
|
416
|
-
def test_linear_percentil_strategy
|
417
|
-
values = [102, 104, 105, 107, 108, 109, 110, 112, 115, 116].shuffle.to_numeric
|
418
|
-
assert_equal 102, values.percentil(0, :linear)
|
419
|
-
assert_equal 104.75, values.percentil(25, :linear)
|
420
|
-
assert_equal 108.5, values.percentil(50, :linear)
|
421
|
-
assert_equal 112.75, values.percentil(75, :linear)
|
422
|
-
assert_equal 116, values.percentil(100, :linear)
|
423
|
-
|
424
|
-
values = [102, 104, 105, 107, 108, 109, 110, 112, 115, 116, 118].shuffle.to_numeric
|
425
|
-
assert_equal 102, values.percentil(0, :linear)
|
426
|
-
assert_equal 105, values.percentil(25, :linear)
|
427
|
-
assert_equal 109, values.percentil(50, :linear)
|
428
|
-
assert_equal 115, values.percentil(75, :linear)
|
429
|
-
assert_equal 118, values.percentil(100, :linear)
|
430
|
-
end
|
431
|
-
|
432
|
-
def test_ranked
|
433
|
-
v1 = [0.8, 1.2, 1.2, 2.3, 18].to_vector(:numeric)
|
434
|
-
expected = [1, 2.5, 2.5, 4, 5].to_vector(:numeric)
|
435
|
-
assert_equal(expected, v1.ranked)
|
436
|
-
v1 = [nil, 0.8, 1.2, 1.2, 2.3, 18, nil].to_vector(:numeric)
|
437
|
-
expected = [nil, 1, 2.5, 2.5, 4, 5, nil].to_vector(:numeric)
|
438
|
-
assert_equal(expected, v1.ranked)
|
439
|
-
end
|
440
|
-
|
441
|
-
def test_numeric
|
442
|
-
a = Statsample::Vector.new([1, 2, 3, 4, 'STRING'], :numeric)
|
443
|
-
assert_equal(10, a.sum)
|
444
|
-
i = 0
|
445
|
-
factors = a.factors.sort
|
446
|
-
[0, 1, 2, 3, 4].each{|v|
|
447
|
-
assert(v == factors[i])
|
448
|
-
assert(v.class == factors[i].class, "#{v} - #{v.class} != #{factors[i]} - #{factors[i].class}")
|
449
|
-
i += 1
|
450
|
-
}
|
451
|
-
end
|
452
|
-
|
453
|
-
def test_vector_centered
|
454
|
-
mean = rand
|
455
|
-
samples = 11
|
456
|
-
centered = samples.times.map { |i| i - ((samples / 2).floor).to_i }.to_numeric
|
457
|
-
not_centered = centered.recode { |v| v + mean }
|
458
|
-
obs = not_centered.centered
|
459
|
-
centered.each_with_index do |v, i|
|
460
|
-
assert_in_delta(v, obs[i], 0.0001)
|
461
|
-
end
|
462
|
-
end
|
463
|
-
|
464
|
-
def test_vector_standarized
|
465
|
-
v1 = [1, 2, 3, 4, nil].to_vector(:numeric)
|
466
|
-
sds = v1.sds
|
467
|
-
expected = [((1 - 2.5).quo(sds)), ((2 - 2.5).quo(sds)), ((3 - 2.5).quo(sds)), ((4 - 2.5).quo(sds)), nil].to_vector(:numeric)
|
468
|
-
vs = v1.vector_standarized
|
469
|
-
assert_equal(expected, vs)
|
470
|
-
assert_equal(0, vs.mean)
|
471
|
-
assert_equal(1, vs.sds)
|
472
|
-
end
|
473
|
-
|
474
|
-
def test_vector_standarized_with_zero_variance
|
475
|
-
v1 = 100.times.map { |_i| 1 }.to_numeric
|
476
|
-
exp = 100.times.map { nil }.to_numeric
|
477
|
-
assert_equal(exp, v1.standarized)
|
478
|
-
end
|
479
|
-
|
480
|
-
def test_check_type
|
481
|
-
v = Statsample::Vector.new
|
482
|
-
v.type = :object
|
483
|
-
assert_raise(NoMethodError) { v.check_type(:numeric) }
|
484
|
-
assert(v.check_type(:object).nil?)
|
485
|
-
|
486
|
-
v.type = :numeric
|
487
|
-
|
488
|
-
assert(v.check_type(:numeric).nil?)
|
489
|
-
assert(v.check_type(:object).nil?)
|
108
|
+
should "show a warning when initializing with :nominal, :numeric or :ordinal" do
|
109
|
+
assert_output(nil,"WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: nominal has been deprecated.\n") do
|
110
|
+
Statsample::Vector.new [1,2,3,4,5,nil,'hello'], :nominal
|
111
|
+
end
|
490
112
|
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
assert_raise(NoMethodError) { v.check_type(:object) }
|
495
|
-
end
|
113
|
+
assert_output(nil,"WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: scale has been deprecated.\n") do
|
114
|
+
Statsample::Vector.new [1,2,3,4,nil,5], :scale
|
115
|
+
end
|
496
116
|
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
assert_equal([3, 4, 5, 6, 7], (a + 2).to_a)
|
501
|
-
assert_equal([12, 14, 16, 18, 20], (a + b).to_a)
|
502
|
-
assert_raise ArgumentError do
|
503
|
-
a + @c
|
504
|
-
end
|
505
|
-
assert_raise TypeError do
|
506
|
-
a + 'string'
|
507
|
-
end
|
508
|
-
a = Statsample::Vector.new([nil, 1, 2, 3, 4, 5], :numeric)
|
509
|
-
b = Statsample::Vector.new([11, 12, nil, 13, 14, 15], :numeric)
|
510
|
-
assert_equal([nil, 13, nil, 16, 18, 20], (a + b).to_a)
|
511
|
-
assert_equal([nil, 13, nil, 16, 18, 20], (a + b.to_a).to_a)
|
512
|
-
end
|
117
|
+
assert_output(nil,"WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: ordinal has been deprecated.\n") do
|
118
|
+
Statsample::Vector.new [1,2,3,4,5], :ordinal
|
119
|
+
end
|
513
120
|
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
assert_equal([-1, 0, 1, 2, 3], (a - 2).to_a)
|
518
|
-
assert_equal([10, 10, 10, 10, 10], (b - a).to_a)
|
519
|
-
assert_raise ArgumentError do
|
520
|
-
a - @c
|
521
|
-
end
|
522
|
-
assert_raise TypeError do
|
523
|
-
a - 'string'
|
121
|
+
assert_output(nil,"WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
|
122
|
+
Statsample::Vector.new_scale 10, 1
|
123
|
+
end
|
524
124
|
end
|
525
|
-
a = Statsample::Vector.new([nil, 1, 2, 3, 4, 5], :numeric)
|
526
|
-
b = Statsample::Vector.new([11, 12, nil, 13, 14, 15], :numeric)
|
527
|
-
assert_equal([nil, 11, nil, 10, 10, 10], (b - a).to_a)
|
528
|
-
assert_equal([nil, 11, nil, 10, 10, 10], (b - a.to_a).to_a)
|
529
|
-
end
|
530
125
|
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
126
|
+
should "show a warning when Statsample::Vector shorthands are used" do
|
127
|
+
numeric = Statsample::Vector.new([1,2,3,4,nil,5], :numeric)
|
128
|
+
assert_equal(numeric, [1,2,3,4,nil,5].to_numeric)
|
129
|
+
assert_equal(numeric, [1,2,3,4,nil,5].to_vector(:numeric))
|
535
130
|
|
536
|
-
|
537
|
-
|
538
|
-
assert_equal(20.quo(9), a.average_deviation_population)
|
539
|
-
end
|
540
|
-
|
541
|
-
def test_samples
|
542
|
-
srand(1)
|
543
|
-
assert_equal(100, @c.sample_with_replacement(100).size)
|
544
|
-
assert_equal(@c.valid_data.to_a.sort, @c.sample_without_replacement(15).sort)
|
545
|
-
assert_raise ArgumentError do
|
546
|
-
@c.sample_without_replacement(20)
|
131
|
+
obj = Statsample::Vector.new([1,2,3,4,'one','two'], :object)
|
132
|
+
assert_equal(obj, [1,2,3,4,'one','two'].to_vector(:object))
|
547
133
|
end
|
548
|
-
@c.type = :numeric
|
549
|
-
srand(1)
|
550
|
-
assert_equal(100, @c.sample_with_replacement(100).size)
|
551
|
-
assert_equal(@c.valid_data.to_a.sort, @c.sample_without_replacement(15).sort)
|
552
|
-
end
|
553
|
-
|
554
|
-
def test_valid_data
|
555
|
-
a = Statsample::Vector.new([1, 2, 3, 4, 'STRING'])
|
556
|
-
a.missing_values = [-99]
|
557
|
-
a.add(1, false)
|
558
|
-
a.add(2, false)
|
559
|
-
a.add(-99, false)
|
560
|
-
a.set_valid_data
|
561
|
-
exp_valid_data = [1, 2, 3, 4, 'STRING', 1, 2]
|
562
|
-
assert_equal(exp_valid_data, a.valid_data)
|
563
|
-
a.add(20, false)
|
564
|
-
a.add(30, false)
|
565
|
-
assert_equal(exp_valid_data, a.valid_data)
|
566
|
-
a.set_valid_data
|
567
|
-
exp_valid_data_2 = [1, 2, 3, 4, 'STRING', 1, 2, 20, 30]
|
568
|
-
assert_equal(exp_valid_data_2, a.valid_data)
|
569
|
-
end
|
570
|
-
|
571
|
-
def test_set_value
|
572
|
-
@c[2] = 10
|
573
|
-
expected = [5, 5, 10, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99].to_vector
|
574
|
-
assert_equal(expected.data, @c.data)
|
575
|
-
end
|
576
134
|
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
assert_equal(2, a.mean)
|
582
|
-
assert_equal(a.variance_sample_ruby, a.variance_sample)
|
583
|
-
assert_equal(a.standard_deviation_sample_ruby, a.sds)
|
584
|
-
assert_equal(a.variance_population_ruby, a.variance_population)
|
585
|
-
assert_equal(a.standard_deviation_population_ruby, a.standard_deviation_population)
|
586
|
-
assert_nothing_raised do
|
587
|
-
a = [].to_vector(:numeric)
|
135
|
+
should "test that old shorthands show deprecation warnings" do
|
136
|
+
assert_output(nil,"WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
|
137
|
+
[1,2,3,4,nil,5].to_scale
|
588
138
|
end
|
589
|
-
a.add(1, false)
|
590
|
-
a.add(2, false)
|
591
|
-
a.set_valid_data
|
592
|
-
assert_equal(3, a.sum)
|
593
|
-
b = [1, 2, nil, 3, 4, 5, nil, 6].to_vector(:numeric)
|
594
|
-
assert_equal(21, b.sum)
|
595
|
-
assert_equal(3.5, b.mean)
|
596
|
-
assert_equal(6, b.gsl.size)
|
597
|
-
c = [10, 20, 30, 40, 50, 100, 1000, 2000, 5000].to_numeric
|
598
|
-
assert_in_delta(c.skew, c.skew_ruby, 0.0001)
|
599
|
-
assert_in_delta(c.kurtosis, c.kurtosis_ruby, 0.0001)
|
600
139
|
end
|
601
140
|
end
|
602
141
|
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
def test_marshalling
|
612
|
-
v1 = (0..100).to_a.collect { |_n| rand(100) }.to_vector(:numeric)
|
613
|
-
v2 = Marshal.load(Marshal.dump(v1))
|
614
|
-
assert_equal(v1, v2)
|
615
|
-
end
|
616
|
-
|
617
|
-
def test_dup
|
618
|
-
v1 = %w(a a a b b b c c).to_vector
|
619
|
-
v2 = v1.dup
|
620
|
-
assert_equal(v1.data, v2.data)
|
621
|
-
assert_not_same(v1.data, v2.data)
|
622
|
-
assert_equal(v1.type, v2.type)
|
623
|
-
|
624
|
-
v1.type = :numeric
|
625
|
-
assert_not_equal(v1.type, v2.type)
|
626
|
-
assert_equal(v1.missing_values, v2.missing_values)
|
627
|
-
assert_not_same(v1.missing_values, v2.missing_values)
|
628
|
-
assert_equal(v1.labels, v2.labels)
|
629
|
-
assert_not_same(v1.labels, v2.labels)
|
630
|
-
|
631
|
-
v3 = v1.dup_empty
|
632
|
-
assert_equal([], v3.data)
|
633
|
-
assert_not_equal(v1.data, v3.data)
|
634
|
-
assert_not_same(v1.data, v3.data)
|
635
|
-
assert_equal(v1.type, v3.type)
|
636
|
-
v1.type = :numeric
|
637
|
-
v3.type = :object
|
638
|
-
assert_not_equal(v1.type, v3.type)
|
639
|
-
assert_equal(v1.missing_values, v3.missing_values)
|
640
|
-
assert_not_same(v1.missing_values, v3.missing_values)
|
641
|
-
assert_equal(v1.labels, v3.labels)
|
642
|
-
assert_not_same(v1.labels, v3.labels)
|
643
|
-
end
|
644
|
-
|
645
|
-
def test_paired_ties
|
646
|
-
a = [0, 0, 0, 1, 1, 2, 3, 3, 4, 4, 4].to_vector(:numeric)
|
647
|
-
expected = [2, 2, 2, 4.5, 4.5, 6, 7.5, 7.5, 10, 10, 10].to_vector(:numeric)
|
648
|
-
assert_equal(expected, a.ranked)
|
649
|
-
end
|
650
|
-
|
651
|
-
def test_dichotomize
|
652
|
-
a = [0, 0, 0, 1, 2, 3, nil].to_vector
|
653
|
-
exp = [0, 0, 0, 1, 1, 1, nil].to_numeric
|
654
|
-
assert_equal(exp, a.dichotomize)
|
655
|
-
a = [1, 1, 1, 2, 2, 2, 3].to_vector
|
656
|
-
exp = [0, 0, 0, 1, 1, 1, 1].to_numeric
|
657
|
-
assert_equal(exp, a.dichotomize)
|
658
|
-
a = [0, 0, 0, 1, 2, 3, nil].to_vector
|
659
|
-
exp = [0, 0, 0, 0, 1, 1, nil].to_numeric
|
660
|
-
assert_equal(exp, a.dichotomize(1))
|
661
|
-
a = %w(a a a b c d).to_vector
|
662
|
-
exp = [0, 0, 0, 1, 1, 1].to_numeric
|
663
|
-
assert_equal(exp, a.dichotomize)
|
664
|
-
end
|
665
|
-
|
666
|
-
def test_can_be_methods
|
667
|
-
a = [0, 0, 0, 1, 2, 3, nil].to_vector
|
668
|
-
assert(a.can_be_numeric?)
|
669
|
-
a = [0, 's', 0, 1, 2, 3, nil].to_vector
|
670
|
-
assert(!a.can_be_numeric?)
|
671
|
-
a.missing_values = ['s']
|
672
|
-
assert(a.can_be_numeric?)
|
673
|
-
|
674
|
-
a = [Date.new(2009, 10, 10), Date.today, '2009-10-10', '2009-1-1', nil, 'NOW'].to_vector
|
675
|
-
assert(a.can_be_date?)
|
676
|
-
a = [Date.new(2009, 10, 10), Date.today, nil, 'sss'].to_vector
|
677
|
-
assert(!a.can_be_date?)
|
678
|
-
end
|
679
|
-
|
680
|
-
def test_date_vector
|
681
|
-
a = [Date.new(2009, 10, 10), :NOW, '2009-10-10', '2009-1-1', nil, 'NOW', 'MISSING'].to_vector(:date, missing_values: ['MISSING'])
|
682
|
-
|
683
|
-
assert(a.type == :date)
|
684
|
-
expected = [Date.new(2009, 10, 10), Date.today, Date.new(2009, 10, 10), Date.new(2009, 1, 1), nil, Date.today, nil]
|
685
|
-
assert_equal(expected, a.date_data_with_nils)
|
142
|
+
should 'return correct histogram' do
|
143
|
+
a = Daru::Vector.new(10.times.map { |v| v })
|
144
|
+
hist = a.histogram(2)
|
145
|
+
assert_equal([5, 5], hist.bin)
|
146
|
+
3.times do |i|
|
147
|
+
assert_in_delta(i * 4.5, hist.get_range(i)[0], 1e-9)
|
148
|
+
end
|
686
149
|
end
|
687
150
|
end
|