statsample 1.5.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.build.sh +15 -0
- data/.gitignore +1 -0
- data/.travis.yml +19 -7
- data/CONTRIBUTING.md +33 -0
- data/History.txt +5 -0
- data/README.md +41 -53
- data/benchmarks/correlation_matrix_15_variables.rb +6 -5
- data/benchmarks/correlation_matrix_5_variables.rb +6 -5
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
- data/examples/boxplot.rb +17 -5
- data/examples/correlation_matrix.rb +36 -7
- data/examples/dataset.rb +25 -5
- data/examples/dominance_analysis.rb +8 -7
- data/examples/dominance_analysis_bootstrap.rb +16 -11
- data/examples/histogram.rb +16 -2
- data/examples/icc.rb +5 -6
- data/examples/levene.rb +17 -3
- data/examples/multiple_regression.rb +6 -3
- data/examples/parallel_analysis.rb +11 -6
- data/examples/polychoric.rb +26 -13
- data/examples/principal_axis.rb +8 -4
- data/examples/reliability.rb +10 -10
- data/examples/scatterplot.rb +8 -0
- data/examples/t_test.rb +7 -0
- data/examples/u_test.rb +10 -2
- data/examples/vector.rb +9 -6
- data/examples/velicer_map_test.rb +12 -8
- data/lib/statsample.rb +13 -47
- data/lib/statsample/analysis/suite.rb +1 -1
- data/lib/statsample/anova/oneway.rb +6 -6
- data/lib/statsample/anova/twoway.rb +26 -24
- data/lib/statsample/bivariate.rb +78 -61
- data/lib/statsample/bivariate/pearson.rb +2 -2
- data/lib/statsample/codification.rb +45 -32
- data/lib/statsample/converter/csv.rb +15 -53
- data/lib/statsample/converter/spss.rb +6 -5
- data/lib/statsample/converters.rb +50 -211
- data/lib/statsample/crosstab.rb +26 -25
- data/lib/statsample/daru.rb +117 -0
- data/lib/statsample/dataset.rb +70 -942
- data/lib/statsample/dominanceanalysis.rb +16 -17
- data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
- data/lib/statsample/factor/parallelanalysis.rb +17 -19
- data/lib/statsample/factor/pca.rb +21 -20
- data/lib/statsample/factor/principalaxis.rb +3 -3
- data/lib/statsample/graph/boxplot.rb +8 -16
- data/lib/statsample/graph/histogram.rb +4 -4
- data/lib/statsample/graph/scatterplot.rb +8 -7
- data/lib/statsample/histogram.rb +128 -119
- data/lib/statsample/matrix.rb +20 -16
- data/lib/statsample/multiset.rb +39 -38
- data/lib/statsample/regression.rb +3 -3
- data/lib/statsample/regression/multiple.rb +8 -10
- data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
- data/lib/statsample/regression/multiple/baseengine.rb +32 -32
- data/lib/statsample/regression/multiple/gslengine.rb +33 -36
- data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
- data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
- data/lib/statsample/reliability.rb +23 -25
- data/lib/statsample/reliability/icc.rb +8 -7
- data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
- data/lib/statsample/reliability/scaleanalysis.rb +58 -60
- data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
- data/lib/statsample/resample.rb +1 -1
- data/lib/statsample/shorthand.rb +29 -25
- data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
- data/lib/statsample/test/levene.rb +28 -27
- data/lib/statsample/test/t.rb +7 -9
- data/lib/statsample/test/umannwhitney.rb +28 -28
- data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
- data/lib/statsample/vector.rb +70 -1013
- data/lib/statsample/version.rb +1 -1
- data/statsample.gemspec +12 -16
- data/test/helpers_tests.rb +1 -1
- data/test/test_analysis.rb +17 -17
- data/test/test_anova_contrast.rb +6 -6
- data/test/test_anovatwowaywithdataset.rb +8 -8
- data/test/test_anovawithvectors.rb +8 -8
- data/test/test_awesome_print_bug.rb +1 -1
- data/test/test_bartlettsphericity.rb +4 -4
- data/test/test_bivariate.rb +48 -43
- data/test/test_codification.rb +33 -33
- data/test/test_crosstab.rb +9 -9
- data/test/test_dataset.rb +28 -458
- data/test/test_factor.rb +46 -38
- data/test/test_factor_pa.rb +22 -13
- data/test/test_ggobi.rb +4 -4
- data/test/test_gsl.rb +4 -4
- data/test/test_histogram.rb +3 -3
- data/test/test_matrix.rb +13 -13
- data/test/test_multiset.rb +103 -91
- data/test/test_regression.rb +57 -52
- data/test/test_reliability.rb +55 -45
- data/test/test_reliability_icc.rb +8 -8
- data/test/test_reliability_skillscale.rb +26 -24
- data/test/test_resample.rb +1 -1
- data/test/test_statistics.rb +3 -13
- data/test/test_stest.rb +9 -9
- data/test/test_stratified.rb +3 -3
- data/test/test_test_t.rb +12 -12
- data/test/test_umannwhitney.rb +2 -2
- data/test/test_vector.rb +76 -613
- data/test/test_wilcoxonsignedrank.rb +4 -4
- metadata +57 -28
- data/lib/statsample/rserve_extension.rb +0 -20
- data/lib/statsample/vector/gsl.rb +0 -106
- data/test/fixtures/repeated_fields.csv +0 -7
- data/test/fixtures/scientific_notation.csv +0 -4
- data/test/fixtures/test_csv.csv +0 -7
- data/test/fixtures/test_xls.xls +0 -0
- data/test/test_csv.rb +0 -63
- data/test/test_rserve_extension.rb +0 -42
- data/test/test_xls.rb +0 -52
@@ -5,30 +5,32 @@ class StatsampleReliabilitySkillScaleTestCase < Minitest::Test
|
|
5
5
|
setup do
|
6
6
|
options = %w(a b c d e)
|
7
7
|
cases = 20
|
8
|
-
@id = cases.times.map { |v| v }
|
9
|
-
@a = cases.times.map { options[rand(5)] }
|
10
|
-
@b = cases.times.map { options[rand(5)] }
|
11
|
-
@c = cases.times.map { options[rand(5)] }
|
12
|
-
@d = cases.times.map { options[rand(5)] }
|
13
|
-
@e =
|
14
|
-
|
8
|
+
@id = Daru::Vector.new(cases.times.map { |v| v })
|
9
|
+
@a = Daru::Vector.new(cases.times.map { options[rand(5)] })
|
10
|
+
@b = Daru::Vector.new(cases.times.map { options[rand(5)] })
|
11
|
+
@c = Daru::Vector.new(cases.times.map { options[rand(5)] })
|
12
|
+
@d = Daru::Vector.new(cases.times.map { options[rand(5)] })
|
13
|
+
@e = Daru::Vector.new(
|
14
|
+
cases.times.map do |i|
|
15
|
+
i == 0 ? options[rand(0)] :
|
15
16
|
rand > 0.8 ? nil : options[rand(5)]
|
16
|
-
|
17
|
-
|
18
|
-
@
|
17
|
+
end
|
18
|
+
)
|
19
|
+
@ds = Daru::DataFrame.new({ :id => @id, :a => @a, :b => @b, :c => @c, :d => @d, :e => @e })
|
20
|
+
@key = { :a => 'a', :b => options[rand(5)], :c => options[rand(5)], :d => options[rand(5)], :e => options[rand(5)] }
|
19
21
|
@ssa = Statsample::Reliability::SkillScaleAnalysis.new(@ds, @key)
|
20
|
-
@ac = @a.map { |v| v == @key[
|
21
|
-
@bc = @b.map { |v| v == @key[
|
22
|
-
@cc = @c.map { |v| v == @key[
|
23
|
-
@dc = @d.map { |v| v == @key[
|
24
|
-
@ec = @e.map { |v| v.nil? ? nil : (v == @key[
|
22
|
+
@ac = Daru::Vector.new(@a.map { |v| v == @key[:a] ? 1 : 0 })
|
23
|
+
@bc = Daru::Vector.new(@b.map { |v| v == @key[:b] ? 1 : 0 })
|
24
|
+
@cc = Daru::Vector.new(@c.map { |v| v == @key[:c] ? 1 : 0 })
|
25
|
+
@dc = Daru::Vector.new(@d.map { |v| v == @key[:d] ? 1 : 0 })
|
26
|
+
@ec = Daru::Vector.new(@e.map { |v| v.nil? ? nil : (v == @key[:e] ? 1 : 0) })
|
25
27
|
end
|
26
28
|
should 'return proper corrected dataset' do
|
27
|
-
cds = {
|
29
|
+
cds = Daru::DataFrame.new({ :id => @id, :a => @ac, :b => @bc, :c => @cc, :d => @dc, :e => @ec })
|
28
30
|
assert_equal(cds, @ssa.corrected_dataset)
|
29
31
|
end
|
30
32
|
should 'return proper corrected minimal dataset' do
|
31
|
-
cdsm = {
|
33
|
+
cdsm = Daru::DataFrame.new({ :a => @ac, :b => @bc, :c => @cc, :d => @dc, :e => @ec })
|
32
34
|
assert_equal(cdsm, @ssa.corrected_dataset_minimal)
|
33
35
|
end
|
34
36
|
should 'return correct vector_sum and vector_sum' do
|
@@ -37,13 +39,13 @@ class StatsampleReliabilitySkillScaleTestCase < Minitest::Test
|
|
37
39
|
assert_equal(cdsm.vector_mean, @ssa.vector_mean)
|
38
40
|
end
|
39
41
|
should 'not crash on rare case' do
|
40
|
-
a =
|
41
|
-
b =
|
42
|
-
c =
|
43
|
-
d =
|
44
|
-
e =
|
45
|
-
key = {
|
46
|
-
ds =
|
42
|
+
a = Daru::Vector.new(['c', 'c', 'a', 'a', 'c', 'a', 'b', 'c', 'c', 'b', 'a', 'd', 'a', 'd', 'a', 'a', 'd', 'e', 'c', 'd'])
|
43
|
+
b = Daru::Vector.new(['e', 'b', 'e', 'b', 'c', 'd', 'a', 'e', 'e', 'c', 'b', 'e', 'e', 'b', 'd', 'c', 'e', 'b', 'b', 'd'])
|
44
|
+
c = Daru::Vector.new(['e', 'b', 'e', 'c', 'e', 'c', 'b', 'd', 'e', 'c', 'a', 'a', 'b', 'd', 'e', 'c', 'b', 'a', 'a', 'e'])
|
45
|
+
d = Daru::Vector.new(['a', 'b', 'd', 'd', 'e', 'b', 'e', 'b', 'd', 'c', 'e', 'a', 'c', 'd', 'c', 'c', 'e', 'd', 'd', 'b'])
|
46
|
+
e = Daru::Vector.new(['a', 'b', nil, 'd', 'c', 'c', 'd', nil, 'd', 'd', 'e', 'e', nil, nil, nil, 'd', 'c', nil, 'e', 'd'])
|
47
|
+
key = { :a => 'a', :b => 'e', :c => 'd', :d => 'c', :e => 'd' }
|
48
|
+
ds = Daru::DataFrame.new({:a => a, :b => b, :c => c, :d => d, :e => e})
|
47
49
|
ssa = Statsample::Reliability::SkillScaleAnalysis.new(ds, key)
|
48
50
|
assert(ssa.summary)
|
49
51
|
end
|
data/test/test_resample.rb
CHANGED
@@ -17,7 +17,7 @@ class StatsampleResampleTestCase < Minitest::Test
|
|
17
17
|
Statsample::Resample.generate(20, 1, 10).count(1)
|
18
18
|
}
|
19
19
|
assert_equal(400, r.size)
|
20
|
-
v =
|
20
|
+
v = Daru::Vector.new(r)
|
21
21
|
a = v.count { |x| x > 3 }
|
22
22
|
assert(a >= 30 && a <= 70)
|
23
23
|
end
|
data/test/test_statistics.rb
CHANGED
@@ -32,7 +32,7 @@ class StatsampleStatisicsTestCase < Minitest::Test
|
|
32
32
|
end
|
33
33
|
|
34
34
|
def test_estimation_mean
|
35
|
-
v = ([42] * 23 + [41] * 4 + [36] * 1 + [32] * 1 + [29] * 1 + [27] * 2 + [23] * 1 + [19] * 1 + [16] * 2 + [15] * 2 + [14, 11, 10, 9, 7] + [6] * 3 + [5] * 2 + [4, 3])
|
35
|
+
v = Daru::Vector.new([42] * 23 + [41] * 4 + [36] * 1 + [32] * 1 + [29] * 1 + [27] * 2 + [23] * 1 + [19] * 1 + [16] * 2 + [15] * 2 + [14, 11, 10, 9, 7] + [6] * 3 + [5] * 2 + [4, 3])
|
36
36
|
assert_equal(50, v.size)
|
37
37
|
assert_equal(1471, v.sum)
|
38
38
|
# limits=Statsample::SRS.mean_confidence_interval_z(v.mean(), v.sds(), v.size,676,0.80)
|
@@ -55,19 +55,9 @@ class StatsampleStatisicsTestCase < Minitest::Test
|
|
55
55
|
assert_in_delta(0.46, l[1], 0.01)
|
56
56
|
end
|
57
57
|
|
58
|
-
def test_ml
|
59
|
-
if true
|
60
|
-
# real=[1,1,1,1].to_vector(:numeric)
|
61
|
-
|
62
|
-
# pred=[0.0001,0.0001,0.0001,0.0001].to_vector(:numeric)
|
63
|
-
# puts Statsample::Bivariate.maximum_likehood_dichotomic(pred,real)
|
64
|
-
|
65
|
-
end
|
66
|
-
end
|
67
|
-
|
68
58
|
def test_simple_linear_regression
|
69
|
-
a = [1, 2, 3, 4, 5, 6]
|
70
|
-
b = [6, 2, 4, 10, 12, 8]
|
59
|
+
a = Daru::Vector.new([1, 2, 3, 4, 5, 6])
|
60
|
+
b = Daru::Vector.new([6, 2, 4, 10, 12, 8])
|
71
61
|
reg = Statsample::Regression::Simple.new_from_vectors(a, b)
|
72
62
|
assert_in_delta((reg.ssr + reg.sse).to_f, reg.sst, 0.001)
|
73
63
|
assert_in_delta(Statsample::Bivariate.pearson(a, b), reg.r, 0.001)
|
data/test/test_stest.rb
CHANGED
@@ -24,26 +24,26 @@ class StatsampleTestTestCase < Minitest::Test
|
|
24
24
|
end
|
25
25
|
|
26
26
|
def test_u_mannwhitney
|
27
|
-
a = [1, 2, 3, 4, 5, 6]
|
28
|
-
b = [0, 5, 7, 9, 10, 11]
|
27
|
+
a = Daru::Vector.new([1, 2, 3, 4, 5, 6])
|
28
|
+
b = Daru::Vector.new([0, 5, 7, 9, 10, 11])
|
29
29
|
assert_equal(7.5, Statsample::Test.u_mannwhitney(a, b).u)
|
30
30
|
assert_equal(7.5, Statsample::Test.u_mannwhitney(b, a).u)
|
31
|
-
a = [1, 7, 8, 9, 10, 11]
|
32
|
-
b = [2, 3, 4, 5, 6, 12]
|
31
|
+
a = Daru::Vector.new([1, 7, 8, 9, 10, 11])
|
32
|
+
b = Daru::Vector.new([2, 3, 4, 5, 6, 12])
|
33
33
|
assert_equal(11, Statsample::Test.u_mannwhitney(a, b).u)
|
34
34
|
end
|
35
35
|
|
36
36
|
def test_levene
|
37
|
-
a = [1, 2, 3, 4, 5, 6, 7, 8, 100, 10]
|
38
|
-
b = [30, 40, 50, 60, 70, 80, 90, 100, 110, 120]
|
37
|
+
a = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8, 100, 10])
|
38
|
+
b = Daru::Vector.new([30, 40, 50, 60, 70, 80, 90, 100, 110, 120])
|
39
39
|
levene = Statsample::Test::Levene.new([a, b])
|
40
40
|
assert_levene(levene)
|
41
41
|
end
|
42
42
|
|
43
43
|
def test_levene_dataset
|
44
|
-
a = [1, 2, 3, 4, 5, 6, 7, 8, 100, 10]
|
45
|
-
b = [30, 40, 50, 60, 70, 80, 90, 100, 110, 120]
|
46
|
-
ds = {
|
44
|
+
a = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8, 100, 10])
|
45
|
+
b = Daru::Vector.new([30, 40, 50, 60, 70, 80, 90, 100, 110, 120])
|
46
|
+
ds = Daru::DataFrame.new({ :a => a, :b => b })
|
47
47
|
levene = Statsample::Test::Levene.new(ds)
|
48
48
|
assert_levene(levene)
|
49
49
|
end
|
data/test/test_stratified.rb
CHANGED
@@ -9,9 +9,9 @@ class StatsampleStratifiedTestCase < Minitest::Test
|
|
9
9
|
a = [10, 20, 30, 40, 50]
|
10
10
|
b = [110, 120, 130, 140]
|
11
11
|
pop = a + b
|
12
|
-
av
|
13
|
-
bv
|
14
|
-
popv =
|
12
|
+
av = Daru::Vector.new(a)
|
13
|
+
bv = Daru::Vector.new(b)
|
14
|
+
popv = Daru::Vector.new(pop)
|
15
15
|
assert_equal(popv.mean, Statsample::StratifiedSample.mean(av, bv))
|
16
16
|
end
|
17
17
|
end
|
data/test/test_test_t.rb
CHANGED
@@ -4,24 +4,24 @@ class StatsampleTestTTestCase < Minitest::Test
|
|
4
4
|
include Math
|
5
5
|
context T do
|
6
6
|
setup do
|
7
|
-
@a = [30.02, 29.99, 30.11, 29.97, 30.01, 29.99]
|
8
|
-
@b = [29.89, 29.93, 29.72, 29.98, 30.02, 29.98]
|
7
|
+
@a = Daru::Vector.new([30.02, 29.99, 30.11, 29.97, 30.01, 29.99])
|
8
|
+
@b = Daru::Vector.new([29.89, 29.93, 29.72, 29.98, 30.02, 29.98])
|
9
9
|
@x1 = @a.mean
|
10
10
|
@x2 = @b.mean
|
11
11
|
@s1 = @a.sd
|
12
12
|
@s2 = @b.sd
|
13
|
-
@n1 = @a.
|
14
|
-
@n2 = @b.
|
13
|
+
@n1 = @a.size
|
14
|
+
@n2 = @b.size
|
15
15
|
end
|
16
16
|
should 'calculate correctly standard t' do
|
17
|
-
t = Statsample::Test::T.new(@x1, @s1.quo(Math.sqrt(@a.
|
18
|
-
assert_equal((@x1).quo(@s1.quo(Math.sqrt(@a.
|
19
|
-
assert_equal(@a.
|
17
|
+
t = Statsample::Test::T.new(@x1, @s1.quo(Math.sqrt(@a.size)), @a.size - 1)
|
18
|
+
assert_equal((@x1).quo(@s1.quo(Math.sqrt(@a.size))), t.t)
|
19
|
+
assert_equal(@a.size - 1, t.df)
|
20
20
|
assert(t.summary.size > 0)
|
21
21
|
end
|
22
22
|
should 'calculate correctly t for one sample' do
|
23
|
-
t1 = [6, 4, 6, 7, 4, 5, 5, 12, 6, 1]
|
24
|
-
t2 = [9, 6, 5, 10, 10, 8, 7, 10, 6, 5]
|
23
|
+
t1 = Daru::Vector.new([6, 4, 6, 7, 4, 5, 5, 12, 6, 1])
|
24
|
+
t2 = Daru::Vector.new([9, 6, 5, 10, 10, 8, 7, 10, 6, 5])
|
25
25
|
d = t1 - t2
|
26
26
|
t = Statsample::Test::T::OneSample.new(d)
|
27
27
|
assert_in_delta(-2.631, t.t, 0.001)
|
@@ -48,14 +48,14 @@ class StatsampleTestTTestCase < Minitest::Test
|
|
48
48
|
assert_in_delta(0.09095, t.probability_not_equal_variance, 0.001)
|
49
49
|
end
|
50
50
|
should 'be the same using shorthand' do
|
51
|
-
v = 100.times.map { rand(100) }
|
51
|
+
v = Daru::Vector.new(100.times.map { rand(100) })
|
52
52
|
assert_equal(Statsample::Test.t_one_sample(v).t, T::OneSample.new(v).t)
|
53
53
|
end
|
54
54
|
should 'calculate all values for one sample T test' do
|
55
55
|
u = @a.mean + (1 - rand * 2)
|
56
56
|
tos = T::OneSample.new(@a, u: u)
|
57
|
-
assert_equal((@a.mean - u).quo(@a.sd.quo(sqrt(@a.
|
58
|
-
assert_equal(@a.
|
57
|
+
assert_equal((@a.mean - u).quo(@a.sd.quo(sqrt(@a.size))), tos.t)
|
58
|
+
assert_equal(@a.size - 1, tos.df)
|
59
59
|
assert(tos.summary.size > 0)
|
60
60
|
end
|
61
61
|
end
|
data/test/test_umannwhitney.rb
CHANGED
@@ -4,8 +4,8 @@ class StatsampleUMannWhitneyTestCase < Minitest::Test
|
|
4
4
|
include Statsample::Test
|
5
5
|
context Statsample::Test::UMannWhitney do
|
6
6
|
setup do
|
7
|
-
@v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15]
|
8
|
-
@v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19]
|
7
|
+
@v1 = Daru::Vector.new([1, 2, 3, 4, 7, 8, 9, 10, 14, 15])
|
8
|
+
@v2 = Daru::Vector.new([5, 6, 11, 12, 13, 16, 17, 18, 19])
|
9
9
|
@u = Statsample::Test::UMannWhitney.new(@v1, @v2)
|
10
10
|
end
|
11
11
|
should 'have same result using class or Test#u_mannwhitney' do
|
data/test/test_vector.rb
CHANGED
@@ -3,24 +3,11 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
|
3
3
|
class StatsampleTestVector < Minitest::Test
|
4
4
|
include Statsample::Shorthand
|
5
5
|
|
6
|
-
def setup
|
7
|
-
@c = Statsample::Vector.new([5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99], :object)
|
8
|
-
@c.name = 'Test Vector'
|
9
|
-
@c.missing_values = [-99]
|
10
|
-
end
|
11
|
-
|
12
|
-
def assert_counting_tokens(b)
|
13
|
-
assert_equal([1, 1, 0, 1, 0, nil], b['a'].to_a)
|
14
|
-
assert_equal([0, 1, 0, 0, 0, nil], b['b'].to_a)
|
15
|
-
assert_equal([0, 0, 1, 0, 0, nil], b['c'].to_a)
|
16
|
-
assert_equal([0, 0, 1, 1, 0, nil], b['d'].to_a)
|
17
|
-
assert_equal([0, 0, 0, 0, 1, nil], b[10].to_a)
|
18
|
-
end
|
19
6
|
context Statsample do
|
20
7
|
setup do
|
21
8
|
@sample = 100
|
22
|
-
@a = @sample.times.map { |i| (i + rand(10)) % 10 == 0 ? nil : rand(100) }
|
23
|
-
@b = @sample.times.map { |i| (i + rand(10)) % 10 == 0 ? nil : rand(100) }
|
9
|
+
@a = Daru::Vector.new(@sample.times.map { |i| (i + rand(10)) % 10 == 0 ? nil : rand(100) })
|
10
|
+
@b = Daru::Vector.new(@sample.times.map { |i| (i + rand(10)) % 10 == 0 ? nil : rand(100) })
|
24
11
|
@correct_a = []
|
25
12
|
@correct_b = []
|
26
13
|
@a.each_with_index do |_v, i|
|
@@ -29,8 +16,8 @@ class StatsampleTestVector < Minitest::Test
|
|
29
16
|
@correct_b.push(@b[i])
|
30
17
|
end
|
31
18
|
end
|
32
|
-
@correct_a = @correct_a
|
33
|
-
@correct_b = @correct_b
|
19
|
+
@correct_a = Daru::Vector.new(@correct_a)
|
20
|
+
@correct_b = Daru::Vector.new(@correct_b)
|
34
21
|
|
35
22
|
@common = lambda do |av, bv|
|
36
23
|
assert_equal(@correct_a, av, 'A no es esperado')
|
@@ -39,649 +26,125 @@ class StatsampleTestVector < Minitest::Test
|
|
39
26
|
assert(!bv.has_missing_data?, 'b tiene datos faltantes')
|
40
27
|
end
|
41
28
|
end
|
29
|
+
|
42
30
|
should 'return correct only_valid' do
|
43
31
|
av, bv = Statsample.only_valid @a, @b
|
32
|
+
av.reset_index!
|
33
|
+
bv.reset_index!
|
44
34
|
av2, bv2 = Statsample.only_valid av, bv
|
45
35
|
@common.call(av, bv)
|
46
36
|
assert_equal(av, av2)
|
47
37
|
assert_not_same(av, av2)
|
48
38
|
assert_not_same(bv, bv2)
|
49
39
|
end
|
40
|
+
|
50
41
|
should 'return correct only_valid_clone' do
|
51
42
|
av, bv = Statsample.only_valid_clone @a, @b
|
43
|
+
av.reset_index!
|
44
|
+
bv.reset_index!
|
52
45
|
@common.call(av, bv)
|
53
46
|
av2, bv2 = Statsample.only_valid_clone av, bv
|
54
47
|
assert_equal(av, av2)
|
55
48
|
assert_same(av, av2)
|
56
49
|
assert_same(bv, bv2)
|
57
50
|
end
|
58
|
-
end
|
59
|
-
context Statsample::Vector do
|
60
|
-
setup do
|
61
|
-
@c = Statsample::Vector.new([5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99], :object)
|
62
|
-
@c.name = 'Test Vector'
|
63
|
-
@c.missing_values = [-99]
|
64
|
-
end
|
65
|
-
should_with_gsl 'be created with GSL::Vector' do
|
66
|
-
gsl = GSL::Vector[1, 2, 3, 4, 5]
|
67
|
-
v = Statsample::Vector.new(gsl)
|
68
|
-
assert_equal([1, 2, 3, 4, 5], v.to_a)
|
69
|
-
refute(v.flawed?)
|
70
|
-
end
|
71
51
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
assert_equal(mh, @a.to_matrix)
|
79
|
-
end
|
80
|
-
should 'to_matrix(:vertical) returns a matrix with 1 column' do
|
81
|
-
mv = Matrix.columns([[1, 2, 3, 4, 5]])
|
82
|
-
assert_equal(mv, @a.to_matrix(:vertical))
|
83
|
-
end
|
84
|
-
should 'returns valid submatrixes' do
|
85
|
-
# 3*4 + 2*5 = 22
|
86
|
-
a = [3, 2].to_vector(:numeric)
|
87
|
-
b = [4, 5].to_vector(:numeric)
|
88
|
-
assert_equal(22, (a.to_matrix * b.to_matrix(:vertical))[0, 0])
|
89
|
-
end
|
52
|
+
should 'returns correct vector_cols_matrix' do
|
53
|
+
v1 = Daru::Vector.new(%w(a a a b b b c c))
|
54
|
+
v2 = Daru::Vector.new(%w(1 3 4 5 6 4 3 2))
|
55
|
+
v3 = Daru::Vector.new(%w(1 0 0 0 1 1 1 0))
|
56
|
+
ex = Matrix.rows([%w(a 1 1), %w(a 3 0), %w(a 4 0), %w(b 5 0), %w(b 6 1), %w(b 4 1), %w(c 3 1), %w(c 2 0)])
|
57
|
+
assert_equal(ex, Statsample.vector_cols_matrix(v1, v2, v3))
|
90
58
|
end
|
91
|
-
|
92
|
-
setup do
|
93
|
-
@data = (10.times.map { rand(100) }) + [nil]
|
94
|
-
@original = Statsample::Vector.new(@data, :numeric)
|
95
|
-
end
|
96
|
-
should 'be the sample using []' do
|
97
|
-
second = Statsample::Vector[*@data]
|
98
|
-
assert_equal(@original, second)
|
99
|
-
end
|
100
|
-
should '[] returns same results as R-c()' do
|
101
|
-
reference = [0, 4, 5, 6, 10].to_numeric
|
102
|
-
assert_equal(reference, Statsample::Vector[0, 4, 5, 6, 10])
|
103
|
-
assert_equal(reference, Statsample::Vector[0, 4..6, 10])
|
104
|
-
assert_equal(reference, Statsample::Vector[[0], [4, 5, 6], [10]])
|
105
|
-
assert_equal(reference, Statsample::Vector[[0], [4, [5, [6]]], [10]])
|
106
|
-
|
107
|
-
assert_equal(reference, Statsample::Vector[[0], [4, 5, 6].to_vector, [10]])
|
108
|
-
end
|
109
|
-
should 'be the same usign #to_vector' do
|
110
|
-
lazy1 = @data.to_vector(:numeric)
|
111
|
-
assert_equal(@original, lazy1)
|
112
|
-
end
|
113
|
-
should 'be the same using #to_numeric' do
|
114
|
-
lazy2 = @data.to_numeric
|
115
|
-
assert_equal(@original, lazy2)
|
116
|
-
assert_equal(:numeric, lazy2.type)
|
117
|
-
assert_equal(@data.find_all { |v| !v.nil? }, lazy2.valid_data)
|
118
|
-
end
|
119
|
-
should 'could use new_numeric with size only' do
|
120
|
-
v1 = 10.times.map { nil }.to_numeric
|
121
|
-
v2 = Statsample::Vector.new_numeric(10)
|
122
|
-
assert_equal(v1, v2)
|
123
|
-
end
|
124
|
-
should 'could use new_numeric with size and value' do
|
125
|
-
a = rand
|
126
|
-
v1 = 10.times.map { a }.to_numeric
|
127
|
-
v2 = Statsample::Vector.new_numeric(10, a)
|
128
|
-
assert_equal(v1, v2)
|
129
|
-
end
|
130
|
-
should 'could use new_numeric with func' do
|
131
|
-
v1 = 10.times.map { |i| i * 2 }.to_numeric
|
132
|
-
v2 = Statsample::Vector.new_numeric(10) { |i| i * 2 }
|
133
|
-
assert_equal(v1, v2)
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
|
-
context "new types :numeric and :object" do
|
138
|
-
should "set default type of vector to :object" do
|
139
|
-
v = Statsample::Vector.new [1,2,3,4,5]
|
140
|
-
assert_equal(:object, v.type)
|
141
|
-
end
|
142
|
-
|
143
|
-
should "initialize Vector with :numeric type" do
|
144
|
-
v = Statsample::Vector.new [1,2,3,4,5,nil], :numeric
|
145
|
-
assert_equal(:numeric, v.type)
|
146
|
-
assert_equal([1,2,3,4,5], v.valid_data)
|
147
|
-
end
|
148
|
-
|
149
|
-
should "show a warning when initializing with :nominal, :numeric or :ordinal" do
|
150
|
-
assert_output(nil,"WARNING: nominal has been deprecated. Use :object instead.\n") do
|
151
|
-
Statsample::Vector.new [1,2,3,4,5,nil,'hello'], :nominal
|
152
|
-
end
|
153
|
-
|
154
|
-
assert_output(nil,"WARNING: scale has been deprecated. Use :numeric instead.\n") do
|
155
|
-
Statsample::Vector.new [1,2,3,4,nil,5], :scale
|
156
|
-
end
|
157
|
-
|
158
|
-
assert_output(nil,"WARNING: ordinal has been deprecated. Use :numeric instead.\n") do
|
159
|
-
Statsample::Vector.new [1,2,3,4,5], :ordinal
|
160
|
-
end
|
59
|
+
end
|
161
60
|
|
162
|
-
|
163
|
-
|
61
|
+
context Statsample::Vector do
|
62
|
+
context 'when initializing' do
|
63
|
+
should '.new creates a Daru::Vector internally and shows a warning' do
|
64
|
+
assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
|
65
|
+
data = (10.times.map { rand(100) }) + [nil]
|
66
|
+
original = Statsample::Vector.new(@data, :numeric)
|
67
|
+
assert_equal(true, original.kind_of?(Daru::Vector))
|
164
68
|
end
|
165
69
|
end
|
166
70
|
|
167
|
-
should
|
168
|
-
|
169
|
-
|
170
|
-
assert_equal(numeric, [1,2,3,4,nil,5].to_vector(:numeric))
|
171
|
-
|
172
|
-
obj = Statsample::Vector.new([1,2,3,4,'one','two'], :object)
|
173
|
-
assert_equal(obj, [1,2,3,4,'one','two'].to_vector(:object))
|
174
|
-
end
|
175
|
-
|
176
|
-
should "test that old shorthands raise warnings" do
|
177
|
-
assert_output(nil,"WARNING: to_scale has been deprecated. Use to_numeric instead.\n") do
|
178
|
-
[1,2,3,4,nil,5].to_scale
|
71
|
+
should '[] returns same results as R-c()' do
|
72
|
+
assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
|
73
|
+
assert_equal(true, Statsample::Vector[1,2,3,4,5].kind_of?(Daru::Vector))
|
179
74
|
end
|
180
75
|
end
|
181
|
-
end
|
182
76
|
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
end
|
188
|
-
should 'returns a Hash' do
|
189
|
-
assert_kind_of(Hash, @b)
|
190
|
-
end
|
191
|
-
should 'return a Hash with keys with different values of @a' do
|
192
|
-
expected = ['a', 'b', 'c', 'd', 10]
|
193
|
-
assert_equal(expected, @b.keys)
|
194
|
-
end
|
77
|
+
should "new_numeric/new_scale creates a Daru::Vector internally and shows a warning" do
|
78
|
+
assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
|
79
|
+
Statsample::Vector.new_scale(4)
|
80
|
+
end
|
195
81
|
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
should 'hash values are n times the tokens appears' do
|
200
|
-
assert_counting_tokens(@b)
|
201
|
-
end
|
202
|
-
should '#split_by_separator_freq returns the number of ocurrences of tokens' do
|
203
|
-
assert_equal({ 'a' => 3, 'b' => 1, 'c' => 1, 'd' => 2, 10 => 1 }, @a.split_by_separator_freq)
|
204
|
-
end
|
205
|
-
should 'using a different separator give the same values' do
|
206
|
-
a = Statsample::Vector.new(['a', 'a*b', 'c*d', 'a*d', 10, nil], :object)
|
207
|
-
b = a.split_by_separator('*')
|
208
|
-
assert_counting_tokens(b)
|
209
|
-
end
|
210
|
-
end
|
211
|
-
should 'return correct median_absolute_deviation' do
|
212
|
-
a = [1, 1, 2, 2, 4, 6, 9].to_numeric
|
213
|
-
assert_equal(1, a.median_absolute_deviation)
|
214
|
-
end
|
215
|
-
should 'return correct histogram' do
|
216
|
-
a = 10.times.map { |v| v }.to_numeric
|
217
|
-
hist = a.histogram(2)
|
218
|
-
assert_equal([5, 5], hist.bin)
|
219
|
-
3.times do |i|
|
220
|
-
assert_in_delta(i * 4.5, hist.get_range(i)[0], 1e-9)
|
82
|
+
assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
|
83
|
+
Statsample::Vector.new_numeric(4)
|
84
|
+
end
|
221
85
|
end
|
222
86
|
end
|
223
|
-
|
224
|
-
@c.name == 'Test Vector'
|
225
|
-
end
|
226
|
-
should 'without explicit name, returns vector with succesive numbers' do
|
227
|
-
a = 10.times.map { rand(100) }.to_numeric
|
228
|
-
b = 10.times.map { rand(100) }.to_numeric
|
229
|
-
assert_match(/Vector \d+/, a.name)
|
230
|
-
a.name =~ /Vector (\d+)/
|
231
|
-
next_number = Regexp.last_match(1).to_i + 1
|
232
|
-
assert_equal("Vector #{next_number}", b.name)
|
233
|
-
end
|
234
|
-
should 'save to a file and load the same Vector' do
|
235
|
-
outfile = Tempfile.new('vector.vec')
|
236
|
-
@c.save(outfile.path)
|
237
|
-
a = Statsample.load(outfile.path)
|
238
|
-
assert_equal(@c, a)
|
239
|
-
end
|
240
|
-
should '#collect returns an array' do
|
241
|
-
val = @c.collect { |v| v }
|
242
|
-
assert_equal(val, [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99])
|
243
|
-
end
|
87
|
+
end
|
244
88
|
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
assert_equal(
|
249
|
-
exp.recode! { |v| v == 0 ? 1 : 0 }
|
250
|
-
exp2 = (([1] * 15) + ([0] * 3)).to_vector
|
251
|
-
assert_equal(exp2, exp)
|
252
|
-
end
|
253
|
-
should '#product returns the * of all values' do
|
254
|
-
a = [1, 2, 3, 4, 5].to_vector(:numeric)
|
255
|
-
assert_equal(120, a.product)
|
89
|
+
context "new types :numeric and :object" do
|
90
|
+
should "numerical data is automatically detected to be of type :numeric" do
|
91
|
+
v = Statsample::Vector.new [1,2,3,4,5,nil]
|
92
|
+
assert_equal(:numeric, v.type)
|
256
93
|
end
|
257
94
|
|
258
|
-
should
|
259
|
-
|
260
|
-
assert_equal(
|
261
|
-
assert_equal([5, 5, 5, 5, 5, 6, 6, 7, 8, 9, nil, 1, 2, 3, 4, nil, -99, -99], @c.data_with_nils)
|
262
|
-
@c.missing_values = [-99]
|
263
|
-
assert_equal(@c.valid_data.sort, [1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10])
|
264
|
-
assert_equal(@c.data_with_nils, [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, nil, nil])
|
265
|
-
@c.missing_values = []
|
266
|
-
assert_equal(@c.valid_data.sort, [-99, -99, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10])
|
267
|
-
assert_equal(@c.data_with_nils, [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99])
|
268
|
-
end
|
269
|
-
should 'correct has_missing_data? with missing data' do
|
270
|
-
a = [1, 2, 3, nil].to_vector
|
271
|
-
assert(a.has_missing_data?)
|
272
|
-
end
|
273
|
-
should 'correct has_missing_data? without missing data' do
|
274
|
-
a = [1, 2, 3, 4, 10].to_vector
|
275
|
-
assert(!a.has_missing_data?)
|
276
|
-
end
|
277
|
-
should 'with explicit missing_values, should respond has_missing_data?' do
|
278
|
-
a = [1, 2, 3, 4, 10].to_vector
|
279
|
-
a.missing_values = [10]
|
280
|
-
assert(a.has_missing_data?)
|
281
|
-
end
|
282
|
-
should 'label correctly fields' do
|
283
|
-
@c.labels = { 5 => 'FIVE' }
|
284
|
-
assert_equal(['FIVE', 'FIVE', 'FIVE', 'FIVE', 'FIVE', 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99], @c.vector_labeled.to_a)
|
285
|
-
end
|
286
|
-
should 'verify' do
|
287
|
-
h = @c.verify { |d| !d.nil? and d > 0 }
|
288
|
-
e = { 15 => nil, 16 => -99, 17 => -99 }
|
289
|
-
assert_equal(e, h)
|
290
|
-
end
|
291
|
-
should 'have a summary with name on it' do
|
292
|
-
assert_match(/#{@c.name}/, @c.summary)
|
95
|
+
should "object data automatically detected as :object" do
|
96
|
+
v = Statsample::Vector.new [1,2,3,4,'hello','world']
|
97
|
+
assert_equal(:object, v.type)
|
293
98
|
end
|
294
99
|
|
295
|
-
should
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
assert_equal([1,
|
300
|
-
assert(v.flawed?)
|
301
|
-
else
|
302
|
-
skip('Requires GSL')
|
100
|
+
should "initialize Vector with :numeric type" do
|
101
|
+
v = Statsample::Vector.new [1,2,3,4,5,nil], :numeric
|
102
|
+
assert_equal(:numeric, v.type)
|
103
|
+
assert_output(nil, "WARNING: valid_data in Statsample::Vector has been deprecated in favor of only_valid in Daru::Vector. Please use that.\n") do
|
104
|
+
assert_equal([1,2,3,4,5], v.valid_data)
|
303
105
|
end
|
304
106
|
end
|
305
107
|
|
306
|
-
should
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
should 'multiply correct for scalar' do
|
311
|
-
a = [1, 2, 3].to_numeric
|
312
|
-
assert_equal([5, 10, 15].to_numeric, a * 5)
|
313
|
-
end
|
314
|
-
should 'multiply correct with other vector' do
|
315
|
-
a = [1, 2, 3].to_numeric
|
316
|
-
b = [2, 4, 6].to_numeric
|
317
|
-
|
318
|
-
assert_equal([2, 8, 18].to_numeric, a * b)
|
319
|
-
end
|
320
|
-
should 'sum correct for scalar' do
|
321
|
-
a = [1, 2, 3].to_numeric
|
322
|
-
assert_equal([11, 12, 13].to_numeric, a + 10)
|
323
|
-
end
|
324
|
-
|
325
|
-
should 'raise NoMethodError when method requires numeric and vector is object' do
|
326
|
-
@c.type = :object
|
327
|
-
assert_raise(::NoMethodError) { @c.median }
|
328
|
-
end
|
329
|
-
|
330
|
-
should 'jacknife correctly with named method' do
|
331
|
-
# First example
|
332
|
-
a = [1, 2, 3, 4].to_numeric
|
333
|
-
ds = a.jacknife(:mean)
|
334
|
-
assert_equal(a.mean, ds[:mean].mean)
|
335
|
-
ds = a.jacknife([:mean, :sd])
|
336
|
-
assert_equal(a.mean, ds[:mean].mean)
|
337
|
-
assert_equal(a.sd, ds[:mean].sd)
|
338
|
-
end
|
339
|
-
should 'jacknife correctly with custom method' do
|
340
|
-
# Second example
|
341
|
-
a = [17.23, 18.71, 13.93, 18.81, 15.78, 11.29, 14.91, 13.39, 18.21, 11.57, 14.28, 10.94, 18.83, 15.52, 13.45, 15.25].to_numeric
|
342
|
-
ds = a.jacknife(log_s2: ->(v) { Math.log(v.variance) })
|
343
|
-
exp = [1.605, 2.972, 1.151, 3.097, 0.998, 3.308, 0.942, 1.393, 2.416, 2.951, 1.043, 3.806, 3.122, 0.958, 1.362, 0.937].to_numeric
|
344
|
-
|
345
|
-
assert_similar_vector(exp, ds[:log_s2], 0.001)
|
346
|
-
assert_in_delta(2.00389, ds[:log_s2].mean, 0.00001)
|
347
|
-
assert_in_delta(1.091, ds[:log_s2].variance, 0.001)
|
348
|
-
end
|
349
|
-
should 'jacknife correctly with k>1' do
|
350
|
-
a = rnorm(6)
|
351
|
-
ds = a.jacknife(:mean, 2)
|
352
|
-
mean = a.mean
|
353
|
-
exp = [3 * mean - 2 * (a[2] + a[3] + a[4] + a[5]) / 4, 3 * mean - 2 * (a[0] + a[1] + a[4] + a[5]) / 4, 3 * mean - 2 * (a[0] + a[1] + a[2] + a[3]) / 4].to_numeric
|
354
|
-
assert_similar_vector(exp, ds[:mean], 1e-13)
|
355
|
-
end
|
356
|
-
should 'bootstrap should return a vector with mean=mu and sd=se' do
|
357
|
-
a = rnorm(100)
|
358
|
-
ds = a.bootstrap([:mean, :sd], 200)
|
359
|
-
se = 1 / Math.sqrt(a.size)
|
360
|
-
assert_in_delta(0, ds[:mean].mean, 0.3)
|
361
|
-
assert_in_delta(se, ds[:mean].sd, 0.02)
|
362
|
-
end
|
363
|
-
end
|
364
|
-
|
365
|
-
def test_object
|
366
|
-
assert_equal(@c[1], 5)
|
367
|
-
assert_equal({ 1 => 1, 2 => 1, 3 => 1, 4 => 1, 5 => 5, 6 => 2, 7 => 1, 8 => 1, 9 => 1, 10 => 1 }, @c.frequencies)
|
368
|
-
assert_equal({ 1 => 1, 2 => 1, 3 => 1, 4 => 1, 5 => 5, 6 => 2, 7 => 1, 8 => 1, 9 => 1, 10 => 1 }, @c._frequencies)
|
369
|
-
assert_equal({ 1 => 1.quo(15), 2 => 1.quo(15), 3 => 1.quo(15), 4 => 1.quo(15), 5 => 5.quo(15), 6 => 2.quo(15), 7 => 1.quo(15), 8 => 1.quo(15), 9 => 1.quo(15), 10 => 1.quo(15) }, @c.proportions)
|
370
|
-
assert_equal(@c.proportion, 1.quo(15))
|
371
|
-
assert_equal(@c.proportion(2), 1.quo(15))
|
372
|
-
assert_equal([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], @c.factors.sort)
|
373
|
-
assert_equal(@c.mode, 5)
|
374
|
-
assert_equal(@c.n_valid, 15)
|
375
|
-
end
|
376
|
-
|
377
|
-
def test_equality
|
378
|
-
v1 = [1, 2, 3].to_vector
|
379
|
-
v2 = [1, 2, 3].to_vector
|
380
|
-
assert_equal(v1, v2)
|
381
|
-
v1 = [1, 2, 3].to_vector(:object)
|
382
|
-
v2 = [1, 2, 3].to_vector(:numeric)
|
383
|
-
assert_not_equal(v1, v2)
|
384
|
-
v2 = [1, 2, 3]
|
385
|
-
assert_not_equal(v1, v2)
|
386
|
-
v1 = [1, 2, 3].to_vector
|
387
|
-
v2 = [1, 2, 3].to_vector
|
388
|
-
assert_equal(v1, v2)
|
389
|
-
assert_equal(false, v1 == Object.new)
|
390
|
-
end
|
391
|
-
|
392
|
-
def test_vector_percentil
|
393
|
-
a = [1, 2, 2, 3, 4, 5, 5, 5, 6, 10].to_numeric
|
394
|
-
expected = [10, 25, 25, 40, 50, 70, 70, 70, 90, 100].to_numeric
|
395
|
-
assert_equal(expected, a.vector_percentil)
|
396
|
-
a = [1, nil, nil, 2, 2, 3, 4, nil, nil, 5, 5, 5, 6, 10].to_numeric
|
397
|
-
expected = [10, nil, nil, 25, 25, 40, 50, nil, nil, 70, 70, 70, 90, 100].to_numeric
|
398
|
-
assert_equal(expected, a.vector_percentil)
|
399
|
-
end
|
400
|
-
|
401
|
-
def test_numeric
|
402
|
-
@c.type = :numeric
|
403
|
-
assert_equal(5, @c.median)
|
404
|
-
assert_equal(4, @c.percentil(25))
|
405
|
-
assert_equal(7, @c.percentil(75))
|
406
|
-
|
407
|
-
v = [200_000, 200_000, 210_000, 220_000, 230_000, 250_000, 250_000, 250_000, 270_000, 300_000, 450_000, 130_000, 140_000, 140_000, 140_000, 145_000, 148_000, 165_000, 170_000, 180_000, 180_000, 180_000, 180_000, 180_000, 180_000].to_numeric
|
408
|
-
assert_equal(180_000, v.median)
|
409
|
-
a = [7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 12.0, 12.0, 13.0, 14.0, 14.0, 2.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0].to_numeric
|
410
|
-
assert_equal(4.5, a.percentil(25))
|
411
|
-
assert_equal(6.5, a.percentil(50))
|
412
|
-
assert_equal(9.5, a.percentil(75))
|
413
|
-
assert_equal(3.0, a.percentil(10))
|
414
|
-
end
|
415
|
-
|
416
|
-
def test_linear_percentil_strategy
|
417
|
-
values = [102, 104, 105, 107, 108, 109, 110, 112, 115, 116].shuffle.to_numeric
|
418
|
-
assert_equal 102, values.percentil(0, :linear)
|
419
|
-
assert_equal 104.75, values.percentil(25, :linear)
|
420
|
-
assert_equal 108.5, values.percentil(50, :linear)
|
421
|
-
assert_equal 112.75, values.percentil(75, :linear)
|
422
|
-
assert_equal 116, values.percentil(100, :linear)
|
423
|
-
|
424
|
-
values = [102, 104, 105, 107, 108, 109, 110, 112, 115, 116, 118].shuffle.to_numeric
|
425
|
-
assert_equal 102, values.percentil(0, :linear)
|
426
|
-
assert_equal 105, values.percentil(25, :linear)
|
427
|
-
assert_equal 109, values.percentil(50, :linear)
|
428
|
-
assert_equal 115, values.percentil(75, :linear)
|
429
|
-
assert_equal 118, values.percentil(100, :linear)
|
430
|
-
end
|
431
|
-
|
432
|
-
def test_ranked
|
433
|
-
v1 = [0.8, 1.2, 1.2, 2.3, 18].to_vector(:numeric)
|
434
|
-
expected = [1, 2.5, 2.5, 4, 5].to_vector(:numeric)
|
435
|
-
assert_equal(expected, v1.ranked)
|
436
|
-
v1 = [nil, 0.8, 1.2, 1.2, 2.3, 18, nil].to_vector(:numeric)
|
437
|
-
expected = [nil, 1, 2.5, 2.5, 4, 5, nil].to_vector(:numeric)
|
438
|
-
assert_equal(expected, v1.ranked)
|
439
|
-
end
|
440
|
-
|
441
|
-
def test_numeric
|
442
|
-
a = Statsample::Vector.new([1, 2, 3, 4, 'STRING'], :numeric)
|
443
|
-
assert_equal(10, a.sum)
|
444
|
-
i = 0
|
445
|
-
factors = a.factors.sort
|
446
|
-
[0, 1, 2, 3, 4].each{|v|
|
447
|
-
assert(v == factors[i])
|
448
|
-
assert(v.class == factors[i].class, "#{v} - #{v.class} != #{factors[i]} - #{factors[i].class}")
|
449
|
-
i += 1
|
450
|
-
}
|
451
|
-
end
|
452
|
-
|
453
|
-
def test_vector_centered
|
454
|
-
mean = rand
|
455
|
-
samples = 11
|
456
|
-
centered = samples.times.map { |i| i - ((samples / 2).floor).to_i }.to_numeric
|
457
|
-
not_centered = centered.recode { |v| v + mean }
|
458
|
-
obs = not_centered.centered
|
459
|
-
centered.each_with_index do |v, i|
|
460
|
-
assert_in_delta(v, obs[i], 0.0001)
|
461
|
-
end
|
462
|
-
end
|
463
|
-
|
464
|
-
def test_vector_standarized
|
465
|
-
v1 = [1, 2, 3, 4, nil].to_vector(:numeric)
|
466
|
-
sds = v1.sds
|
467
|
-
expected = [((1 - 2.5).quo(sds)), ((2 - 2.5).quo(sds)), ((3 - 2.5).quo(sds)), ((4 - 2.5).quo(sds)), nil].to_vector(:numeric)
|
468
|
-
vs = v1.vector_standarized
|
469
|
-
assert_equal(expected, vs)
|
470
|
-
assert_equal(0, vs.mean)
|
471
|
-
assert_equal(1, vs.sds)
|
472
|
-
end
|
473
|
-
|
474
|
-
def test_vector_standarized_with_zero_variance
|
475
|
-
v1 = 100.times.map { |_i| 1 }.to_numeric
|
476
|
-
exp = 100.times.map { nil }.to_numeric
|
477
|
-
assert_equal(exp, v1.standarized)
|
478
|
-
end
|
479
|
-
|
480
|
-
def test_check_type
|
481
|
-
v = Statsample::Vector.new
|
482
|
-
v.type = :object
|
483
|
-
assert_raise(NoMethodError) { v.check_type(:numeric) }
|
484
|
-
assert(v.check_type(:object).nil?)
|
485
|
-
|
486
|
-
v.type = :numeric
|
487
|
-
|
488
|
-
assert(v.check_type(:numeric).nil?)
|
489
|
-
assert(v.check_type(:object).nil?)
|
108
|
+
should "show a warning when initializing with :nominal, :numeric or :ordinal" do
|
109
|
+
assert_output(nil,"WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: nominal has been deprecated.\n") do
|
110
|
+
Statsample::Vector.new [1,2,3,4,5,nil,'hello'], :nominal
|
111
|
+
end
|
490
112
|
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
assert_raise(NoMethodError) { v.check_type(:object) }
|
495
|
-
end
|
113
|
+
assert_output(nil,"WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: scale has been deprecated.\n") do
|
114
|
+
Statsample::Vector.new [1,2,3,4,nil,5], :scale
|
115
|
+
end
|
496
116
|
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
assert_equal([3, 4, 5, 6, 7], (a + 2).to_a)
|
501
|
-
assert_equal([12, 14, 16, 18, 20], (a + b).to_a)
|
502
|
-
assert_raise ArgumentError do
|
503
|
-
a + @c
|
504
|
-
end
|
505
|
-
assert_raise TypeError do
|
506
|
-
a + 'string'
|
507
|
-
end
|
508
|
-
a = Statsample::Vector.new([nil, 1, 2, 3, 4, 5], :numeric)
|
509
|
-
b = Statsample::Vector.new([11, 12, nil, 13, 14, 15], :numeric)
|
510
|
-
assert_equal([nil, 13, nil, 16, 18, 20], (a + b).to_a)
|
511
|
-
assert_equal([nil, 13, nil, 16, 18, 20], (a + b.to_a).to_a)
|
512
|
-
end
|
117
|
+
assert_output(nil,"WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: ordinal has been deprecated.\n") do
|
118
|
+
Statsample::Vector.new [1,2,3,4,5], :ordinal
|
119
|
+
end
|
513
120
|
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
assert_equal([-1, 0, 1, 2, 3], (a - 2).to_a)
|
518
|
-
assert_equal([10, 10, 10, 10, 10], (b - a).to_a)
|
519
|
-
assert_raise ArgumentError do
|
520
|
-
a - @c
|
521
|
-
end
|
522
|
-
assert_raise TypeError do
|
523
|
-
a - 'string'
|
121
|
+
assert_output(nil,"WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
|
122
|
+
Statsample::Vector.new_scale 10, 1
|
123
|
+
end
|
524
124
|
end
|
525
|
-
a = Statsample::Vector.new([nil, 1, 2, 3, 4, 5], :numeric)
|
526
|
-
b = Statsample::Vector.new([11, 12, nil, 13, 14, 15], :numeric)
|
527
|
-
assert_equal([nil, 11, nil, 10, 10, 10], (b - a).to_a)
|
528
|
-
assert_equal([nil, 11, nil, 10, 10, 10], (b - a.to_a).to_a)
|
529
|
-
end
|
530
125
|
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
126
|
+
should "show a warning when Statsample::Vector shorthands are used" do
|
127
|
+
numeric = Statsample::Vector.new([1,2,3,4,nil,5], :numeric)
|
128
|
+
assert_equal(numeric, [1,2,3,4,nil,5].to_numeric)
|
129
|
+
assert_equal(numeric, [1,2,3,4,nil,5].to_vector(:numeric))
|
535
130
|
|
536
|
-
|
537
|
-
|
538
|
-
assert_equal(20.quo(9), a.average_deviation_population)
|
539
|
-
end
|
540
|
-
|
541
|
-
def test_samples
|
542
|
-
srand(1)
|
543
|
-
assert_equal(100, @c.sample_with_replacement(100).size)
|
544
|
-
assert_equal(@c.valid_data.to_a.sort, @c.sample_without_replacement(15).sort)
|
545
|
-
assert_raise ArgumentError do
|
546
|
-
@c.sample_without_replacement(20)
|
131
|
+
obj = Statsample::Vector.new([1,2,3,4,'one','two'], :object)
|
132
|
+
assert_equal(obj, [1,2,3,4,'one','two'].to_vector(:object))
|
547
133
|
end
|
548
|
-
@c.type = :numeric
|
549
|
-
srand(1)
|
550
|
-
assert_equal(100, @c.sample_with_replacement(100).size)
|
551
|
-
assert_equal(@c.valid_data.to_a.sort, @c.sample_without_replacement(15).sort)
|
552
|
-
end
|
553
|
-
|
554
|
-
def test_valid_data
|
555
|
-
a = Statsample::Vector.new([1, 2, 3, 4, 'STRING'])
|
556
|
-
a.missing_values = [-99]
|
557
|
-
a.add(1, false)
|
558
|
-
a.add(2, false)
|
559
|
-
a.add(-99, false)
|
560
|
-
a.set_valid_data
|
561
|
-
exp_valid_data = [1, 2, 3, 4, 'STRING', 1, 2]
|
562
|
-
assert_equal(exp_valid_data, a.valid_data)
|
563
|
-
a.add(20, false)
|
564
|
-
a.add(30, false)
|
565
|
-
assert_equal(exp_valid_data, a.valid_data)
|
566
|
-
a.set_valid_data
|
567
|
-
exp_valid_data_2 = [1, 2, 3, 4, 'STRING', 1, 2, 20, 30]
|
568
|
-
assert_equal(exp_valid_data_2, a.valid_data)
|
569
|
-
end
|
570
|
-
|
571
|
-
def test_set_value
|
572
|
-
@c[2] = 10
|
573
|
-
expected = [5, 5, 10, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99].to_vector
|
574
|
-
assert_equal(expected.data, @c.data)
|
575
|
-
end
|
576
134
|
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
assert_equal(2, a.mean)
|
582
|
-
assert_equal(a.variance_sample_ruby, a.variance_sample)
|
583
|
-
assert_equal(a.standard_deviation_sample_ruby, a.sds)
|
584
|
-
assert_equal(a.variance_population_ruby, a.variance_population)
|
585
|
-
assert_equal(a.standard_deviation_population_ruby, a.standard_deviation_population)
|
586
|
-
assert_nothing_raised do
|
587
|
-
a = [].to_vector(:numeric)
|
135
|
+
should "test that old shorthands show deprecation warnings" do
|
136
|
+
assert_output(nil,"WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
|
137
|
+
[1,2,3,4,nil,5].to_scale
|
588
138
|
end
|
589
|
-
a.add(1, false)
|
590
|
-
a.add(2, false)
|
591
|
-
a.set_valid_data
|
592
|
-
assert_equal(3, a.sum)
|
593
|
-
b = [1, 2, nil, 3, 4, 5, nil, 6].to_vector(:numeric)
|
594
|
-
assert_equal(21, b.sum)
|
595
|
-
assert_equal(3.5, b.mean)
|
596
|
-
assert_equal(6, b.gsl.size)
|
597
|
-
c = [10, 20, 30, 40, 50, 100, 1000, 2000, 5000].to_numeric
|
598
|
-
assert_in_delta(c.skew, c.skew_ruby, 0.0001)
|
599
|
-
assert_in_delta(c.kurtosis, c.kurtosis_ruby, 0.0001)
|
600
139
|
end
|
601
140
|
end
|
602
141
|
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
def test_marshalling
|
612
|
-
v1 = (0..100).to_a.collect { |_n| rand(100) }.to_vector(:numeric)
|
613
|
-
v2 = Marshal.load(Marshal.dump(v1))
|
614
|
-
assert_equal(v1, v2)
|
615
|
-
end
|
616
|
-
|
617
|
-
def test_dup
|
618
|
-
v1 = %w(a a a b b b c c).to_vector
|
619
|
-
v2 = v1.dup
|
620
|
-
assert_equal(v1.data, v2.data)
|
621
|
-
assert_not_same(v1.data, v2.data)
|
622
|
-
assert_equal(v1.type, v2.type)
|
623
|
-
|
624
|
-
v1.type = :numeric
|
625
|
-
assert_not_equal(v1.type, v2.type)
|
626
|
-
assert_equal(v1.missing_values, v2.missing_values)
|
627
|
-
assert_not_same(v1.missing_values, v2.missing_values)
|
628
|
-
assert_equal(v1.labels, v2.labels)
|
629
|
-
assert_not_same(v1.labels, v2.labels)
|
630
|
-
|
631
|
-
v3 = v1.dup_empty
|
632
|
-
assert_equal([], v3.data)
|
633
|
-
assert_not_equal(v1.data, v3.data)
|
634
|
-
assert_not_same(v1.data, v3.data)
|
635
|
-
assert_equal(v1.type, v3.type)
|
636
|
-
v1.type = :numeric
|
637
|
-
v3.type = :object
|
638
|
-
assert_not_equal(v1.type, v3.type)
|
639
|
-
assert_equal(v1.missing_values, v3.missing_values)
|
640
|
-
assert_not_same(v1.missing_values, v3.missing_values)
|
641
|
-
assert_equal(v1.labels, v3.labels)
|
642
|
-
assert_not_same(v1.labels, v3.labels)
|
643
|
-
end
|
644
|
-
|
645
|
-
def test_paired_ties
|
646
|
-
a = [0, 0, 0, 1, 1, 2, 3, 3, 4, 4, 4].to_vector(:numeric)
|
647
|
-
expected = [2, 2, 2, 4.5, 4.5, 6, 7.5, 7.5, 10, 10, 10].to_vector(:numeric)
|
648
|
-
assert_equal(expected, a.ranked)
|
649
|
-
end
|
650
|
-
|
651
|
-
def test_dichotomize
|
652
|
-
a = [0, 0, 0, 1, 2, 3, nil].to_vector
|
653
|
-
exp = [0, 0, 0, 1, 1, 1, nil].to_numeric
|
654
|
-
assert_equal(exp, a.dichotomize)
|
655
|
-
a = [1, 1, 1, 2, 2, 2, 3].to_vector
|
656
|
-
exp = [0, 0, 0, 1, 1, 1, 1].to_numeric
|
657
|
-
assert_equal(exp, a.dichotomize)
|
658
|
-
a = [0, 0, 0, 1, 2, 3, nil].to_vector
|
659
|
-
exp = [0, 0, 0, 0, 1, 1, nil].to_numeric
|
660
|
-
assert_equal(exp, a.dichotomize(1))
|
661
|
-
a = %w(a a a b c d).to_vector
|
662
|
-
exp = [0, 0, 0, 1, 1, 1].to_numeric
|
663
|
-
assert_equal(exp, a.dichotomize)
|
664
|
-
end
|
665
|
-
|
666
|
-
def test_can_be_methods
|
667
|
-
a = [0, 0, 0, 1, 2, 3, nil].to_vector
|
668
|
-
assert(a.can_be_numeric?)
|
669
|
-
a = [0, 's', 0, 1, 2, 3, nil].to_vector
|
670
|
-
assert(!a.can_be_numeric?)
|
671
|
-
a.missing_values = ['s']
|
672
|
-
assert(a.can_be_numeric?)
|
673
|
-
|
674
|
-
a = [Date.new(2009, 10, 10), Date.today, '2009-10-10', '2009-1-1', nil, 'NOW'].to_vector
|
675
|
-
assert(a.can_be_date?)
|
676
|
-
a = [Date.new(2009, 10, 10), Date.today, nil, 'sss'].to_vector
|
677
|
-
assert(!a.can_be_date?)
|
678
|
-
end
|
679
|
-
|
680
|
-
def test_date_vector
|
681
|
-
a = [Date.new(2009, 10, 10), :NOW, '2009-10-10', '2009-1-1', nil, 'NOW', 'MISSING'].to_vector(:date, missing_values: ['MISSING'])
|
682
|
-
|
683
|
-
assert(a.type == :date)
|
684
|
-
expected = [Date.new(2009, 10, 10), Date.today, Date.new(2009, 10, 10), Date.new(2009, 1, 1), nil, Date.today, nil]
|
685
|
-
assert_equal(expected, a.date_data_with_nils)
|
142
|
+
should 'return correct histogram' do
|
143
|
+
a = Daru::Vector.new(10.times.map { |v| v })
|
144
|
+
hist = a.histogram(2)
|
145
|
+
assert_equal([5, 5], hist.bin)
|
146
|
+
3.times do |i|
|
147
|
+
assert_in_delta(i * 4.5, hist.get_range(i)[0], 1e-9)
|
148
|
+
end
|
686
149
|
end
|
687
150
|
end
|