statsample 1.5.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/.build.sh +15 -0
  3. data/.gitignore +1 -0
  4. data/.travis.yml +19 -7
  5. data/CONTRIBUTING.md +33 -0
  6. data/History.txt +5 -0
  7. data/README.md +41 -53
  8. data/benchmarks/correlation_matrix_15_variables.rb +6 -5
  9. data/benchmarks/correlation_matrix_5_variables.rb +6 -5
  10. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
  11. data/examples/boxplot.rb +17 -5
  12. data/examples/correlation_matrix.rb +36 -7
  13. data/examples/dataset.rb +25 -5
  14. data/examples/dominance_analysis.rb +8 -7
  15. data/examples/dominance_analysis_bootstrap.rb +16 -11
  16. data/examples/histogram.rb +16 -2
  17. data/examples/icc.rb +5 -6
  18. data/examples/levene.rb +17 -3
  19. data/examples/multiple_regression.rb +6 -3
  20. data/examples/parallel_analysis.rb +11 -6
  21. data/examples/polychoric.rb +26 -13
  22. data/examples/principal_axis.rb +8 -4
  23. data/examples/reliability.rb +10 -10
  24. data/examples/scatterplot.rb +8 -0
  25. data/examples/t_test.rb +7 -0
  26. data/examples/u_test.rb +10 -2
  27. data/examples/vector.rb +9 -6
  28. data/examples/velicer_map_test.rb +12 -8
  29. data/lib/statsample.rb +13 -47
  30. data/lib/statsample/analysis/suite.rb +1 -1
  31. data/lib/statsample/anova/oneway.rb +6 -6
  32. data/lib/statsample/anova/twoway.rb +26 -24
  33. data/lib/statsample/bivariate.rb +78 -61
  34. data/lib/statsample/bivariate/pearson.rb +2 -2
  35. data/lib/statsample/codification.rb +45 -32
  36. data/lib/statsample/converter/csv.rb +15 -53
  37. data/lib/statsample/converter/spss.rb +6 -5
  38. data/lib/statsample/converters.rb +50 -211
  39. data/lib/statsample/crosstab.rb +26 -25
  40. data/lib/statsample/daru.rb +117 -0
  41. data/lib/statsample/dataset.rb +70 -942
  42. data/lib/statsample/dominanceanalysis.rb +16 -17
  43. data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
  44. data/lib/statsample/factor/parallelanalysis.rb +17 -19
  45. data/lib/statsample/factor/pca.rb +21 -20
  46. data/lib/statsample/factor/principalaxis.rb +3 -3
  47. data/lib/statsample/graph/boxplot.rb +8 -16
  48. data/lib/statsample/graph/histogram.rb +4 -4
  49. data/lib/statsample/graph/scatterplot.rb +8 -7
  50. data/lib/statsample/histogram.rb +128 -119
  51. data/lib/statsample/matrix.rb +20 -16
  52. data/lib/statsample/multiset.rb +39 -38
  53. data/lib/statsample/regression.rb +3 -3
  54. data/lib/statsample/regression/multiple.rb +8 -10
  55. data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
  56. data/lib/statsample/regression/multiple/baseengine.rb +32 -32
  57. data/lib/statsample/regression/multiple/gslengine.rb +33 -36
  58. data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
  59. data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
  60. data/lib/statsample/reliability.rb +23 -25
  61. data/lib/statsample/reliability/icc.rb +8 -7
  62. data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
  63. data/lib/statsample/reliability/scaleanalysis.rb +58 -60
  64. data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
  65. data/lib/statsample/resample.rb +1 -1
  66. data/lib/statsample/shorthand.rb +29 -25
  67. data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
  68. data/lib/statsample/test/levene.rb +28 -27
  69. data/lib/statsample/test/t.rb +7 -9
  70. data/lib/statsample/test/umannwhitney.rb +28 -28
  71. data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
  72. data/lib/statsample/vector.rb +70 -1013
  73. data/lib/statsample/version.rb +1 -1
  74. data/statsample.gemspec +12 -16
  75. data/test/helpers_tests.rb +1 -1
  76. data/test/test_analysis.rb +17 -17
  77. data/test/test_anova_contrast.rb +6 -6
  78. data/test/test_anovatwowaywithdataset.rb +8 -8
  79. data/test/test_anovawithvectors.rb +8 -8
  80. data/test/test_awesome_print_bug.rb +1 -1
  81. data/test/test_bartlettsphericity.rb +4 -4
  82. data/test/test_bivariate.rb +48 -43
  83. data/test/test_codification.rb +33 -33
  84. data/test/test_crosstab.rb +9 -9
  85. data/test/test_dataset.rb +28 -458
  86. data/test/test_factor.rb +46 -38
  87. data/test/test_factor_pa.rb +22 -13
  88. data/test/test_ggobi.rb +4 -4
  89. data/test/test_gsl.rb +4 -4
  90. data/test/test_histogram.rb +3 -3
  91. data/test/test_matrix.rb +13 -13
  92. data/test/test_multiset.rb +103 -91
  93. data/test/test_regression.rb +57 -52
  94. data/test/test_reliability.rb +55 -45
  95. data/test/test_reliability_icc.rb +8 -8
  96. data/test/test_reliability_skillscale.rb +26 -24
  97. data/test/test_resample.rb +1 -1
  98. data/test/test_statistics.rb +3 -13
  99. data/test/test_stest.rb +9 -9
  100. data/test/test_stratified.rb +3 -3
  101. data/test/test_test_t.rb +12 -12
  102. data/test/test_umannwhitney.rb +2 -2
  103. data/test/test_vector.rb +76 -613
  104. data/test/test_wilcoxonsignedrank.rb +4 -4
  105. metadata +57 -28
  106. data/lib/statsample/rserve_extension.rb +0 -20
  107. data/lib/statsample/vector/gsl.rb +0 -106
  108. data/test/fixtures/repeated_fields.csv +0 -7
  109. data/test/fixtures/scientific_notation.csv +0 -4
  110. data/test/fixtures/test_csv.csv +0 -7
  111. data/test/fixtures/test_xls.xls +0 -0
  112. data/test/test_csv.rb +0 -63
  113. data/test/test_rserve_extension.rb +0 -42
  114. data/test/test_xls.rb +0 -52
@@ -5,30 +5,32 @@ class StatsampleReliabilitySkillScaleTestCase < Minitest::Test
5
5
  setup do
6
6
  options = %w(a b c d e)
7
7
  cases = 20
8
- @id = cases.times.map { |v| v }.to_numeric
9
- @a = cases.times.map { options[rand(5)] }.to_vector
10
- @b = cases.times.map { options[rand(5)] }.to_vector
11
- @c = cases.times.map { options[rand(5)] }.to_vector
12
- @d = cases.times.map { options[rand(5)] }.to_vector
13
- @e = cases.times.map {|i|
14
- i == 0 ? options[rand(0)] :
8
+ @id = Daru::Vector.new(cases.times.map { |v| v })
9
+ @a = Daru::Vector.new(cases.times.map { options[rand(5)] })
10
+ @b = Daru::Vector.new(cases.times.map { options[rand(5)] })
11
+ @c = Daru::Vector.new(cases.times.map { options[rand(5)] })
12
+ @d = Daru::Vector.new(cases.times.map { options[rand(5)] })
13
+ @e = Daru::Vector.new(
14
+ cases.times.map do |i|
15
+ i == 0 ? options[rand(0)] :
15
16
  rand > 0.8 ? nil : options[rand(5)]
16
- }.to_vector
17
- @ds = { 'id' => @id, 'a' => @a, 'b' => @b, 'c' => @c, 'd' => @d, 'e' => @e }.to_dataset
18
- @key = { 'a' => 'a', 'b' => options[rand(5)], 'c' => options[rand(5)], 'd' => options[rand(5)], 'e' => options[rand(5)] }
17
+ end
18
+ )
19
+ @ds = Daru::DataFrame.new({ :id => @id, :a => @a, :b => @b, :c => @c, :d => @d, :e => @e })
20
+ @key = { :a => 'a', :b => options[rand(5)], :c => options[rand(5)], :d => options[rand(5)], :e => options[rand(5)] }
19
21
  @ssa = Statsample::Reliability::SkillScaleAnalysis.new(@ds, @key)
20
- @ac = @a.map { |v| v == @key['a'] ? 1 : 0 }.to_numeric
21
- @bc = @b.map { |v| v == @key['b'] ? 1 : 0 }.to_numeric
22
- @cc = @c.map { |v| v == @key['c'] ? 1 : 0 }.to_numeric
23
- @dc = @d.map { |v| v == @key['d'] ? 1 : 0 }.to_numeric
24
- @ec = @e.map { |v| v.nil? ? nil : (v == @key['e'] ? 1 : 0) }.to_numeric
22
+ @ac = Daru::Vector.new(@a.map { |v| v == @key[:a] ? 1 : 0 })
23
+ @bc = Daru::Vector.new(@b.map { |v| v == @key[:b] ? 1 : 0 })
24
+ @cc = Daru::Vector.new(@c.map { |v| v == @key[:c] ? 1 : 0 })
25
+ @dc = Daru::Vector.new(@d.map { |v| v == @key[:d] ? 1 : 0 })
26
+ @ec = Daru::Vector.new(@e.map { |v| v.nil? ? nil : (v == @key[:e] ? 1 : 0) })
25
27
  end
26
28
  should 'return proper corrected dataset' do
27
- cds = { 'id' => @id, 'a' => @ac, 'b' => @bc, 'c' => @cc, 'd' => @dc, 'e' => @ec }.to_dataset
29
+ cds = Daru::DataFrame.new({ :id => @id, :a => @ac, :b => @bc, :c => @cc, :d => @dc, :e => @ec })
28
30
  assert_equal(cds, @ssa.corrected_dataset)
29
31
  end
30
32
  should 'return proper corrected minimal dataset' do
31
- cdsm = { 'a' => @ac, 'b' => @bc, 'c' => @cc, 'd' => @dc, 'e' => @ec }.to_dataset
33
+ cdsm = Daru::DataFrame.new({ :a => @ac, :b => @bc, :c => @cc, :d => @dc, :e => @ec })
32
34
  assert_equal(cdsm, @ssa.corrected_dataset_minimal)
33
35
  end
34
36
  should 'return correct vector_sum and vector_sum' do
@@ -37,13 +39,13 @@ class StatsampleReliabilitySkillScaleTestCase < Minitest::Test
37
39
  assert_equal(cdsm.vector_mean, @ssa.vector_mean)
38
40
  end
39
41
  should 'not crash on rare case' do
40
- a = Statsample::Vector['c', 'c', 'a', 'a', 'c', 'a', 'b', 'c', 'c', 'b', 'a', 'd', 'a', 'd', 'a', 'a', 'd', 'e', 'c', 'd']
41
- b = Statsample::Vector['e', 'b', 'e', 'b', 'c', 'd', 'a', 'e', 'e', 'c', 'b', 'e', 'e', 'b', 'd', 'c', 'e', 'b', 'b', 'd']
42
- c = Statsample::Vector['e', 'b', 'e', 'c', 'e', 'c', 'b', 'd', 'e', 'c', 'a', 'a', 'b', 'd', 'e', 'c', 'b', 'a', 'a', 'e']
43
- d = Statsample::Vector['a', 'b', 'd', 'd', 'e', 'b', 'e', 'b', 'd', 'c', 'e', 'a', 'c', 'd', 'c', 'c', 'e', 'd', 'd', 'b']
44
- e = Statsample::Vector['a', 'b', nil, 'd', 'c', 'c', 'd', nil, 'd', 'd', 'e', 'e', nil, nil, nil, 'd', 'c', nil, 'e', 'd']
45
- key = { 'a' => 'a', 'b' => 'e', 'c' => 'd', 'd' => 'c', 'e' => 'd' }
46
- ds = Statsample::Dataset.new('a' => a, 'b' => b, 'c' => c, 'd' => d, 'e' => e)
42
+ a = Daru::Vector.new(['c', 'c', 'a', 'a', 'c', 'a', 'b', 'c', 'c', 'b', 'a', 'd', 'a', 'd', 'a', 'a', 'd', 'e', 'c', 'd'])
43
+ b = Daru::Vector.new(['e', 'b', 'e', 'b', 'c', 'd', 'a', 'e', 'e', 'c', 'b', 'e', 'e', 'b', 'd', 'c', 'e', 'b', 'b', 'd'])
44
+ c = Daru::Vector.new(['e', 'b', 'e', 'c', 'e', 'c', 'b', 'd', 'e', 'c', 'a', 'a', 'b', 'd', 'e', 'c', 'b', 'a', 'a', 'e'])
45
+ d = Daru::Vector.new(['a', 'b', 'd', 'd', 'e', 'b', 'e', 'b', 'd', 'c', 'e', 'a', 'c', 'd', 'c', 'c', 'e', 'd', 'd', 'b'])
46
+ e = Daru::Vector.new(['a', 'b', nil, 'd', 'c', 'c', 'd', nil, 'd', 'd', 'e', 'e', nil, nil, nil, 'd', 'c', nil, 'e', 'd'])
47
+ key = { :a => 'a', :b => 'e', :c => 'd', :d => 'c', :e => 'd' }
48
+ ds = Daru::DataFrame.new({:a => a, :b => b, :c => c, :d => d, :e => e})
47
49
  ssa = Statsample::Reliability::SkillScaleAnalysis.new(ds, key)
48
50
  assert(ssa.summary)
49
51
  end
@@ -17,7 +17,7 @@ class StatsampleResampleTestCase < Minitest::Test
17
17
  Statsample::Resample.generate(20, 1, 10).count(1)
18
18
  }
19
19
  assert_equal(400, r.size)
20
- v = Statsample::Vector.new(r, :numeric)
20
+ v = Daru::Vector.new(r)
21
21
  a = v.count { |x| x > 3 }
22
22
  assert(a >= 30 && a <= 70)
23
23
  end
@@ -32,7 +32,7 @@ class StatsampleStatisicsTestCase < Minitest::Test
32
32
  end
33
33
 
34
34
  def test_estimation_mean
35
- v = ([42] * 23 + [41] * 4 + [36] * 1 + [32] * 1 + [29] * 1 + [27] * 2 + [23] * 1 + [19] * 1 + [16] * 2 + [15] * 2 + [14, 11, 10, 9, 7] + [6] * 3 + [5] * 2 + [4, 3]).to_vector(:numeric)
35
+ v = Daru::Vector.new([42] * 23 + [41] * 4 + [36] * 1 + [32] * 1 + [29] * 1 + [27] * 2 + [23] * 1 + [19] * 1 + [16] * 2 + [15] * 2 + [14, 11, 10, 9, 7] + [6] * 3 + [5] * 2 + [4, 3])
36
36
  assert_equal(50, v.size)
37
37
  assert_equal(1471, v.sum)
38
38
  # limits=Statsample::SRS.mean_confidence_interval_z(v.mean(), v.sds(), v.size,676,0.80)
@@ -55,19 +55,9 @@ class StatsampleStatisicsTestCase < Minitest::Test
55
55
  assert_in_delta(0.46, l[1], 0.01)
56
56
  end
57
57
 
58
- def test_ml
59
- if true
60
- # real=[1,1,1,1].to_vector(:numeric)
61
-
62
- # pred=[0.0001,0.0001,0.0001,0.0001].to_vector(:numeric)
63
- # puts Statsample::Bivariate.maximum_likehood_dichotomic(pred,real)
64
-
65
- end
66
- end
67
-
68
58
  def test_simple_linear_regression
69
- a = [1, 2, 3, 4, 5, 6].to_vector(:numeric)
70
- b = [6, 2, 4, 10, 12, 8].to_vector(:numeric)
59
+ a = Daru::Vector.new([1, 2, 3, 4, 5, 6])
60
+ b = Daru::Vector.new([6, 2, 4, 10, 12, 8])
71
61
  reg = Statsample::Regression::Simple.new_from_vectors(a, b)
72
62
  assert_in_delta((reg.ssr + reg.sse).to_f, reg.sst, 0.001)
73
63
  assert_in_delta(Statsample::Bivariate.pearson(a, b), reg.r, 0.001)
@@ -24,26 +24,26 @@ class StatsampleTestTestCase < Minitest::Test
24
24
  end
25
25
 
26
26
  def test_u_mannwhitney
27
- a = [1, 2, 3, 4, 5, 6].to_numeric
28
- b = [0, 5, 7, 9, 10, 11].to_numeric
27
+ a = Daru::Vector.new([1, 2, 3, 4, 5, 6])
28
+ b = Daru::Vector.new([0, 5, 7, 9, 10, 11])
29
29
  assert_equal(7.5, Statsample::Test.u_mannwhitney(a, b).u)
30
30
  assert_equal(7.5, Statsample::Test.u_mannwhitney(b, a).u)
31
- a = [1, 7, 8, 9, 10, 11].to_numeric
32
- b = [2, 3, 4, 5, 6, 12].to_numeric
31
+ a = Daru::Vector.new([1, 7, 8, 9, 10, 11])
32
+ b = Daru::Vector.new([2, 3, 4, 5, 6, 12])
33
33
  assert_equal(11, Statsample::Test.u_mannwhitney(a, b).u)
34
34
  end
35
35
 
36
36
  def test_levene
37
- a = [1, 2, 3, 4, 5, 6, 7, 8, 100, 10].to_numeric
38
- b = [30, 40, 50, 60, 70, 80, 90, 100, 110, 120].to_numeric
37
+ a = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8, 100, 10])
38
+ b = Daru::Vector.new([30, 40, 50, 60, 70, 80, 90, 100, 110, 120])
39
39
  levene = Statsample::Test::Levene.new([a, b])
40
40
  assert_levene(levene)
41
41
  end
42
42
 
43
43
  def test_levene_dataset
44
- a = [1, 2, 3, 4, 5, 6, 7, 8, 100, 10].to_numeric
45
- b = [30, 40, 50, 60, 70, 80, 90, 100, 110, 120].to_numeric
46
- ds = { 'a' => a, 'b' => b }.to_dataset
44
+ a = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8, 100, 10])
45
+ b = Daru::Vector.new([30, 40, 50, 60, 70, 80, 90, 100, 110, 120])
46
+ ds = Daru::DataFrame.new({ :a => a, :b => b })
47
47
  levene = Statsample::Test::Levene.new(ds)
48
48
  assert_levene(levene)
49
49
  end
@@ -9,9 +9,9 @@ class StatsampleStratifiedTestCase < Minitest::Test
9
9
  a = [10, 20, 30, 40, 50]
10
10
  b = [110, 120, 130, 140]
11
11
  pop = a + b
12
- av = a.to_vector(:numeric)
13
- bv = b.to_vector(:numeric)
14
- popv = pop.to_vector(:numeric)
12
+ av = Daru::Vector.new(a)
13
+ bv = Daru::Vector.new(b)
14
+ popv = Daru::Vector.new(pop)
15
15
  assert_equal(popv.mean, Statsample::StratifiedSample.mean(av, bv))
16
16
  end
17
17
  end
@@ -4,24 +4,24 @@ class StatsampleTestTTestCase < Minitest::Test
4
4
  include Math
5
5
  context T do
6
6
  setup do
7
- @a = [30.02, 29.99, 30.11, 29.97, 30.01, 29.99].to_numeric
8
- @b = [29.89, 29.93, 29.72, 29.98, 30.02, 29.98].to_numeric
7
+ @a = Daru::Vector.new([30.02, 29.99, 30.11, 29.97, 30.01, 29.99])
8
+ @b = Daru::Vector.new([29.89, 29.93, 29.72, 29.98, 30.02, 29.98])
9
9
  @x1 = @a.mean
10
10
  @x2 = @b.mean
11
11
  @s1 = @a.sd
12
12
  @s2 = @b.sd
13
- @n1 = @a.n
14
- @n2 = @b.n
13
+ @n1 = @a.size
14
+ @n2 = @b.size
15
15
  end
16
16
  should 'calculate correctly standard t' do
17
- t = Statsample::Test::T.new(@x1, @s1.quo(Math.sqrt(@a.n)), @a.n - 1)
18
- assert_equal((@x1).quo(@s1.quo(Math.sqrt(@a.n))), t.t)
19
- assert_equal(@a.n - 1, t.df)
17
+ t = Statsample::Test::T.new(@x1, @s1.quo(Math.sqrt(@a.size)), @a.size - 1)
18
+ assert_equal((@x1).quo(@s1.quo(Math.sqrt(@a.size))), t.t)
19
+ assert_equal(@a.size - 1, t.df)
20
20
  assert(t.summary.size > 0)
21
21
  end
22
22
  should 'calculate correctly t for one sample' do
23
- t1 = [6, 4, 6, 7, 4, 5, 5, 12, 6, 1].to_numeric
24
- t2 = [9, 6, 5, 10, 10, 8, 7, 10, 6, 5].to_numeric
23
+ t1 = Daru::Vector.new([6, 4, 6, 7, 4, 5, 5, 12, 6, 1])
24
+ t2 = Daru::Vector.new([9, 6, 5, 10, 10, 8, 7, 10, 6, 5])
25
25
  d = t1 - t2
26
26
  t = Statsample::Test::T::OneSample.new(d)
27
27
  assert_in_delta(-2.631, t.t, 0.001)
@@ -48,14 +48,14 @@ class StatsampleTestTTestCase < Minitest::Test
48
48
  assert_in_delta(0.09095, t.probability_not_equal_variance, 0.001)
49
49
  end
50
50
  should 'be the same using shorthand' do
51
- v = 100.times.map { rand(100) }.to_numeric
51
+ v = Daru::Vector.new(100.times.map { rand(100) })
52
52
  assert_equal(Statsample::Test.t_one_sample(v).t, T::OneSample.new(v).t)
53
53
  end
54
54
  should 'calculate all values for one sample T test' do
55
55
  u = @a.mean + (1 - rand * 2)
56
56
  tos = T::OneSample.new(@a, u: u)
57
- assert_equal((@a.mean - u).quo(@a.sd.quo(sqrt(@a.n))), tos.t)
58
- assert_equal(@a.n - 1, tos.df)
57
+ assert_equal((@a.mean - u).quo(@a.sd.quo(sqrt(@a.size))), tos.t)
58
+ assert_equal(@a.size - 1, tos.df)
59
59
  assert(tos.summary.size > 0)
60
60
  end
61
61
  end
@@ -4,8 +4,8 @@ class StatsampleUMannWhitneyTestCase < Minitest::Test
4
4
  include Statsample::Test
5
5
  context Statsample::Test::UMannWhitney do
6
6
  setup do
7
- @v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15].to_numeric
8
- @v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19].to_numeric
7
+ @v1 = Daru::Vector.new([1, 2, 3, 4, 7, 8, 9, 10, 14, 15])
8
+ @v2 = Daru::Vector.new([5, 6, 11, 12, 13, 16, 17, 18, 19])
9
9
  @u = Statsample::Test::UMannWhitney.new(@v1, @v2)
10
10
  end
11
11
  should 'have same result using class or Test#u_mannwhitney' do
@@ -3,24 +3,11 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
3
3
  class StatsampleTestVector < Minitest::Test
4
4
  include Statsample::Shorthand
5
5
 
6
- def setup
7
- @c = Statsample::Vector.new([5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99], :object)
8
- @c.name = 'Test Vector'
9
- @c.missing_values = [-99]
10
- end
11
-
12
- def assert_counting_tokens(b)
13
- assert_equal([1, 1, 0, 1, 0, nil], b['a'].to_a)
14
- assert_equal([0, 1, 0, 0, 0, nil], b['b'].to_a)
15
- assert_equal([0, 0, 1, 0, 0, nil], b['c'].to_a)
16
- assert_equal([0, 0, 1, 1, 0, nil], b['d'].to_a)
17
- assert_equal([0, 0, 0, 0, 1, nil], b[10].to_a)
18
- end
19
6
  context Statsample do
20
7
  setup do
21
8
  @sample = 100
22
- @a = @sample.times.map { |i| (i + rand(10)) % 10 == 0 ? nil : rand(100) }.to_numeric
23
- @b = @sample.times.map { |i| (i + rand(10)) % 10 == 0 ? nil : rand(100) }.to_numeric
9
+ @a = Daru::Vector.new(@sample.times.map { |i| (i + rand(10)) % 10 == 0 ? nil : rand(100) })
10
+ @b = Daru::Vector.new(@sample.times.map { |i| (i + rand(10)) % 10 == 0 ? nil : rand(100) })
24
11
  @correct_a = []
25
12
  @correct_b = []
26
13
  @a.each_with_index do |_v, i|
@@ -29,8 +16,8 @@ class StatsampleTestVector < Minitest::Test
29
16
  @correct_b.push(@b[i])
30
17
  end
31
18
  end
32
- @correct_a = @correct_a.to_numeric
33
- @correct_b = @correct_b.to_numeric
19
+ @correct_a = Daru::Vector.new(@correct_a)
20
+ @correct_b = Daru::Vector.new(@correct_b)
34
21
 
35
22
  @common = lambda do |av, bv|
36
23
  assert_equal(@correct_a, av, 'A no es esperado')
@@ -39,649 +26,125 @@ class StatsampleTestVector < Minitest::Test
39
26
  assert(!bv.has_missing_data?, 'b tiene datos faltantes')
40
27
  end
41
28
  end
29
+
42
30
  should 'return correct only_valid' do
43
31
  av, bv = Statsample.only_valid @a, @b
32
+ av.reset_index!
33
+ bv.reset_index!
44
34
  av2, bv2 = Statsample.only_valid av, bv
45
35
  @common.call(av, bv)
46
36
  assert_equal(av, av2)
47
37
  assert_not_same(av, av2)
48
38
  assert_not_same(bv, bv2)
49
39
  end
40
+
50
41
  should 'return correct only_valid_clone' do
51
42
  av, bv = Statsample.only_valid_clone @a, @b
43
+ av.reset_index!
44
+ bv.reset_index!
52
45
  @common.call(av, bv)
53
46
  av2, bv2 = Statsample.only_valid_clone av, bv
54
47
  assert_equal(av, av2)
55
48
  assert_same(av, av2)
56
49
  assert_same(bv, bv2)
57
50
  end
58
- end
59
- context Statsample::Vector do
60
- setup do
61
- @c = Statsample::Vector.new([5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99], :object)
62
- @c.name = 'Test Vector'
63
- @c.missing_values = [-99]
64
- end
65
- should_with_gsl 'be created with GSL::Vector' do
66
- gsl = GSL::Vector[1, 2, 3, 4, 5]
67
- v = Statsample::Vector.new(gsl)
68
- assert_equal([1, 2, 3, 4, 5], v.to_a)
69
- refute(v.flawed?)
70
- end
71
51
 
72
- context 'using matrix operations' do
73
- setup do
74
- @a = [1, 2, 3, 4, 5].to_numeric
75
- end
76
- should 'to_matrix returns a matrix with 1 row' do
77
- mh = Matrix[[1, 2, 3, 4, 5]]
78
- assert_equal(mh, @a.to_matrix)
79
- end
80
- should 'to_matrix(:vertical) returns a matrix with 1 column' do
81
- mv = Matrix.columns([[1, 2, 3, 4, 5]])
82
- assert_equal(mv, @a.to_matrix(:vertical))
83
- end
84
- should 'returns valid submatrixes' do
85
- # 3*4 + 2*5 = 22
86
- a = [3, 2].to_vector(:numeric)
87
- b = [4, 5].to_vector(:numeric)
88
- assert_equal(22, (a.to_matrix * b.to_matrix(:vertical))[0, 0])
89
- end
52
+ should 'returns correct vector_cols_matrix' do
53
+ v1 = Daru::Vector.new(%w(a a a b b b c c))
54
+ v2 = Daru::Vector.new(%w(1 3 4 5 6 4 3 2))
55
+ v3 = Daru::Vector.new(%w(1 0 0 0 1 1 1 0))
56
+ ex = Matrix.rows([%w(a 1 1), %w(a 3 0), %w(a 4 0), %w(b 5 0), %w(b 6 1), %w(b 4 1), %w(c 3 1), %w(c 2 0)])
57
+ assert_equal(ex, Statsample.vector_cols_matrix(v1, v2, v3))
90
58
  end
91
- context 'when initializing' do
92
- setup do
93
- @data = (10.times.map { rand(100) }) + [nil]
94
- @original = Statsample::Vector.new(@data, :numeric)
95
- end
96
- should 'be the sample using []' do
97
- second = Statsample::Vector[*@data]
98
- assert_equal(@original, second)
99
- end
100
- should '[] returns same results as R-c()' do
101
- reference = [0, 4, 5, 6, 10].to_numeric
102
- assert_equal(reference, Statsample::Vector[0, 4, 5, 6, 10])
103
- assert_equal(reference, Statsample::Vector[0, 4..6, 10])
104
- assert_equal(reference, Statsample::Vector[[0], [4, 5, 6], [10]])
105
- assert_equal(reference, Statsample::Vector[[0], [4, [5, [6]]], [10]])
106
-
107
- assert_equal(reference, Statsample::Vector[[0], [4, 5, 6].to_vector, [10]])
108
- end
109
- should 'be the same usign #to_vector' do
110
- lazy1 = @data.to_vector(:numeric)
111
- assert_equal(@original, lazy1)
112
- end
113
- should 'be the same using #to_numeric' do
114
- lazy2 = @data.to_numeric
115
- assert_equal(@original, lazy2)
116
- assert_equal(:numeric, lazy2.type)
117
- assert_equal(@data.find_all { |v| !v.nil? }, lazy2.valid_data)
118
- end
119
- should 'could use new_numeric with size only' do
120
- v1 = 10.times.map { nil }.to_numeric
121
- v2 = Statsample::Vector.new_numeric(10)
122
- assert_equal(v1, v2)
123
- end
124
- should 'could use new_numeric with size and value' do
125
- a = rand
126
- v1 = 10.times.map { a }.to_numeric
127
- v2 = Statsample::Vector.new_numeric(10, a)
128
- assert_equal(v1, v2)
129
- end
130
- should 'could use new_numeric with func' do
131
- v1 = 10.times.map { |i| i * 2 }.to_numeric
132
- v2 = Statsample::Vector.new_numeric(10) { |i| i * 2 }
133
- assert_equal(v1, v2)
134
- end
135
- end
136
-
137
- context "new types :numeric and :object" do
138
- should "set default type of vector to :object" do
139
- v = Statsample::Vector.new [1,2,3,4,5]
140
- assert_equal(:object, v.type)
141
- end
142
-
143
- should "initialize Vector with :numeric type" do
144
- v = Statsample::Vector.new [1,2,3,4,5,nil], :numeric
145
- assert_equal(:numeric, v.type)
146
- assert_equal([1,2,3,4,5], v.valid_data)
147
- end
148
-
149
- should "show a warning when initializing with :nominal, :numeric or :ordinal" do
150
- assert_output(nil,"WARNING: nominal has been deprecated. Use :object instead.\n") do
151
- Statsample::Vector.new [1,2,3,4,5,nil,'hello'], :nominal
152
- end
153
-
154
- assert_output(nil,"WARNING: scale has been deprecated. Use :numeric instead.\n") do
155
- Statsample::Vector.new [1,2,3,4,nil,5], :scale
156
- end
157
-
158
- assert_output(nil,"WARNING: ordinal has been deprecated. Use :numeric instead.\n") do
159
- Statsample::Vector.new [1,2,3,4,5], :ordinal
160
- end
59
+ end
161
60
 
162
- assert_output(nil,"WARNING: .new_scale has been deprecated. Use .new_numeric instead.\n") do
163
- Statsample::Vector.new_scale 10, 1
61
+ context Statsample::Vector do
62
+ context 'when initializing' do
63
+ should '.new creates a Daru::Vector internally and shows a warning' do
64
+ assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
65
+ data = (10.times.map { rand(100) }) + [nil]
66
+ original = Statsample::Vector.new(@data, :numeric)
67
+ assert_equal(true, original.kind_of?(Daru::Vector))
164
68
  end
165
69
  end
166
70
 
167
- should "test that new shorthands work" do
168
- numeric = Statsample::Vector.new([1,2,3,4,nil,5], :numeric)
169
- assert_equal(numeric, [1,2,3,4,nil,5].to_numeric)
170
- assert_equal(numeric, [1,2,3,4,nil,5].to_vector(:numeric))
171
-
172
- obj = Statsample::Vector.new([1,2,3,4,'one','two'], :object)
173
- assert_equal(obj, [1,2,3,4,'one','two'].to_vector(:object))
174
- end
175
-
176
- should "test that old shorthands raise warnings" do
177
- assert_output(nil,"WARNING: to_scale has been deprecated. Use to_numeric instead.\n") do
178
- [1,2,3,4,nil,5].to_scale
71
+ should '[] returns same results as R-c()' do
72
+ assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
73
+ assert_equal(true, Statsample::Vector[1,2,3,4,5].kind_of?(Daru::Vector))
179
74
  end
180
75
  end
181
- end
182
76
 
183
- context '#split_by_separator' do
184
- setup do
185
- @a = Statsample::Vector.new(['a', 'a,b', 'c,d', 'a,d', 10, nil], :object)
186
- @b = @a.split_by_separator(',')
187
- end
188
- should 'returns a Hash' do
189
- assert_kind_of(Hash, @b)
190
- end
191
- should 'return a Hash with keys with different values of @a' do
192
- expected = ['a', 'b', 'c', 'd', 10]
193
- assert_equal(expected, @b.keys)
194
- end
77
+ should "new_numeric/new_scale creates a Daru::Vector internally and shows a warning" do
78
+ assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
79
+ Statsample::Vector.new_scale(4)
80
+ end
195
81
 
196
- should 'returns a Hash, which values are Statsample::Vector' do
197
- @b.each_key { |k| assert_instance_of(Statsample::Vector, @b[k]) }
198
- end
199
- should 'hash values are n times the tokens appears' do
200
- assert_counting_tokens(@b)
201
- end
202
- should '#split_by_separator_freq returns the number of ocurrences of tokens' do
203
- assert_equal({ 'a' => 3, 'b' => 1, 'c' => 1, 'd' => 2, 10 => 1 }, @a.split_by_separator_freq)
204
- end
205
- should 'using a different separator give the same values' do
206
- a = Statsample::Vector.new(['a', 'a*b', 'c*d', 'a*d', 10, nil], :object)
207
- b = a.split_by_separator('*')
208
- assert_counting_tokens(b)
209
- end
210
- end
211
- should 'return correct median_absolute_deviation' do
212
- a = [1, 1, 2, 2, 4, 6, 9].to_numeric
213
- assert_equal(1, a.median_absolute_deviation)
214
- end
215
- should 'return correct histogram' do
216
- a = 10.times.map { |v| v }.to_numeric
217
- hist = a.histogram(2)
218
- assert_equal([5, 5], hist.bin)
219
- 3.times do |i|
220
- assert_in_delta(i * 4.5, hist.get_range(i)[0], 1e-9)
82
+ assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
83
+ Statsample::Vector.new_numeric(4)
84
+ end
221
85
  end
222
86
  end
223
- should 'have a name' do
224
- @c.name == 'Test Vector'
225
- end
226
- should 'without explicit name, returns vector with succesive numbers' do
227
- a = 10.times.map { rand(100) }.to_numeric
228
- b = 10.times.map { rand(100) }.to_numeric
229
- assert_match(/Vector \d+/, a.name)
230
- a.name =~ /Vector (\d+)/
231
- next_number = Regexp.last_match(1).to_i + 1
232
- assert_equal("Vector #{next_number}", b.name)
233
- end
234
- should 'save to a file and load the same Vector' do
235
- outfile = Tempfile.new('vector.vec')
236
- @c.save(outfile.path)
237
- a = Statsample.load(outfile.path)
238
- assert_equal(@c, a)
239
- end
240
- should '#collect returns an array' do
241
- val = @c.collect { |v| v }
242
- assert_equal(val, [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99])
243
- end
87
+ end
244
88
 
245
- should '#recode returns a recoded array' do
246
- a = @c.recode { |v| @c.is_valid?(v) ? 0 : 1 }
247
- exp = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1].to_vector
248
- assert_equal(exp, a)
249
- exp.recode! { |v| v == 0 ? 1 : 0 }
250
- exp2 = (([1] * 15) + ([0] * 3)).to_vector
251
- assert_equal(exp2, exp)
252
- end
253
- should '#product returns the * of all values' do
254
- a = [1, 2, 3, 4, 5].to_vector(:numeric)
255
- assert_equal(120, a.product)
89
+ context "new types :numeric and :object" do
90
+ should "numerical data is automatically detected to be of type :numeric" do
91
+ v = Statsample::Vector.new [1,2,3,4,5,nil]
92
+ assert_equal(:numeric, v.type)
256
93
  end
257
94
 
258
- should 'missing values' do
259
- @c.missing_values = [10]
260
- assert_equal([-99, -99, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 6, 7, 8, 9], @c.valid_data.sort)
261
- assert_equal([5, 5, 5, 5, 5, 6, 6, 7, 8, 9, nil, 1, 2, 3, 4, nil, -99, -99], @c.data_with_nils)
262
- @c.missing_values = [-99]
263
- assert_equal(@c.valid_data.sort, [1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10])
264
- assert_equal(@c.data_with_nils, [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, nil, nil])
265
- @c.missing_values = []
266
- assert_equal(@c.valid_data.sort, [-99, -99, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10])
267
- assert_equal(@c.data_with_nils, [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99])
268
- end
269
- should 'correct has_missing_data? with missing data' do
270
- a = [1, 2, 3, nil].to_vector
271
- assert(a.has_missing_data?)
272
- end
273
- should 'correct has_missing_data? without missing data' do
274
- a = [1, 2, 3, 4, 10].to_vector
275
- assert(!a.has_missing_data?)
276
- end
277
- should 'with explicit missing_values, should respond has_missing_data?' do
278
- a = [1, 2, 3, 4, 10].to_vector
279
- a.missing_values = [10]
280
- assert(a.has_missing_data?)
281
- end
282
- should 'label correctly fields' do
283
- @c.labels = { 5 => 'FIVE' }
284
- assert_equal(['FIVE', 'FIVE', 'FIVE', 'FIVE', 'FIVE', 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99], @c.vector_labeled.to_a)
285
- end
286
- should 'verify' do
287
- h = @c.verify { |d| !d.nil? and d > 0 }
288
- e = { 15 => nil, 16 => -99, 17 => -99 }
289
- assert_equal(e, h)
290
- end
291
- should 'have a summary with name on it' do
292
- assert_match(/#{@c.name}/, @c.summary)
95
+ should "object data automatically detected as :object" do
96
+ v = Statsample::Vector.new [1,2,3,4,'hello','world']
97
+ assert_equal(:object, v.type)
293
98
  end
294
99
 
295
- should 'GSL::Vector based should push correcty' do
296
- if Statsample.has_gsl?
297
- v = GSL::Vector[1, 2, 3, 4, 5].to_numeric
298
- v.push(nil)
299
- assert_equal([1, 2, 3, 4, 5, nil], v.to_a)
300
- assert(v.flawed?)
301
- else
302
- skip('Requires GSL')
100
+ should "initialize Vector with :numeric type" do
101
+ v = Statsample::Vector.new [1,2,3,4,5,nil], :numeric
102
+ assert_equal(:numeric, v.type)
103
+ assert_output(nil, "WARNING: valid_data in Statsample::Vector has been deprecated in favor of only_valid in Daru::Vector. Please use that.\n") do
104
+ assert_equal([1,2,3,4,5], v.valid_data)
303
105
  end
304
106
  end
305
107
 
306
- should 'split correctly' do
307
- a = Statsample::Vector.new(['a', 'a,b', 'c,d', 'a,d', 'd', 10, nil], :object)
308
- assert_equal([%w(a), %w(a b), %w(c d), %w(a d), %w(d), [10], nil], a.splitted)
309
- end
310
- should 'multiply correct for scalar' do
311
- a = [1, 2, 3].to_numeric
312
- assert_equal([5, 10, 15].to_numeric, a * 5)
313
- end
314
- should 'multiply correct with other vector' do
315
- a = [1, 2, 3].to_numeric
316
- b = [2, 4, 6].to_numeric
317
-
318
- assert_equal([2, 8, 18].to_numeric, a * b)
319
- end
320
- should 'sum correct for scalar' do
321
- a = [1, 2, 3].to_numeric
322
- assert_equal([11, 12, 13].to_numeric, a + 10)
323
- end
324
-
325
- should 'raise NoMethodError when method requires numeric and vector is object' do
326
- @c.type = :object
327
- assert_raise(::NoMethodError) { @c.median }
328
- end
329
-
330
- should 'jacknife correctly with named method' do
331
- # First example
332
- a = [1, 2, 3, 4].to_numeric
333
- ds = a.jacknife(:mean)
334
- assert_equal(a.mean, ds[:mean].mean)
335
- ds = a.jacknife([:mean, :sd])
336
- assert_equal(a.mean, ds[:mean].mean)
337
- assert_equal(a.sd, ds[:mean].sd)
338
- end
339
- should 'jacknife correctly with custom method' do
340
- # Second example
341
- a = [17.23, 18.71, 13.93, 18.81, 15.78, 11.29, 14.91, 13.39, 18.21, 11.57, 14.28, 10.94, 18.83, 15.52, 13.45, 15.25].to_numeric
342
- ds = a.jacknife(log_s2: ->(v) { Math.log(v.variance) })
343
- exp = [1.605, 2.972, 1.151, 3.097, 0.998, 3.308, 0.942, 1.393, 2.416, 2.951, 1.043, 3.806, 3.122, 0.958, 1.362, 0.937].to_numeric
344
-
345
- assert_similar_vector(exp, ds[:log_s2], 0.001)
346
- assert_in_delta(2.00389, ds[:log_s2].mean, 0.00001)
347
- assert_in_delta(1.091, ds[:log_s2].variance, 0.001)
348
- end
349
- should 'jacknife correctly with k>1' do
350
- a = rnorm(6)
351
- ds = a.jacknife(:mean, 2)
352
- mean = a.mean
353
- exp = [3 * mean - 2 * (a[2] + a[3] + a[4] + a[5]) / 4, 3 * mean - 2 * (a[0] + a[1] + a[4] + a[5]) / 4, 3 * mean - 2 * (a[0] + a[1] + a[2] + a[3]) / 4].to_numeric
354
- assert_similar_vector(exp, ds[:mean], 1e-13)
355
- end
356
- should 'bootstrap should return a vector with mean=mu and sd=se' do
357
- a = rnorm(100)
358
- ds = a.bootstrap([:mean, :sd], 200)
359
- se = 1 / Math.sqrt(a.size)
360
- assert_in_delta(0, ds[:mean].mean, 0.3)
361
- assert_in_delta(se, ds[:mean].sd, 0.02)
362
- end
363
- end
364
-
365
- def test_object
366
- assert_equal(@c[1], 5)
367
- assert_equal({ 1 => 1, 2 => 1, 3 => 1, 4 => 1, 5 => 5, 6 => 2, 7 => 1, 8 => 1, 9 => 1, 10 => 1 }, @c.frequencies)
368
- assert_equal({ 1 => 1, 2 => 1, 3 => 1, 4 => 1, 5 => 5, 6 => 2, 7 => 1, 8 => 1, 9 => 1, 10 => 1 }, @c._frequencies)
369
- assert_equal({ 1 => 1.quo(15), 2 => 1.quo(15), 3 => 1.quo(15), 4 => 1.quo(15), 5 => 5.quo(15), 6 => 2.quo(15), 7 => 1.quo(15), 8 => 1.quo(15), 9 => 1.quo(15), 10 => 1.quo(15) }, @c.proportions)
370
- assert_equal(@c.proportion, 1.quo(15))
371
- assert_equal(@c.proportion(2), 1.quo(15))
372
- assert_equal([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], @c.factors.sort)
373
- assert_equal(@c.mode, 5)
374
- assert_equal(@c.n_valid, 15)
375
- end
376
-
377
- def test_equality
378
- v1 = [1, 2, 3].to_vector
379
- v2 = [1, 2, 3].to_vector
380
- assert_equal(v1, v2)
381
- v1 = [1, 2, 3].to_vector(:object)
382
- v2 = [1, 2, 3].to_vector(:numeric)
383
- assert_not_equal(v1, v2)
384
- v2 = [1, 2, 3]
385
- assert_not_equal(v1, v2)
386
- v1 = [1, 2, 3].to_vector
387
- v2 = [1, 2, 3].to_vector
388
- assert_equal(v1, v2)
389
- assert_equal(false, v1 == Object.new)
390
- end
391
-
392
- def test_vector_percentil
393
- a = [1, 2, 2, 3, 4, 5, 5, 5, 6, 10].to_numeric
394
- expected = [10, 25, 25, 40, 50, 70, 70, 70, 90, 100].to_numeric
395
- assert_equal(expected, a.vector_percentil)
396
- a = [1, nil, nil, 2, 2, 3, 4, nil, nil, 5, 5, 5, 6, 10].to_numeric
397
- expected = [10, nil, nil, 25, 25, 40, 50, nil, nil, 70, 70, 70, 90, 100].to_numeric
398
- assert_equal(expected, a.vector_percentil)
399
- end
400
-
401
- def test_numeric
402
- @c.type = :numeric
403
- assert_equal(5, @c.median)
404
- assert_equal(4, @c.percentil(25))
405
- assert_equal(7, @c.percentil(75))
406
-
407
- v = [200_000, 200_000, 210_000, 220_000, 230_000, 250_000, 250_000, 250_000, 270_000, 300_000, 450_000, 130_000, 140_000, 140_000, 140_000, 145_000, 148_000, 165_000, 170_000, 180_000, 180_000, 180_000, 180_000, 180_000, 180_000].to_numeric
408
- assert_equal(180_000, v.median)
409
- a = [7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 12.0, 12.0, 13.0, 14.0, 14.0, 2.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0].to_numeric
410
- assert_equal(4.5, a.percentil(25))
411
- assert_equal(6.5, a.percentil(50))
412
- assert_equal(9.5, a.percentil(75))
413
- assert_equal(3.0, a.percentil(10))
414
- end
415
-
416
- def test_linear_percentil_strategy
417
- values = [102, 104, 105, 107, 108, 109, 110, 112, 115, 116].shuffle.to_numeric
418
- assert_equal 102, values.percentil(0, :linear)
419
- assert_equal 104.75, values.percentil(25, :linear)
420
- assert_equal 108.5, values.percentil(50, :linear)
421
- assert_equal 112.75, values.percentil(75, :linear)
422
- assert_equal 116, values.percentil(100, :linear)
423
-
424
- values = [102, 104, 105, 107, 108, 109, 110, 112, 115, 116, 118].shuffle.to_numeric
425
- assert_equal 102, values.percentil(0, :linear)
426
- assert_equal 105, values.percentil(25, :linear)
427
- assert_equal 109, values.percentil(50, :linear)
428
- assert_equal 115, values.percentil(75, :linear)
429
- assert_equal 118, values.percentil(100, :linear)
430
- end
431
-
432
- def test_ranked
433
- v1 = [0.8, 1.2, 1.2, 2.3, 18].to_vector(:numeric)
434
- expected = [1, 2.5, 2.5, 4, 5].to_vector(:numeric)
435
- assert_equal(expected, v1.ranked)
436
- v1 = [nil, 0.8, 1.2, 1.2, 2.3, 18, nil].to_vector(:numeric)
437
- expected = [nil, 1, 2.5, 2.5, 4, 5, nil].to_vector(:numeric)
438
- assert_equal(expected, v1.ranked)
439
- end
440
-
441
- def test_numeric
442
- a = Statsample::Vector.new([1, 2, 3, 4, 'STRING'], :numeric)
443
- assert_equal(10, a.sum)
444
- i = 0
445
- factors = a.factors.sort
446
- [0, 1, 2, 3, 4].each{|v|
447
- assert(v == factors[i])
448
- assert(v.class == factors[i].class, "#{v} - #{v.class} != #{factors[i]} - #{factors[i].class}")
449
- i += 1
450
- }
451
- end
452
-
453
- def test_vector_centered
454
- mean = rand
455
- samples = 11
456
- centered = samples.times.map { |i| i - ((samples / 2).floor).to_i }.to_numeric
457
- not_centered = centered.recode { |v| v + mean }
458
- obs = not_centered.centered
459
- centered.each_with_index do |v, i|
460
- assert_in_delta(v, obs[i], 0.0001)
461
- end
462
- end
463
-
464
- def test_vector_standarized
465
- v1 = [1, 2, 3, 4, nil].to_vector(:numeric)
466
- sds = v1.sds
467
- expected = [((1 - 2.5).quo(sds)), ((2 - 2.5).quo(sds)), ((3 - 2.5).quo(sds)), ((4 - 2.5).quo(sds)), nil].to_vector(:numeric)
468
- vs = v1.vector_standarized
469
- assert_equal(expected, vs)
470
- assert_equal(0, vs.mean)
471
- assert_equal(1, vs.sds)
472
- end
473
-
474
- def test_vector_standarized_with_zero_variance
475
- v1 = 100.times.map { |_i| 1 }.to_numeric
476
- exp = 100.times.map { nil }.to_numeric
477
- assert_equal(exp, v1.standarized)
478
- end
479
-
480
- def test_check_type
481
- v = Statsample::Vector.new
482
- v.type = :object
483
- assert_raise(NoMethodError) { v.check_type(:numeric) }
484
- assert(v.check_type(:object).nil?)
485
-
486
- v.type = :numeric
487
-
488
- assert(v.check_type(:numeric).nil?)
489
- assert(v.check_type(:object).nil?)
108
+ should "show a warning when initializing with :nominal, :numeric or :ordinal" do
109
+ assert_output(nil,"WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: nominal has been deprecated.\n") do
110
+ Statsample::Vector.new [1,2,3,4,5,nil,'hello'], :nominal
111
+ end
490
112
 
491
- v.type = :date
492
- assert_raise(NoMethodError) { v.check_type(:numeric) }
493
- assert_raise(NoMethodError) { v.check_type(:numeric) }
494
- assert_raise(NoMethodError) { v.check_type(:object) }
495
- end
113
+ assert_output(nil,"WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: scale has been deprecated.\n") do
114
+ Statsample::Vector.new [1,2,3,4,nil,5], :scale
115
+ end
496
116
 
497
- def test_add
498
- a = Statsample::Vector.new([1, 2, 3, 4, 5], :numeric)
499
- b = Statsample::Vector.new([11, 12, 13, 14, 15], :numeric)
500
- assert_equal([3, 4, 5, 6, 7], (a + 2).to_a)
501
- assert_equal([12, 14, 16, 18, 20], (a + b).to_a)
502
- assert_raise ArgumentError do
503
- a + @c
504
- end
505
- assert_raise TypeError do
506
- a + 'string'
507
- end
508
- a = Statsample::Vector.new([nil, 1, 2, 3, 4, 5], :numeric)
509
- b = Statsample::Vector.new([11, 12, nil, 13, 14, 15], :numeric)
510
- assert_equal([nil, 13, nil, 16, 18, 20], (a + b).to_a)
511
- assert_equal([nil, 13, nil, 16, 18, 20], (a + b.to_a).to_a)
512
- end
117
+ assert_output(nil,"WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: ordinal has been deprecated.\n") do
118
+ Statsample::Vector.new [1,2,3,4,5], :ordinal
119
+ end
513
120
 
514
- def test_minus
515
- a = Statsample::Vector.new([1, 2, 3, 4, 5], :numeric)
516
- b = Statsample::Vector.new([11, 12, 13, 14, 15], :numeric)
517
- assert_equal([-1, 0, 1, 2, 3], (a - 2).to_a)
518
- assert_equal([10, 10, 10, 10, 10], (b - a).to_a)
519
- assert_raise ArgumentError do
520
- a - @c
521
- end
522
- assert_raise TypeError do
523
- a - 'string'
121
+ assert_output(nil,"WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
122
+ Statsample::Vector.new_scale 10, 1
123
+ end
524
124
  end
525
- a = Statsample::Vector.new([nil, 1, 2, 3, 4, 5], :numeric)
526
- b = Statsample::Vector.new([11, 12, nil, 13, 14, 15], :numeric)
527
- assert_equal([nil, 11, nil, 10, 10, 10], (b - a).to_a)
528
- assert_equal([nil, 11, nil, 10, 10, 10], (b - a.to_a).to_a)
529
- end
530
125
 
531
- def test_sum_of_squares
532
- a = [1, 2, 3, 4, 5, 6].to_vector(:numeric)
533
- assert_equal(17.5, a.sum_of_squared_deviation)
534
- end
126
+ should "show a warning when Statsample::Vector shorthands are used" do
127
+ numeric = Statsample::Vector.new([1,2,3,4,nil,5], :numeric)
128
+ assert_equal(numeric, [1,2,3,4,nil,5].to_numeric)
129
+ assert_equal(numeric, [1,2,3,4,nil,5].to_vector(:numeric))
535
130
 
536
- def test_average_deviation
537
- a = [1, 2, 3, 4, 5, 6, 7, 8, 9].to_numeric
538
- assert_equal(20.quo(9), a.average_deviation_population)
539
- end
540
-
541
- def test_samples
542
- srand(1)
543
- assert_equal(100, @c.sample_with_replacement(100).size)
544
- assert_equal(@c.valid_data.to_a.sort, @c.sample_without_replacement(15).sort)
545
- assert_raise ArgumentError do
546
- @c.sample_without_replacement(20)
131
+ obj = Statsample::Vector.new([1,2,3,4,'one','two'], :object)
132
+ assert_equal(obj, [1,2,3,4,'one','two'].to_vector(:object))
547
133
  end
548
- @c.type = :numeric
549
- srand(1)
550
- assert_equal(100, @c.sample_with_replacement(100).size)
551
- assert_equal(@c.valid_data.to_a.sort, @c.sample_without_replacement(15).sort)
552
- end
553
-
554
- def test_valid_data
555
- a = Statsample::Vector.new([1, 2, 3, 4, 'STRING'])
556
- a.missing_values = [-99]
557
- a.add(1, false)
558
- a.add(2, false)
559
- a.add(-99, false)
560
- a.set_valid_data
561
- exp_valid_data = [1, 2, 3, 4, 'STRING', 1, 2]
562
- assert_equal(exp_valid_data, a.valid_data)
563
- a.add(20, false)
564
- a.add(30, false)
565
- assert_equal(exp_valid_data, a.valid_data)
566
- a.set_valid_data
567
- exp_valid_data_2 = [1, 2, 3, 4, 'STRING', 1, 2, 20, 30]
568
- assert_equal(exp_valid_data_2, a.valid_data)
569
- end
570
-
571
- def test_set_value
572
- @c[2] = 10
573
- expected = [5, 5, 10, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99].to_vector
574
- assert_equal(expected.data, @c.data)
575
- end
576
134
 
577
- def test_gsl
578
- if Statsample.has_gsl?
579
- a = Statsample::Vector.new([1, 2, 3, 4, 'STRING'], :numeric)
580
-
581
- assert_equal(2, a.mean)
582
- assert_equal(a.variance_sample_ruby, a.variance_sample)
583
- assert_equal(a.standard_deviation_sample_ruby, a.sds)
584
- assert_equal(a.variance_population_ruby, a.variance_population)
585
- assert_equal(a.standard_deviation_population_ruby, a.standard_deviation_population)
586
- assert_nothing_raised do
587
- a = [].to_vector(:numeric)
135
+ should "test that old shorthands show deprecation warnings" do
136
+ assert_output(nil,"WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
137
+ [1,2,3,4,nil,5].to_scale
588
138
  end
589
- a.add(1, false)
590
- a.add(2, false)
591
- a.set_valid_data
592
- assert_equal(3, a.sum)
593
- b = [1, 2, nil, 3, 4, 5, nil, 6].to_vector(:numeric)
594
- assert_equal(21, b.sum)
595
- assert_equal(3.5, b.mean)
596
- assert_equal(6, b.gsl.size)
597
- c = [10, 20, 30, 40, 50, 100, 1000, 2000, 5000].to_numeric
598
- assert_in_delta(c.skew, c.skew_ruby, 0.0001)
599
- assert_in_delta(c.kurtosis, c.kurtosis_ruby, 0.0001)
600
139
  end
601
140
  end
602
141
 
603
- def test_vector_matrix
604
- v1 = %w(a a a b b b c c).to_vector
605
- v2 = %w(1 3 4 5 6 4 3 2).to_vector
606
- v3 = %w(1 0 0 0 1 1 1 0).to_vector
607
- ex = Matrix.rows([%w(a 1 1), %w(a 3 0), %w(a 4 0), %w(b 5 0), %w(b 6 1), %w(b 4 1), %w(c 3 1), %w(c 2 0)])
608
- assert_equal(ex, Statsample.vector_cols_matrix(v1, v2, v3))
609
- end
610
-
611
- def test_marshalling
612
- v1 = (0..100).to_a.collect { |_n| rand(100) }.to_vector(:numeric)
613
- v2 = Marshal.load(Marshal.dump(v1))
614
- assert_equal(v1, v2)
615
- end
616
-
617
- def test_dup
618
- v1 = %w(a a a b b b c c).to_vector
619
- v2 = v1.dup
620
- assert_equal(v1.data, v2.data)
621
- assert_not_same(v1.data, v2.data)
622
- assert_equal(v1.type, v2.type)
623
-
624
- v1.type = :numeric
625
- assert_not_equal(v1.type, v2.type)
626
- assert_equal(v1.missing_values, v2.missing_values)
627
- assert_not_same(v1.missing_values, v2.missing_values)
628
- assert_equal(v1.labels, v2.labels)
629
- assert_not_same(v1.labels, v2.labels)
630
-
631
- v3 = v1.dup_empty
632
- assert_equal([], v3.data)
633
- assert_not_equal(v1.data, v3.data)
634
- assert_not_same(v1.data, v3.data)
635
- assert_equal(v1.type, v3.type)
636
- v1.type = :numeric
637
- v3.type = :object
638
- assert_not_equal(v1.type, v3.type)
639
- assert_equal(v1.missing_values, v3.missing_values)
640
- assert_not_same(v1.missing_values, v3.missing_values)
641
- assert_equal(v1.labels, v3.labels)
642
- assert_not_same(v1.labels, v3.labels)
643
- end
644
-
645
- def test_paired_ties
646
- a = [0, 0, 0, 1, 1, 2, 3, 3, 4, 4, 4].to_vector(:numeric)
647
- expected = [2, 2, 2, 4.5, 4.5, 6, 7.5, 7.5, 10, 10, 10].to_vector(:numeric)
648
- assert_equal(expected, a.ranked)
649
- end
650
-
651
- def test_dichotomize
652
- a = [0, 0, 0, 1, 2, 3, nil].to_vector
653
- exp = [0, 0, 0, 1, 1, 1, nil].to_numeric
654
- assert_equal(exp, a.dichotomize)
655
- a = [1, 1, 1, 2, 2, 2, 3].to_vector
656
- exp = [0, 0, 0, 1, 1, 1, 1].to_numeric
657
- assert_equal(exp, a.dichotomize)
658
- a = [0, 0, 0, 1, 2, 3, nil].to_vector
659
- exp = [0, 0, 0, 0, 1, 1, nil].to_numeric
660
- assert_equal(exp, a.dichotomize(1))
661
- a = %w(a a a b c d).to_vector
662
- exp = [0, 0, 0, 1, 1, 1].to_numeric
663
- assert_equal(exp, a.dichotomize)
664
- end
665
-
666
- def test_can_be_methods
667
- a = [0, 0, 0, 1, 2, 3, nil].to_vector
668
- assert(a.can_be_numeric?)
669
- a = [0, 's', 0, 1, 2, 3, nil].to_vector
670
- assert(!a.can_be_numeric?)
671
- a.missing_values = ['s']
672
- assert(a.can_be_numeric?)
673
-
674
- a = [Date.new(2009, 10, 10), Date.today, '2009-10-10', '2009-1-1', nil, 'NOW'].to_vector
675
- assert(a.can_be_date?)
676
- a = [Date.new(2009, 10, 10), Date.today, nil, 'sss'].to_vector
677
- assert(!a.can_be_date?)
678
- end
679
-
680
- def test_date_vector
681
- a = [Date.new(2009, 10, 10), :NOW, '2009-10-10', '2009-1-1', nil, 'NOW', 'MISSING'].to_vector(:date, missing_values: ['MISSING'])
682
-
683
- assert(a.type == :date)
684
- expected = [Date.new(2009, 10, 10), Date.today, Date.new(2009, 10, 10), Date.new(2009, 1, 1), nil, Date.today, nil]
685
- assert_equal(expected, a.date_data_with_nils)
142
+ should 'return correct histogram' do
143
+ a = Daru::Vector.new(10.times.map { |v| v })
144
+ hist = a.histogram(2)
145
+ assert_equal([5, 5], hist.bin)
146
+ 3.times do |i|
147
+ assert_in_delta(i * 4.5, hist.get_range(i)[0], 1e-9)
148
+ end
686
149
  end
687
150
  end